I have a hash whose values are arrays. How do I delete the repeated elements in one array, along with the corresponding ids in the other, in the most performant way?
Here's an example of my hash:
hash = {
"id" => "sjfdkjfd",
"name" => "Field Name",
"type" => "field",
"options" => ["Language", "Question", "Question", "Answer", "Answer"],
"option_ids" => ["12345", "23456", "34567", "45678", "56789"]
}
The idea I have is something like this:
h = {}
hash["options"].each_with_index do |value, index|
  if h.key?(value)
    hash["options"].delete_at(index)
    hash["option_ids"].delete_at(index)
  else
    h[value] = index
  end
end
The result should be
hash = {
"id" => "sjfdkjfd",
"name" => "Field Name",
"type" => "field",
"options" => ["Language", "Question", "Answer"],
"option_ids" => ["12345", "23456", "45678"]
}
I know I have to take into consideration that when I delete values from "options" and "option_ids", the indexes of the remaining values are going to change, but I'm not sure how to handle this.
The first idea I had is to zip the values and call uniq, then find a way to return to the initial form:
hash['options'].zip(hash['option_ids']).uniq(&:first).transpose
#=> [["Language", "Question", "Answer"], ["12345", "23456", "45678"]]
Then, via parallel assignment:
hash['options'], hash['option_ids'] = hash['options'].zip(hash['option_ids']).uniq(&:first).transpose
hash #=> {"id"=>"sjfdkjfd", "name"=>"Field Name", "type"=>"field", "options"=>["Language", "Question", "Answer"], "option_ids"=>["12345", "23456", "45678"]}
These are the steps:
hash['options'].zip(hash['option_ids'])
#=> [["Language", "12345"], ["Question", "23456"], ["Question", "34567"], ["Answer", "45678"], ["Answer", "56789"]]
hash['options'].zip(hash['option_ids']).uniq(&:first)
#=> [["Language", "12345"], ["Question", "23456"], ["Answer", "45678"]]
hash = {
"id" => "sjfdkjfd",
"name" => "Field Name",
"type" => "field",
"options" => ["L", "Q", "Q", "Q", "A", "A", "Q"],
"option_ids" => ["12345", "23456", "34567", "dog", "45678", "56789", "cat"]
}
I assume that "repeated elements" refers to contiguous equal elements (only 2 in [1,2,2,1]) as opposed to "duplicated elements" (both 1 and 2 in that example). I also show how the code would be altered (simplified, in fact) if the second interpretation applies.
idx = hash["options"].
each_with_index.
chunk_while { |(a,_),(b,_)| a==b }.
map { |(_,i),*| i }
#=> [0, 1, 4, 6]
hash.merge(
["options", "option_ids"].each_with_object({}) { |k,h| h[k] = hash[k].values_at(*idx) }
)
#=> {"id"=>"sjfdkjfd",
# "name"=>"Field Name",
# "type"=>"field",
# "options"=>["L", "Q", "A", "Q"],
# "option_ids"=>["12345", "23456", "45678", "cat"]}
If "repeated elements" is interpreted to mean that the values of "options" and "option_ids" are to only have the first three elements shown above, calculate idx as follows:
idx = hash["options"].
each_with_index.
uniq { |s,_| s }.
map(&:last)
#=> [0, 1, 4]
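For completeness, applying the same merge as above with this idx yields the fully-deduplicated result (derived from the same hash):
hash.merge(
  ["options", "option_ids"].each_with_object({}) { |k,h| h[k] = hash[k].values_at(*idx) }
)
#=> {"id"=>"sjfdkjfd",
#    "name"=>"Field Name",
#    "type"=>"field",
#    "options"=>["L", "Q", "A"],
#    "option_ids"=>["12345", "23456", "45678"]}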
See Enumerable#chunk_while (Enumerable#slice_when could be used instead) and Array#values_at. The steps are as follows.
a = hash["options"]
#=> ["L", "Q", "Q", "Q", "A", "A", "Q"]
e0 = a.each_with_index
#=> #<Enumerator: ["L", "Q", "Q", "Q", "A", "A", "Q"]:each_with_index>
e1 = e0.chunk_while { |(a,_),(b,_)| a==b }
#=> #<Enumerator: #<Enumerator::Generator:0x000055e4bcf17740>:each>
We can see the values the enumerator e1 will generate and pass to map by converting it to an array:
e1.to_a
#=> [[["L", 0]],
# [["Q", 1], ["Q", 2], ["Q", 3]],
# [["A", 4], ["A", 5]], [["Q", 6]]]
Continuing,
idx = e1.map { |(_,i),*| i }
#=> [0, 1, 4, 6]
c = ["options", "option_ids"].
each_with_object({}) { |k,h| h[k] = hash[k].values_at(*idx) }
#=> {"options"=>["L", "Q", "A", "Q"],
# "option_ids"=>["12345", "23456", "45678", "cat"]}
hash.merge(c)
#=> {"id"=>"sjfdkjfd",
# "name"=>"Field Name",
# "type"=>"field",
# "options"=>["L", "Q", "A", "Q"],
# "option_ids"=>["12345", "23456", "45678", "cat"]}
Using Array#transpose
hash = {
"options" => ["Language", "Question", "Question", "Answer", "Answer"],
"option_ids" => ["12345", "23456", "34567", "45678", "56789"]
}
hash.values.transpose.uniq(&:first).transpose.map.with_index {|v,i| [hash.keys[i], v]}.to_h
#=> {"options"=>["Language", "Question", "Answer"], "option_ids"=>["12345", "23456", "45678"]}
After the OP edit:
hash = {
"id" => "sjfdkjfd",
"name" => "Field Name",
"type" => "field",
"options" => ["Language", "Question", "Question", "Answer", "Answer"],
"option_ids" => ["12345", "23456", "34567", "45678", "56789"]
}
hash_array = hash.to_a.select {|v| v.last.is_a?(Array)}.transpose
hash.merge([hash_array.first].push(hash_array.last.transpose.uniq(&:first).transpose).transpose.to_h)
#=> {"id"=>"sjfdkjfd", "name"=>"Field Name", "type"=>"field", "options"=>["Language", "Question", "Answer"], "option_ids"=>["12345", "23456", "45678"]}
data = [
"Company one" => {
"number_1" => 46,
"number_2" => 3055,
"country" => "USA"
},
"Company two" => {
"number_1" => 32,
"number_2" => 6610,
"country" => "USA"
},
"Company three" => {
"number_1" => 40,
"number_2" => 9128,
"country" => "USA"
}
]
So I have this array, and I'm trying to find which of the companies has the biggest number in "number_2". The largest would be Company three, with 9128.
This code puts the largest number, 9128:
def number(data)
  collected_array = []
  data.each do |company_hash|
    collected_array = company_hash.map do |k, v|
      v["number_2"]
    end
  end
  puts collected_array.max
end

number(data)
But I'm trying to puts the company name with the largest number, which would be "Company three". I've tried .keys and other approaches, but they give me errors.
I've tried this way:
def number(data)
  collected_array = []
  data.each do |company_hash|
    collected_array = company_hash.map do |k, v|
      v["number_2"]
      k
    end
  end
  puts collected_array.max
end

number(data)
but it gives me "Company two" rather than "Company three", which is the company with the highest number.
Your second attempt returns "Company two" because the block's last expression is k, so collected_array ends up holding the company names, and max compares them as strings ("Company two" sorts after "Company three" alphabetically).
As stated by @Cary, this can be simplified by accessing the first element of data and calling max_by on it, comparing the "number_2" value of the hash each block iteration receives.
The result is a two-element array: the first element is the company name, the second (and last) is the hash containing its data:
data = [
"Company one" => {
"number_1" => 46,
"number_2" => 3055,
"country" => "USA"
},
"Company two" => {
"number_1" => 32,
"number_2" => 6610,
"country" => "USA"
},
"Company three" => {
"number_1" => 40,
"number_2" => 9128,
"country" => "USA"
}
]
max_company = data.first.max_by { |_, h| h['number_2'] }
p max_company.first # "Company three"
p max_company.last['number_2'] # 9128
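Note that the data literal above, having no braces around the key-value pairs, actually builds an array containing a single hash, which is why data.first yields all three companies at once:
data.size
#=> 1
data.first.keys
#=> ["Company one", "Company two", "Company three"]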
Let's say I want to access the values of a hash like this:
munsters = {
"Herman" => { "age" => 32, "gender" => "male" },
"Lily" => { "age" => 30, "gender" => "female" },
"Grandpa" => { "age" => 402, "gender" => "male" },
"Eddie" => { "age" => 10, "gender" => "male" },
"Marilyn" => { "age" => 23, "gender" => "female"}
}
I could use #each with two parameters:
munsters.each do |key, value|
puts "#{name} is a #{value["age"]}-year-old #{value["gender"]}."
end
Or I could use #each_pair with two parameters:
munsters.each_pair do |key, value|
puts "#{name} is a #{value["age"]}-year-old #{value["gender"]}."
end
Perhaps the difference between the two is not borne out in this simple example, but can someone help me to understand the advantage of using #each_pair over #each ?
Because Hash is an Enumerable, it has to have an each method. each_pair may be a clearer name, since it strongly suggests that two-element arrays containing key-value pairs are passed to the block.
They are aliases for each other: they share the same source code.
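One way to see those key-value pairs directly is to give the block a single parameter; each iteration then receives a two-element [key, value] array (a quick illustration using the munsters hash above):
munsters.each { |pair| p pair }
# ["Herman", {"age"=>32, "gender"=>"male"}]
# ["Lily", {"age"=>30, "gender"=>"female"}]
# ...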
I am trying to filter and reorder an array of hashes. The filter and the order are defined by another array of strings, which represent the values of the "slug" key. The resulting array should contain only the hashes whose "slug" value is contained in the slugs array, ordered in that same order. If I have the first array as:
data = [
{
"slug" => "lemon",
"label" => "Lemon Label"
},
{
"slug" => "table",
"label" => "Table Label"
},
{
"slug" => "peach",
"label" => "Peach Label"
},
{
"slug" => "strawberry",
"label" => "Strawberry Label"
},
{
"slug" => "bread",
"label" => "Bread Label"
},
{
"slug" => "orange",
"label" => "Orange Label"
}
]
and the second array as:
ordered_keys = ["orange", "lemon", "strawberry"]
then, the result should be an array like this:
result = [
{
"slug" => "orange",
"label" => "Orange Label"
},
{
"slug" => "lemon",
"label" => "Lemon Label"
},
{
"slug" => "strawberry",
"label" => "Strawberry Label"
}
]
I managed to get the filtering part working with this:
result = data.select { |x| ordered_keys.include? x["slug"] }
but I cannot find a smart way to get the ordering. Any ideas?
Use map to translate your array of ordered keys into the corresponding hashes. The order of the input array to map defines the order of the output array.
ordered_keys.map{|k| data.find{|h| h["slug"] == k}}
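If data is large, rescanning it for every key can be avoided by building a slug-to-hash lookup first. A sketch (the by_slug name is mine; this assumes every slug in ordered_keys appears in data, otherwise the resulting nils would need to be compacted out):
by_slug = data.each_with_object({}) { |h, acc| acc[h["slug"]] = h }
result = ordered_keys.map { |k| by_slug[k] }
#=> same hashes, in ordered_keys order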
a = {"rows" => [{"id" => "231-z", "name" => 'jon', "age"=> 27, "state" => 'AL'},
{"id" => "4121-x", "name" => 'ton', "age"=> 37, "state" => 'VA'}
]
}
b = {"rows" => [{"key" => ["xyz","4121-x"], "value" =>{"sum" => 12312, "realage" => 29}},
{"key" => ["xyz","231-z"], "value" =>{"sum" => 1212, "realage" => 33}}
]
}
In hash a, age is incorrect. In hash b, realage is correct. Also, in hash b, the id is the second value of the array under "key", and it maps to the id in hash a: "4121-x" and "231-z" correspond to the ids in hash a.
I want to correct the age in hash a by replacing it with the realage from hash b.
I can do it in multiple steps, but is it possible in a one-liner, or something very short? The corrected hash a should look like:
a = {"rows" => [{"id" => "231-z", "name" => 'jon', "age"=> 33, "state" => 'AL'},
{"id" => "4121-x", "name" => 'ton', "age"=> 29, "state" => 'VA'}
]
}
Does this look reasonable?
a['rows'].each_with_index do |ah, i|
  (bh = b['rows'].select { |h| h['key'].last == ah['id'] }.first) &&
    a['rows'][i] = ah.update('age' => bh['value']['realage'])
end
p a
#=> {"rows"=>[
#     {"id"=>"231-z", "name"=>"jon", "age"=>33, "state"=>"AL"},
#     {"id"=>"4121-x", "name"=>"ton", "age"=>29, "state"=>"VA"}
#   ]}
Please note it will update a only if a corresponding id is found in b.
Also, the order of the rows does not matter, nor does the number of rows; all that matters is that b has a row with the same id as the row being processed in a.
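A slightly shorter variant of the same idea, using find instead of select { ... }.first and dropping the redundant reassignment (a sketch with the same behavior assumed):
a['rows'].each do |row|
  match = b['rows'].find { |h| h['key'].last == row['id'] }
  row['age'] = match['value']['realage'] if match
end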