Return hash with modified values in Ruby - ruby

I'm trying this:
{:id => 5, :foos => [1,2,3]}.each {|k,v| v.to_s}
But that's returning this:
{:id=>5, :foos=>[1, 2, 3]}
I'd like to see this:
{:id=>"5", :foos=>"[1, 2, 3]"}
I've also tried variations of Hash#collect and Hash#map. Any ideas?

you could use Object#inspect:
{ :id => 5, :foos => [1, 2, 3] }.inject({}) do |hash, (key, value)|
hash.merge key => value.inspect
end
which returns:
{ :foos => "[1, 2, 3]", :id => "5" }
or if you want it to be destructive:
hash = { :id => 5, :foos => [1, 2, 3] }
hash.each_key { |key| hash[key] = hash[key].inspect }

Your stuff doesn't work because v.to_s doesn't modify v, so essentially the block doesn't do anything.
You could do it like this:
hash = {:id => 5, :foos => [1,2,3]}
hash.each_key { |k| hash[k] = hash[k].to_s }
If you don't want to modify the hash:
hash = {:id => 5, :foos => [1,2,3]}
new_hash = {}
hash.each_key { |k| new_hash[k] = hash[k].to_s }

Related

Convert array into hash and add a counter value to the new hash

I have the following array of hashes:
[
{"BREAD" => {:price => 1.50, :discount => true }},
{"BREAD" => {:price => 1.50, :discount => true }},
{"MARMITE" => {:price => 1.60, :discount => false}}
]
And I would like to translate this array into a hash that includes the counts for each item:
Output:
{
"BREAD" => {:price => 1.50, :discount => true, :count => 2},
"MARMITE" => {:price => 1.60, :discount => false, :count => 1}
}
I have tried two approaches to translate the array into a hash.
new_cart = cart.inject(:merge)
hash = Hash[cart.collect { |item| [item, ""] } ]
Both work but then I am stumped at how to capture and pass the count value.
Expected output
{
"BREAD" => {:price => 1.50, :discount => true, :count => 2},
"MARMITE" => {:price => 1.60, :discount => false, :count => 1}
}
We are given the array:
arr = [
{"BREAD" => {:price => 1.50, :discount => true }},
{"BREAD" => {:price => 1.50, :discount => true }},
{"MARMITE" => {:price => 1.60, :discount => false}}
]
and make the assumption that each hash has a single key and if two hashes have the same (single) key, the value of that key is the same in both hashes.
The first step is to create an empty hash to which we will add key-value pairs:
h = {}
Now we loop through arr to build the hash h. I've added a puts statement to display intermediate values in the calculation.
arr.each do |g|
k, v = g.first
puts "k=#{k}, v=#{v}"
if h.key?(k)
h[k][:count] += 1
else
h[k] = v.merge({ :count => 1 })
end
end
displays:
k=BREAD, v={:price=>1.5, :discount=>true}
k=BREAD, v={:price=>1.5, :discount=>true}
k=MARMITE, v={:price=>1.6, :discount=>false}
and returns:
#=> [{"BREAD" =>{:price=>1.5, :discount=>true}},
# {"BREAD" =>{:price=>1.5, :discount=>true}},
# {"MARMITE"=>{:price=>1.6, :discount=>false}}]
each always returns its receiver (here arr), which is not what we want.
h #=> {"BREAD"=>{:price=>1.5, :discount=>true, :count=>2},
# "MARMITE"=>{:price=>1.6, :discount=>false, :count=>1}}
is the result we need. See Hash#key? (aka, has_key?), Hash#[], Hash#[]= and Hash#merge.
Now let's wrap this in a method.
# Collapses an array of single-key hashes into one hash keyed by item name,
# adding a :count entry that records how often each name appeared.
# The input hashes are left untouched (merge returns a new hash).
def hashify(arr)
  tally = {}
  arr.each do |entry|
    # Each entry holds exactly one pair: name => details.
    name, details = entry.first
    if tally.key?(name)
      tally[name][:count] += 1
    else
      tally[name] = details.merge({ :count => 1 })
    end
  end
  # each returns its receiver, so the accumulated hash is returned explicitly.
  tally
end
hashify(arr)
#=> {"BREAD"=>{:price=>1.5, :discount=>true, :count=>2},
# "MARMITE"=>{:price=>1.6, :discount=>false, :count=>1}}
Rubyists would often use the method Enumerable#each_with_object to simplify.
# Same result as the each-based version: one hash keyed by item name with a
# :count entry per item. each_with_object supplies the accumulator hash and
# returns it, so no local variable or trailing return value is needed.
def hashify(arr)
  arr.each_with_object({}) do |entry, tally|
    # Each entry holds exactly one pair: name => details.
    name, details = entry.first
    if tally.key?(name)
      tally[name][:count] += 1
    else
      tally[name] = details.merge({ :count => 1 })
    end
  end
end
Compare the two methods to identify their differences. See Enumerable#each_with_object.
When, as here, the keys are symbols, Ruby allows you to use the shorthand { count: 1 } for { :count=>1 }. Moreover, she permits you to write :count => 1 or count: 1 without the braces when the hash is an argument. For example,
{}.merge('cat'=>'meow', dog:'woof', :pig=>'oink')
#=> {"cat"=>"meow", :dog=>"woof", :pig=>"oink"}
It's probably more common to see the form count: 1 when keys are symbols and for the braces to be omitted when a hash is an argument.
Here's a further refinement you might see. First create
h = arr.group_by { |h| h.keys.first }
#=> {"BREAD" =>[{"BREAD"=>{:price=>1.5, :discount=>true}},
# {"BREAD"=>{:price=>1.5, :discount=>true}}],
# "MARMITE"=>[{"MARMITE"=>{:price=>1.6, :discount=>false}}]}
See Enumerable#group_by. Now convert the values (arrays) to their sizes:
counts = h.transform_values { |arr| arr.size }
#=> {"BREAD"=>2, "MARMITE"=>1}
which can be written in abbreviated form:
counts = h.transform_values(&:size)
#=> {"BREAD"=>2, "MARMITE"=>1}
See Hash#transform_values. We can now write:
uniq_arr = arr.uniq
#=> [{"BREAD"=>{:price=>1.5, :discount=>true}},
# {"MARMITE"=>{:price=>1.6, :discount=>false}}]
uniq_arr.each_with_object({}) do |g,h|
puts "g=#{g}"
k,v = g.first
puts " k=#{k}, v=#{v}"
h[k] = v.merge(counts: counts[k])
puts " h=#{h}"
end
which displays:
g={"BREAD"=>{:price=>1.5, :discount=>true}}
k=BREAD, v={:price=>1.5, :discount=>true}
h={"BREAD"=>{:price=>1.5, :discount=>true, :counts=>2}}
g={"MARMITE"=>{:price=>1.6, :discount=>false}}
k=MARMITE, v={:price=>1.6, :discount=>false}
h={"BREAD"=>{:price=>1.5, :discount=>true, :counts=>2},
"MARMITE"=>{:price=>1.6, :discount=>false, :counts=>1}}
and returns:
#=> {"BREAD"=>{:price=>1.5, :discount=>true, :counts=>2},
# "MARMITE"=>{:price=>1.6, :discount=>false, :counts=>1}}
See Array#uniq.
This did the trick:
arr = [
{ bread: { price: 1.50, discount: true } },
{ bread: { price: 1.50, discount: true } },
{ marmite: { price: 1.60, discount: false } }
]
Get the count for each occurrence of hash, add as key value pair and store:
h = arr.uniq.each { |x| x[x.first.first][:count] = arr.count(x) }
Then convert hashes into arrays, flatten to a single array then construct a hash:
Hash[*h.collect(&:to_a).flatten]
#=> {:bread=>{:price=>1.50, :discount=>true, :count=>2}, :marmite=>{:price=>1.60, :discount=>false, :count=>1}}
Combined a couple of nice ideas from here:
https://raycodingdotnet.wordpress.com/2013/08/05/array-of-hashes-into-single-hash-in-ruby/
and here:
http://carol-nichols.com/2015/08/07/ruby-occurrence-couting/

Most performant way to group/summarise two hashes?

I have two hashes with some data that I need to aggregate. The first one is a mapping of which ids (id_1, id_2, id_3, id_4) belong under what category (a, b, c):
hash_1 = {'a' => ['id_1','id_2'], 'b' => ['id_3'], 'c' => ['id_4']}
The second hash holds values of how many events happened per id for a given date (date_1, date_2, date_3):
hash_2 = {
'id_1' => {'date_1' => 5, 'date_2' => 6, 'date_3' => 8},
'id_2' => {'date_1' => 0, 'date_3' => 6},
'id_3' => {'date_1' => 0, 'date_2' => nil, 'date_3' => 1},
'id_4' => {'date_1' => 10, 'date_2' => 1}
}
What I want is to get the total event per category (a,b,c). For the above example, the result would look something like:
hash_3 = {'a' => (5+6+8+0+6), 'b' => (0+0+1), 'c' => (10+1)}
My problem is, that there are about 5000 categories, each pointing to typically 1 to 3 ids, and each ID having event counts for 30 dates or more. So this takes quite a bit of computation. What will be the most performant (time effective) way to do this grouping in Ruby?
update
This is what I tried so far (took like 6-8 seconds!, horribly slow):
# Builds a hash that maps every category in hash_1 to the total event count
# of the ids listed under that category.
# NOTE(review): hash_1 is not defined in this snippet; presumably it is a
# method or accessor of the enclosing scope — confirm.
def total_clicks_per_category
# tap yields the fresh hash to the block and then returns that same hash.
{}.tap do |res|
hash_1.each do |cat, ids|
res[cat] = total_event_per_ids(ids)
end
end
end
# Adds up every event count stored in hash_2 for the given ids.
# An id with no entry in hash_2 contributes 0, because fetch falls back to {}.
# NOTE(review): hash_2 is not defined in this snippet; presumably it is a
# method or accessor of the enclosing scope — confirm.
def total_event_per_ids(ids)
ids.reduce(0) do |memo, id|
events = hash_2.fetch(id, {})
# reduce(:+) returns nil for an empty array, hence the `|| 0`.
# NOTE(review): a nil count among events.values would make reduce(:+) raise.
memo + (events.values.reduce(:+) || 0)
end
end
P.S. I’m using Ruby 2.3.
I'm writing this on a phone so I cannot test right now, but it looks OK.
# Step 1: total the counts of each id once (nil counts are dropped by compact).
# inject(0, :+) replaces Array#sum, which only exists from Ruby 2.4 on, while
# the question targets Ruby 2.3.
g = hash_2.each_with_object({}) { |(k,v),totals| totals[k] = v.values.compact.inject(0, :+) }
# Step 2: add up the per-id totals of each category. values_at yields nil for
# an id that has no entry in hash_2, so compact skips those ids.
hash_3 = hash_1.each_with_object({}) { |(k,v),h| h[k] = g.values_at(*v).compact.inject(0, :+) }
First, create an intermediate hash that holds the sum of hash_2:
# compact drops nil counts (id_3 has 'date_2' => nil), which would otherwise
# make inject(:+) raise; the 0 seed gives 0 for an id with no counts at all.
hash_4 = hash_2.map{|k, v| [k, v.values.compact.inject(0, :+)]}.to_h
# => {"id_1"=>19, "id_2"=>6, "id_3"=>1, "id_4"=>11}
Then do the final summation:
hash_3 = hash_1.map{|k, v| [k, v.map{|k| hash_4[k]}.inject(:+)]}.to_h
# => {"a"=>25, "b"=>1, "c"=>11}
Theory
5000*3*30 isn't that many. Ruby probably will need a second at most for this kind of job.
Hash lookup is fast by default, you won't be able to optimize much.
You could pre-calculate hash_2_sum, though :
hash_2_sum = {
'id_1' => 5+6+8,
'id_2' => 0+6,
'id_3' => 0+0+1,
'id_4' => 10+1
}
A loop on hash1 with hash_2_sum lookup, and you're done.
Code
Your example has been updated with some nil values. You need to remove them with compact, and make sure the sum is 0 when no element is found with inject(0, :+):
hash_1 = {'a' => ['id_1','id_2'], 'b' => ['id_3'], 'c' => ['id_4']}
hash_2 = {
'id_1' => { 'date_1' => 5, 'date_2' => 6, 'date_3' => 8 },
'id_2' => { 'date_1' => 0, 'date_3' => 6 },
'id_3' => { 'date_1' => 0, 'date_2' => nil, 'date_3' => 1 },
'id_4' => { 'date_1' => 10, 'date_2' => 1 }
}
hash_2_sum = hash_2.each_with_object({}) do |(key, dates), sum|
sum[key] = dates.values.compact.inject(0, :+)
end
hash_3 = hash_1.each_with_object({}) do |(key, ids), sum|
sum[key] = hash_2_sum.values_at(*ids).inject(0, :+)
end
# {"a"=>25, "b"=>1, "c"=>11}
Note
{}.tap do |res|
hash_1.each do |cat, ids|
res[cat] = total_event_per_ids(ids)
end
end
isn't very readable IMHO.
You can either use each_with_object or Array#to_h :
result = [1, 2, 3].each_with_object({}) do |i, hash|
hash[i] = i * i
end
#=> {1=>1, 2=>4, 3=>9}
result = [1, 2, 3].map { |i| [i, i * i] }.to_h
#=> {1=>1, 2=>4, 3=>9}

Averaging values across multiple hashes

EDIT I am accepting @CarySwoveland's answer because he got the closest on the first try, accounting for the most scenarios, and outputting the data into a hash so that you don't need to rely on order. Many honorable mentions though! Be sure to check out @ArupRakshit's answer as well if you want your output in an array!
I have an array of hashes like:
@my_hashes = [{"key1" => "10", "key2" => "5"...},{"key1" => "", "key2" => "9"...},{"key1" => "6", "key2" => "4"...}]
and I want an average for each key across the array. ie. 8.0,6.0...
Note that the hashes all have the exact same keys, in order, even if the value for the key is blank. Right now this works:
<%# For every key of the first hash, print the average of that key's non-blank values across all hashes. %>
<%= @my_hashes[0].keys.each do |key| %>
<% sum = 0 %>
<% count = 0 %>
<% @my_hashes.each do |hash| %>
<% sum += hash[key].to_f %>
<%# Blank values add 0.0 to the sum above, so only non-blank values are counted. %>
<% count += if hash[key].blank? then 0 else 1 end %>
<% end %>
<%= (sum/count) %>
<% end %>
but I feel like there may be a better way... any thoughts?
Do as below
@my_hashes = [{"key1" => "10", "key2" => "5"},{"key1" => "", "key2" => "9"},{"key1" => "6", "key2" => "4"}]
# One average per key, in the key order of the first hash.
# blank? is not core Ruby; it comes from ActiveSupport (Rails).
ar = @my_hashes[0].keys.map do |k|
  # Non-blank values for this key, converted to floats (blank ones become nil and are compacted away).
  a = @my_hashes.map { |h| h[k].to_f unless h[k].blank? }.compact
  a.inject(:+)/a.size unless a.empty? #Accounting for "key1" => nil or "key1" => ""
end
ar # => [8.0, 6.0]
Another way:
@my_hashes = [ {"key1"=>"10", "key2"=>"5"},
               {"key1"=> "",  "key2"=>"9"},
               {"key1"=> "6", "key2"=>"4"} ]

# Mean of a numeric array as a Float; 0.0 for an empty array instead of dividing by zero.
def avg(arr) arr.any? ? arr.reduce(:+)/arr.size.to_f : 0.0 end

# First pass: gather the non-empty values of every key as floats (the hash's
# default block creates the array on first access).
# Second pass: replace each array of values by its average.
@my_hashes.each_with_object(Hash.new { |h,k| h[k]=[] }) { |mh,h|
  mh.keys.each { |k| h[k] << mh[k].to_f unless mh[k].empty? } }
  .each_with_object({}) { |(k,v),h| h[k] = avg(v) }
# => {"key1"=>8.0, "key2"=>6.0}
The object created by the first each_with_object is a hash whose default value is an empty array. That hash is represented by the block variable h. This means that if h[k] << mh[k].to_f is to be executed when h.key?(k) => false, h[k] = [] is executed first.
One could alternatively drop the avg method and create a temporary variable before computing the averages:
# Temporary hash: key => array of that key's non-empty values as floats.
h = @my_hashes.each_with_object(Hash.new { |h,k| h[k]=[] }) { |mh,h|
  mh.keys.each { |k| h[k] << mh[k].to_f unless mh[k].empty? } }
# Average each array inline; an empty array yields 0.0 rather than dividing by zero.
h.each_with_object({}) { |(k,v),h|
  h[k] = v.any? ? v.reduce(:+)/v.size.to_f : 0.0 }
I think I found a quite elegant solution.
Here is a sample array:
a = [
{:a => 2, :b => 10},
{:a => 4, :b => 20},
{:a => 2, :b => 10},
{:a => 8, :b => 40},
]
And the solution:
class Array
  # Arithmetic mean of the elements.
  #
  # The sum is converted to a Float before dividing, so integer elements are
  # not truncated ([1, 2].average is 1.5, not 1).
  #
  # @return [Float, nil] the mean, or nil when the array is empty
  def average
    return nil if empty?

    reduce(:+).to_f / size
  end
end
r = a[0].keys.map do |key|
[key, a.map { |hash| hash[key] }.average]
end
puts Hash[*r.flatten]
Try this
@my_hashes = [{"key1" => "10", "key2" => "5"},{"key1" => "", "key2" => "9"},{"key1" => "6", "key2" => "4"}]
# Transpose the per-hash rows of values into per-key columns, then average each column.
# Empty strings convert to 0.0 and still count toward the divisor here.
average_values = @my_hashes.map(&:values).transpose.map { |arr|
  arr.map(&:to_f).inject(:+) / arr.size
}
# Pair every key of the first hash with its average.
with_keys = Hash[@my_hashes.first.keys.zip(average_values)]
average_values # => [5.333333333333333, 6.0]
with_keys # => {"key1"=>5.333333333333333, "key2"=>6.0}
if you want to exclude empty values from the average, could change average_values to reject empty values
# Drop empty strings first so they affect neither the sum nor the divisor.
average_values = @my_hashes.map(&:values).transpose.map { |arr|
  arr.reject!(&:empty?)
  arr.map(&:to_f).inject(:+) / arr.size
}
average_values # => [8.0, 6.0]
No super clean solution, but I would write:
a = [
{:a => 2, :b => 10},
{:a => 4, :b => 20},
{:a => 2, :b => 10},
{:a => 8, :b => 40},
]
grouped = a.flat_map(&:to_a).group_by{|x,|x}
grouped.keys.each do |key|
len = grouped[key].size
grouped[key] = 1.0 * grouped[key].map(&:last).inject(:+) / len
end

Ruby: Link two arrays of objects by attribute value

I'm pretty new in Ruby programming. In Ruby there are plenty ways to write elegant code. Is there any elegant way to link two arrays with objects of the same type by attribute value?
It's hard to explain. Let's look at the next example:
a = [ { :id => 1, :value => 1 }, { :id => 2, :value => 2 }, { :id => 3, :value => 3 } ]
b = [ { :id => 1, :value => 2 }, { :id => 3, :value => 4 } ]
c = link a, b
# Result structure after linkage.
c = {
"1" => {
:a => { :id => 1, :value => 1 },
:b => { :id => 1, :value => 2 }
},
"3" => {
:a => { :id => 3, :value => 3 },
:b => { :id => 3, :value => 4 }
}
}
So the basic idea is to get pairs of objects from different arrays by their common ID and construct a hash, which will give this pair by ID.
Thanks in advance.
If you want to take an adventure through Enumerable, you could say this:
(a.map { |h| [:a, h] } + b.map { |h| [:b, h] })
.group_by { |_, h| h[:id] }
.select { |_, a| a.length == 2 }
.inject({}) { |h, (n, v)| h.update(n => Hash[v]) }
And if you really want the keys to be strings, say n.to_s => Hash[v] instead of n => Hash[v].
The logic works like this:
We need to know where everything comes from, so we decorate the little hashes with :a and :b symbols to track their origins.
Then add the decorated arrays together into one list so that...
group_by can group things into almost-the-final-format.
Then find the groups of size two since those groups contain the entries that appeared in both a and b. Groups of size one only appeared in one of a or b so we throw those away.
Then a little injection to rearrange things into their final format. Note that the arrays we built in (1) just somehow happen to be in the format that Hash[] is looking for.
If you wanted to do this in a link method then you'd need to say things like:
link :a => a, :b => b
so that the method will know what to call a and b. This hypothetical link method also easily generalizes to more arrays:
# Links hashes from several labelled arrays by their shared :id.
#
# Only ids whose group has as many entries as there are labelled arrays are
# kept. An empty input returns {} (flat_map yields [] where inject(:+) on an
# empty list would yield nil).
#
# @param input [Hash{Object => Array<Hash>}] label => array of hashes, each with an :id key
# @return [Hash] id => { label => hash, ... }
def link(input)
  input
    # Tag every hash with the label of the array it came from.
    .flat_map { |label, items| items.map { |item| [label, item] } }
    .group_by { |_, item| item[:id] }
    .select { |_, tagged| tagged.length == input.length }
    # Each group is a list of [label, hash] pairs, which is the shape Hash[] accepts.
    .each_with_object({}) { |(id, tagged), linked| linked[id] = Hash[tagged] }
end
link :a => [...], :b => [...], :c => [...]
I assume that, for any two elements h1 and h2 of a (or of b), h1[:id] != h2[:id].
I would do this:
# Indexes an array of hashes by their :id value.
# When several hashes share an :id, the last one wins.
def convert(arr)
  arr.each_with_object({}) { |record, by_id| by_id[record[:id]] = record }
end
ah, bh = convert(a), convert(b)
c = ah.keys.each_with_object({}) {|k,h|h[k]={a: ah[k], b: bh[k]} if bh.key?(k)}
# => {1=>{:a=>{:id=>1, :value=>1}, :b=>{:id=>1, :value=>2}},
# 3=>{:a=>{:id=>3, :value=>3}, :b=>{:id=>3, :value=>4}}}
Note that:
ah = convert(a)
# => {1=>{:id=>1, :value=>1}, 2=>{:id=>2, :value=>2}, 3=>{:id=>3, :value=>3}}
bh = convert(b)
# => {1=>{:id=>1, :value=>2}, 3=>{:id=>3, :value=>4}}
Here's a second approach. I don't like it as well, but it represents a different way of looking at the problem.
# Returns a new array with the hashes ordered by their :id value;
# the array passed in is not modified.
def sort_by_id(a)
  a.sort_by { |record| record[:id] }
end
c = Hash[*sort_by_id(a.select {|ha| b.find {|hb| hb[:id] == ha[:id]}})
.zip(sort_by_id(b))
.map {|ha,hb| [ha[:id], {a: ha, b: hb}]}
.flatten]
Here's what's happening. The first step is to select only the elements ha of a for which there is an element hb of b for which ha[:id] == hb[:id]. Then we sort both (what's left of) a and b on h[:id], zip them together and then make the hash c.
r1 = a.select {|ha| b.find {|hb| hb[:id] == ha[:id]}}
# => [{:id=>1, :value=>1}, {:id=>3, :value=>3}]
r2 = sort_by_id(r1)
# => [{:id=>1, :value=>1}, {:id=>3, :value=>3}]
r3 = sort_by_id(b)
# => [{:id=>1, :value=>2}, {:id=>3, :value=>4}]
r4 = r2.zip(r3)
# => [[{:id=>1, :value=>1}, {:id=>1, :value=>2}],
# [{:id=>3, :value=>3}, {:id=>3, :value=>4}]]
r5 = r4.map {|ha,hb| [ha[:id], {a: ha, b: hb}]}
# => [[1, {:a=>{:id=>1, :value=>1}, :b=>{:id=>1, :value=>2}}],
# [3, {:a=>{:id=>3, :value=>3}, :b=>{:id=>3, :value=>4}}]]
r6 = r5.flatten
# => [1, {:a=>{:id=>1, :value=>1}, :b=>{:id=>1, :value=>2}},
# 3, {:a=>{:id=>3, :value=>3}, :b=>{:id=>3, :value=>4}}]
c = Hash[*r6]
# => {1=>{:a=>{:id=>1, :value=>1}, :b=>{:id=>1, :value=>2}},
# 3=>{:a=>{:id=>3, :value=>3}, :b=>{:id=>3, :value=>4}}}
Ok, I've found the answer by myself. Here is a quite short line of code, which should do the trick:
Hash[a.product(b)
.select { |pair| pair[0][:id] == pair[1][:id] }
.map { |pair| [pair[0][:id], { :a => pair[0], :b => pair[1] }] }]
The product method gives us all possible pairs, then we filter them by equal IDs of pair elements. And then we map pairs to the special form, which will produce a Hash we are looking for.
So Hash[["key1", "value1"], ["key2", "value2"]] returns { "key1" => "value1", "key2" => "value2" }. And I use this to get the answer on my question.
Thanks.
P.S.: you can use pair.first instead of pair[0] and pair.last instead of pair[1] for better readability.
UPDATE
As Cary pointed out, it is better to replace |pair| with |ha, hb| to avoid these ugly indices:
Hash[a.product(b)
.select { |ha, hb| ha[:id] == hb[:id] }
.map { |ha, hb| [ha[:id], { :a => ha, :b => hb }] }]

Add up values in hashes

array1 = { "d1" => 2, "d2" => 3}
array2 = { "d1" => 3, "d3" => 10}
i want this:
array3 = { "d1" => 5, "d2" => 3, "d3" => 10}
i tried this, it doesn't work. i am getting the error: "NoMethodError: undefined method `+' for nil:NilClass"
array3 = {}
array1.each {|key, count| array3[key] += count}
array2.each {|key, count| array3[key] += count}
You're getting the error because array1.each tries to access array3['d1'], which doesn't exist yet, so it returns nil as the value. You just need to define array3 a bit more specifically, using Hash.new to tell it to assign 0 to all keys by default.
array3 = Hash.new(0)
array1.each {|key, count| array3[key] += count}
array2.each {|key, count| array3[key] += count}
Be careful going forward, though: the object you pass as the default value can be modified, so if you were to write my_hash = Hash.new(Array.new); my_hash[:some_key] << 3 then all keys that receive a default value will share the same object. This is one of those strange gotchas in Ruby, and you would want to use the block version of Hash.new in that case.
It is much simpler:
=> h1 = { "a" => 100, "b" => 200 }
{"a"=>100, "b"=>200}
=> h2 = { "b" => 254, "c" => 300 }
{"b"=>254, "c"=>300}
=> h1.merge(h2) {|key, oldval, newval| newval + oldval}
{"a"=>100, "b"=>454, "c"=>300}
it was undocumented in core-1.8.7 but you can read more here:
http://www.ruby-doc.org/core/classes/Hash.src/M000759.html
it works on both versions
User you have to initialize the key for the non-existent values in the second array:
irb(main):007:0> array1 = {"d1" => 2, "d2" => 3}
=> {"d1"=>2, "d2"=>3}
irb(main):008:0> array2 = {"d1" => 3, "d3" => 10}
=> {"d1"=>3, "d3"=>10}
irb(main):009:0> array3 = {}
=> {}
irb(main):010:0> array1.each {|key, count| array3[key] = (array3[key] || 0) + count}
=> {"d1"=>2, "d2"=>3}
irb(main):011:0> array2.each {|key, count| array3[key] = (array3[key] || 0) + count}
=> {"d1"=>3, "d3"=>10}
irb(main):012:0> array3
=> {"d1"=>5, "d2"=>3, "d3"=>10}
irb(main):013:0>
If all the keys in your hash are not integers you can use this trick which uses the fact that string.to_i gives zero
hash.to_a.flatten.inject{|sum, n| sum.to_s.to_i + n.to_s.to_i }

Resources