Deleting duplicate data in ruby - ruby

I want to remove duplicates from my data based on all three fields, i.e. event name, date and time: if all three fields match for two events, one of them should be removed so that only unique events are shown. The code I have written works for only one field, i.e. it removes entries based on event name alone. How do I match on all three fields? I was using & but that didn't work out.
include Enumerable
# Sample events; the first and third entries agree in all three fields.
list = [
  { eventname: "music show", date: "1st august", time: "9 pm" },
  { eventname: "dance show", date: "11st august", time: "9 pm" },
  { eventname: "music show", date: "1st august", time: "9 pm" },
  { eventname: "music show", date: "15st august", time: "9 pm" },
  { eventname: "magic show", date: "12st august", time: "9 pm" },
  { eventname: "rock show", date: "1st august", time: "9 pm" }
]

# Collapse the list to one hash per distinct :eventname. The hashes of a group
# are folded together and, when a key collides, the earlier truthy value wins,
# so only the first event of each name survives. :date and :time play no part
# in the grouping key.
b = list
  .group_by { |event| event[:eventname] }
  .map do |_name, same_name|
    same_name.reduce({}) do |merged, event|
      merged.merge(event) { |_field, kept, incoming| kept || incoming }
    end
  end

Use the uniq method of Array:
# Array#uniq compares elements with hash/eql?, and two Hashes are eql? only
# when every key/value pair matches, so this keeps a single copy of each event
# whose :eventname, :date and :time are all equal. It returns a new array.
list.uniq

Related

group_by multiple times in ruby

I have an array of hashes called events:
events = [
{:name => "Event 1", :date => "2019-02-21 08:00:00", :area => "South", :micro_area => "A"},
{:name => "Event 2", :date => "2019-02-21 08:00:00", :area => "South", :micro_area => "A"},
{:name => "Event 3", :date => "2019-02-21 08:00:00", :area => "South", :micro_area => "B"},
{:name => "Event 4", :date => "2019-02-21 08:00:00", :area => "South", :micro_area => "B"},
{:name => "Event 5", :date => "2019-02-21 08:00:00", :area => "North", :micro_area => "A"},
{:name => "Event 6", :date => "2019-02-21 08:00:00", :area => "North", :micro_area => "A"},
{:name => "Event 7", :date => "2019-02-21 08:00:00", :area => "North", :micro_area => "B"},
{:name => "Event 8", :date => "2019-02-21 08:00:00", :area => "North", :micro_area => "B"}
]
I want to know how to group_by first date, then area then micro_area to end up with a single array of hashes for example:
[
{
"2019-02-21 08:00:00": {
"South": {
"A": [
{:name=>"Event 1", :date=>"2019-02-21 08:00:00", :area=>"South", :micro_area=>"A" },
{:name=>"Event 2", :date=>"2019-02-21 08:00:00", :area=>"South", :micro_area=>"A" }
],
"B": [
{:name=>"Event 3", :date=>"2019-02-21 08:00:00", :area=>"South", :micro_area=>"B" },
{:name=>"Event 4", :date=>"2019-02-21 08:00:00", :area=>"South", :micro_area=>"B" }
]
},
"North": {
"A": [
{:name=>"Event 5", :date=>"2019-02-21 08:00:00", :area=>"North", :micro_area=>"A" },
{:name=>"Event 6", :date=>"2019-02-21 08:00:00", :area=>"North", :micro_area=>"A" }
],
"B": [
{:name=>"Event 7", :date=>"2019-02-21 08:00:00", :area=>"North", :micro_area=>"B" },
{:name=>"Event 8", :date=>"2019-02-21 08:00:00", :area=>"North", :micro_area=>"B" }
]
}
}
}
]
Trying events.group_by { |r| [r[:date], r[:area], r[:micro_area]] } doesn't seem to work the way I want it to.
I think the following will work for you:
events = [
{ name: 'Event 1', date: '2019-02-21 08:00:00', area: 'South', micro_area: 'A' }
]
events.group_by { |x| x[:date] }.transform_values do |v1|
v1.group_by { |y| y[:area] }.transform_values do |v2|
v2.group_by { |z| z[:micro_area] }
end
end
# {
# "2019-02-21 08:00:00"=>{
# "South"=>{
# "A"=>[
# {:name=>"Event 1", :date=>"2019-02-21 08:00:00", :area=>"South", :micro_area=>"A"}
# ]
# }
# }
# }
Another option is to build the nested structure as you traverse your hash:
# Walk the events once, creating each level of the date -> area -> micro_area
# nesting on first use and appending the event to the innermost array.
events.each_with_object({}) do |event, result|
  date, area, micro_area = event.values_at(:date, :area, :micro_area)
  by_area = (result[date] ||= {})
  by_micro_area = (by_area[area] ||= {})
  (by_micro_area[micro_area] ||= []) << event
end
Another option is grouping them like you did in the question. Then build the nested structure from the array used as key.
# build an endless nested structure
nested = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }
# group by the different criteria and place them in the above nested structure
events.group_by { |event| event.values_at(:date, :area, :micro_area) }
.each { |(*path, last), events| nested.dig(*path)[last] = events }
# optional - reset all default procs
reset_default_proc = ->(hash) { hash.each_value(&reset_default_proc).default = nil if hash.is_a?(Hash) }
reset_default_proc.call(nested)
The above leaves the answer in the nested variable.
References:
Hash::new to create the nested hash.
Hash#default_proc to get the default proc of a hash.
Hash#default= to reset the hash default back to nil.
Hash#dig to traverse the nested structure until the last node.
Hash#[]= to set the last node equal to the grouped events.
Array decomposition and array to argument conversion to capture all but the last node into path and call #dig with the contents of path as arguments.
Here is a recursive solution that will handle arbitrary levels of nesting and arbitrary grouping objects.
# Recursively groups an array of hashes into nested hashes, one level per key.
#
# @param events [Array<Hash>] the records to group
# @param grouping_keys [Array] keys to group by, outermost level first
# @return [Hash, Array<Hash>] nested hashes keyed by each grouping value; the
#   leaves are arrays of the records with every grouping key used so far
#   removed. With no grouping keys, +events+ itself is returned.
def hashify(events, grouping_keys)
  if grouping_keys.empty?
    events
  else
    current, *rest = grouping_keys
    buckets = events.group_by { |event| event[current] }
    buckets.transform_values do |members|
      # Drop the key just grouped on before descending a level.
      stripped = members.map { |event| event.reject { |key, _| key == current } }
      hashify(stripped, rest)
    end
  end
end
Before executing this with the sample data from the questions let's add a hash with a different date to events.
events <<
{ :name=>"Event 8", :date=>"2018-12-31 08:00:00",
:area=>"North", :micro_area=>"B" }
grouping_keys = [:date, :area, :micro_area]
hashify(events, grouping_keys)
#=> {"2019-02-21 08:00:00"=>{
# "South"=>{
# "A"=>[{:name=>"Event 1"}, {:name=>"Event 2"}],
# "B"=>[{:name=>"Event 3"}, {:name=>"Event 4"}]
# },
# "North"=>{
# "A"=>[{:name=>"Event 5"}, {:name=>"Event 6"}],
# "B"=>[{:name=>"Event 7"}, {:name=>"Event 8"}]
# }
# },
# "2018-12-31 08:00:00"=>{
# "North"=>{
# "B"=>[{:name=>"Event 8"}]
# }
# }
# }
hashify(events, [:date, :area])
#=> {"2019-02-21 08:00:00"=>{
# "South"=>[
# {:name=>"Event 1", :micro_area=>"A"},
# {:name=>"Event 2", :micro_area=>"A"},
# {:name=>"Event 3", :micro_area=>"B"},
# {:name=>"Event 4", :micro_area=>"B"}
# ],
# "North"=>[
# {:name=>"Event 5", :micro_area=>"A"},
# {:name=>"Event 6", :micro_area=>"A"},
# {:name=>"Event 7", :micro_area=>"B"},
# {:name=>"Event 8", :micro_area=>"B"}
# ]
# },
# "2018-12-31 08:00:00"=>{
# "North"=>[
# {:name=>"Event 8", :micro_area=>"B"}
# ]
# }
# }
See Enumerable#group_by, Hash#transform_values and Hash#reject.

Outputting the keys of the max value of a hash within Arrays in Ruby using methods

data = [
"Company one" => {
"number_1" => 46,
"number_2" => 3055,
"country" => "USA"
},
"Company two" => {
"number_1" => 32,
"number_2" => 6610,
"country" => "USA"
},
"Company three" => {
"number_1" => 40,
"number_2" => 9128,
"country" => "USA"
}
]
So I have this array in which I'm trying to find which company has the biggest number in 'number_2'. The largest would be Company three with 9128.
So I have this code that puts the largest number which would be 9128
# Prints the largest "number_2" value found in +data+.
#
# @param data [Array<Hash>] hashes mapping a company name to a hash of that
#   company's figures; each inner hash is read with the "number_2" key
# @return [nil] the result is written to standard output via puts
def number(data)
  # flat_map gathers the figures from every hash in the array, so the maximum
  # is taken over all companies rather than only those in the last hash.
  collected_array = data.flat_map do |company_hash|
    company_hash.map { |_name, figures| figures["number_2"] }
  end
  puts collected_array.max
end
number(data)
But I'm trying to puts the company name with the largest number, which would be "Company three". I've tried .keys and other ways, but they give me an error.
I've tried this way:
# Attempt at printing the name of the company with the largest "number_2".
# As written it prints the company name that is greatest by string comparison,
# because the names (not the numbers) are what end up being compared.
def number(data)
collected_array=[]
data.each do |company_hash|
collected_array = company_hash.map do |k,v|
# This value is read and then discarded: it is not the block's last expression.
v["number_2"]
# The block's result is the company name, so map collects the names.
k
end
end
# max over an array of names uses String#<=>, not the "number_2" values.
puts collected_array.max
end
number(data)
but it gives me "Company two" rather than "Company three" which would be the company with the highest number
As stated by @Cary, it can be simplified by accessing the first element of data and calling max_by on it; inside the block, the hash bound to the local variable h is checked for its number_2 value.
The result is an Array containing two elements: the first is the company name, and the second (and last) is the hash containing its data:
# One-element array wrapping a hash of company name => figures.
data = [
  {
    "Company one" => { "number_1" => 46, "number_2" => 3055, "country" => "USA" },
    "Company two" => { "number_1" => 32, "number_2" => 6610, "country" => "USA" },
    "Company three" => { "number_1" => 40, "number_2" => 9128, "country" => "USA" }
  }
]

# max_by over a Hash yields [name, figures] pairs and returns the winning pair.
max_company = data[0].max_by { |_name, figures| figures['number_2'] }
p max_company[0] # "Company three"
p max_company[1]['number_2'] # 9128

How to count values in a array of hashes

I have an array of hashes
[ {:name => "bob", :type => "some", :product => "apples"},
{:name => "ted", :type => "other", :product => "apples"},....
{:name => "Will", :type => "none", :product => "oranges"} ]
and was wondering if there is a simple way to count the number of products and store the count as well as the value in an array or hash.
I want the result to be something like:
@products = [{"apples" => 2, "oranges" => 1, ...}]
You can do as
array = [
{:name => "bob", :type => "some", :product => "apples"},
{:name => "ted", :type => "other", :product => "apples"},
{:name => "Will", :type => "none", :product => "oranges"}
]
array.each_with_object(Hash.new(0)) { |h1, h2| h2[h1[:product]] += 1 }
# => {"apples"=>2, "oranges"=>1}
You can use Enumerable#group_by and Enumerable#map
array.group_by{|h| h[:product]}.map{|k,v| [k, v.size]}.to_h
# => {"apples"=>2, "oranges"=>1}
While not exactly what the OP was looking for, this may be helpful to many. If you're just looking for the count of a specific product, you could do this:
array = [
{:name => "bob", :type => "some", :product => "apples"},
{:name => "ted", :type => "other", :product => "apples"},
{:name => "Will", :type => "none", :product => "oranges"}
]
array.count { |h| h[:product] == 'apples' }
# => 2
You could count:
hashes = [
{:name => "bob", :type => "some", :product => "apples"},
{:name => "ted", :type => "other", :product => "apples"},
{:name => "Will", :type => "none", :product => "oranges"}
]
hashes.inject(Hash.new(0)) { |h,o| h[o[:product]] += 1; h }
Or maybe...
# Count how often each product occurs. instance_eval runs the block with the
# array of product names as self, so uniq and count are called on that array
# (an Array of hashes has no `keys` method to call directly).
hashes.map { |o| o[:product] }.instance_eval { Hash[uniq.map { |k| [k, count(k)] }] }
I do not know which is more performant; the latter seems weird to read, though.
I would do:
# Sample records; only :product matters for the tally below.
items = [
  { name: "bob", type: "some", product: "apples" },
  { name: "ted", type: "other", product: "apples" },
  { name: "Will", type: "none", product: "oranges" }
]

# Bucket the records by product, then pair each product with its bucket size.
counts = items
  .group_by { |item| item[:product] }
  .map { |product, group| [product, group.length] }
p counts #=> [["apples", 2], ["oranges", 1]]
Then if you need it as a Hash just do:
Hash[counts]

Nested hash iteration: How to iterate a merge over an ( (array of hashes) within a hash )

I'm trying to do as the title says. Here is my code:
school.each { |x| school[:students][x].merge!(semester:"Summer") }
I think I pinpointed the problem to the "[x]" above. If I substitute an array position such as "[2]" it works fine. How can I make the iteration work?
If the info above is not enough or you'd like to offer a better solution, please see the details below. Thanks!
The error message I get:
file.rb:31:in `[]': no implicit conversion of Array into Integer (TypeError)
from file.rb:31:in `block in <main>'
from file.rb:31:in `each'
from file.rb:31:in `<main>'
The nested hash below before alteration:
school = {
:name => "Happy Funtime School",
:location => "NYC",
:instructors => [
{:name=>"Blake", :subject=>"being awesome" },
{:name=>"Ashley", :subject=>"being better than blake"},
{:name=>"Jeff", :subject=>"karaoke"}
],
:students => [
{:name => "Marissa", :grade => "B"},
{:name=>"Billy", :grade => "F"},
{:name => "Frank", :grade => "A"},
{:name => "Sophie", :grade => "C"}
]
}
I'm trying to append :semester=>"Summer" to each of the last four hashes. Here is what I'm trying to go for:
# ...preceding code is the same. Changed code below...
:students => [
{:name => "Marissa", :grade => "B", :semester => "Summer"},
{:name=>"Billy", :grade => "F", :semester => "Summer"},
{:name => "Frank", :grade => "A", :semester => "Summer"},
{:name => "Sophie", :grade => "C", :semester => "Summer"}
]
}
Just iterate over the students:
school[:students].each { |student| student[:semester] = "Summer" }
Or, using merge:
school[:students].each { |student| student.merge!(semester: "Summer") }
The issue is that when you do array.each {|x| do something}, x actually refers to each element in the array.
For example, in the first iteration of the loop,
x = {:name => "Marissa", :grade => "B"}
So what you are really doing is trying to reference:
school[:student][{:name => "Marissa", :grade => "B"}]
Which will not work
What you could do instead is create a for loop to track the index.
# Index-based variant: i runs over every valid position in the :students
# array, and the hash at that position is updated in place. The range must be
# taken from :students, the same key that is indexed in the loop body.
for i in 0...school[:students].count
  school[:students][i].merge!(semester: "Summer")
end
Edit: Stefan's solution is much better than mine, but I will leave this up to show where you went wrong.
I would do as below using Hash#store :
require 'awesome_print'
school = {
:name => "Happy Funtime School",
:location => "NYC",
:instructors => [
{
:name => "Blake",
:subject => "being awesome"
},
{
:name => "Ashley",
:subject => "being better than blake"
},
{
:name => "Jeff",
:subject => "karaoke"
}
],
:students => [
{
:name => "Marissa",
:grade => "B"
},
{
:name => "Billy",
:grade => "F"
},
{
:name => "Frank",
:grade => "A"
},
{
:name => "Sophie",
:grade => "C"
}
]
}
school[:students].each{|h| h.store(:semester ,"Summer")}
ap school,:index => false,:indent => 10
output
{
:name => "Happy Funtime School",
:location => "NYC",
:instructors => [
{
:name => "Blake",
:subject => "being awesome"
},
{
:name => "Ashley",
:subject => "being better than blake"
},
{
:name => "Jeff",
:subject => "karaoke"
}
],
:students => [
{
:name => "Marissa",
:grade => "B",
:semester => "Summer"
},
{
:name => "Billy",
:grade => "F",
:semester => "Summer"
},
{
:name => "Frank",
:grade => "A",
:semester => "Summer"
},
{
:name => "Sophie",
:grade => "C",
:semester => "Summer"
}
]
}

Getting the hash key whose value array contains a given string

I have the following code:
country_code = infer_country # will grab a user's two character country code
region = 'us' # united states by default
region_map = {
"au" => ["au"], # australia
"al" => ["al", "ba", "bg", "hr", "md", "me", "mk", "ro", "si"], # bulgaria and the balkans
"cn" => ["cn"], # china
"ee" => ["ee", "lt", "lv"], # estonia and the baltics
"fi" => ["fi"], # finland
"at" => ["at", "ch", "de"], # germany, austria, switzerland
"cy" => ["cy", "gr", "mt"], # greece, cyprus, malta
"hk" => ["hk"], # hong_kong
"id" => ["id"], # indonesia
"it" => ["it"], # italy
"jp" => ["jp"], # japan
"kp" => ["kp", "kr"], # korea
"ar" => ["ar", "bl", "bo", "br", "bz", "cl", "co", "cr", "cu", "do", "ec", "gf", "gp", "gt", "hn",
"ht", "mf", "mq", "mx", "ni", "pa", "pe", "pr", "py", "sv", "uy", "ve"], # latin america including brazil
"my" => ["my"], # malaysia
"af" => ["af", "eg", "iq", "ir", "sa", "ye", "sy", "il", "jo", "ps", "lb", "om", "kw", "qa", "bh"], # middle east
"nl" => ["nl"], # netherlands
"no" => ["no"], # norway
"pl" => ["pl"], # poland
"pt" => ["pt"], # portugal
"ph" => ["ph"], # philippines
"ru" => ["ru"], # russia
"rs" => ["rs"], # serbia
"sg" => ["sg"], # singapore
"za" => ["za"], # south africa
"bn" => ["bn", "bu", "kh", "la", "tl", "vn"], # south east asia
"es" => ["es"], # spain
"tw" => ["tw"], # taiwan
"th" => ["th"], # thailand
"tr" => ["tr"], # turkey
"gb" => ["gb" ] # united kingdom
}.invert
# version 0.0
# region_map has been inverted, so each key is an array of country codes and
# each value is the region code. Take the first entry whose codes contain
# country_code; region keeps its previous value when nothing matches.
matched = region_map.find { |codes, _region| codes.include?(country_code) }
region = matched.last if matched
puts region
If country_code is "gb", then "gb" should be printed out. If country_code is in south east asia, say it's "vn", then "bn" should be printed out.
How can I elegantly solve this problem? I can restructure my hash if necessary.
# Looks up the region whose list of country codes contains +country_code+.
#
# Reads @region_map, which is expected to map a region code to an array of
# country codes (assumption: it is set by the caller before this is used).
#
# @param country_code [String] the country code to look for
# @return [String, nil] the first matching region code, or nil when no region
#   lists the code
def find_region(country_code)
  # find yields [region, codes] pairs and returns the first matching pair.
  pair = @region_map.find { |_region, codes| codes.include?(country_code) }
  pair&.first
end
find_region('gb') # => "gb"
find_region('bz') # => "ar"
find_region('lv') # => "ee"
find_region('ls') # => nil
# Looks up the region whose list of country codes contains +country_code+.
#
# Reads @region_map, which is expected to map a region code to an array of
# country codes (assumption: it is set by the caller before this is used).
#
# @param country_code [String] the country code to look for
# @return [String, nil] the first matching region code, or nil when no region
#   lists the code
def find_region(country_code)
  # Return from the method as soon as a region lists the code.
  @region_map.each { |region, codes| return region if codes.include?(country_code) }
  nil
end
region_map = Hash.new("us").merge(
"au" => "au",
"al" => "al",
"ba" => "al",
"bg" => "al",
"hr" => "al",
"md" => "al",
"me" => "al",
"mk" => "al",
"ro" => "al",
"si" => "al",
"al" => "al",
"cn" => "cn",
...
)
region_map["non existing code"] # => "us"
region_map[nil] # => "us"
region_map["au"] # => "au"
region_map["ba"] # => "al"
region_map["cn"] # => "cn"
or
# Maps a country code to the code of the region it belongs to, using one
# `when` branch per region that lists every country code in that region.
# Any code that matches no branch (including nil) falls through to "us".
# NOTE(review): the `...` line appears to stand for the remaining region
# branches left out of this excerpt - fill them in before running this.
def region_map code
case code
when "au"
"au"
when "al", "ba", "bg", "hr", "md", "me", "mk", "ro", "si"
"al"
when "cn"
"cn"
when "ee", "lt", "lv"
"ee"
...
else
"us"
end
end
region_map("non existing code") # => "us"
region_map(nil) # => "us"
region_map("au") # => "au"
region_map("ba") # => "al"
region_map("cn") # => "cn"

Resources