How do I extract a sub-hash from a hash? - ruby

I have a hash:
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
What is the best way to extract a sub-hash like this?
h1.extract_subhash(:b, :d, :e, :f) # => {:b => :B, :d => :D}
h1 #=> {:a => :A, :c => :C}

ActiveSupport, at least since 2.3.8, provides four convenient methods: #slice, #except and their destructive counterparts: #slice! and #except!. They were mentioned in other answers, but to sum them in one place:
x = {a: 1, b: 2, c: 3, d: 4}
# => {:a=>1, :b=>2, :c=>3, :d=>4}
x.slice(:a, :b)
# => {:a=>1, :b=>2}
x
# => {:a=>1, :b=>2, :c=>3, :d=>4}
x.except(:a, :b)
# => {:c=>3, :d=>4}
x
# => {:a=>1, :b=>2, :c=>3, :d=>4}
Note the return values of the bang methods. They will not only tailor existing hash but also return removed (not kept) entries. The Hash#except! suits best the example given in the question:
x = {a: 1, b: 2, c: 3, d: 4}
# => {:a=>1, :b=>2, :c=>3, :d=>4}
x.except!(:c, :d)
# => {:a=>1, :b=>2}
x
# => {:a=>1, :b=>2}
ActiveSupport does not require whole Rails, is pretty lightweight. In fact, a lot of non-rails gems depend on it, so most probably you already have it in Gemfile.lock. No need to extend Hash class on your own.

If you specifically want the method to return the extracted elements but h1 to remain the same:
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
h2 = h1.select {|key, value| [:b, :d, :e, :f].include?(key) } # => {:b=>:B, :d=>:D}
h1 = Hash[h1.to_a - h2.to_a] # => {:a=>:A, :c=>:C}
And if you want to patch that into the Hash class:
class Hash
def extract_subhash(*extract)
h2 = self.select{|key, value| extract.include?(key) }
self.delete_if {|key, value| extract.include?(key) }
h2
end
end
If you just want to remove the specified elements from the hash, that is much easier using delete_if.
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
h1.delete_if {|key, value| [:b, :d, :e, :f].include?(key) } # => {:a=>:A, :c=>:C}
h1 # => {:a=>:A, :c=>:C}

Ruby 2.5 added Hash#slice:
h = { a: 100, b: 200, c: 300 }
h.slice(:a) #=> {:a=>100}
h.slice(:b, :c, :d) #=> {:b=>200, :c=>300}

If you use rails, Hash#slice is the way to go.
{:a => :A, :b => :B, :c => :C, :d => :D}.slice(:a, :c)
# => {:a => :A, :c => :C}
If you don't use rails, Hash#values_at will return the values in the same order as you asked them so you can do this:
def slice(hash, *keys)
Hash[ [keys, hash.values_at(*keys)].transpose]
end
def except(hash, *keys)
desired_keys = hash.keys - keys
Hash[ [desired_keys, hash.values_at(*desired_keys)].transpose]
end
ex:
slice({foo: 'bar', 'bar' => 'foo', 2 => 'two'}, 'bar', 2)
# => {'bar' => 'foo', 2 => 'two'}
except({foo: 'bar', 'bar' => 'foo', 2 => 'two'}, 'bar', 2)
# => {:foo => 'bar'}
Explanation:
Out of {:a => 1, :b => 2, :c => 3} we want {:a => 1, :b => 2}
hash = {:a => 1, :b => 2, :c => 3}
keys = [:a, :b]
values = hash.values_at(*keys) #=> [1, 2]
transposed_matrix =[keys, values].transpose #=> [[:a, 1], [:b, 2]]
Hash[transposed_matrix] #=> {:a => 1, :b => 2}
If you feels like monkey patching is the way to go, following is what you want:
module MyExtension
module Hash
def slice(*keys)
::Hash[[keys, self.values_at(*keys)].transpose]
end
def except(*keys)
desired_keys = self.keys - keys
::Hash[[desired_keys, self.values_at(*desired_keys)].transpose]
end
end
end
Hash.include MyExtension::Hash

You can use slice!(*keys) which is available in the core extensions of ActiveSupport
initial_hash = {:a => 1, :b => 2, :c => 3, :d => 4}
extracted_slice = initial_hash.slice!(:a, :c)
initial_hash would now be
{:b => 2, :d =>4}
extracted_slide would now be
{:a => 1, :c =>3}
You can look at slice.rb in ActiveSupport 3.1.3

module HashExtensions
def subhash(*keys)
keys = keys.select { |k| key?(k) }
Hash[keys.zip(values_at(*keys))]
end
end
Hash.send(:include, HashExtensions)
{:a => :A, :b => :B, :c => :C, :d => :D}.subhash(:a) # => {:a => :A}

h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
keys = [:b, :d, :e, :f]
h2 = (h1.keys & keys).each_with_object({}) { |k,h| h.update(k=>h1.delete(k)) }
#=> {:b => :B, :d => :D}
h1
#=> {:a => :A, :c => :C}

if you use rails, it may be convenient to use Hash.except
h = {a:1, b:2}
h1 = h.except(:a) # {b:2}

Both delete_if and keep_if are part of Ruby core. Here you can achieve what you would like to without patching the Hash type.
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
h2 = h1.clone
p h1.keep_if { |key| [:b, :d, :e, :f].include?(key) } # => {:b => :B, :d => :D}
p h2.delete_if { |key, value| [:b, :d, :e, :f].include?(key) } #=> {:a => :A, :c => :C}
For futher info, check the links below from the documentation:
delete_if
keep_if

As others have mentioned, Ruby 2.5 added the Hash#slice method.
Rails 5.2.0beta1 also added it's own version of Hash#slice to shim the functionality for users of the framework that are using an earlier version of Ruby.
https://github.com/rails/rails/commit/01ae39660243bc5f0a986e20f9c9bff312b1b5f8
If looking to implement your own for whatever reason, it's a nice one liner as well:
def slice(*keys)
keys.each_with_object(Hash.new) { |k, hash| hash[k] = self[k] if has_key?(k) }
end unless method_defined?(:slice)

if you want to extract from data base record also it is better to use slice
hash = { a: 1, b: 2, c: 3, d: 4 }
hash.slice!(:a, :b) # => {:c=>3, :d=>4}
hash # => {:a=>1, :b=>2}
https://api.rubyonrails.org/classes/Hash.html#method-i-slice-21

class Hash
def extract(*keys)
key_index = Hash[keys.map{ |k| [k, true] }] # depends on the size of keys
partition{ |k, v| key_index.has_key?(k) }.map{ |group| Hash[group] }
end
end
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
h2, h1 = h1.extract(:b, :d, :e, :f)

Here is a quick performance comparison of the suggested methods, #select seems to be the fastest
k = 1_000_000
Benchmark.bmbm do |x|
x.report('select') { k.times { {a: 1, b: 2, c: 3}.select { |k, _v| [:a, :b].include?(k) } } }
x.report('hash transpose') { k.times { Hash[ [[:a, :b], {a: 1, b: 2, c: 3}.fetch_values(:a, :b)].transpose ] } }
x.report('slice') { k.times { {a: 1, b: 2, c: 3}.slice(:a, :b) } }
end
Rehearsal --------------------------------------------------
select 1.640000 0.010000 1.650000 ( 1.651426)
hash transpose 1.720000 0.010000 1.730000 ( 1.729950)
slice 1.740000 0.010000 1.750000 ( 1.748204)
----------------------------------------- total: 5.130000sec
user system total real
select 1.670000 0.010000 1.680000 ( 1.683415)
hash transpose 1.680000 0.010000 1.690000 ( 1.688110)
slice 1.800000 0.010000 1.810000 ( 1.816215)
The refinement will look like this:
module CoreExtensions
module Extractable
refine Hash do
def extract(*keys)
select { |k, _v| keys.include?(k) }
end
end
end
end
And to use it:
using ::CoreExtensions::Extractable
{ a: 1, b: 2, c: 3 }.extract(:a, :b)

This code injects the functionality you're asking for into the Hash class:
class Hash
def extract_subhash! *keys
to_keep = self.keys.to_a - keys
to_delete = Hash[self.select{|k,v| !to_keep.include? k}]
self.delete_if {|k,v| !to_keep.include? k}
to_delete
end
end
and produces the results you provided:
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
p h1.extract_subhash!(:b, :d, :e, :f) # => {b => :B, :d => :D}
p h1 #=> {:a => :A, :c => :C}
Note: this method actually returns the extracted keys/values.

Here's a functional solution that can be useful if you're not running on Ruby 2.5 and in the case that you don't wan't to pollute your Hash class by adding a new method:
slice_hash = -> keys, hash { hash.select { |k, _v| keys.include?(k) } }.curry
Then you can apply it even on nested hashes:
my_hash = [{name: "Joe", age: 34}, {name: "Amy", age: 55}]
my_hash.map(&slice_hash.([:name]))
# => [{:name=>"Joe"}, {:name=>"Amy"}]

Just an addition to slice method, if the subhash keys which you want to separate from original hash is going to be dynamic you can do like,
slice(*dynamic_keys) # dynamic_keys should be an array type

We can do it by looping on keys only we want to extract and just checking the key is exist and then extract it.
class Hash
def extract(*keys)
extracted_hash = {}
keys.each{|key| extracted_hash[key] = self.delete(key) if self.has_key?(key)}
extracted_hash
end
end
h1 = {:a => :A, :b => :B, :c => :C, :d => :D}
h2 = h1.extract(:b, :d, :e, :f)

Related

Path to an embedded object

Given a nested array or hash as the receiver and some object as the argument, what is the best way to return the path to an occurrence of the object if the receiver includes the object, or nil otherwise? I define path as an array of array indices or hash keys that leads to the object. The argument object will never be any of the hash keys, and will never appear more than once. For example, I expect:
[
:a,
[:b, :c, {:d => :foo}],
:e,
]
.path_to(:foo) # => [1, 2, :d]
{
:a => [3, "foo"],
:b => 5,
:c => 2,
}
.path_to(3) # => [:a, 0]
When there is no occurrence, return nil:
[:foo, "hello", 3]
.path_to(:bar) => nil
If no one comes up with a reasonable answer, then I will post my own answer shortly.
Here you are my own recursive solution. I am sure that it could be improved but it is a good start and works exactly as requested.
# path.rb
module Patheable
def path_to item_to_find
path = []
find_path(self, item_to_find, path)
result = path.empty? ? nil : path
result.tap { |r| puts r.inspect } # just for testing
end
private
def find_path(current_item, item_to_find, result)
if current_item.is_a?(Array)
current_item.each_with_index do |value, index|
find_path(value, item_to_find, result.push(index))
end
elsif current_item.is_a?(Hash)
current_item.each do |key, value|
find_path(value, item_to_find, result.push(key))
end
else
result.pop unless current_item == item_to_find
end
end
end
class Array
include Patheable
end
class Hash
include Patheable
end
[
:a,
[:b, :c, {:d => :foo}],
:e,
].path_to(:foo) # => [1, 2, :d]
{
:a => [3, "foo"],
:b => 5,
:c => 2,
}.path_to(3) # => [:a, 0]
[:foo, "hello", 3].path_to(:bar) # => nil
#end path.rb
# example of use
$ ruby path.rb
[1, 2, :d]
[:a, 0]
nil
Nothing like a bit of recursion.
require 'minitest/autorun'
class Array
def path_to(obj)
# optimize this
Hash[self.each.with_index.to_a.map {|k,v| [v,k]}].path_to(obj)
end
end
class Hash
def path_to(obj)
inverted = self.invert
if inverted[obj]
[inverted[obj]]
else
self.map {|k, v|
if v.respond_to?(:path_to)
if res = v.path_to(obj)
[k] + res
end
end
}.find {|path|
path and path[-1] != nil
}
end
end
end
describe "path_to" do
it "should work with really simple arrays" do
[:a, :e,].path_to(:a).must_equal [0]
end
it "should work with simple arrays" do
[:a, [:b, :c], :e,].path_to(:c).must_equal [1, 1]
end
it "should work with arrays" do
[:a, [:b, :c, {:d => :foo}], :e,].path_to(:foo).must_equal [1, 2, :d]
end
it "should work with simple hashes" do
{:d => :foo}.path_to(:foo).must_equal [:d]
end
it "should work with hashes" do
({:a => [3, "foo"], :b => 5, :c => 2,}.path_to(3).must_equal [:a, 0])
end
end
This is the answer that I came up with.
class Object
def path_to obj; end
end
class Array
def path_to obj
if i = index(obj) then return [i] end
a = nil
_, i = to_enum.with_index.find{|e, _| a = e.path_to(obj)}
a.unshift(i) if i
end
end
class Hash
def path_to obj
if value?(obj) then return [key(obj)] end
a = nil
kv = find{|_, e| a = e.path_to(obj)}
a.unshift(kv.first) if kv
end
end

set ruby hash element value by array of keys

here is what i got:
hash = {:a => {:b => [{:c => old_val}]}}
keys = [:a, :b, 0, :c]
new_val = 10
hash structure and set of keys can vary.
i need to get
hash[:a][:b][0][:c] == new_val
Thanks!
You can use inject to traverse your nested structures:
hash = {:a => {:b => [{:c => "foo"}]}}
keys = [:a, :b, 0, :c]
keys.inject(hash) {|structure, key| structure[key]}
# => "foo"
So, you just need to modify this to do a set on the last key. Perhaps something like
last_key = keys.pop
# => :c
nested_hash = keys.inject(hash) {|structure, key| structure[key]}
# => {:c => "foo"}
nested_hash[last_key] = "bar"
hash
# => {:a => {:b => [{:c => "bar"}]}}
Similar to Andy's, but you can use Symbol#to_proc to shorten it.
hash = {:a => {:b => [{:c => :old_val}]}}
keys = [:a, :b, 0, :c]
new_val = 10
keys[0...-1].inject(hash, &:fetch)[keys.last] = new_val

Create Nested Hashes from a List of Hashes in Ruby

I have a set of categories and their values stored as a list of hashes:
r = [{:A => :X}, {:A => :Y}, {:B => :X}, {:A => :X}, {:A => :Z}, {:A => :X},
{:A => :X}, {:B => :Z}, {:C => :X}, {:C => :Y}, {:B => :X}, {:C => :Y},
{:C => :Y}]
I'd like to get a count of each value coupled with its category as a hash like this:
{:A => {:X => 4, :Y => 1, :Z => 1},
:B => {:X => 2, :Z => 1},
:C => {:X => 1, :Y => 3}}
How can I do this efficiently?
Here's what I have so far (it returns inconsistent values):
r.reduce(Hash.new(Hash.new(0))) do |memo, x|
memo[x.keys.first][x.values.first] += 1
memo
end
Should I first compute the counts of all instances of specific {:cat => :val}s and then create the hash? Should I give a different base-case to reduce and change the body to check for nil cases (and assign zero when nil) instead of always adding 1?
EDIT:
I ended up changing my code and using the below method to have a cleaner way of achieving a nested hash:
r.map do |x|
[x.keys.first, x.values.last]
end.reduce({}) do |memo, x|
memo[x.first] = Hash.new(0) if memo[x.first].nil?
memo[x.first][x.last] += 1
memo
end
The problem of your code is: memo did not hold the value.
Use a variable outside the loop to hold the value would be ok:
memo = Hash.new {|h,k| h[k] = Hash.new {|hh, kk| hh[kk] = 0 } }
r.each do |x|
memo[x.keys.first][x.values.first] += 1
end
p memo
And what's more, it won't work to init a hash nested inside a hash directly like this:
# NOT RIGHT
memo = Hash.new(Hash.new(0))
memo = Hash.new({})
Here is a link for more about the set default value issue:
http://www.themomorohoax.com/2008/12/31/why-setting-the-default-value-of-a-hash-to-be-a-hash-is-wrong
Not sure what "inconsistent values" means, but your problem is the hash you're injecting into is not remembering its results
r.each_with_object(Hash.new { |h, k| h[k] = Hash.new 0 }) do |individual, consolidated|
individual.each do |key, value|
consolidated[key][value] += 1
end
end
But honestly, it would probably be better to just go to wherever you're making this array and change it to aggregate values like this.
Functional approach using some handy abstractions -no need to reinvent the wheel- from facets:
require 'facets'
r.map_by { |h| h.to_a }.mash { |k, vs| [k, vs.frequency] }
#=> {:A=>{:X=>4, :Y=>1, :Z=>1}, :B=>{:X=>2, :Z=>1}, :C=>{:X=>1, :Y=>3}}

How do I recursively define a Hash in Ruby from supplied arguments?

This snippet of code populates an #options hash. values is an Array which contains zero or more heterogeneous items. If you invoke populate with arguments that are Hash entries, it uses the value you specify for each entry to assume a default value.
def populate(*args)
args.each do |a|
values = nil
if (a.kind_of? Hash)
# Converts {:k => "v"} to `a = :k, values = "v"`
a, values = a.to_a.first
end
#options[:"#{a}"] ||= values ||= {}
end
end
What I'd like to do is change populate such that it recursively populates #options. There is a special case: if the values it's about to populate a key with are an Array consisting entirely of (1) Symbols or (2) Hashes whose keys are Symbols (or some combination of the two), then they should be treated as subkeys rather than the values associated with that key, and the same logic used to evaluate the original populate arguments should be recursively re-applied.
That was a little hard to put into words, so I've written some test cases. Here are some test cases and the expected value of #options afterwards:
populate :a
=> #options is {:a => {}}
populate :a => 42
=> #options is {:a => 42}
populate :a, :b, :c
=> #options is {:a => {}, :b => {}, :c => {}}
populate :a, :b => "apples", :c
=> #options is {:a => {}, :b => "apples", :c => {}}
populate :a => :b
=> #options is {:a => :b}
# Because [:b] is an Array consisting entirely of Symbols or
# Hashes whose keys are Symbols, we assume that :b is a subkey
# of #options[:a], rather than the value for #options[:a].
populate :a => [:b]
=> #options is {:a => {:b => {}}}
populate :a => [:b, :c => :d]
=> #options is {:a => {:b => {}, :c => :d}}
populate :a => [:a, :b, :c]
=> #options is {:a => {:a => {}, :b => {}, :c => {}}}
populate :a => [:a, :b, "c"]
=> #options is {:a => [:a, :b, "c"]}
populate :a => [:one], :b => [:two, :three => "four"]
=> #options is {:a => :one, :b => {:two => {}, :three => "four"}}
populate :a => [:one], :b => [:two => {:four => :five}, :three => "four"]
=> #options is {:a => :one,
:b => {
:two => {
:four => :five
}
},
:three => "four"
}
}
It is acceptable if the signature of populate needs to change to accommodate some kind of recursive version. There is no limit to the amount of nesting that could theoretically happen.
Any thoughts on how I might pull this off?
So here's some simple code that works.
def to_value args
ret = {}
# make sure we were given an array
raise unless args.class == Array
args.each do |arg|
case arg
when Symbol
ret[arg] = {}
when Hash
arg.each do |k,v|
# make sure that all the hash keys are symbols
raise unless k.class == Symbol
ret[k] = to_value v
end
else
# make sure all the array elements are symbols or symbol-keyed hashes
raise
end
end
ret
rescue
args
end
def populate *args
#options ||= {}
value = to_value(args)
if value.class == Hash
#options.merge! value
end
end
It does deviate from your test cases:
test case populate :a, :b => "apples", :c is a ruby syntax error. Ruby will assume the final argument to a method is a hash (when not given braces), but not a non-final one, as you assume here. The given code is a syntax error (no matter the definition of populate) since it assumes :c is a hash key, and finds an end of line when it's looking for :c's value. populate :a, {:b => "apples"}, :c works as expected
test case populate :a => [:one], :b => [:two, :three => "four"] returns {:a=>{:one=>{}}, :b=>{:two=>{}, :three=>"four"}}. This is consistent with the test case populate :a => [:b].
Ruby isn't Perl, => works only inside real Hash definition or as final argument in method call. Most things you want will result in a syntax error.
Are you sure that populate limited to cases supported by Ruby syntax is worth it?

Turning a Hash of Arrays into an Array of Hashes in Ruby

We have the following datastructures:
{:a => ["val1", "val2"], :b => ["valb1", "valb2"], ...}
And I want to turn that into
[{:a => "val1", :b => "valb1"}, {:a => "val2", :b => "valb2"}, ...]
And then back into the first form. Anybody with a nice looking implementation?
This solution works with arbitrary numbers of values (val1, val2...valN):
{:a => ["val1", "val2"], :b => ["valb1", "valb2"]}.inject([]){|a, (k,vs)|
vs.each_with_index{|v,i| (a[i] ||= {})[k] = v}
a
}
# => [{:a=>"val1", :b=>"valb1"}, {:a=>"val2", :b=>"valb2"}]
[{:a=>"val1", :b=>"valb1"}, {:a=>"val2", :b=>"valb2"}].inject({}){|a, h|
h.each_pair{|k,v| (a[k] ||= []) << v}
a
}
# => {:a=>["val1", "val2"], :b=>["valb1", "valb2"]}
Using a functional approach (see Enumerable):
hs = h.values.transpose.map { |vs| h.keys.zip(vs).to_h }
#=> [{:a=>"val1", :b=>"valb1"}, {:a=>"val2", :b=>"valb2"}]
And back:
h_again = hs.first.keys.zip(hs.map(&:values).transpose).to_h
#=> {:a=>["val1", "val2"], :b=>["valb1", "valb2"]}
Let's look closely what the data structure we are trying to convert between:
#Format A
[
["val1", "val2"], :a
["valb1", "valb2"], :b
["valc1", "valc2"] :c
]
#Format B
[ :a :b :c
["val1", "valb1", "valc1"],
["val2", "valb2", "valc3"]
]
It is not diffculty to find Format B is the transpose of Format A in essential , then we can come up with this solution:
h={:a => ["vala1", "vala2"], :b => ["valb1", "valb2"], :c => ["valc1", "valc2"]}
sorted_keys = h.keys.sort_by {|a,b| a.to_s <=> b.to_s}
puts sorted_keys.inject([]) {|s,e| s << h[e]}.transpose.inject([]) {|r, a| r << Hash[*sorted_keys.zip(a).flatten]}.inspect
#[{:b=>"valb1", :c=>"valc1", :a=>"vala1"}, {:b=>"valb2", :c=>"valc2", :a=>"vala2"}]
m = {}
a,b = Array(h).transpose
b.transpose.map { |y| [a, y].transpose.inject(m) { |m,x| m.merge Hash[*x] }}
My attempt, perhaps slightly more compact.
h = { :a => ["val1", "val2"], :b => ["valb1", "valb2"] }
h.values.transpose.map { |s| Hash[h.keys.zip(s)] }
Should work in Ruby 1.9.3 or later.
Explanation:
First, 'combine' the corresponding values into 'rows'
h.values.transpose
# => [["val1", "valb1"], ["val2", "valb2"]]
Each iteration in the map block will produce one of these:
h.keys.zip(s)
# => [[:a, "val1"], [:b, "valb1"]]
and Hash[] will turn them into hashes:
Hash[h.keys.zip(s)]
# => {:a=>"val1", :b=>"valb1"} (for each iteration)
This will work assuming all the arrays in the original hash are the same size:
hash_array = hash.first[1].map { {} }
hash.each do |key,arr|
hash_array.zip(arr).each {|inner_hash, val| inner_hash[key] = val}
end
You could use inject to build an array of hashes.
hash = { :a => ["val1", "val2"], :b => ["valb1", "valb2"] }
array = hash.inject([]) do |pairs, pair|
pairs << { pair[0] => pair[1] }
pairs
end
array.inspect # => "[{:a=>["val1", "val2"]}, {:b=>["valb1", "valb2"]}]"
Ruby documentation has a few more examples of working with inject.

Resources