Search through Nested Hash having groups using Hashie Gem - ruby

I have a nested hash of a PDF that is in this format:
[ { :page => 1,
:lines => [
{ :y => 774.0,
:text_groups => [ { :x => 18.0, :width => 421.59599999999995, :text => "XXXX" } ]
},
# ...
]
},
{ :page => 2,
:lines => [
{ :y => 774.0,
:text_groups => [ { :x => 18.0, :width => 421.59599999999995, :text => "XXXX" } ]
},
# ...
],
# ...
}
]
I want to get the :x and :y for given :text from all 4 pages.
I tried this:
require 'hashie'
coordinates.extend(Hashie::Extensions::DeepLocate)
#hash_array = Hash.new
#hash_array = coordinates.deep_locate -> (key, value, object) { key == :text && value == "XXXX" }
This is giving me:
[ { :x => 18.0, :width => 421.59599999999995, :text => "XXXX" } },
{ :x => 18.0, :width => 421.59599999999995, :text => "XXXX" },
{ :x => 18.0, :width => 421.59599999999995, :text => "XXXX" },
{ :x => 18.0, :width => 421.59599999999995, :text => "XXXX" } ]
But I need :x and :y to be displayed like this:
x = " " and y = " "
I will use these values for my further validation.

I don't know if you'll accept a solution that doesn't use Hashie, but here's how I'd do it:
data = [
{ :page => 1,
:lines => [
{ :y => 774.0,
:text_groups => [ { :x => 18.0, :width => 421.59599999999995, :text => "XXXX" } ]
},
# ...
]
},
{ :page => 2,
:lines => [
{ :y => 774.0,
:text_groups => [ { :x => 18.0, :width => 421.59599999999995, :text => "XXXX" } ]
},
# ...
],
# ...
}
]
SEARCH_TEXT = "XXXX"
coords = data.each_with_object([]) do |page, res|
page[:lines].each do |line|
line[:text_groups].each do |group|
next unless group[:text] == SEARCH_TEXT
res << { x: group[:x], y: line[:y] }
end
end
end
p coords
# => [ { :x => 18.0, :y => 774.0 },
# { :x => 18.0, :y => 774.0 } ]

Related

Pushing objects into a hash inside a loop

I'm trying to achieve the following JSON results:
{
"movie" =>
[{
"title": "Thor",
"year" : 2011,
},
{
"title": "Iron Man",
"year" : 2008,
}],
"tv" =>
[{
"title": "Parks and Recreation"
"year": 2009
},
{
"title": "Friends"
"year": 1994
}]
}
With JavaScript, I would loop through my results and do something like:
results['movie'].push(item);
results['tv'].push(item);
With Ruby code, the farthest I've gone is this:
#results = Hash.new
results['Search'].each do |r|
if r['Type'] == 'movie'
#results['movie'] << {
'title' => r['Title'],
'year' => r['Year']
}
elsif r['Type'] == 'series'
#results['tv'] << {
'title' => r['Title'],
'year' => r['Year']
}
end
end
What am I missing here?
I think you can get what you want by using Enumerable#each_with_object and assigning a default value to the hash.
def group_search_results(items)
results = Hash.new { |hash, key| hash[key] = [] }
items.each_with_object(results) do |item|
results[item['Type']] << {'title' => item['Title'], 'year' => item['Year']}
end
end
describe "search_results" do
it "groups into an object" do
items = [
{'Type' => 'movie', 'Title' => 'Thor', 'Year' => 2011},
{'Type' => 'movie', 'Title' => 'Iron Man', 'Year' => 2008},
{'Type' => 'series', 'Title' => 'Parks and Recreation', 'Year' => 2009},
{'Type' => 'series', 'Title' => 'Friends', 'Year' => 1994},
]
results = group_search_results(items)
expect(results).to eq({
'movie' => [
{'title' => 'Thor', 'year' => 2011},
{'title' => 'Iron Man', 'year' => 2008},
],
'series' => [
{'title' => 'Parks and Recreation', 'year' => 2009},
{'title' => 'Friends', 'year' => 1994},
],
})
end
end
I believe the problem has to do with the initialization of your hash. The movie and tv keys aren't currently an array. You can initialize your hash like this:
#results = { 'movie' => [], 'tv' => [] }
Here's how it looks with the rest of your code:
#results = { 'movie' => [], 'tv' => [] }
results['Search'].each do |r|
if r['Type'] == 'movie'
#results['movie'] << {
'title' => r['Title'],
'year' => r['Year']
}
elsif r['Type'] == 'series'
#results['tv'] << {
'title' => r['Title'],
'year' => r['Year']
}
end
end
results = {
search: {
movie: [
{ title: 'Thor', year: 2011 },
{ title: 'Iron Man', year: 2008 },
],
tv: [
{ title: 'Parks and Recreation', year: 2009 },
{ title: 'Friends', year: 1994 },
]
}
}
#results = Hash.new{|k, v| k[v] = []}
results[:search].each do |type, array|
#results[type].push(*array)
end
results[:search].each_with_object(Hash.new{|k, v| k[v] = []}) do |(type, array), hash|
hash[type].push(*array)
end

Nested hash iteration: How to iterate a merge over an ( (array of hashes) within a hash )

I'm trying to do as the title says. Here is my code:
school.each { |x| school[:students][x].merge!(semester:"Summer") }
I think I pinpointed the problem to the "[x]" above. If I substitute an array position such as "[2]" it works fine. How can make the iteration work?
If the info above is not enough or you'd like to offer a better solution, please see the details below. Thanks!
The error message I get:
file.rb:31:in []': no implicit conversion of Array into Integer (TypeError)
from file.rb:31:inblock in '
from file.rb:31:in each'
from file.rb:31:in'
The nested hash below before alteration:
school = {
:name => "Happy Funtime School",
:location => "NYC",
:instructors => [
{:name=>"Blake", :subject=>"being awesome" },
{:name=>"Ashley", :subject=>"being better than blake"},
{:name=>"Jeff", :subject=>"karaoke"}
],
:students => [
{:name => "Marissa", :grade => "B"},
{:name=>"Billy", :grade => "F"},
{:name => "Frank", :grade => "A"},
{:name => "Sophie", :grade => "C"}
]
}
I'm trying to append :semester=>"Summer" to each of the last four hashes. Here is what I'm trying to go for:
# ...preceding code is the same. Changed code below...
:students => [
{:name => "Marissa", :grade => "B", :semester => "Summer"},
{:name=>"Billy", :grade => "F", :semester => "Summer"},
{:name => "Frank", :grade => "A", :semester => "Summer"},
{:name => "Sophie", :grade => "C", :semester => "Summer"}
]
}
Just iterate over the students:
school[:students].each { |student| student[:semester] = "Summer" }
Or, using merge:
school[:students].each { |student| student.merge!(semester: "Summer") }
The issue is that when you do array.each {|x| do something}, x actually refers to each element in the array.
For example, in the first iteration of the loop,
x = {:name => "Marissa", :grade => "B"}
So what you are really doing is trying to reference:
school[:student][{:name => "Marissa", :grade => "B"}]
Which will not work
What you could do instead is create a for loop to track the index.
for i in 0 ... school[:student].count
school[:students][i].merge!(semester:"Summer")
end
Edit: Stefan's solution is much better than mine, but I will leave this up to show where you went wrong.
I would do as below using Hash#store :
require 'awesome_print'
school = {
:name => "Happy Funtime School",
:location => "NYC",
:instructors => [
{
:name => "Blake",
:subject => "being awesome"
},
{
:name => "Ashley",
:subject => "being better than blake"
},
{
:name => "Jeff",
:subject => "karaoke"
}
],
:students => [
{
:name => "Marissa",
:grade => "B"
},
{
:name => "Billy",
:grade => "F"
},
{
:name => "Frank",
:grade => "A"
},
{
:name => "Sophie",
:grade => "C"
}
]
}
school[:students].each{|h| h.store(:semester ,"Summer")}
ap school,:index => false,:indent => 10
output
{
:name => "Happy Funtime School",
:location => "NYC",
:instructors => [
{
:name => "Blake",
:subject => "being awesome"
},
{
:name => "Ashley",
:subject => "being better than blake"
},
{
:name => "Jeff",
:subject => "karaoke"
}
],
:students => [
{
:name => "Marissa",
:grade => "B",
:semester => "Summer"
},
{
:name => "Billy",
:grade => "F",
:semester => "Summer"
},
{
:name => "Frank",
:grade => "A",
:semester => "Summer"
},
{
:name => "Sophie",
:grade => "C",
:semester => "Summer"
}
]
}

turn list of depth first traversal nodes back into tree structure in Ruby

Given the following input (from a CSV file):
input = [
{ :level => 0, :value => "a" },
{ :level => 1, :value => "1" },
{ :level => 1, :value => "2" },
{ :level => 2, :value => "I" },
{ :level => 2, :value => "II" },
{ :level => 2, :value => "III" },
{ :level => 0, :value => "b" },
{ :level => 0, :value => "c" },
{ :level => 0, :value => "d" },
{ :level => 1, :value => "3" },
{ :level => 1, :value => "4" },
]
How can I convert this to the following in "The Ruby Way":
expected = [
{ :value => "a", :children => [ { :value => 1, :children => nil },
{ :value => 2, :children => [ { :value => "I", :children => nil },
{ :value => "II", :children => nil },
{ :value => "III", :children => nil } ] } ] },
{ :value => "b", :children => nil },
{ :value => "c", :children => nil },
{ :value => "d", :children => [ { :value => 3, :children => nil },
{ :value => 4, :children => nil } ] },
]
?
Edited:
My solution to this was to sidestep the problem, transform it and get someone else to solve it:
require 'yaml'
def linear_to_tree(a)
yaml_lines = []
a.each do |el|
indent = " " * 4 * el[:level]
yaml_lines << "#{indent}-"
yaml_lines << "#{indent} :value: #{(el[:value])}"
yaml_lines << "#{indent} :children:"
end
yaml_lines << "" # without this, YAML.load complains
yaml = yaml_lines.join("\n")
# open("test_yaml.txt", "w"){|f| f.write(yaml)}
YAML.load(yaml)
end
But there must be a more elegant way to solve this.
P.S. I'd also like to see a one-liner for this transformation, just to see if it's possible.
You should use an empty array for nodes that have no children, an empty array is the null object for a collection. Otherwise you have to dance around both when you assign it, and when you use it.
def transform(inputs)
transform! inputs.dup
end
def transform!(inputs, output=[], current_level=0)
while inputs.any?
input = inputs.shift
level, value = input.values_at :level, :value
value = value.to_i if value =~ /\A\d+\z/
if level < current_level
inputs.unshift input
break
elsif level == current_level
next_children = []
output << {value: value, children: next_children}
transform! inputs, next_children, current_level.next
else
raise "presumably should not have gotten here"
end
end
output
end

Ruby: Transform a flat array in a tree representation

I am trying to write a function to convert a flat array with a path information into a tree representation of that array.
The goal would be to turn an array like the following:
[
{ :name => "a", :path => [ 'a' ] },
{ :name => "b", :path => [ 'a', 'b' ] },
{ :name => "c", :path => [ 'a', 'b', 'c' ] },
{ :name => "d", :path => [ 'a', 'd' ] },
{ :name => "e", :path => [ 'e' ] }
]
into one like this:
[{:node=>{:name=>"a", :path=>["a"]},
:children=>
[{:node=>{:name=>"b", :path=>["a", "b"]},
:children=>
[{:node=>{:name=>"c", :path=>["a", "b", "c"]}, :children=>[]}]},
{:node=>{:name=>"d", :path=>["a", "d"]}, :children=>[]}]},
{:node=>{:name=>"e", :path=>["e"]}, :children=>[]}]
The closest result I got with was with the following code:
class Tree
def initialize
#root = { :node => nil, :children => [ ] }
end
def from_array( array )
array.inject(self) { |tree, node| tree.add(node) }
#root[:children]
end
def add(node)
recursive_add(#root, node[:path].dup, node)
self
end
private
def recursive_add(parent, path, node)
if(path.empty?)
parent[:node] = node
return
end
current_path = path.shift
children_nodes = parent[:children].find { |child| child[:node][:path].last == current_path }
unless children_nodes
children_nodes = { :node => nil, :children => [ ] }
parent[:children].push children_nodes
end
recursive_add(children_nodes, path, node)
end
end
flat = [
{ :name => "a", :path => [ 'a' ] },
{ :name => "b", :path => [ 'a', 'b' ] },
{ :name => "c", :path => [ 'a', 'b', 'c' ] },
{ :name => "d", :path => [ 'a', 'd' ] },
{ :name => "e", :path => [ 'e' ] }
]
require 'pp'
pp Tree.new.from_array( flat )
But it is quite verbose and I have the feeling that it might not be very effective for very large sets.
What would be the cleanest and most effective way to achieve that in ruby?
This is my try.
array = [
{ :name => "a", :path => [ 'a' ] },
{ :name => "b", :path => [ 'a', 'b' ] },
{ :name => "c", :path => [ 'a', 'b', 'c' ] },
{ :name => "d", :path => [ 'a', 'd' ] },
{ :name => "e", :path => [ 'e' ] }
]
array
.sort_by{|h| -h[:path].length}
.map{|h| {node: h, children: []}}
.tap{|array|
while array.first[:node][:path].length > 1
child = array.shift
array
.find{|h| h[:node][:name] == child[:node][:path][-2]}[:children]
.push(child)
end
}
# => [
{:node=>{:name=>"e", :path=>["e"]}, :children=>[]},
{:node=>{:name=>"a", :path=>["a"]}, :children=>[
{:node=>{:name=>"d", :path=>["a", "d"]}, :children=>[]},
{:node=>{:name=>"b", :path=>["a", "b"]}, :children=>[
{:node=>{:name=>"c", :path=>["a", "b", "c"]}, :children=>[]}
]}
]}
]

ruby language - merge an array into another by finding same element

A = [
{ :id => 1, :name => 'good', :link => nil },
{ :id => 2, :name => 'bad', :link => nil }
]
B = [
{ :id => 3, :name => 'good' },
{ :id => 4, :name => 'good' },
{ :id => 5, :name => 'bad' }
]
I need to merge array B into A so that :link in array A includes the entry in array B if :name is the same value in each array.
For example, after processing array A should be:
A = [
{ :id => 1, :name => 'good', :link => [{ :id => 3, :name => 'good' }, { :id => 4, :name => 'good' }] },
{ :id => 2, :name => 'bad', :link => [{ :id => 5, :name => 'bad' }] }
]
thanks.
The short version;
a.each { | item | item[:link] = b.find_all { | x | x[:name] == item[:name] } }
Demo here.
In ruby the constants begin with an uppercase letter, so you should use lowercase letter:
A => a, B => b
a.each do |ha|
b.each do |hb|
if ha[:name] == hb[:name]
ha[:link] |= []
ha[:link] << hb
end
end
end
Functional approach:
B_grouped = B.group_by { |h| h[:name] }
A2 = A.map { |h| h.merge(:link => B_grouped[h[:name]]) }
#=> [{:link=>[{:name=>"good", :id=>3}, {:name=>"good", :id=>4}], :name=>"good", :id=>1},
# {:link=>[{:name=>"bad", :id=>5}], :name=>"bad", :id=>2}]

Resources