I'm using the following code to generate a JSON file containing all category information for a particular website.
The goal is to have a JSON file with the following format:
[
{
"id":"36_17",
"name":"Diversen Particulier",
"group":"Diversen",
"search_attributes":{
"0":"Prijs van/tot",
"1":"Groep en Rubriek",
"2":"Conditie",
}
},
{
"id":"36_18",
"name":"Diversen Zakelijk",
"group":"Diversen",
"search_attributes":{
"0":"Prijs van/tot",
"1":"Groep en Rubriek",
"2":"Conditie",
}
},
{
"id":"36_19",
"name":"Overige Diversen",
"group":"Diversen",
"search_attributes":{
"0":"Prijs van/tot",
"1":"Groep en Rubriek",
"2":"Conditie",
}
}, {...}
]
But I keep getting this format:
[
{
"id":"36_17",
"name":"Diversen Particulier",
"group":"Diversen",
"search_attributes":{"0":"Prijs van/tot"}
},
{
"id":"36_17",
"name":"Diversen Particulier",
"group":"Diversen",
"search_attributes":{"1":"Groep en Rubriek"}
},
{
"id":"36_17",
"name":"Diversen Particulier",
"group":"Diversen",
"search_attributes":{"2":"Conditie"}
}, {...}
]
The search_attributes are not getting saved correctly.
I'm using the following code:
require 'mechanize'
require 'json' # needed for Array#to_json when writing the output file

# Question's original script: walks groups -> categories -> search attributes
# and dumps the collected items to a timestamped JSON file.
#
# NOTE: the item hash is built inside the innermost loop, so one item is
# emitted per search attribute instead of one per category. This is the
# behaviour the question asks about and is intentionally left in place.
# NOTE(review): `:index_3.to_s` is the Symbol :index_3 converted to the string
# "index_3"; it is not the value of the loop variable index_3.
@hashes = []
# Initialize Mechanize object
a = Mechanize.new
# Begin scraping
a.get('http://www.marktplaats.nl/') do |page|
  groups = page.search('//*[(@id = "navigation-categories")]//a')
  groups.each_with_index do |group, index_1|
    a.get(group[:href]) do |page_2|
      categories = page_2.search('//*[(@id = "category-browser")]//a')
      categories.each_with_index do |category, index_2|
        a.get(category[:href]) do |page_3|
          search_attributes = page_3.search('//*[contains(concat( " ", @class, " " ), concat( " ", "heading", " " ))]')
          search_attributes.each_with_index do |attribute, index_3|
            item = {
              id: "#{index_1}_#{index_2}",
              name: category.text,
              group: group.text,
              :search_attributes => {
                :index_3.to_s => "#{attribute.text unless attribute.text == 'Outlet '}"
              }
            }
            @hashes << item
            puts item
          end
        end
      end
    end
  end
end
# Open file and begin
File.open("json/light/#{Time.now.strftime '%Y%m%d%H%M%S'}_light_categories.json", 'w') do |f|
  puts '# Writing category data to JSON file'
  f.write(@hashes.to_json)
  puts "|-----------> Done. #{@hashes.length} written."
end
puts '# Finished.'
The question is what's causing this and how do I solve it?
Update
A big thanks to arie-shaw for his answer.
Here's the working code:
require 'mechanize'
require 'json' # needed for Array#to_json when writing the output file

# Walks groups -> categories -> search attributes and writes one item per
# category (with all of its search attributes) to a timestamped JSON file.
@hashes = []
# Initialize Mechanize object
a = Mechanize.new
# Begin scraping
a.get('http://www.marktplaats.nl/') do |page|
  groups = page.search('//*[(@id = "navigation-categories")]//a')
  groups.each_with_index do |group, index_1|
    a.get(group[:href]) do |page_2|
      categories = page_2.search('//*[(@id = "category-browser")]//a')
      categories.each_with_index do |category, index_2|
        a.get(category[:href]) do |page_3|
          search_attributes = page_3.search('//*[contains(concat( " ", @class, " " ), concat( " ", "heading", " " ))]')
          # Collect every attribute of this category first; the item itself
          # is built once, after the loop.
          attributes_hash = {}
          search_attributes.each_with_index do |attribute, index_3|
            attributes_hash[index_3.to_s] = "#{attribute.text unless attribute.text == 'Outlet '}"
          end
          item = {
            # "<group index>_<category index>", matching the "36_17" format the question asks for
            id: "#{index_1}_#{index_2}",
            name: category.text,
            group: group.text,
            :search_attributes => attributes_hash
          }
          @hashes << item
          puts item
        end
      end
    end
  end
end
# Open file and begin
File.open("json/light/#{Time.now.strftime '%Y%m%d%H%M%S'}_light_categories.json", 'w') do |f|
  puts '# Writing category data to JSON file'
  f.write(@hashes.to_json)
  puts "|-----------> Done. #{@hashes.length} written."
end
puts '# Finished.'
The innermost each_with_index should only be used to build the search_attributes hash, rather than to create an element of the top-level result array.
# Begin scraping
# Expects `a` (the Mechanize agent) and `@hashes` (the result array) to be
# set up beforehand, as in the question's script.
a.get('http://www.marktplaats.nl/') do |page|
  groups = page.search('//*[(@id = "navigation-categories")]//a')
  groups.each_with_index do |group, index_1|
    a.get(group[:href]) do |page_2|
      categories = page_2.search('//*[(@id = "category-browser")]//a')
      categories.each_with_index do |category, index_2|
        a.get(category[:href]) do |page_3|
          search_attributes = page_3.search('//*[contains(concat( " ", @class, " " ), concat( " ", "heading", " " ))]')
          # The inner loop only fills the attributes hash...
          attributes_hash = {}
          search_attributes.each_with_index do |attribute, index_3|
            attributes_hash[index_3.to_s] = "#{attribute.text unless attribute.text == 'Outlet '}"
          end
          # ...and exactly one item per category is appended afterwards.
          @hashes << {
            id: "#{index_1}_#{index_2}",
            name: category.text,
            group: group.text,
            search_attributes: attributes_hash
          }
        end
      end
    end
  end
end
Related
I have a JSON file being generated from my console and would like to parse it in order to see how many nodes I have running on what version and on which operating system.
Example JSON:
[
{
"facts.aio_agent_version": "7.9.0",
"facts.operatingsystem": "windows"
},
{
"facts.aio_agent_version": "7.8.0",
"facts.operatingsystem": "windows"
},
{
"facts.aio_agent_version": "7.9.0",
"facts.operatingsystem": "CentOS"
},
{
"facts.aio_agent_version": "7.9.0",
"facts.operatingsystem": "CentOS"
},
{
"facts.aio_agent_version": "7.8.0",
"facts.operatingsystem": "CentOS"
}
]
I need an output of a ruby hash:
{"CentOS"=>"7.8.0"=>"nodes"=> 1, "CentOS"=>"7.9.0"=>"nodes"=> 2, "windows"=>"7.8.0"=>"nodes"=> 1, "windows"=>"7.9.0"=>"nodes"=> 1}
This is as far as I've managed to get:
# Attempt from the question: tries to count nodes per operating system and
# agent version from the parsed JSON array.
require 'json'
file = File.read('./data.json')
hash = JSON.parse(file)
hash2 = {}
hash.each { |key|
# The argument to key? is a one-pair Hash (os => version). hash2 is only ever
# given operating-system strings as keys, so this condition is always true
# and the else branch never runs.
if !hash2.key?(key["facts.operatingsystem"]=>key["facts.aio_agent_version"])
# Chained assignment: both hash2[os] and key["facts.aio_agent_version"] are
# set to the string "node".
hash2[key["facts.operatingsystem"]] = key["facts.aio_agent_version"] = "node"
else
# `=+ 1` assigns positive one; it is not the increment operator `+= 1`.
hash2[key["facts.operatingsystem"]["facts.aio_agent_version"]["node"]] =+ 1
end
}
puts hash2
output:
{"windows"=>"node", "CentOS"=>"node"}
require 'json' # JSON.parse is used below

# Counts nodes per operating system and agent version.
# Resulting shape: { os => { version => { 'nodes' => count } } }
json = File.read('./data.json')
array = JSON.parse(json)
result = {}
array.each do |node|
  os = node['facts.operatingsystem']
  version = node['facts.aio_agent_version']
  # dig returns nil when either the os or the version has not been seen yet
  count = result.dig(os, version, 'nodes')
  if count
    result[os][version]['nodes'] = count + 1
  else
    # Keep the versions already recorded for this os alongside the new one
    result[os] = { version => { 'nodes' => 1 } }.merge(result[os] || {})
  end
end
puts result
For every hash in the array, we dig into the result hash to determine whether we have already counted a node for that OS and version. If we have, we increment the count. If we haven't, we create the entry in the result hash. Since an OS can have many versions, we merge the existing versions with the newly created one.
I have a class method ::add_method(name, params = {}) that creates an instance method with define_method.
I need the parameters of the defined method to be keyword arguments depending on the params.
class Whatever
def self.add_method(name, params = {})
# do something with params
define_method name do |?|
# some business
end
end
end
The goal is that when the ::add_method is called with:
params = {
foo: { required: false, default: 0 },
bar: { required: true }
}
Whatever.add_method(:hello, params)
then it creates this method:
def hello(foo: 0, bar:)
# some business
end
Nota bene: this is not the real business, I've over simplified it so the question is easier to understand.
So as advised I went to class_eval.
# Generates instance methods whose keyword arguments are described by a
# parameter specification hash.
class Whatever
  class << self
    # Defines an instance method +name+ taking the keyword arguments described
    # by +parameters+. The generated method returns the argument values as an
    # array, in the order the parameters were declared.
    #
    # @param name [Symbol, String] name of the method to define
    # @param parameters [Hash{Symbol => Hash}] per-keyword options:
    #   +:required+ (truthy for a mandatory keyword) and +:default+
    #   (a literal value used when the keyword is optional)
    def add_method(name, parameters = {})
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{name}(#{method_parameters(parameters)})
          #{method_body(parameters)}
        end
      RUBY
    end

    # Renders the keyword-argument list as Ruby source.
    #
    # method_parameters({
    #   foo: { required: false, default: 0 },
    #   bar: { required: true }
    # })
    # # => "foo: 0, bar:"
    def method_parameters(parameters)
      parameters.map do |key, options|
        if options[:required]
          "#{key}:"
        else
          # inspect renders the default as a Ruby literal, so `false` is kept
          # (instead of collapsing to nil) and strings/symbols stay quoted.
          "#{key}: #{options[:default].inspect}"
        end
      end.join(', ')
    end

    # Renders the method body as Ruby source: an array of the parameter names.
    #
    # method_body({
    #   foo: { required: false, default: 0 },
    #   bar: { required: true }
    # })
    # # => "[foo, bar]"
    def method_body(parameters)
      "[#{parameters.keys.map(&:to_s).join(', ')}]"
    end
  end
end
params = {
foo: { required: false, default: 0 },
bar: { required: true }
}
Whatever.add_method(:hello, params)
Whatever.new.hello(bar: true) # => [0, true]
Whatever.new.hello(foo: 42, bar: true) # => [42, true]
Whatever.new.hello # missing keyword: bar (ArgumentError)
I am trying to build a new_hash from this hash:
languages = {
:oo => {
:ruby => {
:type => "interpreted"
},
:javascript => {
:type => "interpreted"
},
:python => {
:type => "interpreted"
}
},
:functional => {
:clojure => {
:type => "compiled"
},
:erlang => {
:type => "compiled"
},
:javascript => {
:type => "interpreted"
}
}
}
and the desired result is:
{
:ruby => {
:type => "interpreted",
:style => [:oo]
},
:javascript => {
:type => "interpreted",
:style => [:oo, :functional]
},
:python => {
:type => "interpreted",
:style => [:oo]
},
:clojure => {
:type => "compiled",
:style => [:functional]
},
:erlang => {
:type => "compiled",
:style => [:functional]
}
}
Here is what I've done so far:
# Attempt from the question: flattens the style => language nesting into a
# language => attributes hash and tries to record each language's styles.
def reformat_languages(languages)
new_hash = {}
languages.each do |k, v|
v.each do |k1, v1|
# Unconditional assignment: replaces whatever an earlier style stored for k1.
new_hash[k1] = v1
# The style list is reset on every visit, so only the current style k remains.
new_hash[k1][:style] = []
new_hash[k1][:style] << k
end
end
new_hash
end
Unfortunately, I cannot get the desired result. I understand that when the iteration arrives at the second javascript key, it overwrites the result of the first iteration, giving me:
:javascript => {
:type => "interpreted",
:style => [:functional]
}
instead of:
:javascript => {
:type => "interpreted",
:style => [:oo, :functional]
}
Here is a link to a repl.it where you can see the code in action: https://repl.it/BebC
I know I need to use a conditional, but I am not really sure where and on what to use it. If somebody could help me getting the desired result and explain a little bit why it works the way it works.
You can use something like
# Build a language => { type:, style: [...] } lookup from the nested input.
h = {}
languages.each do |k, v| # k: style (oo or functional)
  v.each do |k1, v1| # k1: language (ruby/python/...), v1: its attributes
    if h.key?(k1)
      # Language already recorded: only add the additional style.
      h[k1][:style].push(k)
    else
      # First time this language is seen: create its entry with one style.
      h[k1] = { type: v1[:type], style: [k] }
    end
  end
end
It checks whether h[k1] is already defined and, if so, appends to its style array. Otherwise it defines the entire hash with your type and a style array of size 1.
There is too much unconditional overwriting going on in your code. Should be something like this instead:
new_hash[k1] ||= {} # init to empty hash
new_hash[k1][:type] = v1[:type]
new_hash[k1][:style] ||= [] # make sure array exists
new_hash[k1][:style] << k
Instead of replacing entire new_hash[k1], you should change individual parts of it.
This is not an answer (so please no upvotes). Rather, it is an extended comment to help you understand the code @Martin suggested. (I see you are new to SO and quite possibly to Ruby as well.) Salting code with puts statements, as I have done, is often quite helpful, even after you become experienced with the language.
# Sample input: style => language => attributes.
languages = {
  oo: {
    ruby: { type: "interpreted" },
    javascript: { type: "interpreted" }
  },
  functional: {
    clojure: { type: "compiled" },
    javascript: { type: "interpreted" }
  }
}
# Same algorithm as the suggested answer, with a puts after every step so the
# evolution of h can be followed.
h = {}
languages.each_pair do |k, v| # oo or func
  puts "k=#{k}, v=#{v}"
  v.each_pair do |k1, v1| # ruby/python
    puts " k1=#{k1}, v1=#{v1}"
    entry = h[k1]
    if entry
      puts " h[#{k1}]=#{entry} (truthy)"
      entry[:style].push(k)
      puts " h after h[#{k1}][:style] << #{k}: #{h}"
    else
      puts " h[#{k1}].nil?=true (falsy)"
      h[k1] = { type: v1[:type], style: [k] }
      puts " h after h[#{k1}] = {type: v1[:type], style: #{k}}: #{h}"
    end
  end
end
prints:
k=oo, v={:ruby=>{:type=>"interpreted"}, :javascript=>{:type=>"interpreted"}}
k1=ruby, v1={:type=>"interpreted"}
h[ruby].nil?=true (falsy)
h after h[ruby] = {type: v1[:type], :style: oo}:
{:ruby=>{:type=>"interpreted", :style=>[:oo]}}
k1=javascript, v1={:type=>"interpreted"}
h[javascript].nil?=true (falsy)
h after h[javascript] = {type: v1[:type], :style: oo}:
{:ruby=>{:type=>"interpreted", :style=>[:oo]},
:javascript=>{:type=>"interpreted", :style=>[:oo]}}
k=functional, v={:clojure=>{:type=>"compiled"}, :javascript=>{:type=>"interpreted"}}
k1=clojure, v1={:type=>"compiled"}
h[clojure].nil?=true (falsy)
h after h[clojure] = {type: v1[:type], :style: functional}:
{:ruby=>{:type=>"interpreted", :style=>[:oo]},
:javascript=>{:type=>"interpreted", :style=>[:oo]},
:clojure=>{:type=>"compiled", :style=>[:functional]}}
k1=javascript, v1={:type=>"interpreted"}
h[javascript]={:type=>"interpreted", :style=>[:oo]} (truthy)
h after h[javascript][:style] << functional:
{:ruby=>{:type=>"interpreted", :style=>[:oo]},
:javascript=>{:type=>"interpreted", :style=>[:oo, :functional]},
:clojure=>{:type=>"compiled", :style=>[:functional]}}
and returns:
#=> {:oo =>{:ruby=>{:type=>"interpreted"},
# :javascript=>{:type=>"interpreted"}},
# :functional=>{:clojure=>{:type=>"compiled"},
# :javascript=>{:type=>"interpreted"}}}
You are overwriting the hashes that were already generated, which leads to the unexpected behavior you mentioned. The following piece of code does what you need. It's just a slightly modified version of your code.
# Flattens a style => language => attributes hash into
# language => attributes, adding a :style array that lists every style the
# language appeared under (in iteration order).
#
# The input is left untouched: each language entry is a copy of the first
# attributes hash seen for it, so calling the method repeatedly on the same
# input gives the same result.
#
# @param languages [Hash{Symbol => Hash{Symbol => Hash}}]
# @return [Hash{Symbol => Hash}]
def reformat_languages(languages)
  languages.each_with_object({}) do |(style, by_language), new_hash|
    by_language.each do |language, attributes|
      # merge returns a new hash, so the caller's inner hashes are not aliased
      # and never gain a :style key; ||= keeps an entry created by an earlier style.
      entry = (new_hash[language] ||= attributes.merge(style: []))
      entry[:style] << style
    end
  end
end
I watched the video at https://gorails.com/blog/refactoring-if-statements but was looking for a more concise way of evading the use of multiple if or case statements.
The following works
# Runs the handler registered for the given command name, using a lookup
# table of procs in place of an if/case chain.
def process(input)
  handlers = {
    :q => Proc.new { puts "Goodbye" },
    :tweet => Proc.new { puts "tweeting" },
    :dm => Proc.new { puts "direct messaging" },
    :help => Proc.new { puts "helping" }
  }
  handler = handlers[input.to_sym]
  handler.call
end
process("tweet")
But how could I shorten this further? I tried the following:
def process(input)
commands = {
:q => { puts "Goodbye" },
:tweet => { puts "tweeting" },
:dm => { puts "direct messaging"},
:help => { puts "helping"}
}
commands[input.to_sym].to_proc.call
end
process "tweet"
but then i get the error
# syntax error, unexpected tSTRING_BEG, expecting keyword_do or '{' or '('
# :q => { puts "Goodbye" },
# ^
Any suggestions please ?
Use the lambda syntax
# Runs the handler registered for the given command name. Handlers are
# lambda literals, which are already callable, so no to_proc conversion
# is needed before calling them.
def process(input)
  commands = {
    :q => -> { puts "Goodbye" },
    :tweet => -> { puts "tweeting" },
    :dm => -> { puts "direct messaging" },
    :help => -> { puts "helping" }
  }
  commands[input.to_sym].call
end
process "tweet"
Using the new Hash syntax can shorten this further:
# Looks the command up in an inline table of lambdas and invokes it.
def process(input)
  {
    q: -> { puts "Goodbye" },
    tweet: -> { puts "tweeting" },
    dm: -> { puts "direct messaging" },
    help: -> { puts "helping" }
  }[input.to_sym].call
end
process("tweet")
Use Kernel#proc:
Equivalent to Proc.new
# Looks the command up in an inline table built with Kernel#proc and invokes
# it. The table is indexed directly, so no local variable is needed.
def process(input)
  {
    :q => proc { puts "Goodbye" },
    :tweet => proc { puts "tweeting" },
    :dm => proc { puts "direct messaging" },
    :help => proc { puts "helping" }
  }[input.to_sym].call
end
I am not sure whether what you suggest or I will suggest here improves the elegance or the readability of the code under question but you could shorten it even further by using the hash accessor pattern as in:
# Indexes the command table directly (the "hash accessor" form) and invokes
# the selected proc. The table is never reused, so it is not bound to a
# local variable.
def process(input)
  {
    :q => Proc.new { puts "Goodbye" },
    :tweet => Proc.new { puts "tweeting" },
    :dm => Proc.new { puts "direct messaging" },
    :help => Proc.new { puts "helping" }
  }[input.to_sym].call
end
So I'm getting records from ActiveRecord and I'd like to do something like:
VIP_LIST = ['John', 'Larry', 'Dan']
records = [
{ name: "Adam" },
{ name: "Larry" },
{ name: "John" },
{ name: "Eric" },
{ name: "Dan" }
]
# This is what I want to end up with:
sort_please(records, VIP_LIST)
=> [
{ name: "John" },
{ name: "Larry" },
{ name: "Dan" },
{ name: "Adam" },
{ name: "Eric" }
]
How can i achieve this?
P.S. There could be values in VIP_LIST that are not even in records
It's not a clever one liner, but it works:
VIP_LIST = ['John', 'Larry', 'Dan', 'Fred'].freeze
records = [
  { name: "Adam" },
  { name: "Larry" },
  { name: "John" },
  { name: "Eric" },
  { name: "Dan" }
]
# name => rank lookup, built once so each record costs a single hash lookup
# instead of repeated scans of VIP_LIST and records.
vip_rank = VIP_LIST.uniq.each_with_index.to_h
# VIPs sort by their rank in VIP_LIST; everyone else keeps their original
# relative order after all VIPs (offset by VIP_LIST.length).
sorted = records.each_with_index.sort_by do |record, position|
  vip_rank.fetch(record[:name], VIP_LIST.length + position)
end.map(&:first)
p sorted # => [{:name=>"John"}, {:name=>"Larry"}, {:name=>"Dan"}, {:name=>"Adam"}, {:name=>"Eric"}]
try this:
# Sort key is [VIP rank, original position]: VIPs come first in VIP_LIST
# order, the rest follow in their original order. each_with_index supplies
# the position directly instead of searching records for every element.
records.each_with_index.sort_by do |x, position|
  [VIP_LIST.index(x[:name]) || VIP_LIST.length, position]
end.map(&:first)
# => [{:name=>"John"}, {:name=>"Larry"}, {:name=>"Dan"}, {:name=>"Adam"}, {:name=>"Eric"}]
this one?
# Collect the records for each VIP name, in VIP_LIST order. Names without a
# matching record contribute nothing.
new_arr = VIP_LIST.flat_map { |v| records.select { |r| r[:name] == v } }
# Append every record that was not picked up as a VIP, in original order.
new_arr += records - new_arr
I think this is both clear and concise:
# Split the records into VIPs and everyone else in one pass. This keeps
# duplicate records, which `(vip + records).uniq` would collapse.
vip, others = records.partition { |hash| VIP_LIST.include?(hash[:name]) }
vip = vip.sort_by { |x| VIP_LIST.find_index(x[:name]) } # reorder vip array
sorted_records = vip + others