Iterate over an array of hashes to sum the values of one hash key based on another hash key - Ruby

I have the following array of hashes as input:
ar = [{"Sales"=>"11", "CustID"=>"Cust04"},
{"Sales"=>"44.9", "CustID"=>"Cust04"},
{"Sales"=>"79.17", "CustID"=>"Cust06"},
{"Sales"=>"73.84", "CustID"=>"Cust06"},
{"Sales"=>"34.9", "CustID"=>"Cust06"},
{"Sales"=>"29.6825", "CustID"=>"Cust06"},
{"Sales"=>"2048.7", "CustID"=>"Cust06"},
{"Sales"=>"15.24", "CustID"=>"Cust02"},
{"Sales"=>"54.874", "CustID"=>"Cust04"},
{"Sales"=>"12.79", "CustID"=>"Cust08"},
{"Sales"=>"22.65", "CustID"=>"Cust08"},
{"Sales"=>"63.26", "CustID"=>"Cust08"},
{"Sales"=>"16.207", "CustID"=>"Cust08"},
{"Sales"=>"782.2", "CustID"=>"Cust07"},
{"Sales"=>"215.45", "CustID"=>"Cust07"},
{"Sales"=>"781.23", "CustID"=>"Cust07"},
{"Sales"=>"370.14", "CustID"=>"Cust07"},
{"Sales"=>"1.7", "CustID"=>"Cust09"},
{"Sales"=>"22.405", "CustID"=>"Cust09"}
]
I am looking for output like the following, based on total sales, with a rank:
ar_out # =>
["Customer" => "Cust04", "TotalSales" => "xxxx", "Rank" => "1"]

You can try it this way.
Input
ar = [
{ "Sales" => "11", "CustID" => "Cust04" },
{ "Sales" => "44.9", "CustID" => "Cust04" },
{ "Sales" => "79.17", "CustID" => "Cust06" },
{ "Sales" => "73.84", "CustID" => "Cust06" },
{ "Sales" => "34.9", "CustID" => "Cust06" },
{ "Sales" => "29.6825", "CustID" => "Cust06" },
{ "Sales" => "2048.7", "CustID" => "Cust06" },
{ "Sales" => "15.24", "CustID" => "Cust02" },
{ "Sales" => "54.874", "CustID" => "Cust04" },
{ "Sales" => "12.79", "CustID" => "Cust08" },
{ "Sales" => "22.65", "CustID" => "Cust08" },
{ "Sales" => "63.26", "CustID" => "Cust08" },
{ "Sales" => "16.207", "CustID" => "Cust08" },
{ "Sales" => "782.2", "CustID" => "Cust07" },
{ "Sales" => "215.45", "CustID" => "Cust07" },
{ "Sales" => "781.23", "CustID" => "Cust07" },
{ "Sales" => "370.14", "CustID" => "Cust07" },
{ "Sales" => "1.7", "CustID" => "Cust09" },
{ "Sales" => "22.405", "CustID" => "Cust09" }
]
Process
ar.each_with_object(Hash.new(0)) { |hsh, e| e[hsh['CustID']] += hsh['Sales'].to_f }.
  sort_by { |_, v| -v }.
  map.with_index { |(k, v), i| { 'Customer' => k, 'TotalSales' => v, 'Rank' => i + 1 } }
Output
[
  { "Customer" => "Cust06", "TotalSales" => 2266.2925, "Rank" => 1 },
  { "Customer" => "Cust07", "TotalSales" => 2149.02, "Rank" => 2 },
  { "Customer" => "Cust08", "TotalSales" => 114.90699999999998, "Rank" => 3 },
  { "Customer" => "Cust04", "TotalSales" => 110.774, "Rank" => 4 },
  { "Customer" => "Cust09", "TotalSales" => 24.105, "Rank" => 5 },
  { "Customer" => "Cust02", "TotalSales" => 15.24, "Rank" => 6 }
]
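The accumulator works because Hash.new(0) returns 0 for any missing key, so += can be used without first checking that the key exists:
totals = Hash.new(0)
totals["Cust04"] += 11.0   # no initialization needed; missing keys default to 0
totals["Cust04"] += 44.9
totals #=> {"Cust04"=>55.9}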

ar.
each_with_object({}) do |hash, acc|
(acc[hash["CustID"]] ||= {"Customer" => hash["CustID"], "TotalSales" => 0}).
tap { |h| h["TotalSales"] += hash["Sales"].to_f }
end.
values.
sort_by { |h| -h["TotalSales"] }.
map.
with_index(1) { |h, idx| h.merge("Rank" => idx) }
#⇒ [{"Customer"=>"Cust06", "TotalSales"=>2266.2925, "Rank"=>1},
# {"Customer"=>"Cust07", "TotalSales"=>2149.02, "Rank"=>2},
# {"Customer"=>"Cust08", "TotalSales"=>114.90699999999998, "Rank"=>3},
# {"Customer"=>"Cust04", "TotalSales"=>110.774, "Rank"=>4},
# {"Customer"=>"Cust09", "TotalSales"=>24.105, "Rank"=>5},
# {"Customer"=>"Cust02", "TotalSales"=>15.24, "Rank"=>6}]

Related

Logstash: extract multiple subfield values in multiple events

I have events of the type below. I'm trying to split each field's key/value pairs into new events.
I'm able to do it for two fields (TOTAL_VOLUME, SUCCESS_VOLUME), but when I try a third field, Logstash stops responding (split doesn't work for more than two fields).
{
"agentId" => "Log_Agent",
"#metadata" => {
"A1EvtFingerprint" => "AGENTID=Log_Agent&TIME=1657708200000&RESPTYPE=DC",
"indexname" => "heal_collated_agent_txn",
"tablename" => "agent_transactions_data",
"accountid" => "mle_account",
"enable_rubydebug" => "true"
},
"total" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
},
"success" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
},
"max_response" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.011000156402588
},
"response_type" => "DC",
"aggLevelInMins" => 15,
"timeInGMT" => 1657708200000,
"avg_response" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.5954742431640625,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.6110687255859375,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 4.580192565917969
},
"timeout" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 777,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 839,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 781
},
"unknown" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 773,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 794,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 746
},
"fail" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 770,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 737,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 800
},
"#timestamp" => 2022-07-13T10:30:00.000Z,
"slow" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 782,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 788,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 744
},
"min_response" => {
"GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0,
"GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0,
"GET#/txn/branchserver23.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12" => 5.0
},
"#version" => "1"
}
The desired output should be as follows (the multiple field values split into multiple events):
{
"txnId" : "GET#/txn/branchserver50.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
"timeInGMT" : 1657048320000,
"dcKpis" : {
"UNKNOWN_VOLUME" : 773,
"TIMEOUT_VOLUME" : 777,
"FAIL_VOLUME" : 770,
"MIN_RESPONSE_TIME" : 50,
"TOTAL_VOLUME" : 5.011000156402588,
"AVG_RESPONSE_TIME" : 4.5954742431640625,
"MAX_RESPONSE_TIME" : 5.011000156402588,
"SUCCESS_VOLUME" : 5.011000156402588,
"SLOW_VOLUME" : 782
}
},
{
"txnId" : "GET#/txn/branchserver51.aspx|srv=73689505-0ca6-48fe-a4da-4cf7ed4acd82|acc=12",
"timeInGMT" : 1657048320000,
"dcKpis" : {
"UNKNOWN_VOLUME" : 794,
"TIMEOUT_VOLUME" : 839,
"FAIL_VOLUME" : 737,
"MIN_RESPONSE_TIME" : 5.0,
"TOTAL_VOLUME" : 5.011000156402588,
"AVG_RESPONSE_TIME" : 4.6110687255859375,
"MAX_RESPONSE_TIME" : 5.011000156402588,
"SUCCESS_VOLUME" : 5.011000156402588,
"SLOW_VOLUME" : 788
}
}
Following is my pipeline:
ruby {
code => '
values = event.get("total")
if values.is_a? Hash
someField1 = []
values.each { |k, v|
someField1 << { "txnId1" => k, "total" => v }
}
event.set("someField1", someField1)
end
event.remove("total")
'
}
ruby {
code => '
values = event.get("success")
if values.is_a? Hash
someField2 = []
values.each { |k, v|
someField2 << { "txnId2" => k, "success" => v }
}
event.set("someField2", someField2)
end
event.remove("success")
'
}
split {
field => 'someField1'
}
split {
field => 'someField2'
}
mutate {
rename => {
"[someField1][txnId1]" => "[#metadata][txnId1]"
"[someField1][total]" => "[dcKpis][TOTAl_VOLUME]"
"[someField2][txnId2]" => "[#metadata][txnId2]"
"[someField2][success]" => "[dcKpis][SUCCESS_VOLUME]"
}
remove_field => ["someField1","someField2","someField3","someField4","someField5","someField6","someField7","someField8","someField9"]
}
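One way to avoid chaining a separate ruby filter and split per field, which is where this pipeline multiplies events, is to build the whole per-transaction structure in a single ruby filter and split once. Below is a minimal, untested sketch; the kpi_map names are assumptions taken from the desired output, and the "transactions" field name is hypothetical:
ruby {
  code => '
    # Assumed mapping from source field name to the KPI name in the desired output
    kpi_map = {
      "total"        => "TOTAL_VOLUME",
      "success"      => "SUCCESS_VOLUME",
      "fail"         => "FAIL_VOLUME",
      "timeout"      => "TIMEOUT_VOLUME",
      "unknown"      => "UNKNOWN_VOLUME",
      "slow"         => "SLOW_VOLUME",
      "min_response" => "MIN_RESPONSE_TIME",
      "max_response" => "MAX_RESPONSE_TIME",
      "avg_response" => "AVG_RESPONSE_TIME"
    }
    # Collect every KPI for each transaction id into a single hash
    txns = Hash.new { |h, k| h[k] = {} }
    kpi_map.each do |field, kpi|
      values = event.get(field)
      next unless values.is_a?(Hash)
      values.each { |txn_id, v| txns[txn_id][kpi] = v }
      event.remove(field)
    end
    # One array element per transaction; the single split below fans them out
    event.set("transactions", txns.map { |txn_id, kpis|
      { "txnId" => txn_id, "dcKpis" => kpis }
    })
  '
}
split {
  field => "transactions"
}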

Elasticsearch: Is it possible to sort collapsed results by a nested field?

I have a fairly complex mapping which stores products; each document contains a nested array of pre-calculated prices for each customer.
There may be multiple versions of each product in the index (with unique codes). Alternative products are grouped by a common xrefs_hash. The query I'm writing needs to select the best product for each customer (i.e. aggregate/collapse on the xrefs_hash), and then select the top product based on the value of the prices.weight nested field.
The prices.weight field is a float which we've pre-calculated based on the shops' customer settings on how they want to prioritise their own items. A hash is created from these settings (stored in prices.pricing_hash) so that we can store a single set of pricing if multiple customers share the same settings.
The index contains up to 300,000 products and can end up with ~100,000,000 documents once all prices are calculated and inserted.
The mapping looks something like this (shortened for brevity):
'mappings' => [
'_source' => [
'enabled' => true,
],
'dynamic' => false,
'properties' => [
'dealer_item_id' => [
'type' => 'integer',
],
'code' => [
'type' => 'text',
'analyzer' => 'custom_code_analyzer',
'fields' => [
'raw' => [
'type' => 'keyword',
],
],
],
'xrefs' => [
'type' => 'text',
'analyzer' => 'custom_code_analyzer',
'fields' => [
'raw' => [
'type' => 'keyword',
],
],
],
'xrefs_hash' => [
'type' => 'keyword',
],
'title' => [
'type' => 'text',
'analyzer' => 'custom_english_analyzer',
'fields' => [
'ngram_title' => [
'type' => 'text',
'analyzer' => 'custom_title_analyzer',
],
'raw' => [
'type' => 'keyword',
],
],
],
...
'prices' => [
'type' => 'nested',
'dynamic' => false,
'properties' => [
'pricing_hash' => [
'type' => 'keyword',
'index' => true,
],
'unit_price' => [
'type' => 'float',
'index' => true,
],
'pricebreaks' => [
'type' => 'object',
'dynamic' => false,
'properties' => [
'quantity' => [
'type' => 'integer',
'index' => false,
],
'price' => [
'type' => 'integer',
'index' => false,
],
],
],
'weight' => [
'type' => 'float',
'index' => true,
],
],
],
],
],
Example documents:
{
"dealer_item_id": 122023,
"code": "ABC123A",
"xrefs": [
"ABC123A",
"ABC123B",
],
"title": "Product A",
"xrefs_hash": "16d5415674c8365f63329b11ffc88da109590cec",
"prices": [
{
"pricebreaks": [
{
"quantity": 1,
"price": 9.75,
"contract": false
}
],
"weight": 0.20512820512820512,
"pricing_hash": "aabe06b7",
"unit_price": 9.75,
},
{
"pricebreaks": [
{
"quantity": 1,
"price": 9.75,
"contract": false
}
],
"weight": 0.20512820512820512,
"pricing_hash": "73643f3b",
"unit_price": 9.75,
}
]
},
{
"dealer_item_id": 124293,
"code": "ABC1234B",
"xrefs": [
"ABC123A",
"ABC123B",
],
"title": "Product B",
"xrefs_hash": "16d5415674c8365f63329b11ffc88da109590cec",
"prices": [
{
"contract_item": false,
"pricebreaks": [
{
"quantity": 1,
"price": 7.39,
"contract": false
}
],
"weight": 0.33829499323410017,
"pricing_hash": "aabe06b7",
"unit_price": 7.39,
},
{
"pricebreaks": [
{
"quantity": 1,
"price": 9.75,
"contract": false
}
],
"weight": 0.20512820512820512,
"pricing_hash": "73643f3b",
"unit_price": 9.75,
}
]
},
Example query:
{
"track_total_hits": 100000,
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "prices",
"score_mode": "none",
"inner_hits": {
"_source": {
"include": [
"prices"
]
}
},
"query": {
"bool": {
"must": [
{
"term": {
"prices.pricing_hash": "aabe06b7"
}
}
]
}
}
}
},
{
"term": {
"code.raw": "RX58022"
}
}
],
"must_not": [
{
"term": {
"disabled": true
}
}
]
}
}
}
},
"_source": {
"includes": [
"code",
"dealer_item_id",
"title",
"xrefs"
]
},
"collapse": {
"field": "xrefs_hash",
"inner_hits": {
"name": "best_xrefs",
"sort": {
"prices.weight": "desc"
},
"size": 1
}
},
"aggregations": {
"xrefs_count": {
"cardinality": {
"field": "xrefs_hash",
"precision_threshold": 40000
}
}
}
}
I have tried using a collapse query to select the best product, but this does not seem to support sorting by the nested prices.weight field.
I've also tried aggregating based on the xrefs_hash, but this seems to make pagination at the category level impossible.
The above example query almost works, but does not return the collapsed results in the correct order. When inspecting the query, the collapse sort value is replaced with Infinity, which ES apparently does when a document does not contain the sort field.
So what I'm wondering is: is it possible to:
Return one document per unique xrefs_hash value
Return the specific document with the highest prices.weight value, matching the customer's pricing_hash
Also make this work with pagination
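For the ordering problem specifically, one avenue worth trying (a hedged sketch, not verified against this index): collapse picks the top document per group according to the query's main sort, and Elasticsearch's nested sort allows the main hits to be sorted by a nested field when given a nested path and filter. Reusing the pricing_hash term from the example query, a top-level sort such as the following should rank documents by the customer-specific weight instead of falling back to Infinity:
"sort": [
  {
    "prices.weight": {
      "order": "desc",
      "mode": "max",
      "nested": {
        "path": "prices",
        "filter": {
          "term": { "prices.pricing_hash": "aabe06b7" }
        }
      }
    }
  }
]
The same sort block could also be repeated inside the collapse inner_hits so that "best_xrefs" agrees with the top-level order.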

Logstash: Split JSON object

I have a JSON object which results from an XML input; it looks like this:
{
"#version" => "1",
"#timestamp" => "2016-04-11T15:35:07.372Z",
"host" => "YUSUF-PC",
"command" => "nana",
"doc" => {
"TotalResults" => "1892",
"Audit" => [
[0] {
"Id" => "2260167",
"Action" => "UPDATE",
"ParentId" => "30612",
"ParentType" => "defect",
"Time" => "2016-01-04 08:27:59",
"User" => "nana",
"Properties" => {
"Property" => [
[0] {
"Label" => "Statut",
"Name" => "status",
"NewValue" => [
[0] "En cours"
]
},
[1] {
"Label" => "Affecté à",
"Name" => "owner",
"NewValue" => [
[0] "nana"
]
},
[2] {
"Label" => "Priorité",
"Name" => "severity",
"NewValue" => [
[0] "nana"
]
}
]
}
},
[1] {
"Id" => "2260168",
"Action" => "UPDATE",
"ParentId" => "30612",
"ParentType" => "defect",
"Time" => "2016-01-04 09:45:33",
"User" => "nana",
"Properties" => {
"Property" => [
[0] {
"Label" => "Affecté à",
"Name" => "owner",
"NewValue" => [
[0] "nana"
],
"OldValue" => [
[0] "nana"
]
}
]
}
}
]
} }
I need to split this JSON into properties, i.e. to get one document per property. The problem is not the split operation, but when I insert the result into Elasticsearch, the "NewValue" field isn't taken into account... So I need to write a Ruby filter to change the value to value[0]. Can anyone help? I'm not good at Ruby.
I want to get a json like this one :
{
"#version" => "1",
"#timestamp" => "2016-04-11T15:35:07.372Z",
"host" => "YUSUF-PC",
"command" => "nana",
"doc" => {
"TotalResults" => "1892",
"Audit" => [
[0] {
"Id" => "2260167",
"Action" => "UPDATE",
"ParentId" => "30612",
"ParentType" => "defect",
"Time" => "2016-01-04 08:27:59",
"User" => "nana",
"Properties" => {
"Property" =>
{
"Label" => "Statut",
"Name" => "status",
"NewValue" => "En cours"
}
}
}
]
}
}
Thank you
I hope this helps.
old = {
"#version" => "1",
"#timestamp" => "2016-04-11T15:35:07.372Z",
"host" => "YUSUF-PC",
"command" => "nana",
"doc" => {
"TotalResults" => "1892",
"Audit" => [
{
"Id" => "2260167",
"Action" => "UPDATE",
"ParentId" => "30612",
"ParentType" => "defect",
"Time" => "2016-01-04 08:27:59",
"User" => "nana",
"Properties" => {
"Property" => [
{
"Label" => "Statut",
"Name" => "status",
"NewValue" => [
"En cours"
]
},
{
"Label" => "Affecté à",
"Name" => "owner",
"NewValue" => [
"nana"
]
},
{
"Label" => "Priorité",
"Name" => "severity",
"NewValue" => [
"nana"
]
}
]
}
},
{
"Id" => "2260168",
"Action" => "UPDATE",
"ParentId" => "30612",
"ParentType" => "defect",
"Time" => "2016-01-04 09:45:33",
"User" => "nana",
"Properties" => {
"Property" => [
{
"Label" => "Affecté à",
"Name" => "owner",
"NewValue" => [
"nana"
],
"OldValue" => [
"nana"
]
}
]
}
}
]
} }
## This is the line actually doing the work: replace each NewValue array with its first element.
old["doc"]["Audit"].each { |audit| audit["Properties"]["Property"].each { |prop| prop["NewValue"] = prop["NewValue"].first } }
old
=> {"#version"=>"1", "#timestamp"=>"2016-04-11T15:35:07.372Z", "host"=>"YUSUF-PC", "command"=>"nana", "doc"=>{"TotalResults"=>"1892", "Audit"=>[{"Id"=>"2260167", "Action"=>"UPDATE", "ParentId"=>"30612", "ParentType"=>"defect", "Time"=>"2016-01-04 08:27:59", "User"=>"nana", "Properties"=>{"Property"=>[{"Label"=>"Statut", "Name"=>"status", "NewValue"=>"En cours"}, {"Label"=>"Affecté à", "Name"=>"owner", "NewValue"=>"nana"}, {"Label"=>"Priorité", "Name"=>"severity", "NewValue"=>"nana"}]}}, {"Id"=>"2260168", "Action"=>"UPDATE", "ParentId"=>"30612", "ParentType"=>"defect", "Time"=>"2016-01-04 09:45:33", "User"=>"nana", "Properties"=>{"Property"=>[{"Label"=>"Affecté à", "Name"=>"owner", "NewValue"=>"nana", "OldValue"=>["nana"]}]}}]}}

Ruby - combining/flattening multiple array of hashes on common hash key/value combination

I am working with a large data set containing multiple arrays of hashes, which all share a common key-value pair ("date" and the date value) as the first element of each hash.
The array of hashes I need to parse (#data["snapshot"]) is in the following format. Note that #data["snapshot"][0], #data["snapshot"][1], and #data["snapshot"][2] are identical in format, with identical dates, but their totals differ. In the resulting hash I need a key-value pair which identifies where the data came from.
#data["snapshot"][0] is as follows:
[{"date"=>"1455672010", "total"=>"**817**", "I"=>"1", "L"=>"3", "M"=>"62", "H"=>"5", "C"=>"0"},
{"date"=>"1455595298", "total"=>"**40**", "I"=>"8", "L"=>"5", "M"=>"562", "H"=>"125", "C"=>"0"},
{"date"=>"1455336016", "total"=>"**555**", "I"=>"10", "L"=>"1", "M"=>"93", "H"=>"121", "C"=>"0"}]
#data["snapshot"][1] is as follows:
[{"date"=>"1455672010", "total"=>"**70**", "I"=>"1", "L"=>"9", "M"=>"56", "H"=>"25", "C"=>"0"},
{"date"=>"1455595298", "total"=>"**54**", "I"=>"8", "L"=>"2", "M"=>"5", "H"=>"5", "C"=>"0"},
{"date"=>"1455336016", "total"=>"**25**", "I"=>"0", "L"=>"9", "M"=>"93", "H"=>"12", "C"=>"0"}]
#data["snapshot"][2] is as follows:
[{"date"=>"1455672010", "total"=>"**70**", "I"=>"12", "L"=>"5", "M"=>"5662", "H"=>"125", "C"=>"0"},
{"date"=>"1455595298", "total"=>"**43212**", "I"=>"56", "L"=>"6", "M"=>"5662", "H"=>"125", "C"=>"0"},
{"date"=>"1455336016", "total"=>"**55525**", "I"=>"100", "L"=>"19", "M"=>"5593", "H"=>"121", "C"=>"0"}]
My Question Is Ultimately:
How do I convert (flatten?) the 3 existing array of hashes (#data["snapshot"][0], #data["snapshot"][1], and #data["snapshot"][2]) into a single array of hashes in the following format?
[{"date"=>"1455672010", "CameFromDataSource0"=>"817", "CameFromDataSource1"=>"70", "CameFromDataSource2"=>"70"},
{"date"=>"1455595298", "CameFromDataSource0"=>"40", "CameFromDataSource1"=>"54", "CameFromDataSource2"=>"43212"},
{"date"=>"1455336016", "CameFromDataSource0"=>"555", "CameFromDataSource1"=>"25", "CameFromDataSource2"=>"55525"}]
TL;DR
snapshots.each_with_object(Hash.new {|hsh, date| hsh[date] = { "date" => date } })
.with_index do |(snapshot, hsh), i|
snapshot["data"].each {|datum| hsh[datum["date"]]["data#{i}"] = datum["total"] }
end.values
How it works
I'll break it down so you see how each part works. Here's our data (extraneous keys elided for clarity):
snapshots = [
{ "dataSourceID" => "152970",
"data" => [ { "date" => "1455672010", "total" => "817" },
{ "date" => "1455595298", "total" => "40" },
{ "date" => "1455336016", "total" => "555" } ]
},
{ "dataSourceID" => "33151",
"data" => [ { "date" => "1455672010", "total" => "70" },
{ "date" => "1455595298", "total" => "54" },
{ "date" => "1455336016", "total" => "25" } ]
},
{ "dataSourceID" => "52165",
"data" => [ { "date" => "1455672010", "total" => "70" },
{ "date" => "1455595298", "total" => "43212" },
{ "date" => "1455336016", "total" => "55525" } ]
}
]
Most of the magic is here:
result_hash = Hash.new {|hsh, date| hsh[date] = { "date" => date } }
Here we're using the Hash's default proc to automatically initialize new keys in the following way:
result_hash = Hash.new {|hsh, date| hsh[date] = { "date" => date } }
p result_hash["1455672010"]
# => { "date" => "1455672010" }
p result_hash
# => { "1455672010" => { "date" => "1455672010" } }
Simply accessing result_hash[foo] creates the hash { "date" => foo } and assigns it to result_hash[foo]. This enables the following:
result_hash["1455672010"]["data0"] = "817"
p result_hash
# => { "1455672010" => { "date" => "1455672010", "data0" => "817" } }
Magic!
Now suppose we have the following data:
data = [ { "date" => "1455672010", "total" => "817" },
{ "date" => "1455595298", "total" => "40" },
{ "date" => "1455336016", "total" => "555" } ]
Using our magic result_hash, we can do this:
data.each do |datum|
result_hash[datum["date"]]["data0"] = datum["total"]
end
p result_hash
# => { "1455672010" => { "date" => "1455672010", "data0" => "817" },
# "1455595298" => { "date" => "1455595298", "data0" => "40" },
# "1455336016" => { "date" => "1455336016", "data0" => "555" } }
See where I'm going with this? Here's all of our data, finally:
snapshots = [
{ "dataSourceID" => "152970",
"data" => [ { "date" => "1455672010", "total" => "817" },
{ "date" => "1455595298", "total" => "40" },
{ "date" => "1455336016", "total" => "555" } ]
},
{ "dataSourceID" => "33151",
"data" => [ { "date" => "1455672010", "total" => "70" },
{ "date" => "1455595298", "total" => "54" },
{ "date" => "1455336016", "total" => "25" } ]
},
{ "dataSourceID" => "52165",
"data" => [ { "date" => "1455672010", "total" => "70" },
{ "date" => "1455595298", "total" => "43212" },
{ "date" => "1455336016", "total" => "55525" } ]
}
]
Instead of hard-coding "data0", we can iterate over the snapshots hashes using each_with_index and build that key ("data0", then "data1", and so on) for each iteration. Inside that loop we can do exactly what we did above but with the "data" array from each snapshots hash:
result_hash = Hash.new {|hsh, date| hsh[date] = { "date" => date } }
snapshots.each_with_index do |snapshot, i|
data_key = "data#{i}"
snapshot["data"].each do |datum|
date = datum["date"]
result_hash[date][data_key] = datum["total"]
end
end
p result_hash.values
# => [ { "date" => "1455672010", "data0" => "817", "data1" => "70", "data2" => "70" },
# { "date" => "1455595298", "data0" => "40", "data1" => "54", "data2" => "43212" },
# { "date" => "1455336016", "data0" => "555", "data1" => "25", "data2" => "55525" } ]
Of course, this can be condensed somewhat, which I've done in the TL;DR above.
This is one way to do it.
Code
def convert(data)
  data.each_with_object({}) do |a, h|
    a.each { |g| h.update(g["date"] => [g["total"][/\d+/]]) { |_, o, n| o + n } }
  end.map do |date, arr|
    arr.each_with_index.with_object({ "date" => date }) { |(e, i), h| h["key#{i}"] = e }
  end
end
Example
convert(data)
#=> [{"date"=>"1455672010", "key0"=>"817", "key1"=>"70", "key2"=>"70"},
# {"date"=>"1455595298", "key0"=>"40", "key1"=>"54", "key2"=>"43212"},
# {"date"=>"1455336016", "key0"=>"555", "key1"=>"25", "key2"=>"55525"}]
Two steps
You can see that I've done this in two steps. First construct a hash:
f = data.each_with_object({}) do |a, h|
  a.each { |g| h.update(g["date"] => [g["total"][/\d+/]]) { |_, o, n| o + n } }
end
#=> {"1455672010"=>["817", "70", "70"],
# "1455595298"=>["40", "54", "43212"],
# "1455336016"=>["555", "25", "55525"]}
Here I have used the form of Hash#update (aka merge!) that employs a block ({ |_,o,n| o+n }) to determine the values of keys that are present in both hashes being merged.
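For instance:
h = { "1455672010" => ["817"] }
h.update("1455672010" => ["70"]) { |_key, old, new| old + new }
#=> {"1455672010"=>["817", "70"]}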
Then convert the hash to the desired format:
f.map do |date, arr|
  arr.each_with_index.with_object({ "date" => date }) { |(e, i), h| h["key#{i}"] = e }
end
#=> [{"date"=>"1455672010", "key0"=>"817", "key1"=>"70", "key2"=>"70"},
# {"date"=>"1455595298", "key0"=>"40", "key1"=>"54", "key2"=>"43212"},
# {"date"=>"1455336016", "key0"=>"555", "key1"=>"25", "key2"=>"55525"}]

How can I update the ids field with this RethinkDB document structure?

Having trouble trying to update the ids field in the document structure:
[
[0] {
"rank" => nil,
"profile_id" => 3,
"daily_providers" => [
[0] {
"relationships" => [
[0] {
"relationship_type" => "friend",
"count" => 0
},
[1] {
"relationship_type" => "acquaintance",
"ids" => [],
"count" => 0
}
],
"countries" => [
[0] {
"country_name" => "United States",
"count" => 0
},
[1] {
"country_name" => "Great Britain",
"count" => 0
}
],
"provider_name" => "foo",
"date" => 20130912
},
[1] {
"provider_name" => "bar"
}
]
}
]
In JavaScript, you can do
r.db('test').table('test').get(3).update(function(doc) {
return {daily_providers: doc("daily_providers").changeAt(
0,
doc("daily_providers").nth(0).merge({
relationships: doc("daily_providers").nth(0)("relationships").changeAt(
1,
doc("daily_providers").nth(0)("relationships").nth(1).merge({
ids: [1]
})
)
})
)}
})
Which becomes, in Ruby:
r.db('test').table('test').get(3).update{ |doc|
{"daily_providers" => doc["daily_providers"].change_at(
0,
doc["daily_providers"][0].merge({
"relationships" => doc["daily_providers"][0]["relationships"].change_at(
1,
doc["daily_providers"][0]["relationships"][1].merge({
"ids" => [1]
})
)
})
)}
}
You should probably have another table for the daily providers and do joins.
That would make things much simpler.
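If you do normalize, each daily provider becomes its own row and the nested change_at gymnastics collapse into a filtered update. A sketch under that assumption (the daily_providers table and its fields are hypothetical):
r.table("daily_providers").filter(
  "profile_id" => 3, "provider_name" => "foo"
).update { |row|
  { "relationships" => row["relationships"].change_at(
      1, row["relationships"][1].merge("ids" => [1])
    ) }
}.run(conn)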
