Filter on nested field with Elasticsearch

Filter on nested field with Elasticsearch - elasticsearch

I have 2 entities in my project, users and schedule
I need to create a page on which, in the form of a weekly calendar, I can display all employees and their shift for each day
Example:
https://monosnap.com/file/tEb3rUYNRmredPWOdfxRBTBpqkh36H
For this, I created a new index in which I indexed all employees. Each employee has a nested field, where his shifts are stored
The problem is that I can't figure out aggregations and filters.
My requirements:
There is always a date filter, which applies to a property of the nested shifts field.
It doesn't matter whether an employee has matching shifts or not: we show ALL employees.
Two aggregations are also displayed: user role and type of shift.
User role filters the list of employees.
Type of shift shows or hides the associated shifts.
An example of my request
{
"aggs": {
"shifts.ref_type": {
"nested": {
"path": "shifts"
},
"aggs": {
"shifts.ref_type": {
"terms": {
"field": "shifts.ref_type",
"size": 1000
}
}
}
},
"role": {
"terms": {
"field": "role",
"size": 1000
}
},
"name": {
"terms": {
"field": "name",
"size": 1000
}
}
},
"query": {
"bool": {
"must": [
{
"term": {
"_routing": "1"
}
}
],
"should": [
{
"range": {
"shifts.date_from": {
"lte": 1636923600,
"gte": 1636318800
}
}
}
]
}
},
"sort": [
{
"created": "ASC"
}
],
"size": 1
}
Sample response
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 36,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "employee_shift",
"_type": "_doc",
"_id": "a8abf060-25c8-45ee-a50e-02a2e2ad1c40",
"_score": null,
"_routing": "1",
"_source": {
"created": 1633967157,
"type": "user",
"title": null,
"description": "",
"uuid": "a8abf060-25c8-45ee-a50e-02a2e2ad1c40",
"author": "System",
"author:name": "System",
"author:role": "",
"acc": 1,
"property": [
1
],
"status": "Enabled",
"class": [
""
],
"weight": "",
"tags": [],
"language": "en",
"ref_source_id": null,
"ref_source_helper": null,
"ref_property": [
"test hostel2"
],
"ref_property_default": "test hostel2",
"name": "Housekeeper 1",
"role": [
"Housekeeper"
],
"role:weight": "2",
"role:id": [
5
],
"pay_rate": null,
"experience": null,
"supervisor": null,
"gender": null,
"units": [
"102",
"103",
"106",
"107",
"110",
"111",
"116",
"117",
"120",
"121",
"124",
"125",
"128",
"129",
"132",
"133",
"136",
"137"
],
"task_inspection": "All tasks",
"shifts": [
{
"uuid": "f48ae398-0668-4693-b335-2fee3baa2941",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635196500",
"date_to": "1635197400",
"notes": null
},
{
"uuid": "8b4d8148-2583-4ccf-a1cc-ae5e6d1e728e",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635287400",
"date_to": "1635289200",
"notes": null
},
{
"uuid": "3f5520d8-8108-4abd-8e2a-70c00faf6994",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635369300",
"date_to": "1635373800",
"notes": null
},
{
"uuid": "d4009660-447c-47de-b0f3-3c1f2d8d8f99",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635286500",
"date_to": "1635288300",
"notes": null
},
{
"uuid": "b3d883f0-b71f-4df7-bb63-a50f137528a4",
"ref_type": "Work",
"ref_type:color": "",
"date_from": "1635370200",
"date_to": "1635372900",
"notes": null
}
]
},
"sort": [
1633967157000
]
}
]
},
"aggregations": {
"role": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Houseman",
"doc_count": 4
},
{
"key": "Maintenance",
"doc_count": 4
},
{
"key": "Supervisor",
"doc_count": 4
},
{
"key": "Supervisor HSKP",
"doc_count": 4
},
{
"key": "Supervisor Maintenance",
"doc_count": 4
},
{
"key": "Administrator",
"doc_count": 3
},
{
"key": "Concierge dispatcher",
"doc_count": 3
},
{
"key": "Frontdesk",
"doc_count": 3
},
{
"key": "General manager",
"doc_count": 3
},
{
"key": "HKeeper",
"doc_count": 3
},
{
"key": "Housekeeper",
"doc_count": 3
},
{
"key": "Manager",
"doc_count": 3
}
]
},
"shifts.ref_type": {
"doc_count": 21,
"shifts.ref_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Work",
"doc_count": 19
},
{
"key": "test",
"doc_count": 2
}
]
}
},
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Administrator 123",
"doc_count": 1
},
{
"key": "Administrator 223",
"doc_count": 1
},
{
"key": "Administrator 3",
"doc_count": 1
},
{
"key": "Concierge dispatcher 1",
"doc_count": 1
},
{
"key": "Concierge dispatcher 2",
"doc_count": 1
},
{
"key": "Concierge dispatcher 3",
"doc_count": 1
},
{
"key": "Frontdesk 1",
"doc_count": 1
},
{
"key": "Frontdesk 2",
"doc_count": 1
},
{
"key": "Frontdesk 3",
"doc_count": 1
},
{
"key": "General manager 1",
"doc_count": 1
},
{
"key": "General manager 2",
"doc_count": 1
},
{
"key": "General manager 3",
"doc_count": 1
},
{
"key": "HKeeper 1",
"doc_count": 1
},
{
"key": "HKeeper 2",
"doc_count": 1
},
{
"key": "HKeeper 3",
"doc_count": 1
},
{
"key": "Housekeeper 1",
"doc_count": 1
},
{
"key": "Housekeeper 2",
"doc_count": 1
},
{
"key": "Housekeeper 3",
"doc_count": 1
},
{
"key": "Houseman 1",
"doc_count": 1
},
{
"key": "Houseman 2",
"doc_count": 1
},
{
"key": "Houseman 3",
"doc_count": 1
},
{
"key": "Maintenance 1",
"doc_count": 1
},
{
"key": "Maintenance 2",
"doc_count": 1
},
{
"key": "Maintenance 3",
"doc_count": 1
},
{
"key": "Manager 1222",
"doc_count": 1
},
{
"key": "Manager 2",
"doc_count": 1
},
{
"key": "Manager 3",
"doc_count": 1
},
{
"key": "Supervisor 1",
"doc_count": 1
},
{
"key": "Supervisor 2",
"doc_count": 1
},
{
"key": "Supervisor 3",
"doc_count": 1
},
{
"key": "Supervisor HSKP 1",
"doc_count": 1
},
{
"key": "Supervisor HSKP 2",
"doc_count": 1
},
{
"key": "Supervisor HSKP 3",
"doc_count": 1
},
{
"key": "Supervisor Maintenance 1",
"doc_count": 1
},
{
"key": "Supervisor Maintenance 2",
"doc_count": 1
},
{
"key": "Supervisor Maintenance 3",
"doc_count": 1
}
]
}
}
}
At the moment, everything seems to be working correctly except for one point: the aggregation by type of shift ALWAYS returns buckets, even for shifts that fall outside the date range and so should have been excluded by the date filter.
any advice? thank you

I'm making the answer based on the assumption from my comment:
you want your query to return all employees
you want the ref_type aggregation to only include shifts matching your date range
you want the "shifts" collections under your results to likewise only include shifts matching your date range
Apologies if I misunderstood your question.
One thing to get out of the way first, though you may have been aware: the should part of your query is not restricting the results, it's only affecting the score, since you already have a must.
As a corollary, it's not going to affect the aggregated results either; for that you need to use a filter aggregation:
"aggs": {
"shifts.ref_type": {
"nested": {
"path": "shifts"
},
"aggs": {
"shifts.ref_type": {
"filter": {
"range": {
"shifts.date_from": {
"gte": 1635370100,
"lte": 1635370300
}
}
},
"aggs": {
"shifts.ref_type": {
"terms": {
"field": "shifts.ref_type",
"size": 1000
}
}
}
}
}
},
This is going to get you the filtered counts in your aggregation, but that still won't filter your results - you'll get all the "shifts" in your hits. So a thing to be aware of with nested documents, your query is going to restrict the documents that are returned, based on the matches in the nested documents, but it's not going to actually filter out the nested documents that did not match. In order to do that you have another feature, nested inner hits, which lets you figure out exactly which nested documents matched. It's still not enough in your case since you don't want to filter out the results entirely (so even if there is no "inner hit" you still want to return the document, or at least have it be part of the aggregation). So now you have yet another feature, post-filter, which you can use to filter the documents after they have been aggregated. Taking the 2 together:
"post_filter": {
"nested": {
"path": "shifts",
"query": {
"bool": {
"must": [
{
"range": {
"shifts.date_from": {
"lte": 635370200,
"gte": 635370200
}
}
}
]
}
},
"inner_hits": {}
}
},
If you now set _source: false, the hits no longer include the full employee documents, and you get just the shifts that matched under inner_hits (you still have the employee names and roles in your aggregation results):
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.08701137,
"hits": [
{
"_index": "employee_shift",
"_type": "_doc",
"_id": "-tRnLn0B5PjpsgKgGXlB",
"_score": 0.08701137,
"inner_hits": {
"shifts": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "employee_shift",
"_type": "_doc",
"_id": "-tRnLn0B5PjpsgKgGXlB",
"_nested": {
"field": "shifts",
"offset": 4
},
"_score": 1,
"_source": {
"notes": null,
"ref_type:color": "",
"date_to": 635372900,
"ref_type": "Work",
"uuid": "b3d883f0-b71f-4df7-bb63-a50f137528a4",
"date_from": 635370200
}
}
]
}
}
}
}
]
},
"aggregations": {
"role": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Housekeeper",
"doc_count": 5
}
]
},
"shifts.ref_type": {
"doc_count": 25,
"shifts.ref_type": {
"doc_count": 4,
"shifts.ref_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Work",
"doc_count": 3
},
{
"key": "Work2",
"doc_count": 1
}
]
}
}
},
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Housekeeper 4",
"doc_count": 2
},
{
"key": "Housekeeper 1",
"doc_count": 1
},
{
"key": "Housekeeper 2",
"doc_count": 1
},
{
"key": "Housekeeper 3",
"doc_count": 1
}
]
}
}
}

Related

Elastic search terms aggregation for getting filter options

I'm trying to implement product search and want to get the search results along with the filters to filter by. I have managed to get the filter keys, but I also want the values for those keys.
my product body is
{
...product,
"attributes": [
{
"name": "Color",
"value": "Aqua Blue"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "0"
}
],
}
and I'm using this query in ES
GET product/_search
{
"aggs": {
"filters": {
"terms": {
"field": "attributes.name"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value",
"size": 10
}
}
}
}
}
}
Not sure why, but I'm getting all values for each key
"aggregations": {
"filters": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
}
]
}
Also, I do not want to manually specify every key (like Color, Size) explicitly to get their respective values.
Thanks :)

To keep things simple, you should use a single field to store each attribute:
"gender":"Male"
I assume you have tons of attributes, so you created an array instead; to handle that, you will have to use the "nested" field type.
The nested type preserves the relation between the properties of each nested document. If you don't use nested, you will see all the properties and values mixed together, and you will not be able to aggregate by a property without manually adding filters.
You can read an article I wrote about that here:
https://opster.com/guides/elasticsearch/data-architecture/elasticsearch-nested-field-object-field/
Mappings :
PUT test_product_nested
{
"mappings": {
"properties": {
"attributes": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
This query will only show Red products of size XL and aggregate by attributes.
If you want to do OR's instead of AND's you must use "should" clauses instead of "filter" clauses.
Query
POST test_product_nested/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Color"
}
},
{
"term": {
"attributes.value.keyword": "Red"
}
}
]
}
}
}
},
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Size"
}
},
{
"term": {
"attributes.value.keyword": "XL"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"attributes": {
"nested": {
"path": "attributes"
},
"aggs": {
"name": {
"terms": {
"field": "attributes.name.keyword"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value.keyword",
"size": 10
}
}
}
}
}
}
}
}
Results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test_product_nested",
"_id": "aJRayoQBtNG1OrZoEOQi",
"_score": 0,
"_source": {
"title": "Product 1",
"attributes": [
{
"name": "Color",
"value": "Red"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "XL"
}
]
}
}
]
},
"aggregations": {
"attributes": {
"doc_count": 4,
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Red",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Female",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "XL",
"doc_count": 1
}
]
}
}
]
}
}
}
}

Using Vega with Elasticsearch data containing nested aggregations (or dividing one aggregation by another in Elasticsearch)

I'm trying to do something with Elasticsearch that should be quite simple. I have an index which contains documents of the shape: {"timestamp": int, "pricePerUnit": int, "units": int}. I want to visualize the average price over time in a histogram. Note that I don't want the average of the "pricePerUnit", I want the average price paid per unit, which means finding the total value in each time bucket by multiplying the "pricePerUnit" by the "units" for each document, and summing the total value sold in each document, then dividing by the sum of the total units sold in the time bucket to get the average price paid per unit. A standard Kibana line chart won't work. I can get the average "pricePerUnit * units", but can't divide this aggregation by the sum of the total units. Also can't be done in TSVB, as this doesn't allow for scripts/scripted fields. Can't use timelion, because the "timestamp" field isn't a time field (I know, but there's nothing I can do about it). I'm therefore trying to use Vega. However, I'm running into a problem with nested aggregations. Here's the ES query I'm running:
{
"$schema": "https://vega.github.io/schema/vega/v3.json",
"data": {
"name": "vals",
"url": {
"index": "index_name",
"body": {
"aggs": {
"2": {
"histogram": {
"field": "timestamp",
"interval": 2000,
"min_doc_count": 1
},
"aggs": {
"1": {
"avg": {
"field": "pricePerUnit",
"script": {
"inline": "doc['pricePerUnit'].value * doc['units'].value",
"lang": "painless"
}
}
}
}
}
},
"size": 0,
"stored_fields": [
"*"
],
"script_fields": {
"spend": {
"script": {
"source": "doc['pricePerUnit'].value * doc['units'].value",
"lang": "painless"
}
}
},
"docvalue_fields": [],
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"timeslot.startTime": {
"gte": 1621292400,
"lt": 1621428349
}
}
}
],
"should": [],
"must_not": []
}
}
},
"format": {"property": "aggregations.2.buckets"}
}
}
,
"scales": [
{
"name": "yscale",
"type": "linear",
"zero": true,
"domain": {"data": "vals", "field": "1.value"},
"range": "height"
},
{
"name": "xscale",
"type": "time",
"range": "width"
}
],
"axes": [
{"scale": "yscale", "orient": "left"},
{"scale": "xscale", "orient": "bottom"}
],
"marks": [
{
"type": "line",
"encode": {
"update": {
"x": {"scale": "xscale", "field": "key"},
"y": {"scale": "yscale", "field": "1.value"}
}
}
}
]
}
It gives me the following result set:
"took": 1,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 401,
"max_score": null,
"hits": []
},
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": 86340
},
"key": 1621316000,
"doc_count": 7
},
{
"1": {
"value": 231592.92307692306
},
"key": 1621318000,
"doc_count": 13
},
{
"1": {
"value": 450529.23529411765
},
"key": 1621320000,
"doc_count": 17
},
{
"1": {
"value": 956080.0555555555
},
"key": 1621322000,
"doc_count": 18
},
{
"1": {
"value": 1199865.5714285714
},
"key": 1621324000,
"doc_count": 14
},
{
"1": {
"value": 875300.7368421053
},
"key": 1621326000,
"doc_count": 19
},
{
"1": {
"value": 926738.8
},
"key": 1621328000,
"doc_count": 20
},
{
"1": {
"value": 3239475.3333333335
},
"key": 1621330000,
"doc_count": 18
},
{
"1": {
"value": 3798063.714285714
},
"key": 1621332000,
"doc_count": 21
},
{
"1": {
"value": 482089.5
},
"key": 1621334000,
"doc_count": 4
},
{
"1": {
"value": 222952.33333333334
},
"key": 1621336000,
"doc_count": 12
},
{
"1": {
"value": 742225.75
},
"key": 1621338000,
"doc_count": 8
},
{
"1": {
"value": 204203.25
},
"key": 1621340000,
"doc_count": 4
},
{
"1": {
"value": 294886
},
"key": 1621342000,
"doc_count": 4
},
{
"1": {
"value": 284393.75
},
"key": 1621344000,
"doc_count": 4
},
{
"1": {
"value": 462800.5
},
"key": 1621346000,
"doc_count": 4
},
{
"1": {
"value": 233321.2
},
"key": 1621348000,
"doc_count": 5
},
{
"1": {
"value": 436757.8
},
"key": 1621350000,
"doc_count": 5
},
{
"1": {
"value": 4569021
},
"key": 1621352000,
"doc_count": 1
},
{
"1": {
"value": 368489.5
},
"key": 1621354000,
"doc_count": 4
},
{
"1": {
"value": 208359.4
},
"key": 1621356000,
"doc_count": 5
},
{
"1": {
"value": 7827146.375
},
"key": 1621358000,
"doc_count": 8
},
{
"1": {
"value": 63873.5
},
"key": 1621360000,
"doc_count": 6
},
{
"1": {
"value": 21300
},
"key": 1621364000,
"doc_count": 1
},
{
"1": {
"value": 138500
},
"key": 1621366000,
"doc_count": 2
},
{
"1": {
"value": 5872400
},
"key": 1621372000,
"doc_count": 1
},
{
"1": {
"value": 720200
},
"key": 1621374000,
"doc_count": 1
},
{
"1": {
"value": 208634.33333333334
},
"key": 1621402000,
"doc_count": 3
},
{
"1": {
"value": 306248.5
},
"key": 1621404000,
"doc_count": 10
},
{
"1": {
"value": 328983.77777777775
},
"key": 1621406000,
"doc_count": 18
},
{
"1": {
"value": 1081724
},
"key": 1621408000,
"doc_count": 10
},
{
"1": {
"value": 2451076.785714286
},
"key": 1621410000,
"doc_count": 14
},
{
"1": {
"value": 1952910.2857142857
},
"key": 1621412000,
"doc_count": 14
},
{
"1": {
"value": 2294818.1875
},
"key": 1621414000,
"doc_count": 16
},
{
"1": {
"value": 2841910.388888889
},
"key": 1621416000,
"doc_count": 18
},
{
"1": {
"value": 2401278.9523809524
},
"key": 1621418000,
"doc_count": 21
},
{
"1": {
"value": 4311845.4
},
"key": 1621420000,
"doc_count": 5
},
{
"1": {
"value": 617102.5333333333
},
"key": 1621422000,
"doc_count": 15
},
{
"1": {
"value": 590469.7142857143
},
"key": 1621424000,
"doc_count": 14
},
{
"1": {
"value": 391918.85714285716
},
"key": 1621426000,
"doc_count": 14
},
{
"1": {
"value": 202163.66666666666
},
"key": 1621428000,
"doc_count": 3
}
]
}
}
}
The problem is that I can't extract the "value" field from the "1" sub-aggregation. I've tried using a flatten transform, but it doesn't seem to work. If anyone can either:
a) Tell me how to solve this specific problem with Vega; or
b) Tell me another way to solve my original problem
I'd be much obliged!

Your DSL query is looking great. If I've read this correctly I believe what you are looking for is a project transform. This can make life a lot easier when dealing with nested variables, as there are certain instances where they just don't function as expected.
You also need to reference data within marks otherwise it will plot nothing.
Below is how to fix this, you'll just need to add your url parameter in.
{
$schema: https://vega.github.io/schema/vega/v3.json
data: [
{
name: vals
url: ... // fill this in
transform: [
{
type: project
fields: [
1.value
doc_count
key
]
as: [
val
doc_count
key
]
}
]
}
]
scales: [
{
name: yscale
type: linear
zero: true
domain: {
data: vals
field: val
}
range: height
}
{
name: xscale
type: time
domain: {
data: vals
field: key
}
range: width
}
]
axes: [
{
scale: yscale
orient: left
}
{
scale: xscale
orient: bottom
}
]
marks: [
{
type: line
from: {
data: vals
}
encode: {
update: {
x: {
scale: xscale
field: key
}
y: {
scale: yscale
field: val
}
}
}
}
]
}
In the future, if you are having issues, look at the examples in the Vega Gallery. They also have extensive documentation. These two combined are all you need.

ElasticSearch nested sort with filter

I've documents that contain a list of prices for specific keys, for example as the following
document1
{
"name":"doc1",
"cheapestPrices": [{
"key": "10000_BB",
"value": 50
}, {
"key": "10000_LO",
"value": 10
}, {
"key": "10000",
"value": 10
}, {
"key": "",
"value": 10
}
]
}
document2
{
"name":"doc2",
"cheapestPrices": [{
"key": "10000_BB",
"value": 15
}, {
"key": "10000_LO",
"value": 30
}, {
"key": "10000",
"value": 15
}, {
"key": "",
"value": 15
}
]
}
Now I send a query and I want to sort by given keys and the order should be from lowest to highest. I created this query:
{
"size": 10000,
"sort": [
{
"cheapestPrices.value": {
"mode": "min",
"nested": {
"filter": {
"bool": {
"should": [
{
"term": {
"cheapestPrices.key": {
"value": "10000_BB"
}
}
}
]
}
},
"path": "cheapestPrices"
},
"order": "asc"
}
}
]
}
Expecting that I would get doc2 (value 15 for that key) first and then doc1 (value 50 for that key)... but the results are doc1 and then doc2, and the sort value is exactly the same for both.
Result:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [{
"_index": "test_sortbyprice",
"_type": "_doc",
"_id": "doc1",
"_score": null,
"_source": {
"cheapestPrices": [{
"key": "10000_BB",
"value": 50
}, {
"key": "10000_LO",
"value": 10
}, {
"key": "10000",
"value": 10
}, {
"key": "",
"value": 10
}
],
"name": "doc1"
},
"sort": [
9223372036854775807
]
}, {
"_index": "test_sortbyprice",
"_type": "_doc",
"_id": "doc2",
"_score": null,
"_source": {
"cheapestPrices": [{
"key": "10000_BB",
"value": 15
}, {
"key": "10000_LO",
"value": 30
}, {
"key": "10000",
"value": 15
}, {
"key": "",
"value": 15
}
],
"name": "doc2"
},
"sort": [
9223372036854775807
]
}
]
}
}
The mapping is as follows:
{
"properties": {
"cheapestPrices": {
"type": "nested",
"properties": {
"value": {
"type": "integer"
},
"key": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
}
}
}

TL;DR
Change the term query to target the field cheapestPrices.key.keyword instead of cheapestPrices.key.
The sort query does not match any documents due to using term (an exact match) on a field that's been lowercased thanks to the standard analyzer which was applied by default on a text field w/ no extra analyzers. This means it's never going to equal 10000_BB (uppercase). But luckily you have the .keyword available which ensures no value modifications.
The sort scores are the same (I assume 9223372036854775807 a.k.a. Long.MAX_VALUE) because that's the default ES behavior. It's not really that far fetched when you think about it: if the sort query does not match anything, it'll assign the highest possible value.
If your order were desc, it'd have returned -Long.MAX_VALUE

date.getHourOfDay() is giving strange results in aggregation

I am indexing some events and trying to get unique hours, but the terms aggregation is giving a weird response. I have the following query.
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"City": [
"Chicago"
]
}
},
{
"range": {
"eventDate": {
"gte": "2018-06-22",
"lte": "2018-06-22"
}
}
}
]
}
},
"aggs": {
"Hours": {
"terms": {
"script": "doc['eventDate'].date.getHourOfDay()"
}
}
}
}
This query produces following response.
"buckets": [
{
"key": "19",
"doc_count": 12
},
{
"key": "9",
"doc_count": 7
},
{
"key": "15",
"doc_count": 4
},
{
"key": "16",
"doc_count": 4
},
{
"key": "20",
"doc_count": 4
},
{
"key": "12",
"doc_count": 2
},
{
"key": "6",
"doc_count": 2
},
{
"key": "8",
"doc_count": 2
},
{
"key": "10",
"doc_count": 1
},
{
"key": "11",
"doc_count": 1
}
]
Now I changed the range to get the events for past one month
{
"range": {
"eventDate": {
"gte": "2018-05-22",
"lte": "2018-06-22"
}
}
}
and the response I got was
"Hours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1319,
"buckets": [
{
"key": "22",
"doc_count": 805
},
{
"key": "14",
"doc_count": 370
},
{
"key": "15",
"doc_count": 250
},
{
"key": "21",
"doc_count": 248
},
{
"key": "16",
"doc_count": 195
},
{
"key": "0",
"doc_count": 191
},
{
"key": "13",
"doc_count": 176
},
{
"key": "3",
"doc_count": 168
},
{
"key": "20",
"doc_count": 159
},
{
"key": "11",
"doc_count": 148
}
]
}
As you can see, I got buckets with keys 6, 8, 9, 10 and 12 in the response of the first query but not in the second, which is very strange, as the documents returned by the first query are a small subset of those returned by the second. Is this a bug or am I missing something obvious?
Thanks

ElasticSearch Aggregation Missing Data

In Elasticsearch, when I run an aggregation query it only runs on one index. How do I get it to run on multiple indices?

Do a multi-index search.
As an example, I created a couple of indexes (implicitly) by bulk indexing some data:
POST /_bulk
{ "index": {"_index": "test_index", "_type":"doc"}}
{ "name": "Brown foxes"}
{ "index": {"_index": "test_index_2", "_type":"doc"}}
{ "name": "Yellow furballs" }
{ "index": {"_index": "test_index", "_type":"doc"}}
{ "name": "my discovery" }
{ "index": {"_index": "test_index_2", "_type":"doc"}}
{ "name": "myself is fun" }
{ "index": {"_index": "test_index", "_type":"doc"}}
{ "name": ["foxy", "foo"] }
{ "index": {"_index": "test_index_2", "_type":"doc"}}
{ "name": ["foo bar", "baz"] }
Then I can run an aggregation on both indexes:
POST /test_index,test_index_2/_search?search_type=count
{
"aggs": {
"name_terms": {
"terms": {
"field": "name"
}
}
}
}
and I get back all the terms from both:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 0,
"hits": []
},
"aggregations": {
"name_terms": {
"buckets": [
{
"key": "foo",
"doc_count": 2
},
{
"key": "bar",
"doc_count": 1
},
{
"key": "baz",
"doc_count": 1
},
{
"key": "brown",
"doc_count": 1
},
{
"key": "discovery",
"doc_count": 1
},
{
"key": "foxes",
"doc_count": 1
},
{
"key": "foxy",
"doc_count": 1
},
{
"key": "fun",
"doc_count": 1
},
{
"key": "furballs",
"doc_count": 1
},
{
"key": "is",
"doc_count": 1
}
]
}
}
}
Here's the code:
http://sense.qbox.io/gist/e053a0c6c5453eae68d7b7ff2ff12588669b046e

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Filter on nested field with Elasticsearch - elasticsearch

Related

Elastic search terms aggregation for getting filter options

Using Vega with Elasticsearch data containing nested aggregations (or dividing one aggregation by another in Elasticsearch)

ElasticSearch nested sort with filter

date.getHourOfDay() is giving strange results in aggregation

ElasticSearch Aggregation Missing Data

Categories

Resources