elastic search nested sub aggregations

elastic search nested sub aggregations - elasticsearch

We are using Elasticsearch, which holds records as documents with the following definition:
{
"loadtender": {
"aliases": {},
"mappings": {
"_doc": {
"_meta": {
"version": 20
},
"properties": {
"carrierId": {
"type": "long"
},
"destinationData": {
"type": "keyword"
},
"destinationZip": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"effStartTime": {
"type": "date"
},
"endTime": {
"type": "date"
},
"id": {
"type": "long"
},
"mustRespondByTime": {
"type": "date"
},
"orgdiv": {
"type": "keyword"
},
"originData": {
"type": "keyword"
},
"originZip": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"purchaseOrderNum": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 255
}
}
},
"startTime": {
"type": "date"
},
"tenderStatus": {
"type": "keyword"
},
"tenderedTime": {
"type": "date"
}
}
}
},
"settings": {
"index": {
"creation_date": "1655105542470",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "ohcXgA8EQ5iJj0X6_4BqXA",
"version": {
"created": "6080499"
},
"provided_name": "loadtender"
}
}
}
}
I am trying to search the records so that the following filtered results are returned:
Input Parameter : startDate (yesterday), originData.originCity and originData.destinationCity
Output Required:
Three buckets for 0-30 days, 30-60 days and 60-90 days
buckets of distinct originData.city and destinationData.city combinations under each of the above
Under each of the above, buckets of data for each unique carrierId and the corresponding record list / count
Basically I was trying to achieve something like the below
{
"aggregations": {
"aggr": {
"buckets": [
{
"key": "0-30 days",
"doc_count": 10,
"aggr": {
"buckets": [
{
"key": "(originCity)Menasha, WI, US|Hanover, MD, US (DestinationCity)",
"aggr": {
"buckets": [
{
"key": "10183-carrierId",
"count": 10
}
]
}
}
]
}
},
{
"key": "30-60 days",
"doc_count": 11,
"aggr": {
"buckets": [
{
"key": "Dallas, TX, US|Houston, TX, US",
"aggr": {
"buckets": [
{
"key": "10183-carrierId",
"count": 10
},
{
"key": "10022-carrierId",
"count": 1
}
]
}
}
]
}
}
]
}
}
}
I've tried the following, but I cannot find a way to break the results down further using sub-aggregations.
{
"_source":["id", "effStartTime", "carrierId", "originData", "destinationData"],
"size": 100,
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"range": {
"startTime": {
"from": "2021-08-27T23:59:59.000Z",
"to": "2022-09-01T00:00:00.000Z",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"must_not": [
{
"term": {
"tenderStatus": {
"value": "REMOVED",
"boost": 1
}
}
}
],
"filter" : {
"exists" : {
"field" : "carrierId"
}
},
"adjust_pure_negative": true,
"boost": 1
}
},
"aggregations": {
"aggr": {
"terms": {
"script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]"
}
}
}
}
I am beginning to wonder whether this is even possible, or whether I should switch to issuing multiple queries instead.

I was able to achieve the same using the following sub-aggregations:
"aggregations": {
"aggr":{
"date_range": {
"field": "startTime",
"format": "MM-yyyy",
"ranges": [
{"to": "now-1M/M", "from": "now"}, --> now to 30 days back
{"to": "now-1M/M", "from": "now-2M/M"}, from 30 days back to 60 days back
{"to": "now-2M/M", "from": "now-3M/M"}, from 60 days back to 90 days back
{"to": "now-3M/M", "from": "now-12M/M"}
]
},
"aggregations": {
"aggr":{
"terms": {
"script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]" --> concatenated origin and destination address as a key
},
"aggregations": {
"aggr": {
"terms": {
"field": "carrierId" --> nested carrier count
}
}
}
}
}
}
}
Following is the response template that I receive.
"aggregations": {
"aggr": {
"buckets": [
{
"key": "09-2021-06-2022",
"from": 1630454400000,
"from_as_string": "09-2021",
"to": 1654041600000,
"to_as_string": "06-2022",
"doc_count": 1,
"aggr": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Dallas, TX, US|Houston, TX, US",
"doc_count": 14,
"aggr": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 10022,
"doc_count": 14
}
]
}
}
]
}
}
]
}
}
Thank you to all of you for your efforts and time. Do let me know if you discover any better way.

Related

Elasticsearch complex aggregation on match results

I'm trying to get the top 5 teams based on win rate (matches won / all matches) having the match result documents in the index (see below).
The fields I need to get in the bucket:
apiId
name
winrate (won/total)
I guess this would require a complex aggregation calculation, which is far beyond my current Elasticsearch skills.
Elasticsearch version: 7.15.2
Could anyone help me with such elasticsearch query?
Thanks a lot in advance!
{
"lastModified": "2022-01-14T09:33:48.232Z",
"uuid": "01234567",
"started": "2022-01-14T09:31:27.651Z",
"editing": false,
"approved": true,
"statistics": {
"teams": [
{
"name": "Team1",
"score": 0,
"winner": false,
"apiId": "1"
},
{
"name": "Team2",
"score": 2,
"winner": true,
"apiId": "2"
}
]
}
}
and the mapping:
{
"mappings": {
"properties": {
"_class": {
"type": "keyword",
"index": false,
"doc_values": false
},
"approved": {
"type": "boolean"
},
"editing": {
"type": "boolean"
},
"ended": {
"type": "date",
"format": "date_optional_time||epoch_millis"
},
"game": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"lastModified": {
"type": "date",
"format": "date_optional_time||epoch_millis"
},
"started": {
"type": "date",
"format": "date_optional_time||epoch_millis"
},
"statistics": {
"type": "nested",
"include_in_parent": true,
"properties": {
"_class": {
"type": "keyword",
"index": false,
"doc_values": false
},
"teams": {
"type": "nested",
"include_in_parent": true,
"properties": {
"_class": {
"type": "keyword",
"index": false,
"doc_values": false
},
"apiId": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"score": {
"type": "long"
},
"winner": {
"type": "boolean"
}
}
}
}
},
"uuid": {
"type": "keyword"
}
}
}
}
Edit:
Based on @ilvar's answer, I constructed the query:
{
"query": {
"bool": {
"must": [
{
"term": {
"approved": true
}
}
]
}
},
"size": 0,
"aggs": {
"top_teams": {
"nested": {
"path": "statistics.teams"
},
"aggs": {
"the_all": {
"multi_terms": {
"terms": [
{
"field": "statistics.teams.name.keyword"
},
{
"field": "statistics.teams.apiId"
}
]
}
},
"the_won": {
"filter": {
"terms": {
"statistics.teams.winner": [
true
]
}
},
"aggs": {
"teams": {
"multi_terms": {
"terms": [
{
"field": "statistics.teams.name.keyword"
},
{
"field": "statistics.teams.apiId"
}
]
}
}
}
}
}
}
}
}
Which gives me:
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"top_teams": {
"doc_count": 10,
"the_all": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": [
"Team1",
2
],
"key_as_string": "Team1|2",
"doc_count": 2
},
{
"key": [
"Team2",
3
],
"key_as_string": "Team2|3",
"doc_count": 2
},
{
"key": [
"Team3",
5
],
"key_as_string": "Team3|5",
"doc_count": 2
},
{
"key": [
"Team4",
1
],
"key_as_string": "Team4|1",
"doc_count": 1
},
{
"key": [
"Team5",
4
],
"key_as_string": "Team5|4",
"doc_count": 1
},
{
"key": [
"Team6",
7
],
"key_as_string": "Team6|7",
"doc_count": 1
},
{
"key": [
"Team7",
6
],
"key_as_string": "Team7|6",
"doc_count": 1
}
]
},
"the_won": {
"doc_count": 4,
"teams": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": [
"Team2",
5
],
"key_as_string": "Team2|5",
"doc_count": 2
},
{
"key": [
"Team4",
2
],
"key_as_string": "Team4|2",
"doc_count": 1
},
{
"key": [
"Team7",
3
],
"key_as_string": "Team7|3",
"doc_count": 1
}
]
}
}
}
}
}
But I still cannot get the winrate from two siblings, where one sibling might have missing teams that have not won any match.
Should I use some pipeline aggregation?

The final solution I came up with is below. The bucket_sort pipeline aggregation did the final part of the job.
{
"query": {
"bool": {
"must": [
{
"term": {
"approved": true
}
}
]
}
},
"size": 0,
"aggs": {
"top_teams": {
"nested": {
"path": "statistics.teams"
},
"aggs": {
"the_all": {
"terms": {
"field": "statistics.teams.apiId"
},
"aggs": {
"the_won_by_team": {
"filter": {
"terms": {
"statistics.teams.winner": [
true
]
}
}
},
"the_lost_by_team": {
"filter": {
"terms": {
"statistics.teams.winner": [
false
]
}
}
},
"the_all_by_team": {
"filter": {
"terms": {
"statistics.teams.winner": [
true,
false
]
}
}
},
"the_winrate": {
"bucket_script": {
"buckets_path": {
"the_won_count": "the_won_by_team._count",
"the_all_count": "the_all_by_team._count"
},
"script": "params.the_won_count / params.the_all_count"
}
},
"the_sort": {
"bucket_sort": {
"sort": [
{
"the_winrate": "desc"
},
{
"the_all_by_team._count": "desc"
}
],
"size": 5
}
}
}
}
}
}
}
}

This is very similar to the example the docs give for the nested aggregation. The only difference is that you'll have a terms aggregation at the top level instead of a filter, so you get back all of the teams.

What is the elastic search query for nested aggregation to return buckets of count values?

I have data on individual customers in Elasticsearch, whose likings of Food_Item are stored as shown below. A customer likes many "Food_Items", so it's a list. I also have many customers.
I have data in the following format:
{
"id": 1,
"customerName":"John",
"likings":[
{
"Food_Item": "Pizza",
"OnAScaleOfTen": 9
},
{
"Food_Item": "Chinese",
"OnAScaleOfTen": 10
}
]
},
{
"id": 2,
"customerName":"Mary",
"likings":[
{
"Food_Item": "Burger",
"OnAScaleOfTen": 10
},
{
"Food_Item": "Chinese",
"OnAScaleOfTen": 6
}
]
}
Now I want to bucket the unique "Food_Items" together with their corresponding counts, something like this in the aggregation result:
"Liking_Status": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Chinese",
"Liking Count": {
"value": 2
}
},
{
"key": "Pizza",
"Liking Count": {
"value": 1
}
},
{
"key": "Burger",
"Liking Count": {
"value": 1
}
}]}
My mapping for the index is:
{
"mappings": {
"doc": {
"properties": {
"customerName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"likings": {
"type":"nested",
"properties": {
"Food_Item": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"OnAScaleOfTen": {
"type": "long"
}
}
}
}
}
}
}
Can anyone help me with the Elastic Search Query. Thank you.

What you need is nested aggregation.
{
"size": 0,
"aggs": {
"buckets": { //aggregating on nested field
"nested": {
"path": "likings"
},
"aggs": {
"liking_count": {//term aggregation on the obj
"terms": {
"field": "likings.Food_Item.keyword"
}
}
}
}
}
}
Mapping:
I only declared likings as nested; everything else uses the default mapping. In this case, Food_Item is a text field. Terms aggregations work on keyword fields, so I used its keyword sub-field from the index.
Output:
"aggregations": {
"buckets": {
"doc_count": 4,
"liking_count": { //You can name what you want here
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Chinese",
"doc_count": 2
},
{
"key": "Burger",
"doc_count": 1
},
{
"key": "Pizza",
"doc_count": 1
}
]
}
}
}

how do I implement a single-word auto complete using Elasticsearch 6

I would like to implement a single-word autocomplete using Elasticsearch 6. I have seen a fair number of posts on how to do this with earlier versions; however, it seems that autocomplete has changed significantly in the latest version.
I am using the standard mapping for autocomplete:
PUT advertising_tins
{
"settings": {
"analysis": {
"analyzer": {
"completion_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"completion_filter"
],
"tokenizer": "keyword"
}
},
"filter": {
"completion_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 24
}
}
}
},
"mappings": {
"item": {
"properties": {
"date": {
"type": "long"
},
"id": {
"type": "text"
},
"title": {
"type": "text"
},
"suggest": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"completion": {
"type": "text",
"analyzer": "completion_analyzer",
"search_analyzer": "standard"
}
}
}
}
}
}
}
I am indexing like this:
POST advertising_tins/item/_bulk
{"index":{}}
{"date": 20180217, "title": "Vintage Spice Cardboard Tin of Mace Dainty Brand St. Paul, MN 1 oz.","id": "305232814","suggest": [ "spice","cardboard","tin","mace","dainty","brand","st","paul","mn","oz"]}
And querying like this:
POST advertising_tins/_search?pretty
{
"size": 0,
"query": {
"term": {
"suggest.completion": "car"
}
},
"aggs": {
"suggestions": {
"terms": {
"field": "suggest.raw"
}
}
}
}
However my results return all terms in the suggest field instead of just single term "cardboard".
{
"took": 4,
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"suggestions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "brand",
"doc_count": 1
},
{
"key": "cardboard",
"doc_count": 1
},
{
"key": "dainty",
"doc_count": 1
},
{
"key": "mace",
"doc_count": 1
},
{
"key": "mn",
"doc_count": 1
},
{
"key": "oz",
"doc_count": 1
},
{
"key": "paul",
"doc_count": 1
},
{
"key": "spice",
"doc_count": 1
},
{
"key": "st",
"doc_count": 1
},
{
"key": "tin",
"doc_count": 1
}
]
}
}
}
Any idea how I can fix this and get just a single term match?

You are almost there. It can be achieved with the default Completion Suggester, you only need to change the type of your completion field to "completion":
"mappings": {
"item": {
"properties": {
"date": {
"type": "long"
},
"id": {
"type": "text"
},
"title": {
"type": "text"
},
"suggest": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"completion": {
"type" : "completion", <--- here
"analyzer": "completion_analyzer",
"search_analyzer": "standard"
}
}
}
}
}
}
And add a "suggest" part into your query:
POST advertising_tins/_search
{
"size": 0,
"query": {
"term": {
"suggest.completion": "car"
}
},
"suggest" : { <--- Here goes he suggest query
"my-suggestion" : {
"text" : "car",
"completion" : {
"field" : "suggest.completion"
}
}
},
"aggs": {
"suggestions": {
"terms": {
"field": "suggest.raw"
}
}
}
}
The response will look like this:
{
// ...
"hits": //... ,
"aggregations": // ...,
"suggest": {
"my-suggestion": [
{
"text": "car",
"offset": 0,
"length": 3,
"options": [
{
"text": "cardboard", <--- here is the suggestion
"_index": "advertising_tins",
"_type": "item",
"_id": "GLeUqGEBVrFe7u7pR5uA",
"_score": 1,
"_source": {
"date": 20180217,
"title": "Vintage Spice Cardboard Tin of Mace Dainty Brand St. Paul, MN 1 oz.",
"id": "305232814",
"suggest": [
"spice",
"cardboard",
"tin",
"mace",
"dainty",
"brand",
"st",
"paul",
"mn",
"oz"
]
}
}
]
}
]
}
}
The response also includes the _source of the suggested document, so you might not even need to use "query" and "aggs" parts.
Hope that helps!

Further filtering of aggregations

I have a question regarding aggregation in elastic search. I have a document like the following:
{
"_index": "products",
"_type": "product",
"_id": "ID-12345",
"_score": 1,
"_source": {
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Product Name",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Grey"
},
{
"facet_name": "Category",
"facet_value": "Linen"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
},
{
"facet_name": "Keyword",
"facet_value": "Sophisticated"
}
]
}
}
}
I am storing product information such as Colour, Material, Category and Keywords within the product_detail.string_facets field. I'd like to use this for aggregation to get Colour/Material/Category/Keyword suggestions but as separate buckets. I.e, there is a separate bucket for each of those string_facet types as defined in product_detail.string_facets.facet_name.
This is the query I have at the moment which is returning data, but not as I expect. First the query (this was just to try and get Colours):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
},
{
"multi_match": {
"query": "Blue",
"fields": ["product_information.name", "product_detail.string_facets.facet_name"]
}
}
],
"minimum_should_match": "100%"
}
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
This is giving me output like the following:
"aggregations": {
"suggestions": {
"doc_count": 21,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 23,
"buckets": [
{
"key": "Rug",
"doc_count": 21
},
{
"key": "Blue",
"doc_count": 18
},
{
"key": "Bold",
"doc_count": 7
},
{
"key": "Modern",
"doc_count": 6
},
{
"key": "Multi-Coloured",
"doc_count": 5
},
{
"key": "Contemporary",
"doc_count": 4
},
{
"key": "Traditional",
"doc_count": 4
},
{
"key": "White",
"doc_count": 4
},
{
"key": "Luxurious",
"doc_count": 3
},
{
"key": "Minimal",
"doc_count": 3
}
]
}
}
}
It has given me the results for all facet_name values rather than only those with facet_name Colour, as I thought it would.
Any help would be greatly appreciated. Elasticsearch seems very powerful but the documentation is quite daunting!

You did not show what the mapping looks like, but I suppose that the product_detail.string_facets field is just an inner object field, and that is the reason why you get this kind of result. With this type of mapping, Elasticsearch flattens the array into a simple list of field names and values. In your case it becomes:
{
"product_detail.string_facets.facet_name": ["Colour", "Category", "Keyword"],
"product_detail.string_facets.facet_value": ["Grey", "Linen", "Throws & Blanket", "Contemporary", "Sophisticated"]
}
As you can see, based on this structure, Elasticsearch cannot know how to aggregate the data.
To make it work product_detail.string_facets field should be of type nested. Mapping for string_facets should be similar to this (note "type": "nested"):
"string_facets": {
"type": "nested",
"properties": {
"facet_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"facet_value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
Now I index following document:
{
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Rug",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Blue"
},
{
"facet_name": "Colour",
"facet_value": "Red"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
}
]
}
}
Now, to get aggregation of colour suggestions as separate buckets, you can try this query (I simplified the bool query for the need of my document):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
}
]
}
},
"aggs": {
"facets": {
"nested" : {
"path" : "product_detail.string_facets"
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
}
}
And result:
{
...,
"hits": {
...
},
"aggregations": {
"facets": {
"doc_count": 5,
"suggestions": {
"doc_count": 2,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 1
},
{
"key": "Red",
"doc_count": 1
}
]
}
}
}
}
}

ElasticSearch - Aggregations with document details

I need to aggregate the following documents:
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Justin Theroux",
"category": "Actor"
}
]
}
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Morgan Freeman",
"category": "Actor"
}
]
}
by actor, and would like to get the following structure:
[
{"name": "Christian Bale"},
{"movies": [
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6"
},
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0"
}, ...
]
Beyond using a standard terms aggregation based on the casting.name field, how can I retrieve the releaseDate and imdbRate of the related documents?
For each actor, I also need movies to be sorted by releaseDate asc.
Can I perform this using one single request?

As you have an array of casting objects in your documents you'll need to use the nested type in your mapping. To get the aggregations you want you need a combination of Terms Aggregations, Nested Aggregations and Reverse Nested Aggregations. Below is an example.
Create an index with the mapping:
POST /test
{
"mappings": {
"movie": {
"properties": {
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"releaseDate": {
"type": "string",
"index": "not_analyzed"
},
"casting": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"fields":{
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"category": {
"type": "string",
"fields":{
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
Index the documents:
POST /test/movie/1
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Justin Theroux",
"category": "Actor"
}
]
}
POST /test/movie/2
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Morgan Freeman",
"category": "Actor"
}
]
}
And finally search:
POST /test/movie/_search?search_type=count
{
"aggs": {
"nested_path": {
"nested": {
"path": "casting"
},
"aggs": {
"actor_name": {
"terms": {
"field": "casting.name.raw"
},
"aggs": {
"movies": {
"reverse_nested": {},
"aggs": {
"movie_title": {
"terms": {
"field": "title.raw"
},
"aggs": {
"release_date": {
"terms": {
"field": "releaseDate"
}
},
"imdbRate_date": {
"terms": {
"field": "imdbRate"
}
}
}
}
}
}
}
}
}
}
}
}
The response for Christian Bale is:
{
"key": "Christian Bale",
"doc_count": 2,
"movies": {
"doc_count": 2,
"movie_title": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "American Psycho",
"doc_count": 1,
"release_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "7/06/2000",
"doc_count": 1
}
]
},
"imdbRate_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "7.6",
"doc_count": 1
}
]
}
},
{
"key": "The Dark Knight",
"doc_count": 1,
"release_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "13/08/2008",
"doc_count": 1
}
]
},
"imdbRate_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "9.0",
"doc_count": 1
}
]
}
}
]
}
}
}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

elastic search nested sub aggregations - elasticsearch

Related

Elasticsearch complex aggregation on match results

What is the elastic search query for nested aggregation to return buckets of count values?

how do I implement a single-word auto complete using Elasticsearch 6

Further filtering of aggregations

ElasticSearch - Aggregations with document details

Categories

Resources