Aggregations on an array in a nested query - elasticsearch

I am trying to query for all Users that have at least one color in common with a particular User and I have been able to do that however I am unable to figure out how to aggregate my results so that I can get a the user along with the colors that they have in common.
Part of my document for a sample user is as follows:
{
// ... other fields
"colors" : [
{
"id" : 1,
"name" : "Green"
},
{
"id" : 7,
"name" : "Blue"
}
]
}
This is my query for getting the colors in common with another User that has the colors Red, Orange and Green:
{
"query": {
"nested": {
"path": "colors",
"scoreMode": "sum",
"query": {
"function_score": {
"filter": {
"terms": {
"colors.name": [
"Red","Orange","Green"
]
}
},
"functions": [
// Functions here for custom scoring
]
}
}
}
}
}
How can I aggregate the Users with the colors in common?

You need to use nested aggregation, then apply filter aggregation for colors and finally use top hits to get the matching colors. I am using source filtering to get only color value
This is the query
{
"size": 0,
"query": {
"nested": {
"path": "colors",
"query": {
"terms": {
"colors.color": [
"green",
"red"
]
}
}
}
},
"aggs": {
"user": {
"terms": { <----get users with unique name or user_id
"field": "name",
"size": 10
},
"aggs": {
"nested_color_path": { <---go inside nested documents
"nested": {
"path": "colors"
},
"aggs": {
"match_color": {
"filter": { <--- use the filter to match for colors
"terms": {
"colors.color": [
"green",
"red"
]
}
},
"aggs": {
"get_match_color": { <--- use this to get matched color
"top_hits": {
"size": 10,
"_source": {
"include": "name"
}
}
}
}
}
}
}
}
}
}
}

You have to use nested aggregations to achieve this. See the query below:
POST <index>/<type>/_search
{
"query": {
"nested": {
"path": "colors",
"query": {
"terms": {
"colors.name": [
"Red",
"Orange",
"Green"
]
}
}
}
},
"aggs": {
"users_with_common_colors": {
"terms": {
"field": "user_id",
"size": 0,
"order": {
"color_distribution>common": "desc" <-- This will sort the users in descending order of number of common colors
}
},
"aggs": {
"color_distribution": {
"nested": {
"path": "colors"
},
"aggs": {
"common": {
"filter": {
"terms": {
"colors.name": [
"Red",
"Orange",
"Green"
]
}
},
"aggs": {
"colors": {
"terms": {
"field": "colors.name",
"size": 0
}
}
}
}
}
}
}
}
}
}

Related

Performing a text search and filtering on nested terms in elasticsearch

I'm trying to perform a search th e.g. searches the word coyotes in the description , but are red and green and are in the cartoon category. Now I think I understand you can't have match and terms in the same query (the query below doesn't work for this reason), but also you that you shouldn't use terms to search on a text field. Can anyone point me in the right direction?
here's my query
GET /searchproducts/_search
{
"query": {
"match": {
"description": {
"query": "coyote"
}
},
"bool": {
"should": [{
"terms": {
"colours.name": ["red", "green"]
}
},
{
"terms": {
"categories.name": ["Cartoon"]
}
}
]
}
},
"aggs": {
"colours": {
"terms": {
"field": "colour.name.value",
"size": 100
}
},
"categories": {
"terms": {
"field": "categories.id",
"size": 100
}
}
}
}
You can use a bool query to combine multiple queries. Try out this query:
{
"query": {
"bool": {
"should": [
{
"match": {
"description": {
"query": "coyote"
}
}
},
{
"bool": {
"should": [
{
"terms": {
"colours.name": [
"red",
"green"
]
}
},
{
"terms": {
"categories.name": [
"Cartoon"
]
}
}
]
}
}
]
}
},
"aggs": {
"colours": {
"terms": {
"field": "colour.name.value",
"size": 100
}
},
"categories": {
"terms": {
"field": "categories.id",
"size": 100
}
}
}
}

ELASTICSEARCH - Ordering aggregation by date on nested field

I am developing a query where I count how many unique "cp" the most recent document contains.
The json is made up of several nested fields.
I am having trouble showing only the json value with the most recent date when I add to a json with nested fields.
I have done nested aggregations, and finally I have used top_hits filter to sort in descending order, and it returns me the last one through the size.
But still it is returning all the documents with different dates.
JSON:
"data" : [
{
"addresses" : [
{
"cp" : "33.33.33",
"services" : [
{
"field1" : "true",
"field2" : "1234",
}
]
}
],
}
],
"created_at" : "2020-09-03 14:39:01",
"#timestamp" : "2020-09-04T05:53:22.341661Z",
}
},
QUERY:
{"size": 0,
"aggs": {
"nested": {
"nested": {
"path": "data.addresses"
},
"aggs": {
"nested": {
"nested": {
"path": "data.addresses.services"
},
"aggs": {
"filter": {
"filter": {
"term": {
"data.addresses.services.field1.keyword": "true"
}
},
"aggs": {
"unique": {
"cardinality": {
"field": "data.addresses.services.field2.keyword"
}
},
"range":{
"top_hits": {
"size": 1,
"sort": [
{"created_at.keyword": {"order": "desc"}}]
}
}
}
}
}
}
}
}
}
I have tried sorting by the predefined field "created_at" or with #timestamp, but the result is the same.
Any advice that can help me to solve my problem?
For this case the solution is to add
"order": {
"_key": "desc":
instead of top_hits.
QUERY
{"size": 0,
"aggs": {
"filtrofecha": {
"terms": {
"field": "created_at.keyword",
"order": {
"_key": "desc"
},
"size": 1
},
"aggs": {
"nested": {
"nested": {
"path": "data.addresses"
},
"aggs": {
"nested": {
"nested": {
"path": "data.addresses.services"
},
"aggs": {
"filter": {
"filter": {
"term": {
"data.addresses.services.field1.keyword": "true"
}
},
"aggs": {
"unique": {
"cardinality": {
"field": "data.addresses.services.field2.keyword"
}
}
}
}
}
}
}
}
}
}
}

Elasticsearch: Aggregation on filtered nested objects to find unique values

I have an array of objects (tags) in each document in Elasticsearch 5:
{
"tags": [
{ "key": "tag1", "value": "val1" },
{ "key": "tag2", "value": "val2" },
...
]
}
Now I want to find unique tag values for a certain tag key. Something similiar to this SQL query:
SELECT DISTINCT(tags.value) FROM tags WHERE tags.key='some-key'
I have came to this DSL so far:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"filter" : { "terms": { "tags.key": "tag1" } },
"aggs": {
"my_tags_values": {
"terms" : {
"field" : "tags.value",
"size": 9999
}
}
}
}
}
}
}
But It is showing me this error:
[terms] unknown field [tags.key], parser not found.
Is this the right approach to solve the problem? Thanks for your help.
Note: I have declared the tags field as a nested field in my mapping.
You mixed up things there. You wanted probably to add a filter aggregation, but you didn't give it any name:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"my_filter": {
"filter": {
"terms": {
"tags.key": [
"tag1"
]
}
},
"aggs": {
"my_tags_values": {
"terms": {
"field": "tags.value",
"size": 9999
}
}
}
}
}
}
}
}
Try Bool Query inside the Filter-Aggregation:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"filter": {
"bool": {
"must": [
{
"term": {
"tags.key": "tag1"
}
}
]
},
"aggs": {
"my_tags_values": {
"terms": {
"field": "tags.value",
"size": 0
}
}
}
}
}
}
}
}
BTW: if you want to retrieve all buckets, you can write 0 instead of 9999 in aggregation size.

sorting elasticsearch top hits results

I am trying to execute a query in elasticsearch to get reuslt of specific users from certain date range. the results should be grouped by userId and sorted on trackTime field, I am able to use group by using aggregation but i am not able to sort aggregation buckets on tracktime, i write down the following query
GET _search
{
"size": 0,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"range": {
"trackTime": {
"from": "2016-02-08T05:51:02.000Z"
}
}
}
]
}
},
"filter": {
"terms": {
"userId": [
9,
10,
3
]
}
}
}
},
"aggs": {
"by_district": {
"terms": {
"field": "userId"
},
"aggs": {
"tops": {
"top_hits": {
"size": 2
}
}
}
}
}
}
what more should i have to use to sort the top hits result? Thanks in advance...
You can use sort like .
"aggs": {
"by_district": {
"terms": {
"field": "userId"
},
"aggs": {
"tops": {
"top_hits": {
"sort": [
{
"fieldName": {
"order": "desc"
}
}
],
"size": 2
}
}
}
}
}
Hope it helps

For each country/colour/brand combination , find sum of number of items in elasticsearch

This is a portion of the data I have indexed in elasticsearch:
{
"country" : "India",
"colour" : "white",
"brand" : "sony"
"numberOfItems" : 3
}
I want to get the total sum of numberOfItems on a per country basis, per colour basis and per brand basis. Is there any way to do this in elasticsearch?
The following should land you straight to the answer.
Make sure you enable scripting before using it.
{
"aggs": {
"keys": {
"terms": {
"script": "doc['country'].value + doc['color'].value + doc['brand'].value"
},
"aggs": {
"keySum": {
"sum": {
"field": "numberOfItems"
}
}
}
}
}
}
To get a single result you may use sum aggregation applied to a filtered query with term (terms) filter, e.g.:
{
"query": {
"filtered": {
"filter": {
"term": {
"country": "India"
}
}
}
},
"aggs": {
"total_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
}
To get statistics for all countries/colours/brands in a single pass over the data you may use the following query with 3 multi-bucket aggregations, each of them containing a single-bucket sum sub-aggregation:
{
"query": {
"match_all": {}
},
"aggs": {
"countries": {
"terms": {
"field": "country"
},
"aggs": {
"country_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
},
"colours": {
"terms": {
"field": "colour"
},
"aggs": {
"colour_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
},
"brands": {
"terms": {
"field": "brand"
},
"aggs": {
"brand_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
}
}
}

Resources