Getting full documents using ElasticSearch aggregations - elasticsearch

I've got an index like:
[
{
"Name": "Alex",
"LastName": "Ich",
"Department": 2
},
{
"Name": "Charlie",
"LastName": "Sheen",
"Department": 3
},
{
"Name": "Peter",
"LastName": "Petrelli",
"Department": 5
},
{
"Name": "Alan",
"LastName": "Harper",
"Department": 6
},
{
"Name": "Ann",
"LastName": "Bottle",
"Department": 3
},
]
And I want to get results with distinct Department values. I don't care about order; I just want 1 result per Department. I tried aggregations, but I could only manage to get the different Departments with their associated doc_count. The query I tried is something like:
{
"aggs": {
"deppartments": {
"terms": {
"field": "Department"
}
}
},"size": 0
}
It returns:
"buckets": [
{
"key": 2,
"doc_count": 1
},
{
"key": 3,
"doc_count": 2
},
{
"key": 5,
"doc_count": 1
},
{
"key": 6,
"doc_count": 1
},
]
When I want something like:
[
{
"Name": "Alex",
"LastName": "Ich",
"Department": 2
},
{
"Name": "Charlie",
"LastName": "Sheen",
"Department": 3
},
{
"Name": "Peter",
"LastName": "Petrelli",
"Department": 5
},
{
"Name": "Alan",
"LastName": "Harper",
"Department": 6
}
]

You can use a top hits aggregation for this:
{
"aggs": {
"departments": {
"terms": {
"field": "Department",
"size": 10
},
"aggs": {
"search_results": {
"top_hits": {
"size": 10 <--- you can change the size to 1 if you want
}
}
}
}
},
"size": 0
}
Does this help?

Related

Elasticsearch - Count occurrences of elements in nested field

I have an elasticsearch index with this simplified structure:
{
"id": "group1",
"users": [
{
"user_id": "user1"
},
{
"user_id": "user2"
}
]
},
{
"id": "group2",
"users": [
{
"user_id": "user1"
},
{
"user_id": "user3"
},
]
},
{
"id": "group3",
"users": [
{
"user_id": "user1"
},
{
"user_id": "user3"
},
]
}
I need to get the number of documents where each user appears. Something like this:
[
{
"key": "user1",
"doc_count": 3
},
{
"key": "user2",
"doc_count": 1
},
{
"key": "user3",
"doc_count": 2
}
]
You need to use a nested aggregation together with a terms aggregation.
Adding a working example with index mapping, search query, and search result
Index Mapping:
{
"mappings":{
"properties":{
"users":{
"type":"nested"
}
}
}
}
Search Query:
{
"size":0,
"aggs": {
"resellers": {
"nested": {
"path": "users"
},
"aggs": {
"unique_user": {
"terms": {
"field": "users.user_id.keyword"
}
}
}
}
}
}
Search Result:
"aggregations": {
"resellers": {
"doc_count": 6,
"unique_user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "user1",
"doc_count": 3
},
{
"key": "user3",
"doc_count": 2
},
{
"key": "user2",
"doc_count": 1
}
]
}
}
}

Can I pass result of an aggregation to a range aggregation?

I have the following dataset
[
{
"rating": "10",
"subject": "maths"
},
{
"rating": "9",
"subject": "physics"
},
{
"rating": "10",
"subject": "chemistry"
},
{
"rating": "5",
"subject": "physics"
},
{
"rating": "2",
"subject": "geography"
},
{
"rating": "5",
"subject": "maths"
},
{
"rating": "1",
"subject": "geography"
},
{
"rating": "5",
"subject": "maths"
},
{
"rating": "8",
"subject": "chemistry"
}
]
What I need to do is find the avg rating for each subject, and then calculate the # of subjects in ranges of rating (0-2,2-5,5-8,8-10) with an elastic search query.
The query I have so far creates buckets for each subject calculating the avg of each bucket. But I can't find how to do a range aggregation on the result of the composite aggregation. Is it even possible? Is there an alternative?
Here is my query that buckets the data according to the subject and calculates the avg rating.
GET kibana_sample/_search
{
"size":0,
"aggs" : {
"my_buckets": {
"composite" : {
"sources" : [
{ "subject": { "terms" : { "field": "subject" } } }
]
},
"aggs": {
"avg_rating": {
"avg" : { "field" : "rating" }
}
}
}
}
}
It results in the following.
"aggregations": {
"my_buckets": {
"buckets": [
{
"key": {
"subject": "maths"
},
"doc_count": 3,
"avg_rating": {
"value": 6.66666667
}
},
{
"key": {
"subject": "physics"
},
"doc_count": 2,
"avg_rating": {
"value": 7
}
},
{
"key": {
"subject": "chemistry"
},
"doc_count": 2,
"avg_rating": {
"value": 9
}
},
{
"key": {
"subject": "geography"
},
"doc_count": 2,
"avg_rating": {
"value": 1.5
}
}
]
}
}
It's all good, but now I need to perform a range aggregation on top of this result to get the number of subjects in ranges of ratings
eg:
ratings range: {0-2}: 1 subject, {2-5}: 0 subjects, {5-8}: 2 subjects,
{8-10}: 1 subject
You can use pipeline aggregations to feed the results of one aggregation into further aggregations. Another thing you can do is use scripts in the pipeline to keep only the relevant results.
Check out the script examples here:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline.html

Is there any solution with elasticsearch parent-child join

I have an Elasticsearch index set up like the following:
PUT /test
{
"mappings": {
"doc": {
"properties": {
"status": {
"type": "keyword"
},
"counting": {
"type": "integer"
},
"join": {
"type": "join",
"relations": {
"vsim": ["pool", "package"]
}
},
"poolId": {
"type": "keyword"
},
"packageId": {
"type": "keyword"
},
"countries": {
"type": "keyword"
},
"vId": {
"type": "keyword"
}
}
}
}}
Then add data:
// add vsim
PUT /test/doc/doc1
{"counting":6, "join": {"name": "vsim"}, "content": "1", "status": "disabled"}
PUT /test/doc/doc2
{"counting":5,"join": {"name": "vsim"}, "content": "2", "status": "disabled"}
PUT /test/doc/doc3
{"counting":5,"join": {"name": "vsim"}, "content": "2", "status": "enabled"}
// add package
PUT /test/doc/ner2?routing=doc2
{"join": {"name": "package", "parent": "doc2"}, "countries":["CN", "UK"]}
PUT test/doc/ner12?routing=doc1
{"join": {"name": "package", "parent": "doc1"}, "countries":["CN", "US"]}
PUT /test/doc/ner11?routing=doc1
{"join":{"name": "package", "parent": "doc1"}, "countries":["US", "KR"]}
PUT /test/doc/ner13?routing=doc3
{"join":{"name": "package", "parent": "doc3"}, "countries":["UK", "AU"]}
// add pool
PUT /test/doc/ner21?routing=doc1
{"join": {"name": "pool", "parent": "doc1"}, "poolId": "MER"}
PUT /test/doc/ner22?routing=doc2
{"join": {"name": "pool", "parent": "doc2"}, "poolId": "MER"}
PUT /test/doc/ner23?routing=doc2
{"join": {"name": "pool", "parent": "doc2"}, "poolId": "NER"}
Then I want to sum the counting field grouped by status (from vsim), poolId (from pool) and countries (from package). The expected result looks like:
disabled-MER-CN: 3
disabled-MER-US: 3
enabled-MR-CN: 1
... and so on.
I'm new to Elasticsearch, and I have read documentation such as
https://www.elastic.co/guide/en/elasticsearch/reference/current/joining-queries.html
and
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-children-aggregation.html
but I still have no idea how to implement this aggregation query. Please give me some suggestions, thanks!
If I followed your structure of the documents - you have types pool and package on the same level (they are siblings) - I wasn't able to achieve exactly your expected results. I also highly doubt that it's possible with those types being siblings.
However, it's still possible to slice per one field in your doc (status) and later separately slice both by poolId and countries with a query like this:
{
"aggs": {
"status-aggs": {
"terms": {
"field": "status",
"size": 10
},
"aggs": {
"to-pool": {
"children": {
"type": "pool"
},
"aggs": {
"top-poolid": {
"terms": {
"field": "poolId",
"size": 10
}
}
}
},
"to-package": {
"children": {
"type": "package"
},
"aggs": {
"top-countries": {
"terms": {
"field": "countries",
"size": 10
}
}
}
}
}
}
}
}
with a response from Elasticsearch like this (I've omitted some parts of the JSON for readability):
{
"status-aggs": {
"buckets": [
{
"key": "disabled",
"doc_count": 2,
"to-pool": {
"doc_count": 3,
"top-poolid": {
"buckets": [
{
"key": "MER",
"doc_count": 2
},
{
"key": "NER",
"doc_count": 1
}
]
}
},
"to-package": {
"doc_count": 3,
"top-countries": {
"buckets": [
{
"key": "CN",
"doc_count": 2
},
{
"key": "US",
"doc_count": 2
},
{
"key": "KR",
"doc_count": 1
},
{
"key": "UK",
"doc_count": 1
}
]
}
}
},
{
"key": "enabled",
"doc_count": 1,
"to-pool": {
"doc_count": 0,
"top-poolid": {
"buckets": []
}
},
"to-package": {
"doc_count": 1,
"top-countries": {
"buckets": [
{
"key": "AU",
"doc_count": 1
},
{
"key": "UK",
"doc_count": 1
}
]
}
}
}
]
}
}

elasticsearch nested aggregation inside a reverse nested aggregation

Elasticsearch version: 2.3.1
JVM version: 1.8.0_66 / 25.66-b17
OS version: Mac OS X 10.11.4
I am having trouble getting the correct values to show up in a 4 level deep aggregation scenario where the first two levels are nested, the third is reverse_nested, and the fourth is nested again.
Here is my index mapping:
curl -XDELETE localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d/order-d/_mapping -d '{
"order-d": {
"properties": {
"id": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"groupId": {
"type": "string"
},
"groupOrderNumber": {
"type": "string"
},
"dateCreated": {
"type": "date"
},
"dateUpdated": {
"type": "date"
},
"location": {
"type": "object"
},
"orderSubmitter": {
"type": "object"
},
"distributor": {
"type": "object"
},
"salesRep": {
"type": "object"
},
"status": {
"type": "string"
},
"total": {
"type": "double"
},
"isTTOrder": {
"type": "boolean"
},
"lineItems": {
"type": "nested",
"include_in_parent": true,
"properties": {
"product": {
"type": "object"
},
"category": {
"type": "object"
},
"subCategory": {
"type": "object"
},
"quantity": {
"type": "double"
},
"unitPrice": {
"type": "double"
},
"totalPrice": {
"type": "double"
},
"pricedByUnitPrice": {
"type": "double"
}
}
}
}
}
}'
Here are the documents:
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4577",
"orderNumber": "1617590686",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55c3879459ad0c63008b4569",
"name": "Performance Foodservice Metro NY"
},
"salesRep": null,
"status": "pending",
"total": 5410.21,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55bfb445c440b26a008b4571",
"name": "Sabrett Sauerkraut 12 x 2 lb bags"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4586",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Perishable)"
},
"quantity": 1,
"unitPrice": 25.24,
"totalPrice": 25.24,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55bc219238c0376e008b4570",
"name": "Franks Red Hot Cayenne Pepper Sauce 4 x 1 gallon"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4606",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 45.06,
"totalPrice": 45.06,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "56d76c41bd821fda008b459a",
"name": "Cereal, Classic Variety Pack, Kelloggs 1\/60 ct."
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b462d",
"name": "Grains\/Cereal - Ready to Eat - (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 56.03,
"totalPrice": 56.03,
"pricedByUnitPrice": 0
}]
}'
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4576",
"orderNumber": "2041063294",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55cdeece0a41216c008b4583",
"name": "Driscoll Foods"
},
"salesRep": null,
"status": "pending",
"total": 7575.27,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55ad05e08d28c36b008b456c",
"name": "Pepper 3000 pcs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4582",
"name": "Herbs\/Spices (Shelf Stable)"
},
"quantity": 3,
"unitPrice": 8.95,
"totalPrice": 26.85,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b3a12f6b415c68008b4568",
"name": "Venice Maid Deluxe Corned Beef Hash 6 x 6 lb 10 oz"
},
"category": {
"id": "53df846c3b8e77710e7b23f7",
"name": "Meat"
},
"subCategory": {
"id": "54d8c56a279871b9078b4581",
"name": "Beef - Prepared\/Processed"
},
"quantity": 1,
"unitPrice": 59.75,
"totalPrice": 59.75,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b145798c26dc69008b4568",
"name": "Aladdin Bakers Sesame Bread Sticks 150 x 2 packs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b45b0",
"name": "Dried Breads (Shelf Stable)"
},
"quantity": 8,
"unitPrice": 15.5,
"totalPrice": 124,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55ad074a8d28c36f008b456d",
"name": "Smuckers Breakfast Syrup 100 cups"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b457d",
"name": "Syrup\/Treacle\/Molasses (Shelf Stable)"
},
"quantity": 10,
"unitPrice": 8.95,
"totalPrice": 89.5,
"pricedByUnitPrice": 0
}]
}'
Here is my query:
curl -XPOST localhost:9200/orders-d/_search -d '{
"from": 0,
"size": 0,
"aggregations": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.category.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.product.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "distributor.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"sum": {
"field": "lineItems.totalPrice"
}
}
},
"nested": {
"path": "lineItems"
}
}
}
}
},
"reverse_nested": {}
}
}
}
}
}
},
"nested": {
"path": "lineItems"
}
}
},
"query": {
"bool": {
"must": [{
"range": {
"dateCreated": {
"format": "yyyy-MM-dd",
"gte": "2016-01-01",
"lte": "2016-04-30"
}
}
}]
}
}
}'
...and here are my results:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "53df845b3b8e77710e7b23ec",
"doc_count": 3,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55ad05e08d28c36b008b456c",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55ad074a8d28c36f008b456d",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55b145798c26dc69008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}, {
"key": "53df846c3b8e77710e7b23f7",
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55b3a12f6b415c68008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}]
}
}
}
}
As you can see from the results, all the aggregated values for each drilldown of totalLineItems have the same exact value. This is obviously incorrect.
Did I do something wrong, is it a bug, or is nesting inside a reverse nesting unsupported?

elasticsearch terms and sum aggregation

I have documents in Elasticsearch (1.5) that look like:
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
Not all of the documents contain all three options (male/female/unknown).
I would like to get the sum of all values for each gender name, like this:
{
"buckets": [
{
"key": "unknown",
"doc_count": 112,
"gender_a": {
"value": 462
}
},
{
"key": "male",
"doc_count": 107,
"gender_a": {
"value": 438
}
},
{
"key": "female",
"doc_count": 36,
"gender_a": {
"value": 186
}
}
]
}
I tried this query:
{
"aggs": {
"gender_name": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_sum": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
but something weird is going on, and I don't get the right values.
Any idea what I am missing?
You will probably need to make sure that your "gender" property has type "nested". With that, I was able to make the following do what I think you're asking.
First I set up a simple index:
PUT /test_index
{
"mappings": {
"doc": {
"properties": {
"gender": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"value": {
"type": "long"
}
}
}
}
}
}
}
Then added a couple of docs:
PUT /test_index/doc/1
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
PUT /test_index/doc/2
{
"gender": [
{
"name": "male",
"value": 8
},
{
"name": "female",
"value": 4
}
]
}
Then I was able to get total counts by gender name as follows:
POST /test_index/_search?search_type=count
{
"aggs": {
"genders": {
"nested": {
"path": "gender"
},
"aggs": {
"gender_terms": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_name_value_sums": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"genders": {
"doc_count": 5,
"gender_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 2,
"gender_name_value_sums": {
"value": 88,
"value_as_string": "88.0"
}
},
{
"key": "male",
"doc_count": 2,
"gender_name_value_sums": {
"value": 97,
"value_as_string": "97.0"
}
},
{
"key": "unknown",
"doc_count": 1,
"gender_name_value_sums": {
"value": 12,
"value_as_string": "12.0"
}
}
]
}
}
}
}
Here is the code I used to test it:
http://sense.qbox.io/gist/d4533215806b858aa2cc1565546d167fdec3c973

Resources