Elasticsearch - How to get min/max/avg of set of nested documents - elasticsearch

Given the following mapping and documents in Elasticsearch, how would I get the min/max/avg of a set of nested documents that match a certain condition? For instance, how would I get them min age of pet that are dogs? My filter gets the correct people that have dogs, but how do I make the min then calculate against the correct nested documents.
(1) Mapping
{
"myIndex": {
"mappings": {
"person": {
"properties": {
"name": {
"type": "string"
},
"pets": {
"type": "nested",
"properties": {
"age": {
"type": "long"
},
"name": {
"type": "string"
},
"type": {
"type": "string"
}
}
}
}
}
}
}
}
(2) Data
{
"name": "bob",
"pets": [
{
"type": "dog",
"name": "wolfie",
"age": 20
},
{
"type": "cat",
"name": "kitty",
"age": 6
}
]
}
{
"name": "bill",
"pets": [
{
"type": "fish",
"name": "goldie",
"age": 2
},
{
"type": "cat",
"name": "meowie",
"age": 18
}
]
}
(3) Query and aggregation
{
"query": {
"filtered": {
"filter": {
"nested": {
"path": "pets",
"filter" : {
"terms": {
"pets.type": ["dog"]
}
}
}
}
}
},
"aggs": {
"minage": {
"nested": {
"path": "pets"
},
"aggs": {
"minage": {
"min": {
"field": "age"
}
}
}
}
}
}

I think you can get what you want with a combination of filter aggregation and the nested filter's join option.
This code worked for me:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"person": {
"properties": {
"name": {
"type": "string"
},
"pets": {
"type": "nested",
"properties": {
"age": {
"type": "long"
},
"name": {
"type": "string"
},
"type": {
"type": "string"
}
}
}
}
}
}
}
PUT /test_index/person/1
{
"name": "bob",
"pets": [
{
"type": "dog",
"name": "wolfie",
"age": 20
},
{
"type": "cat",
"name": "kitty",
"age": 6
}
]
}
PUT /test_index/person/2
{
"name": "bill",
"pets": [
{
"type": "fish",
"name": "goldie",
"age": 2
},
{
"type": "cat",
"name": "meowie",
"age": 18
}
]
}
PUT /test_index/person/3
{
"name": "john",
"pets": [
{
"type": "dog",
"name": "oldie",
"age": 25
}
]
}
POST /test_index/_search?search_type=count
{
"aggs": {
"minage_1": {
"nested": {
"path": "pets"
},
"aggs": {
"minage_2": {
"filter": {
"nested": {
"path": "pets",
"filter": {
"terms": {
"pets.type": [
"dog"
]
}
},
"join": false
}
},
"aggs": {
"min_age_3": {
"min": {
"field": "age"
}
}
}
}
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"minage_1": {
"doc_count": 5,
"minage_2": {
"doc_count": 2,
"min_age_3": {
"value": 20
}
}
}
}
}

Related

Range value search in elastic search, not returning results

I have a problem.
I have tried to make plenty of queries and none have returned any documents.
My data format is something like:
{
"_index": "orders",
"_type": "order",
"_id": "AVad66hjiOD-asNwVILB",
"_score": 1,
"_source": {
"document": {
"orderID": "1337",
"sku": "awesomeSku",
"customerID": "7331",
"productID": "20490859",
"variantID": "97920239",
"createTime": "2016-07-13T13:23:19Z",
"retailPrice": "10000",
"costPrice": "10000",
"new": 123
}
}
}
My query:
{
"query": {
"bool": {
"filter": [
{ "range": { "new": { "gte": "20" } } }
]
}
}
}
I just want to start somewhere simply and find all documents which has the attribute "new" with a value above 20.
Any feedback would be awesome.
Edit:
Data formart in ES:
{
"orders": {
"mappings": {
"order": {
"properties": {
"document": {
"properties": {
"costPrice": {
"type": "string"
},
"createTime": {
"type": "string"
},
"customerID": {
"type": "string"
},
"new": {
"type": "long"
},
"orderID": {
"type": "string"
},
"productID": {
"type": "string"
},
"retailPrice": {
"type": "string"
},
"sku": {
"type": "string"
},
"variantID": {
"type": "string"
}
}
}
}
}
}
}
}
You need to make your query like this on the document.new field since all your fields are nested into the top-level document section:
{
"query": {
"bool": {
"filter": [
{
"range": {
"document.new": {
"gte": 20
}
}
}
]
}
}
}

How to get by ids and filter nested data to leave only objects filtered by category?

How to get by ids and filter notes to leave only given category?
Data:
POST c1_2/Blog/1
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/2
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "second"
},
{
"message": "blablabla",
"category": "third"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
Search:
POST c1_2/Blog/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"post.notes.main.category": [
"test"
]
}
}
]
}
}
}
Current results, objects in notes main/cart aren't filtered by category:
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.0122644,
"hits": [
{
"_index": "c1_2",
"_type": "Blog",
"_id": "1",
"_score": 1.0122644,
"_source": {
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
}
]
}
}
Desired effect:
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
}
]
}
In my real app query is embedded in "filtered" and "filter", if I will put query above instead filter "ids" like in example below, then will it return the same data?
POST c1_2/Blog/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"ids": {
"values": [
"1",
"2"
]
}
}
}
}
}
Mapping:
{
"posts": {
"mappings": {
"posts": {
"dynamic_templates": [{
"blog": {
"mapping": {
"index": "analyzed"
},
"path_match": "blog.*",
"path_unmatch": "*.medias.*"
}
}, {
"ids": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "_id|base_id",
"match_pattern": "regex"
}
}],
"_all": {
"enabled": false
},
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"ids": {
"properties": {
"values": {
"type": "string"
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"match_all": {
"type": "object"
}
}
},
"source": {
"dynamic": "true",
"properties": {
"post": {
"dynamic": "true",
"properties": {
"_id": {
"type": "string",
"index": "not_analyzed"
},
"base_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"blog": {
"properties": {
"post": {
"properties": {
"_id": {
"type": "string"
},
"notes": {
"properties": {
"main": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
},
"cart": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": {
"terms": {
"_id": [1, 2]
}
},
"must_not": {
"terms": {
"post.notes.main.category": [
"other"
]
}
}
}
}
}
}
}

How to filter by ids together with filter fields value?

How to add additional filter to match category values in blog.post.notes all fields? First I want to filter by ids, then filter notes category, is it possible?
I can filter only by ids:
GET posts/posts/_search?fields=_id&_source=blog.post.notes
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"ids": {
"values": [
"100000000001234"
]
}
}
}
}
}
How to filter e.g. "test" category from current results:
{
"took": 58,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [{
"_index": "posts",
"_type": "posts",
"_id": "100000000001234",
"_score": 1,
"_source": {
"blog": {
"post": {
"notes": {
"main": [{
"message": "blablabla",
"category": "test"
}, {
"message": "blablabla",
"category": "other"
}],
"cart": [{
"message": "blablabla",
"category": "test"
}, {
"message": "blablabla",
"category": "other"
}]
}
}
}
}
}]
}
}
curl -XGET localhost:9200/posts/_mapping/posts
{
"posts": {
"mappings": {
"posts": {
"dynamic_templates": [{
"blog": {
"mapping": {
"index": "analyzed"
},
"path_match": "blog.*",
"path_unmatch": "*.medias.*"
}
}, {
"ids": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "_id|base_id",
"match_pattern": "regex"
}
}],
"_all": {
"enabled": false
},
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"ids": {
"properties": {
"values": {
"type": "string"
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"match_all": {
"type": "object"
}
}
},
"source": {
"dynamic": "true",
"properties": {
"post": {
"dynamic": "true",
"properties": {
"_id": {
"type": "string",
"index": "not_analyzed"
},
"base_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"blog": {
"properties": {
"post": {
"properties": {
"_id": {
"type": "string"
},
"notes": {
"properties": {
"main": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
},
"cart": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
You can use bool query with must on ids and terms
POST c1_2/Test/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"blog.post.notes.main.category": [
"categoryfilter"
]
}
}
]
}
}
}
But since you have main and cart categories you must use filter on each of them, in my example i filter on main categories, if you need to do filter on both you need to use one more or filter which will filter on main or cart categories
Also you should know that category should be not_analyzed in order to filter on something like "my super category" other wise query will not be working properly.
Example
POST c1_2/Blog/1
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/2
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "second"
},
{
"message": "blablabla",
"category": "third"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"post.notes.main.category": [
"test"
]
}
}
]
}
}
}

Elasticsearch: generating terms from array using script

Would love an explanation of why this happens and how to correct it.
Here's a snippet of the source document:
{
"created_time":1412988495000,
"tags":{
"items":[
{
"tag_type":"Placement",
"tag_id":"id1"
},
{
"tag_type":"Product",
"tag_id":"id2"
}
]
}
}
The following terms aggregation:
"aggs":{
"tags":{
"terms":{
"script":"doc['tags'].value != null ? doc['tags.items.tag_type'].value + ':' + doc['tags.items.tag_id'].value : ''",
"size":2000,
"exclude":{
"pattern":"null:null"
}
}
}
}
returns:
"buckets":[
{
"key":"Placement:id1",
"doc_count":1
},
{
"key":"Placement:id2",
"doc_count":1
}
]
...when you would expect:
"buckets":[
{
"key":"Placement:id1",
"doc_count":1
},
{
"key":"Product:id2",
"doc_count":1
}
]
I would probably go with a nested type. I don't know all the details of your setup, but here is a proof of concept, at least. I took out the "items" property because I didn't need that many layers, and just used "tags" as the nested type. It could be added back in if needed, I think.
So I set up an index with a "nested" property:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"doc": {
"properties": {
"created_time": {
"type": "date"
},
"tags": {
"type": "nested",
"properties": {
"tag_type": {
"type": "string",
"index": "not_analyzed"
},
"tag_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
Then added a couple of docs (notice that the structure differs slightly from yours):
PUT /test_index/doc/1
{
"created_time": 1412988495000,
"tags": [
{
"tag_type": "Placement",
"tag_id": "id1"
},
{
"tag_type": "Product",
"tag_id": "id2"
}
]
}
PUT /test_index/doc/2
{
"created_time": 1412988475000,
"tags": [
{
"tag_type": "Type3",
"tag_id": "id3"
},
{
"tag_type": "Type4",
"tag_id": "id3"
}
]
}
Now a scripted terms aggregation inside a nested aggregation seems to do the trick:
POST /test_index/_search?search_type=count
{
"query": {
"match_all": {}
},
"aggs": {
"tags": {
"nested": { "path": "tags" },
"aggs":{
"tag_vals": {
"terms": {
"script": "doc['tag_type'].value+':'+doc['tag_id'].value"
}
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"tags": {
"doc_count": 4,
"tag_vals": {
"buckets": [
{
"key": "Placement:id1",
"doc_count": 1
},
{
"key": "Product:id2",
"doc_count": 1
},
{
"key": "Type3:id3",
"doc_count": 1
},
{
"key": "Type4:id3",
"doc_count": 1
}
]
}
}
}
}
Here is the code I used:
http://sense.qbox.io/gist/4ceaf8693f85ff257c2fd0639ba62295f2e5e8c5

ElasticSearch Multi Field Terms Facet doesn't seem to work

I'm trying to get the Multi-Field Terms Facet to work, but the results can't seem to do what the documentation (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets-terms-facet.html#_multi_fields_2) is saying.
A bit about the setup and the queries I've tried:
I'm using ES 1.1.0:
"name": "William Baker",
"version": {
"number": "1.1.0",
...
}
The mapping:
{
"index_name": {
"mappings": {
"object": {
"properties": {
"nested_object": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
...
}
}
}
}
}
The query JSON:
{
"query": {
"filtered": {
"query": {
"match_all": {}
}
}
},
"facets": {
"test4": {
"terms": {
"fields": ["nested_object.name", "nested_object.id"]
},
"nested": "nested_object"
}
}
}
The result:
"facets": {
"test4": {
"_type": "terms",
"missing": 0,
"total": 26,
"other": 12,
"terms": [
{
"term": "Port Anastasia College",
"count": 3
},
{
"term": "31",
"count": 3
},
{
"term": "Zorahaven College",
"count": 1
},
{
"term": "West Lonzoview College",
"count": 1
},
...
]
}
}
I don't understand why the query is returning only the first ID, and as a separate term, instead of an aggregation of the two, as described in the documentation.
I'd appreciate any idea on how to get this multi-field facet to work.
Thanks.

Resources