Querying array with nested objects in Elasticsearch to get multiple objects - elasticsearch

I have data in Elasticsearch in the below format -
"segments": [
{"id": "ABC", "value":123},
{"id": "PQR", "value":345},
{"id": "DEF", "value":567},
{"id": "XYZ", "value":789},
]
I want to retrieve all segments where id is "ABC" or "DEF".
I looked up the docs (https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-nested-query.html) and a few examples on YouTube, but they all seem to retrieve only a single object, while I want to retrieve more than one.
Is there a way to do this?

You can use a nested query with inner hits, as shown below.
I assume your index mapping looks like the one below, with the segments field defined as nested:
"mappings": {
"properties": {
"segments": {
"type": "nested",
"properties": {
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "long"
}
}
}
}
}
You can use below Query:
{
"_source" : false,
"query": {
"nested": {
"path": "segments",
"query": {
"terms": {
"segments.id.keyword": [
"ABC",
"DEF"
]
}
},
"inner_hits": {}
}
}
}
Response:
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_score": 1,
"inner_hits": {
"segments": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 0
},
"_score": 1,
"_source": {
"id": "ABC",
"value": 123
}
},
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 2
},
"_score": 1,
"_source": {
"id": "DEF",
"value": 567
}
}
]
}
}
}
}
]
}

Related

Elasticsearch - Is it possible to collapse first then aggregate data of a nested field?

I am using Elasticsearch and I want to group our results by a specific field, returning the top n documents per group. The documents have a nested field, and I want to aggregate the nested field of all the documents in each group.
Example
I have 5 documents, and each has a groupId and a nested field people. I want to group these documents by groupId, and then for each group I want to get the top 2 people (some documents may contain the same people).
PUT test/_mapping
{
"properties": {
"groupId":{
"type":"keyword"
},
"id":{
"type":"keyword"
},
"name":{
"type":"text"
},
"people":{
"type":"nested",
"properties":{
"email":{
"type":"keyword"
}
}
}
}
}
PUT test/_doc/1
{
"name": "docs1",
"groupId": "1",
"people":[{
"email":"people1#test.com"
}]
}
PUT test/_doc/2
{
"name": "docs2",
"groupId": "1",
"people":[{
"email":"people2.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/3
{
"name": "docs3",
"groupId": "2",
"people":[{
"email":"people3.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/4
{
"name": "docs4",
"groupId": "1",
"people":[{
"email":"people4.1#test.com"
},
{
"email":"people4.2#test.com"
}]
}
PUT test/_doc/5
{
"name": "docs5",
"groupId": "3",
"people":[{
"email":"people5.1#test.com"
},
{
"email":"people5.2#test.com"
}]
}
Search query
GET test/_search
{
"collapse": {
"field": "groupId",
"inner_hits": {
"name":"inner",
"size": 2
}
},
"sort": [
{
"groupId": {
"order": "asc"
}
}
],
"size": 2,
"from": 0
}
Result
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": null,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
},
"fields": {
"groupId": [
"1"
]
},
"sort": [
"1"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": 0,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
}
},
{
"_index": "test",
"_id": "2",
"_score": 0,
"_source": {
"name": "docs2",
"groupId": "1",
"people": [
{
"email": "people2.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
},
{
"_index": "test",
"_id": "3",
"_score": null,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
},
"fields": {
"groupId": [
"2"
]
},
"sort": [
"2"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "3",
"_score": 0,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
}
]
}
}
The expected result is an aggregated groupPeople field for each group that contains the top n people of that group. It should not be affected by the inner_hits size; for example, groupId=1 contains 3 documents and 5 people.
The query that you're looking for is this one:
POST test/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId",
"size": 10
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}
If you need pagination, you can achieve the same using the composite aggregation:
POST test/_search
{
"size": 0,
"aggs": {
"pages": {
"composite": {
"sources": [
{
"groups": {
"terms": {
"field": "groupId"
}
}
}
]
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}

Elasticsearch returns NullPointerException during inner_hits query

I have an index that stores nested documents. I want to see these nested documents, so I used 'inner_hits' in the request, but Elasticsearch returns a NullPointerException. Has anyone run into this problem?
Request to elasticsearch using Postman:
GET http://localhost/my-index/_search
{
"query": {
"nested": {
"path": "address_object",
"query": {
"bool": {
"must": {
"term": {"address_object.city": "Paris"}
}
}
},
"inner_hits" : {}
}
}
}
Response with status code 200:
{
"took": 161,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 1,
"skipped": 0,
"failed": 1,
"failures": [
{
"shard": 0,
"index": "my-index",
"node": "DWdD83KaTmUiodENQkGDww",
"reason": {
"type": "null_pointer_exception",
"reason": null
}
}
]
},
"hits": {
"total": 6500039,
"max_score": 2.1761138,
"hits": []
}
}
Elasticsearch version: 6.2.4
Lucene version: 7.2.1
Update:
Mapping:
{
"my-index": {
"mappings": {
"mytype": {
"dynamic": "false",
"_source": {
"enabled": false
},
"properties": {
"adverts_count": {
"type": "integer",
"store": true
},
...
"address_object": {
"type": "nested",
"properties": {
"adverts_count": {
"type": "integer",
"store": true
},
"city": {
"type": "keyword",
"store": true
}
}
},
...
Sample document:
{
"_index": "my-index",
"_type": "mytype",
"_id": "XDWrGncBdwNBWGEagAM2",
"_score": 2.1587489,
"fields": {
"is_target_page_shown": [
0
],
"updated_at": [
1612264276
],
"is_shown": [
0
],
"nb_queries": [
1
],
"search_query": [
"phone"
],
"target_category": [
15
],
"adverts_count": [
1
]
}
}
Extra information:
If I remove the "inner_hits": {} from the search request, Elasticsearch returns the nested documents (_index, _type, _id, _score), but not the other fields (e.g. city).
Also, as suggested in the comments, I tried setting ignore_unmapped to true, but it didn't help; I get the same NullPointerException.
I tried reproducing your issue, but since you have not provided proper sample documents (the one you provided doesn't have the address_object properties), I used your mapping and the sample documents below.
PUT index-name/_doc/1
{
"address_object" :{
"adverts_count" : 1,
"city": "paris"
}
}
PUT index-name/_doc/2
{
"address_object" :{
"adverts_count" : 1,
"city": "blr"
}
}
And when I use the same search provided by you.
POST 71907588/_search
{
"query": {
"nested": {
"path": "address_object",
"query": {
"bool": {
"must": {
"term": {
"address_object.city": "paris"
}
}
}
},
"inner_hits": {}
}
}
}
I get a proper response, matching paris as city as shown in the search response.
"hits": [
{
"_index": "71907588",
"_id": "1",
"_score": 0.6931471,
"_source": {
"address_object": {
"adverts_count": 1,
"city": "paris"
}
},
"inner_hits": {
"address_object": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.6931471,
"hits": [
{
"_index": "71907588",
"_id": "1",
"_nested": {
"field": "address_object",
"offset": 0
},
"_score": 0.6931471,
"_source": {
"city": "paris",
"adverts_count": 1
}
}
]
}
}
}
}
]

Elasticsearch cross-index query with aggregations

I use: Elasticsearch 7.7 , Kibana 7.7
For example, lets take two indexes:
User index with simple mapping:
PUT /user_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"user_phone": { "type": "text" },
"name": { "type": "text" }
}
}
}
Check with simple mapping:
PUT /check_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"price": { "type": "integer" },
"goods_count": {"type": "integer"}
}
}
}
I want to build table visualization like that:
________________________________________________________________________
user_id | user_phone | average_price | sum_goods_count |
___________|_______________|_____________________|______________________
1 | 123 | 512 | 64 |
___________|_______________|_____________________|______________________
2 | 456 | 256 | 16 |
___________|_______________|_____________________|______________________
So my questions are:
Is it real?
Do I understand correctly that I need to query these two indexes, get a list of users, and then in a loop create shopping carts with checks?
First things first: you should de-normalize your data in Elasticsearch as much as possible to get the best performance and capabilities it offers. Having gone through the samples you provided and the comments on the question, it seems this can easily be achieved in your use case by combining the user and check indexes into a single index, as shown in the example below.
Index mapping
{
"mappings": {
"properties": {
"user_id": {
"type": "text",
"fielddata": "true"
},
"price": {
"type": "integer"
},
"goods_count": {
"type": "integer"
}
}
}
}
Index Data:
With the index mapping defined above, index these three documents, where one document is having "user_id":"1" and 2 documents have "user_id":"2"
{
"user_id":"1",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":100,
"goods_count":200
}
Search Query:
Refer to ES official documentation on Terms Aggregation, Top Hits aggregation, Sum aggregation and Avg aggregation to get detailed explanation.
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "user_id"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"user_id"
]
}
}
},
"avg_price": {
"avg": {
"field": "price"
}
},
"goods_count": {
"sum": {
"field": "goods_count"
}
}
}
}
}
}
Search Result:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
]
},
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"user_id": "2"
}
},
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"user_id": "2"
}
}
]
}
},
"avg_price": {
"value": 300.0
},
"goods_count": {
"value": 300.0
}
},
{
"key": "1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"user_id": "1"
}
}
]
}
},
"avg_price": {
"value": 500.0
},
"goods_count": {
"value": 100.0
}
}
]
}
}
}
As you can see in the search results above, for "user_id":"2" the average price is (500+100)/2 = 300 and sum of goods_count is 100+200 = 300.
Similarly for "user_id":"1" the average price is 500/1 = 500 and sum of goods_count is 100.

Conditional source filtering of nested objects in elasticsearch responses

I have an index of documents with the following (simplified) structure.
{
"product_id": "abc123",
"properties": [
{
"key": "width",
"value": 1000
},
{
"key": "height",
"value": 2000
},
{
"key": "depth",
"value": 500
}
]
}
Each document can have hundreds of properties.
Now, I want to be able to search for documents matching a query, and also specify which properties each document should be populated with when returned. So basically I want to write the following request:
Get me all documents that match query x, and populate each document
with the properties ["height", "width", "foobar" ].
The array with the properties I want to return is created at query time based on input from the user. The document in the response to the query would look like this:
{
"product_id": "abc123",
"properties": [
{
"key": "width",
"value": 1000
},
{
"key": "height",
"value": 2000
}
// No depth!
]
}
I have tried to achieve this through source filtering to no avail. I suspect script fields might be the only way to solve this, but I would rather use some standard way. Anyone got any ideas?
The best that I can think of is to use inner_hits. For example:
PUT proptest
{
"mappings": {
"default": {
"properties": {
"product_id": {
"type": "keyword"
},
"color": {
"type": "keyword"
},
"props": {
"type": "nested"
}
}
}
}
}
PUT proptest/default/1
{
"product_id": "abc123",
"color": "red",
"props": [
{
"key": "width",
"value": 1000
},
{
"key": "height",
"value": 2000
},
{
"key": "depth",
"value": 500
}
]
}
PUT proptest/default/2
{
"product_id": "def",
"color": "red",
"props": [
]
}
PUT proptest/default/3
{
"product_id": "ghi",
"color": "blue",
"props": [
{
"key": "width",
"value": 1000
},
{
"key": "height",
"value": 2000
},
{
"key": "depth",
"value": 500
}
]
}
Now we can query by color and fetch only the height, depth and foobar properties:
GET proptest/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"color": {
"value": "red"
}
}
},
{
"bool": {
"should": [
{
"nested": {
"path": "props",
"query": {
"match": {
"props.key": "height depth foobar"
}
},
"inner_hits": {}
}
},
{
"match_all": {}
}
]
}
}
]
}
},
"_source": {
"excludes": "props"
}
}
The output is
{
"hits": {
"total": 2,
"max_score": 2.2685113,
"hits": [
{
"_index": "proptest",
"_type": "default",
"_id": "1",
"_score": 2.2685113,
"_source": {
"color": "red",
"product_id": "abc123"
},
"inner_hits": {
"props": {
"hits": {
"total": 2,
"max_score": 0.9808292,
"hits": [
{
"_index": "proptest",
"_type": "default",
"_id": "1",
"_nested": {
"field": "props",
"offset": 2
},
"_score": 0.9808292,
"_source": {
"key": "depth",
"value": 500
}
},
{
"_index": "proptest",
"_type": "default",
"_id": "1",
"_nested": {
"field": "props",
"offset": 1
},
"_score": 0.9808292,
"_source": {
"key": "height",
"value": 2000
}
}
]
}
}
}
},
{
"_index": "proptest",
"_type": "default",
"_id": "2",
"_score": 1.287682,
"_source": {
"color": "red",
"product_id": "def"
},
"inner_hits": {
"props": {
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
}
}
]
}
}
Note that the results contain both products abc123 and def, with the correct properties filtered. Product abc123 partially matches the given property list, while def does not contain any of the properties. The main results are defined only by the outer query color:red.
The drawback of the method is the properties won't be found under the same top level _source but under the inner hits key.

Search all unique terms from a given query in elastic search

I am trying to search for all the unique names in the index test_nested.
GET test_nested/_mappings
{
"test_nested": {
"mappings": {
"my_type": {
"properties": {
"group": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
GET test_nested/_search
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "test_nested",
"_type": "my_type",
"_id": "AWG5iVBz4bQsVnslc9gL",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "Linux"
},
{
"name": "Android (operating system)"
},
{
"name": "Widows 10"
}
]
}
},
{
"_index": "test_nested",
"_type": "my_type",
"_id": "AWG5ieKW4bQsVnslc9gM",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "Bitcoin"
},
{
"name": "PHP"
},
{
"name": "Microsoft Windows"
}
]
}
},
{
"_index": "test_nested",
"_type": "my_type",
"_id": "AWG5irrV4bQsVnslc9gN",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "Windows XP"
}
]
}
},
{
"_index": "test_nested",
"_type": "my_type",
"_id": "1",
"_score": 1,
"_source": {
"group": "fans",
"user": [
{
"name": "iOS"
},
{
"name": "Android (operating system)"
},
{
"name": "Widows 10"
},
{
"name": "Widows XP"
}
]
}
}
]
}
}
I want all the unique names for a term. i.e. if I search for "wi"* then I should get [Microsoft Windows, Widows 10, Windows XP]
I don't know exactly what you mean but I use that query to list all statuses:
GET order/default/_search
{
"size": 0,
"aggs": {
"status_terms": {
"terms": {
"field": "status.keyword",
"missing": "N/A",
"min_doc_count": 0,
"order": {
"_key": "asc"
}
}
}
}
}
My model has status field and that query lists all statuses.
This is a bucket aggregation.
One of fields in result is:
sum_other_doc_count - Elastic returns the top unique terms. So if you have many different terms then some of them will not appear in the results. This field is a sum of documents which will not be a part of the response.
For nested objects try to read and use Nested Query docs
I found the solution. Hope it helps someone.
GET record_new/_search
{
"size": 0,
"query": {
"term": {
"software_tags": {
"value": "windows"
}
}
},
"aggs": {
"software_tags": {
"terms": {
"field": "software_tags.keyword",
"include" : ".*Windows.*",
"size": 10000,
"order": {
"_count": "desc"
}
}
}
}
}

Resources