Sorting aggregated data in elastic search - elasticsearch

I am running a search that aggregates on the xyz field and returns the latest version for each bucket. Now I need to sort the aggregated buckets by the created field. How can I do that?
{
"query": {
"query_string": {
"query": ""
}
},
"aggs": {
"uuid": {
"terms": {
"field": "xyz.keyword"
},
"aggs": {
"top_trades_hits": {
"top_hits": {
"sort": [
{
"version": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
The above query returns:
{
"aggregations": {
"uuid": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "794a5b8f-3e22-4ff9-98bb-b8b54c85948e",
"doc_count": 3,
"agg": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "index",
"_type": "doc",
"_id": "7",
"_score": null,
"_source": {
"uuid": "794a5b8f-3e22-4ff9-98bb-b8b54c85948e",
"type": "qsdn",
"discontinued": false,
"minSupportedPlatformVersion": "11.5.3.3",
"version": 2,
"created": 1658428291346
},
"sort": [
2
]
}
]
}
}
},
{
"key": "03504029-a029-417d-bd67-fb1b5fc5055b",
"doc_count": 2,
"agg": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "index",
"_type": "doc",
"_id": "9",
"_score": null,
"_source": {
"uuid": "03504029-a029-417d-bd67-fb1b5fc5055b",
"type": "gdsg",
"discontinued": false,
"version": 1.1,
"created": 1554904300799
},
"sort": [
1.1
]
}
]
}
}
}
]
}
}
}
A sample document in Elasticsearch looks like this:
{
"_index": "index",
"_type": "doc",
"_id": "3",
"_version": 2,
"_seq_no": 1,
"_primary_term": 1,
"found": true,
"_source": {
"doc": {
"uuid": "abcd",
"type": "strifn",
"name": "default",
"version": 3.12,
"s3ObjectVersionId": "",
"created": 165842829134
}
}
}
Expected result
{
"aggregations": {
"uuid": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "03504029-a029-417d-bd67-fb1b5fc5055b",
"doc_count": 2,
"agg": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "index",
"_type": "doc",
"_id": "9",
"_score": null,
"_source": {
"uuid": "03504029-a029-417d-bd67-fb1b5fc5055b",
"type": "gdsg",
"discontinued": false,
"version": 1.1,
"created": 1554904300799
},
"sort": [
1.1
]
}
]
}
}
},
{
"key": "794a5b8f-3e22-4ff9-98bb-b8b54c85948e",
"doc_count": 3,
"agg": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "index",
"_type": "doc",
"_id": "7",
"_score": null,
"_source": {
"uuid": "794a5b8f-3e22-4ff9-98bb-b8b54c85948e",
"type": "qsdn",
"discontinued": false,
"minSupportedPlatformVersion": "11.5.3.3",
"version": 2,
"created": 1658428291346
},
"sort": [
2
]
}
]
}
}
}
]
}
}
}
I am using AWS OpenSearch for this.

Your query is correct; you just need to increase the size from 1 to see all the documents in each bucket, sorted by the version field in your Elasticsearch index.
If the above doesn't help, can you share more information, such as sample documents and the index mapping?

Related

Querying array with nested objects in Elasticsearch to get multiple objects

I have data in Elasticsearch in the below format -
"segments": [
{"id": "ABC", "value":123},
{"id": "PQR", "value":345},
{"id": "DEF", "value":567},
{"id": "XYZ", "value":789},
]
I want to retrieve all segments where id is "ABC" or "DEF".
I looked at the docs (https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-nested-query.html) and a few examples on YouTube, but they all seem to retrieve only a single object, while I want to retrieve more than one.
Is there a way to do this?
You can use a nested query with inner hits, as shown here.
I assume your index mapping looks like the one below, with the segments field defined as nested:
"mappings": {
"properties": {
"segments": {
"type": "nested",
"properties": {
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "long"
}
}
}
}
}
You can use the query below:
{
"_source" : false,
"query": {
"nested": {
"path": "segments",
"query": {
"terms": {
"segments.id.keyword": [
"ABC",
"DEF"
]
}
},
"inner_hits": {}
}
}
}
Response:
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_score": 1,
"inner_hits": {
"segments": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 0
},
"_score": 1,
"_source": {
"id": "ABC",
"value": 123
}
},
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 2
},
"_score": 1,
"_source": {
"id": "DEF",
"value": 567
}
}
]
}
}
}
}
]
}

How to filter an aggregation based on the values of top hits?

The mapping of the elastic search documents are as follows:
{
"mappings": {
"properties": {
"vote_id": {
"type": "keyword"
},
"user_id": {
"type": "text"
},
"song_id": {
"type": "text"
},
"type": {
"type": "byte"
},
"timestamp": {
"type": "date"
}
}
}
}
I want to aggregate these votes such that it returns songs that you AND your friends like.
So far, I have buckets of songs that you and your friends like, but some buckets may be songs that only your friends like.
{
"query": {
"bool": {
"must": {
"terms": {
"user_id": ["you and your friend ids"]
}
}
}
},
"aggs": {
"songs": {
"terms": {
"field": "song_id"
},
"aggs": {
"docs": {
"top_hits": {
"size": "length of you and your friends",
"_source": ["vote_id", "song_id", "user_id", "type", "timestamp"]
}
},
"more_than_one": {
"bucket_selector": {
"buckets_path": {
"count": "_count"
},
"script": "params.count > 1"
}
}
},
}
}
}
I want to filter the buckets such that at least one of the documents in the top hits has your user id.
This is the current response
"aggregations": {
"songs": {
"buckets": [
{
"doc_count": 5,
"docs": {
"hits": {
"hits": [
{
"_id": "ivotNtFBb9TCEfpk3S54q6gcMbjZB82Xc1_ZCgA6kYsUmvk",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngVOkuaMqJTu6eQDj73gDYGObUus3g5Qp8",
"timestamp": "2022-07-08T19:41:56.118207343Z",
"type": 1,
"user_id": "iusr8keSbPjX9ZqFhX4Dei4G",
"vote_id": "ivotNtFBb9TCEfpk3S54q6gcMbjZB82Xc1_ZCgA6kYsUmvk"
},
"_type": "_doc"
},
{
"_id": "ivotEFcqOlCL5htJZJ43NslAP555DaPj0Dgkcay_Ml2jAT4",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngVOkuaMqJTu6eQDj73gDYGObUus3g5Qp8",
"timestamp": "2022-07-08T19:41:52.143988883Z",
"type": 1,
"user_id": "iusr18wnuxsy8oVVK3Xic4Sy",
"vote_id": "ivotEFcqOlCL5htJZJ43NslAP555DaPj0Dgkcay_Ml2jAT4"
},
"_type": "_doc"
},
{
"_id": "ivotToZ-0iBiM_zF5TP1Shj5C29WV3U0ibedlxvcQccimeo",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngVOkuaMqJTu6eQDj73gDYGObUus3g5Qp8",
"timestamp": "2022-07-08T19:41:50.178450007Z",
"type": 1,
"user_id": "iusrKDxnm75fADEpusbmx5JM",
"vote_id": "ivotToZ-0iBiM_zF5TP1Shj5C29WV3U0ibedlxvcQccimeo"
},
"_type": "_doc"
},
{
"_id": "ivotAHBPual232E12ggibhr6GfQ5E3f9Ryov0gYKGrIRB0Y",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngVOkuaMqJTu6eQDj73gDYGObUus3g5Qp8",
"timestamp": "2022-07-08T19:41:52.886305925Z",
"type": 1,
"user_id": "iusrJG4GwkWa6Y70LPkuNCPg",
"vote_id": "ivotAHBPual232E12ggibhr6GfQ5E3f9Ryov0gYKGrIRB0Y"
},
"_type": "_doc"
},
{
"_id": "ivot7a8rWunlFu_q5St44PYDeNelLq4bsxr9wzYP9D80wxE",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngVOkuaMqJTu6eQDj73gDYGObUus3g5Qp8",
"timestamp": "2022-07-08T19:41:49.031694548Z",
"type": 1,
"user_id": "iusrxunBXT1UD0IrvjqjgWaj",
"vote_id": "ivot7a8rWunlFu_q5St44PYDeNelLq4bsxr9wzYP9D80wxE"
},
"_type": "_doc"
}
],
"max_score": 1,
"total": {
"relation": "eq",
"value": 5
}
}
},
"key": "isngVOkuaMqJTu6eQDj73gDYGObUus3g5Qp8"
},
{
"doc_count": 4,
"docs": {
"hits": {
"hits": [
{
"_id": "ivot9_2eQ_3eqU7SXQBnLWGQwFI5DE99Naf8wYbFNFrj1lk",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isng4hKgRPQvH0YhtBy5GaUqgCdwDoVhJuf6",
"timestamp": "2022-07-08T19:41:55.761587927Z",
"type": 1,
"user_id": "iusr8keSbPjX9ZqFhX4Dei4G",
"vote_id": "ivot9_2eQ_3eqU7SXQBnLWGQwFI5DE99Naf8wYbFNFrj1lk"
},
"_type": "_doc"
},
{
"_id": "ivotUZRVSKzGbmlP4LlmBkMwMM8xcR4nGTE9KNpysVR0vXQ",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isng4hKgRPQvH0YhtBy5GaUqgCdwDoVhJuf6",
"timestamp": "2022-07-08T19:41:52.555377592Z",
"type": 1,
"user_id": "iusr18wnuxsy8oVVK3Xic4Sy",
"vote_id": "ivotUZRVSKzGbmlP4LlmBkMwMM8xcR4nGTE9KNpysVR0vXQ"
},
"_type": "_doc"
},
{
"_id": "ivot5Wj8pIkbO0JOV_5s2PqEvZU3sy0WSYYUSlgs2Qizfo8",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isng4hKgRPQvH0YhtBy5GaUqgCdwDoVhJuf6",
"timestamp": "2022-07-08T19:41:49.756332674Z",
"type": 1,
"user_id": "iusrKDxnm75fADEpusbmx5JM",
"vote_id": "ivot5Wj8pIkbO0JOV_5s2PqEvZU3sy0WSYYUSlgs2Qizfo8"
},
"_type": "_doc"
},
{
"_id": "ivot8QNCJGsNtRiZYa-QMTUHEh5MHHHr4EKJsXm4UTAwJkg",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isng4hKgRPQvH0YhtBy5GaUqgCdwDoVhJuf6",
"timestamp": "2022-07-08T19:41:53.26319105Z",
"type": 1,
"user_id": "iusrJG4GwkWa6Y70LPkuNCPg",
"vote_id": "ivot8QNCJGsNtRiZYa-QMTUHEh5MHHHr4EKJsXm4UTAwJkg"
},
"_type": "_doc"
}
],
"max_score": 1,
"total": {
"relation": "eq",
"value": 4
}
}
},
"key": "isng4hKgRPQvH0YhtBy5GaUqgCdwDoVhJuf6"
},
{
"doc_count": 3,
"docs": {
"hits": {
"hits": [
{
"_id": "ivotoCfJc_q5vuY27KmvZUo8s4tilI57_xJoPXqfSeJTikg",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngHAVCux40BgjGVZuAwZlTiEjVQFxuxurc",
"timestamp": "2022-07-08T19:41:50.527352591Z",
"type": 1,
"user_id": "iusrL8FCabxg1YCeaakcVXG5",
"vote_id": "ivotoCfJc_q5vuY27KmvZUo8s4tilI57_xJoPXqfSeJTikg"
},
"_type": "_doc"
},
{
"_id": "ivotStjKDIRy6vfaO5dNws4wGPELywPRgg7D7uSavatfIEo",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngHAVCux40BgjGVZuAwZlTiEjVQFxuxurc",
"timestamp": "2022-07-08T19:41:51.733375716Z",
"type": 1,
"user_id": "iusr18wnuxsy8oVVK3Xic4Sy",
"vote_id": "ivotStjKDIRy6vfaO5dNws4wGPELywPRgg7D7uSavatfIEo"
},
"_type": "_doc"
},
{
"_id": "ivotUHj_Ebh-xIqqPJNEdWAuc_JO_mcVVG8F9wM67bJ7_6A",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngHAVCux40BgjGVZuAwZlTiEjVQFxuxurc",
"timestamp": "2022-07-08T19:41:48.60900159Z",
"type": 1,
"user_id": "iusrxunBXT1UD0IrvjqjgWaj",
"vote_id": "ivotUHj_Ebh-xIqqPJNEdWAuc_JO_mcVVG8F9wM67bJ7_6A"
},
"_type": "_doc"
}
],
"max_score": 1,
"total": {
"relation": "eq",
"value": 3
}
}
},
"key": "isngHAVCux40BgjGVZuAwZlTiEjVQFxuxurc"
},
{
"doc_count": 3,
"docs": {
"hits": {
"hits": [
{
"_id": "ivotE5xO9hZGLhrS2sL1mgf5UbcHtf_5qAwbrp3QwEtf4zk",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngOMnUKFVT1cKsH6Q9JfpF3WEZ4H4iU75W",
"timestamp": "2022-07-08T19:41:54.99023451Z",
"type": 1,
"user_id": "iusre80pxIMFB XfF61SHlCiz",
"vote_id": "ivotE5xO9hZGLhrS2sL1mgf5UbcHtf_5qAwbrp3QwEtf4zk"
},
"_type": "_doc"
},
{
"_id": "ivotI0OCI6gz6oEV94hgvjZmGB-qA4n-EigirDRJpgeZt68",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngOMnUKFVT1cKsH6Q9JfpF3WEZ4H4iU75W",
"timestamp": "2022-07-08T19:41:50.952366924Z",
"type": 1,
"user_id": "iusr3oWy3mxsBWu6CU4mlw5L",
"vote_id": "ivotI0OCI6gz6oEV94hgvjZmGB-qA4n-EigirDRJpgeZt68"
},
"_type": "_doc"
},
{
"_id": "ivotm7GrIeyWRHamPXF9klzTZ0La8H4evCgWkCTIpx8rLl4",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngOMnUKFVT1cKsH6Q9JfpF3WEZ4H4iU75W",
"timestamp": "2022-07-08T19:41:48.234881506Z",
"type": 1,
"user_id": "iusrCbCltg4nzv0b2JfUbyhj",
"vote_id": "ivotm7GrIeyWRHamPXF9klzTZ0La8H4evCgWkCTIpx8rLl4"
},
"_type": "_doc"
}
],
"max_score": 1,
"total": {
"relation": "eq",
"value": 3
}
}
},
"key": "isngOMnUKFVT1cKsH6Q9JfpF3WEZ4H4iU75W"
},
{
"doc_count": 2,
"docs": {
"hits": {
"hits": [
{
"_id": "ivotGWwrgPehs9s7ZwZACzkVNp4-_SUaUu3noUKyBH8IBnw",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngfMv4IemhtjXX78LTqxKFBc1VMeUz6And",
"timestamp": "2022-07-08T19:41:53.785333717Z",
"type": 1,
"user_id": "iusrYvNFTaTg4RBNBxG63nkY",
"vote_id": "ivotGWwrgPehs9s7ZwZACzkVNp4-_SUaUu3noUKyBH8IBnw"
},
"_type": "_doc"
},
{
"_id": "ivotrWG5cy6vEbe0N4JO4IKzHZyahOlkyPctCdrBnBu-v9Q",
"_index": "votes-index",
"_score": 1,
"_source": {
"song_id": "isngfMv4IemhtjXX78LTqxKFBc1VMeUz6And",
"timestamp": "2022-07-08T19:41:51.303745591Z",
"type": 1,
"user_id": "iusr18wnuxsy8oVVK3Xic4Sy",
"vote_id": "ivotrWG5cy6vEbe0N4JO4IKzHZyahOlkyPctCdrBnBu-v9Q"
},
"_type": "_doc"
}
],
"max_score": 1,
"total": {
"relation": "eq",
"value": 2
}
}
},
"key": "isngfMv4IemhtjXX78LTqxKFBc1VMeUz6And"
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}
}
I want to keep only the aggregation buckets whose hits include a document with your own user id.

ElasticSearch 2.4 - inner_hits nested merge queries result

I'm using ElasticSearch 2.4
I need to get all Purchases that match all queries.
I'm currently using the inner_hits feature, but it doesn't work as expected: it only shows the matches of the current nested query, and the problem is combining it with the main document query.
I have this mapping, and below I created an example with my comments:
PUT /example_contact_purchases
{
"mappings": {
"contact": {
"dynamic": false,
"properties": {
"name": {
"type": "string"
},
"country": {
"type": "string"
},
"purchases": {
"type": "nested",
"properties": {
"uuid":{
"type":"string"
},
"brand":{
"type":"string"
}
}
}
}
}
}
}
POST example_contact_purchases/contact
{
"name" : "Fran",
"country": "ES",
"purchases" : [
{
"uuid" : "23",
"brand":"Sony"
},
{
"uuid":"23",
"brand":"Sony"
}
]
}
POST example_contact_purchases/contact
{
"name" : "Jhon",
"country": "UK",
"purchases" : [
{
"uuid" : "45",
"brand": "Lenovo"
},
{
"uuid":"23",
"brand":"Sony"
},
{
"uuid":"77",
"brand":"HP"
}
]
}
POST example_contact_purchases/contact
{
"name" : "Lucas",
"country": "ES",
"purchases" : [
{
"uuid" : "45",
"brand": "Lenovo"
},
{
"uuid":"23",
"brand":"Sony"
},
{
"uuid":"77",
"brand":"HP"
}
]
}
GET example_contact_purchases/contact/_search
{
"query": {
"bool": {
"should": [
{"bool": {
"must": [
{
"query_string": {
"query": "country:ES"
}
},
{
"nested": {
"path": "purchases",
"inner_hits":{
"name":"0"
},
"filter": {
"query": {
"query_string": {
"query": "(purchases.brand:Sony)"
}
}
}
}
}
]
}},
{"bool": {
"must": [
{
"query_string": {
"query": "country:UK"
}
},
{
"nested": {
"path": "purchases",
"inner_hits":{
"name":"1"
},
"filter": {
"query": {
"query_string": {
"query": "(purchases.uuid:45)"
}
}
}
}
}
]
}
}
]
}
}
}
I am using a simple query like this:
"(country.raw:ES AND purchases.brand:Sony) OR (country:UK AND purchases.uuid:45)"
And the result of the search query is:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.5949223,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_score": 0.5949223,
"_source": {
"name": "Jhon",
"country": "UK",
"purchases": [
{
"uuid": "45",
"brand": "Lenovo"
},
{
"uuid": "23",
"brand": "Sony"
},
{
"uuid": "77",
"brand": "HP"
}
]
},
"inner_hits": {
"0": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "45",
"brand": "Lenovo"
}
}
]
}
}
}
},
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJKBHXthyTIlmcERN",
"_score": 0.5949223,
"_source": {
"name": "Lucas",
"country": "ES",
"purchases": [
{
"uuid": "45",
"brand": "Lenovo"
},
{
"uuid": "23",
"brand": "Sony"
},
{
"uuid": "77",
"brand": "HP"
}
]
},
"inner_hits": {
"0": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJKBHXthyTIlmcERN",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJKBHXthyTIlmcERN",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "45",
"brand": "Lenovo"
}
}
]
}
}
}
},
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJI1SXthyTIlmcERL",
"_score": 0.5139209,
"_source": {
"name": "Fran",
"country": "ES",
"purchases": [
{
"uuid": "23",
"brand": "Sony"
},
{
"uuid": "23",
"brand": "Sony"
}
]
},
"inner_hits": {
"0": {
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJI1SXthyTIlmcERL",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
},
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJI1SXthyTIlmcERL",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
}
}
]
}
}
Unfortunately, the first result is wrong:
"inner_hits": {
"0": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 1
},
"_score": 1,
"_source": {
"uuid": "23",
"brand": "Sony"
}
}
]
}
},
"1": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "example_contact_purchases",
"_type": "contact",
"_id": "AXFfJJdZXthyTIlmcERM",
"_nested": {
"field": "purchases",
"offset": 0
},
"_score": 1,
"_source": {
"uuid": "45",
"brand": "Lenovo"
}
}
]
}
}
}
It should show the purchase for Jhon (UK) with these parameters:
{"uuid": "45","brand":"Lenovo"} ( inner_hits with name "1")
Thanks

Find Duplicate Documents in Elastic Search

I'm looking for a way to find exact duplicate documents in Elasticsearch.
I've read https://qbox.io/blog/minimizing-document-duplication-in-elasticsearch and tried it, but the results are not what I expected. For example, this is my simple sample query:
GET /last_month_ads/_search
{
"size": 0,
"fields": [
"title"
],
"aggs": {
"duplicateCount": {
"terms": {
"field": "title",
"size" : 3
},
"aggs": {
"duplicateDocuments": {
"top_hits": {}
}
}
}
}
}
and the result is
{
"took": 981,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 482909,
"max_score": 0,
"hits": []
},
"aggregations": {
"duplicateCount": {
"doc_count_error_upper_bound": 11667,
"sum_other_doc_count": 1958146,
"buckets": [
{
"key": "CM",
"doc_count": 46867,
"duplicateDocuments": {
"hits": {
"total": 46867,
"max_score": 1,
"hits": [
{
"_index": "last_month_ads",
"_type": "ads",
"_id": "AV73EtoBQTqkjEa7YQG1",
"_score": 1,
"_source": {
"id": "20642316",
"cat_id": "43606",
"user_id": "1825875",
"title": "125 CM HOME",
"desc": "DESC"
}
},
{
"_index": "last_month_ads",
"_type": "ads",
"_id": "AV73EtpdQTqkjEa7YQHc",
"_score": 1,
"_source": {
"id": "20642379",
"cat_id": "43604",
"user_id": "4642299",
"title": "Home with Big CM",
"desc": "DESC"
}
},
{
"_index": "last_month_ads",
"_type": "ads",
"_id": "AV73Etp6QTqkjEa7YQHp",
"_score": 1,
"_source": {
"id": "20642409",
"cat_id": "43607",
"user_id": "4813303",
"title": "100 of live CM is here ",
"desc": "DESC"
}
}
]
}
}
},
}
]
}
}
}
I'm looking for exact (or similar) titles, not words that occur frequently in titles. How can I get duplicate (similar) documents in Elasticsearch?

How to correctly aggregate with the field is a list on Elasticsearch

Currently the ES logs are indexed in a way that some fields have a list instead of a single value.
For example:
_source:{
"field1":["item1", "item2", "item3"],
"field2":"something",
"field3": "something_else"
}
Of course, the length of the list is not always the same. I'm trying to find a way to count the number of logs that contain each item (so some logs will be counted multiple times).
I know I have to use aggs, but how can I form the right query (after -d)?
You can use the query below, which uses a terms aggregation and top_hits.
{
"size": 0,
"aggs": {
"group": {
"terms": {
"script": "_source.field1.each{}"
},
"aggs":{
"top_hits_log" :{
"top_hits" :{
}
}
}
}
}
}
Output will be:
"buckets": [
{
"key": "item1",
"doc_count": 3,
"top_hits_log": {
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "so",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2",
"item3"
],
"field2": "something1"
}
},
{
"_index": "so",
"_type": "test",
"_id": "2",
"_score": 1,
"_source": {
"field1": [
"item1"
],
"field2": "something2"
}
},
{
"_index": "so",
"_type": "test",
"_id": "3",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2"
],
"field2": "something3"
}
}
]
}
}
},
{
"key": "item2",
"doc_count": 2,
"top_hits_log": {
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "so",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2",
"item3"
],
"field2": "something1"
}
},
{
"_index": "so",
"_type": "test",
"_id": "3",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2"
],
"field2": "something3"
}
}
]
}
}
},
{
"key": "item3",
"doc_count": 1,
"top_hits_log": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "so",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2",
"item3"
],
"field2": "something1"
}
}
]
}
}
}
]
Make sure to enable dynamic scripting. Set script.disable_dynamic: false
Hope this helps.
There is no need to use scripting. It will be slow, especially the _source parsing. You also need to make sure your field1 is not_analyzed, or you will get weird results, because the terms aggregation is performed on the unique tokens in the inverted index.
{
"size": 0,
"aggs": {
"unique_items": {
"terms": {
"field": "field1",
"size": 100
},
"aggs": {
"documents": {
"top_hits": {
"size": 10
}
}
}
}
}
}
Here the size is 100 inside the terms aggregation; change this according to how many unique values you think you have (the default is 10).
Hope this helps!

Resources