ElasticSearch: nested items count in search results - elasticsearch

I have the following mapping:
{
"test_index" : {
"mappings" : {
"test_type" : {
"properties" : {
"field1" : {
"type" : "string"
},
"field2" : {
"type" : "string"
},
"items" : {
"type" : "nested",
"properties" : {
"nested_field1" : {
"type" : "string"
},
"nested_field2" : {
"type" : "string"
}
}
}
}
}
}
}
}
Along with the search results, I want to get the total number of nested items inside the results structure:
{
"hits": {
"total": 2,
"max_score": 1.0,
"hits": [
{
"_index": "test_index",
"_type": "test_type",
"_id": "AWfAc79wljtimCd5JZlJ",
"_score": 1.0,
"_source": {
"field1": "Some string 1",
"field2": "Some string 2",
"items": [
{
"nested_field1": "Some val1",
"nested_field2": "Some val2"
}
],
"totalItems": 1
}
},
{
"_index": "test_index",
"_type": "test_type",
"_id": "AZxfc79dtrt878xx",
"_score": 1.0,
"_source": {
"field1": "Some string 3",
"field2": "Some string 4",
"items": [
{
"nested_field1": "Some val3",
"nested_field2": "Some val4"
},
{
"nested_field1": "Some val5",
"nested_field2": "Some val6"
}
],
"totalItems": 2
}
}
]
}
}
Can I achieve this via aggregations?

Since you have had the great idea to also store the totalItems field at the root level, you can just sum up that field with a sum aggregation to get the total number of nested items across the matching documents:
{
"query": {
"match_all": {}
},
"aggs": {
"total_items": {
"sum": {
"field": "totalItems"
}
}
}
}

Related

Elastic search query string

Why can't I get the same result in the second query as in the third one? What am I doing wrong?
I make this query:
{
"size": 20,
"track_total_hits": false,
"_source": [
"title"
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "63 ",
"default_field": "title",
"type": "phrase_prefix"
}
}
]
}
}
}
and got this result:
{
"hits": {
"max_score": 13.483224,
"hits": [
{
"_index": "products_2022_11_3_17_30_44_56920",
"_type": "_doc",
"_id": "19637",
"_score": 13.483224,
"_source": {
"title": "Заднее стекло 6302BGNE"
}
}
]
}
}
All right. After this, I type one more character:
"query": "63 2"
and get an empty result:
"hits" : {
"max_score" : null,
"hits" : [ ]
}
}
Then I add one more character:
"query": "63 21"
and get a non-empty result again:
{
"hits": [
{
"_index": "products_2022_11_3_17_30_44_56920",
"_type": "_doc",
"_id": "105863",
"_score": 440.54578,
"_source": {
"title": "Лампа накаливания 63 21 0 151 620 BMW"
}
}
]
}
Index mapping:
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
GET products/_settings
{
"products_2022_11_7_8_57_7_118045" : {
"settings" : {
"index" : {
"routing" : {
"allocation" : {
"include" : {
"_tier_preference" : "data_content"
}
}
},
"number_of_shards" : "1",
"provided_name" : "products_2022_11_7_8_57_7_118045",
"creation_date" : "1667800627119",
"number_of_replicas" : "0",
"uuid" : "GV6-5tzQQPavncFUcvq9NA",
"version" : {
"created" : "7170299"
}
}
}
}
}
It looks like you are using some analyzer on your title field that creates tokens in such a way that they don't match your search term.
I used the standard analyzer for the title field and indexed the sample documents you showed, and it gives me results for all three queries, as shown below:
{
"size": 20,
"track_total_hits": true,
"_source": [
"title"
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "63 2",
"default_field": "title",
"type": "phrase_prefix"
}
}
]
}
}
}
Search Result
"hits": [
{
"_index": "74308224",
"_type": "_doc",
"_id": "2",
"_score": 1.1689311,
"_source": {
"title": "Лампа накаливания 63 21 0 151 620 BMW"
}
}
]
Providing your index mapping and settings would help identify why it's not giving the expected result.

How to get specific items from nested object in elastic search

I've prepared an Elastic Search query in which I'm trying to fetch results from nested objects. The query looks something like this:
{
"from": 0,
"size": 100,
"_source": {
"excludes": [
"#version"
]
},
"query": {
"bool": {
"must": [
{
"term": {
"doc.workflow_id.keyword": "workflow1"
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "price"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*34*"
}
}
]
}
}
]
}
}
}
}
],
"must_not": []
}
}
}
Output:
"hits" : [
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv1",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "weight",
"value" : "10"
},
{
"name" : "color",
"value" : "green"
},
{
"name" : "city",
"value" : "#error"
}
]
}
}
},
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv2",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "color",
"value" : "green"
}
]
}
}
}
]
I've omitted a few trivial details in query and output for simplicity. The attributes array in the response is of type nested and contains name and value fields of type string.
I've put filters on attributes color and price, but as you can see, I'm getting other attributes too in the attributes array. Can I somehow pass specific attribute names to the ES query and get the value of those attributes only?
I tried using inner_hits in both nested queries, but it returns the attribute value only for the passed attribute name in the nested query.
E.g.
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
},
"inner_hits": {
"name": "two",
"_source": [
"doc.product_attributes.name",
"doc.product_attributes.value"
]
}
}
}
gives result
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "weight",
"value": "10"
},
{
"name": "color",
"value": "green"
},
{
"name": "city",
"value": "#ERROR"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 2
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
},
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv2",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "color",
"value": "green"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv2",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
}
]
}
Note the attribute name and value received inside the inner_hits object.
I also want to get, in the response, the names and values of other attributes for which I'm not putting any filter. For example, if I want to get attribute names and values for weight, color & city only, how do I do that?
I've checked this thread select matching objects from array in elasticsearch, but it doesn't solve my problem.

has_child and has_parent not returning results

I went through the following links before posting this question:
Elasticsearch has_child returning no results
ElasticSearch 7.3 has_parent/has_child don't return any hits
ES documentation
I created a simple mapping with text_doc as the parent and flag_doc as the child.
{
"doc_index_ap3" : {
"mappings" : {
"properties" : {
"domain" : {
"type" : "keyword"
},
"email_text" : {
"type" : "text"
},
"id" : {
"type" : "keyword"
},
"my_join_field" : {
"type" : "join",
"eager_global_ordinals" : true,
"relations" : {
"text_doc" : "flag_doc"
}
}
}
}
}
}
The query with parent_id works fine & returns 1 doc as expected
GET doc_index_ap3/_search
{
"query": {
"parent_id": {
"type": "flag_doc",
"id":"f0d2cb3c-bf4b-11eb-9f67-93a282921115"
}
}
}
But none of the below queries return any results.
GET doc_index_ap3/_search
{
"query": {
"has_parent": {
"parent_type": "text_doc",
"query": {
"match_all": {
}
}
}
}
}
GET doc_index_ap3/_search
{
"query": {
"has_child": {
"type": "flag_doc",
"query": {
"match_all": {}
}
}
}
}
There must be some issue in the way you have indexed the parent and child documents. Refer to the official documentation to learn more about the parent-child relationship.
Adding a working example using the same index mapping as given in the question above
Parent document in the text_doc context
PUT /index-name/_doc/1
{
"domain": "ab",
"email_text": "ab",
"id": "ab",
"my_join_field": {
"name": "text_doc"
}
}
Child document
PUT /index-name/_doc/2?routing=1&refresh
{
"domain": "cs",
"email_text": "cs",
"id": "cs",
"my_join_field": {
"name": "flag_doc",
"parent": "1"
}
}
Search Query:
{
"query": {
"has_parent": {
"parent_type": "text_doc",
"query": {
"match_all": {
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "67731507",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_routing": "1",
"_source": {
"domain": "cs",
"email_text": "cs",
"id": "cs",
"my_join_field": {
"name": "flag_doc",
"parent": "1"
}
}
}
]
Search Query:
{
"query": {
"has_child": {
"type": "flag_doc",
"query": {
"match_all": {}
}
}
}
}
Search Result:
"hits": [
{
"_index": "67731507",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"domain": "ab",
"email_text": "ab",
"id": "ab",
"my_join_field": {
"name": "text_doc"
}
}
}
]

elk's elastic search dsl case sensitive

I'm doing an Elasticsearch Query DSL query on ELK such as:
{
"query": {
"wildcard": {
"url.path": {
"value": "*download*",
"boost": 1,
"rewrite": "constant_score"
}
}
}
}
but it seems to be case sensitive (it shows only entries with "download", not "Download" or "DOWNLOAD").
That is, the query is case sensitive.
Can I disable this and search case-insensitively?
Version used: 7.9.1
The query below will help you perform a case-insensitive search, as it will fetch results for *download, *Download and *DOWNLOAD. Replace <my-index> with your index and <field1> with the field on which you would like to perform this search.
Search Query
GET /<my-index>/_search
{
"query" : {
"bool" : {
"must" : {
"query_string" : {
"query" : "*download",
"fields": ["<field1>"]
}
}
}
}
}
If you wish to perform the same search on multiple fields, you can add them to the fields list.
Search on multiple fields
GET /<my-index>/_search
{
"query" : {
"bool" : {
"must" : {
"query_string" : {
"query" : "*download",
"fields": ["<field1>","<field2>","<field3>"]
}
}
}
}
}
There is a case_insensitive parameter available for wildcard query, but it was introduced in Elasticsearch 7.10.0, so you need to upgrade if you are still on 7.9.1.
If you can upgrade to 7.10.0 or higher:
Ideally, the field should use the wildcard type in the index mapping:
{
"mappings": {
"properties": {
"url.path": {
"type": "wildcard"
}
}
}
}
Then a wildcard query with case insensitivity enabled will find all the variants ("download", "DOWNLOAD", "Download", etc.):
{
"query": {
"wildcard": {
"url.path": {
"value": "*download*",
"boost": 1,
"rewrite": "constant_score",
"case_insensitive": true
}
}
}
}
If you must remain at 7.9.1:
Define your mapping in such a way that Elasticsearch treats the field contents as lowercase. The following will mimic wildcard type (it's a keyword, so only one token) indexed as lowercase.
{
"mappings": {
"properties": {
"url": {
"type": "text",
"analyzer": "lowercase-keyword"
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lowercase-keyword": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
}
}
The query, without the case_insensitive parameter which is unsupported in this version:
{
"query": {
"wildcard": {
"url": {
"value": "*download*",
"boost": 1,
"rewrite": "constant_score"
}
}
}
}
Example results (note that searching for "*download*" and "*DoWnLoAd*" both work in the same way):
{
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "PtbQe3wByTvslqtrs7Cn",
"_score": 1.0,
"_source": {
"url": "http://example.com/download"
}
},
{
"_index": "my-index",
"_type": "_doc",
"_id": "P9bQe3wByTvslqtrvbDt",
"_score": 1.0,
"_source": {
"url": "http://example.com/Download"
}
},
{
"_index": "my-index",
"_type": "_doc",
"_id": "QNbQe3wByTvslqtrzbDw",
"_score": 1.0,
"_source": {
"url": "http://example.com/DOWNLOAD"
}
}
]
}
}
You can use the case_insensitive parameter of the wildcard query. This parameter was introduced in version 7.10.0.
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"url": {
"properties": {
"path": {
"type": "wildcard"
}
}
}
}
}
}
Index Data:
{
"url":{
"path":"xx/download"
}
}
Search Query:
{
"query": {
"wildcard": {
"url.path": {
"value": "*Download*",
"boost": 1,
"rewrite": "constant_score",
"case_insensitive": false
}
}
}
}
Search Result:
No results are returned when you search for *Download* or *DOWNLOAD*, because case_insensitive is set to false.
Update:
You can use the wildcard query with "case_insensitive": true parameter
Adding a sample index data, search query, and search result
Index Data:
{
"url": {
"path": "download"
}
}
{
"url": {
"path": "DOWNLOAD"
}
}
{
"url": {
"path": "Download"
}
}
Search Query:
{
"query": {
"wildcard": {
"url.path": {
"value": "*DOWNLOAD*",
"boost": 1,
"rewrite": "constant_score",
"case_insensitive": true
}
}
}
}
Search Result:
"hits": [
{
"_index": "67210888",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"url": {
"path": "download"
}
}
},
{
"_index": "67210888",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"url": {
"path": "Download"
}
}
},
{
"_index": "67210888",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"url": {
"path": "DOWNLOAD"
}
}
}
]

sort _score desc elasticsearch

I have created an Elasticsearch query with function_score and top_hits. This query removes duplicates and returns the top 1 record for each bucket.
GET employeeid/info/_search
{"size": 0,
"query" : {
"function_score" : {
"query" : {
"match" : {
"employeeID" : "23141A"
}
},
"functions" : [{
"linear" : {
"AcquiredDate" : {
"scale" : "90d",
"decay" : 0.5
}
}
}, {
"filter" : {
"match" : {
"name" : "sorna"
}
},
"boost_factor" : 10
}, {
"filter" : {
"match" : {
"name" : "lingam"
}
},
"boost_factor" : 7
}
],
"boost_mode" : "replace"
}
},
"aggs": {
"duplicateCount": {
"terms": {
"field": "employeehash",
"min_doc_count": 1
},
"aggs": {
"duplicateDocuments": {
"top_hits": {
"size":1
}
}
}
}
}
}
I am getting the expected result, but the problem is that I want to sort the result by _score.
The following is my sample output:
{
"key": "567",
"doc_count": 2,
"duplicateDocuments": {
"hits": {
"total": 2,
"max_score": 0.40220365,
"hits": [
{
"_index": "employeeid",
"_type": "info",
"_id": "5",
"_score": 0.40220365,
"_source": {
"name": "John",
"organisation": "google",
"employeeID": "23141A",
"employeehash": "567",
"AcquiredDate": "2016-02-01T07:57:28Z"
}
}
]
}
}
},
{
"key": "102",
"doc_count": 1,
"duplicateDocuments": {
"hits": {
"total": 1,
"max_score": 2.8154256,
"hits": [
{
"_index": "employeeid",
"_type": "info",
"_id": "8",
"_score": 2.8154256,
"_source": {
"name": "lingam",
"organisation": "google",
"employeeID": "23141A",
"employeehash": "102",
"AcquiredDate": "2016-02-01T07:57:28Z"
}
}
]
}
}
}
Question: How can I sort the buckets by _score in descending order?
I have not enabled Groovy, so I cannot use a script.

Resources