How to get specific items from nested object in elastic search - elasticsearch

I've prepared an Elastic Search query in which I'm trying to fetch results from nested objects. The query looks something like this:
{
"from": 0,
"size": 100,
"_source": {
"excludes": [
"#version"
]
},
"query": {
"bool": {
"must": [
{
"term": {
"doc.workflow_id.keyword": "workflow1"
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "price"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*34*"
}
}
]
}
}
]
}
}
}
}
],
"must_not": []
}
}
}
Output:
"hits" : [
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv1",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "weight",
"value" : "10"
},
{
"name" : "color",
"value" : "green"
},
{
"name" : "city",
"value" : "#error"
}
]
}
}
},
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv2",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "color",
"value" : "green"
}
]
}
}
}
]
I've omitted a few trivial details in query and output for simplicity. The attributes array in the response is of type nested and contains name and value fields of type string.
I've put filters on attributes color and price, but as you can see, I'm getting other attributes too in the attributes array. Can I somehow pass specific attribute names to the ES query and get the value of those attributes only?
I tried using inner_hits in both nested queries, but it returns the attribute value only for the passed attribute name in the nested query.
E.g.
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
},
"inner_hits": {
"name": "two",
"_source": [
"doc.product_attributes.name",
"doc.product_attributes.value"
]
}
}
}
gives result
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "weight",
"value": "34"
},
{
"name": "color",
"value": "green"
},
{
"name": "city",
"value": "#ERROR"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
},
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv2",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "color",
"value": "green"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
}
]
}
Note the attribute name and value received inside the inner_hits object.
I also want the response to include the names and values of other attributes on which I'm not putting any filter. For example, if I want to get attribute names and values for weight, color & city only, how do I do that?
I've checked the thread "select matching objects from array in elasticsearch", but it doesn't solve my problem.

Related

How to sort by a provided value and get only the matched element in a nested array in Elasticsearch 7

It's okay to get all the elements back, but I feel the document size is too heavy. How can I get only the matching element in the nested array?
Details below, I will be grateful for any help you can provide.
my test index below
PUT test
PUT test/_mapping
{
"properties": {
"items": {
"type": "nested",
"properties": {
"item": {
"type": "keyword"
},
"price": {
"type": "integer"
}
}
}
}
}
PUT test/_doc/1
{
"items": [
{"item": "A", "price": 350},
{"item": "B", "price": 500}
]
}
PUT test/_doc/2
{
"items": [
{"item": "A", "price": 400},
{"item": "C", "price": 200}
]
}
PUT test/_doc/3
{
"items": [
{"item": "B", "price": 600},
{"item": "C", "price": 150}
]
}
I can get the docs that contain item: "B", sorted by the price of item: "B".
Here is the query
POST test/_search
{
"query" : {
"nested" : {
"query" : {
"term" : {
"items.item": {
"value" : "B",
"boost" : 1.0
}
}
},
"path" : "items",
"ignore_unmapped" : false,
"score_mode" : "none",
"boost" : 1.0
}
},
"sort":[
{
"items.price": {
"order": "desc",
"nested": {
"path": "items",
"filter": {
"term" : { "items.item": "B" }
}
}
}
}
]
}
And Results
[
{
"items" : [
{"item" : "B", "price" : 600},
{"item" : "C", "price" : 150}
]
},
{
"items" : [
{"item" : "A", "price" : 350},
{"item" : "B", "price" : 500}
]
}
]
How do I get the result with only item: B, like below?
[
{
"items" : [
{"item" : "B", "price" : 600}
]
},
{
"items" : [
{"item" : "B", "price" : 500}
]
}
]
You can use inner_hits along with the nested query to get only the matching object in the result:
{
"query": {
"nested": {
"query": {
"term": {
"items.item": {
"value": "B",
"boost": 1.0
}
}
},
"inner_hits": {}, // note this
"path": "items",
"ignore_unmapped": false,
"score_mode": "none",
"boost": 1.0
}
},
"sort": [
{
"items.price": {
"order": "desc",
"nested": {
"path": "items",
"filter": {
"term": {
"items.item": "B"
}
}
}
}
}
]
}
Search Result:
"hits": [
{
"_index": "68902032",
"_type": "_doc",
"_id": "3",
"_score": null,
"_source": {
"items": [
{
"item": "B",
"price": 600
},
{
"item": "C",
"price": 150
}
]
},
"sort": [
600
],
"inner_hits": {
"items": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.9444616,
"hits": [
{
"_index": "68902032",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "items",
"offset": 0
},
"_score": 0.9444616,
"_source": {
"item": "B",
"price": 600 // note this
}
}
]
}
}
}
},
{
"_index": "68902032",
"_type": "_doc",
"_id": "1",
"_score": null,
"_source": {
"items": [
{
"item": "A",
"price": 350
},
{
"item": "B",
"price": 500
}
]
},
"sort": [
500
],
"inner_hits": {
"items": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.9444616,
"hits": [
{
"_index": "68902032",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "items",
"offset": 1
},
"_score": 0.9444616,
"_source": {
"item": "B",
"price": 500 // note this
}
}
]
}
}
}
}
]

"should" query affect scoring, how to avoid that?

I would like to change the following Elasticsearch query so that the "should" array does not affect the scoring of the result. I want the score to be calculated by the "query_string" on the name property only.
How can I achieve that with minimal changes?
GET customers/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "properties.name",
"query": "Joe*"
}
}
],
"should": [
{
"match": {
"properties.role": "admin"
}
},
{
"match": {
"properties.role": "sysop"
}
},
{
"match": {
"properties.role": "client"
}
},
{
"match": {
"properties.status": "public"
}
},
{
"match": {
"properties.status": "public"
}
}
],
"must_not": [
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
},
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
},
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
}
]
}
},
"size": 30,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "return doc['_index'][0] == 'customers' && doc.containsKey('properties.videoCount')?doc['properties.videoCount'].value:0"
}
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "long timestampNow = new Date().getTime(); return doc['_index'][0] == 'customers' && doc.containsKey('properties.subscriptions.features.allow-application')?(timestampNow < doc['properties.subscriptions.features.first-on-search'].value.getMillis()):false"
}
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "return doc['_index'][0] == 'customers' && doc.containsKey('properties.videoCount')?doc['properties.videoCount'].value:0"
}
}
}
]
}
You need to use a combination of bool should and filter clause to achieve your required result.
Adding a working example with index data, search query, and search result
Index Data:
{
"properties":{
"name": "Joe",
"role":"sysop"
}
}
{
"properties":{
"name": "Joe",
"role":"admin"
}
}
{
"properties":{
"name": "Joe",
"role":"student"
}
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "properties.name",
"query": "Joe*"
}
}
],
"should": [
{
"bool": {
"filter": {
"bool": {
"should": [
{
"match": {
"properties.role": "student"
}
},
{
"match": {
"properties.role": "sysop"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "65469210",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "admin"
}
}
},
{
"_index": "65469210",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "student"
}
}
},
{
"_index": "65469210",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "sysop"
}
}
}
]
You can even use the Explain API to see how the score is calculated. Here you can see that the matching should clauses have a value of 0.0. Therefore, they do not contribute to the overall score of the query.
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "admin"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
}
]
}
},
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "student"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
},
{
"value": 0.0, // note this
"description": "ConstantScore(properties.role:student properties.role:sysop)^0.0",
"details": []
}
]
}
},
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "sysop"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
},
{
"value": 0.0, // note this
"description": "ConstantScore(properties.role:student properties.role:sysop)^0.0",
"details": []
}
]
}
}
]
}
}
Use filter: a filter only removes documents and won't affect the score:
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html

how can I fetch only inner fields from source in ElasticSearch?

I have index structure like this:
{
"id" : 42,
"Person" : {
"contracts" : [
{
"contractID" : "000000000000102"
}
],
"Ids" : [
3,
387,
100,
500,
274,
283,
328,
400,
600
]
},
"dateUpdate" : "2020-12-07T13:15:00.408Z"
}
},
...
}
I need a search query that will fetch only inner "Ids" field from source and nothing more. How can I do this?
You can use _source in inner_hits, in the following way
Index Mapping:
{
"mappings": {
"properties": {
"Person": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "Person",
"query": {
"match_all": {}
},
"inner_hits": {
"_source": {
"includes": [
"Person.Ids"
]
}
}
}
}
]
}
}
}
Search Result:
"inner_hits": {
"Person": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65237264",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Person",
"offset": 0
},
"_score": 1.0,
"_source": {
"Ids": [
3,
387,
100,
500,
274,
283,
328,
400,
600
]
}
}
]
}
}
}
You can also use nested inner_hits and _source, in the following way:
{
"query": {
"nested": {
"path": "Person",
"query": {
"match_all": {}
},
"inner_hits": {
"_source" : false,
"docvalue_fields" : [
{
"field": "Person.Ids",
"format": "use_field_mapping"
}
]
}
}
}
}

ElasticSearch: nested items count in search results

I have following mapping:
{
"test_index" : {
"mappings" : {
"test_type" : {
"properties" : {
"field1" : {
"type" : "string"
},
"field2" : {
"type" : "string"
},
"items" : {
"type" : "nested",
"properties" : {
"nested_field1" : {
"type" : "string"
},
"nested_field2" : {
"type" : "string"
}
}
}
}
}
}
}
}
Along with the search results, I want to get the total number of nested items inside the results structure:
{
"hits": {
"total": 2,
"max_score": 1.0,
"hits": [
{
"_index": "test_index",
"_type": "test_type",
"_id": "AWfAc79wljtimCd5JZlJ",
"_score": 1.0,
"_source": {
"field1": "Some string 1",
"field2": "Some string 2",
"items": [
{
"nested_field1": "Some val1",
"nested_field2": "Some val2"
}
],
"totalItems": 1
}
},
{
"_index": "test_index",
"_type": "test_type",
"_id": "AZxfc79dtrt878xx",
"_score": 1.0,
"_source": {
"field1": "Some string 3",
"field2": "Some string 4",
"items": [
{
"nested_field1": "Some val3",
"nested_field2": "Some val4"
},
{
"nested_field1": "Some val5",
"nested_field2": "Some val6"
}
],
"totalItems": 2
}
}
]
}
}
Can I achieve this via aggregations?
Since you have had the great idea to also store the totalItems field at the root level you could just sum up that field and you'd get the number of nested items:
{
"query": {
"match_all": {}
},
"aggs": {
"total_items": {
"sum": {
"field": "totalItems"
}
}
}
}

sort _score desc elasticsearch

I have created an Elasticsearch query with function_score and top_hits. This query removes the duplicates and returns the top 1 record for each bucket.
GET employeeid/info/_search
{"size": 0,
"query" : {
"function_score" : {
"query" : {
"match" : {
"employeeID" : "23141A"
}
},
"functions" : [{
"linear" : {
"AcquiredDate" : {
"scale" : "90d",
"decay" : 0.5
}
}
}, {
"filter" : {
"match" : {
"name" : "sorna"
}
},
"boost_factor" : 10
}, {
"filter" : {
"match" : {
"name" : "lingam"
}
},
"boost_factor" : 7
}
],
"boost_mode" : "replace"
}
},
"aggs": {
"duplicateCount": {
"terms": {
"field": "employeehash",
"min_doc_count": 1
},
"aggs": {
"duplicateDocuments": {
"top_hits": {
"size":1
}
}
}
}
}
}
I am getting the expected result, but the problem is that I want to sort the result by _score.
The following is my sample output:
{
"key": "567",
"doc_count": 2,
"duplicateDocuments": {
"hits": {
"total": 2,
"max_score": 0.40220365,
"hits": [
{
"_index": "employeeid",
"_type": "info",
"_id": "5",
"_score": 0.40220365,
"_source": {
"name": "John",
"organisation": "google",
"employeeID": "23141A",
"employeehash": "567",
"AcquiredDate": "2016-02-01T07:57:28Z"
}
}
]
}
}
},
{
"key": "102",
"doc_count": 1,
"duplicateDocuments": {
"hits": {
"total": 1,
"max_score": 2.8154256,
"hits": [
{
"_index": "employeeid",
"_type": "info",
"_id": "8",
"_score": 2.8154256,
"_source": {
"name": "lingam",
"organisation": "google",
"employeeID": "23141A",
"employeehash": "102",
"AcquiredDate": "2016-02-01T07:57:28Z"
}
}
]
}
}
}
Question: How do I sort the results by _score in descending order?
I have not enabled Groovy, so I cannot use scripts.

Resources