how can I fetch only inner fields from source in ElasticSearch? - elasticsearch

I have an index structure like this:
{
"id" : 42,
"Person" : {
"contracts" : [
{
"contractID" : "000000000000102"
}
],
"Ids" : [
3,
387,
100,
500,
274,
283,
328,
400,
600
]
},
"dateUpdate" : "2020-12-07T13:15:00.408Z"
}
},
...
}
I need a search query that will fetch only inner "Ids" field from source and nothing more. How can I do this?

You can use _source in inner_hits, in the following way
Index Mapping:
{
"mappings": {
"properties": {
"Person": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "Person",
"query": {
"match_all": {}
},
"inner_hits": {
"_source": {
"includes": [
"Person.Ids"
]
}
}
}
}
]
}
}
}
Search Result:
"inner_hits": {
"Person": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65237264",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Person",
"offset": 0
},
"_score": 1.0,
"_source": {
"Ids": [
3,
387,
100,
500,
274,
283,
328,
400,
600
]
}
}
]
}
}
}
You can also use nested inner_hits with _source disabled and docvalue_fields, in the following way:
{
"query": {
"nested": {
"path": "Person",
"query": {
"match_all": {}
},
"inner_hits": {
"_source" : false,
"docvalue_fields" : [
{
"field": "Person.Ids",
"format": "use_field_mapping"
}
]
}
}
}
}

Related

How to get specific items from nested object in elastic search

I've prepared an Elastic Search query in which I'm trying to fetch results from nested objects. The query looks something like this:
{
"from": 0,
"size": 100,
"_source": {
"excludes": [
"#version"
]
},
"query": {
"bool": {
"must": [
{
"term": {
"doc.workflow_id.keyword": "workflow1"
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "price"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*34*"
}
}
]
}
}
]
}
}
}
}
],
"must_not": []
}
}
}
Output:
"hits" : [
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv1",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "weight",
"value" : "10"
},
{
"name" : "color",
"value" : "green"
},
{
"name" : "city",
"value" : "#error"
}
]
}
}
},
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv2",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "color",
"value" : "green"
}
]
}
}
}
]
I've omitted a few trivial details in query and output for simplicity. The attributes array in the response is of type nested and contains name and value fields of type string.
I've put filters on attributes color and price, but as you can see, I'm getting other attributes too in the attributes array. Can I somehow pass specific attribute names to the ES query and get the value of those attributes only?
I tried using inner_hits in both nested queries, but it returns the attribute value only for the passed attribute name in the nested query.
E.g.
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
},
"inner_hits": {
"name": "two",
"_source": [
"doc.product_attributes.name",
"doc.product_attributes.value"
]
}
}
}
gives result
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "weight",
"value": "34"
},
{
"name": "color",
"value": "green"
},
{
"name": "city",
"value": "#ERROR"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
},
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv2",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "color",
"value": "green"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
}
]
}
Note the attribute name and value received inside the inner_hits object.
I also want the response to include the names and values of other attributes, including those on which I'm not applying any filter. For example, if I want to get attribute names and values for weight, color & city only, how do I do that?
I've checked this thread select matching objects from array in elasticsearch, but it doesn't solve my problem.

In Elasticsearch, how can I get the document with the max value for a nested field?

This is my mapping:
"script": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
}
and sample document below
PUT /btest/_create/1
{
"script": [
{
"name": "john",
"age": 14
}
]
}
PUT /btest/_create/2
{
"script": [
{
"name": "tt",
"age": 14
},
{
"name": "jj",
"age": 17
},
{
"name": "tim",
"age": 34
}
]
}
PUT /btest/_create/3
{
"script": [
{
"name": "john",
"age": 42
},
{
"name": "jj",
"age": 12
}
]
}
and I use a max aggregation to get the max age:
GET /btest/_search
{
"query": {
"nested": {
"path": "script",
"query": {
"match": {
"script.name": "john"
}
}
}
},
"aggs": {
"age": {
"nested": {
"path": "script"
},
"aggs": {
"script_age": {
"filter": {
"match": {
"script.name": "john"
}
},
"aggs": {
"length": {
"max": {
"field": "script.age"
}
}
}
}
}
}
}
}
But it returns all documents that match "script.name": "john".
I want to get only the document in which john has the max age.
Should I use an aggregation to get this document?
Or is there a way to use a query similar to max, without an aggregation, for a nested field?
According to your requirement, you need to fetch only those documents that match with name john. This can be achieved in the query section using a nested query with match query.
Now, to get the document having max-age (with name john) you can perform top hits aggregation with sort on script.age field.
{
"size": 0,
"query": {
"nested": {
"path": "script",
"query": {
"match": {
"script.name": "john"
}
}
}
},
"aggs": {
"nested-agg": {
"nested": {
"path": "script"
},
"aggs": {
"by_age": {
"top_hits": {
"sort": [
{
"script.age": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
The search response will be
"aggregations": {
"nested-agg": {
"doc_count": 3,
"by_age": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "71081556",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "script",
"offset": 0
},
"_score": null,
"_source": {
"name": "john",
"age": 42
},
"sort": [
42
]
}
]
}
}
}
}
Option 2
You can use sort with the nested query, to get the document having max age
{
"size": 1,
"sort": [
{
"script.age": {
"order": "desc",
"nested": {
"path": "script",
"filter": {
"term": {
"script.name": "john"
}
}
}
}
}
]
}
But in this case, the response contains the entire document, instead of only the matching nested object:
"hits": [
{
"_index": "71081556",
"_type": "_doc",
"_id": "3",
"_score": null,
"_source": {
"script": [
{
"name": "john",
"age": 42
},
{
"name": "jj",
"age": 12
}
]
},
"sort": [
42
]
}
]

Elasticsearch - Nested field sorting

I have an index defined by the following :
{
"mappings": {
"properties": {
"firstName": {
"type": "keyword"
},
"lastName": {
"type": "keyword"
},
"affiliations": {
"type": "nested",
"properties": {
"organisation": {
"type": "keyword"
},
"team": {
"type": "keyword"
},
"dateBeginning": {
"type": "date",
"format": "yyyy-MM-dd"
},
"dateEnding": {
"type": "date",
"format": "yyyy-MM-dd"
},
"country": {
"type": "keyword"
}
}
}
}
}
}
Basically, for each researcher (researchers is how I named my index) I want to sort the affiliations by dateBeginning, in descending order. I've read about inner hits in the official Elasticsearch docs and, not being exactly sure how they work, I've tried this for the researcher with _id : 3 :
{
"query": {
"nested": {
"path": "affiliations",
"query": {
"match": { "_id": 3 }
},
"inner_hits": {
"sort" : [
{
"affiliations.dateBeginning" : {
"order" : "desc",
"nested": {
"path": "affiliations",
"filter": {
"term": { "_id": 3 }
}
}
}
}
]
}
}
}
}
And it doesn't really work.
The researcher with _id : 3 has two affiliations, one with dateBeginning set to 2015-06-30 and the other to 2017-06-30. So I've also tried this:
{
"sort" : [
{
"affiliations.dateBeginning" : {
"order" : "desc",
"nested": {
"path": "affiliations"
}
}
}
],
"query": {
"nested": {
"path": "affiliations",
"query": {
"match": { "_id": 3 }
}
}
}
}
And it doesn't sort the affiliations by dateBeginning.
I've also tried to do it with the SQL API (since I'm more familiar with SQL language), and still, I can't get the data I want.
So I'm quite new to ElasticSearch, I'm using version 7.10, and I don't know what else to do.
Any suggestions about what I'm doing wrong here ?
EDIT
here's an example of a document from that index:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [{
"_index": "researchers",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"firstName": "Kimmich",
"lastName": "Yoshua",
"affiliations": [{
"organisation": "University of Ottawa",
"team": "Neural Network Elite Team",
"dateBeginning": "2015-06-30",
"datEnding": "2017-01-31",
"country": "Canada"
},
{
"organisation": "University of Montréal",
"team": "Picture processing team",
"dateBeginning": "2017-06-30",
"dateEnding": null,
"country": "Canada"
}
]
}
}]
}
}
Once you're inside the nested query, the inner hits don't need the extra nested query. Remove it and the sort will work properly:
{
"query": {
"nested": {
"path": "affiliations",
"query": {
"match": {
"_id": 3
}
},
"inner_hits": {
"sort": [
{
"affiliations.dateBeginning": {
"order": "desc"
}
}
]
}
}
}
}
Note that this wouldn't sort the top-level hits -- only the inner hits.
But you can sort on the top level by the values of affiliations.dateBeginning like so:
POST researchers/_search
{
"sort": [
{
"affiliations.dateBeginning": {
"order": "desc",
"nested_path": "affiliations"
}
}
]
}
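but note that the syntax is now slightly different: instead of path we're saying nested_path. (Be aware that nested_path is deprecated since Elasticsearch 6.1 and removed in 8.0; on current versions use the equivalent "nested": { "path": "affiliations" } object inside the sort clause.)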

I want to show Top 10 records and apply filter for specific fields in Elastic search

This is the query to get the Top 10 records. There is a field named Answer, and some records have the value "UNHANDLED" in it. I want to exclude the records whose Answer field is UNHANDLED.
How do I write a query that both gets the Top 10 and excludes UNHANDLED?
GET /logstash-sdc-mongo-abcsearch/_search?size=0
{
"aggs": {
"top_tags": {
"terms": {
"field": "question.keyword"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"answer"
]
},
"size": 1
}
}
}
}
}
}
You can use the must_not clause to exclude the documents that contain UNHANDLED in the answer field. Try out the below query -
Index Mapping:
{
"mappings": {
"properties": {
"question": {
"type": "keyword"
},
"answer": {
"type": "keyword"
}
}
}
}
Index Data:
{
"question": "a",
"answer": "b"
}
{
"question": "c",
"answer": "UNHANDLED"
}
Search Query:
{
"query": {
"bool": {
"must_not": {
"term": {
"answer": "UNHANDLED"
}
}
}
},
"aggs": {
"top_tags": {
"terms": {
"field": "question"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"answer"
]
},
"size": 1
}
}
}
}
}
}
Search Result:
"aggregations": {
"top_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "a",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "65563925",
"_type": "_doc",
"_id": "1",
"_score": 0.0,
"_source": {
"answer": "b"
}
}
]
}
}
}
]
}
}
Update 1:
Based on the comments below, try out the below query:
{
"query": {
"bool": {
"must_not": {
"term": {
"answer": "UNHANDLED"
}
},
"must": {
"term": {
"source": "sonax"
}
}
}
},
"aggs": {
"top_tags": {
"terms": {
"field": "question"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"answer"
]
},
"size": 1
}
}
}
}
}
}

ElasticSearch: Get distinct field values from multi_match

My query with multiple multi_match clauses looks as follows:
"query": {
"bool": {
"should" : [
{"multi_match" : {
"query": "test",
"fields": ["field1^15", "field2^8"],
"tie_breaker": 0.2,
"minimum_should_match": "50%"
}},
{"multi_match" : {
"query": "test2",
"fields": ["field1^15", "field2^8"],
"tie_breaker": 0.2,
"minimum_should_match": "50%"
}
}
]
}
}
I want to get all distinct field1 values which match the query. How can I realize that?
EDIT:
Mapping:
"field1": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "nGram_analyzer"
}
This is what I tried so far (I still get multiple identical field1 values):
"query": {
"bool": {
"should" : [
{"multi_match" : {
"query": "test",
"fields": ["field1^15", "field2^8"],
"tie_breaker": 0.2,
"minimum_should_match": "50%"
}},
{"multi_match" : {
"query": "test2",
"fields": ["field1^15", "field2^8"],
"tie_breaker": 0.2,
"minimum_should_match": "50%"
}
}
]
}
},
"aggs": {
"field1": {
"terms": {
"field": "field1.keyword",
"size": 100 //1
}
}
}
UPDATE:
The query
GET /test/test/_search
{
"_source": ["field1"],
"size": 10000,
"query": {
"multi_match" : {
"query": "test",
"fields": ["field1^15", "field2^8"],
"tie_breaker": 0.2,
"minimum_should_match": "50%"
}
},
"aggs": {
"field1": {
"terms": {
"field": "field1.keyword",
"size": 1
}
}
}
}
results in
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 35,
"max_score": 110.26815,
"hits": [
{
"_index": "test",
"_type": "test",
"_id": "AVzz99c4X4ZbfhscNES7",
"_score": 110.26815,
"_source": {
"field1": "test-hier"
}
},
{
"_index": "test",
"_type": "test",
"_id": "AVzz8JWGX4ZbfhscMwe_",
"_score": 107.45808,
"_source": {
"field1": "test-hier"
}
},
{
"_index": "test",
"_type": "test",
"_id": "AVzz8JWGX4ZbfhscMwe_",
"_score": 107.45808,
"_source": {
"field1": "test-da"
}
},
...
So actually there should only be one "test-hier".
You can add a terms aggregation on the field1.keyword field and you'll get all distinct values (you can change size to any other value that better matches the cardinality of your field):
{
"size": 0,
"query": {...},
"aggs": {
"field1": {
"terms": {
"field": "field1.keyword",
"size": 100
},
"aggs": {
"single_hit": {
"top_hits": {
"size": 1
}
}
}
}
}
}

Resources