Elasticsearch how to sort with condition - sorting

On my ElasticSearch (2.x) I have documents like this:
{
"title": "A good title",
"formats": [{
"name": "pdf",
"prices": [{
"price": 11.99,
"currency": "EUR"
}, {
"price": 18.99,
"currency": "AUD"
}]
}]
}
I'd like to sort documents by formats.prices.price but only where the formats.prices.currency === 'EUR'
I tried to do a nested field on formats.prices and then run this query:
{
"query": {
"filtered": {
"query": {
"and": [
{
"match_all": {}
}
]
}
}
},
"sort": {
"formats.prices.price": {
"order": "desc",
"nested_path": "formats.prices",
"nested_filter": {
"term": {
"currency": "EUR"
}
}
}
}
}
But unfortunately I cannot get the right order.
UPDATE:
Relevant part of mapping:
"formats": {
"properties": {
"name": {
"type": "string"
},
"prices": {
"type": "nested",
"include_in_parent": true,
"properties": {
"currency": {
"type": "string"
},
"price": {
"type": "double"
}
}
}
}
},

i hope this will solve your problem
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "formats.prices",
"filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}
},
"from": 0,
"size": 50,
"sort": [
{
"formats.prices.price": {
"order": "asc",
"nested_path": "formats.prices",
"nested_filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Elasticsearch Bucket sort on nested field

I have a problem with a bucket sort on nested field on Elastic 7.1.0:
My index has the following mapping:
{
"mapping": {
"dynamic": "strict",
"properties": {
"created_at_timestamp": {
"type": "date"
},
"url": {
"type": "keyword",
},
"title": {
"type": "keyword",
},
"entities": {
"type": "nested",
"properties": {
"counter": {
"type": "long"
},
"metric": {
"type": "long"
},
"id": {
"type": "long"
},
"relevance": {
"type": "float"
},
"weighted_metric": {
"type": "float"
}
}
}
}
}
}
and I need to order this documents by "weighted_metric", filtered for a specific entity id. I wrote this query:
GET my_index/_search?size=0
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "entities",
"query": {
"term": {
"entities.id": "27374"
}
}
}
}
],
"must_not": [
{
"term": {
"title": {
"value": ""
}
}
}
]
}
},
"aggs": {
"by_url_and_title": {
"composite": {
"sources": [
{
"final_url": {
"terms": {
"field": "final_url"
}
}
},
{
"title": {
"terms": {
"field": "title"
}
}
}
]
},
"aggs": {
"sum_metric": {
"nested": {
"path": "entities"
},
"aggs": {
"weightedmetric": {
"filters": {
"filters": {
"new": {
"bool": {
"should": [
{
"term": {
"entities.id": "27374"
}
}
]
}
}
}
},
"aggs": {
"wmetric": {
"sum": {
"field": "entities.weighted_metric"
}
}
}
},
"w_sort": {
"bucket_sort": {
"sort": [
{
"weightedmetric.wmetric": {
"order": "desc"
}
}
],
"size": 10
}
}
}
}
}
}
}
}
And I have this error:
{
"error": {
"root_cause": [],
"type": "search_phase_execution_exception",
"reason": "",
"phase": "fetch",
"grouped": true,
"failed_shards": [],
"caused_by": {
"type": "class_cast_exception",
"reason": "org.elasticsearch.search.aggregations.bucket.nested.InternalNested cannot be cast to org.elasticsearch.search.aggregations.InternalMultiBucketAggregation"
}
},
"status": 503
}
If I don't try to order the buckets everything works fine.
Can someone help me with this query? I need to order the buckets by weighted_metric. thanks

Highlight nested object in Elasticsearch

Here is my sample dataset,
{
"parent":[
{
"name":"John Doe 1",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 1",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 2",
"height":100.00,
"width":100.00
}
]
},
{
"name":"John Doe 2",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 3",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 4",
"height":100.00,
"width":100.00
}
]
}
]
}
And my definition:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"default": {
"type": "simple"
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"parent": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "text"
},
"sex": {
"type": "text"
},
"child": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"height": {
"type": "float"
},
"width": {
"type": "float"
}
}
}
}
}
}
}
}
}
I'm using the following query to look for matches in the parent.name property and can get highlights.
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John",
"fuzziness": "AUTO:3,6",
"prefix_length": "0"
}
}
}
]
}
}
}
}
],
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}
Is there a way to get inline highlights for the matches in the child.name properties also so that it would be easy to find exactly which element of that corresponding array got matched?
For example, for the given sample data, if I search by "Doe", I'm expecting to get 6 hits, whereas if I search by "Jane", I would get only 4.
You can simply add another nested query clause inside you top level should.
Here's how your query should look:
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John Doe 1"
}
}
}
]
}
}
}
},
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.child.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent.child",
"query": {
"bool": {
"must": [
{
"match": {
"parent.child.name": {
"query": "Jane Doe 1"
}
}
}
]
}
}
}
}
],
"minimum_should_match": 1
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}

How can I stack search criteria using & in a nested query in ElasticSearch

I have this index mapping with nested fields:
"customerpropertieses": {
"_parent": {
"type": "customerprofile"
},
"_routing": {
"required": true
},
"properties": {
"id": {
"type": "string"
},
"parentId": {
"type": "string"
},
"properties": {
"type": "nested",
"properties": {
"extentionPropertyId": {
"type": "long"
},
"propertyName": {
"type": "string"
},
"value": {
"type": "string"
}
}
}
}
}
I want to find customerpropertieses where propertyName=criteria1 & value=value1 & propertyName=criteria2 & value=value2
The query I'm generating by hand is this:
{
"from": 0,
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"query": {
"nested": {
"query": {
"bool": {
"must": [
{
"match": {
"properties.propertyName": {
"query": "criteria1 "
}
}
},
{
"match": {
"properties.value": {
"boost": 10.0,
"query": "value1"
}
}
},
{
"match": {
"properties.propertyName": {
"query": "criteria2"
}
}
},
{
"match": {
"properties.value": {
"boost": 10.0,
"query": "value2"
}
}
}
]
}
},
"path": "properties",
"inner_hits": {
"explain": false
},
"_name": "nested_properties"
}
}
}
I get 0 results back and I certainly have data with those characteristics. The search works fine when I look only propertyName=criteria1 & value=value1 or with propertyName=criteria2 & value=value2'
The question is how can I stack search criteria using & in a nested query?
Try the following query. From the look of your query i take a guess you want to match documents which had two nested documents with values value1,criteria1 and value2,criteria2 in value and criteria field respectively.
{
"from": 0,
"size": 10,
"sort": [{
"_score": {
"order": "desc"
}
}],
"query": {
"bool": {
"must": [{
"nested": {
"query": {
"bool": {
"must": [{
"match": {
"properties.propertyName": {
"query": "criteria2"
}
}
}, {
"match": {
"properties.value": {
"boost": 10.0,
"query": "value2"
}
}
}]
}
},
"path": "properties",
"inner_hits": {
"explain": false
},
"_name": "nested_properties"
}
}, {
"nested": {
"query": {
"bool": {
"must": [{
"match": {
"properties.propertyName": {
"query": "criteria1 "
}
}
}, {
"match": {
"properties.value": {
"boost": 10.0,
"query": "value1"
}
}
}]
}
},
"path": "properties",
"inner_hits": {
"explain": false
},
"_name": "nested_properties"
}
}]
}
}
}
I indexed the following document making the guess and modified the query
The Following document will match the query above
{
"parentId" : "3434",
"properties" : [{
"extentionPropertyId" : 24,
"propertyName" : "criteria1",
"value" : "value1"
},{
"extentionPropertyId" : 24,
"propertyName" : "criteria2",
"value" : "value2"
}]
}
Hope this helps.
Thanks

ElasticSearch double nested sorting

I have documents which look like this (here is example):
{
"user": "xyz",
"state": "FINISHED",
"finishedTime": 1465566467161,
"jobCounters": {
"counterGroup": [
{
"counterGroupName": "org.apache.hadoop.mapreduce.FileSystemCounter",
"counter": [
{
"name": "FILE_BYTES_READ",
"mapCounterValue": 206509212380,
"totalCounterValue": 423273933523,
"reduceCounterValue": 216764721143
},
{
"name": "FILE_BYTES_WRITTEN",
"mapCounterValue": 442799895522,
"totalCounterValue": 659742824735,
"reduceCounterValue": 216942929213
},
{
"name": "HDFS_BYTES_READ",
"mapCounterValue": 207913352565,
"totalCounterValue": 207913352565,
"reduceCounterValue": 0
},
{
"name": "HDFS_BYTES_WRITTEN",
"mapCounterValue": 0,
"totalCounterValue": 89846725044,
"reduceCounterValue": 89846725044
}
]
},
{
"counterGroupName": "org.apache.hadoop.mapreduce.JobCounter",
"counter": [
{
"name": "TOTAL_LAUNCHED_MAPS",
"mapCounterValue": 0,
"totalCounterValue": 13394,
"reduceCounterValue": 0
},
{
"name": "TOTAL_LAUNCHED_REDUCES",
"mapCounterValue": 0,
"totalCounterValue": 720,
"reduceCounterValue": 0
}
]
}
]
}
}
Now I want to sort this data to get TOP 15 documents on the basis of totalCounterValue where counter.name is FILE_BYTES_READ. I have tried nested sorting on this but no matter which key name I write in counter.name, it is always sorting on the basis of HDFS_BYTES_READ. Can anyone please help me with my query.
{
"_source": true,
"size": 15,
"query": {
"bool": {
"must": [
{
"term": {
"state": {
"value": "FINISHED"
}
}
},
{
"range": {
"startedTime": {
"gte": "now - 4d",
"lte": "now"
}
}
}
]
}
},
"sort": [
{
"jobCounters.counterGroup.counter.totalCounterValue": {
"order": "desc",
"nested_path": "jobCounters.counterGroup",
"nested_filter": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
}
}
}
]}
This is the mapping for jobCounters we have created:
"jobCounters": {
"type": "nested",
"include_in_parent": true,
"properties" : {
"counterGroup": {
"type": "nested",
"include_in_parent": true,
"properties": {
"counterGroupName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"counter" : {
"type": "nested",
"include_in_parent": true,
"properties": {
"reduceCounterValue": {
"type": "long"
},
"name": {
"type": "string",
"analyzer": "english",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"totalCounterValue": {
"type": "long"
},
"mapCounterValue": {
"type": "long"
}
}
}
}
}
}
}
I followed nested sorting documentation of ElasticSearch and came up with this query, but I don't know why it is always sorting the totalCounterValue of HDFS_BYTES_READ irrespective of jobCounters.counterGroup.counter.name's value.
you can try something like this,
curl -XGET 'http://localhost:9200/index/jobCounters/_search' -d '
{
"size": 15,
"query": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
},
"sort": [
{
"jobCounters.counterGroup.counter.totalCounterValue": {
"order": "desc",
"nested_path": "jobCounters.counterGroup",
"nested_filter": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
}
}
}
]
}
'
Read the end of this document. It explains that we have to repeat the same query in nested_filter too.

Resources