elasticsearch with range sub query in nested query - elasticsearch

I am trying to use a range query as a filter inside of a nested query.
Here is my ES mapping: there is one "id" field (long) and a nested field called "my_field" with five sub-fields in it.
{
"my_index": {
"mappings": {
"dynamic": "strict",
"properties": {
"id": {
"type": "long"
},
"my_field": {
"type": "nested",
"properties": {
"x": {
"type": "long"
},
"y": {
"type": "long"
},
"z": {
"type": "long"
},
"a": {
"type": "double"
},
"b": {
"type": "long"
}
}
}
}
}
}
}
My question is how to retrieve documents with a nested ES query that contains range sub-queries.
For example, I'm trying to get the two documents with id 11111 and id 22222, with the nested query restriction "x > 15" or "a > 0.5", and also with an inner hits size limit, which is 20 here.
{
"_source": false,
"query": {
"bool": {
"must": {
"nested": {
"inner_hits": {
"size": 20
},
"path": "my_field",
"query": {
"bool": {
"should": [
{
"range": {
"x": {
"from": 15,
"include_lower": true,
"include_upper": true,
"to": null
}
}
},
{
"range": {
"a": {
"from": 0.5,
"include_lower": true,
"include_upper": true,
"to": null
}
}
}
]
}
}
}
},
"should": [
{
"term": {
"id": 11111
}
},
{
"term": {
"id": 22222
}
}
]
}
},
"timeout": "5000ms",
"track_total_hits": true
}
However, no hits are returned.

Please use the dot notation in your query to include the complete path, e.g.,
"range": {
"my_field.x": { "from": ... }
}

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Filter query is not working in elastic search

I have a document and a search query as shown below. Elasticsearch is not able to fetch the documents for the matched exception IDs. I defined the mapping when I initially created the index, and since then it has not been able to fetch the records.
My mapping looks like this:
{
"mappings": {
"properties": {
"events": {
"type": "nested",
"properties": {
"data": {
"type": "nested",
"properties": {
"comments": {
"type": "nested",
"properties": {
"type": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
Here is the indexed document that I am testing the search query against:
{
"id": "1",
"score": 1,
"comments": [{
"id": "1",
"type": "Delayed",
You cannot directly use a query_string query on nested fields; you need to wrap it in a nested query:
GET <index-name>/_search
{
"query": {
"bool": {
"filter": [
{
"nested": { --> note
"path": "events.recommendationData",
"query": {
"query_string": {
"query": "\"1\" OR \"2\"",
"fields": [
"events.recommendationData.exceptionId"
],
"type": "best_fields",
"default_operator": "or",
"max_determinized_states": 10000,
"enable_position_increments": true,
"fuzziness": "AUTO",
"fuzzy_prefix_length": 0,
"fuzzy_max_expansions": 50,
"phrase_slop": 0,
"escape": false,
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1
}
}
}
}
]
}
},
"size": 1, --> note, to return documents ,keep 0 for only aggs
"aggs": {
"genres": {
"nested": {
"path": "events.recommendationData.recommendations"
},
"aggs": {
"nested_comments_recomms": {
"terms": {
"field": "events.recommendationData.recommendations.recommendationType"
}
}
}
}
}
}

How to set condition for aggregation in elasticsearch

Let's say I have an index
"products": {
"aliases": {},
"mappings": {
"products": {
"properties": {
"id": {
"type": "long"
},
"price": {
"type": "double"
},
"discount": {
"type": "double"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
I need to get all products with a discount in the range 0-10 and a price in the range 5-20, and whose total price is less than 200.
I know how to filter on fields
{
"query": {
"bool": {
"must": [
{
"range": {
"price": {
"from": 5,
"to": 200,
"include_lower": true,
"include_upper": true,
"boost": 1.0
}
}
},
{
"range": {
"discount": {
"from": 0,
"to": 10,
"include_lower": true,
"include_upper": true,
"boost": 1.0
}
}
}
]
}
}
}
Also, I know how to aggregate the price
"aggregations": {
"total_price": {
"sum": {
"field": "price"
}
}
}
But how to set the bound for this total_price?

Filtered Query with Term on collection

I'm taking over an old project to maintain, and I have been stuck on a query for a day.
The elasticsearch version I use is 1.7 but I don't think this is relevant to my problem.
I have some teacher documents :
{
"id": 244,
"degree": [],
"teacherDiplomaRelation": [],
"user": {
"enabled": true
},
"teacherClassDisciplineRelation": [
SEE BELOW
}
The teacherClassDisciplineRelation is N times this format (for every couple levelTree/Discipline that I have)
{
"levelTree": {
"id": 34,
"label": "1st year of college",
"slugLastLevelDisplay": "college"
},
"discipline": {
"id": 1,
"label": "Maths",
"slug": "maths"
},
"cityLocation": "10.1010,10.1010"
}
Now I want to get all teachers that are enabled and have maths in their disciplines. My query is:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"user.enabled": true
}
},
{
"term": {
"teacherClassDisciplineRelation.discipline.slug": "maths"
}
}
]
}
}
}
},
"size": {
"from": 0,
"size": 15
}
}
Mapping:
"teacherClassDisciplineRelation": {
"type": "nested",
"properties": {
"cityLocation": {
"type": "geo_point",
"store": true
},
"discipline": {
"properties": {
"id": {
"type": "string",
"store": true
},
"label": {
"type": "string",
"boost": 7.0,
"store": true,
"analyzer": "custom_analyzer"
},
"slug": {
"type": "string",
"boost": 7.0,
"index": "not_analyzed",
"store": true,
"norms": {
"enabled": true
}
}
}
}
Problem:
My query with "user.enabled": true gives me some results,
My query with "teacherClassDisciplineRelation.discipline.slug": "maths" always gives me 0 result but I've checked in the index, I should have some results.
I'm new to elasticsearch but I can't find out why my result is always 0.
Any idea why?
Since teacherClassDisciplineRelation is a nested field, you have to use a nested query.
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "teacherClassDisciplineRelation",
"query": {
"term": {
"teacherClassDisciplineRelation.discipline.slug": {
"value": "maths"
}
}
}
}
},
{
"term": {
"user.enabled": true
}
}
]
}
}
}
Hope this helps!!

ElasticSearch double nested sorting

I have documents which look like this (here is example):
{
"user": "xyz",
"state": "FINISHED",
"finishedTime": 1465566467161,
"jobCounters": {
"counterGroup": [
{
"counterGroupName": "org.apache.hadoop.mapreduce.FileSystemCounter",
"counter": [
{
"name": "FILE_BYTES_READ",
"mapCounterValue": 206509212380,
"totalCounterValue": 423273933523,
"reduceCounterValue": 216764721143
},
{
"name": "FILE_BYTES_WRITTEN",
"mapCounterValue": 442799895522,
"totalCounterValue": 659742824735,
"reduceCounterValue": 216942929213
},
{
"name": "HDFS_BYTES_READ",
"mapCounterValue": 207913352565,
"totalCounterValue": 207913352565,
"reduceCounterValue": 0
},
{
"name": "HDFS_BYTES_WRITTEN",
"mapCounterValue": 0,
"totalCounterValue": 89846725044,
"reduceCounterValue": 89846725044
}
]
},
{
"counterGroupName": "org.apache.hadoop.mapreduce.JobCounter",
"counter": [
{
"name": "TOTAL_LAUNCHED_MAPS",
"mapCounterValue": 0,
"totalCounterValue": 13394,
"reduceCounterValue": 0
},
{
"name": "TOTAL_LAUNCHED_REDUCES",
"mapCounterValue": 0,
"totalCounterValue": 720,
"reduceCounterValue": 0
}
]
}
]
}
}
Now I want to sort this data to get the top 15 documents on the basis of totalCounterValue where counter.name is FILE_BYTES_READ. I have tried nested sorting on this, but no matter which key name I write in counter.name, it always sorts on the basis of HDFS_BYTES_READ. Can anyone please help me with my query?
{
"_source": true,
"size": 15,
"query": {
"bool": {
"must": [
{
"term": {
"state": {
"value": "FINISHED"
}
}
},
{
"range": {
"startedTime": {
"gte": "now - 4d",
"lte": "now"
}
}
}
]
}
},
"sort": [
{
"jobCounters.counterGroup.counter.totalCounterValue": {
"order": "desc",
"nested_path": "jobCounters.counterGroup",
"nested_filter": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
}
}
}
]}
This is the mapping for jobCounters we have created:
"jobCounters": {
"type": "nested",
"include_in_parent": true,
"properties" : {
"counterGroup": {
"type": "nested",
"include_in_parent": true,
"properties": {
"counterGroupName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"counter" : {
"type": "nested",
"include_in_parent": true,
"properties": {
"reduceCounterValue": {
"type": "long"
},
"name": {
"type": "string",
"analyzer": "english",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"totalCounterValue": {
"type": "long"
},
"mapCounterValue": {
"type": "long"
}
}
}
}
}
}
}
I followed nested sorting documentation of ElasticSearch and came up with this query, but I don't know why it is always sorting the totalCounterValue of HDFS_BYTES_READ irrespective of jobCounters.counterGroup.counter.name's value.
You can try something like this:
curl -XGET 'http://localhost:9200/index/jobCounters/_search' -d '
{
"size": 15,
"query": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
},
"sort": [
{
"jobCounters.counterGroup.counter.totalCounterValue": {
"order": "desc",
"nested_path": "jobCounters.counterGroup",
"nested_filter": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
}
}
}
]
}
'
Read the end of this document. It explains that we have to repeat the same query in nested_filter too.

Resources