Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent - elasticsearch

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to create the two nested objects (Labs nested in Patient, and Results nested in Labs), populate them, and query them. What I am unable to do is create a query that constrains the results to a single Lab and then matches on more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Related

ElasticSearch won't search specific field

I have a problem searching a specific field inside my index.
A little background:
In my project we need to search inside a terminology server, similar to FHIR but our own implementation.
So we have an object that contains a Code (e.g. 123564/A), multiple translations as term/display (e.g. "urine problem"), and mappings to other codes that are equal to that code but in a different system (ICD-10, SNOMED-CT, ICPC-2, ...). Example of what has been indexed:
{
"Code": "10008220/A1",
"EffectiveTime": "0001-01-01T00:00:00Z",
"Active": true,
"System": "ibui",
"Purpose": "",
"Descriptions": [
{
"DescriptionId": "2464cf5c-d4fc-4a61-b6bc-746d003cb4ef",
"Code": "10008220/A1",
"System": "ibui",
"Term": "gebroken arm",
"LanguageId": "3d50c237-0add-43e7-92a2-5edf1ac7c6ee",
"FSN": false,
"Preferred": true,
"EffectiveTime": "0001-01-01T00:00:00Z",
"Active": true,
"SendVersion": "2021-12-07T17:01:53.786755Z",
"Purpose": ""
},
{
"DescriptionId": "95501583-9f24-4964-bbc9-1a6e95eba30f",
"Code": "10008220/A1",
"System": "ibui",
"Term": "fracture du bras",
"LanguageId": "1238dde0-08df-4ae0-8676-59919f66737e",
"FSN": false,
"Preferred": true,
"EffectiveTime": "0001-01-01T00:00:00Z",
"Active": true,
"SendVersion": "2021-12-07T17:01:53.786755Z",
"Purpose": ""
}
],
"Mappings": [
{
"MappingId": "",
"FromSys": "ibui",
"From": "10008220/A1",
"ToSys": "icd-10",
"To": "T10",
"EffectiveTime": "0001-01-01T00:00:00Z",
"Active": true
},
{
"MappingId": "",
"FromSys": "ibui",
"From": "10008220/A1",
"ToSys": "icpc-2",
"To": "L76",
"EffectiveTime": "0001-01-01T00:00:00Z",
"Active": true
}
],
"SendVersion": "2021-12-07T17:01:53.786755Z"
}
The problem:
We can search on 2 different fields: Code and Term. When searching, we also have to apply filters for a specific language code (Dutch, ...) or a system such as ICD-10 or ICPC-2.
I have a working query that returns the above object when searching in 1 field (Descriptions.Term). It is the following:
working query
{
"query": {
"bool": {
"must": {
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"*": {}
}
}
},
"path": "Descriptions",
"query": {
"bool": {
"should": [
{
"multi_match": {
"fields": [
"Descriptions.Term",
"Descriptions.Term._2gram",
"Descriptions.Term._3gram"
],
"query": "gebroken*~ n",
"type": "bool_prefix"
}
}
],
"filter": [
{
"bool": {
"should": [
{
"term": {
"Descriptions.System": "ibui"
}
},{
"term": {
"Descriptions.System": "icd-10"
}
},{
"term": {
"Descriptions.System": "icpc-2"
}
}
],
"minimum_should_match": "1"
}
},
{
"term": {
"Descriptions.Active": "true"
}
},
{
"term": {
"Descriptions.LanguageId": "3d50c237-0add-43e7-92a2-5edf1ac7c6ee"
}
}
]
}
}
}
}
}
}
}
But sometimes we need to search in multiple fields.
When I add the Descriptions.Code field to the fields list, the query stops working and I can't figure out why. I have it declared inside my mapping, so it should be searchable?
I'm searching for the Code of the object above in both fields (Descriptions.Term & Descriptions.Code), but it doesn't return the hit.
not working query
{
"query": {
"bool": {
"must": {
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"*": {}
}
}
},
"path": "Descriptions",
"query": {
"bool": {
"should": [
{
"multi_match": {
"fields": [
"Descriptions.Term",
"Descriptions.Term._2gram",
"Descriptions.Term._3gram",
"Descriptions.Code"
],
"query": "10008220*~ n",
"type": "bool_prefix"
}
}
],
"filter": [
{
"bool": {
"should": [
{
"term": {
"Descriptions.System": "ibui"
}
},{
"term": {
"Descriptions.System": "icd-10"
}
},{
"term": {
"Descriptions.System": "icpc-2"
}
}
],
"minimum_should_match": "1"
}
},
{
"term": {
"Descriptions.Active": "true"
}
},
{
"term": {
"Descriptions.LanguageId": "3d50c237-0add-43e7-92a2-5edf1ac7c6ee"
}
}
]
}
}
}
}
}
}
}
mapping:
{
"settings": {
"number_of_shards": 1,
"analysis": {
"analyzer": {
"autocomplete": {
"tokenizer": "custom_tokenizer"
}
},
"tokenizer": {
"custom_tokenizer": {
"type": "ngram",
"min_gram": 2,
"max_gram": 6,
"token_chars": [
"letter",
"digit",
"symbol",
"punctuation"
]
}
}
},
"max_ngram_diff" : "5"
},
"mappings": {
"properties": {
"Descriptions": {
"type": "nested",
"properties": {
"Term": {
"type": "search_as_you_type",
"analyzer": "autocomplete"
},
"Code": {
"type": "keyword",
"index": true
},
"System": {
"type": "keyword",
"index": true
},
"LanguageId": {
"type": "keyword",
"index": true
},
"Purpose": {
"type": "keyword",
"index": true
},
"Active": {
"type": "keyword",
"index": true
}
}
},
"Mappings": {
"properties": {
"To": {
"type": "keyword",
"index": true
},
"ToSys": {
"type": "keyword",
"index": true
}
}
}
}
}
}
Thank you for helping me out!

Must Match two different terms

I am looking to filter results where two sets of data match
I get hits when I specify "should" but not "must"
Here is my query. It works as expected with just the one "match", but if I add a second I get no hits, yet there are definitely records in the index that have a productSpecification.value of Brand and of 3 Years.
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "control*",
"fields": [
"name^15",
"description^5",
"productCode"
]
}
}
]
}
}
"post_filter": {
"nested": {
"path": "productSpecification",
"query": {
"bool":{
"must": [
{
"match": {
"productSpecification.value":"3 years"
}
},
{
"match": {
"productSpecification.value":"Brand"
}
}
]
}
}
}
}
}
Just banging my head against the desk now trying different combinations of JSON trying to get this to return some values
{
"myindex": {
"mappings": {
"product": {
"properties": {
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dispatchTimeInDays": {
"type": "integer"
},
"height": {
"type": "integer"
},
"html": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"leadTimeInDays": {
"type": "integer"
},
"length": {
"type": "integer"
},
"limitedStock": {
"type": "boolean"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"notes": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"price": {
"type": "double"
},
"productBrandId": {
"type": "integer"
},
"productCategory": {
"properties": {
"code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullPath": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"parentProductCategoryId": {
"type": "integer"
},
"productCategoryId": {
"type": "integer"
}
}
},
"productCategoryId": {
"type": "integer"
},
"productCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productId": {
"type": "integer"
},
"productImage": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productSpecification": {
"type": "nested",
"properties": {
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "keyword"
},
"productId": {
"type": "long"
},
"productSpecificationId": {
"type": "long"
},
"specificationId": {
"type": "long"
},
"value": {
"type": "keyword"
}
}
},
"productTypeId": {
"type": "integer"
},
"reviewRating": {
"type": "double"
},
"reviewRatingCount": {
"type": "integer"
},
"sellingPriceGroupId": {
"type": "integer"
},
"stockAvailable": {
"type": "integer"
},
"taxRateId": {
"type": "integer"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"weightKg": {
"type": "double"
},
"width": {
"type": "integer"
}
}
}
}
}
}
Here is an example of a product I would expect to be returned by this query. It has a productSpecification.value of "3 years" AND a productSpecification.value of "Brand".
{
"_index": "myindex",
"_type": "product",
"_id": "uQEDbGEBfHre1rYmtsWB",
"_score": 141.5985,
"_source": {
"productId": 14587,
"name": "Brand Wave Multi Channel Remote Control",
"productCode": "111",
"productCategoryId": 17,
"length": 3,
"height": 0,
"productTypeId": 1,
"url": "brand-wave-multi-channel-remote-control",
"productBrandId": 3,
"width": 0,
"dispatchTimeInDays": 3,
"leadTimeInDays": 3,
"stockAvailable": 0,
"weightKg": 0.001,
"reviewRatingCount": 0,
"limitedStock": false,
"price": 63,
"productImage": "Wave-Remote-Control.jpg",
"productCategory": {
"productCategoryId": 17,
"name": "Accessories",
"fullPath": "Accessories",
"code": "00011"
},
"productSpecification": [{
"productSpecificationId": 852888,
"productId": 14587,
"specificationId": 232,
"name": "Brand",
"description": "This is the product manufacturer",
"value": "Brand"
},
{
"productSpecificationId": 852889,
"productId": 14587,
"specificationId": 92,
"name": "Type",
"value": "Remote control"
},
{
"productSpecificationId": 852891,
"productId": 14587,
"specificationId": 10,
"name": "Guarantee",
"value": "3 years"
},
{
"productSpecificationId": 852892,
"productId": 14587,
"specificationId": 599,
"name": "Power Voltage",
"value": "1.5 V"
},
{
"productSpecificationId": 852893,
"productId": 14587,
"specificationId": 29,
"name": "Dimensions",
"value": "157mm x 38mm x 19mm"
},
{
"productSpecificationId": 852894,
"productId": 14587,
"specificationId": 602,
"name": "Operation Range",
"value": "Up to 40m"
},
{
"productSpecificationId": 852895,
"productId": 14587,
"specificationId": 601,
"name": "Power Supply",
"value": "3V DC; 2 x AAA batteries"
}
]
}
}
After numerous amends my query is now like
{
"size": 100,
"aggs": {
"specifications": {
"nested": {
"path": "productSpecification"
},
"aggs": {
"groups": {
"terms": {
"field": "productSpecification.name"
},
"aggs": {
"attribute": {
"terms": {
"field": "productSpecification.value"
}
}
}
}
}
},
"price_range": {
"range": {
"field": "price",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100,
"to": 150
},
{
"from": 150,
"to": 200
},
{
"from": 200,
"to": 250
},
{
"from": 250
}
]
}
}
},
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "control*",
"fields": [
"name^15",
"description^5",
"productCode"
]
}
}
]
}
},
"post_filter": {
"query":{
"nested": {
"path": "productSpecification",
"query": {
"bool":{
"should": [{
"bool": {
"must":[{
"term": {
"productSpecification.name.keyword": "Brand"
}
},
{
"term": {
"productSpecification.value": "Brand"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Guarantee"
}
},
{
"term": {
"productSpecification.value": "3 years"
}
}
]
}
}
]
}
}
}
}
}
}
productSpecification.value is a keyword field, so you should query it with a term query instead of match. You also can't put both conditions in a single must, because each nested document has only one value: a nested doc whose value is "Brand" can't also have the value "3 years". In your case use should, because it acts as a logical OR operator:
{
"query": {
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Brand"
}
},
{
"term": {
"productSpecification.value": "Brand"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Guarantee"
}
},
{
"term": {
"productSpecification.value": "3 years"
}
}
]
}
}
]
}
}
}
}
}
I finally got this working after lots of experimentation and reading.
I'm posting it here in case it is of use to others with similar problems:
{
"post_filter": {
"bool": {
"filter": [{
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"filter": [{
"term": {
"productSpecification.name": "Brand"
}
},
{
"terms": {
"productSpecification.value": [
"Brand1"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"filter": [{
"term": {
"productSpecification.name": "Guarantee"
}
},
{
"terms": {
"productSpecification.value": [
"3 years"
]
}
}
]
}
}
}
}
]
}
}
}

ElasticSearch sorting nested with condition

With ElasticSearch I want to insert a condition to sort nested fields.
I have this mapping
{
"dario": {
"mappings": {
"agents": {
"properties": {
"applications": {
"type": "nested",
"properties": {
"companies": {
"type": "nested",
"properties": {
"active": {
"type": "integer"
},
"application_date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"application_date_month": {
"type": "date",
"format": "yyyy-MM"
},
"application_id": {
"type": "long"
},
"assigned_date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"buy_date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"date_month": {
"type": "date",
"format": "yyyy-MM"
},
"favorite": {
"type": "integer"
},
"id": {
"type": "long"
},
"notes": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"id": {
"type": "integer"
},
"note": {
"type": "string",
"analyzer": "standard"
}
}
},
"score": {
"type": "long"
},
"state": {
"type": "long"
},
"view": {
"type": "integer"
},
"visible": {
"type": "integer"
},
"visible_date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
"count": {
"type": "integer"
},
"sectors": {
"type": "long"
}
}
}
}
}
}
}
}
I want to sort by the field applications.companies.buy_date but, if this is null, I want to fall back to applications.companies.date.
I tried with groovy script:
{
"size": 10,
"from": 0,
"sort": [
{
"_script": {
"script": "doc['applications.companies.buy_date'] != null ? doc['applications.companies.buy_date'].date.getMillisOfDay() : doc['applications.companies.date'].date.getMillisOfDay()",
"type": "number",
"nested_filter": {
"match": {
"applications.companies.id": 711
}
},
"order": "desc"
}
}
],
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"bool": {
"must": [
{
"nested": {
"path": "applications.companies",
"query": {
"bool": {
"must": [
{
"match": {
"applications.companies.active": 1
}
},
{
"match": {
"applications.companies.id": 711
}
},
{
"bool": {
"should": [
{
"exists": {
"field": "applications.companies.buy_date"
}
},
{
"match": {
"applications.companies.favorite": 1
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
]
}
}
}
but nothing changes. Any ideas?
UPDATE
I resolved the issue with this solution:
{
"size": 10,
"from": 0,
"_source": [
"id"
],
"sort": [
{
"_script": {
"script": {
"script": " if (doc['applications.companies.id'].value == 711) { return (doc['applications.companies.buy_date'].value > 0) ? doc['applications.companies.buy_date'].value : doc['applications.companies.date'].value; } else { return null; } ",
"lang": "groovy"
},
"type": "number",
"order": "desc",
"nested_path": "applications.companies",
"nested_filter": {
"match": {
"applications.companies.id": 711
}
}
}
}
],
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"bool": {
"must": [
{
"nested": {
"path": "applications.companies",
"query": {
"bool": {
"must": [
{
"match": {
"applications.companies.active": 1
}
},
{
"match": {
"applications.companies.id": 711
}
},
{
"bool": {
"should": [
{
"exists": {
"field": "applications.companies.buy_date"
}
},
{
"match": {
"applications.companies.favorite": 1
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
]
}
}
}

ElasticSearch double nested sorting

I have documents which look like this (here is example):
{
"user": "xyz",
"state": "FINISHED",
"finishedTime": 1465566467161,
"jobCounters": {
"counterGroup": [
{
"counterGroupName": "org.apache.hadoop.mapreduce.FileSystemCounter",
"counter": [
{
"name": "FILE_BYTES_READ",
"mapCounterValue": 206509212380,
"totalCounterValue": 423273933523,
"reduceCounterValue": 216764721143
},
{
"name": "FILE_BYTES_WRITTEN",
"mapCounterValue": 442799895522,
"totalCounterValue": 659742824735,
"reduceCounterValue": 216942929213
},
{
"name": "HDFS_BYTES_READ",
"mapCounterValue": 207913352565,
"totalCounterValue": 207913352565,
"reduceCounterValue": 0
},
{
"name": "HDFS_BYTES_WRITTEN",
"mapCounterValue": 0,
"totalCounterValue": 89846725044,
"reduceCounterValue": 89846725044
}
]
},
{
"counterGroupName": "org.apache.hadoop.mapreduce.JobCounter",
"counter": [
{
"name": "TOTAL_LAUNCHED_MAPS",
"mapCounterValue": 0,
"totalCounterValue": 13394,
"reduceCounterValue": 0
},
{
"name": "TOTAL_LAUNCHED_REDUCES",
"mapCounterValue": 0,
"totalCounterValue": 720,
"reduceCounterValue": 0
}
]
}
]
}
}
Now I want to sort this data to get the top 15 documents on the basis of totalCounterValue where counter.name is FILE_BYTES_READ. I have tried nested sorting on this, but no matter which key name I write in counter.name, it always sorts on the basis of HDFS_BYTES_READ. Can anyone please help me with my query?
{
"_source": true,
"size": 15,
"query": {
"bool": {
"must": [
{
"term": {
"state": {
"value": "FINISHED"
}
}
},
{
"range": {
"startedTime": {
"gte": "now - 4d",
"lte": "now"
}
}
}
]
}
},
"sort": [
{
"jobCounters.counterGroup.counter.totalCounterValue": {
"order": "desc",
"nested_path": "jobCounters.counterGroup",
"nested_filter": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
}
}
}
]}
This is the mapping for jobCounters we have created:
"jobCounters": {
"type": "nested",
"include_in_parent": true,
"properties" : {
"counterGroup": {
"type": "nested",
"include_in_parent": true,
"properties": {
"counterGroupName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"counter" : {
"type": "nested",
"include_in_parent": true,
"properties": {
"reduceCounterValue": {
"type": "long"
},
"name": {
"type": "string",
"analyzer": "english",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"totalCounterValue": {
"type": "long"
},
"mapCounterValue": {
"type": "long"
}
}
}
}
}
}
}
I followed nested sorting documentation of ElasticSearch and came up with this query, but I don't know why it is always sorting the totalCounterValue of HDFS_BYTES_READ irrespective of jobCounters.counterGroup.counter.name's value.
You can try something like this:
curl -XGET 'http://localhost:9200/index/jobCounters/_search' -d '
{
"size": 15,
"query": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
},
"sort": [
{
"jobCounters.counterGroup.counter.totalCounterValue": {
"order": "desc",
"nested_path": "jobCounters.counterGroup",
"nested_filter": {
"nested": {
"path": "jobCounters.counterGroup.counter",
"filter": {
"term": {
"jobCounters.counterGroup.counter.name": "file_bytes_read"
}
}
}
}
}
}
]
}
'
Read the end of this document. It explains that we have to repeat the same query in nested_filter too.

function_score query in elasticsearch won't change score

I have an index with following doc structure: Company > Jobs (nested)
Companies have a name and jobs have an address. I search jobs by address by default. Along with this, I'm trying to boost certain companies by their name using a function_score query. But my query doesn't seem to be boosting anything or changing any scores.
{
"query": {
"filtered": {
"filter": {},
"query": {
"function_score": {
"query": {
"nested": {
"path": "active_jobs",
"score_mode": "max",
"query": {
"multi_match": {
"query": "United States",
"type": "cross_fields",
"fields": [
"active_jobs.address.city",
"active_jobs.address.country",
"active_jobs.address.state"
]
}
},
"inner_hits": {
"size": 1000
}
}
},
"functions": [
{
"filter": {
"term": {
"name": "Amazon"
}
},
"weight": 100
}
]
}
}
}
},
"size": 30,
"from": 0
}
[Update 1]
Here is the mapping for active_jobs property:
"active_jobs": {
"type": "nested",
"properties": {
"active": {
"type": "boolean"
},
"address": {
"properties": {
"city": {
"type": "string"
},
"country": {
"type": "string"
},
"state": {
"type": "string"
},
"state_code": {
"type": "string"
}
}
},
"id": {
"type": "long"
},
"title": {
"type": "string"
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
}
}
}

Resources