Must Match two different terms - elasticsearch

I am looking to filter results where two sets of data both match.
I get hits when I specify "should", but not when I specify "must".
Here is my query. It works as expected with just one "match" clause, but if I add a second one I get no hits, even though there are definitely records in the index that have a productSpecification.value of "Brand" and a productSpecification.value of "3 years".
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "control*",
"fields": [
"name^15",
"description^5",
"productCode"
]
}
}
]
}
}
"post_filter": {
"nested": {
"path": "productSpecification",
"query": {
"bool":{
"must": [
{
"match": {
"productSpecification.value":"3 years"
}
},
{
"match": {
"productSpecification.value":"Brand"
}
}
]
}
}
}
}
}
I'm just banging my head against the desk now, trying different combinations of JSON to get this to return some results. Here is the index mapping:
{
"myindex": {
"mappings": {
"product": {
"properties": {
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dispatchTimeInDays": {
"type": "integer"
},
"height": {
"type": "integer"
},
"html": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"leadTimeInDays": {
"type": "integer"
},
"length": {
"type": "integer"
},
"limitedStock": {
"type": "boolean"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"notes": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"price": {
"type": "double"
},
"productBrandId": {
"type": "integer"
},
"productCategory": {
"properties": {
"code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullPath": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"parentProductCategoryId": {
"type": "integer"
},
"productCategoryId": {
"type": "integer"
}
}
},
"productCategoryId": {
"type": "integer"
},
"productCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productId": {
"type": "integer"
},
"productImage": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productSpecification": {
"type": "nested",
"properties": {
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "keyword"
},
"productId": {
"type": "long"
},
"productSpecificationId": {
"type": "long"
},
"specificationId": {
"type": "long"
},
"value": {
"type": "keyword"
}
}
},
"productTypeId": {
"type": "integer"
},
"reviewRating": {
"type": "double"
},
"reviewRatingCount": {
"type": "integer"
},
"sellingPriceGroupId": {
"type": "integer"
},
"stockAvailable": {
"type": "integer"
},
"taxRateId": {
"type": "integer"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"weightKg": {
"type": "double"
},
"width": {
"type": "integer"
}
}
}
}
}
}
Here is an example of a product I would expect this query to return: it has one productSpecification.value of "3 years" AND another productSpecification.value of "Brand".
{
"_index": "myindex",
"_type": "product",
"_id": "uQEDbGEBfHre1rYmtsWB",
"_score": 141.5985,
"_source": {
"productId": 14587,
"name": "Brand Wave Multi Channel Remote Control",
"productCode": "111",
"productCategoryId": 17,
"length": 3,
"height": 0,
"productTypeId": 1,
"url": "brand-wave-multi-channel-remote-control",
"productBrandId": 3,
"width": 0,
"dispatchTimeInDays": 3,
"leadTimeInDays": 3,
"stockAvailable": 0,
"weightKg": 0.001,
"reviewRatingCount": 0,
"limitedStock": false,
"price": 63,
"productImage": "Wave-Remote-Control.jpg",
"productCategory": {
"productCategoryId": 17,
"name": "Accessories",
"fullPath": "Accessories",
"code": "00011"
},
"productSpecification": [{
"productSpecificationId": 852888,
"productId": 14587,
"specificationId": 232,
"name": "Brand",
"description": "This is the product manufacturer",
"value": "Brand"
},
{
"productSpecificationId": 852889,
"productId": 14587,
"specificationId": 92,
"name": "Type",
"value": "Remote control"
},
{
"productSpecificationId": 852891,
"productId": 14587,
"specificationId": 10,
"name": "Guarantee",
"value": "3 years"
},
{
"productSpecificationId": 852892,
"productId": 14587,
"specificationId": 599,
"name": "Power Voltage",
"value": "1.5 V"
},
{
"productSpecificationId": 852893,
"productId": 14587,
"specificationId": 29,
"name": "Dimensions",
"value": "157mm x 38mm x 19mm"
},
{
"productSpecificationId": 852894,
"productId": 14587,
"specificationId": 602,
"name": "Operation Range",
"value": "Up to 40m"
},
{
"productSpecificationId": 852895,
"productId": 14587,
"specificationId": 601,
"name": "Power Supply",
"value": "3V DC; 2 x AAA batteries"
}
]
}
}
After numerous amendments, my query now looks like this:
{
"size": 100,
"aggs": {
"specifications": {
"nested": {
"path": "productSpecification"
},
"aggs": {
"groups": {
"terms": {
"field": "productSpecification.name"
},
"aggs": {
"attribute": {
"terms": {
"field": "productSpecification.value"
}
}
}
}
}
},
"price_range": {
"range": {
"field": "price",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100,
"to": 150
},
{
"from": 150,
"to": 200
},
{
"from": 200,
"to": 250
},
{
"from": 250
}
]
}
}
},
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "control*",
"fields": [
"name^15",
"description^5",
"productCode"
]
}
}
]
}
},
"post_filter": {
"query":{
"nested": {
"path": "productSpecification",
"query": {
"bool":{
"should": [{
"bool": {
"must":[{
"term": {
"productSpecification.name.keyword": "Brand"
}
},
{
"term": {
"productSpecification.value": "Brand"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Guarantee"
}
},
{
"term": {
"productSpecification.value": "3 years"
}
}
]
}
}
]
}
}
}
}
}
}

productSpecification.value is a keyword field, so you should query it with a term query instead of a match query. You also can't use must here: the clauses inside a nested query are all evaluated against the same nested object, and a nested object whose value is "Brand" cannot also have the value "3 years". In your case you should use should, because it acts as a logical OR:
{
"query": {
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Brand"
}
},
{
"term": {
"productSpecification.value": "Brand"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Guarantee"
}
},
{
"term": {
"productSpecification.value": "3 years"
}
}
]
}
}
]
}
}
}
}
}

I finally got this working after a lot of experimentation and reading.
I'm posting it here in case it is of use to others with similar problems:
{
"post_filter": {
"bool": {
"filter": [{
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"filter": [{
"term": {
"productSpecification.name": "Brand"
}
},
{
"terms": {
"productSpecification.value": [
"Brand1"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"filter": [{
"term": {
"productSpecification.name": "Guarantee"
}
},
{
"terms": {
"productSpecification.value": [
"3 years"
]
}
}
]
}
}
}
}
]
}
}
}

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Filter by date range if the date column field type is text/keyword

After reading some online posts, I learned that a field's type can't be changed once the index has been created, unless the index is deleted and recreated.
How can I write a query that treats the "text" field as a "date" in my filter condition?
Mapping:
{
"mappings": {
"properties": {
"address": {
"properties": {
"city": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"zipcode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"enddate": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"customerstatus": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"customerid": {
"type": "long"
}
}
}
}
Query:
{
"from": 0,
"size": 100,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "32081",
"fields": [
"address.zipcode"
]
}
},
{
"query_string": {
"query": "FL",
"fields": [
"address.cityname",
"address.state"
]
}
}
],
"should": [
{
"bool": {
"must": [
{
"match": {
"customerstatus": "pending"
}
},
{
"range": {
"enddate": {
"gte": "2018-09-01",
"format": "yyyy-MM-dd",
"lte": "2021-07-01"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"match": {
"customerstatus": "active"
}
},
{
"range": {
"enddate": {
"gte": "2021-04-27",
"format": "yyyy-MM-dd"
}
}
}
]
}
}
],
"must_not": {
"terms": {
"customerstatus.keyword": [
"Inactive",
"cancelled"
]
}
}
}
}
}
Output:
{
"customerid":1,
"customerstatus": Active,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"enddate": "2022-07-15"
},
{
"customerid":2,
"customerstatus": Pending,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"enddate": "2016-01-01"
},
{
"customerid":3,
"customerstatus": Pending,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-06-01",
"enddate": "2021-06-01"
},
{
"customerid":4,
"customerstatus": Pending,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2021-01-01",
"enddate": "2022-01-01"
}
I am expecting customers 1 and 3 to be in my Output result.
While you can't change the name and type of a field, you can always add another multi field.
For instance, you could add a date-multifield to your enddate-field, so that your mapping eventually looks as follows:
"enddate": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
}
}
},
Then you need to make sure that the new sub-field also gets populated for the documents that are already indexed. You can achieve this by executing an update_by_query request, for example:
POST <my_index>/_update_by_query
After that, you would only need to change the field name in your query from enddate to enddate.date.
Looking at your mappings, you may want to consider improving them by replacing the default entries by entries which match your particular use case. Currently it seems that all string values are mapped as text & keyword multi fields, suited for full text and exact match search. If this is really what you need, then all is fine.

How to do sorting on a field with composite aggregation in elastic search

How can we sort on a field when using a composite aggregation in Elasticsearch?
We are using Elasticsearch version 6.8.6 and are trying to sort on a field in combination with a composite aggregation,
but we are not getting the expected results from the aggregation.
This is our mapping
{
"properties": {
"department": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"project": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"billingUnit": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"billingType": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"application": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"environmet": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"cost": {
"type": "float"
}
}
}
With the following query we are not able to sort; the results are not in alphabetical order:
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"department": {
"query": "HR",
"slop": 0,
"zero_terms_query": "NONE",
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"sort": [
{
"project.keyword": {
"order": "desc"
}
}
],
"aggs": {
"TERM_RANGE": {
"composite": {
"size": 10000,
"sources": [
{
"billingUnitKey": {
"terms": {
"field": "billingUnit.keyword",
"missing_bucket": false
}
}
},
{
"billingTypeKey": {
"terms": {
"field": "billingType.keyword",
"missing_bucket": false
}
}
}
]
},
"aggregations": {
"TOTAL": {
"sum": {
"field": "cost"
}
},
"dataHits": {
"top_hits": {
"from": 0,
"size": 1,
"version": false,
"seq_no_primary_term": false,
"explain": false,
"_source": {
"includes": [
"application.keyword",
"environmet.keyword",
],
"excludes": []
},
"docvalue_fields": [
{
"field": "application.keyword"
},
{
"field": "environmet.keyword"
}
]
}
},
"paginate_bucket": {
"bucket_sort": {
"sort": [],
"from": 0,
"size": 100,
"gap_policy": "SKIP"
}
}
}
}
}
}
Sorting works fine with the following query, which has no aggregation:
{
"query": {
"match": {
"department": "HR"
}
},
"size": 100,
"sort": [
{
"project.keyword": {
"order": "desc"
}
}
]
}
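You should use the order key of the composite aggregation: each entry in sources accepts its own "order" setting (asc or desc), which controls how the composite buckets are sorted.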
https://www.elastic.co/guide/en/elasticsearch/reference/7.8/search-aggregations-bucket-composite-aggregation.html#_order

ElasticSearch: merge all inner_hits for nested queries

I am pretty new to elasticsearch and have been trying to create a query which would return me a record that matches all the must conditions of a bool-query. The bool-query is wrapped inside a constant_score: filter.
My mapping for the object is as below:
{
"mappings": {
"doc": {
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"components": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"parent_id": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"quantity_in_bundle": {
"type": "long"
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
},
"id": {
"type": "long"
},
"image": {
"properties": {
"id": {
"type": "long"
},
"isDefault": {
"type": "boolean"
},
"thumbnail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"incoming_qty": {
"type": "long"
},
"tags": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"color": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"members": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"image": {
"properties": {
"id": {
"type": "long"
},
"isDefault": {
"type": "boolean"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"incoming_qty": {
"type": "long"
},
"tags": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"color": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"master_id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"parent_id": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"product_url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"product_url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
}
}
}
I am querying this index using the below query:
{
"from": 0,
"size": 20,
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"term": {
"tenantId": {
"value": 88,
"boost": 1
}
}
},
{
"terms": {
"type": [
2
],
"boost": 1
}
},
{
"bool": {
"should": [
{
"terms": {
"status": [
2
],
"boost": 1
}
},
{
"nested": {
"query": {
"terms": {
"members.status": [
2
],
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"terms": {
"product_stores.store_id": [
20889
],
"boost": 1
}
},
"path": "product_stores",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"terms": {
"members.product_stores.store_id": [
20889
],
"boost": 1
}
},
"path": "members.product_stores",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"terms": {
"tags.id": [
1001
],
"boost": 1
}
},
"path": "tags",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"terms": {
"members.tags.id": [
1001
],
"boost": 1
}
},
"path": "members.tags",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"boost": 1
}
},
"sort": [
{
"_id": {
"order": "desc"
}
}
]
}
What I am trying to achieve with this is to fetch the objects that have at least one nested object matching all the must conditions in the main bool query. But I am getting records even if there is not a single nested object that matches all 3. For example, if there's a record that only matches status and tags but not the store_ids, Elasticsearch will still return it even though all the conditions are part of must. Can anyone please explain the behaviour of this query to me? I tried reading the documentation but I am at a loss.
Any pointer or guidance will be much appreciated.
Thank you!
UPDATE:
I fixed this issue by merging all the nested shoulds under a single nested must.
In your query, you're using a should clause which will return results even if they don't match all of the should clause conditions. Your must clause takes precedence over the should clause.
According to the Bool Query documentation, you could adjust the minimum should match parameter.

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. In each of these documents, the CustomerInsightTargets property (whose items have the properties Source and Value) can be an array with x items. What I am trying to achieve is an autocomplete (top 5) on CustomerInsightTargets.Value, grouped by CustomerInsightTargets.Source.
It would be helpful if anyone could give me a hint about how to select only a subset of the nested objects from each document and use only those nested objects in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos#mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If I run a top_hits aggregation, the result includes all targets from a document as soon as one of them matches my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use aggregations to get the "autocomplete" values grouped by Source and ordered by _score. I tried a significant_terms aggregation, but it doesn't work very well; the terms aggregation doesn't sort by score (only by _count), and using fuzziness adds further complexity.

Resources