ElasticSearch: merge all inner_hits for nested queries - elasticsearch

I am pretty new to Elasticsearch and have been trying to create a query that returns a record matching all the must conditions of a bool query. The bool query is wrapped inside the filter of a constant_score query.
My mapping for the object is as below:
{
"mappings": {
"doc": {
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"components": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"parent_id": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"quantity_in_bundle": {
"type": "long"
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
},
"id": {
"type": "long"
},
"image": {
"properties": {
"id": {
"type": "long"
},
"isDefault": {
"type": "boolean"
},
"thumbnail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"incoming_qty": {
"type": "long"
},
"tags": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"color": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"members": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"image": {
"properties": {
"id": {
"type": "long"
},
"isDefault": {
"type": "boolean"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"incoming_qty": {
"type": "long"
},
"tags": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"color": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"master_id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"parent_id": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"product_url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"product_url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
}
}
}
I am querying this index using the below query:
{
"from": 0,
"size": 20,
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"term": {
"tenantId": {
"value": 88,
"boost": 1
}
}
},
{
"terms": {
"type": [
2
],
"boost": 1
}
},
{
"bool": {
"should": [
{
"terms": {
"status": [
2
],
"boost": 1
}
},
{
"nested": {
"query": {
"terms": {
"members.status": [
2
],
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"terms": {
"product_stores.store_id": [
20889
],
"boost": 1
}
},
"path": "product_stores",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"terms": {
"members.product_stores.store_id": [
20889
],
"boost": 1
}
},
"path": "members.product_stores",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"terms": {
"tags.id": [
1001
],
"boost": 1
}
},
"path": "tags",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"terms": {
"members.tags.id": [
1001
],
"boost": 1
}
},
"path": "members.tags",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"boost": 1
}
},
"sort": [
{
"_id": {
"order": "desc"
}
}
]
}
What I am trying to achieve with this is to fetch the object which has at least one nested object that matches all the must conditions in the main bool query. But I am getting records even if there is not a single nested object that matches all 3. For example, if there's a record that only matches status and tags but not the store_ids, Elasticsearch will still return it even though all the conditions are part of must. Can anyone please explain the behaviour of this query to me? I tried reading the documentation but I am at a loss.
Any pointer or guidance will be much appreciated.
Thank you!
UPDATE:
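I fixed this issue by merging all the nested shoulds under a single nested must, so that all the conditions are evaluated against the same nested member object instead of each nested clause being matched independently (possibly by different members).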

In your query, you're using a should clause which will return results even if they don't match all of the should clause conditions. Your must clause takes precedence over the should clause.
According to the Bool Query documentation, you could adjust the minimum_should_match parameter.

Related

Elasticsearch query for all values of field with group by

I am having trouble forming a query that fetches all values in a way similar to a SQL GROUP BY.
Below is my data structure:
product index:
{
"createdBy" : "61c1fcdd88dbad1920da8caf",
"creationTime" : "2021-12-22T11:58:53.576932Z",
"lastModifiedBy" : "61c1fcdd88dbad1920da8caf",
"lastModificationTime" : "2021-12-22T11:58:53.576932Z",
"id" : "61c312fdc6aa620a609db0b2",
"title" : "string",
"brand" : "string",
"longDesc" : "string",
"categoryId" : "string",
"imageUrls" : [
"string",
"string"
],
"keySpecs" : [
"string",
"string",
],
"facets" : [
{
"name" : "color",
"value" : "red"
},
{
"name" : "storage",
"value" : "16 GB"
},
{
"name" : "brand",
"value" : "Intex"
}
],
"categoryName" : "handsets"
}
Now, I want to fetch all the facets with their distinct values, along with the count for each value. Let's say
productA has color blue, productB has color red
productA has brand ABC, productB has brand XYZ
so, i want data which list all facets like:
color: blue(200 count), red (12 count)
brand: ABC(13 count), XYZ (99 count)
Also, different products will have different types of facets: for example, an iPhone will have color, memory, brand and size, but a pen will have color and brand only (not memory/size).
Note: I'm using the latest version of Elasticsearch.
=================
UPDATE 1:
Below is the es mapping details
{
"settings": {
"analysis": {
"filter": {
"english_stop": {
"type": "stop",
"stopwords": "_english_"
},
"english_keywords": {
"type": "keyword_marker",
"keywords": [
"example"
]
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
}
},
"analyzer": {
"lalashree_standard_analyzer": {
"tokenizer": "standard",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_keywords",
"english_stemmer"
]
},
"html_standard_analyzer": {
"char_filter": [
"html_strip"
],
"tokenizer": "standard",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_keywords",
"english_stemmer"
]
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"createdBy": {
"type": "keyword"
},
"creationTime": {
"type": "date"
},
"lastModifiedBy": {
"type": "keyword"
},
"lastModificationTime": {
"type": "date"
},
"deleted": {
"type": "boolean"
},
"deletedBy": {
"type": "keyword"
},
"deletionTime": {
"type": "date"
},
"title": {
"type": "text",
"analyzer": "lalashree_standard_analyzer",
"fields": {
"suggest": {
"type": "completion"
}
}
},
"shortDesc": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
},
"longDesc": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
},
"categoryId": {
"type": "keyword"
},
"searchDetails": {
"type": "object",
"properties": {
"desc": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
},
"keywords": {
"type": "text",
"analyzer": "lalashree_standard_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"imageUrls": {
"type": "keyword",
"index": false
},
"keySpecs": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
},
"sections": {
"type": "object",
"properties": {
"name": {
"type": "text",
"index": false
},
"shortDesc": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
},
"longDesc": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
},
"htmlContent": {
"type": "text",
"analyzer": "html_standard_analyzer"
}
}
},
"facets": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"specificationItems": {
"type": "object",
"properties": {
"key": {
"type": "text",
"analyzer": "lalashree_standard_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"values": {
"type": "text",
"analyzer": "lalashree_standard_analyzer"
}
}
},
"categoryName": {
"type": "keyword"
},
"productFamily": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"familyVariantOptions": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"values": {
"type": "keyword"
}
}
},
"productFamilyItems": {
"type": "nested",
"properties": {
"baseProductId": {
"type": "keyword"
},
"itemVariantInfoSet": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
}
}
}
}
},
"rating": {
"type": "float"
},
"totalReviewsCount": {
"type": "long"
},
"stores": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"logo": {
"type": "keyword",
"index": false
},
"active": {
"type": "boolean"
},
"name": {
"type": "text"
},
"quantity": {
"type": "long"
},
"rating": {
"type": "float"
},
"totalReviewsCount": {
"type": "long"
},
"price.mrp": {
"type": "float"
},
"price.sp": {
"type": "float"
},
"location.geoPoint": {
"type": "geo_point"
},
"oos": {
"type": "boolean"
}
}
}
}
}
}
This query first groups by facet name and then groups each name's values. By setting the sizes, you can control the number of facets you want and the number of items in each facet. I think it does what you need.
Note that if you have a very large number of documents and performance matters, this query may perform badly.
{
"size": 0,
"aggs": {
"facets": {
"nested": {
"path": "facets"
},
"aggs": {
"names": {
"terms": {
"field": "facets.name",
"size": 10
},
"aggs": {
"values": {
"terms": {
"field": "facets.value",
"size": 10
}
}
}
}
}
}
}
}

Unable to run elastic search nested aggregate query

I am trying to create a query that aggregates the sums of 3 different fields and also matches three different conditions. I don't understand what the error message is saying.
The query below gives this specific error message:
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "Unknown key for a VALUE_NUMBER in [Type].",
"line": 1,
"col": 9
}
],
"type": "parsing_exception",
"reason": "Unknown key for a VALUE_NUMBER in [Type].",
"line": 1,
"col": 9
}
}
My query looks as follows:
{
"aggs": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"data.entity.productId": "45c29143b3bb4073a9fd325106784ce2"
}
},
{
"term": {
"data.entity.locationId": "c5f45ffc4fd94dcb926f96f1d5b9d835"
}
},
{
"term": {
"type.keyword": "StockLocationActivityAggregate"
}
}
]
}
}
},
"aggs": {
"directStock": {
"sum": { "field": "data.entity.inStock" },
"aggs": {
"directOutgoing": {
"sum": { "field": "data.entity.outgoing" },
"aggs": {
"directIncoming": { "sum": { "field": "data.entity.incoming" } }
}
}
}
}
}
},
"size": 0
}
Update
I am using the following index map
{
"mapping": {
"_doc": {
"properties": {
"active": {
"type": "boolean"
},
"data": {
"properties": {
"entity": {
"properties": {
"activityDate": {
"type": "date"
},
"creationDate": {
"type": "date"
},
"deleted": {
"type": "boolean"
},
"hash": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"inStock": {
"type": "float"
},
"incoming": {
"type": "float"
},
"locationId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"modifiedOn": {
"type": "date"
},
"modifier": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"orderId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"orderItemId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"outgoing": {
"type": "float"
},
"productId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"reservationDate": {
"type": "date"
},
"version": {
"type": "long"
}
}
},
"hash": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"modifiedOn": {
"type": "date"
},
"modifier": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"tenantIdentifier": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "long"
}
}
},
"deleted": {
"type": "boolean"
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"tenantId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"version": {
"type": "long"
}
}
}
}
}
I've also tried the example from the Elasticsearch docs and the sample from Val below. They all give the same error.
The sum aggregation is a metric aggregation that cannot have sub-aggregations... So you cannot do sum -> sum -> sum.
If you need the 3 different sums, you can do something like this:
{
...
"aggs": {
"directIncoming": {
"sum": {
"field": "data.entity.incoming"
}
},
"directStock": {
"sum": {
"field": "data.entity.inStock"
}
},
"directOutgoing": {
"sum": {
"field": "data.entity.outgoing"
}
}
}
}

Get all the buckets for a aggregate elastic search

I want to get all the buckets available for a particular aggregation. Is there any query or endpoint to get the buckets?
Below is my mapping. If I query with any filter, only the related buckets come back, but I want all the buckets so that I can show them on the frontend and support OR operations.
Example: Suppose we have 2 records, one with category chair and the other with category table. If I select chair, the table count is returned as zero, but it should be shown as 1 so that the user can select both.
MyMapping:
{
"properties": {
"australiasellable": {
"type": "boolean"
},
"avgRating": {
"type": "float"
},
"categories": {
"type": "nested"
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"categorycode": {
"type": "text",
"fielddata": true
},
"categoryname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"colour": {
"type": "text",
"fielddata": true
},
"commercialuse": {
"type": "boolean"
},
"customisable": {
"type": "boolean"
},
"depth": {
"type": "float"
},
"freedelivery": {
"type": "boolean"
},
"height": {
"type": "float"
},
"listprice": {
"type": "float"
},
"location": {
"type": "geo_point"
},
"material": {
"type": "text",
"fielddata": true
},
"materialcode": {
"type": "text",
"fielddata": true
},
"message": {
"type": "geo_point"
},
"numberOfRating": {
"type": "long"
},
"online": {
"type": "boolean"
},
"outdooruse": {
"type": "boolean"
},
"productid": {
"type": "long"
},
"productimageurl": {
"type": "text",
"fielddata": true
},
"productname": {
"type": "text",
"fielddata": true
},
"producttypecode": {
"type": "text",
"fielddata": true
},
"sellercode": {
"type": "text",
"fielddata": true
},
"sellerdescription": {
"type": "text",
"fielddata": true
},
"shortdescription": {
"type": "text",
"fielddata": true
},
"sku": {
"type": "text",
"fielddata": true
},
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"stylecode": {
"type": "text",
"fielddata": true
},
"warrantycode": {
"type": "text",
"fielddata": true
},
"weight": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"width": {
"type": "float"
}
}
}
Regards,
Sreenivas
A possible solution would be not to set the filter in the query section of your payload but rather perform filtered aggregations and use the top_hits to get the _sources of the matched docs.
Long story short, if you apply a query, it'll of course affect your aggregations. So the trick is to not apply any query (either match_all or remove the whole query object) and perform the queries in the sub-aggregations as follows:
Using your category field:
GET your_index/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"actual_query_agg": {
"filter": {
"term": {
"category.keyword": {
"value": "chair"
}
}
},
"aggs": {
"actual_query_agg_top_hits": {
"top_hits": {
"_source": [
"category"
],
"size": 10
}
}
}
},
"excluding_my_query_filtered_agg": {
"filter": {
"bool": {
"must_not": {
"term": {
"category.keyword": "chair"
}
}
}
},
"aggs": {
"by_other_categories_agg": {
"terms": {
"field": "category.keyword",
"size": 10
},
"aggs": {
"categorized_other_docs_agg_top_hits": {
"top_hits": {
"_source": [
"category"
],
"size": 10
}
}
}
}
}
}
}
}
You can get rid of the top_hits sub-aggregations if you're just interested in the counts and not the underlying docs, i.e.:
GET your_index/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"actual_query_agg": {
"filter": {
"term": {
"category.keyword": {
"value": "chair"
}
}
}
},
"excluding_my_query_filtered_agg": {
"filter": {
"bool": {
"must_not": {
"term": {
"category.keyword": "chair"
}
}
}
},
"aggs": {
"by_other_categories_agg": {
"terms": {
"field": "category.keyword",
"size": 10
}
}
}
}
}
}

Must Match two different terms

I am looking to filter results where two sets of data match.
I get hits when I specify "should" but not when I specify "must".
Here is my query. It works as expected with just the one "match", but if I add a second one I get no hits, even though there are definitely records in the index that have a productSpecification.value of Brand and one of 3 Years.
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "control*",
"fields": [
"name^15",
"description^5",
"productCode"
]
}
}
]
}
}
"post_filter": {
"nested": {
"path": "productSpecification",
"query": {
"bool":{
"must": [
{
"match": {
"productSpecification.value":"3 years"
}
},
{
"match": {
"productSpecification.value":"Brand"
}
}
]
}
}
}
}
}
Just banging my head against the desk now trying different combinations of JSON trying to get this to return some values
{
"myindex": {
"mappings": {
"product": {
"properties": {
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dispatchTimeInDays": {
"type": "integer"
},
"height": {
"type": "integer"
},
"html": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"leadTimeInDays": {
"type": "integer"
},
"length": {
"type": "integer"
},
"limitedStock": {
"type": "boolean"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"notes": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"price": {
"type": "double"
},
"productBrandId": {
"type": "integer"
},
"productCategory": {
"properties": {
"code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullPath": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"parentProductCategoryId": {
"type": "integer"
},
"productCategoryId": {
"type": "integer"
}
}
},
"productCategoryId": {
"type": "integer"
},
"productCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productId": {
"type": "integer"
},
"productImage": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productSpecification": {
"type": "nested",
"properties": {
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "keyword"
},
"productId": {
"type": "long"
},
"productSpecificationId": {
"type": "long"
},
"specificationId": {
"type": "long"
},
"value": {
"type": "keyword"
}
}
},
"productTypeId": {
"type": "integer"
},
"reviewRating": {
"type": "double"
},
"reviewRatingCount": {
"type": "integer"
},
"sellingPriceGroupId": {
"type": "integer"
},
"stockAvailable": {
"type": "integer"
},
"taxRateId": {
"type": "integer"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"weightKg": {
"type": "double"
},
"width": {
"type": "integer"
}
}
}
}
}
}
Here is an example of a product I would expect to be returned by this query. It has a productSpecification.value of "3 years" AND a productSpecification.value of "Brand".
{
"_index": "myindex",
"_type": "product",
"_id": "uQEDbGEBfHre1rYmtsWB",
"_score": 141.5985,
"_source": {
"productId": 14587,
"name": "Brand Wave Multi Channel Remote Control",
"productCode": "111",
"productCategoryId": 17,
"length": 3,
"height": 0,
"productTypeId": 1,
"url": "brand-wave-multi-channel-remote-control",
"productBrandId": 3,
"width": 0,
"dispatchTimeInDays": 3,
"leadTimeInDays": 3,
"stockAvailable": 0,
"weightKg": 0.001,
"reviewRatingCount": 0,
"limitedStock": false,
"price": 63,
"productImage": "Wave-Remote-Control.jpg",
"productCategory": {
"productCategoryId": 17,
"name": "Accessories",
"fullPath": "Accessories",
"code": "00011"
},
"productSpecification": [{
"productSpecificationId": 852888,
"productId": 14587,
"specificationId": 232,
"name": "Brand",
"description": "This is the product manufacturer",
"value": "Brand"
},
{
"productSpecificationId": 852889,
"productId": 14587,
"specificationId": 92,
"name": "Type",
"value": "Remote control"
},
{
"productSpecificationId": 852891,
"productId": 14587,
"specificationId": 10,
"name": "Guarantee",
"value": "3 years"
},
{
"productSpecificationId": 852892,
"productId": 14587,
"specificationId": 599,
"name": "Power Voltage",
"value": "1.5 V"
},
{
"productSpecificationId": 852893,
"productId": 14587,
"specificationId": 29,
"name": "Dimensions",
"value": "157mm x 38mm x 19mm"
},
{
"productSpecificationId": 852894,
"productId": 14587,
"specificationId": 602,
"name": "Operation Range",
"value": "Up to 40m"
},
{
"productSpecificationId": 852895,
"productId": 14587,
"specificationId": 601,
"name": "Power Supply",
"value": "3V DC; 2 x AAA batteries"
}
]
}
}
After numerous amends my query is now like
{
"size": 100,
"aggs": {
"specifications": {
"nested": {
"path": "productSpecification"
},
"aggs": {
"groups": {
"terms": {
"field": "productSpecification.name"
},
"aggs": {
"attribute": {
"terms": {
"field": "productSpecification.value"
}
}
}
}
}
},
"price_range": {
"range": {
"field": "price",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100,
"to": 150
},
{
"from": 150,
"to": 200
},
{
"from": 200,
"to": 250
},
{
"from": 250
}
]
}
}
},
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "control*",
"fields": [
"name^15",
"description^5",
"productCode"
]
}
}
]
}
},
"post_filter": {
"query":{
"nested": {
"path": "productSpecification",
"query": {
"bool":{
"should": [{
"bool": {
"must":[{
"term": {
"productSpecification.name.keyword": "Brand"
}
},
{
"term": {
"productSpecification.value": "Brand"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Guarantee"
}
},
{
"term": {
"productSpecification.value": "3 years"
}
}
]
}
}
]
}
}
}
}
}
}
productSpecification.value is a keyword datatype, so you should query it with a term query instead of match. You also can't use must here, because a nested doc that has Brand as its value can't also have 3 years as its value. In your case you should use should, because it acts as a logical OR operator.
{
"query": {
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Brand"
}
},
{
"term": {
"productSpecification.value": "Brand"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"productSpecification.name.keyword": "Guarantee"
}
},
{
"term": {
"productSpecification.value": "3 years"
}
}
]
}
}
]
}
}
}
}
}
Finally got this working after lots of experimentation / reading
posting here in case it is of use to others with similar problems
{
"post_filter": {
"bool": {
"filter": [{
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"filter": [{
"term": {
"productSpecification.name": "Brand"
}
},
{
"terms": {
"productSpecification.value": [
"Brand1"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "productSpecification",
"query": {
"bool": {
"filter": [{
"term": {
"productSpecification.name": "Guarantee"
}
},
{
"terms": {
"productSpecification.value": [
"3 years"
]
}
}
]
}
}
}
}
]
}
}
}

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. In each of these documents, the CustomerInsightTargets property (whose items have the properties Source and Value) can be an array with x items. What I am trying to achieve is an autocomplete (of the top 5) on CustomerInsightTargets.Value grouped by CustomerInsightTargets.Source.
It would be helpful if anyone could give me a hint about how to select only a subset of nested objects from each document and use those nested objects in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos#mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If I run a top_hits aggregation, the result includes all targets from a document if any one of them matches my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use the aggregation to get the "autocomplete" Values grouped by Source and ordered by the _score. I tried to use a significant_terms aggregation but it doesn't work so well; also, the terms aggregation doesn't sort by score (only by _count), and having fuzziness also adds complexity.

Resources