Elastic Search Aggregation with bool query - elasticsearch

This is my Elasticsearch bool query. It works fine:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"parent_uuid._raw": [
"87ec596a-109e-45ce-8a8d-7a2d1a56df81",
"07526608-8140-46be-96b9-c5f7cca4bd93"
]
}
},
{
"terms": {
"resource_type._raw": [
"Zone"
]
}
}
]
}
},
"from": 0
}
I want an aggregation on the name field, so I added this:
"aggs": {
"group_by_name": {
"terms": {
"field": "display_name.keyword"
}
} }
But the result is the same.
What am I missing?
The result I get is:
{ "device-resource": [
{
"fq_name": [
"default-domain",
"muthu1500",
"EP",
"JUNOS/Zone=oam"
],
"uuid": "161cf82d-16fd-4219-861d-d50de622f8eb",
"uri": "/ems-central/device-resource/161cf82d-16fd-4219-861d-d50de622f8eb"
},
{
"fq_name": [
"default-domain",
"muthu1500",
"EP",
"JUNOS/Zone=untrust"
],
"uuid": "fe28fb7c-c087-4473-aeef-e302022f47a4",
"uri": "/ems-central/device-resource/fe28fb7c-c087-4473-aeef-e302022f47a4"
},
{
"fq_name": [
"default-domain",
"muthu1500",
"MNONZT",
"JUNOS/Zone=trust"
],
"uuid": "251a4a9e-acb4-49ed-9c29-499ddbceb532",
"uri": "/ems-central/device-resource/251a4a9e-acb4-49ed-9c29-499ddbceb532"
},
{
"fq_name": [
"default-domain",
"muthu1500",
"MNONZT",
"JUNOS/Zone=untrust"
],
"uuid": "a3417512-8953-4c1e-b68e-8390327d5213",
"uri": "/ems-central/device-resource/a3417512-8953-4c1e-b68e-8390327d5213"
},
{
"fq_name": [
"default-domain",
"muthu1500",
"SRX1500MD",
"JUNOS/Zone=trust"
],
"uuid": "1a5434c5-d47d-40be-bb00-ef1d244e6c0c",
"uri": "/ems-central/device-resource/1a5434c5-d47d-40be-bb00-ef1d244e6c0c"
} ], "total": 5 }
Since the last two records have the same display_name as the 2nd and 3rd records respectively, the aggregation should show only one of each.
I want this result:
{ "device-resource": [
{
"fq_name": [
"default-domain",
"muthu1500",
"EP",
"JUNOS/Zone=oam"
],
"uuid": "161cf82d-16fd-4219-861d-d50de622f8eb",
"uri": "/ems-central/device-resource/161cf82d-16fd-4219-861d-d50de622f8eb"
},
{
"fq_name": [
"default-domain",
"muthu1500",
"EP",
"JUNOS/Zone=untrust"
],
"uuid": "fe28fb7c-c087-4473-aeef-e302022f47a4",
"uri": "/ems-central/device-resource/fe28fb7c-c087-4473-aeef-e302022f47a4"
},
{
"fq_name": [
"default-domain",
"muthu1500",
"MNONZT",
"JUNOS/Zone=trust"
],
"uuid": "251a4a9e-acb4-49ed-9c29-499ddbceb532",
"uri": "/ems-central/device-resource/251a4a9e-acb4-49ed-9c29-499ddbceb532"
} ], "total": 3 }

According to your mapping, your terms aggregation needs to be like this (use the _raw sub-field):
"aggs": {
"group_by_name": {
"terms": {
"field": "display_name._raw"
}
} }

Related

Cannot seem to use must and must_not together in an elastic search query

If I run the following query:
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "boxing",
"fuzziness": 2,
"minimum_should_match": 2
}
}
],
"must_not": [
{
"terms_set": {
"allowedCountries": {
"terms": ["gb", "mx"],
"minimum_should_match_script": {
"source": "2"
}
}
}
}
],
"filter": [
{
"range": {
"expireTime": {
"gt": 1674061907954
}
}
},
{
"term": {
"region": {
"value": "row"
}
}
},
{
"term": {
"sourceType": {
"value": "article"
}
}
}
]
}
}
}
against an index with articles that look like:
{
"_index": "content-items-v10",
"_type": "_doc",
"_id": "e7hm75ui4dma1mm4j8q5v7914",
"_score": 4.3724976,
"_source": {
"allowedCountries": ["gb", "ie"],
"body": "Both Joshua Buatsi and Craig Richards join The DAZN Boxing Show ahead of their clash at London's O2 Arena. Matchroom's Eddie Hearn also gives his take on the night, as well as Chantelle Cameron previewing her contest with Victoria Noelia Bustos.",
"competitions": [
{
"id": "8lo6205qyio0fksjx9glqbdhj",
"name": "Buatsi v Richards"
}
],
"contestants": [
{
"id": "7rq59j3eiamxlm12vhxcsgujj",
"name": "Joshua Buatsi"
},
{
"id": "boby9oqe23g6qyuwphrxh8su5",
"name": "Craig Richards"
}
],
"countries": [
{
"id": "7yasa43laq1nb2e6f8bfuvxed",
"name": "World"
},
{
"id": "258l9t5sm55592i08mdpqzr3t",
"name": "United Kingdom"
}
],
"dotsLastUpdateTime": 1673979749396,
"expireTime": 4800000000000,
"fixtureDate": {},
"headline": "Buatsi vs. Richards: Preview",
"id": "e7hm75ui4dma1mm4j8q5v7914",
"importance": 0,
"languageKeys": ["en"],
"languages": ["en"],
"lastUpdateTime": {
"ts": 1653088281000,
"iso8601": "2022-05-20T23:11:21.000Z"
},
"promoImageUrl": null,
"publication": {
"typeId": "1plcw0iyhx9vn1fcanbm2ja3rf",
"typeName": "Shoulder"
},
"publishedTime": {
"ts": 1653088281000,
"iso8601": "2022-05-20T23:11:21.000Z"
},
"region": "row",
"shortHeadline": null,
"sourceType": "article",
"sports": [
{
"id": "2x2oqzx60orpoeugkd754ga17",
"name": "Boxing"
}
],
"teaser": "",
"thumbnailImageUrl": "https://images.daznservices.com/di/library/babcock_canada/45/3e/the-dazn-boxing-show-20052022_xc4jbfqi022l1shq9lu641h9e.png?t=-477976832",
"translations": {}
}
}
I get the following validation error from elasticsearch:
{
"ok": false,
"errors": {
"validation": [
{
"message": "\"query.bool.must_not\" is not allowed",
"path": [
"query",
"bool",
"must_not"
],
"type": "object.unknown",
"context": {
"child": "must_not",
"label": "query.bool.must_not",
"value": [
{
"terms_set": {
"allowedCountries": {
"terms": [
"gb",
"mx"
],
"minimum_should_match_script": {
"source": "2"
}
}
}
}
],
"key": "must_not"
}
}
]
},
"correlationId": "d29e9275-9ab3-4ff8-944d-852b98d4b503"
}
And I cannot figure out what the issue might be! From the elastic docs it should be OK.
I'm using ElasticSearch 7.9.3 running in a local docker container.
I'm hoping someone out there will give me a clue!
Cheers!
I would expect this to just work.
I'm trying to filter out articles that have both of the country codes gb and mx in the field allowedCountries.
I can include them easily enough in the results when I add the terms_set query to the bool.must section of the query.
It works well; you just need to enclose your query in the query section:
{
"query": { <--- add this
"bool": { <--- your query starts here
"must": [
...
Thank you for responding!
I was helping with a system I did not have full context on - it turns out there is a proxy in the mix with validation that was blocking the must_not query. So, with the proxy fixed, it now works.

Filter Elastic aggregation result in which several field have null value

I can't filter out of the Elasticsearch response the subjects/buckets in which two fields have a 'null' value.
I was writing something like:
{
"bool": {
"must": [
{
"bool": {
"should": [
{
"terms": {
"username.d": [
"abc.com"
],
"boost": 1
}
},
{
"terms": {
"authorization_service.d": [
"gmail.com"
],
"boost": 1
}
}
],
"must_not": [
{
"exists": {
"field": "username.email"
}
},
{
"exists": {
"field": "username.login"
}
}
],
"boost": 1
}
},
{
"exists": {
"field": "password.hash",
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
and I get something like:
"aggregations": { -
"subjects": { -
"after_key": { -
"subject": null,
"login": null,
"authorization_service": "abc.com"
},
"buckets": [ -
{ -
"key": { -
"subject": null,
"login": null,
"authorization_service": "abc.com"
},
"doc_count": 10
}
]
}
}
So, it looks like I can find subjects/buckets in which I have "subject": null and "login": null, however, I need to filter out such cases. Any ideas would be helpful.
It's tricky to suggest something without example docs and the full query but I would try to use a filter clause with exists.
"query": {
"bool": {
"filter": [
{
"exists": {
"field": "user.login"
}
}
]....
}
},

URL search using ElasticSearch not giving right result order

GET firm_id_number/_search
{
"size": 20,
"from": 0,
"highlight": {
"order": "score",
"pre_tags": [
"<mark>"
],
"post_tags": [
"</mark>"
],
"fields": {
"question_text": {
"number_of_fragments": 0
},
"response_text_original": {
"number_of_fragments": 0
},
"question_text.shingles": {
"number_of_fragments": 0
},
"image_url": {
"number_of_fragments": 0
},
"response_text_original.shingles": {
"number_of_fragments": 0
},
"tags_text": {}
}
},
"docvalue_fields": [
"question_id",
"expiry_date",
"response_id",
"associated_entity.keyword",
"entity_type",
"is_verified",
"strategy_id",
"response_created_at",
"tags_text.keyword",
"associated_entity_type",
"response_type",
"associated_template.keyword",
"associated_investor_id",
"associated_entity_id",
"tags",
"associated_investor.keyword",
"duediligence_id",
"associated_template_id",
"fund_id",
"fund_firm_id",
"response_created_by",
"diligence_type",
"response_created_by_name.keyword",
"is_active",
"child_section_id",
"parent_section_id",
"verified_at",
"response_is_na",
"grid_id",
"verified_by_name.keyword",
"verified_by"
],
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "comments",
"query": {
"bool": {
"must": [
{
"match": {
"comments.comment_text_original": "https://dvtestenvstorage.blob.core.windows.net/images/firm49012/financial-stock-market-graph-chart-investment-trading-stock-exchange-trading-market-screen-at-night-time-W38FWJ_1656075621.jpg"
}
}
]
}
},
"inner_hits": {
"_source": false,
"highlight": {
"pre_tags": [
"<mark>"
],
"post_tags": [
"</mark>"
],
"fields": {
"comments.comment_text_original": {}
}
}
}
}
}
]
}
},
{
"bool": {
"must": [
{
"multi_match": {
"query": "https://dvtestenvstorage.blob.core.windows.net/images/firm49012/financial-stock-market-graph-chart-investment-trading-stock-exchange-trading-market-screen-at-night-time-W38FWJ_1656075621.jpg",
"type": "cross_fields",
"fields": [
"response_text_original^2",
"response_text_original.shingles",
"image_url"
]
}
}
]
}
}
]
}
},
{
"exists": {
"field": "question_text_vector"
}
}
],
"should": [],
"must_not": [],
"filter": [
{
"terms": {
"diligence_type": [
1243,
1242,
1241,
2,
-1,
0
]
}
}
]
}
},
"sort": [],
"_source": {
"includes": [
"response_text_original",
"question_text",
"comments",
"tags.tag_id",
"tags.tag_name",
"tags_text",
"question_sme",
"section_sme",
"comments",
"question_help_text",
"notes",
"grid_response",
"checkbox_response",
"attachment_response"
]
}
}
The above query returns results for matching URLs present in the ES instance, but the order of the results isn't right. This query is being used for image search using the image URLs present in the ES instance. The exact match for the URL is not showing as the first result, i.e. the document with the exact same URL is not ranked as the best match. What might be wrong with this ES query?

ElasticSearch Aggregation on nested field with bucketing on parent id

The following is my document structure:
'Order': {
u'properties': {
u'order_id': {u'type': u'integer'},
'Product': {
u'properties': {
u'product_id': {u'type': u'integer'},
u'product_category': {'type': 'text'},
},
u'type': u'nested'
}
}
}
Doc1
"Order": {
"order_id": "1",
"Product": [
{
"product_id": "1",
"product_category": "category_1"
},
{
"product_id": "2",
"product_category": "category_2"
},
{
"product_id": "3",
"product_category": "category_2"
},
]
}
Doc2
"Order": {
"order_id": "2",
"Product": [
{
"product_id": "4",
"product_category": "category_1"
},
{
"product_id": "1",
"product_category": "category_1"
},
{
"product_id": "2",
"product_category": "category_2"
},
]
}
I want to get following output
"aggregations": {
"Order": [
{
"order_id": "1"
"category_counts": [
{
"category_1": 1
},
{
"category_2": 2
},
]
},
{
"order_id": "1"
"category_counts": [
{
"category_1": 2
},
{
"category_2": 1
},
]
},
]
}
I tried using nested aggregation
"aggs": {
"Product-nested": {
"nested": {
"path": "Product"
}
"aggs": {
"category_counts": {
"terms": {
"field": "Product.product_category"
}
}
},
}
}
It does not give output for each order but gives combined output for all orders
{
"Product-nested": {
"category_counts": [
"category_1": 3,
"category_2": 3
]
}
}
I have two questions:
How to get the desired output in above scenario?
What if instead of single product_category I have an array of
product_categories then how will we achieve the same in this
scenario?
I am using elasticsearch >= 5.0
I have an idea, but I don't think it's the best one.
You can make a terms aggregation on the "order_id" field, then a nested sub-aggregation on "Product.product_category".
Something like this:
{
"aggs": {
"all-order-id": {
"terms": {
"field": "order_id",
"size": 10
},
"aggs": {
"Product-nested": {
"nested": {
"path": "Product"
},
"aggs": {
"all-products-in-order-id": {
"terms": {
"field": "Product.product_category"
}
}
}
}
}
}
}
}
Sorry, it looks a bit messy; I'm not so good with this answer editor.

Elasticsearch aggregations for faceted search excluding some fields

I have a shop which uses Elasticsearch 2.4 for faceted search.
But at the moment the existing filters (product attributes) are taken from mysql. I want to do this using elasticsearch aggregations.
But I got the problem: I do not need to aggregate all the attributes.
What I have:
Part of Mapping:
...
'is_active' => [
'type' => 'long',
'index' => 'not_analyzed',
],
'category_id' => [
'type' => 'long',
'index' => 'not_analyzed',
],
'attrs' => [
'properties' => [
'attr_name' => ['type' => 'string', 'index' => 'not_analyzed'],
'value' => [
'type' => 'string',
'index' => 'analyzed',
'analyzer' => 'attrs_analizer',
],
]
],
...
Example of data:
{
"id": 1,
"is_active": "1",
"category_id": 189,
...
"price": "48.00",
"attrs": [
{
"attr_name": "Brand",
"value": "TP-Link"
},
{
"attr_name": "Model",
"value": "TL-1"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
},
{
"id": 2,
"is_active": "1",
"category_id": 242,
...
"price": "12.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Lenovo"
},
{
"attr_name": "Model",
"value": "B570"
},
{
"attr_name": "OS",
"value": "Linux"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
},
{
"id": 3,
"is_active": "1",
"category_id": 242,
...
"price": "24.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Asus"
},
{
"attr_name": "Model",
"value": "QZ85"
},
{
"attr_name": "OS",
"value": "Windows"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
}
Attributes such as "Model" and "Other" are not used when filtering products, they are only displayed on the product page. On the other attributes (Brand, OS, and others ...) I want to receive aggregations.
When I try to aggregate the attrs.value field, of course I get aggregations for all data (including the large "Other" fields, in which there can be a lot of HTML).
"aggs": {
"facet_value": {
"terms": {
"field": "attrs.value",
"size": 0
}
}
}
How to exclude "attrs.attr_name": ["Model", "Other"]?
Changing the mapping is a bad solution for me, but if it is inevitable, can you tell me how to do it? I guess I'll need to make "attrs" nested?
UPD:
I want to receive:
1. All the attributes that the products have in a certain category, except for those that I specify in the settings of my system (in this example I will exclude "Model" and "Other").
2. Number of products near each value.
It should look like this:
For category "Laptops":
Brand:
Lenovo (18)
Asus (19)
.....
OS:
Windows (19)
Linux (5)
...
For "computer monitors":
Brand:
Samsung (18)
LG (19)
.....
Resolution:
1360x768 (19)
1920x1080 (22)
....
This is a terms aggregation; I already use it to get the number of products for each category. I tried it on attrs.value, but I do not know how to exclude the "attrs.value" entries that belong to "attrs.attr_name": "Model" and "attrs.attr_name": "Other".
UPD2:
In my case, if I map attrs as a nested type, the size of the index increases by 30%,
from 2700Mi to 3510Mi.
If there is no other option, I'll have to put up with it.
You first have to map attrs as a nested type and then use nested aggregations.
PUT no_play
{
"mappings": {
"document_type" : {
"properties": {
"is_active" : {
"type": "long"
},
"category_id" : {
"type": "long"
},
"attrs" : {
"type": "nested",
"properties": {
"attr_name" : {
"type" : "keyword"
},
"value" : {
"type" : "keyword"
}
}
}
}
}
}
}
POST no_play/document_type
{
"id": 3,
"is_active": "1",
"category_id": 242,
"price": "24.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Asus"
},
{
"attr_name": "Model",
"value": "QZ85"
},
{
"attr_name": "OS",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
}
Since you didn't mention how you want to aggregate, here are two cases.
Case 1) If you want to count each attrs entry individually, use the first query below; it gives you the count of term occurrences.
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
}
}
}
}
}
}
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
},
"aggs": {
"reverse_back_to_roots": {
"reverse_nested": {
}
}
}
}
}
}
}
}
To count root documents for each attrs value instead (as in the second query above), you need to add a reverse_nested aggregation, which moves the aggregation back up to the level of the root document.
Think of the following document.
{
"id": 3,
"is_active": "1",
"category_id": 242,
"price": "24.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Asus"
},
{
"attr_name": "Model",
"value": "QZ85"
},
{
"attr_name": "OS",
"value": "repeated value"
},
{
"attr_name": "Other",
"value": "repeated value"
}
]
}
For the first query, the count for 'repeated value' will be 2, and for the second query it will be 1.
Note:
Here is how you can add a filter to exclude specific attribute names:
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"filtered_results": {
"filter": {
"bool": {
"must_not": [{
"terms": {
"attrs.attr_name": ["Model", "Brand"]
}
}]
}
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
}
}
}
}
}
}
}
}
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"filtered_results": {
"filter": {
"bool": {
"must_not": [{
"terms": {
"attrs.attr_name": ["Model", "Brand"]
}
}]
}
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
},
"aggs": {
"reverse_back_to_roots": {
"reverse_nested": {}
}
}
}
}
}
}
}
}
}
Thanks

Resources