Elastic Search Sum aggregation with group by and where condition - elasticsearch

I am a newbie to ElasticSearch.
We are currently moving our code from relational DB to ElasticSearch. So we are converting our queries in ElasticSearch query format.
I am looking for ElasticSearch equivalent of below query -
SELECT Color, SUM(ListPrice), SUM(StandardCost)
FROM Production.Product
WHERE Color IS NOT NULL
AND ListPrice != 0.00
AND Name LIKE 'Mountain%'
GROUP BY Color
Can someone provide an example of an ElasticSearch query equivalent to the above?

You'd have a products index containing documents of type product, whose mapping could look like this based on your query above:
curl -XPUT localhost:9200/products -d '
{
"mappings": {
"product": {
"properties": {
"Color": {
"type": "string"
},
"Name": {
"type": "string"
},
"ListPrice": {
"type": "double"
},
"StandardCost": {
"type": "double"
}
}
}
}
}'
Then the ES query equivalent to the SQL one you gave above would look like this:
{
"query": {
"filtered": {
"query": {
"query_string": {
"default_field": "Name",
"query": "Mountain*"
}
},
"filter": {
"bool": {
"must_not": [
{
"missing": {
"field": "Color"
}
},
{
"term": {
"ListPrice": 0
}
}
]
}
}
}
},
"aggs": {
"by_color": {
"terms": {
"field": "Color"
},
"aggs": {
"total_price": {
"sum": {
"field": "ListPrice"
}
},
"total_cost": {
"sum": {
"field": "StandardCost"
}
}
}
}
}
}

Related

Elastic: How can I filter aggregation buckets by string key

I have some data from one provider - very big structured JSON data:
"mappings": {
"properties": {
"field_a": { .. },
"field_b": { .. },
"field_c": { .. },
"field_d": {
"properties": {
"subfield_a": {...},
"subfield_b": {...},
"subfield_c": {...},
"subfield_d": {...},
"subfield_e": {
"properties": {
"myfield": {
"type": "keyword"
},
"another_a": {...},
"another_b": {...},
}
}
}
}
}
}
subfield_e is an array of objects containing many fields, including the one I am interested in, "myfield".
I need an aggregation over only those "myfield" values that contain some string.
So, I currently do this, which gives a wrong (but logical) result:
GET /index/_search
{
"query": {
"wildcard": {
"field_d.subfield_e.myfield": "*string*"
}
},
"aggs": {
"interest": {
"terms": {
"field": "field_d.subfield_e.myfield",
"size": 10
}
}
},
"size": 0
}
The problem with this query is that it selects all documents where the array of objects "subfield_e" contains an object whose myfield contains the string, and then runs the aggregation over all of those documents. So, in the end, I get results with all "myfield" values from those documents, not only the myfield values containing the string.
I tried to add a bucket_selector aggregation after my main aggregation, but I got the error: "buckets_path must reference either a number value or a single value numeric metric aggregation, got: [String] at aggregation [_key]"
My code is inspired by: Filter Elasticsearch Aggregation by Bucket Key Value and now looks like this:
GET /index/_search
{
"query": {
"wildcard": {
"field_d.subfield_e.myfield": "*string*"
}
},
"aggs": {
"interest": {
"terms": {
"field": "field_d.subfield_e.myfield",
"size": 10
}
},
"aggs": {
"buckets": {
"bucket_selector": {
"buckets_path": {
"key": "_key"
},
"script": "params.key.contains('string')"
}
}
}
}
},
"size": 0
}
So, how can I filter aggregation buckets (terms aggs) by their string key?
I solved it by switching subfield_e to a nested object instead of a plain (non-nested) array of objects, and I reimported all data into this new mapping.
The current mapping looks like this:
"mappings": {
"properties": {
"field_a": { .. },
"field_b": { .. },
"field_c": { .. },
"field_d": {
"properties": {
"subfield_a": {...},
"subfield_b": {...},
"subfield_c": {...},
"subfield_d": {...},
"subfield_e": {
"type": "nested",   <======= This line added
"properties": {
"myfield": {
"type": "keyword"
},
"another_a": {...},
"another_b": {...},
}
}
}
}
}
}
And final working query is:
GET /index/_search
{
"query": {
"nested": {
"path": "field_d.subfield_e",
"query": {
"wildcard": {
"field_d.subfield_e.myfield": {
"value": "*string*"
}
}
}
}
},
"aggs": {
"agg": {
"nested": {
"path": "field_d.subfield_e"
},
"aggs": {
"inner": {
"filter": {
"wildcard": {
"field_d.subfield_e.myfield": "*string*"
}
}, "aggs": {
"interest": {
"terms": {
"field": "field_d.subfield_e.myfield",
"size": 10
}
}
}
}
}
}
},
"size": 0
}
In my case, this query is much faster than using include/exclude in the terms aggregation.

How to diversify the result of top-hits aggregation?

Let's start with a concrete example. I have a document with these fields:
{
"template": {
"mappings": {
"template": {
"properties": {
"tid": {
"type": "long"
},
"folder_id": {
"type": "long"
},
"status": {
"type": "integer"
},
"major_num": {
"type": "integer"
}
}
}
}
}
}
I want to aggregate the query result by the field folder_id and, for each folder_id group, retrieve the _source details of the top-N documents. So I wrote the query DSL like this:
GET /template/template/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"status": 1
}
}
]
}
},
"aggs": {
"folder": {
"terms": {
"field": "folder_id",
"size": 10
},
"aggs": {
"top_hit":{
"top_hits": {
"size": 5,
"_source": ["major_num"]
}
}
}
}
}
}
However, there is now a requirement that the top-hit documents for each folder_id must be diversified on the field major_num. For each folder_id, the documents retrieved by the top_hits sub-aggregation under the terms aggregation must be unique on the field major_num; that is, for each major_num value, at most 1 document should be returned in the top_hits result.
The top_hits aggregation cannot accept sub-aggregations, so how should I solve this?
Why not simply add another terms aggregation on the major_num field?
GET /template/template/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"status": 1
}
}
]
}
},
"aggs": {
"folder": {
"terms": {
"field": "folder_id",
"size": 10
},
"aggs": {
"majornum": {
"terms": {
"field": "major_num",
"size": 10
},
"aggs": {
"top_hit": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}

Elasticsearch - Applying multi level filter on nested aggregation bucket?

I'm trying to get distinct nested objects by applying multiple filters.
Basically in Elasticsearch I have cities as top level document and inside I have nested citizens documents, which have another nested pets documents.
I am trying to get all citizens that have certain conditions applied on all of these 3 levels (cities, citizens and pets):
Give me all distinct citizens
that have age:"40",
that have pets "name":"Casper",
from cities with office_type="secondary"
I know that to filter 1st level I can use query condition, and then if I need to filter the nested citizens I can add a filter in the aggregation level.
I am using this article as an example: https://iridakos.com/tutorials/2018/10/22/elasticsearch-bucket-aggregations.html
Query working so far:
GET city_offices/_search
{
"size" : 10,
"query": {
"term" : { "office_type" : "secondary" }
},
"aggs": {
"citizens": {
"nested": {
"path": "citizens"
},
"aggs": {
"inner_agg": {
"filter": {
"term": { "citizens.age": "40" }
} ,
"aggs": {
"occupations": {
"terms": {
"field": "citizens.occupation"
}
}
}
}
}
}
}
}
BUT: How can I add the "pets" nested filter condition?
Mapping:
PUT city_offices
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"city": {
"type": "keyword"
},
"office_type": {
"type": "keyword"
},
"citizens": {
"type": "nested",
"properties": {
"occupation": {
"type": "keyword"
},
"age": {
"type": "integer"
},
"pets": {
"type": "nested",
"properties": {
"kind": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
}
}
}
}
}
}
}
Index data:
PUT /city_offices/doc/1
{
"city":"Athens",
"office_type":"secondary",
"citizens":[
{
"occupation":"Statistician",
"age":30,
"pets":[
{
"kind":"Cat",
"name":"Phoebe",
"age":14
}
]
},
{
"occupation":"Librarian",
"age":30,
"pets":[
{
"kind":"Rabbit",
"name":"Nino",
"age":13
}
]
},
{
"occupation":"Librarian",
"age":40,
"pets":[
{
"kind":"Rabbit",
"name":"Nino",
"age":13
}
]
},
{
"occupation":"Statistician",
"age":40,
"pets":[
{
"kind":"Rabbit",
"name":"Casper",
"age":2
},
{
"kind":"Rabbit",
"name":"Nino",
"age":13
},
{
"kind":"Dog",
"name":"Nino",
"age":15
}
]
}
]
}
So I found a solution for this.
Basically I apply top level filters in the query section and then apply rest of conditions in the aggregations.
First I apply citizens level filter aggregation, then I go inside nested pets and apply the filter and then I need to get back up to citizens level (using reverse_nested: citizens) and then set the term that will generate the final bucket.
Query looks like this:
GET city_offices/_search
{
"size" : 10,
"query": {
"term" : { "office_type" : "secondary" }
},
"aggs": {
"citizens": {
"nested": {
"path": "citizens"
},
"aggs": {
"inner": {
"filter": {
"term": { "citizens.age": "40" }
} ,
"aggs": {
"occupations": {
"nested": {
"path": "citizens.pets"
},
"aggs": {
"inner_pets": {
"filter": {
"term": { "citizens.pets.name": "Casper" }
} ,
"aggs": {
"lll": {
"reverse_nested": {
"path": "citizens"
},
"aggs": {
"xxx": {
"terms": {
"field": "citizens.occupation",
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
}
}
The response bucket looks like this:
"xxx": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Librarian",
"doc_count": 1
},
{
"key": "Statistician",
"doc_count": 1
}
]
}
Any other suggestions?

Facet by objects(tags) in an array

I am running into a query problem with ElasticSearch.
We have objects that looks like this:
{
"id":"1234",
"tags":[
{ "tagName": "T1", "tagValue":"V1"},
{ "tagName": "T2", "tagValue":"V2"},
{ "tagName": "T3", "tagValue":"V3"}
]
}
{
"id":"5678",
"tags":[
{ "tagName": "T1", "tagValue":"X1"},
{ "tagName": "T2", "tagValue":"X2"}
]
}
And I would like to get a list of tagValues for tagName=T1, which is "V1" and "X1".
I tried
{
"filter": {
"bool": {
"must": [
{
"term":{
"tags.tagName": "T1"
}
}
]
}
},
"facets": {
"TagValues":{
"filter": {
"term": {
"tags.tagName": "T1"
}
},
"terms": {
"field": "tags.tagValue",
"size": 30
}
}
}
}
It seems like it's returning all tagValues from all tags "T1", "T2", and "T3".
Can someone please help me with this query? How can I get a faceted list for objects that are in an array?
Any help would be appreciated.
Thank you,
The main idea is to use the nested type for your tags field. Here is the mapping you should use:
curl -XPUT localhost:9200/mytags -d '{
"mappings": {
"mytag": {
"properties": {
"id": {
"type": "string"
},
"tags": {
"type": "nested",
"properties": {
"tagName": {
"type": "string",
"index": "not_analyzed"
},
"tagValue": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}'
Then you can reindex your data and run a query like the one below, which will first filter only the document containing a tagName whose value is T1 and then using aggregations (don't use facets anymore as they are deprecated), you can again select only those tags whose tagName is T1 and then retrieve the associated tagValue fields. This will get you the expected V1 and X1 values.
curl -XPOST localhost:9200/mytags/mytag/_search -d '{
"size": 0,
"query": {
"filtered": {
"filter": {
"nested": {
"path": "tags",
"query": {
"term": {
"tags.tagName": "T1"
}
}
}
}
}
},
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"values": {
"filter": {
"term": {
"tags.tagName": "T1"
}
},
"aggs": {
"values": {
"terms": {
"field": "tags.tagValue"
}
}
}
}
}
}
}
}'

Aggregates in Nest (Elastic) with filter having both nested and parent objects

I have a catalog of products that I want to calculate aggregates on. The trouble comes when trying to do nested aggregations with a filter that has both nested and parent fields in it. Either it gives wrong counts or 0 hits. Here is a sample of my product object mapping:
"Products": {
"properties": {
"ProductID": {
"type": "long"
},
"ProductType": {
"type": "long"
},
"ProductName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"Prices": {
"type": "nested",
"properties": {
"CurrencyType": {
"type": "integer"
},
"Cost": {
"type": "double"
}
}
}
}
}
Here is an example of the sql query that I am trying to replicate in elastic:
SELECT PRODPR.Cost AS PRODPR_Cost
,COUNT(PROD.ProdcutID) AS PROD_ProductID_Count
FROM Products PROD WITH (NOLOCK)
LEFT OUTER JOIN Prices PRODPR WITH (NOLOCK) ON (PRODPR.objectid = PROD.objectid)
WHERE PRODPR.CurrencyType = 4
AND PROD.ProductType IN (
11273
,11293
,11294
)
GROUP BY PRODPR.Cost
Elastic Search queries I came up with:
First One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me wrong counts)
GET /IndexName/Products/_search
{
"aggs": {
"price_agg": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "Prices",
"filter": {
"term": {
"Prices.CurrencyType": "8"
}
}
}
},
{
"terms": {
"ProductType": [
"11273",
"11293",
"11294"
]
}
}
]
}
},
"aggs": {
"price_nested_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": { },
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
Second One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me 0 hits):
GET /IndexName/Prodcuts/_search
{
"aggs": {
"price_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"currency_filter": {
"filter": {
"bool": {
"must": [
{
"term": {
"Prices.CurrrencyType": "4"
}
},
{
"terms": {
"ProductType": [
"11273",
"11293"
]
}
}
]
}
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": {},
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
I have tried some more queries but the above two are the closest I came up with. Has anyone come across this use case? What am I doing wrong? Any help is appreciated. Thanks!

Resources