Elasticsearch - aggregation and counting elements from array - elasticsearch

I'm learning Elasticsearch and I have a problem with aggregating and counting elements. Let's say we have our data like this:
{
"sessionId": "session1",
"messages": [
{ "intent": "intentX", "message": "foo1" },
{ "intent": "intentY", "message": "foo2" },
{ "intent": "intentY", "message": "foo3" },
{ "intent": "intentZ", "message": "foo4" }
]
},
{
"sessionId": "session2",
"messages": [
{ "intent": "intentX", "message": "foo5" },
{ "intent": "intentY", "message": "foo6" },
{ "intent": "intentY", "message": "foo7" },
{ "intent": "intentY", "message": "foo8" }
]
}
How can I count how many times each intent occurred? I've tried the following query, but it seems to return incorrect values, and I'm not sure how they are being counted. All mappings are the defaults.
{
"size": 0,
"aggs": {
"intents": {
"terms": {
"field": "messages.intent.keyword"
},
"aggs" : {
"count" : {
"value_count" : {
"field" : "messages.intent.keyword"
}
}
}
}
}
}
The result for the query:
{
"took":2,
"timed_out":false,
"_shards":{
"total":1,
"successful":1,
"skipped":0,
"failed":0
},
"hits":{
"total":{
"value":2,
"relation":"eq"
},
"max_score":null,
"hits":[
]
},
"aggregations":{
"intents":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":"intentX",
"doc_count":2,
"count":{
"value":5
}
},
{
"key":"intentY",
"doc_count":2,
"count":{
"value":5
}
},
{
"key":"intentZ",
"doc_count":1,
"count":{
"value":3
}
}
]
}
}
}
The result I want is intentX.count.value = 2, intentY.count.value = 5 and intentZ.count.value = 1, i.e. the total number of occurrences of each intent across all arrays, including duplicates.
Thanks for your help.

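I've got it - I had to map the "messages" field as "nested". With the default (object) mapping, the array of messages is flattened into a per-document set of intent values, so the aggregation counts the distinct intents of every document in a bucket rather than the individual messages. With the nested mapping in place, I used this query to get what I wanted: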
{
"size": 0,
"aggs": {
"intents": {
"nested": {
"path": "messages"
},
"aggs": {
"count": {
"terms": {
"field": "messages.intent.keyword"
}
}
}
}
}
}
This gave me the desired result.

Related

Nested aggregation in nested aggregation query

I have the below (abbreviated) document in Elasticsearch 7.1, and I am focusing on the questions.influencerReponse.selectAllThatApplyResponses path.
{
"questions": [
{
"questionId": "79cfc6e7-731e-4d83-9dd6-82f4f39fff03",
"questionKind": "select_all_that_apply",
"questionText": "Have you heard of any of the following charities?",
"questionOptions": {
"1": "Plan International",
"2": "Young Women's Trust",
"3": "Women For Refugee Women",
"4": "The FPA"
},
"influencerReponse": {
"questionId": "79cfc6e7-731e-4d83-9dd6-82f4f39fff03",
"questionKind": "select_all_that_apply",
"text": null,
"questionOrder": 3,
"order": null,
"shortAnswerResponse": null,
"viewerSentimentResponse": null,
"yesNoResponse": null,
"selectAllThatApplyResponses": [
{
"key": "2",
"value": "Young Women's Trust"
}
]
}
}
]
}
I want to get a terms aggregation on the key or the value; both are of keyword type. I have accomplished that before, but not at the level of the selectAllThatApplyResponses nested type.
Here's what I have so far; it throws the error below.
{
"query": {
"bool": {
"must": [
{
"term": {
"sponsorshipId": {
"value": "33c7140f-23ae-46f2-a0fe-49e2251114e4"
}
}
}
]
}
},
"track_total_hits": true,
"size": 0,
"aggs": {
"select_all_that_apply_responses": {
"nested": {
"path": "questions"
},
"aggs": {
"filter_types": {
"filter": {
"bool": {
"must": [
{
"match": {
"questions.questionId": "79cfc6e7-731e-4d83-9dd6-82f4f39fff03"
}
}
]
}
},
"aggs": {
"select_all_that_apply_nested": {
"nested": {
"path": "questions.influencerReponse.selectAllThatApplyResponses"
},
"aggs": {
"terms": {
"field": "questions.influencerReponse.selectAllThatApplyResponses.key"
}
}
}
}
}
}
}
}
}
I am receiving the below error.
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "Expected [START_OBJECT] under [field], but got a [VALUE_STRING] in [terms]",
"line": 42,
"col": 46
}
],
"type": "parsing_exception",
"reason": "Expected [START_OBJECT] under [field], but got a [VALUE_STRING] in [terms]",
"line": 42,
"col": 46
},
"status": 400
}
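The final terms agg needs a name too -- I called it select_all_that_apply_nested_terms. Every key directly under "aggs" is treated as an aggregation name, so in your query "terms" was parsed as a name and "field" as its aggregation type, which is why the parser expected an object under [field].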
...
"select_all_that_apply_nested":{
"nested":{
"path":"questions.influencerReponse.selectAllThatApplyResponses"
},
"aggs":{
"select_all_that_apply_nested_terms":{
"terms":{
"field":"questions.influencerReponse.selectAllThatApplyResponses.key"
}
}
}
}
...

Unable to create nested date aggregation query

I am trying to create an ElasticSearch aggregation query which can generate sum or average of value in all my ingested documents.
The documents are of the format -
{
"weather":"cold",
"date_1":"2017/07/05",
"feedback":[
{
"date_2":"2017/08/07",
"value":28,
"comment":"not cold"
},{
"date_2":"2017/08/09",
"value":48,
"comment":"a bit chilly"
},{
"date_2":"2017/09/07",
"value":18,
"comment":"very cold"
}, ...
]
}
I am able to create a sum aggregation of all "feedback.value" using "date_1" by using the following request -
GET _search
{
"query": {
"query_string": {
"query": "cold"
}
},
"size": 0,
"aggs": {
"temperature": {
"date_histogram":{
"field" : "date_1",
"interval" : "month"
},
"aggs":{
"temperature_agg":{
"terms": {
"field": "feedback.value"
}
}
}
}
}
}
However, I need to run the same aggregation across all documents based on "feedback.date_2" instead. I am not sure whether Elasticsearch can resolve such an aggregation or how to approach it. Any guidance would be helpful.
[EDIT]
Mapping file (I only define the nested items; ES identifies the other fields on its own)
{
"mappings": {
"catalog_item": {
"properties": {
"feedback":{
"type":"nested",
"properties":{
"date_2":{
"type": "date",
"format":"YYYY-MM-DD"
},
"value": {
"type": "float"
},
"comment": {
"type": "text"
}
}
}
}
}
}
}
You would need to make use of nested documents and sum aggregation.
Here's a working example:
Sample Mapping:
PUT test
{
"mappings": {
"doc": {
"properties": {
"feedback": {
"type": "nested"
}
}
}
}
}
Add Sample document:
PUT test/doc/1
{
"date_1": "2017/08/07",
"feedback": [
{
"date_2": "2017/08/07",
"value": 28,
"comment": "not cold"
},
{
"date_2": "2017/08/09",
"value": 48,
"comment": "a bit chilly"
},
{
"date_2": "2017/09/07",
"value": 18,
"comment": "very cold"
}
]
}
Calculate both the sum and average based on date_2.
GET test/_search
{
"size": 0,
"aggs": {
"temperature_aggregation": {
"nested": {
"path": "feedback"
},
"aggs": {
"temperature": {
"date_histogram": {
"field": "feedback.date_2",
"interval": "month"
},
"aggs": {
"sum": {
"sum": {
"field": "feedback.value"
}
},
"avg": {
"avg": {
"field": "feedback.value"
}
}
}
}
}
}
}
}

Getting "expected [END_OBJECT] but found [FIELD_NAME]" in Kibana

I am working on Kibana 6.x and using Sentinl to generate email alerts. Below is my query to send an email if my application logs "CREDENTIALS ARE NOT DEFINED FOR PULL EVENT SOURCES", with a threshold of 1. When I play my watcher I get the error below.
Error: Watchers: play watcher : execute watcher : execute advanced watcher : get elasticsearch payload : search : [parsing_exception] [match] malformed query, expected [END_OBJECT] but found [FIELD_NAME], with { line=1 & col=80 }
Query:
"input": {
"search": {
"request": {
"index": [
"filebeat-2019.03.21"
],
"body": {
"query": {
"match": {
"msg": "CREDENTIALS ARE NOT DEFINED FOR PULL EVENT SOURCES"
},
"minimum_number_should_match": 1,
"bool": {
"filter": {
"range": {
"@timestamp": {
"gte": "now-15m/m",
"lte": "now/m",
"format": "epoch_millis"
}
}
}
}
},
"size": 0,
"aggs": {
"dateAgg": {
"date_histogram": {
"field": "@timestamp",
"time_zone": "Europe/Amsterdam",
"interval": "1m",
"min_doc_count": 1
}
}
}
}
}
}
}
Also I have used "minimum_number_should_match" to track threshold value. Is that correct?
Found the solution (I have not added the threshold value here):
{
"actions": {
"email_html_alarm_2daee075-0f24-408e-a362-59172b5e3a1d": {
"name": "email html alarm",
"throttle_period": "1m",
"email_html": {
"stateless": false,
"subject": "Error v1.9 conditon",
"priority": "high",
"html": "<p>{{payload.hits.hits}} test hits Hi {{watcher.username}}</p>\n<p>There are {{payload.hits.total}} results found by the watcher <i>{{watcher.title}}</i>.</p>\n\n<div style=\"color:grey;\">\n <hr />\n <p>This watcher sends alerts based on the following criteria:</p>\n <ul><li>{{watcher.wizard.chart_query_params.queryType}} of {{watcher.wizard.chart_query_params.over.type}} over the last {{watcher.wizard.chart_query_params.last.n}} {{watcher.wizard.chart_query_params.last.unit}} {{watcher.wizard.chart_query_params.threshold.direction}} {{watcher.wizard.chart_query_params.threshold.n}} in index {{watcher.wizard.chart_query_params.index}}</li></ul>\n</div>",
"to": "abc@qwe.com",
"from": "abc@qwe.com"
}
}
},
"input": {
"search": {
"request": {
"index": [
"file-2019.04.03"
],
"body": {
"query": {
"bool": {
"must": {
"query_string": {
"query": "CREDENTIALS ARE NOT FOUND",
"analyze_wildcard": true,
"default_field": "*"
}
},
"filter": [{
"range": {
"@timestamp": {
"gte": "now-1d",
"lte": "now/m",
"format": "epoch_millis"
}
}
}]
}
}
}
}
}
},
"condition": {
"script": {
"script": "payload.hits.total > 0"
}
},
"trigger": {
"schedule": {
"later": "every 2 minutes"
}
},
"disable": true,
"report": false,
"title": "watcher_title",
"save_payload": false,
"spy": false,
"impersonate": false
}

Sum and count aggregations over Elasticsearch fields

I am new to Elasticsearch and I am looking to perform certain aggregations over the fields of an Elasticsearch 5.x index. I have an index that contains documents with the fields langs (which has a nested structure) and docLang. These are dynamically mapped fields. The following are example documents:
DOC 1:
{
"_index":"A",
"_type":"document",
"_id":"1",
"_source":{
"text":"This is a test sentence.",
"langs":{
"X":{
"en":1,
"es":2,
"zh":3
},
"Y":{
"en":4,
"es":5,
"zh":6
}
},
"docLang": "en"
}
}
DOC 2:
{
"_index":"A",
"_type":"document",
"_id":"2",
"_source":{
"text":"This is a test sentence.",
"langs":{
"X":{
"en":1,
"es":2
},
"Y":{
"en":3,
"es":4
}
},
"docLang": "es"
}
}
DOC 3:
{
"_index":"A",
"_type":"document",
"_id":"2",
"_source":{
"text":"This is a test sentence.",
"langs":{
"X":{
"en":1
},
"Y":{
"en":2
}
},
"docLang": "en"
}
}
I want to perform sum aggregation over the langs field in a way that for each key (X/Y) and for each language, I can get the sum across all documents in an index. Also, I want to produce the counts of documents for each type of language from docLang field.
e.g.: For above 3 documents, sum aggregation over langs field would look like below:
"langs":{
"X":{
"en":3,
"es":4,
"zh":3
},
"Y":{
"en":9,
"es":9,
"zh":6
}
}
And the docLang count would look like below:
"docLang":{
"en" : 2,
"es" : 1
}
Also because of some production env restrictions, I cannot use scripts in Elasticsearch. So, I was wondering if it is possible to use just field aggregation type for above fields?
{
"size": 0,
"aggs": {
"X": {
"nested": {
"path": "langs.X"
},
"aggs": {
"X_sum_en": {
"sum": {
"field": "langs.X.en"
}
},
"X_sum_es": {
"sum": {
"field": "langs.X.es"
}
},
"X_sum_zh": {
"sum": {
"field": "langs.X.zh"
}
}
}
},
"Y": {
"nested": {
"path": "langs.Y"
},
"aggs": {
"Y_sum_en": {
"sum": {
"field": "langs.Y.en"
}
},
"Y_sum_es": {
"sum": {
"field": "langs.Y.es"
}
},
"Y_sum_zh": {
"sum": {
"field": "langs.Y.zh"
}
}
}
},
"sum_docLang": {
"terms": {
"field": "docLang.keyword",
"size": 10
}
}
}
}
You didn't mention it, but I think it's important: I mapped X and Y as nested fields:
"langs": {
"properties": {
"X": {
"type": "nested",
"properties": {
"en": {
"type": "long"
},
"es": {
"type": "long"
},
"zh": {
"type": "long"
}
}
},
"Y": {
"type": "nested",
"properties": {
"en": {
"type": "long"
},
"es": {
"type": "long"
},
"zh": {
"type": "long"
}
}
}
}
}
But if your fields are not nested at all (and here I mean the actual nested field type in Elasticsearch), a simple aggregation like this one should be enough:
{
"size": 0,
"aggs": {
"X_sum_en": {
"sum": {
"field": "langs.X.en"
}
},
"X_sum_es": {
"sum": {
"field": "langs.X.es"
}
},
"X_sum_zh": {
"sum": {
"field": "langs.X.zh"
}
},
"Y_sum_en": {
"sum": {
"field": "langs.Y.en"
}
},
"Y_sum_es": {
"sum": {
"field": "langs.Y.es"
}
},
"Y_sum_zh": {
"sum": {
"field": "langs.Y.zh"
}
},
"sum_docLang": {
"terms": {
"field": "docLang.keyword",
"size": 10
}
}
}
}

ElasticSearch Aggregation on nested field with bucketing on parent id

Following is my doc structure
'Order': {
u'properties': {
u'order_id': {u'type': u'integer'},
'Product': {
u'properties': {
u'product_id': {u'type': u'integer'},
u'product_category': {'type': 'text'},
},
u'type': u'nested'
}
}
}
Doc1
"Order": {
"order_id": "1",
"Product": [
{
"product_id": "1",
"product_category": "category_1"
},
{
"product_id": "2",
"product_category": "category_2"
},
{
"product_id": "3",
"product_category": "category_2"
},
]
}
Doc2
"Order": {
"order_id": "2",
"Product": [
{
"product_id": "4",
"product_category": "category_1"
},
{
"product_id": "1",
"product_category": "category_1"
},
{
"product_id": "2",
"product_category": "category_2"
},
]
}
I want to get following output
"aggregations": {
"Order": [
{
"order_id": "1"
"category_counts": [
{
"category_1": 1
},
{
"category_2": 2
},
]
},
{
"order_id": "1"
"category_counts": [
{
"category_1": 2
},
{
"category_2": 1
},
]
},
]
}
I tried using nested aggregation
"aggs": {
"Product-nested": {
"nested": {
"path": "Product"
}
"aggs": {
"category_counts": {
"terms": {
"field": "Product.product_category"
}
}
},
}
}
It does not give output for each order but gives combined output for all orders
{
"Product-nested": {
"category_counts": [
"category_1": 3,
"category_2": 3
]
}
}
I have two questions:
How to get the desired output in above scenario?
What if instead of single product_category I have an array of
product_categories then how will we achieve the same in this
scenario?
I am using elasticsearch >= 5.0
I have an idea, but I don't think it's the best one.
You can make a terms aggregation on the "order_id" field, then a nested sub-aggregation on "Product.product_category".
Something like this:
{
"aggs": {
"all-order-id": {
"terms": {
"field": "order_id",
"size": 10
},
"aggs": {
"Product-nested": {
"nested": {
"path": "Product"
},
"aggs": {
"all-products-in-order-id": {
"terms": {
"field": "Product.product_category"
}
}
}
}
}
}
}
}
Sorry, it looks a bit messy; I'm not so good with this answer editor.

Resources