Elasticsearch querying number of dates in array matching query - elasticsearch

I have documents in the following form
PUT test_index/_doc/1
{
"dates" : [
"2018-07-15T14:12:12",
"2018-09-15T14:12:12",
"2018-11-15T14:12:12",
"2019-01-15T14:12:12",
"2019-03-15T14:12:12",
"2019-04-15T14:12:12",
"2019-05-15T14:12:12"],
"message" : "hello world"
}
How do I query for documents such that there are n number of dates within the dates array falling in between two specified dates?
For example: Find all documents with 3 dates in the dates array falling in between "2018-05-15T14:12:12" and "2018-12-15T14:12:12" -- this should return the above document as "2018-07-15T14:12:12", "2018-09-15T14:12:12" and "2018-11-15T14:12:12" fall between "2018-05-15T14:12:12" and "2018-12-15T14:12:12".

I recently faced the same problem. However came up with two solutions.
1) If you do not want to change your current mapping, you could query for the documents using query_string. Also note you will have to create the query object according to the range that you have. ("\"2019-04-08\" OR \"2019-04-09\" OR \"2019-04-10\" ")
{
"query": {
"query_string": {
"default_field": "dates",
"query": "\"2019-04-08\" OR \"2019-04-09\" OR \"2019-04-10\" "
}
}
}
However,this type of a query only makes sense if the range is short.
2) So the second way is the nested method. But you will have to change your current mapping in such a way.
{
"properties": {
"dates": {
"type": "nested",
"properties": {
"key": {
"type": "date",
"format": "YYYY-MM-dd"
}
}
}
}
}
So your query will look something like this :-
{
"query": {
"nested": {
"path": "dates",
"query": {
"bool": {
"must": [
{
"range": {
"dates.key": {
"gte": "2018-04-01",
"lte": "2018-12-31"
}
}
}
]
}
}
}
}
}

You can create dates as a nested document and use bucket selector aggregation.
{
"empId":1,
"dates":[
{
"Days":"2019-01-01"
},
{
"Days":"2019-01-02"
}
]
}
Mapping:
"mappings" : {
"properties" : {
"empId" : {
"type" : "keyword"
},
"dates" : {
"type" : "nested",
"properties" : {
"Days" : {
"type" : "date"
}
}
}
}
}
GET profile/_search
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "dates",
"query": {
"range": {
"dates.Days": {
"format": "yyyy-MM-dd",
"gte": "2019-05-01",
"lte": "2019-05-30"
}
}
}
}
}
}
},
"aggs": {
"terms_parent_id": {
"terms": {
"field": "empId"
},
"aggs": {
"availabilities": {
"nested": {
"path": "dates"
},
"aggs": {
"avail": {
"range": {
"field": "dates.Days",
"ranges": [
{
"from": "2019-05-01",
"to": "2019-05-30"
}
]
},
"aggs": {
"count_Total": {
"value_count": {
"field": "dates.Days"
}
}
}
},
"max_hourly_inner": {
"max_bucket": {
"buckets_path": "avail>count_Total"
}
}
}
},
"bucket_selector_page_id_term_count": {
"bucket_selector": {
"buckets_path": {
"children_count": "availabilities>max_hourly_inner"
},
"script": "params.children_count>=19;" ---> give the number of days that should match
}
},
"hits": {
"top_hits": {
"size": 10
}
}
}
}
}
}

I found my own answer to this, although I'm not sure how efficient it is compared to the other answers:
GET test_index/_search
{
"query":{
"bool" : {
"filter" : {
"script" : {
"script" : {"source":"""
int count = 0;
for (int i=0; i<doc['dates'].length; ++i) {
if (params.first_date < doc['dates'][i].toInstant().toEpochMilli() && doc['dates'][i].toInstant().toEpochMilli() < params.second_date) {
count += 1;
}
}
if (count >= 2) {
return true
} else {
return false
}
""",
"lang":"painless",
"params": {
"first_date": 1554818400000,
"second_date": 1583020800000
}
}
}
}
}
}
}
where the parameters are the two dates in epoch time. I've chosen 2 matches here, but obviously you can generalise to any number.

Related

how to get value only from Elasticsearch sum aggregation query?

I'm using this query
POST /stats/_search?filter_path=aggregations.views.value
{
"aggs": {
"views": {
"sum": {
"field": "viewCount"
}
}
},
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {"gte":"now-2d"}
}
}
]
}
}
}
and it gives the result
{
"aggregations" : {
"views" : {
"value" : 49198.0
}
}
}
I'm looking to only get "49189.0" or the least amount of info possible, maybe "{value: 49198.0}"

Count Aggregation on filtered nested object giving incorrect result

Our Elastic Mapping
> {"mappings": {
> "products" : {
> "properties":{
> "name " : {
> "type" : "keyword"
> },
> "resellers" : {
> "type" : "nested",
> "properties" : {
> "name" : { "type" : "text" },
> "price" : { "type" : "double" }
> }
> }
> }
> }
> }}
In this mapping each product stores the list of resellers which are selling it at specific price. We have requirment to find all distinct prices of specific reseller.
As per my understanding we need to have query DSL which should first have nested filter on given reseller and then apply count aggregation. We have formed following ES query for ES 5.6 version:-
{
"query": {
"nested": {
"path": "resellers",
"query": {
"bool": {
"filter": {
"match_phrase_prefix": {
"resellers.name": "flipkart"
}
}
}
}
}
},
"aggs": {
"narrow": {
"filter": {
"nested": {
"path": "resellers",
"query": {
"bool": {
"filter": {
"term": {
"resellers.name": "flipkart"
}
}
}
}
}
},
"aggs": {
"state": {
"nested": {
"path": "resellers"
},
"aggs": {
"count": {
"terms": {
"field": "resellers.price"
}
}
}
}
}
}
}
}
This query is generating incorrect output. Output also contain price of other resellers which are present in elastic documents ( like Amazon,Snapdeal etc). Can somebody help to correct query?
Try this query instead (i.e. filter inside the nested aggregation, not outside of it):
{
"query": {
"nested": {
"path": "resellers",
"query": {
"bool": {
"filter": {
"match_phrase_prefix": {
"resellers.name": "flipkart"
}
}
}
}
}
},
"aggs": {
"resellers": {
"nested": {
"path": "resellers"
},
"aggs": {
"narrow": {
"filter": {
"term": {
"resellers.name": "flipkart"
}
},
"aggs": {
"count": {
"terms": {
"field": "resellers.price"
}
}
}
}
}
}
}
}

Combine multiple individual queries into one to get aggregated result in Elasticsearch

I have built two queries in ElasticSearch to get the counts for each error message. for example, the first query is to get how many error messages related to "was not found" error
GET /logstash*/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.pod_name": "api"
}
},
{
"match": {
"log": "error"
}
},
{
"match": {
"log": {
"query": "was not found",
"operator": "and"
}
}
},
{
"range": {"#timestamp": {
"time_zone": "CET",
"gt": "now-7d",
"lte": "now"}}
}
]
}
}
}
},
"aggs" : {
"type_count" : {
"value_count" : {
"script" : {
"source" : "doc['log.keyword'].value"
}
}
}
}
}
The second query is to get the count of error messages related to "Duplicate Entry" error
GET /logstash*/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.pod_name": "api"
}
},
{
"match": {
"log": "error"
}
},
{
"match": {
"log": {
"query": "Duplicate entry",
"operator": "and"
}
}
},
{
"range": {"#timestamp": {
"time_zone": "CET",
"gt": "now-7d",
"lte": "now"}}
}
]
}
}
}
},
"aggs" : {
"type_count" : {
"value_count" : {
"script" : {
"source" : "doc['log.keyword'].value"
}
}
}
}
}
My boss really wants me to combine these individual query into a one big query, then get the list of counts for each error messages in one output. Since we have a lot of error messages, which means we have to write each query for each error message, then we have to run each query to get the counts. Is there a way I can click one run to get the list of counts?
I have been trying use query string query and looking for solutions on either Stack Overflow and Documentation. However, there is no luck
You can use filter aggregation along with the value_count aggregation to combine these two queries. In both the queries, out of the 4 queries inside must clause only one differs. You can take this out and combine them with the two filter aggregations as below:
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"match": {
"kubernetes.pod_name": "api"
}
},
{
"match": {
"log": "error"
}
},
{
"range": {
"#timestamp": {
"time_zone": "CET",
"gt": "now-7d",
"lte": "now"
}
}
}
]
}
}
}
},
"aggs": {
"not_found_count": {
"filter": {
"match": {
"log": {
"query": "was not found",
"operator": "and"
}
}
},
"aggs": {
"count": {
"value_count": {
"script": {
"source": "doc['log.keyword'].value"
}
}
}
}
},
"duplicate_entry_count": {
"filter": {
"match": {
"log": {
"query": "Duplicate entry",
"operator": "and"
}
}
},
"aggs": {
"count": {
"value_count": {
"script": {
"source": "doc['log.keyword'].value"
}
}
}
}
}
}
}

Elasticsearch 5 | filter nested object and nested aggregation count

elasticsearch 5.2
I want to filter my items by the nested object state.id = 101 and an aggregation count for all the items by state.id. There are items with the state ids 101, 102 and 103.
The aggregation count is now limited to state.id 101. I want the only get the items with state.id = 101 and the aggregation count for all state ids.
How can i do this?
GET index/items/_search
{
"query": {
"nested" : {
"path" : "state",
"query": {
"bool": {
"filter": {
"term": { "state.id": 101 }
}
}
}
}
},
"aggs" : {
"state" : {
"nested" : {
"path" : "state"
},
"aggs" : {
"count" : { "terms" : { "field" : "state.id" } }
}
}
}
}
You need to use post_filter instead of query: (Oh and use POST instead of GET when sending a payload)
POST index/items/_search
{
"post_filter": { <--- change this
"nested": {
"path": "state",
"query": {
"bool": {
"filter": {
"term": {
"state.id": 101
}
}
}
}
}
},
"aggs": {
"state": {
"nested": {
"path": "state"
},
"aggs": {
"count": {
"terms": {
"field": "state.id"
}
}
}
}
}
}
If you want to further narrow the nested aggregation, you can also add a filter:
POST index/items/_search
{
"post_filter": {
"nested": {
"path": "state",
"query": {
"bool": {
"filter": {
"term": {
"state.id": 101
}
}
}
}
}
},
"aggs": {
"narrow": {
"filter": {
"nested": {
"path": "state",
"query": {
"bool": {
"filter": {
"term": {
"state.id": 101
}
}
}
}
}
},
"aggs": {
"state": {
"nested": {
"path": "state"
},
"aggs": {
"count": {
"terms": {
"field": "state.id"
}
}
}
}
}
}
}
}

Elasticsearch Aggregation Word Count with using Stopwords

I'm using elasticsearch to store my data. I want to count the words in my documents. But I want to see the result without the stopwords. For example; in my current result I see 'and' is my top word. But I want to remove it. Currently I have 3802 stopwords in my stopword.txt. I don't want any of them to be shown in the aggregation result. How can I do that? MY current query;
{
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "now-0d/d"
}
}
}
]
}
},
"aggs": {
"words": {
"terms": {
"size" : 0,
"field": "text"
}
}
}
}
The way I want query to work is;
{
"aggs": {
"filtered": {
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "now-0d/d"
}
}
}
]
}
},
"filter": {
"my_stop": {
"type": "stop",
"stopwords_path": "/work/projects/stop_words.txt"
}
},
"aggs": {
"words": {
"terms": {
"size" : 0,
"field": "text"
}
}
}
}
}
}
By the way, I have my stopwords list in my custom analyzer.But it doesn't work the way I want.

Resources