how to compare two aggregations in elastic search - elasticsearch

I have a stream of transaction data, which I'm grouping by 10m interval and counting the number of transactions in one aggregation, and computing a moving average in another. I would like to query the results only for the case where total_count is > moving average.
This query returns just fine.
GET /_search
{
"aggs": {
"my_date_histo":{
"date_histogram":{
"field":"created_at",
"interval":"10m"
},
"aggs":{
"the_count":{
"value_count" : {"field" : "user_id"}
},
"the_movavg":{
"moving_avg":{
"buckets_path": "the_count" ,
"window": 5,
"model": "simple"
}
}
}
}
}
}
But when I try the following it throws error,
GET /_search
{
"aggs": {
"my_date_histo":{
"date_histogram":{
"field":"created_at",
"interval":"10m"
},
"aggs":{
"the_count":{
"value_count" : {"field" : "user_id"}
},
"the_movavg":{
"moving_avg":{
"buckets_path": "the_count" ,
"window": 5,
"model": "simple"
}
},
"final_filter": {
"bucket_selector": {
"buckets_path": {
"TheCount": "the_count",
"TheMovAvg": "the_movavg"
},
"script": "params.TheCount > params.TheMovAvg"
}
}
}
}
}
}
EDIT :
Mapping
{
"transaction-live": {
"mappings": {
"logs": {
"properties": {
"@timestamp": {
"type": "date"
},
"@version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"correspondent_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"created_at": {
"type": "date"
},
"discount": {
"type": "float"
},
"endpoint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"event_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fees": {
"type": "float"
},
"from_country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"from_currency_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fx_sent_receive": {
"type": "float"
},
"receive_amount": {
"type": "float"
},
"response_code": {
"type": "long"
},
"send_amount": {
"type": "float"
},
"source": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"source_version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startedtransaction_id": {
"type": "long"
},
"to_country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_agent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_id": {
"type": "long"
}
}
}
}
}
}
ERROR:
{
"error": {
"root_cause": [],
"type": "reduce_search_phase_exception",
"reason": "[reduce] ",
"phase": "fetch",
"grouped": true,
"failed_shards": [],
"caused_by": {
"type": "script_exception",
"reason": "runtime error",
"caused_by": {
"type": "null_pointer_exception",
"reason": null
},
"script_stack": [
"params.TheCount > params.TheMovAvg",
" ^---- HERE"
],
"script": "params.TheCount > params.TheMovAvg",
"lang": "painless"
}
},
"status": 503
}

I played around with your query a bit and found the issue.
Following is the working query you can use
{
"size": 0,
"aggs": {
"my_date_histo": {
"date_histogram": {
"field": "created_at",
"interval": "10m"
},
"aggs": {
"the_count": {
"value_count": {
"field": "user_id"
}
},
"the_movavg": {
"moving_avg": {
"buckets_path": "the_count",
"window": 5,
"model": "simple"
}
},
"final_filter": {
"bucket_selector": {
"buckets_path": {
"TheCount": "the_count",
"TheMovAvg": "the_movavg"
},
"script": "params.TheCount > (params.TheMovAvg == null ? 0 : params.TheMovAvg)"
}
}
}
}
}
}
Now to understand the issue, take the look at the following result of aggregation without the bucket_selector aggregation.
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 42,
"max_score": 0,
"hits": []
},
"aggregations": {
"my_date_histo": {
"buckets": [
{
"key_as_string": "2017-03-06T15:30:00.000Z",
"key": 1488814200000,
"doc_count": 14,
"the_count": {
"value": 14
}
},
{
"key_as_string": "2017-03-06T15:40:00.000Z",
"key": 1488814800000,
"doc_count": 0,
"the_count": {
"value": 0
}
},
{
"key_as_string": "2017-03-06T15:50:00.000Z",
"key": 1488815400000,
"doc_count": 14,
"the_count": {
"value": 14
},
"the_movavg": {
"value": 7
}
},
{
"key_as_string": "2017-03-06T16:00:00.000Z",
"key": 1488816000000,
"doc_count": 3,
"the_count": {
"value": 3
},
"the_movavg": {
"value": 14
}
},
{
"key_as_string": "2017-03-06T16:10:00.000Z",
"key": 1488816600000,
"doc_count": 8,
"the_count": {
"value": 7
},
"the_movavg": {
"value": 8.5
}
},
{
"key_as_string": "2017-03-06T16:20:00.000Z",
"key": 1488817200000,
"doc_count": 3,
"the_count": {
"value": 3
},
"the_movavg": {
"value": 6.375
}
}
]
}
}
}
If you observe the result above, the first two buckets don't have a moving_avg value computed for that window/setting. So when your bucket_selector script compared the two values, it threw a null pointer exception at runtime, because the Java comparison operator throws a NullPointerException when one of its operands is null.
Hope this helps you.
Thanks

Related

ElasticSearch painless, how can I access an array in _source

I try to execute a search request to the ElasticSearch (6.4.0) API which includes a custom script function. In this function I try to access an array which should be part of the response data. But I always get a 'null_pointer_exception':
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"i = 0; i < params['_source']['userStats'].length; i++) { } ",
" ^---- HERE"
],
"script": "double scoreBoost = 1; for (int i = 0; i < params['_source']['userStats'].length; i++) { } return _score * Math.log1p(scoreBoost);",
"lang": "painless"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "search--project",
"node": "...",
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"i = 0; i < params['_source']['userStats'].length; i++) { } ",
" ^---- HERE"
],
"script": "double scoreBoost = 1; for (int i = 0; i < params['_source']['userStats'].length; i++) { } return _score * Math.log1p(scoreBoost);",
"lang": "painless",
"caused_by": {
"type": "null_pointer_exception",
"reason": null
}
}
}
]
},
"status": 500
}
Also doc['userStats'] doesn't work.
Here is the complete request body:
{
"size": 10,
"query": {
"function_score": {
"query": {
"bool": {
"filter": {
"term": {
"_routing": "00000000-0000-0000-0000-000000000000"
}
},
"should": {
"query_string": {
"query": "123*",
"default_operator": "and",
"fuzziness": 1,
"analyze_wildcard": true,
"fields": [
"name^4",
"number^2",
"description",
"projectTypeId",
"projectStatusId",
"tags^1.5",
"company.name^2",
"company.number^2",
"company.industry",
"company.tags^1.5",
"company.companyTypes.name",
"company.companyContactInfos.value",
"company.companyContactInfos.addressLine1^1.25",
"company.companyContactInfos.addressLine2",
"company.companyContactInfos.zipCode^0.5",
"company.companyContactInfos.city",
"company.companyContactInfos.state",
"company.companyContactInfos.country",
"projectType.id",
"projectType.name",
"projectType.description",
"projectStatus.id",
"projectStatus.name",
"members.name",
"members.projectRoleName"
]
}
},
"minimum_should_match": 1
}
},
"functions": [
{
"script_score": {
"script": {
"source": "double scoreBoost = 1; for (int i = 0; i < params['_source']['userStats'].length; i++) { } return _score * Math.log1p(scoreBoost);",
"lang": "painless",
"params": {
"dtNow": 1543589276,
"uId": "00000000-0000-0000-0000-000000000000"
}
}
}
}
],
"score_mode": "multiply"
}
}
}
Without the script_score part the response look like this:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 4,
"hits": [
{
"_index": "search--project",
"_type": "projectentity",
"_id": "00000000-0000-0000-0000-000000000000",
"_score": 4,
"_routing": "00000000-0000-0000-0000-000000000000",
"_source": {
"name": "123",
"description": "123...",
"projectTypeId": "00000000-0000-0000-0000-000000000000",
"projectStatusId": "00000000-0000-0000-0000-000000000000",
"tags": [
"232",
"2331",
"343"
],
"plannedDuration": 0,
"startDate": "2018-07-09T22:00:00Z",
"projectType": {
"id": "00000000-0000-0000-0000-000000000000",
"name": "test 1",
"icon": "poll"
},
"projectStatus": {
"id": "00000000-0000-0000-0000-000000000000",
"name": "In Progress",
"type": "progress"
},
"members": [
{
"userId": "00000000-0000-0000-0000-000000000000",
"name": "dummy",
"projectRoleName": "test",
"hasImage": false
},
{
"userId": "00000000-0000-0000-0000-000000000000",
"name": "dummy ",
"projectRoleName": "Manager",
"hasImage": false
}
],
"id": "00000000-0000-0000-0000-000000000000",
"userStats": [
{
"userId": "00000000-0000-0000-0000-000000000000",
"openCount": 55,
"lastOpened": 1543851773
},
{
"userId": "00000000-0000-0000-0000-000000000000",
"openCount": 9,
"lastOpened": 1542372179
}
],
"indexTime": "2018-12-03T15:42:53.157649Z"
}
}
]
}
}
The mapping look like this:
{
"search--project": {
"aliases": {},
"mappings": {
"projectentity": {
"_routing": {
"required": true
},
"properties": {
"company": {
"properties": {
"companyTypes": {
"properties": {
"icon": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"indexTime": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"indexTime": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"number": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"members": {
"properties": {
"hasImage": {
"type": "boolean"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"projectRoleName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"userId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
...
"plannedDuration": {
"type": "long"
},
"startDate": {
"type": "date"
},
"userStats": {
"properties": {
"lastOpened": {
"type": "long"
},
"openCount": {
"type": "long"
},
"userId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
},
"settings": {
"index": {
"creation_date": "1539619646426",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "G5ohN1FvQBGkYFh_803Ifw",
"version": {
"created": "6040299"
},
"provided_name": "search--project"
}
}
}
}
Anyone has a suggestion what I'm doing wrong?
Thanks.
In the query you have your params identified as dtNow and uId. So if you want to use those in your script, you would use params.dtNow.
If you're wanting to use a property from your _source (i.e. userStats) you should be using ctx._source.userStats.length. There are more examples in the documentation: https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-examples.html.
EDIT
With a function_score query you'll either be using the doc map or params['_source']. The difference being that when you use doc it gets cached in memory and params['_source'] will be loaded up each time (see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html). The other caveat is that you need to use a non-analyzed field or an analyzed text field if fielddata is enabled. https://www.elastic.co/guide/en/elasticsearch/reference/master/modules-scripting-fields.html#modules-scripting-doc-vals
To achieve what you're trying to do, you should be able to just use a non-analyzed field in your userStats object. Something like this: doc['userStats.openCount'].length (here I'm assuming that openCount is required on your userStats object).

How to group documents by hour without day in elasticsearch?

I have an application in which a survey is answered every day by users, and I want to get the average answer hours. I tried some requests, but I can't group all documents by hour alone; they are grouped by hour per day.
I do this :
{
"aggs": {
"byHour": {
"date_histogram": {
"field": "date",
"interval": "hour",
"format" : "H"
}
}
}
}
}
The result is grouped by hour but also by date, and I want the day to be ignored.
[
{
"key_as_string": "0",
"key": 1533945600000,
"doc_count": 40,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "1",
"key": 1533949200000,
"doc_count": 345,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "23",
"key": 1534028400000,
"doc_count": 15,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "0",
"key": 1534032000000,
"doc_count": 0,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "1",
"key": 1534035600000,
"doc_count": 2,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "2",
"key": 1534039200000,
"doc_count": 3,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
Mapping of type
{
"myIndex": {
"mappings": {
"answer": {
"properties": {
"date": {
"type": "date"
},
"lang": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"level": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"offset": {
"type": "long"
},
"patientCaretrackId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"protocolId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"query": {
"properties": {
"constant_score": {
"properties": {
"filter": {
"properties": {
"bool": {
"properties": {
"must": {
"properties": {
"term": {
"properties": {
"questionId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"questionnaireId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
}
}
}
}
},
"questionId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"questionnaireId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"surgeonId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": true
}
}
}
}
}
}
Example of documents :
[
{
"date": "2018-09-11T00:00:00.000Z",
"lang": "fr",
"level": "red",
"offset": 21,
"patientCaretrackId": "5b894b10a9f7afec73762113",
"protocolId": "ptg-koos-long-v1",
"questionnaireId": "j21",
"surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
"value": "permanentes",
"questionId": "frequence-douleur-2"
},
{
"date": "2018-09-11T00:00:00.000Z",
"lang": "fr",
"level": "red",
"offset": 21,
"patientCaretrackId": "5b894b10a9f7afec73762113",
"protocolId": "ptg-koos-long-v1",
"questionnaireId": "j21",
"surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
"value": "permanentes",
"questionId": "frequence-douleur-2"
}
]
Is it possible to do this with an Elasticsearch query?
Thank you,
You can use a terms aggregation with a script:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-script
GET /_search
{
"aggs" : {
"hours" : {
"terms" : {
"script" : {
"source": "doc['date'].getHour()",
"lang": "painless"
}
}
}
}
}
(Just to give you an idea, not sure at all about the script itself ..)
I found a solution, thanks:
{
"size": 0,
"aggs": {
"byHour": {
"date_histogram": {
"field": "date",
"interval": "hour",
"format" : "H",
"keyed": true,
"time_zone": "+02:00"
}
}
}
}
The response :
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2796,
"max_score": 0,
"hits": []
},
"aggregations": {
"byHour": {
"buckets": {
"17": {
"key_as_string": "17",
"key": 1536159600000,
"doc_count": 2006
},
"18": {
"key_as_string": "18",
"key": 1536163200000,
"doc_count": 790
}
}
}
}
}

Elasticsearch: Query not able to return proper results

I have written the below query in the elasticsearch, for summing the column value of len_err where the app_name is 9 and the timestamp is specified in the query.
GET features-1/_search
{
"query": {
"match": {
"app_name": 9
}
},
"post_filter": {
"range": {
"timestamp": {
"gte": "2018-07-21T09:14:12Z",
"lte": "2018-07-21T09:14:20Z"
}
}
},
"aggs": {
"time_filter":{
"filter": {
"range": {
"timestamp": {
"gte": "2018-07-21T09:14:12Z",
"lte": "2018-07-21T09:14:20Z"
}
}
},
"aggs": {
"cont_err": {
"sum": {
"field": "len_err"
}
}
}
}
}
}
but the result that i am getting is
{
"took": 36,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
},
"aggregations": {
"time_filter": {
"doc_count": 0,
"cont_err": {
"value": 0
}
}
}
}
but when I check the logs i see that i have logs for this filter criteria
One sample doc that should match
Mapping below
{
"features-1": {
"mappings": {
"log": {
"properties": {
"app_name": {
"type": "long"
},
"len_err": {
"type": "long"
},
"len_msg": {
"type": "long"
},
"severity": {
"type": "long"
},
"source": {
"properties": {
"docker": {
"properties": {
"container_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"edge_id": {
"type": "long"
},
"kubernetes": {
"properties": {
"container_name": {
"type": "long"
},
"host": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"level": {
"type": "long"
},
"log": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"log_field": {
"properties": {
"LogLevel": {
"type": "long"
},
"charging": {
"type": "long"
}
}
}
}
},
"timestamp": {
"type": "date"
}
}
}
}
}
}

ElasticSearch: merge all inner_hits for nested queries

I am pretty new to elasticsearch and have been trying to create a query which would return me a record that matches all the must conditions of a bool-query. The bool-query is wrapped inside a constant_score: filter.
My mapping for the object is as below:
{
"mappings": {
"doc": {
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"components": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"parent_id": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"quantity_in_bundle": {
"type": "long"
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
},
"id": {
"type": "long"
},
"image": {
"properties": {
"id": {
"type": "long"
},
"isDefault": {
"type": "boolean"
},
"thumbnail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"incoming_qty": {
"type": "long"
},
"tags": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"color": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"members": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"image": {
"properties": {
"id": {
"type": "long"
},
"isDefault": {
"type": "boolean"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"incoming_qty": {
"type": "long"
},
"tags": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"color": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"master_id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"parent_id": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"product_url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"on_hand_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"product_stores": {
"type": "nested",
"properties": {
"channel_id": {
"type": "long"
},
"price": {
"type": "float"
},
"product_url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"store_id": {
"type": "long"
}
}
},
"product_warehouses": {
"type": "nested",
"properties": {
"available_qty": {
"type": "long"
},
"incoming_qty": {
"type": "long"
},
"outgoing_qty": {
"type": "long"
},
"warehouse_id": {
"type": "long"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"status": {
"type": "long"
},
"tenantId": {
"type": "long"
},
"type": {
"type": "long"
}
}
}
}
}
I am querying this index using the below query:
{
"from": 0,
"size": 20,
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"term": {
"tenantId": {
"value": 88,
"boost": 1
}
}
},
{
"terms": {
"type": [
2
],
"boost": 1
}
},
{
"bool": {
"should": [
{
"terms": {
"status": [
2
],
"boost": 1
}
},
{
"nested": {
"query": {
"terms": {
"members.status": [
2
],
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"terms": {
"product_stores.store_id": [
20889
],
"boost": 1
}
},
"path": "product_stores",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"terms": {
"members.product_stores.store_id": [
20889
],
"boost": 1
}
},
"path": "members.product_stores",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
{
"bool": {
"should": [
{
"nested": {
"query": {
"terms": {
"tags.id": [
1001
],
"boost": 1
}
},
"path": "tags",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"terms": {
"members.tags.id": [
1001
],
"boost": 1
}
},
"path": "members.tags",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
},
"path": "members",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "members",
"ignore_unmapped": false,
"from": 0,
"size": 100,
"version": false,
"explain": false,
"track_scores": false,
"_source": false
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"boost": 1
}
},
"sort": [
{
"_id": {
"order": "desc"
}
}
]
}
What I am trying to achieve with this is to fetch the object which has at least one nested object that matches all the must conditions in the main bool query. But I am getting records even if there is not a single object that matches all 3. For example, if there's a record that only matches status and tags but not the store_ids, Elasticsearch will return it even though all the conditions are part of must. Can anyone please explain the behaviour of this query to me? I tried reading the documentation but I am at a loss.
Any pointer or guidance will be much appreciated.
Thank you!
UPDATE:
I fixed this issue by merging all the nested shoulds under a single nested must.
In your query, you're using a should clause which will return results even if they don't match all of the should clause conditions. Your must clause takes precedence over the should clause.
According to the Bool Query documentation, you could adjust the minimum should match parameter.

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. In each of these documents, the CustomerInsightTargets property (with the properties Source, Value) can be an array with x items. What I am trying to achieve is an autocomplete (of top 5) on CustomerInsightTargets.Value grouped by CustomerInsightTargets.Source.
It would be helpful if anyone could give me a hint about how to select only a subset of nested objects from each document and use those nested objects in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos@mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If I run a top_hits aggregation, the result will include all targets from a document if any one of them matches my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use the aggregation to get the "autocomplete" Values grouped by Source and ordered by the _score. I tried to use a significant_terms aggregation but it doesn't work so well; also, the terms aggregation doesn't sort by score (and by _count), and having fuzziness also adds complexity.

Resources