ElasticSearch painless, how can I access an array in _source - elasticsearch

I am trying to execute a search request to the ElasticSearch (6.4.0) API which includes a custom script function. In this function I try to access an array which should be part of the response data. But I always get a 'null_pointer_exception':
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"i = 0; i < params['_source']['userStats'].length; i++) { } ",
" ^---- HERE"
],
"script": "double scoreBoost = 1; for (int i = 0; i < params['_source']['userStats'].length; i++) { } return _score * Math.log1p(scoreBoost);",
"lang": "painless"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "search--project",
"node": "...",
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"i = 0; i < params['_source']['userStats'].length; i++) { } ",
" ^---- HERE"
],
"script": "double scoreBoost = 1; for (int i = 0; i < params['_source']['userStats'].length; i++) { } return _score * Math.log1p(scoreBoost);",
"lang": "painless",
"caused_by": {
"type": "null_pointer_exception",
"reason": null
}
}
}
]
},
"status": 500
}
Also doc['userStats'] doesn't work.
Here is the complete request body:
{
"size": 10,
"query": {
"function_score": {
"query": {
"bool": {
"filter": {
"term": {
"_routing": "00000000-0000-0000-0000-000000000000"
}
},
"should": {
"query_string": {
"query": "123*",
"default_operator": "and",
"fuzziness": 1,
"analyze_wildcard": true,
"fields": [
"name^4",
"number^2",
"description",
"projectTypeId",
"projectStatusId",
"tags^1.5",
"company.name^2",
"company.number^2",
"company.industry",
"company.tags^1.5",
"company.companyTypes.name",
"company.companyContactInfos.value",
"company.companyContactInfos.addressLine1^1.25",
"company.companyContactInfos.addressLine2",
"company.companyContactInfos.zipCode^0.5",
"company.companyContactInfos.city",
"company.companyContactInfos.state",
"company.companyContactInfos.country",
"projectType.id",
"projectType.name",
"projectType.description",
"projectStatus.id",
"projectStatus.name",
"members.name",
"members.projectRoleName"
]
}
},
"minimum_should_match": 1
}
},
"functions": [
{
"script_score": {
"script": {
"source": "double scoreBoost = 1; for (int i = 0; i < params['_source']['userStats'].length; i++) { } return _score * Math.log1p(scoreBoost);",
"lang": "painless",
"params": {
"dtNow": 1543589276,
"uId": "00000000-0000-0000-0000-000000000000"
}
}
}
}
],
"score_mode": "multiply"
}
}
}
Without the script_score part the response looks like this:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 4,
"hits": [
{
"_index": "search--project",
"_type": "projectentity",
"_id": "00000000-0000-0000-0000-000000000000",
"_score": 4,
"_routing": "00000000-0000-0000-0000-000000000000",
"_source": {
"name": "123",
"description": "123...",
"projectTypeId": "00000000-0000-0000-0000-000000000000",
"projectStatusId": "00000000-0000-0000-0000-000000000000",
"tags": [
"232",
"2331",
"343"
],
"plannedDuration": 0,
"startDate": "2018-07-09T22:00:00Z",
"projectType": {
"id": "00000000-0000-0000-0000-000000000000",
"name": "test 1",
"icon": "poll"
},
"projectStatus": {
"id": "00000000-0000-0000-0000-000000000000",
"name": "In Progress",
"type": "progress"
},
"members": [
{
"userId": "00000000-0000-0000-0000-000000000000",
"name": "dummy",
"projectRoleName": "test",
"hasImage": false
},
{
"userId": "00000000-0000-0000-0000-000000000000",
"name": "dummy ",
"projectRoleName": "Manager",
"hasImage": false
}
],
"id": "00000000-0000-0000-0000-000000000000",
"userStats": [
{
"userId": "00000000-0000-0000-0000-000000000000",
"openCount": 55,
"lastOpened": 1543851773
},
{
"userId": "00000000-0000-0000-0000-000000000000",
"openCount": 9,
"lastOpened": 1542372179
}
],
"indexTime": "2018-12-03T15:42:53.157649Z"
}
}
]
}
}
The mapping looks like this:
{
"search--project": {
"aliases": {},
"mappings": {
"projectentity": {
"_routing": {
"required": true
},
"properties": {
"company": {
"properties": {
"companyTypes": {
"properties": {
"icon": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"indexTime": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"indexTime": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"number": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"members": {
"properties": {
"hasImage": {
"type": "boolean"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"projectRoleName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"userId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
...
"plannedDuration": {
"type": "long"
},
"startDate": {
"type": "date"
},
"userStats": {
"properties": {
"lastOpened": {
"type": "long"
},
"openCount": {
"type": "long"
},
"userId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
},
"settings": {
"index": {
"creation_date": "1539619646426",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "G5ohN1FvQBGkYFh_803Ifw",
"version": {
"created": "6040299"
},
"provided_name": "search--project"
}
}
}
}
Does anyone have a suggestion about what I'm doing wrong?
Thanks.

In the query you have your params identified as dtNow and uId. So if you're wanting to use those in your script you would do params.dtNow.
If you're wanting to use a property from your _source (i.e. userStats) you should be using ctx._source.userStats.length. There are more examples in the documentation: https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-examples.html.
EDIT
With a function_score query you'll either be using the doc map or params['_source']. The difference being that when you use doc it gets cached in memory and params['_source'] will be loaded up each time (see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html). The other caveat is that you need to use a non-analyzed field or an analyzed text field if fielddata is enabled. https://www.elastic.co/guide/en/elasticsearch/reference/master/modules-scripting-fields.html#modules-scripting-doc-vals
To achieve what you're trying do, you should be able to just use a non-analyzed field in your userStats object. Something like this doc['userStats.openCount'].length (here I'm assuming that openCount is required on your userStats object).

Related

Elastic Search fetch records if matched the multiple fields with different values in an array

I have below documents in ElasticSearch
[
{
"class": " Grade 1",
"subject": [
"Mathematics",
"German",
"Geometry",
"Arts",
"Physical Education"
],
"student": [
{
"name": "George",
"id": "ORT-823FT"
},
{
"name": "Travis",
"id": "ORT-873FT"
},
{
"name": "Scott",
"id": "ORT-883FT"
}
]
},
{
"class": " Grade 2",
"subject": [
"Mathematics",
"German",
"Geometry",
"French",
"Arts",
"Physical Education"
],
"student": [
{
"name": "Gibbs",
"id": "ORT-923DG"
},
{
"name": "Elizabeth",
"id": "ORT-973DG"
},
{
"name": "Michale",
"id": "ORT-983DG"
}
]
}
]
I need to fetch the document only when the student name and id are matching, for eg: if the student name is George and the id is ORT-823FT, then the first document should be returned. On the other hand if the student name is Gibbs and the id is ORT-923DG then second document must be returned.
The below query works for me, but is there a better way to write it?
{
"query": {
"bool": {
"should": [
{
"match": {
"student.id": "ORT-823FT"
}
},
{
"match": {
"student.name": "George"
}
}
]
, "minimum_should_match": 2
}
}
}
Updated
The mapping for student is as below, I have added "type": "nested", as explained in the document.
{
"student": {
"type": "nested",
"properties": {
"studentResidence": {
"properties": {
"residenceAddress": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"phoneNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"parentEmail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"studentParentRelationShip": {
"properties": {
"relationshipType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"residenceAddress": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"comments": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"validFor": {
"properties": {
"endDateTime": {
"type": "date"
},
"startDateTime": {
"type": "date"
}
}
}
}
}
}
The corresponding query for the same is:
{
"query": {
"nested": {
"path": "student",
"query": {
"bool": {
"must": [
{
"match": {
"student.id": "ORT-823FT"
}},{
"match": {
"student.name": "George"
}
}
]
}
}
}
}
}
I am still getting an incorrect output. Not sure where I am going wrong.
Based on your sample data, the below mapping should work; note I created nested only for the student property and the rest of the properties are normal.
{
"mappings": {
"properties": {
"class": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"student": {
"type": "nested", --> note this
"properties": {
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"subject": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
After that you index the sample documents that you provided in your question+ below one more sample to test it properly.
{
"class": " Grade 2",
"subject": [
"Mathematics",
"German",
"Geometry",
"French",
"Arts",
"Physical Education"
],
"student": [
{
"name": "Gibbs",
"id": "ORT-abc"
},
{
"name": "Elizabeth",
"id": "ORT-973DG"
},
{
"name": "Michale",
"id": "ORT-983DG"
},
{
"name": "XYZ",
"id": "ORT-923DG"
}
]
}
Now same search query that your provided will return the proper results.
{
"query": {
"nested": {
"path": "student",
"query": {
"bool": {
"must": [
{
"match": {
"student.id": "ORT-823FT"
}
},
{
"match": {
"student.name": "George"
}
}
]
}
}
}
}
}
SR
"hits": [
{
"_index": "nested_mapping_student",
"_id": "1",
"_score": 4.0313807,
"_source": {
"class": " Grade 1",
"subject": [
"Mathematics",
"German",
"Geometry",
"Arts",
"Physical Education"
],
"student": [
{
"name": "George",
"id": "ORT-823FT"
},
{
"name": "Travis",
"id": "ORT-873FT"
},
{
"name": "Scott",
"id": "ORT-883FT"
}
]
}
}
]
What you did is not correct, as you are not using a nested field to store your student information, hence the relationship between id and name is lost; this is very well explained in this official example.
You can also try it by indexing below document
{
"class": " Grade 2",
"subject": [
"Mathematics",
"German",
"Geometry",
"French",
"Arts",
"Physical Education"
],
"student": [
{
"name": "Gibbs",
"id": "ORT-abc"
},
{
"name": "Elizabeth",
"id": "ORT-973DG"
},
{
"name": "Michale",
"id": "ORT-983DG"
},
{
"name": "XYZ",
"id": "ORT-923DG"
}
]
}
And you'll see that this document is also returned for the query of Gibbs with id ORT-923DG, which shouldn't be the case.
You need to use the nested query to get your expected results in all cases.

Incorrect month in Elasticsearch date_histogram

My Document looks like below:
{
"_index": "rep_cdr",
"_type": "doc",
"_id": "TaArd2YBDRXNehCp7GmW",
"_score": 1,
"_source": {
"level": "info",
"#version": "1",
"thirdPartyTime": 139,
"date": "15-10-2018",
"time": "15:00:59",
"reqId": "25718d6e-b8ef-438d-8218-1a8726c6c816",
"TAT": 1574,
"message": "",
"thirdPartyErrorDescription": "",
"#timestamp": "2018-10-15T10:00:59.146Z"
}
}
And I am running following query:
GET rep_cdr/doc/_search
{
"size": 0,
"aggs": {
"datewise": {
"date_histogram": {
"field": "date",
"interval": "day"
}
}
}
}
I am getting below result:
{
"aggregations": {
"datewise": {
"buckets": [
{
"key_as_string": "15-01-2018",
"key": 1515974400000,
"doc_count": 8
}
]
}
}
}
Index mapping is as below:
{
"rep_cdr": {
"aliases": {},
"mappings": {
"doc": {
"dynamic_date_formats": [
"DD-MM-YYYY",
"HH:mm:ss",
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
],
"properties": {
"#timestamp": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"TAT": {
"type": "integer"
},
"date": {
"type": "date",
"format": "DD-MM-YYYY"
},
"level": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"message": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 400
}
}
},
"reqId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"response": {
"type": "keyword"
},
"thirdPartyErrorDescription": {
"type": "text"
},
"thirdPartyTime": {
"type": "integer"
},
"time": {
"type": "date",
"format": "HH:mm:ss"
}
}
}
},
"settings": {
"index": {
"creation_date": "1539236694553",
"number_of_shards": "3",
"number_of_replicas": "1",
"uuid": "BYDQOhY_TbWhuqMAOA3iNw",
"version": {
"created": "6040099"
},
"provided_name": "rep_cdr"
}
}
}
}
The "key_as_string" gives me wrong month. In document the date field has value "15-10-2018" but "key_as_string" gives me "15-01-2018". I am using elasticsearch version 6.4. What could be wrong?
Your date field format is set to DD-MM-YYYY where D is day of year as mentioned on https://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html. Change your date format to dd-MM-yyyy instead and it should work as expected.
What you are seeing in the response is the 15th day of the year, i.e. 15-01-2018.

Negative values in Elasticsearch range queries

I have find this problem while making a watch in Elasticsearch, this is my query:
"body": {
"query": {
"bool": {
"must": [
{
"range": {
"percent": {
"lt": 100
}
It returns successfully every document with percent between 0 and 99, however it ignores those with negative value. The "percent" field is mapped as long number in the index.
Can you help me?
Thanks
Edit: Return of executing "curl -XGET localhost:9200/monthly-tickets-2018-06"
{
"monthly-tickets-2018-06": {
"aliases": {},
"mappings": {
"monthly_tickets": {
"properties": {
"percent": {
"type": "long"
},
"priority": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"project": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ref": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timestamp": {
"type": "date"
}
}
}
},
"settings": {
"index": {
"creation_date": "1528946562231",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "aIfLjFwqS_aCzQFvZm0L5Q",
"version": {
"created": "6020399"
},
"provided_name": "monthly-tickets-2018-06"
}
}
}
}

how to compare two aggregations in elastic search

I have a stream of transaction data, which I'm grouping by 10m interval and counting the number of transactions in one aggregation, and computing a moving average in another. I would like to query the results only for the case where total_count is > moving average.
This query returns just fine.
GET /_search
{
"aggs": {
"my_date_histo":{
"date_histogram":{
"field":"created_at",
"interval":"10m"
},
"aggs":{
"the_count":{
"value_count" : {"field" : "user_id"}
},
"the_movavg":{
"moving_avg":{
"buckets_path": "the_count" ,
"window": 5,
"model": "simple"
}
}
}
}
}
}
But when I try the following it throws error,
GET /_search
{
"aggs": {
"my_date_histo":{
"date_histogram":{
"field":"created_at",
"interval":"10m"
},
"aggs":{
"the_count":{
"value_count" : {"field" : "user_id"}
},
"the_movavg":{
"moving_avg":{
"buckets_path": "the_count" ,
"window": 5,
"model": "simple"
}
},
"final_filter": {
"bucket_selector": {
"buckets_path": {
"TheCount": "the_count",
"TheMovAvg": "the_movavg"
},
"script": "params.TheCount > params.TheMovAvg"
}
}
}
}
}
}
EDIT :
Mapping
{
"transaction-live": {
"mappings": {
"logs": {
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"correspondent_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"created_at": {
"type": "date"
},
"discount": {
"type": "float"
},
"endpoint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"event_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fees": {
"type": "float"
},
"from_country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"from_currency_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fx_sent_receive": {
"type": "float"
},
"receive_amount": {
"type": "float"
},
"response_code": {
"type": "long"
},
"send_amount": {
"type": "float"
},
"source": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"source_version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startedtransaction_id": {
"type": "long"
},
"to_country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_agent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_id": {
"type": "long"
}
}
}
}
}
}
ERROR:
{
"error": {
"root_cause": [],
"type": "reduce_search_phase_exception",
"reason": "[reduce] ",
"phase": "fetch",
"grouped": true,
"failed_shards": [],
"caused_by": {
"type": "script_exception",
"reason": "runtime error",
"caused_by": {
"type": "null_pointer_exception",
"reason": null
},
"script_stack": [
"params.TheCount > params.TheMovAvg",
" ^---- HERE"
],
"script": "params.TheCount > params.TheMovAvg",
"lang": "painless"
}
},
"status": 503
}
I played around with your query a bit and found the issue.
Following is the working query you can use
{
"size": 0,
"aggs": {
"my_date_histo": {
"date_histogram": {
"field": "created_at",
"interval": "10m"
},
"aggs": {
"the_count": {
"value_count": {
"field": "user_id"
}
},
"the_movavg": {
"moving_avg": {
"buckets_path": "the_count",
"window": 5,
"model": "simple"
}
},
"final_filter": {
"bucket_selector": {
"buckets_path": {
"TheCount": "the_count",
"TheMovAvg": "the_movavg"
},
"script": "params.TheCount > (params.TheMovAvg == null ? 0 : params.TheMovAvg)"
}
}
}
}
}
}
Now to understand the issue, take the look at the following result of aggregation without the bucket_selector aggregation.
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 42,
"max_score": 0,
"hits": []
},
"aggregations": {
"my_date_histo": {
"buckets": [
{
"key_as_string": "2017-03-06T15:30:00.000Z",
"key": 1488814200000,
"doc_count": 14,
"the_count": {
"value": 14
}
},
{
"key_as_string": "2017-03-06T15:40:00.000Z",
"key": 1488814800000,
"doc_count": 0,
"the_count": {
"value": 0
}
},
{
"key_as_string": "2017-03-06T15:50:00.000Z",
"key": 1488815400000,
"doc_count": 14,
"the_count": {
"value": 14
},
"the_movavg": {
"value": 7
}
},
{
"key_as_string": "2017-03-06T16:00:00.000Z",
"key": 1488816000000,
"doc_count": 3,
"the_count": {
"value": 3
},
"the_movavg": {
"value": 14
}
},
{
"key_as_string": "2017-03-06T16:10:00.000Z",
"key": 1488816600000,
"doc_count": 8,
"the_count": {
"value": 7
},
"the_movavg": {
"value": 8.5
}
},
{
"key_as_string": "2017-03-06T16:20:00.000Z",
"key": 1488817200000,
"doc_count": 3,
"the_count": {
"value": 3
},
"the_movavg": {
"value": 6.375
}
}
]
}
}
}
If you observe the result above, the first two buckets don't compute the moving_avg for that window/setting of the moving_avg aggregation. So when your filter selector compared them, it threw a null pointer exception at runtime, as the Java comparison operator throws a null pointer exception on null values.
Hope this helps you.
Thanks

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. In each of these documents the CustomerInsightTargets property (with the properties Source, Value) can be an array with x items. What I am trying to achieve is an autocomplete (of top 5) on CustomerInsightTargets.Value grouped by CustomerInsightTargets.Source.
It will be helpful if anyone gives me hint about how to select only a subset of nested objects from each document and use that nested obj in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos#mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If I run an aggregation on the top_hits, the result will include all targets from a document if one of them matches my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use the aggregation to get the "autocomplete" Values grouped by Source and order by the _score. I tried to use a significant_terms aggregation but doesn't work so well, also terms aggs doesn't sort by score (and by _count) and having fuzzy also adds complexity.

Resources