Elasticsearch Sorting fields anomaly - elasticsearch

I am trying to sort a list on certain fields (firstName and lastName), but I have noticed some inconsistent results.
I am running a simple query
//Return all the employees from a specific company ordering by lastName asc | desc
GET employee-index-sorting
{
"query": {
"bool": {
"filter": {
"term": {
"companyId": 3179
}
}
}
},
"sort": [
{
"lastName.keyword": { <-- Should this be keyword? or not_analyzed
"order": "desc"
}
}
]
}
In the result why would van der Mescht and van Breda be before Zwane and Zwezwe?
I suspect there is something wrong with my mappings
{
"_index": "employee-index",
"_type": "_doc",
"_id": "637467",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name",
"lastName": "van der Mescht"
},
"sort": [
"van der Mescht"
]
},
{
"_index": "employee-index",
"_type": "_doc",
"_id": "678335",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name3",
"lastName": "van Breda"
},
"sort": [
"van Breda"
]
},
{
"_index": "employee-index",
"_type": "_doc",
"_id": "113896",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name2",
"lastName": "Zwezwe"
},
"sort": [
"Zwezwe"
]
},
{
"_index": "employee-index",
"_type": "_doc",
"_id": "639639",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name1",
"lastName": "Zwane"
},
"sort": [
"Zwane"
]
}
Mappings
Posting the entire map because I am not sure if there might be something else wrong with it.
How should I change the lastName and firstName properties to allow sorting on them?
PUT employee-index-sorting
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter"
]
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"employeeId": {
"type": "keyword"
},
"companyGroupId": {
"type": "keyword"
},
"companyId": {
"type": "keyword"
},
"number": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"preferredName": {
"type": "text",
"index": false
},
"firstName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"middleName": {
"type": "text",
"index": false
},
"lastName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullName": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
}
},
"analyzer": "standard"
},
"terminationDate": {
"type": "date"
},
"companyName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"idNumber": {
"type": "text"
},
"description": {
"type": "text",
"index": false
},
"jobNumber": {
"type": "keyword"
},
"frequencyId": {
"type": "long"
},
"frequencyCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"frequencyAccess": {
"type": "boolean"
}
}
}
}
}

For sorting you need to use lastName.keyword, that's correct, no need to change anything there.
The reason van der Mescht and van Breda come before Zwane and Zwezwe is that keyword strings are sorted lexicographically, i.e. essentially by character position in the ASCII table, where uppercase characters come before lowercase ones, so words are sorted in that same order. But since you're sorting in desc order, it is exactly the opposite, and names starting with a lowercase letter come first:
z...
...
van der Mescht
...
van Breda
...
a...
...
Zwezwe
...
Zwane
...
Z...
...
A...
To fix this, what you simply need to do is to add a normalizer to your lastName.keyword field, i.e. change your mapping to this and it will work:
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
...
},
"tokenizer": {
...
},
"normalizer": { <-- add this
"lowersort": {
"type": "custom",
"filter": [
"lowercase"
]
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
...
"lastName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"normalizer": "lowersort", <-- add this
"ignore_above": 256
}
}
},
...
}
}
}
}

Related

ElasticSearch query relevance

I would like to find a product with the following search priority: pickRef, name, synonym (it's an array), and then the other fields. I haven't managed to write a working query. I have to boost synonym by 50 just to get the product into the top 8 results.
The aim of my query is to build an autocompletion search with fuzziness (to tolerate misspellings).
I have a product with the synonym "caca". When I search for "caca", ES returns every coca product, but not the product with the synonym "caca". However, the product with "caca" should be the first result because it matches the synonym field exactly, and the coca products should come after it (they only match because of the fuzziness parameter).
Here is my index:
{
"product": {
"aliases": {},
"mappings": {
"properties": {
"brand": {
"type": "keyword",
"boost": 3
},
"catalogue": {
"type": "keyword"
},
"category": {
"type": "text",
"analyzer": "standard"
},
"description": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3
}
}
},
"description_ecology": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3
}
}
},
"enabled": {
"type": "boolean"
},
"image": {
"type": "text"
},
"name": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"pickRef": {
"type": "keyword",
"boost": 5
},
"replaced": {
"type": "boolean"
},
"slug": {
"type": "text"
},
"synonym": {
"type": "keyword",
"boost": 3
}
}
},
"settings": {
"index": {
"routing": {
"allocation": {
"include": {
"_tier_preference": "data_content"
}
}
},
"number_of_shards": "1",
"provided_name": "product",
"creation_date": "1634287857507",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"pre_negs": {
"pattern": "a \\w",
"type": "pattern_replace",
"replacement": ""
}
}
},
"number_of_replicas": "0",
"uuid": "EGLmpv8bRlCnfLBxHZOKmA",
"version": {
"created": "7150099"
}
}
}
}
}
Here is my query:
{
"index": "product",
"size": 8,
"body": {
"query": {
"bool": {
"must": [
{
"match": {
"enabled": true
}
},
{
"match": {
"replaced": false
}
}
],
"should": [
{
"match": {
"name.fr": {
"query": "caca",
"analyzer": "standard"
}
}
},
{
"match": {
"synonym": {
"query": "caca",
"boost": 20,
"analyzer": "standard"
}
}
},
{
"multi_match": {
"query": "caca",
"fields": [
"brand^2",
"pickRef^5",
"catalogue",
"name.fr^3",
"name.en^1",
"name.de^1",
"name.lu^1",
"description.fr^1",
"description.en^1",
"description.de^1",
"description.lu^1",
"description_ecologique.fr^1",
"description_ecologique.en^1",
"description_ecologique.de^1",
"description_ecologique.lu^1"
],
"fuzziness": "AUTO"
}
},
{
"query_string": {
"query": "caca"
}
}
]
}
}
}
}
These are my products:
{
"_index": "product",
"_type": "_doc",
"_id": "1594",
"_version": 1,
"_seq_no": 1593,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "PLANTE ARTIFICIELLE BAMBOU 120cm"
},
"pickRef": "122638",
"description": {
"fr": "Agrémentez votre lieu de travail avec cette superbe plante ! Elle garantit un environnement très naturel, ne nécessite pas d'entretien et agrémente n'importe quel espace. Tronc en bois, feuillage en polyester , livrée dans un pot standard en plastique."
},
"description_ecology": {
"fr": ""
},
"catalogue": "P399",
"image": "uploads/product/122638/122638.png",
"brand": "PAPERFLOW",
"category": "Autres",
"slug": "plante-artificielle-bambou-120cm-122638-122638",
"enabled": true,
"synonym": [],
"replaced": false
}
}
{
"_index": "product",
"_type": "_doc",
"_id": "3131",
"_version": 1,
"_seq_no": 3130,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "ROYCO MINUTE SOUP \"POIS AU JAMBON\""
},
"pickRef": "141065",
"description": {
"fr": "Retrouvez le bon goût des légumes dans ces recettes de tradition alliant tout le savoir-faire de Royco Minute Soup à la saveur des meilleurs ingrédients."
},
"description_ecology": {
"fr": ""
},
"catalogue": "P038",
"image": "uploads/product/141065/141065.png",
"brand": "ROYCO",
"category": "Soupe & pâtes",
"slug": "royco-minute-soup-pois-au-jambon-5410056186552-141065",
"enabled": true,
"synonym": [],
"replaced": false
}
}
{
"_index": "product",
"_type": "_doc",
"_id": "6",
"_version": 2,
"_seq_no": 24511,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "AGRAFES 26/6 GALVANISEES"
},
"pickRef": "100110",
"description": {
"fr": "<div>Boîte de 1000 agrafes 26/6 galvanisées.</div>"
},
"description_ecology": {
"fr": null
},
"catalogue": "S",
"image": "uploads/product/233163/233163.png",
"brand": "autres",
"category": "Autres",
"slug": "agrafes-26-6-galvanisees-jambon-5010255827746-100110",
"enabled": true,
"synonym": [
"caca",
"jambon"
],
"replaced": false
}
}
PS : I know the example is not perfect but I don't have a better one...
Did you try sorting by _score?
{
"index": "product",
"size": 8,
"body": {
"query": {
.
.
.
},
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}
}

How to perform nested aggregation in child parent relationship

I am using Elasticsearch 7.11 and have implemented a parent-child relation. One of the main reasons was that my updates are very frequent and a new child can be added under a parent at any time.
My project manages all the computers in the network; all the activity related to the endpoints should be logged for analytics purposes.
My mapping is something like this:
PcInformation -> User
The PC has its own information (the main thing to note is the activationTime), and the user has its department, username, role, etc.
Now I want to get the top departments with respect to the PCs and their time.
Say I want to know which departments have the largest number of PCs in 2020.
What I am currently doing is first getting all the PCs via the user relationship, using the has_child query below.
{
"query": {
"bool": {
"filter": [
{
"has_child": {
"type": "user",
"query": {
"nested": {
"path": "user",
"query": {
"match_all": {}
}
}
}
}
},
{
"range": {
"regDate": {
"gte": "2020-04-11",
"lte": "2022-04-31"
}
}
}
]
}
}
}
This returns all the PCs in a specific time range.
I then perform an aggregation on user first, followed by a sub-aggregation on the pcConnection data for the time-based aggregation. Now I want to know the name of the department, but that is not in the PC information.
One option is to put the user information in the PC document, but then I would lose the benefit for which I am using the parent-child model.
Is there any way to do this?
Updated
The Sample Mapping
{
"pcinformation": {
"mappings": {
"properties": {
"_class": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user": {
"type": "nested",
"properties": {
"userGroup": {
"type": "keyword"
},
"userTeam": {
"type": "keyword"
},
"userCode": {
"type": "long"
},
"userName": {
"type": "keyword"
}
}
},
"antivirus": {
"type": "nested",
"properties": {
"datetime": {
"type": "date"
},
"name": {
"type": "keyword"
}
}
},
"cpuId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"domainName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"firewall": {
"type": "nested",
"properties": {
"datetime": {
"type": "date"
},
"status": {
"type": "keyword"
}
}
},
"friendlyName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"activationDate": {
"type": "date"
},
"macId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"osArch": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"osType": {
"type": "keyword"
},
"osVersion": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"pcSignature": {
"type": "text"
},
"pcSignatureHash": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"relation": {
"type": "join",
"eager_global_ordinals": true,
"relations": {
"infection": [
"user"
]
}
},
"userName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"vm": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
So I got two records as this is parent child the one is
{
"_index": "pcInformation",
"_type": "_doc",
"_id": "abcd",
"_version": 1,
"_score": 1,
"_source": {
"_class": "stor.doc.pcInformation",
"pcSignatureHash": "abcd",
"pcSignature": "dddd",
"name": "DESKTOP8JGBPB9",
"userName": "Win1064",
"osType": "Windows.10.Enterprise",
"domainName": "DESKTOP8JGBPB9",
"cpuId": "NOCPUID",
"osVersion": "10.0.19042",
"osArch": "32",
"macId": "0800278A763D",
"activationDate": "2021-05-25T08:46:30.510Z",
"vm": "No VM",
"friendlyName": "Windows Defender",
"relation": {
"name": "pcInformation"
}
}
}
The other one is user information.
{
"_index": "pcInformation",
"_type": "_doc",
"_id": "Qw60onkBDTnt1BMJOeq0",
"_version": 1,
"_score": 1,
"_routing": "abcd",
"_source": {
"_class": "stor.doc.pcInformation",
"agent": {
"userCode": 1,
"userGroup":"admin",
"userRole":"manager"
},
"relation": {
"name": "user",
"parent": "abcd"
}
}
}

different analyzers in different fields

In my mapping below, the URL field uses a different analyzer from the title and description fields. Why is it that when I search the three fields simultaneously, nothing is returned, even though each of the three words below is present in one of the fields?
{
"settings": {
"index": {
"number_of_shards": "5",
"number_of_replicas": "0",
"analysis": {
"filter": {
"stemmer_plural_portugues": {
"name": "minimal_portuguese",
"stopwords" : ["http", "https", "ftp", "www"],
"type": "stemmer"
},
"synonym_filter": {
"type": "synonym",
"lenient": true,
"synonyms_path": "analysis/synonym.txt",
"updateable" : true
},
"shingle_filter": {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 3
}
},
"analyzer": {
"analyzer_customizado": {
"filter": [
"lowercase",
"stemmer_plural_portugues",
"asciifolding",
"synonym_filter",
"shingle_filter" ],
"tokenizer": "standard"
},
"analyzer_url": {
"filter": [
"lowercase",
"stemmer_plural_portugues",
"asciifolding" ],
"tokenizer": "lowercase"
}
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "long"
},
"data": {
"type": "date"
},
"quebrado": {
"type": "byte"
},
"pgrk": {
"type": "integer"
},
"url_length": {
"type": "integer"
},
"title": {
"analyzer": "analyzer_customizado",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"description": {
"analyzer": "analyzer_customizado",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
},
"url": {
"analyzer": "analyzer_url",
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
}
}
}
}
}
In the query below, each of the three words exists in one of the fields, but results are only returned if I search for the words that are in the title and in the description. If I also search for the word that is in the URL field, which has the different analyzer, nothing is returned.
If I search only for the words that are in the title and description fields, the document is found as expected; if I search only for the word that is in the URL field, it is also found. However, if I search for all three words, which exist across the three fields, nothing is returned.
{
"from": 0,
"size": 10,
"query": {
"multi_match": {
"query": "carro moto aviao",
"type": "cross_fields",
"fields": [
"title",
"description",
"url"
],
"operator": "and"
}
}
}
The issue is that you are using the and operator, which means all three words (carro, moto, aviao) must be present. Can you change it to or and see if it returns results?
Below is a working example with your mapping, sample data, and a search query with the or operator, confirming that it works.
Sample doc
{
"title": "carro",
"description": "moto",
"url": "aviao"
}
Search query with OR param
{
"from": 0,
"size": 10,
"query": {
"multi_match": {
"query": "carro moto aviao",
"type": "cross_fields",
"fields": [
"title",
"description",
"url"
],
"operator": "or"
}
}
}
Search result
"hits": [
{
"_index": "jean",
"_type": "_doc",
"_id": "1",
"_score": 0.5753642,
"_source": {
"title": "carro",
"description": "moto",
"url": "aviao"
}
}
]
Note: I confirmed that it doesn't work with the and operator in your query.

Incorrect month in Elasticsearch date_histogram

My Document looks like below:
{
"_index": "rep_cdr",
"_type": "doc",
"_id": "TaArd2YBDRXNehCp7GmW",
"_score": 1,
"_source": {
"level": "info",
"#version": "1",
"thirdPartyTime": 139,
"date": "15-10-2018",
"time": "15:00:59",
"reqId": "25718d6e-b8ef-438d-8218-1a8726c6c816",
"TAT": 1574,
"message": "",
"thirdPartyErrorDescription": "",
"#timestamp": "2018-10-15T10:00:59.146Z"
}
}
And I am running following query:
GET rep_cdr/doc/_search
{
"size": 0,
"aggs": {
"datewise": {
"date_histogram": {
"field": "date",
"interval": "day"
}
}
}
}
I am getting below result:
{
"aggregations": {
"datewise": {
"buckets": [
{
"key_as_string": "15-01-2018",
"key": 1515974400000,
"doc_count": 8
}
]
}
}
}
Index mapping is as below:
{
"rep_cdr": {
"aliases": {},
"mappings": {
"doc": {
"dynamic_date_formats": [
"DD-MM-YYYY",
"HH:mm:ss",
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
],
"properties": {
"#timestamp": {
"type": "date",
"format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"TAT": {
"type": "integer"
},
"date": {
"type": "date",
"format": "DD-MM-YYYY"
},
"level": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"message": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 400
}
}
},
"reqId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"response": {
"type": "keyword"
},
"thirdPartyErrorDescription": {
"type": "text"
},
"thirdPartyTime": {
"type": "integer"
},
"time": {
"type": "date",
"format": "HH:mm:ss"
}
}
}
},
"settings": {
"index": {
"creation_date": "1539236694553",
"number_of_shards": "3",
"number_of_replicas": "1",
"uuid": "BYDQOhY_TbWhuqMAOA3iNw",
"version": {
"created": "6040099"
},
"provided_name": "rep_cdr"
}
}
}
}
The "key_as_string" gives me wrong month. In document the date field has value "15-10-2018" but "key_as_string" gives me "15-01-2018". I am using elasticsearch version 6.4. What could be wrong?
Your date field format is set to DD-MM-YYYY where D is day of year as mentioned on https://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html. Change your date format to dd-MM-yyyy instead and it should work as expected.
What you are seeing in response is 15th day of the year i.e. 15-01-2018

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. In each of these documents, the CustomerInsightTargets property (whose items have the properties Source and Value) can be an array with x items. What I am trying to achieve is an autocomplete (top 5) on CustomerInsightTargets.Value, grouped by CustomerInsightTargets.Source.
It would be helpful if anyone could give me a hint about how to select only a subset of the nested objects from each document and use those nested objects in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos#mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If I run a top_hits aggregation, the result includes all targets from a document if any one of them matches my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use the aggregation to get the "autocomplete" values grouped by Source and ordered by _score. I tried to use a significant_terms aggregation, but it doesn't work very well; also, the terms aggregation doesn't sort by score (it sorts by _count), and using fuzziness adds further complexity.

Resources