I have addresses with their address components stored in Elasticsearch; each address looks like the following in my index:
{
"_index": "properties",
"_type": "_doc",
"_id": "property_5235354",
"_score": 32.839436,
"_source": {
"id": 5235354,
"branchid": 1,
"suburb": "Lyons",
"postcode": "2606",
"state": "ACT",
"#timestamp": "2021-09-27T08:56:08.827Z",
"agencycode": "X",
"address": "54-5 Burnie St Lyons ACT 2606 AUS",
"streetnumber": "5",
"branchcode": "X_ACT",
"unitnumber": "54",
"agencyid": 1,
"streetname": "Burnie St",
"#version": "1"
}
}
To search for a specific address on the basis of its components, I am considering the following points:
Street names may be abbreviated, e.g. "James Street" -> "James St"
Address components should be matched exactly, in a case-insensitive manner
Please let me know if you think I should consider something else.
To do this I tried the following:
{
"query": {
"bool": {
"should": [
{
"match": {
"streetname.keyword": "Burnie Street"
}
},
{
"match": {
"streetname.keyword": "Burnie St"
}
}
],
"must": [
{
"match": {
"unitnumber.keyword": "54"
}
},
{
"match": {
"streetnumber.keyword": "5"
}
},
{
"match": {
"suburb.keyword": "Lyons"
}
},
{
"match": {
"state": "ACT"
}
},
{
"match": {
"postcode.keyword": "2606"
}
}
]
}
},
"size": 1000
}
I need your help solving these issues:
The above query also returns invalid results, e.g. the address 54-5 Burnie Avenue Lyons ACT 2606 AUS, which is Burnie Avenue, not Burnie Street.
If I search for burnie street instead of Burnie Street, it is unable to find the data.
More information:
This is the full result of the _search API with the above request body. The addresses 54-5 Burnie St Lyons ACT 2606 AUS and 54/5 Burnie Street Lyons ACT 2606 are correct matches, but 54-5 Burnie Avenue Lyons ACT 2606 AUS is an invalid match.
{
"took": 1476,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 32.839436,
"hits": [
{
"_index": "properties",
"_type": "_doc",
"_id": "property_5235354",
"_score": 32.839436,
"_source": {
"id": 5235354,
"branchid": 1,
"suburb": "Lyons",
"postcode": "2606",
"state": "ACT",
"#timestamp": "2021-09-27T08:56:08.827Z",
"agencycode": "X",
"address": "54-5 Burnie St Lyons ACT 2606 AUS",
"streetnumber": "5",
"branchcode": "X_ACT",
"unitnumber": "54",
"agencyid": 1,
"streetname": "Burnie St",
"#version": "1"
}
},
{
"_index": "properties",
"_type": "_doc",
"_id": "property_11081",
"_score": 28.954222,
"_source": {
"id": 11081,
"branchid": 1,
"suburb": "Lyons",
"postcode": "2606",
"state": "ACT",
"#timestamp": "2021-09-27T08:56:08.163Z",
"agencycode": "X",
"address": "54/5 Burnie Street Lyons ACT 2606",
"streetnumber": "5",
"branchcode": "X_ACT",
"unitnumber": "54",
"agencyid": 1,
"streetname": "Burnie Street",
"#version": "1"
}
},
{
"_index": "properties",
"_type": "_doc",
"_id": "property_5235356",
"_score": 22.677355,
"_source": {
"id": 5235356,
"branchid": 1,
"suburb": "Lyons",
"postcode": "2606",
"state": "ACT",
"#timestamp": "2021-09-27T08:56:08.847Z",
"agencycode": "X",
"address": "54-5 Burnie Avenue Lyons ACT 2606 AUS",
"streetnumber": "5",
"branchcode": "X_ACT",
"unitnumber": "54",
"agencyid": 1,
"streetname": "Burnie Avenue",
"#version": "1"
}
}
]
}
}
Your original query returns Burnie Avenue because, once a bool query contains must clauses, the should clauses are optional by default (minimum_should_match defaults to 0) and only influence scoring, so they never filter anything out. In addition, a match against streetname.keyword is case-sensitive, which is why a lowercase burnie street finds nothing. You need to use a combination of bool/must/should query clauses, a term query (for an exact, case-insensitive match), and a match_phrase_prefix query.
Index Mapping:
{
"mappings": {
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"address": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"agencycode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"agencyid": {
"type": "long"
},
"branchcode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"branchid": {
"type": "long"
},
"id": {
"type": "long"
},
"postcode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"streetname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"streetnumber": {
"type": "integer"
},
"suburb": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"unitnumber": {
"type": "integer"
}
}
}
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"term": {
"streetnumber": "5"
}
},
{
"term": {
"unitnumber": "54"
}
},
{
"bool": {
"should": [
{
"term": {
"streetname.keyword": {
"value": "Burnie Street",
"case_insensitive": "true"
}
}
},
{
"match_phrase_prefix": {
"streetname": "Burnie St"
}
}
]
}
},
{
"term": {
"suburb.keyword": {
"value": "Lyons",
"case_insensitive": "true"
}
}
},
{
"term": {
"postcode.keyword": "2606"
}
},
{
"term": {
"state.keyword": {
"value": "ACT",
"case_insensitive": "true"
}
}
}
]
}
}
}
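As a further option for the street-name abbreviation point, instead of enumerating variants with match_phrase_prefix you could index streetname with a synonym token filter so that "Street"/"St", "Avenue"/"Ave", etc. become the same token at index time. This is only a sketch and assumes you can reindex; the analyzer and filter names below are illustrative:
{
  "settings": {
    "analysis": {
      "filter": {
        "street_abbreviations": {
          "type": "synonym",
          "synonyms": [
            "street, st",
            "avenue, ave",
            "road, rd"
          ]
        }
      },
      "analyzer": {
        "street_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "street_abbreviations"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "streetname": {
        "type": "text",
        "analyzer": "street_analyzer",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      }
    }
  }
}
With this in place, a match query with "operator": "and" (or a match_phrase query) on streetname treats "Burnie Street" and "Burnie St" as equivalent while still rejecting "Burnie Avenue".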
Related
I would like to find a product with the following search priority: pickRef, name, synonym (it's an array), and the other fields after. I can't manage to get a working query; I have to boost synonym with "50" just to get the product into the top 8 results...
The aim of my query is to provide autocomplete search with fuzziness (to tolerate misspellings).
I have a product with the synonym "caca". When I search for "caca", ES returns every coca product but not the product with the synonym "caca". However, the term "caca" should be the first result, because it matches the synonym field exactly, and the coca products should come after it (they only match because of the fuzziness parameter).
Here is my index:
{
"product": {
"aliases": {},
"mappings": {
"properties": {
"brand": {
"type": "keyword",
"boost": 3
},
"catalogue": {
"type": "keyword"
},
"category": {
"type": "text",
"analyzer": "standard"
},
"description": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3
}
}
},
"description_ecology": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3
}
}
},
"enabled": {
"type": "boolean"
},
"image": {
"type": "text"
},
"name": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"pickRef": {
"type": "keyword",
"boost": 5
},
"replaced": {
"type": "boolean"
},
"slug": {
"type": "text"
},
"synonym": {
"type": "keyword",
"boost": 3
}
}
},
"settings": {
"index": {
"routing": {
"allocation": {
"include": {
"_tier_preference": "data_content"
}
}
},
"number_of_shards": "1",
"provided_name": "product",
"creation_date": "1634287857507",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"pre_negs": {
"pattern": "a \\w",
"type": "pattern_replace",
"replacement": ""
}
}
},
"number_of_replicas": "0",
"uuid": "EGLmpv8bRlCnfLBxHZOKmA",
"version": {
"created": "7150099"
}
}
}
}
}
Here is my query:
{
"index": "product",
"size": 8,
"body": {
"query": {
"bool": {
"must": [
{
"match": {
"enabled": true
}
},
{
"match": {
"replaced": false
}
}
],
"should": [
{
"match": {
"name.fr": {
"query": "caca",
"analyzer": "standard"
}
}
},
{
"match": {
"synonym": {
"query": "caca",
"boost": 20,
"analyzer": "standard"
}
}
},
{
"multi_match": {
"query": "caca",
"fields": [
"brand^2",
"pickRef^5",
"catalogue",
"name.fr^3",
"name.en^1",
"name.de^1",
"name.lu^1",
"description.fr^1",
"description.en^1",
"description.de^1",
"description.lu^1",
"description_ecologique.fr^1",
"description_ecologique.en^1",
"description_ecologique.de^1",
"description_ecologique.lu^1"
],
"fuzziness": "AUTO"
}
},
{
"query_string": {
"query": "caca"
}
}
]
}
}
}
}
Here are my products:
{
"_index": "product",
"_type": "_doc",
"_id": "1594",
"_version": 1,
"_seq_no": 1593,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "PLANTE ARTIFICIELLE BAMBOU 120cm"
},
"pickRef": "122638",
"description": {
"fr": "Agrémentez votre lieu de travail avec cette superbe plante ! Elle garantit un environnement très naturel, ne nécessite pas d'entretien et agrémente n'importe quel espace. Tronc en bois, feuillage en polyester , livrée dans un pot standard en plastique."
},
"description_ecology": {
"fr": ""
},
"catalogue": "P399",
"image": "uploads/product/122638/122638.png",
"brand": "PAPERFLOW",
"category": "Autres",
"slug": "plante-artificielle-bambou-120cm-122638-122638",
"enabled": true,
"synonym": [],
"replaced": false
}
}
{
"_index": "product",
"_type": "_doc",
"_id": "3131",
"_version": 1,
"_seq_no": 3130,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "ROYCO MINUTE SOUP \"POIS AU JAMBON\""
},
"pickRef": "141065",
"description": {
"fr": "Retrouvez le bon goût des légumes dans ces recettes de tradition alliant tout le savoir-faire de Royco Minute Soup à la saveur des meilleurs ingrédients."
},
"description_ecology": {
"fr": ""
},
"catalogue": "P038",
"image": "uploads/product/141065/141065.png",
"brand": "ROYCO",
"category": "Soupe & pâtes",
"slug": "royco-minute-soup-pois-au-jambon-5410056186552-141065",
"enabled": true,
"synonym": [],
"replaced": false
}
}
{
"_index": "product",
"_type": "_doc",
"_id": "6",
"_version": 2,
"_seq_no": 24511,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "AGRAFES 26/6 GALVANISEES"
},
"pickRef": "100110",
"description": {
"fr": "<div>Boîte de 1000 agrafes 26/6 galvanisées.</div>"
},
"description_ecology": {
"fr": null
},
"catalogue": "S",
"image": "uploads/product/233163/233163.png",
"brand": "autres",
"category": "Autres",
"slug": "agrafes-26-6-galvanisees-jambon-5010255827746-100110",
"enabled": true,
"synonym": [
"caca",
"jambon"
],
"replaced": false
}
}
PS: I know the example is not perfect, but I don't have a better one...
Did you try sorting by _score?
{
"index": "product",
"size": 8,
"body": {
"query": {
.
.
.
},
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}
}
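One more thing that can help narrow this down (a debugging suggestion, not a fix): give each clause a name with the _name option and check matched_queries in the hits, so you can see whether the synonym clause matches at all and which clauses the coca products match. For example, on the synonym clause:
{
  "match": {
    "synonym": {
      "query": "caca",
      "boost": 20,
      "_name": "synonym_match"
    }
  }
}
Each hit then lists the names of the clauses it matched under matched_queries, and running the search with "explain": true shows the per-clause score breakdown.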
Can someone help with my search query on how I can filter the results based on 2 fields? I have built an index with thousands of documents in it; the UI calls this index, and there are 2 search fields:
Search by zip code
Search by city/state
Based on these combinations we need to show results only within that zip code.
Mapping
{
"mappings": {
"properties": {
"address": {
"properties": {
"city": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"zipcode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"startdate": {
"type": "date"
},
"enddate": {
"type": "date"
},
"customerstatus": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"customerid": {
"type": "long"
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"number_of_replicas": "1"
}
}
}
Query
{
"from": 0,
"size": 100,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "32081",
"fields": ["address.zipcode" ]
}
},
{
"query_string": {
"query": "FL",
"fields": ["address.cityname","address.state" ]
}
}
]
}
}
}
Result set
{
"customerid":1,
"customerstatus": Active,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-07-15",
"enddate": "2021-07-15"
},
{
"customerid":2,
"customerstatus": Pending,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2018-01-01",
"enddate": "2019-01-01"
},
{
"customerid":3,
"customerstatus": Pending,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-06-01",
"enddate": "2021-06-01"
},
{
"customerid":4,
"customerstatus": Pending,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2021-01-01",
"enddate": "2022-01-01"
},
{
"customerid":5,
"customerstatus": Inactive,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-07-15",
"enddate": "2021-07-15"
},
{
"customerid":6,
"customerstatus": cancelled,
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-07-15",
"enddate": "2021-07-15"
}
Now the requirement is as follows:
Exclude the results where customerstatus is Inactive or Cancelled (customers 5 and 6 shouldn't be displayed)
Display only Active and Pending
If the status is Pending, then display only the customers whose enddate is less than 500 days in the past and not more than 91 days in the future
So how can I get only customerid 1 and 3 in my result set?
You can use a combination of a bool query and a range query to find documents on the basis of a range of days. Try the query below:
{
"from": 0,
"size": 100,
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"match": {
"customerstatus": "pending"
}
},
{
"range": {
"enddate": {
"gt": "now-500d/d",
"lte": "now+91d/d"
}
}
}
]
}
},
{
"match": {
"customerstatus": "active"
}
}
],
"must_not": {
"terms": {
"customerstatus.keyword": [
"Inactive",
"cancelled"
]
}
}
}
}
}
The search result will be:
"hits": [
{
"_index": "67260491",
"_type": "_doc",
"_id": "3",
"_score": 1.6931472,
"_source": {
"customerid": 3,
"customerstatus": "Pending",
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-06-01",
"enddate": "2021-06-01"
}
},
{
"_index": "67260491",
"_type": "_doc",
"_id": "1",
"_score": 1.5404451,
"_source": {
"customerid": 1,
"customerstatus": "Active",
"address": {
"city": "PONTE VEDRA",
"state": "FL",
"zipcode": "32081"
},
"startdate": "2020-07-15",
"enddate": "2021-07-15"
}
}
]
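A note on the range clause above: now-500d/d and now+91d/d use Elasticsearch date math, where the /d suffix rounds the computed date to day granularity so the bounds don't drift within a single day. If you prefer fixed bounds instead of dates relative to now, the same clause can be written with explicit dates (the dates below are purely illustrative):
{
  "range": {
    "enddate": {
      "gt": "2020-01-15",
      "lte": "2021-08-15"
    }
  }
}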
I have a weird problem with Elasticsearch 6.0.
I have an index with the following mapping:
{
"cities": {
"mappings": {
"cities": {
"properties": {
"city": {
"properties": {
"id": {
"type": "long"
},
"name": {
"properties": {
"en": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"it": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"slug": {
"properties": {
"en": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"it": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
},
"doctype": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"suggest": {
"type": "completion",
"analyzer": "accents",
"search_analyzer": "simple",
"preserve_separators": true,
"preserve_position_increments": false,
"max_input_length": 50
},
"weight": {
"type": "long"
}
}
}
}
}
}
I have these documents in my index:
{
"_index": "cities",
"_type": "cities",
"_id": "991-city",
"_version": 128,
"found": true,
"_source": {
"doctype": "city",
"suggest": {
"input": [
"nazaré",
"nazare",
"나자레",
"najare",
"najale",
"ナザレ",
"Ναζαρέ"
],
"weight": 1807
},
"weight": 3012,
"city": {
"id": 991,
"name": {
"en": "Nazaré",
"it": "Nazaré"
},
"slug": {
"en": "nazare",
"it": "nazare"
}
}
}
}
{
"_index": "cities",
"_type": "cities",
"_id": "1085-city",
"_version": 128,
"found": true,
"_source": {
"doctype": "city",
"suggest": {
"input": [
"nazareth",
"nazaret",
"拿撒勒",
"na sa le",
"sa le",
"le",
"na-sa-lei",
"나사렛",
"nasares",
"nasales",
"ナザレス",
"nazaresu",
"नज़ारेथ",
"nj'aareth",
"aareth",
"najaratha",
"Назарет",
"Ναζαρέτ",
"názáret",
"nazaretas"
],
"weight": 1809
},
"weight": 3015,
"city": {
"id": 1085,
"name": {
"en": "Nazareth",
"it": "Nazareth"
},
"slug": {
"en": "nazareth",
"it": "nazareth"
}
}
}
}
Now, when I search using the suggester, with the following query:
POST /cities/_search
{
"suggest":{
"suggest":{
"prefix":"nazare",
"completion":{
"field":"suggest"
}
}
}
}
I expect to have both documents in my results, but I only get the second one (nazareth) back:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": 0.0,
"hits": []
},
"suggest": {
"suggest": [
{
"text": "nazare",
"offset": 0,
"length": 6,
"options": [
{
"text": "nazaresu",
"_index": "cities",
"_type": "cities",
"_id": "1085-city",
"_score": 1809.0,
"_source": {
"doctype": "city",
"suggest": {
"input": [
"nazareth",
"nazaret",
"拿撒勒",
"na sa le",
"sa le",
"le",
"na-sa-lei",
"나사렛",
"nasares",
"nasales",
"ナザレス",
"nazaresu",
"नज़ारेथ",
"nj'aareth",
"aareth",
"najaratha",
"Назарет",
"Ναζαρέτ",
"názáret",
"nazaretas"
],
"weight": 1809
},
"weight": 3015,
"city": {
"id": 1085,
"name": {
"en": "Nazareth",
"it": "Nazareth"
},
"slug": {
"en": "nazareth",
"it": "nazareth"
}
}
}
}
]
}
]
}
}
This is unexpected, because in the suggest input of the first document, the term I searched for ("nazare") appears exactly as I typed it.
Another fun fact: if I search for "najare" instead of "nazare", I get the correct results.
Any hint will be really appreciated!
For a quick solution, use the size parameter in the completion object of your query.
GET /cities/_search
{
"suggest":{
"suggest":{
"prefix":"nazare",
"completion":{
"field":"suggest",
"size": 100 <- HERE
}
}
}
}
The size parameter defaults to 5, so once Elasticsearch has found 5 terms (not documents) with the matching prefix, it stops looking for more terms (and consequently for more documents).
This limit is per term, not per document. So if one document contains 5 terms with the matching prefix and you use the default value of 5, the other documents may not be returned.
I strongly believe that this is what is happening in your case. The returned document has at least 5 suggest terms with the prefix nazare, so only that one is returned.
As for your fun fact: when you search for najare, there is only one term with that prefix, so you get the correct result.
The tricky thing is that the results depend on the order in which Elasticsearch retrieves the documents. If the first document had been retrieved first, it would not have reached the size threshold (only 2 or 3 prefix occurrences), the next document would also have been retrieved, and you would have gotten the correct result.
Also, unless necessary, avoid using a very high value (e.g. > 1000) for the size parameter. It might impact performance, particularly for short or common prefixes.
I am using Elasticsearch 5.4 and want to query across an index containing documents of multiple types (type a and type b). Below are example documents in the index:
Documents:
{
"_index": "test",
"_type": "a",
"_id": "1",
"_source": {
"id": "1",
"name": "john-usa-soccer",
"class": "5",
"lastseen": "2017-07-05",
"a_atts": {
"lastname": "tover",
"hobby": "soccer",
"country": "usa"
}
}
}
{
"_index": "test",
"_type": "b",
"_id": "2",
"_source": {
"id": "2",
"name": "john-usa",
"class": "5",
"lastseen": "2017-07-05",
"b_atts": {
"lastname": "kaml",
"hobby": "baseball",
"country": "usa"
}
}
}
Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_ngram_analyzer": {
"tokenizer": "my_ngram_tokenizer"
}
},
"tokenizer": {
"my_ngram_tokenizer": {
"type": "ngram",
"min_gram": "3",
"max_gram": "3",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"a": {
"dynamic_templates": [
{
"strings": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"analyzer": "my_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
},
"analyzer1": {
"type": "text",
"analyzer": "simple"
},
"analyzer2": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
]
},
"b": {
"dynamic_templates": [
{
"strings": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"analyzer": "my_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
},
"analyzer1": {
"type": "text",
"analyzer": "simple"
},
"analyzer2": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
]
}
}
}
My query is to search for all documents that contain 'john' in any field of any type and to highlight the fields where the match was found. The query is constructed as per the Elastic documentation. My mapping has my_ngram_analyzer configured as the analyzer, instead of the default analyzer, for all string fields in the schema.
Query: http://localhost:9200/student/_search
{
"query": {
"bool": {
"should": [
{ "match": { "_all": "john"} }
]
}
},
"highlight": {
"fields": {
"name": {
"require_field_match": false
},
"a_atts.lastname":{
"require_field_match": false
},
"a_atts.hobby":{
"require_field_match": false
},
"a_atts.country":{
"require_field_match": false
}
}
}
}
Response:
{
"took": 79,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.17669111,
"hits": [
{
"_index": "student",
"_type": "a",
"_id": "AV1WjBeYEZrDBYsdGMtY",
"_score": 0.17669111,
"_source": {
"name": "john-usa-soccer",
"class": "5",
"lastseen": "2017-07-05",
"a_atts": {
"lastname": "tover",
"hobby": "soccer",
"country": "usa"
}
}
},
{
"_index": "student",
"_type": "b",
"_id": "AV1WjHFxEZrDBYsdGMtZ",
"_score": 0.17669111,
"_source": {
"name": "john-usa",
"class": "5",
"lastseen": "2017-07-05",
"b_atts": {
"lastname": "kaml",
"hobby": "baseball",
"country": "usa"
}
}
}
]
}
}
However, executing the above query against the index returns the matched documents with their _source content, but no highlight field. It is missing the following:
"highlight": {
"name": [
"<em>john</em>-usa-soccer"
]
}
How can I return the highlight in the results?
I got the highlighter to work by following the answer provided in this link:
"highlight": {
"fields": {
"*": {}
},
"require_field_match": false
}
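Putting that together with the original query, the full request body would look roughly like this (the "*" wildcard highlights whichever fields actually matched):
{
  "query": {
    "bool": {
      "should": [
        { "match": { "_all": "john" } }
      ]
    }
  },
  "highlight": {
    "fields": {
      "*": {}
    },
    "require_field_match": false
  }
}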
I'm trying to do a query like this in Elasticsearch:
Return all the devices of an app that had some logs between two dates, and for each device return the total number of logs.
For this I have a parent-child relationship: the parent device type holds the device information, and a child type device_logs holds the number of logs for each day.
I tried running the following query with a custom score function. I do get the right devices, but the score is the sum over all the device_logs entries instead of only the entries in the date range.
Any idea if it's possible to do this kind of query?
{
"query": {
"bool": {
"filter" :
[
{
"term": {"app": 347}
}
],
"must" :
[
{
"has_child": {
"type": "device_logs",
"inner_hits" : {},
"query": {
"bool": {
"filter": {
"range": {
"date": {
"from": "2017-01-15T00:00:00Z",
"include_lower": true,
"include_upper": true,
"to": "2017-01-17T23:59:59Z"
}
}
}
}
}
}
},
{
"has_child": {
"type": "device_logs",
"score_mode": "sum",
"query" : {
"function_score" : {
"script_score": {
"script": "_score * doc['logs'].value"
}
}
}
}
}
]
}
}
}
EDIT: Adding mappings and some docs
Here are the mappings:
"mappings": {
"device": {
"properties": {
"app": {
"type": "long",
"include_in_all": false
},
"created_at": {
"type": "date",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"language": {
"type": "keyword",
"include_in_all": false,
"ignore_above": 256
},
"last_log_at": {
"type": "date",
"include_in_all": false
},
"last_ping_at": {
"type": "date",
"include_in_all": false
},
"last_seen_at": {
"type": "date"
},
"log_enabled": {
"type": "boolean"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"lowercase": {
"type": "text",
"analyzer": "case_insensitive_sort"
}
}
},
"os_version": {
"type": "keyword",
"include_in_all": false,
"ignore_above": 256
},
"timezone": {
"type": "keyword",
"include_in_all": false,
"ignore_above": 256
},
"type": {
"type": "keyword",
"ignore_above": 256
},
"udid": {
"type": "keyword",
"ignore_above": 256
},
"version": {
"properties": {
"build": {
"type": "keyword",
"include_in_all": false,
"ignore_above": 256
},
"id": {
"type": "long",
"include_in_all": false
},
"version": {
"type": "keyword",
"include_in_all": false,
"ignore_above": 256
}
}
}
}
},
"device_logs": {
"_parent": {
"type": "device"
},
"_routing": {
"required": true
},
"properties": {
"_": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"app": {
"type": "long",
"include_in_all": false
},
"date": {
"type": "date",
"include_in_all": false
},
"errors": {
"type": "long",
"include_in_all": false
},
"logs": {
"type": "long",
"include_in_all": false
},
"warnings": {
"type": "long",
"include_in_all": false
}
}
}
}
And some documents:
{
"_index": "devices",
"_type": "device_logs",
"_id": "22466_2017.01.17",
"_score": 1,
"_routing": "22466",
"_parent": "22466",
"_source": {
"_": "22466_2017.01.17",
"app": 200,
"date": "2017-01-17T00:00:00Z",
"logs": 660,
"warnings": 238,
"errors": 217
}
}
{
"_index": "devices",
"_type": "device",
"_id": "22466",
"_score": 1,
"_source": {
"id": 22466,
"udid": "770CA14ED7FE861EC452",
"name": "Edward's iPhone",
"type": "iPhone7,2",
"app": 200,
"log_enabled": false,
"created_at": "2016-12-21T10:55:02Z",
"last_seen_at": "2017-01-19T10:07:33Z",
"last_log_at": "2017-01-19T11:07:40.756275026+01:00",
"language": "en-US",
"os_version": "9.2",
"timezone": "GMT+1",
"version.id": 7305,
"version.version": "1",
"version.build": "100"
}
}
I have worked out your query.
From a first look at the query, my suspicion was that you were not filtering the child documents by the date range in the second must clause (the one that applies the function score), so the score summed over all child documents.
I have used the following set of documents for this query:
parent doc
{
"id": 22466,
"udid": "770CA14ED7FE861EC452",
"name": "Edward's iPhone",
"type": "iPhone7,2",
"app": 347,
"log_enabled": false,
"created_at": "2016-12-21T10:55:02Z",
"last_seen_at": "2017-01-19T10:07:33Z",
"last_log_at": "2017-01-19T11:07:40.756275026+01:00",
"language": "en-US",
"os_version": "9.2",
"timezone": "GMT+1",
"version.id": 7305,
"version.version": "1",
"version.build": "100"
}
child docs
{
"_type": "device_logs",
"_id": "22466_2017.01.17",
"_score": 0,
"_routing": "22466",
"_parent": "22466",
"_source": {
"_": "22466_2017.01.17",
"app": 200,
"date": "2017-01-17T00:00:00Z",
"logs": 660,
"warnings": 238,
"errors": 217
}
},
{
"_type": "device_logs",
"_id": "22466_2017.02.17",
"_score": 0,
"_routing": "22466",
"_parent": "22466",
"_source": {
"_": "22466_2017.02.17",
"app": 200,
"date": "2017-01-17T00:00:00Z",
"logs": 200,
"warnings": 238,
"errors": 217
}
},
{
"_type": "device_logs",
"_id": "22466_2017.02.20",
"_score": 0,
"_routing": "22466",
"_parent": "22466",
"_source": {
"_": "22466_2017.02.20",
"app": 200,
"date": "2017-01-20T00:00:00Z",
"logs": 200,
"warnings": 238,
"errors": 217
}
}
Note: the first must clause only filters the child documents for inner_hits.
Please use the following query:
{
"query": {
"bool": {
"filter": [{
"term": {
"app": 347
}
}],
"must": [{
"has_child": {
"type": "device_logs",
"inner_hits": {},
"query": {
"bool": {
"filter": {
"range": {
"date": {
"from": "2017-01-15T00:00:00Z",
"include_lower": true,
"include_upper": true,
"to": "2017-01-17T23:59:59Z"
}
}
}
}
}
}
}, {
"has_child": {
"type": "device_logs",
"score_mode": "sum",
"query": {
"function_score": {
"query": {
"bool": {
"filter": {
"range": {
"date": {
"from": "2017-01-15T00:00:00Z",
"include_lower": true,
"include_upper": true,
"to": "2017-01-17T23:59:59Z"
}
}
}
}
},
"score_mode": "sum",
"boost_mode": "sum",
"script_score": {
"script": "_score + doc['logs'].value"
}
}
}
}
}]
}
}
}
A few references: https://github.com/elastic/elasticsearch/issues/10051
Following is the response I get with explain set to true:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 861,
"hits": [
{
"_shard": "[array_index1][0]",
"_node": "nnauJDrIS8-QCqicOMF23g",
"_index": "array_index1",
"_type": "device",
"_id": "22466",
"_score": 861,
"_source": {
"id": 22466,
"udid": "770CA14ED7FE861EC452",
"name": "Edward's iPhone",
"type": "iPhone7,2",
"app": 347,
"log_enabled": false,
"created_at": "2016-12-21T10:55:02Z",
"last_seen_at": "2017-01-19T10:07:33Z",
"last_log_at": "2017-01-19T11:07:40.756275026+01:00",
"language": "en-US",
"os_version": "9.2",
"timezone": "GMT+1",
"version.id": 7305,
"version.version": "1",
"version.build": "100"
},
"_explanation": {
"value": 861,
"description": "sum of:",
"details": [
{
"value": 1,
"description": "A match, join value 22466",
"details": []
},
{
"value": 860,
"description": "A match, join value 22466",
"details": []
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 1,
"description": "app:[347 TO 347], product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 1,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
},
"inner_hits": {
"device_logs": {
"hits": {
"total": 2,
"max_score": 0,
"hits": [
{
"_type": "device_logs",
"_id": "22466_2017.01.17",
"_score": 0,
"_routing": "22466",
"_parent": "22466",
"_source": {
"_": "22466_2017.01.17",
"app": 200,
"date": "2017-01-17T00:00:00Z",
"logs": 660,
"warnings": 238,
"errors": 217
}
},
{
"_type": "device_logs",
"_id": "22466_2017.02.17",
"_score": 0,
"_routing": "22466",
"_parent": "22466",
"_source": {
"_": "22466_2017.02.17",
"app": 200,
"date": "2017-01-17T00:00:00Z",
"logs": 200,
"warnings": 238,
"errors": 217
}
}
]
}
}
}
}
]
}
}
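For reference, the _score of 861 in the explanation is exactly what the query asks for: the term filter on app contributes 0, the first has_child clause (which only filters by date for inner_hits) contributes a constant 1, and the second has_child clause sums the function scores of the two in-range child documents, 660 + 200 = 860, giving 1 + 860 + 0 = 861.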
Please verify your results.