Highlight multi_match doesnt take last term - elasticsearch

When I search for multiple keywords, the last term is not highlighted in the result.
This is the index and mapping:
PUT objects
{
"mappings": {
"properties": {
"title": {
"type": "search_as_you_type"
}
}
}
}
And this is my search:
// query
GET objects/_search
{
"query": {
"multi_match": {
"query": "Goldenen Vlies",
"type": "bool_prefix",
"fields": [
"title",
"title._2gram",
"title._3gram",
"title._index_prefix"
]
}
},
"highlight": {
"fields": {
"title": {}
}
},
"_source": false
}
The output I get is the following:
{
"took" : 1,
"timed_out" : false,
"_shards" : {...},
"hits" : {
"total" : {
"value" : 23,
"relation" : "eq"
},
"max_score" : 7.628418,
"hits" : [
{
"_index" : "objects",
"_id" : "AWj1tIEBIysZ6sOt9vqw",
"_score" : 7.628418,
"highlight" : {
"title" : [
"Schwurkreuz des Ordens vom <em>Goldenen</em> Vlies" <-------
]
}
}
]
}
}
However, this would be the expected/desired output:
{
"took" : 1,
"timed_out" : false,
"_shards" : {...},
"hits" : {
"total" : {
"value" : 23,
"relation" : "eq"
},
"max_score" : 7.628418,
"hits" : [
{
"_index" : "objects",
"_id" : "AWj1tIEBIysZ6sOt9vqw",
"_score" : 7.628418,
"highlight" : {
"title" : [
"Schwurkreuz des Ordens vom <em>Goldenen</em> <em<Vlies</em>" <-------
]
}
}
]
}
}
It does work as expected when I add an extra empty space in the query like so: "query": "Goldenen Vlies ", but I want to know if there is a better solution?

Try this way with "best_fields":
{
"query": {
"multi_match": {
"query": "Goldenen Vlies",
"type": "best_fields",
"fields": [
"title",
"title._2gram",
"title._3gram",
"title._index_prefix"
]
}
},
"highlight": {
"fields": {
"title": {}
}
},
"_source": false
}

Related

Is there a function similar to subquery in elasticsearch?

I want to act like a subquery in elasticsearch.
Let's look at the example below.
create index
PUT test_index
{
"mappings" : {
"properties" : {
"human" : {
"type" : "nested",
"properties" : {
"age" : {
"type" : "integer"
},
"name" : {
"type" : "text"
}
}
}
}
}
}
insert into index sample data
POST test_index/_doc/1
{
"human": [
{
"name": "adrian",
"age" : 24
},
{
"name": "simon",
"age" : 26
},
{
"name": "michale",
"age" : 24
},
{
"name": "beom",
"age" : 25
},
{
"name": "simon",
"age" : 24
}
]
}
In this situation, i want to get a result if doc satisfied condition that human.name == "adrian" and human.name = "simon"
as follow
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.87546873,
"hits" : [
{
"_index" : "test_index",
"_id" : "1",
"_score" : 0.87546873,
"_source" : {
"human" : [
{
"name" : "adrian",
"age" : 24
},
{
"name" : "simon",
"age" : 26
},
{
"name" : "michale",
"age" : 24
},
{
"name" : "beom",
"age" : 25
},
{
"name" : "simon",
"age" : 24
}
]
}
}
]
}
}
but, when i try like this
GET test_index/_search
{
"query": {
"nested": {
"path": "human",
"query": {
"bool": {
"must": [
{
"match": {
"human.name": "simon"
}
},
{
"match": {
"human.name": "adrian"
}
}
]
}
}
}
}
}
then, result is below
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
Is there any way to solve this situation??
You need to do it as follows with two nested queries as each nested document is a document of its own. So you're looking for a top-level document that has two nested documents that must match each human.name:
GET test_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "human",
"query": {
"match": {
"human.name": "simon"
}
}
}
},
{
"nested": {
"path": "human",
"query": {
"match": {
"human.name": "adrian"
}
}
}
}
]
}
}
}

Elasticsearhc filter sub object before search

Let's say I have index like this:
{
"id": 6,
"name": "some name",
"users": [
{
"id": 1,
"name": "User1",
"isEnabled": false,
},
{
"id": 2,
"name": "User2",
"isEnabled": false,
},
{
"id": 3,
"name": "User3,
"isEnabled": true,
},
]
}
what I need is to return that index while user searching for the name some name, but also I want to filter out all not enabled users, and if there is not enabled users omit that index.
I tried to use filters like this:
{
"query": {
"bool": {
"must": {
"match": {
"name": "some name"
}
},
"filter": {
"term": {
"users.isEnabled": true
}
}
}
}
}
but in such a case I see index with all users no matter if user is enabled or not. I'm a bit new but is there a way to do so??? I can filter out all that in code after getting data from elasticsearch but in such a case it can break pagination if I remove some index without enabled users from result set.
I'm a bit new to elasticsearch, but as far I can't find how to do it. Thank you in advice!
Elasticsearch will return whole document if there is any match. If you update your mapping and make users array nested, you can achieve this by using inner hits. This is a basic example mapping that works:
{
"mappings": {
"properties": {
"name": {
"type": "text"
},
"users": {
"type": "nested"
}
}
}
}
And if you send a query like following, response will contain id and name from the parent document, and it will contain inner_hits that match to your user's isEnabled query.
{
"_source": ["id", "name"],
"query": {
"bool": {
"must": [
{
"match": {
"name": "some name"
}
},
{
"nested": {
"path": "users",
"query": {
"term": {
"users.isEnabled": {
"value": true
}
}
},
"inner_hits": {}
}
}
]
}
}
}
This is an example response
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.9375811,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.9375811,
"_source" : {
"name" : "some name",
"id" : 6
},
"inner_hits" : {
"users" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.540445,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "users",
"offset" : 2
},
"_score" : 1.540445,
"_source" : {
"id" : 3,
"name" : "User3",
"isEnabled" : true
}
}
]
}
}
}
}
]
}
}
Then you can do the mapping in the application.

ElasticSearch how to highlight search in nested objects?

I have a dataset like this:
{
"took" : 29,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "kt",
"_type" : "_doc",
"_id" : "3kscgngBKcaOnhm7gU5w",
"_score" : 1.0,
"_source" : {
"authorName" : "Alastair Reynolds",
"bookName" : "Arınma Geçidi",
"publishDate" : "2021",
"book" : [
{
"pageNum" : 1,
"pageContent" : ""
},
{
"pageNum" : 2,
"pageContent" : "© İndie Kitap - 2021 © The Orion Publishing Group Limited - 2019 Yazar:
so, it goes like this page numbers and page content. What I want to do is, for example if I search "spear", I want spears highlighted and also I want the pageNum. This is my mapping:
{
"kt" : {
"mappings" : {
"properties" : {
"authorName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"book" : {
"type" : "nested",
"properties" : {
"pageContent" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"pageNum" : {
"type" : "long"
}
}
},
"bookName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"publishDate" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
before that, "book" was not a nested object and I was getting the highlighted words with phrases but all results was together, like this:
"""Bugün de öyle, “taptığınız <em>kitap</em> tanrısal değildir,
el yapımıdır” diyor birileri.""",
"""“İğrenç ve utanç verici bir <em>kitap</em>” olarak tanımladığı Kuran’ı her-
kesin tanıması için (auff das yderman""",
"""Bu yazı vesilesiyle okuma fırsatı buldum, daha sonra bundan iki ay sürecek
bir tartışma ve bir <em>kitap</em>""",
"""anlamıyorum, siz hep Kemalizm’e dair çok sert eleş-
tiriler yaptınız Yanlış Cumhuriyet diye de bir <em>kitap</em>""",
"""Siz Kemalizm’e dair derin eleştirilerde
bulunuyordunuz, <em>kitap</em> çıktı...""",
"""Tabii ki ben Yanlış Cumhuriyet’in önemli bir <em>kitap</em>
olduğunu düşünüyorum ve özellikle de kendini""",
"""55
These are results coming from different pages, I want to know the pageNum as well. So the result I want is something like this if I search for "Reynolds":
{
"pageNum" : 3,
"pageContent" : "Alastair <em>Reynolds</em> ARINMA GEÇİDİ "
},
{
"pageNum" : 236,
"pageContent" : "ne izin vererek sözlerinin hak ettikleri saygın yeri sağladı. “Peki ya
Thorn’un çocuğuna ne oldu?” Khouri, “O Aura,” dedi. <em>Reynolds</em> için geldiğim çocuk.” "}
What should be my search query and should I change anything in mapping or settings? How can I achieve this? Thanks in advance!
You can highlight the pageContent and show the corresponding pageNum by using highlight query in inner hits
Adding a working example with index data, search query and search result
Index Data:
{
"authorName": "Alastair Reynolds",
"bookName": "Arınma Geçidi",
"publishDate": "2021",
"book": [
{
"pageNum": 1,
"pageContent": ""
},
{
"pageNum": 2,
"pageContent": "Alastair Reynolds ARINMA GEÇİDİ"
}
]
}
{
"authorName": "Alastair Reynolds",
"bookName": "Arınma Geçidi",
"publishDate": "2021",
"book": [
{
"pageNum": 1,
"pageContent": ""
},
{
"pageNum": 2,
"pageContent": "© İndie Kitap - 2021 © The Orion Publishing Group Limited - 2019 Yazar"
}
]
}
Search Query:
{
"query": {
"nested": {
"path": "book",
"query": {
"bool": {
"must": {
"match": {
"book.pageContent": "Reynolds"
}
}
}
},
"inner_hits": {
"highlight": {
"fields": {
"book.pageContent": {}
}
}
}
}
}
}
Search Result:
"inner_hits": {
"book": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.5619608,
"hits": [
{
"_index": "66868025",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "book",
"offset": 1
},
"_score": 0.5619608,
"_source": {
"pageNum": 2, // note this
"pageContent": "Alastair Reynolds ARINMA GEÇİDİ"
},
"highlight": {
"book.pageContent": [
"Alastair <em>Reynolds</em> ARINMA GEÇİDİ" // note this
]
}
}
]
}
}
}
}

Filter nested objects in ElasticSearch 6.8.1

I didn't find any answers how to do simple thing in ElasticSearch 6.8 I need to filter nested objects.
Index
{
"settings": {
"index": {
"number_of_shards": "5",
"number_of_replicas": "1"
}
},
"mappings": {
"human": {
"properties": {
"cats": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"breed": {
"type": "text"
},
"colors": {
"type": "integer"
}
}
},
"name": {
"type": "text"
}
}
}
}
}
Data
{
"name": "iridakos",
"cats": [
{
"colors": 1,
"name": "Irida",
"breed": "European Shorthair"
},
{
"colors": 2,
"name": "Phoebe",
"breed": "european"
},
{
"colors": 3,
"name": "Nino",
"breed": "Aegean"
}
]
}
select human with name="iridakos" and cats with breed contains 'European' (ignore case).
Only two cats should be returned.
Million thanks for helping.
For nested datatypes, you would need to make use of nested queries.
Elasticsearch would always return the entire document as a response. Note that nested datatype means that every item in the list would be treated as an entire document in itself.
Hence in addition to return entire document, if you also want to know the exact hits, you would need to make use of inner_hits feature.
Below query should help you.
POST <your_index_name>/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "iridakos"
}
},
{
"nested": {
"path": "cats",
"query": {
"match": {
"cats.breed": "european"
}
},
"inner_hits": {}
}
}
]
}
}
}
Response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.74455214,
"hits" : [
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1", <--- The document that hit
"_score" : 0.74455214,
"_source" : {
"name" : "iridakos",
"cats" : [
{
"colors" : 1,
"name" : "Irida",
"breed" : "European Shorthair"
},
{
"colors" : 2,
"name" : "Phoebe",
"breed" : "european"
},
{
"colors" : 3,
"name" : "Nino",
"breed" : "Aegean"
}
]
},
"inner_hits" : { <---- Note this
"cats" : {
"hits" : {
"total" : {
"value" : 2, <---- Count of nested doc hits
"relation" : "eq"
},
"max_score" : 0.52354836,
"hits" : [
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "cats",
"offset" : 1
},
"_score" : 0.52354836,
"_source" : { <---- First Nested Document
"breed" : "european"
}
},
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "cats",
"offset" : 0
},
"_score" : 0.39019167,
"_source" : { <---- Second Document
"breed" : "European Shorthair"
}
}
]
}
}
}
}
]
}
}
Note in your response how the inner_hits section would appear where you would find the exact hits.
Hope this helps!
You could use something like this:
{
"query": {
"bool": {
"must": [
{ "match": { "name": "iridakos" }},
{ "match": { "cats.breed": "European" }}
]
}
}
}
To search on a cat's breed, you can use the dot-notation.

Elasticsearch Array (Label/Tag Querying

I really think that I'm trying to do is fairly simple. I'm simply trying to query for N tags. A clear example of this was asked and answered over at "Elasticsearch: How to use two different multiple matching fields?". Yet, that solution doesn't seem to work for the latest version of ES (more likely, I'm simply doing it wrong).
To show the current data and to demonstrate a working query, see below:
{
"query": {
"filtered": {
"filter": {
"terms": {
"Price": [10,5]
}
}
}
}
}
Here are the results for this. As you can see, 5 and 10 are showing up (this demonstrates that basic queries do work):
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 1.0,
"hits" : [ {
"_index" : "labelsample",
"_type" : "entry",
"_id" : "AVLGnGMYXB5vRcKBZaDw",
"_score" : 1.0,
"_source" : {
"Category" : [ "Medium Signs" ],
"Code" : "a",
"Name" : "Sample 1",
"Timestamp" : 1.455031083799152E9,
"Price" : "10",
"IsEnabled" : true
}
}, {
"_index" : "labelsample",
"_type" : "entry",
"_id" : "AVLGnGHHXB5vRcKBZaDF",
"_score" : 1.0,
"_source" : {
"Category" : [ "Small Signs" ],
"Code" : "b",
"Name" : "Sample 2",
"Timestamp" : 1.45503108346191E9,
"Price" : "5",
"IsEnabled" : true
}
}, {
"_index" : "labelsample",
"_type" : "entry",
"_id" : "AVLGnGILXB5vRcKBZaDO",
"_score" : 1.0,
"_source" : {
"Category" : [ "Medium Signs" ],
"Code" : "c",
"Name" : "Sample 3",
"Timestamp" : 1.455031083530215E9,
"Price" : "10",
"IsEnabled" : true
}
}, {
"_index" : "labelsample",
"_type" : "entry",
"_id" : "AVLGnGGgXB5vRcKBZaDA",
"_score" : 1.0,
"_source" : {
"Category" : [ "Medium Signs" ],
"Code" : "d",
"Name" : "Sample 4",
"Timestamp" : 1.4550310834233E9,
"Price" : "10",
"IsEnabled" : true
}
}]
}
}
As a side note: the following bool query gives the exact same results:
{
"query": {
"bool": {
"must": [{
"terms": {
"Price": [10,5]
}
}]
}
}
}
Notice Category...
Let's simply copy/paste Category into a query:
{
"query": {
"filtered": {
"filter": {
"terms": {
"Category" : [ "Medium Signs" ]
}
}
}
}
}
This gives the following gem:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 6,
"successful" : 6,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
Again, here's the bool query version that gives the same 0-hit result:
{
"query": {
"bool": {
"must": [{
"terms": {
"Category" : [ "Medium Signs" ]
}
}]
}
}
}
In the end, I definitely need something similar to "Category" : [ "Medium Signs", "Small Signs" ] working (in concert with other label queries and minimum_should_match as well-- but I can't even get this bare-bones query to work).
I have zero clue why this is. I poured over the docs for houring, trying everything I can see. Do I need to look into debugging various encodings? Is my syntax archaic?
The problem here is that ElasticSearch is analyzing and betokening the Category field, and the terms filter expects an exact match. One solution here is to add a raw field to Category inside your entry mapping:
PUT labelsample
{
"mappings": {
"entry": {
"properties": {
"Category": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"Code": {
"type": "string"
},
"Name": {
"type": "string"
},
"Timestamp": {
"type": "date",
"format": "epoch_millis"
},
"Price": {
"type": "string"
},
"IsEnabled": {
"type": "boolean"
}
}
}
}
}
...and filter on the raw field:
GET labelsample/entry/_search
{
"query": {
"filtered": {
"filter": {
"terms": {
"Category.raw" : [ "Medium Signs" ]
}
}
}
}
}

Resources