Multi Match Query for multiple words with operator AND - elasticsearch

My scenario: my application has an inline search box, just like the one in the header bar of the Udemy site, and the user can type more than one word into it. I want to take the multi-word search text entered by the user and query it against multiple fields.
The fields I am querying against have the following mapping:
_mapping
{
"category": {
"type": "keyword"
},
"designers": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
}
}
},
"story": {
"type": "text"
},
"foundryName": {
"type": "text",
}
}
My problem is: how can I do a multi-word search like "designerFirstName1 category1 foundryName1" and get results where each word is matched in at least one of the fields I am searching? Also, as I add more words, the result set should get smaller.
Query
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "designers",
"query": {
"match": {
"designers.name": {
"query": "designerFirstName1 category1 foundryName1",
"fuzziness": "auto"
}
}
}
}
},
{
"multi_match": {
"query": "designerFirstName1 category1 foundryName1",
"type": "cross_fields",
"fields": [
"story",
"foundryName",
"category",
]
}
}
],
"minimum_should_match": 1
}
}
}
The expected result is that a document matching all of the words should rank highest, and further down the results the documents no longer contain every search word in any of the fields (as shown below):
{
"category": [
"category1",
"category2"
],
"designers": [
{
"name": "designerFirstName1 designerLastName1"
},
{
"name": "designerFirstName2 designerLastName2"
}
],
"story": "Sphinx of black quartz, judge my vow! Sex-charged fop blew my junk TV quiz.",
"foundryName": "foundryName1"
},
{
"category": [
"category2",
"category3"
],
"designers": [
{
"name": "designerFirstName1 designerLastName1"
},
{
"name": "designerFirstName2 designerLastName2"
}
],
"story": "Sphinx of black quartz, judge my vow! Sex-charged fop blew my junk TV quiz.",
"foundryName": "foundryName1"
},
{
"category": [
"category1",
"category3"
],
"designers": [
{
"name": "designerFirstName3 designerLastName1"
},
{
"name": "designerFirstName2 designerLastName2"
}
],
"story": "Sphinx of black quartz, judge my vow! Sex-charged fop blew my junk TV quiz.",
"foundryName": "foundryName1"
},
{
"category": [
"category2",
"category3"
],
"designers": [
{
"name": "designerFirstName3 designerLastName1" /*changed here comparing with the above document*/
},
{
"name": "designerFirstName2 designerLastName2"
}
],
"story": "Sphinx of black quartz, judge my vow! Sex-charged fop blew my junk TV quiz.",
"foundryName": "foundryName1"
},

Related

Cannot seem to use must and must_not together in an elastic search query

If I run the following query:
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "boxing",
"fuzziness": 2,
"minimum_should_match": 2
}
}
],
"must_not": [
{
"terms_set": {
"allowedCountries": {
"terms": ["gb", "mx"],
"minimum_should_match_script": {
"source": "2"
}
}
}
}
],
"filter": [
{
"range": {
"expireTime": {
"gt": 1674061907954
}
}
},
{
"term": {
"region": {
"value": "row"
}
}
},
{
"term": {
"sourceType": {
"value": "article"
}
}
}
]
}
}
}
against an index with articles that look like:
{
"_index": "content-items-v10",
"_type": "_doc",
"_id": "e7hm75ui4dma1mm4j8q5v7914",
"_score": 4.3724976,
"_source": {
"allowedCountries": ["gb", "ie"],
"body": "Both Joshua Buatsi and Craig Richards join The DAZN Boxing Show ahead of their clash at London's O2 Arena. Matchroom's Eddie Hearn also gives his take on the night, as well as Chantelle Cameron previewing her contest with Victoria Noelia Bustos.",
"competitions": [
{
"id": "8lo6205qyio0fksjx9glqbdhj",
"name": "Buatsi v Richards"
}
],
"contestants": [
{
"id": "7rq59j3eiamxlm12vhxcsgujj",
"name": "Joshua Buatsi"
},
{
"id": "boby9oqe23g6qyuwphrxh8su5",
"name": "Craig Richards"
}
],
"countries": [
{
"id": "7yasa43laq1nb2e6f8bfuvxed",
"name": "World"
},
{
"id": "258l9t5sm55592i08mdpqzr3t",
"name": "United Kingdom"
}
],
"dotsLastUpdateTime": 1673979749396,
"expireTime": 4800000000000,
"fixtureDate": {},
"headline": "Buatsi vs. Richards: Preview",
"id": "e7hm75ui4dma1mm4j8q5v7914",
"importance": 0,
"languageKeys": ["en"],
"languages": ["en"],
"lastUpdateTime": {
"ts": 1653088281000,
"iso8601": "2022-05-20T23:11:21.000Z"
},
"promoImageUrl": null,
"publication": {
"typeId": "1plcw0iyhx9vn1fcanbm2ja3rf",
"typeName": "Shoulder"
},
"publishedTime": {
"ts": 1653088281000,
"iso8601": "2022-05-20T23:11:21.000Z"
},
"region": "row",
"shortHeadline": null,
"sourceType": "article",
"sports": [
{
"id": "2x2oqzx60orpoeugkd754ga17",
"name": "Boxing"
}
],
"teaser": "",
"thumbnailImageUrl": "https://images.daznservices.com/di/library/babcock_canada/45/3e/the-dazn-boxing-show-20052022_xc4jbfqi022l1shq9lu641h9e.png?t=-477976832",
"translations": {}
}
}
I get the following validation error from elasticsearch:
{
"ok": false,
"errors": {
"validation": [
{
"message": "\"query.bool.must_not\" is not allowed",
"path": [
"query",
"bool",
"must_not"
],
"type": "object.unknown",
"context": {
"child": "must_not",
"label": "query.bool.must_not",
"value": [
{
"terms_set": {
"allowedCountries": {
"terms": [
"gb",
"mx"
],
"minimum_should_match_script": {
"source": "2"
}
}
}
}
],
"key": "must_not"
}
}
]
},
"correlationId": "d29e9275-9ab3-4ff8-944d-852b98d4b503"
}
And I cannot figure out what the issue might be! From the elastic docs it should be OK.
I'm using ElasticSearch 7.9.3 running in a local docker container.
I'm hoping someone out there will give me a clue!
Cheers!
I would expect this to just work.
I'm trying to filter out articles that have both of the country codes gb and mx in the field allowedCountries.
I can include them easily enough in the results when I add the terms_set query to the bool.must section of the query.
It works well; you just need to enclose your query in the query section:
{
"query": { <--- add this
"bool": { <--- your query starts here
"must": [
...
Thank you for responding!
I was helping with a system I did not have full context on - it turns out there is a proxy in the mix with validation that was blocking the must_not query. So, with the proxy fixed, it now works.

Position as result, instead of highlighting

I am trying to get match positions, instead of highlighted text, as the result of an Elasticsearch query.
Create the index:
PUT /test/
{
"mappings": {
"article": {
"properties": {
"text": {
"type": "text",
"analyzer": "english"
},
"author": {
"type": "text"
}
}
}
}
}
Put a document:
PUT /test/article/1
{
"author": "Just Me",
"text": "This is just a simple test to demonstrate the audience the purpose of the question!"
}
Search the document:
GET /test/article/_search
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"text": {
"query": "simple test",
"_name": "must"
}
}
}
],
"should": [
{
"match_phrase": {
"text": {
"query": "need help",
"_name": "first",
"slop": 2
}
}
},
{
"match_phrase": {
"text": {
"query": "purpose question",
"_name": "second",
"slop": 3
}
}
},
{
"match_phrase": {
"text": {
"query": "don't know anything",
"_name": "third"
}
}
}
],
"minimum_should_match": 1
}
},
"highlight": {
"fields": {
"text": {}
}
}
}
When I run this search, I get a result like this:
This is just a simple test to <em>demonstrate</em> the audience the purpose of the <em>question</em>!
I'm not interested in getting the results surrounded with em tags; instead, I want to get the positions of all the matches, like so:
"hits": [
{ "start_offset": 30, "end_offset": 40 },
{ "start_offset": 74, "end_offset": 81 }
]
Hope you get my idea!
To get the offset position of a word in a text, you should add a term vector to your index mapping (see the Elasticsearch term vector documentation). As written in the docs, you have to enable this parameter at index time:
"term_vector": "with_positions_offsets_payloads"
For the specific query, please follow the linked doc page

Bool AND search in properties in ElasticSearch

I've got a very small dataset of documents put in ES :
{"id":1, "name": "John", "team":{"code":"red", "position":"P"}}
{"id":2, "name": "Jack", "team":{"code":"red", "position":"S"}}
{"id":3, "name": "Emily", "team":{"code":"green", "position":"P"}}
{"id":4, "name": "Grace", "team":{"code":"green", "position":"P"}}
{"id":5, "name": "Steven", "team":[
{"code":"green", "position":"S"},
{"code":"red", "position":"S"}]}
{"id":6, "name": "Josephine", "team":{"code":"red", "position":"S"}}
{"id":7, "name": "Sydney", "team":[
{"code":"red", "position":"S"},
{"code":"green", "position":"P"}]}
I want to query ES for people who are in the red team, with position P.
With the request
curl -XPOST 'http://localhost:9200/teams/aff/_search' -d '{
"query": {
"bool": {
"must": [
{
"match": {
"team.code": "red"
}
},
{
"match": {
"team.position": "P"
}
}
]
}
}
}'
I've got a wrong result.
ES gives
"name": "John",
"team":
{ "code": "red", "position": "P" }
and
"name": "Sydney",
"team":
[
{ "code": "red", "position": "S"},
{ "code": "green", "position": "P"}
]
For the last entry, ES took the property code=red in the first record and took the property position=P in the second record.
How can I specify that the search must match both terms in the same record (whether or not it is inside a list of nested records)?
In fact, the only correct answer is document 1, John.
Here is the gist that creates the dataset :
https://gist.github.com/flrt/4633ef59b9b9ec43d68f
Thanks in advance
When you index document like
{
"name": "Sydney",
"team": [
{"code": "red", "position": "S"},
{"code": "green","position": "P"}
]
}
ES implicitly creates an inner object for your field (team in this particular example) and flattens it to a structure like
{
'team.code': ['red', 'green'],
'team.position': ['S', 'P']
}
So you lose the association between the code and position values of each object. To avoid this, you need to explicitly define a nested mapping, index your documents as usual, and query them with a nested query.
So, this
PUT so/nest/_mapping
{
"nest": {
"properties": {
"team": {
"type": "nested"
}
}
}
}
PUT so/nest/
{
"name": "Sydney",
"team": [
{
"code": "red",
"position": "S"
},
{
"code": "green",
"position": "P"
}
]
}
GET so/nest/_search
{
"query": {
"nested": {
"path": "team",
"query": {
"bool": {
"must": [
{
"match": {
"team.code": "red"
}
},
{
"match": {
"team.position": "P"
}
}
]
}
}
}
}
}
will return empty hits.
Further reading on relation management: https://www.elastic.co/blog/managing-relations-inside-elasticsearch
You can use a Nested Query so that your searches happen individually on the subdocuments in the team array, rather than across the entire document.
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "team",
"query": {
"bool": {
"must": [
{ "match": { "team.code": "red" } },
{ "match": { "team.position": "P" } }
]
}
}
}
}
]
}
}
}

ElasticSearch: search inside the array of objects

I have a problem with querying objects in array.
Let's create a very simple index, add a type with one field, and add one document with an array of objects (I use the Sense console):
PUT /test/
PUT /test/test/_mapping
{
"test": {
"properties": {
"parent": {"type": "object"}
}
}
}
POST /test/test
{
"parent": [
{
"name": "turkey",
"label": "Turkey"
},
{
"name": "turkey,mugla-province",
"label": "Mugla (province)"
}
]
}
Now I want to search by both names "turkey" and "turkey,mugla-province" . The first query works fine:
GET /test/test/_search {"query":{ "term": {"parent.name": "turkey"}}}
But the second one returns nothing:
GET /test/test/_search {"query":{ "term": {"parent.name": "turkey,mugla-province"}}}
I tried a lot of stuff including:
"parent": {
"type": "nested",
"include_in_parent": true,
"properties": {
"label": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "string",
"store": true
}
}
}
But nothing helps. What do I miss?
Here's one way you can do it, using nested docs:
I defined an index like this:
PUT /test_index
{
"mappings": {
"doc": {
"properties": {
"parent": {
"type": "nested",
"properties": {
"label": {
"type": "string"
},
"name": {
"type": "string"
}
}
}
}
}
}
}
Indexed your document:
PUT /test_index/doc/1
{
"parent": [
{
"name": "turkey",
"label": "Turkey"
},
{
"name": "turkey,mugla-province",
"label": "Mugla (province)"
}
]
}
Then either of these queries will return it:
POST /test_index/_search
{
"query": {
"nested": {
"path": "parent",
"query": {
"match": {
"parent.name": "turkey"
}
}
}
}
}
POST /test_index/_search
{
"query": {
"nested": {
"path": "parent",
"query": {
"match": {
"parent.name": "turkey,mugla-province"
}
}
}
}
}
Here's the code I used:
http://sense.qbox.io/gist/6258f8c9ee64878a1835b3e9ea2b54e5cf6b1d9e
To search for multiple terms, use the Terms query instead of the Term query.
"terms" : {
"tags" : [ "turkey", "mugla-province" ],
"minimum_should_match" : 1
}
There are various ways to construct this query, but this is the simplest and most elegant in the current version of ElasticSearch (1.6)

Nested filtering in elasticsearch with more than one term of the same nested type

I'm new to elasticsearch, so maybe my approach is plain wrong, but I want to make an index of recipes and allow the user to filter it down with the aggregated ingredients that are still found in the subset.
Maybe I'm using the wrong language to explain so maybe this example will clarify. I would like to search for recipes with the term salt; which results in three recipes:
1. with ingredients: salt, flour, water
2. with ingredients: salt, pepper, egg
3. with ingredients: water, flour, egg, salt
The aggregate on the results' ingredients returns salt, flour, water, pepper, egg. When I filter with flour I only want recipes 1 and 3 to appear in the search results (and the aggregate on ingredients should only return salt, flour, water and egg). When I add another filter, egg, I want only recipe 3 to appear (and the aggregate should only return water, flour, egg, salt).
I can't get the latter to work: one filter next to the default query does narrow down the results as desired, but when I add the other term (egg) to the terms filter, the results start to include recipe b (the second recipe) again, as if it were an OR filter. Adding AND to the filter execution, however, results in NO results ... what am I doing wrong?
My mapping:
{
"recipe": {
"properties": {
"title": {
"analyzer": "dutch",
"type": "string"
},
"ingredients": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"analyzer": "dutch",
"include_in_parent": true,
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
My query:
{
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"match": {
"_all": "salt"
}
}
]
}
},
"filter": {
"nested": {
"path": "ingredients",
"filter": {
"terms": {
"ingredients.name": [
"flour",
"egg"
],
"execution": "and"
}
}
}
}
}
},
"size": 50,
"aggregations": {
"ingredients": {
"nested": {
"path": "ingredients"
},
"aggregations": {
"count": {
"terms": {
"field": "ingredients.name.raw"
}
}
}
}
}
}
Why are you using a nested mapping here? Its main purpose is to keep relations between the sub-object attributes, but your ingredients field has just one attribute and can be modeled simply as a string field.
So, if you update your mapping like this :
POST recipes
{
"mappings": {
"recipe": {
"properties": {
"title": {
"type": "string"
},
"ingredients": {
"name": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
You can still index your recipes as :
{
"title":"recipe b",
"ingredients":["salt","pepper","egg"]
}
And this query gives you the result you are expecting:
POST recipes/recipe/_search
{
"query": {
"filtered": {
"query": {
"match": {
"_all": "salt"
}
},
"filter": {
"terms": {
"ingredients": [
"flour",
"egg"
],
"execution": "and"
}
}
}
},
"size": 50,
"aggregations": {
"ingredients": {
"terms": {
"field": "ingredients"
}
}
}
}
which is :
{
...
"hits": {
"total": 1,
"max_score": 0.22295055,
"hits": [
{
"_index": "recipes",
"_type": "recipe",
"_id": "PP195TTsSOy-5OweArNsvA",
"_score": 0.22295055,
"_source": {
"title": "recipe c",
"ingredients": [
"salt",
"flour",
"egg",
"water"
]
}
}
]
},
"aggregations": {
"ingredients": {
"buckets": [
{
"key": "egg",
"doc_count": 1
},
{
"key": "flour",
"doc_count": 1
},
{
"key": "salt",
"doc_count": 1
},
{
"key": "water",
"doc_count": 1
}
]
}
}
}
Hope this helps.

Resources