Search for documents matching all terms in a nested array Elasticsearch - elasticsearch

I am learning to use Elasticsearch as a basic recommender engine.
My elasticsearch document contains records with nested entities as follows
PUT recs/user/1
{
"name" : "Brad Pitt",
"movies_liked": [
{
"name": "Forrest Gump",
"score": 1
},
{
"name": "Terminator",
"score": 4
},
{
"name": "Rambo",
"score": 4
},
{
"name": "Rocky",
"score": 4
},
{
"name": "Good Will Hunting",
"score": 2
}
]
}
PUT recs/user/2
{
"name" : "Tom Cruise",
"movies_liked": [
{
"name": "Forrest Gump",
"score": 2
},
{
"name": "Terminator",
"score": 1
},
{
"name": "Rocky IV",
"score": 1
},
{
"name": "Rocky",
"score": 1
},
{
"name": "Rocky II",
"score": 1
},
{
"name": "Predator",
"score": 4
}
]
}
I would like to search for users who specifically like "Forrest Gump","Terminator" and "Rambo".
I have used a nested query which currently looks like this
POST recs/user/_search
{
"query": {
"nested": {
"path": "movies_liked",
"query": {
"terms": {
"movies_liked.name": ["Forrest Gump","Terminator","Rambo"]
}
}
}
}
}
However when I execute this search, I expected to see only the first record which has all the required terms, but in the results I am getting both the records. In the second record the user clearly does not have "Rambo" in his liked list. I understand that this query is doing an "OR" operation with the given terms, How do I tweak this query to do an "AND" operation so that only the records having all the terms get matched?

How do I tweak this query to do an "AND" operation so that only the records having all the terms get matched?
By using a bool query:
POST recs/user/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "movies_liked",
"query": {
"bool": {
"must": [
{
"terms": {
"movies_liked.name": [
"Forrest Gump"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "movies_liked",
"query": {
"bool": {
"must": [
{
"terms": {
"movies_liked.name": [
"Terminator"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "movies_liked",
"query": {
"bool": {
"must": [
{
"terms": {
"movies_liked.name": [
"Rambo"
]
}
}
]
}
}
}
}
]
}
}
}
Note that bool wraps around several nested queries, not the other way around. It is important because the scope of a nested query is the nested document, because it basically a hidden separate object.
Hope that helps!

Related

how to match multiple fields inside filter keyword in elastic search query?

I want to add one more field inside match inside function block in my query, but when i am adding, i am getting an error ------ "reason" : "[match] query doesn't support multiple fields, found [gender] and [id]",
How do i do it?
GET exp/_search
{
"_source": ["score","answer","gender","id"]
, "query": {
"function_score": {
"query": {
"match": {
"score": 10
}
},
"functions": [
{
"filter": {
"match":{
"gender":"male",
"id":1
}
},
"weight": 2
}
]
}
}
}
You can create bool query inside filter and it will be resolved your issue. match query does not support providing 2 diffrent field and values. You can use bool query for same purpose.
{
"_source": [
"score",
"answer",
"gender",
"id"
],
"query": {
"function_score": {
"query": {
"match": {
"score": 10
}
},
"functions": [
{
"filter": {
"bool": {
"must": [
{
"match": {
"gender": "male"
}
},
{
"match": {
"id": 1
}
}
]
}
},
"weight": 2
}
]
}
}
}
Also, If you want to apply two different boosting value for gender and id then you can give two filter clause as shown below:
{
"_source": [
"score",
"answer",
"gender",
"id"
],
"query": {
"function_score": {
"query": {
"match": {
"score": 10
}
},
"functions": [
{
"filter": {
"match": {
"gender": "male"
}
},
"weight": 2
},
{
"filter": {
"match": {
"id": 1
}
},
"weight": 1
}
]
}
}
}

ElasticSearch - Filtering data returned from nested query

I am have a set of data in the following structure:
[
{
"productId": "ProductId1",
"customerNumbers": [
"customer": {
"name": "Prod1Cust1",
"totalOrders": 23
},
"customer": {
"name": "Prod2Cust1",
"totalOrders": 5
},
"customer": {
"name": "Prod3Cust1",
"totalOrders": 5
}
]
},
{
"productId": "ProductId2",
"customerNumbers": [
"customer": {
"name": "Prod2Cust1",
"totalOrders": 23
},
"customer": {
"name": "Prod2Cust1",
"totalOrders": 5
}
]
}
]
and I need to fetch all the records which have a prefix of "Prod1 as in name field(in the example avoid, only first record should be returned i.e. ProductId1). Also, when the data is returned, I need to just fetch just the customer number whose prefix is Prod1 i.e:
Correct Output:
{
"productId": "ProductId1",
"customerNumbers": [
"customer": {
"name": "Prod1Cust1",
"totalOrders": 23
}
]
}
Instead of:
{
"productId": "ProductId1",
"customerNumbers": [
"customer": {
"name": "Prod1Cust1",
"totalOrders": 23
},
"customer": {
"name": "Prod2Cust1",
"totalOrders": 5
},
"customer": {
"name": "Prod3Cust1",
"totalOrders": 5
}
]
}
I'm able to fetch the records whose Name prefix is "Prod1" using nested query coupled with MatchPhrasePrefixQuery (this returns me result with all the customer numbers). How can I further filter the data to get customer numbers whose Name prefix is "Prod1".
Following is my current query:
{
"from": 0,
"size": 10,
"sort": [
{
"name.keyword": {
"missing": "_first",
"order": "asc"
}
}
],
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"match": {
"customerNumbers.name": {
"query": "Prod1",
"type": "phrase_prefix"
}
}
}
]
}
},
"path": "customerNumbers"
}
}
]
}
}
]
}
}
}
P.S: I'm using ElasticSearch 5.x with Nest.
Try using inner_hits:
PUT products
{"mappings":{"_doc":{"properties":{"customerNumbers":{"type":"nested"}}}}}
POST products/_doc
{"productId":"ProductId1","customerNumbers":[{"name":"Prod1Cust1","totalOrders":23},{"name":"Prod2Cust1","totalOrders":5},{"name":"Prod3Cust1","totalOrders":5}]}
POST products/_doc
{"productId":"ProductId2","customerNumbers":[{"name":"Prod2Cust1","totalOrders":23},{"name":"Prod2Cust1","totalOrders":5}]}
GET products/_search
{
"_source": "inner_hits",
"from": 0,
"size": 10,
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"nested": {
"path": "customerNumbers",
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"customerNumbers.name": {
"query": "Prod1"
}
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
]
}
}
}
yielding the following hits
[
{
"_index":"products",
"_type":"_doc",
"_id":"tyQGo3EBdiyDG0RsTa0N",
"_score":0.9808292,
"_source":{
},
"inner_hits":{
"customerNumbers":{
"hits":{
"total":1,
"max_score":0.9808292,
"hits":[
{
"_index":"products",
"_type":"_doc",
"_id":"tyQGo3EBdiyDG0RsTa0N",
"_nested":{
"field":"customerNumbers",
"offset":0
},
"_score":0.9808292,
"_source":{
"name":"Prod1Cust1", <-----
"totalOrders":23
}
}
]
}
}
}
}
]

Elasticsearch must_not filter not works with a big bunch of values

I have the next query that include some filters:
{
"from": 0,
"query": {
"function_score": {
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"idpais": [
115
]
}
},
{
"term": {
"tipo": [
1
]
}
}
],
"must_not": [
{
"term": {
"idregistro": [
5912471,
3433876,
9814443,
11703069,
6333176,
8288242,
9924922,
6677850,
11852501,
12530205,
4703469,
12776479,
12287659,
11823679,
12456304,
12777457,
10977614,
...
]
}
}
]
}
},
"query": {
"bool": {
"should": [
{
"match_phrase": {
"area": "Coordinator"
}
},
{
"match_phrase": {
"company": {
"boost": 5,
"query": "IBM"
}
}
},
{
"match_phrase": {
"topic": "IT and internet stuff"
}
},
{
"match_phrase": {
"institution": {
"boost": 5,
"query": "University of my city"
}
}
}
]
}
}
}
},
"script_score": {
"params": {
"idpais": 115,
"idprovincia": 0,
"relationships": []
},
"script_id": "ScoreUsuarios"
}
}
},
"size": 24,
"sort": [
{
"_script": {
"order": "desc",
"script_id": "SortUsuarios",
"type": "number"
}
}
]
}
The must_not filter has a big bunch of values to exclude (around 200 values), but it looks like elasticsearch ignores those values and it includes on the result set. If I try to set only a few values (10 to 20 values) then elasticsearch applies the must_not filter.
Exists some restriction a bout the amount of values in the filters? Exists some way to remove a big amount of results from the query?
terms query is used for passing a list of values not term query.You have to use it like below in your must filter.
{
"query": {
"terms": {
"field_name": [
"VALUE1",
"VALUE2"
]
}
}
}

Elasticsearch additional boost if multiple conditions are met

Imagine I have a document, which looks like this:
{
"Title": "Smartphones in United Kingdom",
"Text": "A huge text about the topic",
"CategoryTags": [
{
"CategoryID": 1,
"CategoryName": "Smartphone"
},
{
"CategoryID": 2,
"CategoryName": "Apple"
},
{
"CategoryID": 3,
"CategoryName": "Samsung"
}
],
"GeographyTags": [
{
"GeographyID": 1,
"GeographyName": "Western Europe"
},
{
"GeographyID": 2,
"GeographyName": "United Kingdom"
}
]
}
CategoryTags and GeographyTags are stored as nested subdocuments.
I'd be looking for "apple united kingdom" in my search bar. How'd I make a query that would boost this document if it has both matching category and geography at the same time?
I was thinking of multi_match query, but I didn't figure out how would I deal with nested documents here...
I was thinking of nesting must into should statement. Would that make any sense?
POST /_search
{
"template": {
"size": "50",
"_source": {
"include": "Title"
},
"query": {
"filtered": {
"query": {
"bool": {
"minimum_number_should_match": "2<50%",
"must": [
{
"match": {
"Text": {
"query": "{{SearchPhrase}}"
}
}
}
],
"should": [
{
"match": {
"Title": {
"query": "{{SearchPhrase}}",
"type": "phrase",
"boost": "20"
}
}
},
{
"bool": {
"must": [
{
"nested": {
"path": "CategoryTags",
"query": {
"match": {
"CategoryTags.CategoryName": "{{SearchPhrase}}"
}
}
}
},
{
"nested": {
"path": "GeographyTags",
"query": {
"match": {
"GeographyTags.GeographyName": "{{SearchPhrase}}"
}
}
}
}
]
}
}
]
}
}
}
}
}
}

Elastic search DSL Syntax equivalence for SQL statement

I'm trying to replicate the below query logic in an elastic search query but something's not right.
Basically the query below returns one doc. I'd like either the first condition to be applied: "name": "iphone" OR the more complex second one which is: (username = 'gogadget' AND status_type = '1' AND created_time between 4532564 AND 64323238). Note that the nested bool must inside the should would take care of the more complex condition. I should still see 1 doc if I change the outside match of "name": "iphone" to be changed to "name": "wrong value". But I get nothing when I do that. I'm not sure where this is wrong.
The SQL Query is here below.
SELECT * from data_points
WHERE name = 'iphone'
OR
(username = 'gogadget' AND status_type = '1' AND created_time between 4532564 AND 64323238)
{
"size": 30,
"query": {
"bool": {
"must": [
{
"bool": {
"minimum_should_match": "1",
"should": [
{
"bool": {
"must": [
{
"match": {
"username": "gogadget"
}
},
{
"terms": {
"status_type": [
"3",
"4"
]
}
},
{
"range": {
"created_time": {
"gte": 20140712,
"lte": 1405134711
}
}
}
]
}
}
],
"must": [],
"must_not": []
}
},
{
"match": {
"name": "iphone"
}
}
]
}
}
}
should query will match the query and return.
You don't need use must to aggregate your OR query.
The query should like:
{
"query": {
"bool": {
"should": [{
"bool": {
"must": [{
"match": {
"username": "gogadget"
}
}, {
"terms": {
"status_type": [
"3",
"4"
]
}
}, {
"range": {
"created_time": {
"gte": 20140712,
"lte": 1405134711
}
}
}]
}
}, {
"match": {
"name": "iphone"
}
}]
}
}
}

Resources