Usage of NestedFilter and changes in mapping - elasticsearch

I'm trying to create a filter on the basis of some product attributes which I've indexed. Earlier, they were indexed as follows -:
"attributes": {
"name1": "value1",
"name2": "value2",
....
}
The filter that I used earlier was generated according to URL query parameters as follows -:
"/search?q=product&color=black&color=blue&size=xl"
would lead to -
>>> and_filter = ANDFilter(
[
ORFilter(
[
TermFilter('color', 'blue'),
TermFilter('color', 'black')
]
),
ORFilter(
[
TermFilter('size', 'xl')
]
)
]
)
>>> main_filter = BoolFilter().add_must(and_filter)
Due to some changes in the backend, the mapping had to be changed to a nested one.
New Mapping -:
"attributes":
{
"type": "nested",
"properties": {
"name": {"type": "string"},
"display_name": {"type": "string"},
"type": {"type": "string"},
"value": {"type": "string"}
}
}
I thought of the new filter as so -:
>>> and_filter = ANDFilter(
[
ORFilter(
[
ANDFilter(
[
TermFilter("attributes.name", "color"),
TermFilter("attributes.value", "blue"),
]
),
ANDFilter(
[
TermFilter("attributes.name", "color"),
TermFilter("attributes.value", "black"),
]
)
]
),
ORFilter(
[
ANDFilter(
[
TermFilter("attributes.name", "size"),
TermFilter("attributes.value", "xl"),
]
)
]
)
]
)
>>> nested_filter = NestedFilter("attributes", BoolFilter().add_must(and_filter))
However, this doesn't seem to be the right way to do it. When I try to generate facets over products with this filter applied, all the counts come out to be zero every time.
Additionally, trying to search for products does not yield the expected results when the filter is applied.
I would appreciate some pointers as to how to design the filter correctly.
EDIT:
Old Filter -:
{
"bool": {
"must": [
{
"and": [
{
"or": [
{
"term": {
"color": "blue"
}
},
{
"term": {
"color": "black"
}
}
]
},
{
"or": [
{
"term": {
"size": "xl"
}
}
]
}
]
}
]
}
}
New Filter -:
{
"nested": {
"filter": {
"bool": {
"must": [
{
"and": [
{
"or": [
{
"and": [
{
"term": {
"attributes.name": "color"
}
},
{
"term": {
"attributes.value": "blue"
}
}
]
},
{
"and": [
{
"term": {
"attributes.name": "color"
}
},
{
"term": {
"attributes.value": "black"
}
}
]
}
]
},
{
"or": [
{
"and": [
{
"term": {
"attributes.name": "size"
}
},
{
"term": {
"attributes.value": "xl"
}
}
]
}
]
}
]
}
]
}
},
"path": "attributes"
}
}

Related

ElasticSearch - Filtering data returned from nested query

I have a set of data in the following structure:
[
{
"productId": "ProductId1",
"customerNumbers": [
"customer": {
"name": "Prod1Cust1",
"totalOrders": 23
},
"customer": {
"name": "Prod2Cust1",
"totalOrders": 5
},
"customer": {
"name": "Prod3Cust1",
"totalOrders": 5
}
]
},
{
"productId": "ProductId2",
"customerNumbers": [
"customer": {
"name": "Prod2Cust1",
"totalOrders": 23
},
"customer": {
"name": "Prod2Cust1",
"totalOrders": 5
}
]
}
]
and I need to fetch all the records which have a prefix of "Prod1" in the name field (in the example above, only the first record should be returned, i.e. ProductId1). Also, when the data is returned, I need to fetch just the customer number whose prefix is Prod1, i.e.:
Correct Output:
{
"productId": "ProductId1",
"customerNumbers": [
"customer": {
"name": "Prod1Cust1",
"totalOrders": 23
}
]
}
Instead of:
{
"productId": "ProductId1",
"customerNumbers": [
"customer": {
"name": "Prod1Cust1",
"totalOrders": 23
},
"customer": {
"name": "Prod2Cust1",
"totalOrders": 5
},
"customer": {
"name": "Prod3Cust1",
"totalOrders": 5
}
]
}
I'm able to fetch the records whose Name prefix is "Prod1" using a nested query coupled with MatchPhrasePrefixQuery (this returns the result with all the customer numbers). How can I further filter the data to get only the customer numbers whose Name prefix is "Prod1"?
Following is my current query:
{
"from": 0,
"size": 10,
"sort": [
{
"name.keyword": {
"missing": "_first",
"order": "asc"
}
}
],
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"match": {
"customerNumbers.name": {
"query": "Prod1",
"type": "phrase_prefix"
}
}
}
]
}
},
"path": "customerNumbers"
}
}
]
}
}
]
}
}
}
P.S: I'm using ElasticSearch 5.x with Nest.
Try using inner_hits:
PUT products
{"mappings":{"_doc":{"properties":{"customerNumbers":{"type":"nested"}}}}}
POST products/_doc
{"productId":"ProductId1","customerNumbers":[{"name":"Prod1Cust1","totalOrders":23},{"name":"Prod2Cust1","totalOrders":5},{"name":"Prod3Cust1","totalOrders":5}]}
POST products/_doc
{"productId":"ProductId2","customerNumbers":[{"name":"Prod2Cust1","totalOrders":23},{"name":"Prod2Cust1","totalOrders":5}]}
GET products/_search
{
"_source": "inner_hits",
"from": 0,
"size": 10,
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"nested": {
"path": "customerNumbers",
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"customerNumbers.name": {
"query": "Prod1"
}
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
]
}
}
}
yielding the following hits
[
{
"_index":"products",
"_type":"_doc",
"_id":"tyQGo3EBdiyDG0RsTa0N",
"_score":0.9808292,
"_source":{
},
"inner_hits":{
"customerNumbers":{
"hits":{
"total":1,
"max_score":0.9808292,
"hits":[
{
"_index":"products",
"_type":"_doc",
"_id":"tyQGo3EBdiyDG0RsTa0N",
"_nested":{
"field":"customerNumbers",
"offset":0
},
"_score":0.9808292,
"_source":{
"name":"Prod1Cust1", <-----
"totalOrders":23
}
}
]
}
}
}
}
]

How to query multiple parameters in a nested field in elasticsearch

I'm trying to search for keyword and then add nested queries for amenities which is a nested field of an array of objects.
With the query below I am able to search when I'm only matching one amenity id but when I have more than one it doesn't return anything.
Does anyone have an idea what is wrong with my query?
{
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_geo_distance": {
"geolocation": [
100,
10
],
"order": "asc",
"unit": "m",
"mode": "min",
"distance_type": "sloppy_arc"
}
}
],
"query": {
"bool": {
"must": [
{
"multi_match": {
"fields": [
"name^2",
"city",
"state",
"zip"
],
"fuzziness": 5,
"query": "complete"
}
},
{
"nested": {
"path": "amenities",
"query": {
"bool": {
"must": [
{
"term": {
"amenities.id": "1"
}
},
{
"term": {
"amenities.id": "2"
}
}
]
}
}
}
}
]
}
}
}
When you do:
"must": [
{
"term": {
"amenities.id": "1"
}
},
{
"term": {
"amenities.id": "2"
}
}]
What you're actually saying is: find me any document where "amenities.id"="1" and "amenities.id"="2", which won't work unless "amenities.id" is a list of values.
What you probably want to say is find me any document where "amenities.id"="1" or "amenities.id"="2"
To do that you should use should instead of must:
"should": [
{
"term": {
"amenities.id": "1"
}
},
{
"term": {
"amenities.id": "2"
}
}]

Elastic Search search query for nested array

{
"application": {
"package_name": "com.jackhenry.OregonFirstCU",
"countries": [
{
"short_name": "US"
}
]
},
"application": {
"package_name": "com.jackhenry.OregonFirstCU",
"countries": [
{
"short_name": "US"
}
]
},
"application": {
"package_name": "com.jackhenry.OregonFirstCU",
"countries": [
]
}
}
How can I get results for both an empty array and US for application.countries and application.countries.short_name?
In short you want to fetch results which either contain US or do not contain any country name. You can apply should between term and not exists
{
"query": {
"bool": {
"should": [
{
"bool": {
"must_not": {
"exists": {
"field": "application.countries.short_name"
}
}
}
},
{
"term": {
"application.countries.short_name": [
"US"
]
}
}
]
}
}

Elasticsearch must_not filter not works with a big bunch of values

I have the following query that includes some filters:
{
"from": 0,
"query": {
"function_score": {
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"idpais": [
115
]
}
},
{
"term": {
"tipo": [
1
]
}
}
],
"must_not": [
{
"term": {
"idregistro": [
5912471,
3433876,
9814443,
11703069,
6333176,
8288242,
9924922,
6677850,
11852501,
12530205,
4703469,
12776479,
12287659,
11823679,
12456304,
12777457,
10977614,
...
]
}
}
]
}
},
"query": {
"bool": {
"should": [
{
"match_phrase": {
"area": "Coordinator"
}
},
{
"match_phrase": {
"company": {
"boost": 5,
"query": "IBM"
}
}
},
{
"match_phrase": {
"topic": "IT and internet stuff"
}
},
{
"match_phrase": {
"institution": {
"boost": 5,
"query": "University of my city"
}
}
}
]
}
}
}
},
"script_score": {
"params": {
"idpais": 115,
"idprovincia": 0,
"relationships": []
},
"script_id": "ScoreUsuarios"
}
}
},
"size": 24,
"sort": [
{
"_script": {
"order": "desc",
"script_id": "SortUsuarios",
"type": "number"
}
}
]
}
The must_not filter has a large number of values to exclude (around 200 values), but it looks like Elasticsearch ignores those values and includes them in the result set. If I set only a few values (10 to 20 values), then Elasticsearch applies the must_not filter.
Is there some restriction on the number of values in the filters? Is there some way to remove a large number of results from the query?
The terms query, not the term query, is used for passing a list of values. You have to use it like below in your must filter.
{
"query": {
"terms": {
"field_name": [
"VALUE1",
"VALUE2"
]
}
}
}

Search with multi filter

I have a question here. Shopping cart sites commonly have a function to search for products with multiple filters. For example, I'm searching for sports gear with some filters like this:
Manufacturer
[x] Nike
Adidas
Umbro
Options
Size
[x] S
[x] M
L
Color
[x] White
Yellow
Red
[x] Blue
Here's my mapping
PUT test/product/_mapping
{
"product":{
"properties" : {
"name" : {"type" : "string", "store":"yes"},
"manufacturer" {"type": "string}
"options" : {
"type": "nested"
}
}
}
}
Some test data
POST test/product/1
{
"name": "Shirt 1",
"manufacturer": "Adidas",
"options":[
{
"Color" : ["Red", "Green"]
},
{
"Size" : ["S","M","L"]
}
],
"price":250000
}
POST test/product/2
{
"name": "Shirt 2",
"manufacturer": "Nike",
"options":[
{
"Color" : ["Blue", "Green", "White"]
},
{
"Size" : ["S", "L", "XL"]
}
],
"price":100000
}
POST test/product/3
{
"name": "Shirt 3",
"manufacturer": "Umbro",
"options": [
{
"Color" : ["Red"]
},
{
"Size" : ["S","XXL"]
}
],
"price": 300000
}
With this query, everything's fine
POST test/product/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "options",
"filter": {
"bool": {
"must": [
{
"terms": {
"options.Color": [
"white"
]
}
}
]
}
}
}
},
{
"term": {
"manufacturer": "nike"
}
}
]
}
}
}
}
}
But if I add more conditions to the Options filter, I get no results
POST test/product/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "options",
"filter": {
"bool": {
"must": [
{
"terms": {
"options.Color": [
"white"
]
}
},
{
"terms": {
"options.Size": [
"s"
]
}
}
]
}
}
}
},
{
"term": {
"manufacturer": "nike"
}
}
]
}
}
}
}
}
I don't know whether I'm wrong in my mapping or my query. Can you show me the best way to create the mapping in this scenario? Thank you for all your help.
The problem here is the usage of the nested type. Your nested filter is not evaluated over all children together but on every child individually. Since you do not have a single nested object that satisfies your filter (having both Color and Size), you're not getting any results. You have two options:
merge those individual nested objects together
POST test/product/1
{
"name": "Shirt 1",
"manufacturer": "Adidas",
"options":[
{
"Color" : ["Red", "Green"],
"Size" : ["S","M","L"]
}
],
"price":250000
}
Your mapping and query stay the same.
Do not use a nested type, but a simple object type. You have to change your mapping for options:
PUT test/product/_mapping
{
"product":{
"properties" : {
"options" : {
"type": "object"
}
}
}
}
And drop the nested filter:
POST test/product/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"terms": {
"options.Color": [
"white"
]
}
},
{
"terms": {
"options.Size": [
"s"
]
}
},
{
"term": {
"manufacturer": "nike"
}
}
]
}
}
}
}
}
But your data can stay the same.
Nested objects are really for differently structured data. If you were to have something like
"options":[
{
"Color" : "Blue",
"Size": "S"
},
{
"Color": "Red",
"Size" : "L"
}
]
and you want to filter for items that are both Blue and S, then you would have to use a nested filter.

Resources