Combining nested query with boolean query in Elastic Search - elasticsearch

I am trying to filter hotel rooms by price range in Elastic Search. The rooms have a default nightly price and also custom prices can be set for specific days.
I'm storing the nightlyPrice and a nested object for custom prices together with the dates. The mapping is smt. like:
{
"adverts": {
"mappings": {
"advert": {
"properties": {
"nightlyPrice": {"type": "float"},
"customPrices": {
"type": "nested",
"properties": {
"date": {"type": "date"},
"price": {"type": "float"}
}
}
}
}
}
}
}
For example I want to get the rooms within the price range of 100 and 200$ between the dates 1st and 7th of July.
So I came up with this logic:
Either customPrices.date must be between 2019-07-01 and 2019-07-07 and customPrices.price between 100 and 200.
or the nightlyPrice must be between 100 and 200 and no customPrices.date is set between 05 and 07 July.
However I couldn't be able to apply this logic to Elastic Search, nested objects / queries are kinda tricky I guess.
This is the final query I came up with:
{
"query": {
"bool": {
"filter": [
{
"term": {
"status": "active"
}
}
],
"must": [
{
"bool": {
"should": [
{
"nested": {
"path": "customPrices",
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"from": "2019-07-01",
"to": "2019-07-07"
}
}
},
{
"range": {
"price": {
"from": 100,
"to": 200
}
}
}
]
}
}
}
},
{
"bool": {
"must": [
{
"range": {
"nightlyPrice": {
"from": 100,
"to": 200
}
}
}
],
"must_not": [
{
"nested": {
"path": "customPrices",
"query": {
"range": {
"customPrices.date": {
"from": "2019-07-01",
"to": "2019-07-07"
}
}
}
}
}
]
}
}
]
}
}
]
}
}
}
The problem with this query is if customPrices.date matches the date range it never matches the document no matter the price range is. I experimented with 1 - 100000$ price range and it still doesn't match.
Tried to use the explain API to understand why a specific document didn't match but I don't understand it, it says user requested match_none query but there's this should query so it should match the nested query (first one):
{
"_index": "adverts",
"_type": "advert",
"_id": "13867",
"matched": false,
"explanation": {
"value": 0.0,
"description": "Failure to meet condition(s) of required/prohibited clause(s)",
"details": [
{
"value": 0.0,
"description": "no match on required clause (+(ToParentBlockJoinQuery (MatchNoDocsQuery(\"User requested \"match_none\" query.\")) (+nightlyPrice:[100.0 TO 200.0] -ToParentBlockJoinQuery (customListingPrices.date:[1561939200000 TO 1562543999999]))) #status:active",
"details": [
{
"value": 0.0,
"description": "Failure to meet condition(s) of required/prohibited clause(s)",
"details": [
{
"value": 0.0,
"description": "no match on required clause (ToParentBlockJoinQuery (MatchNoDocsQuery(\"User requested \"match_none\" query.\")) (+nightlyPrice:[100.0 TO 200.0] -ToParentBlockJoinQuery (customListingPrices.date:[1561939200000 TO 1562543999999])))",
"details": [
{
"value": 0.0,
"description": "No matching clauses",
"details": []
}
]
},
{
"value": 0.0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0.0,
"description": "# clause",
"details": []
},
{
"value": 1.0,
"description": "status:active",
"details": []
}
]
}
]
}
]
},
{
"value": 0.0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0.0,
"description": "# clause",
"details": []
},
{
"value": 1.0,
"description": "DocValuesFieldExistsQuery [field=_primary_term]",
"details": []
}
]
}
]
}
}
Any help or idea is greatly appreciated...

If you closely look at the first must clause, it appears that you haven't mentioned the entire path of the field.
{
"range":{
"date":{ <-- must be "customPrices.date"
"from":"2019-07-01",
"to":"2019-07-07"
}
}
},
{
"range":{
"price":{ <-- must be "customPrices.price"
"from":100,
"to":200
}
}
}
Below is how the query should be and should work fine for your use case.
Query
POST <your_index_name>/_search
{
"query":{
"bool":{
"filter":{
"term":{
"status":"active"
}
},
"must":[
{
"bool":{
"should":[
{
"bool":{
"must":[
{
"nested":{
"path":"customPrices",
"query":{
"bool":{
"must":[
{
"range":{
"customPrices.date":{
"gte":"2019-07-01",
"lte":"2019-07-09"
}
}
},
{
"range":{
"customPrices.price":{
"gte":100,
"lte":200
}
}
}
]
}
}
}
}
]
}
},
{
"bool":{
"must":[
{
"range":{
"nightlyPrice":{
"gte":100,
"lte":200
}
}
}
],
"must_not":[
{
"nested":{
"path":"customPrices",
"query":{
"range":{
"customPrices.date":{
"gte":"2019-07-05",
"lte":"2019-07-07"
}
}
}
}
}
]
}
}
]
}
}
]
}
}
}
Hope it helps!

Related

Source filtering an array of objects in ElasticSearch

Here is a document in ElasticSearch
"CompanyId": 5733,
"PartNumber": "W8S038",
"Name_en": "#8 Washer, M4 Compatible, Stainless Steel, Pack of 100",
"ProductId": 90023,
"CompanyName": "Washers Ltd",
"Prices": [
{
"BuyerId": 308,
"Price": 2.42
}
,
{
"BuyerId": 406,
"Price": 2.22
}
]
}
Obviously we can't let on to buyer 308 that buyer 406 is getting a better price. Therefore when buyer 308 is searching I need to remove all of the prices for other buyers.
I'd like to do this by using source filtering. But how?!
(I could exclude Prices and add back in the required price by using a script_field. However, that means that the price is not part of the source document and therefore ReactiveSearch can't see it and therefore can't sort on it.)
Update: here is the query generated by ReactiveSearch to which I need to append the limit on prices:
"query":{
"bool":{
"must":[
{
"bool":{
"must":[
{
"bool":{
"must":[
{
"bool":{
"should":[
{
"multi_match":{
"query":"m4 washer",
"fields":[
"Name_en"
],
"type":"cross_fields",
"operator":"and"
}
},
{
"multi_match":{
"query":"m4 washer",
"fields":[
"Name_en"
],
"type":"phrase_prefix",
"operator":"and"
}
}
],
"minimum_should_match":"1"
}
}
]
}
}
]
}
}
],
"filter": [
{
"nested": {
"path": "Prices",
"query": {
"term": {
"Prices.CompanyId": 1474
}
},
"inner_hits": {}
}
}
]
}
},
"size":10,
"aggs":{
"CompanyName.raw":{
"terms":{
"field":"CompanyName.raw",
"size":1000,
"order":{
"_count":"desc"
}
}
}
},
"_source":{
"excludes":[
"PurchasingViews",
"ContractFilters",
"SearchField*",
"Keywords*",
"Menus*"
]
},
"from":0,
"sort":[
{
"Name_en.raw":{
"order":"asc"
}
}
],
"script_fields":{
"price":{
"script":{
"lang":"painless",
"inline":"if(params['_source']['Prices'] != null){for(p in params['_source']['Prices']){ if(p.CompanyId == 1474) return p.Price; }} return null;"
}
}
}
}
(That bool, must, bool, must, bool, must, bool, should seems rather stupid?)
You need to use the nested inner_hits feature like below.
{
"_source": [
"CompanyId", "PartNumber", "Name_en", "ProductId", "CompanyName"
],
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "Prices",
"query": {
"term": {
"Prices.BuyerId": 308
}
},
"inner_hits": {}
}
}
]
}
}
}
In the output you'll get exactly what you expect, namely all the root-level fields and the matching prices for the given buyer.
UPDATE:
Here is how I would rewrite your query:
{
"query": {
"bool": {
"minimum_should_match": "1",
"should": [
{
"multi_match": {
"query": "m4 washer",
"fields": [
"Name_en"
],
"type": "cross_fields",
"operator": "and"
}
},
{
"multi_match": {
"query": "m4 washer",
"fields": [
"Name_en"
],
"type": "phrase_prefix",
"operator": "and"
}
}
],
"filter": [
{
"nested": {
"path": "Prices",
"query": {
"term": {
"Prices.CompanyId": 1474
}
},
"inner_hits": {}
}
}
]
}
},
"size": 10,
"aggs": {
"CompanyName.raw": {
"terms": {
"field": "CompanyName.raw",
"size": 1000,
"order": {
"_count": "desc"
}
}
}
},
"_source": {
"excludes": [
"PurchasingViews",
"ContractFilters",
"SearchField*",
"Keywords*",
"Menus*",
"Prices"
]
},
"from": 0,
"sort": [
{
"Name_en.raw": {
"order": "asc"
}
}
],
"script_fields": {
"price": {
"script": {
"lang": "painless",
"inline": "if(params['_source']['Prices'] != null){for(p in params['_source']['Prices']){ if(p.CompanyId == 1474) return p.Price; }} return null;"
}
}
}
}

Multi match query with terms lookup searching multiple indices elasticsearch 6.x

All,
I am working on building a NEST 6.x query that takes a serach term and looks in different fields in different indices.
This is the one I got so far but is not returning any results that I am expecting.
Please see the details below
Indices used
dev-sample-search
user-agents-search
The way the search should work is as follows.
The value in the query field(27921093) is searched against the
fields agentNumber, customerName, fileNumber, documentid(These are all
analyzed fileds).
The search should limit the documents to the agentNumbers the user
sampleuser#gmail.com has access to( sample data for
user-agents-search) is added below.
agentNumber, customerName, fileNumber, documentid and status are
part of the index dev-sample-search.
status field is defined as a keyword.
The fields in the user-agents-search index are all keywords
Sample user-agents-search index data:
{
"id": "sampleuser#gmail.com"",
"user": "sampleuser#gmail.com"",
"agentNumber": [
"123.456.789",
"1011.12.13.14"
]
}
Sample dev-sample-search index data:
{
"agentNumber": "123.456.789",
"customerName": "Bank of america",
"fileNumber":"test_file_1123",
"documentid":"1234456789"
}
GET dev-sample-search/_search
{
"from": 0,
"size": 10,
"query": {
"bool": {
"must": [
{
"multi_match": {
"type": "best_fields",
"query": "27921093",
"operator": "and",
"fields": [
"agentNumber",
"customerName",
"fileNumber",
"documentid^10"
]
}
}
],
"filter": [
{
"bool": {
"must": [
{
"terms": {
"agentNumber": {
"index": "user-agents-search",
"type": "_doc",
"user": "sampleuser#gmail.com",
"path": "agentNumber"
}
}
},
{
"bool": {
"must_not": [
{
"terms": {
"status": {
"value": "pending"
}
}
},
{
"term": {
"status": {
"value": "cancelled"
}
}
},
{
"term": {
"status": {
"value": "app cancelled"
}
}
}
],
"should": [
{
"term": {
"status": {
"value": "active"
}
}
},
{
"term": {
"status": {
"value": "terminated"
}
}
}
]
}
}
]
}
}
]
}
}
}
I see a couple of things that you may want to look at:
In the terms lookup query, "user": "sampleuser#gmail.com", should be "id": "sampleuser#gmail.com",.
If at least one should clause in the filter clause should match, set "minimum_should_match" : 1 on the bool query containing the should clause

Must not with and in elastic search

I have 4 fields in an elastic search schema.
date
status
type
createdAt
Now, I need to fetch all the rows where
date=today
status = "confirmed"
and where type is not equals to "def"
However, it is ok if
type=def exists
but only when the field createdAt is not equals to today.
My current query looks like this:
{
must: [
{ "bool":
{
"must": [
{"term": {"date": 'now/d'}},
{"term": {"status": 'confirmed'}},
]
}
}
],
mustNot: [
{"match": {'createdAt': 'now/d'}},
{"match":{"type": "def"}}
]
}
The rows where type is not equals to "def" are fetched.
However, if a row has the type=def AND createdAT any date but today, then the row doesn't show up.
What am I doing wrong?
This query should work.
{
"query": {
"bool": {
"must": [
{ "term": {"date": "now/d" } },
{ "term": {"status": "confirmed" } }
],
"must_not": {
"bool": {
"must": [
{ "match": { "createdAt": "now/d" } },
{ "match": { "type": "def" } }
]
}
}
}
}
}
I believe the reason that your version is not working is that every query in the must_not must not match.
https://www.elastic.co/guide/en/elasticsearch/guide/current/bool-query.html#_controlling_precision
All the must clauses must match, and all the must_not clauses must not match, but how many should clauses should match? By default, none of the should clauses are required to match, with one exception: if there are no must clauses, then at least one should clause must match.
Assuming a setup like this:
PUT twitter
{
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0
}
},
"mappings": {
"_doc": {
"properties": {
"date": {
"type": "date",
"format": "epoch_millis"
},
"createdAt": {
"type": "date",
"format": "epoch_millis"
},
"status": {
"type": "keyword"
},
"type": {
"type": "keyword"
}
}
}
}
}
and a sample doc like this (adjust values to test the query):
post twitter/_doc/1
{
"date": 1536562800000, //start of TODAY September 10, 2018 in UTC
"createdAt": 1536562799999,
"status": "confirmed",
"type": "def"
}
the following query should work:
get twitter/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "now/d",
"lte": "now/d"
}
}
},
{
"term": {
"status": "confirmed"
}
}
],
"must_not": [
{
"range": {
"createdAt": {
"gte": "now/d",
"lte": "now/d"
}
}
},
{
"term": {
"type": "def"
}
}
]
}
}
}
}
}
This is a filtered query which i think for this scenario is better because it doesn't calculate the score. If you do want to calculate the score, just remove the bool and the filter from the top.

ElasticSearch aggs with function_score

I'm trying to exclude duplicated documents which have the same slug parameters to do it I use aggs in ElasticSearch (version 2.4). I use - this query:
{
"fields":[
"id",
"score"],
"size":0,
"query":{
"function_score":{
"query":{
"bool":{
"should":[
{
"match":{
"main_headline.en":{
"query":"headline_for_search"
}
}
},
{
"match":{
"body.en":"body for search"
}
}],
"must_not":{
"term":{
"id":75333
}
},
"filter":[
{
"term":{
"status":3
}
},
[
{
"term":{
"sites":6
}
}]]
}
},
"functions":[
{
"gauss":{
"published_at":{
"scale":"140w",
"decay":0.3
}
}
}
]
},
"aggs":{
"postslug":{
"terms":{
"field":"slug",
"order":{
"top_score":"desc"
}
},
"aggs":{
"grouppost":{
"top_hits": {
"_source": {
"include": [
"id",
"slug",
]
},
"size" : 10
}
}
}
}
}
}
}
When I run it I get error
failed to parse search source. expected field name but got [START_OBJECT]
I can`t figure out where is a mistake.
Without section aggs all works fine (except present duplicates)
I see one issue which relates to the fact that in the source filtering section include should read includes. Also, the aggs section is not at the right location, you have it in the query section, and it should be at the top-level:
{
"fields": [
"id",
"score"
],
"size": 0,
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"match": {
"main_headline.en": {
"query": "headline_for_search"
}
}
},
{
"match": {
"body.en": "body for search"
}
}
],
"must_not": {
"term": {
"id": 75333
}
},
"filter": [
{
"term": {
"status": 3
}
},
[
{
"term": {
"sites": 6
}
}
]
]
}
},
"functions": [
{
"gauss": {
"published_at": {
"scale": "140w",
"decay": 0.3
}
}
}
]
}
},
"aggs": {
"postslug": {
"terms": {
"field": "slug",
"order": {
"top_score": "desc"
}
},
"aggs": {
"grouppost": {
"top_hits": {
"_source": {
"includes": [
"id",
"slug"
]
},
"size": 10
}
}
}
}
}
}

Elasticsearch- sum specific field in specific datetime range?

I have a requirement of find sum one fields in a single query. I have managed to sum specific field in specific datetime range .
I try to use aggregation query to get the field total in specific datetime range,but get total value is 0
My document json look like the following way :
{
"_index": "flow-2018.02.01",
"_type": "doc",
"_source": {
"#timestamp": "2018-02-01T01:02:40.701Z",
"dest": {
"ip": "120.119.37.237",
"mac": "d4:6d:50:21:f8:44",
"port": 3280
},
"final": true,
"flow_id": "EQQA////DP//////FP8BAAFw5CIXZxTUbVAh+ERn/7FMeHcl7S6z0Aw",
"last_time": "2018-02-01T01:01:48.349Z",
"source": {
"ip": "100.255.177.76",
"mac": "70:e4:30:15:67:14",
"port": 45870,
"stats": {
"bytes_total": 60,
"packets_total": 1
}
},
"start_time": "2018-02-01T01:01:48.349Z",
"transport": "tcp",
"type": "flow"
},
"fields": {
"start_time": [
1517446908349
],
"#timestamp": [
1517446960701
],
"last_time": [
1517446908349
]
},
"sort": [
1517446960701
]
}
My search query :
{
"size":0,
"query":{
"bool":{
"must":[
{
"range":{
"_source.#timestamp":{
"gte": "2018-02-01T01:00:00.000Z",
"lte": "2018-02-01T01:05:00.000Z"
}
}
}
]
}
},
"aggs":{
"total":{
"sum":{
"field":"stats.packets_total "
}
}
}
}
Please help me to solve this issue. Thank you
You're almost there, stats.packets_total should be source.stats.packets_total (and make sure to remove the space at the end of the field name), like this:
{
"size":0,
"query":{
"bool":{
"must":[
{
"range":{
"#timestamp":{
"gte": "2018-02-01T01:00:00.000Z",
"lte": "2018-02-01T01:05:00.000Z"
}
}
}
]
}
},
"aggs":{
"total":{
"sum":{
"field":"source.stats.packets_total"
}
}
}
}

Resources