Elasticsearch - Search and filter by price - elasticsearch

I have an array of promocodes (it comes from the request):
$promocodes = ['K1H5E1F1', 'M4C8A5K6', 'A3B9A45KL'];
And I have products data in Elasticsearch (as example, I will give data of one product):
// First product (2 promocodes matched, take a lower price 265.5 and filter this product at this price)
"price": 199,
"promocodes" : [
{
"code" : "K1H5E1F1",
"price" : 265.5
},
{
"code" : "LKDS3534K",
"price" : 357
},
{
"code" : "A3B9A45KL",
"price" : 327.5
}
]
// Second product (1 promocode matched, take a price 700 and filter this product at this price)
"price": 800,
"promocodes" : [
{
"code" : "AJ543HJB",
"price" : 500
},
{
"code" : "M4C8A5K6",
"price" : 700
}
]
// Third product (0 promocode matched, take a base price 900 and filter this product at this price)
"price": 900,
"promocodes" : [
{
"code" : "AJ87HJ90",
"price" : 750
}
]
I need to filter products by price, taking promocodes into account. When a price range is set and promocodes are supplied, the products must be filtered as follows. If a product has one of the supplied promocodes, the price for that promocode must be used instead of the main price. If 2 promocodes match for one product, the lower of the two prices must be used. In my example, the first product matches 2 of the supplied promocodes, so I need to take the lower of the 2 promocode prices and filter by that particular price.
This request does not filter prices as I need:
GET dev_products/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"price": {
"gte": 100,
"lte": 350
}
}
},
{
"nested": {
"path": "promocodes",
"query": {
"terms": {
"promocodes.code": [
'K1H5E1F1',
'M4C8A5K6',
'A3B9A45KL'
]
}
}
}
}
]
}
}
}
I don't know how to write this request correctly, and I would appreciate your help.

You need to use inner hits.
{
"query": {
"bool": {
"must": [
{
"range": {
"price": {
"gte": 100,
"lte": 350
}
}
},
{
"nested": {
"path": "promocodes",
"query": {
"terms": {
"promocodes.code": [
"K1H5E1F1",
"A3B9A45KL"
]
}
},
"inner_hits": {
"sort": {"promocodes.price": "asc"},----> sort nested document by price
"size": 1 ---> return top 1 document
}
}
}
]
}
}
}
Result:
"hits" : [
{
"_index" : "index4",
"_type" : "_doc",
"_id" : "NTBFgm0BFLPFo7KPt70j",
"_score" : 2.0,
"_source" : {
"price" : 199,
"promocodes" : [
{
"code" : "K1H5E1F1",
"price" : 265.5
},
{
"code" : "LKDS3534K",
"price" : 357
},
{
"code" : "A3B9A45KL",
"price" : 327.5
}
]
},
"inner_hits" : { -----> inner hits contains nested data
"promocodes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ -----> returns one matched field
{
"_index" : "index4",
"_type" : "_doc",
"_id" : "NTBFgm0BFLPFo7KPt70j",
"_nested" : {
"field" : "promocodes",
"offset" : 0
},
"_score" : null,
"_source" : {
"code" : "K1H5E1F1",
"price" : 265.5
},
"sort" : [
265.5
]
}
]
}
}
}
}
]
EDIT:
The logic below checks whether a promocode matches; if it does, the document is returned with the promocode value in inner_hits. If no promocode matches and the parent price is in the range (the gte and lte values), that document is returned as well.
GET dev_products/_search
{
"_source": "price",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"price": {
"gte": 100,
"lte": 350
}
}
}
],
"must_not": [
{
"nested": {
"path": "promocodes",
"query": {
"bool": {
"must": [
{
"terms": {
"promocodes.code.keyword": [
"K1H5E1F1",
"A3B9A45KL"
]
}
}
]
}
},
"inner_hits": {
"sort": {
"promocodes.price": "asc"
},
"size": 1
}
}
}
]
}
},
{
"nested": {
"path": "promocodes",
"query": {
"bool": {
"must": [
{
"terms": {
"promocodes.code.keyword": [
"K1H5E1F1",
"A3B9A45KL"
]
}
},
{
"range": {
"promocodes.price": {
"gte": 100,
"lte": 350
}
}
}
]
}
},
"inner_hits": {
"sort": {
"promocodes.price": "asc"
},
"size": 1
}
}
}
]
}
}
}
EDIT-2
Query
GET dev_products/_search
{
"_source": "price",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"price": {
"gte": 100,
"lte": 350
}
}
}
],
"must_not": [
{
"nested": {
"path": "promocodes",
"query": {
"bool": {
"must": [
{
"terms": {
"promocodes.code.keyword": [
"K1H5E1F1",
"A3B9A45KL"
]
}
}
]
}
}
}
}
]
}
},
{
"bool": {
"must": [
{
"nested": {
"path": "promocodes",
"query": {
"bool": {
"must": [
{
"terms": {
"promocodes.code.keyword": [
"K1H5E1F1",
"A3B9A45KL"
]
}
},
{
"range": {
"promocodes.price": {
"lte": 350,
"gte": 100
}
}
}
]
}
},
"inner_hits": {
"sort": {
"promocodes.price": "asc"
},
"size": 1
}
}
}
],
----> don't include the document if any matched promocode has a value less than the given range
"must_not": [
{
"nested": {
"path": "promocodes",
"query": {
"bool": {
"must": [
{
"terms": {
"promocodes.code.keyword": [
"K1H5E1F1",
"A3B9A45KL"
]
}
},
{
"range": {
"promocodes.price": {
"lt": 100
}
}
}
]
}
}
}
}
]
}
}
]
}
}
}

If the range for price and promocodes.price is "gte":270,"lte":271 and the terms for promocodes.code are ["promo1","promo2","promo4"], the request does not work. It should not select this product, since the price for the lowest matching promocode is 265.5, which does not fall into the range. However, it still selects this product and does not put the desired promocode into inner_hits (for some reason it chooses "promo2", with price 270, for inner_hits).
"price": 275,
"promocodes" : [
{
"code" : "promo1",
"price" : 265.5
},
{
"code" : "promo2",
"price" : 270
},
{
"code" : "promo3",
"price" : 250
}
]

Related

Elastic search combine must and must_not

I have a document that holds data for a product the mapping is as follow:
"mappings" : {
"properties" : {
"view_score" : {
"positive_score_impact" : true,
"type" : "rank_feature"
},
"recipients" : {
"dynamic" : false,
"type" : "nested",
"enabled" : true,
"properties" : {
"type" : {
"similarity" : "boolean",
"type" : "keyword"
},
"title" : {
"type" : "text",
"fields" : {
"key" : {
"type" : "keyword"
}
}
}
}
}
}
}
And I have 2 documents with the following data:
{
"view_score": 10,
"recipients": [{"type":"gender", "title":"male"}, {"type":"gender", "title":"female"}]
}
{
"view_score": 10,
"recipients": [{"type":"gender", "title":"female"}]
}
When a user searches for a product, she can say "I prefer products for females", so the products that specify gender as just female should come before products that specify gender as both male and female.
I have the following query which gives more score to products with just female gender:
GET _search
{
"sort": [
"_score"
],
"query": {
"script_score": {
"query": {
"bool": {
"should": [
{
"nested": {
"path": "recipients",
"ignore_unmapped": true,
"query": {
"bool": {
"boost": 10,
"must": [
{
"term": {
"recipients.type": "gender"
}
},
{
"match": {
"recipients.title": "female"
}
}
],
"must_not": {
"bool": {
"filter": [
{
"term": {
"recipients.type": "gender"
}
},
{
"match": {
"recipients.title": "male"
}
}
]
}
}
}
}
}
}
]
}
},
"script": {
"source": "return _score;"
}
}
}
}
But if I add another query to the should clause, it no longer behaves the same way and gives the same score to products with one or two genders in their specifications.
Here is my final query, which does not work as expected:
GET _search
{
"sort": [
"_score"
],
"query": {
"script_score": {
"query": {
"bool": {
"should": [
{
"rank_feature": {
"field": "view_score",
"linear": {}
}
},
{
"nested": {
"path": "recipients",
"ignore_unmapped": true,
"query": {
"bool": {
"boost": 10,
"must": [
{
"term": {
"recipients.type": "gender"
}
},
{
"match": {
"recipients.title": "female"
}
}
],
"must_not": {
"bool": {
"filter": [
{
"term": {
"recipients.type": "gender"
}
},
{
"match": {
"recipients.title": "male"
}
}
]
}
}
}
}
}
}
]
}
},
"script": {
"source": "return _score;"
}
}
}
}
So my problem is how to combine these should clauses to give more weight to the products that specify only one gender.

In ElasticSearch break down hits per filter?

Given the following query, how can I get the number of hits independently for each range and term query and what are the performance implications for this? As of yet, I can't find anything in the documentation that indicates how to do this. Where can I find the docs for such a feature?
{
"query": {
"bool" : {
"must" : {
"term" : { "user.id" : "kimchy" }
},
"filter": {
"term" : { "tags" : "production" }
},
"must_not" : {
"range" : {
"age" : { "gte" : 10, "lte" : 20 }
}
},
You can use the filter aggregation to get the document count per query clause. Because you are providing a query as well, you need to use the global aggregation together with the filter aggregation. If you don't use the global aggregation, it will return counts based on the top-level query, and you will not be able to get the total document count for a specific query clause.
Below is sample query with aggregation:
{
"query": {
"bool": {
"must": {
"term": {
"user.id": "kimchy"
}
},
"filter": {
"term": {
"tags": "production"
}
},
"must_not": {
"range": {
"age": {
"gte": 10,
"lte": 20
}
}
}
}
},
"aggs": {
"Total": {
"global": {},
"aggs": {
"user_term": {
"filter": {
"term": {
"user.id": "kimchy"
}
}
},
"tag_term": {
"filter": {
"term": {
"tags": "production"
}
}
},
"age_range_not": {
"filter": {
"bool": {
"must_not": {
"range": {
"age": {
"gte": 10,
"lte": 20
}
}
}
}
}
},
"age_range": {
"filter": {
"range": {
"age": {
"gte": 10,
"lte": 20
}
}
}
}
}
}
}
}
You will get below response:
"aggregations" : {
"Total" : {
"doc_count" : 3,
"age_range" : {
"doc_count" : 2
},
"age_range_not" : {
"doc_count" : 1
},
"tag_term" : {
"doc_count" : 3
},
"user_term" : {
"doc_count" : 2
}
}
}

elastic - query multiple levels on nested object in inner_hits

I have a huge nested object which has lots of levels.
I want to create a query which will return only the leaf / some object in the middle,
and the query is supposed to query multiple levels in the tree.
for example:
my DB is saving the whole company structure.
company -> wards -> employees -> working hours
I want to make a query that will return only the working hours of the employees in ward 2 which started later than 3pm this month.
I tried to use inner_hits, but to no avail.
as requested, sample document and expected result:
company:[{
properties:{companyId: 112}
ward:[{
properties: {wardId: 223}
employee:{
properties: {employeeId: 334},
workingHours: [
{ date: "1.1.2021", numOfHours: 4},
{ date: "1.2.2021", numOfHours: 7}
]
}]
}]
}]
the query:
I need to return the working hours of date "1.2.21" , of employee 334, of ward 223. and only the working hours, not the whole tree.
expected result:
4 or { date: "1.1.2021", numOfHours: 4} , whatever is simpler
hope its clear now
You need to add inner_hits to all nested queries.
You can either parse the entire result to get the matched working hours (from inner hits), or you can use response filtering to remove the additional data.
Mapping
PUT index123
{
"mappings": {
"properties": {
"company": {
"type": "nested",
"properties": {
"ward": {
"type": "nested",
"properties": {
"employee": {
"type": "nested",
"properties": {
"workingHours": {
"type": "nested",
"properties": {
"date": {
"type": "date"
}
}
}
}
}
}
}
}
}
}
}
}
Data
"_index" : "index123",
"_type" : "_doc",
"_id" : "9gGYI3oBt-MOenya6BcN",
"_score" : 1.0,
"_source" : {
"company" : [
{
"companyId" : 112,
"ward" : [
{
"wardId" : 223,
"employee" : {
"employeeId" : 334,
"workingHours" : [
{
"date" : "2021-01-01",
"numOfHours" : 4
},
{
"date" : "2021-01-02",
"numOfHours" : 7
}
]
}
}
]
}
]
}
}
Query
GET index123/_search?filter_path=hits.hits.inner_hits.ward.hits.hits.inner_hits.employee.hits.hits.inner_hits.workingHours.hits.hits._source
{
"query": {
"nested": {
"inner_hits": {
"name":"ward"
},
"path": "company.ward",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.wardId": {
"value": 223
}
}
},
{
"nested": {
"inner_hits": {
"name":"employee"
},
"path": "company.ward.employee",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.employee.employeeId": {
"value":334
}
}
},
{
"nested": {
"inner_hits": {
"name":"workingHours"
},
"path": "company.ward.employee.workingHours",
"query": {
"range": {
"company.ward.employee.workingHours.date": {
"gte": "2021-01-01",
"lte": "2021-01-01"
}
}
}
}
}
]
}
}
}
}
]
}
}
}
}
}
Result
{
"hits" : {
"hits" : [
{
"inner_hits" : {
"ward" : {
"hits" : {
"hits" : [
{
"inner_hits" : {
"employee" : {
"hits" : {
"hits" : [
{
"inner_hits" : {
"workingHours" : {
"hits" : {
"hits" : [
{
"_source" : {
"date" : "2021-01-01",
"numOfHours" : 4
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
Update:
Query with company ID
GET index123/_search?filter_path=hits.hits.inner_hits.company.hits.hits.inner_hits.ward.hits.hits.inner_hits.employee.hits.hits.inner_hits.workingHours.hits.hits._source
{
"query": {
"nested": {
"path": "company",
"inner_hits": {
"name": "company"
},
"query": {
"bool": {
"must": [
{
"term": {
"company.companyId": {
"value": 112
}
}
},
{
"nested": {
"inner_hits": {
"name": "ward"
},
"path": "company.ward",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.wardId": {
"value": 223
}
}
},
{
"nested": {
"inner_hits": {
"name": "employee"
},
"path": "company.ward.employee",
"query": {
"bool": {
"must": [
{
"term": {
"company.ward.employee.employeeId": {
"value": 334
}
}
},
{
"nested": {
"inner_hits": {
"name": "workingHours"
},
"path": "company.ward.employee.workingHours",
"query": {
"range": {
"company.ward.employee.workingHours.date": {
"gte": "2021-01-01",
"lte": "2021-01-01"
}
}
}
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
}
}

Documents repeating in the query of elasticsearch

I'm new to Elasticsearch. I need to build the query dynamically, where for each field name the corresponding file is fetched.
I have the query below; can anyone say whether it's the right approach? Also, with this query, the documents are just repeating for one particular file name.
Please let me know how to go about it.
GET index_name/_search
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"match_phrase": {
"field_name": "program"
}
},
{
"match_phrase": {
"field_value": "aaa-123"
}
}
]
}
},
{
"bool": {
"must": [
{
"match_phrase": {
"field_name": "species"
}
},
{
"match_phrase": {
"field_value": "mouse"
}
}
]
}
},
{
"bool": {
"must": [
{
"match_phrase": {
"field_name": "model name"
}
},
{
"match_phrase": {
"field_value": "b45"
}
}
]
}
}
]
}
},"aggs": {
"2": {
"terms": {
"field": "myfile_file_name.keyword",
"size": 1000,
"order": {
"_key": "asc"
}
},
"aggs": {
"3": {
"terms": {
"field": "field_name.keyword",
"size": 1000,
"order": {
"_key": "asc"
}
}
}
}
}
}
}
mapping and Output
{
"_index" : "test",
"_type" : "test_data",
"_id" : "123",
"_score" : 1.0,
"_source" : {
"document_id" : 123,
"m_id" : 1,
"source" : "ADDD",
"type" : "M",
"name" : "Animal",
"value" : "None",
"test_type" : "Test123",
"file_name" : "AA.zip",
"description" : "testing",
"program" : ["hello"],
"species" : ["mouse"],
"study" : ["Study1"],
"create_date" : "2020-08-20 11:51:21.152",
"update_date" : "2020-08-20 11:51:21.152",
"source_name" : "Anim",
"auth" : ["na"],
"treatment" : ["TR001", "TR002", "TR004"],
"timepoint" : ["72", "48"],
"findings_reports" : "na",
"model" : ["None",],
"additional" : "{'view': '', 'load': []}",
"data" : "Pre"
}
},
]
}
}

must_not not working for nested elastic query

I have a question.
Suppose there are 2 transactions for the same customer id '11'. In one transaction the customer bought a 'CLEANSING' product, and in the second transaction the customer bought a 'SKIN CARE' product. Now I want to find customers who bought the product 'CLEANSING' but not 'SKIN CARE'. But when I try to aggregate by customer id '11', I get the customer, because in the 1st transaction he did not purchase the product 'SKIN CARE'. How can I make Elasticsearch look at all the transactions of a customer and not at a single transaction? Please help me out.
These are the transactions -
{
"transactionId" : "1211",
"CDID" : "11",
"transactionDate" : "2019-06-24T09:35:30.2117315Z",
"lineItems" : [
{
"description" : "BUBBLE BUBBLE MILD FOAMING CLEANSER",
"markdownFlag" : "N",
"quantity" : 1,
"rate" : 14,
"value" : 14,
"discount" : 0,
"amount" : 13.33,
"grossAmount" : 14,
"itemDetails" : {
"itemName" : "BUBBLE BUBBLE MILD FOAMING CLEANSER",
"retailDepartmentName" : "CLEANSING",
}
}
]
}
{
"transactionId" : "1232",
"CDID" : "11",
"transactionDate" : "2019-06-24T09:35:30.2117315Z",
"lineItems" : [
{
"description" : "BUBBLE BUBBLE MILD FOAMING CLEANSER",
"markdownFlag" : "N",
"quantity" : 1,
"rate" : 14,
"value" : 14,
"discount" : 0,
"amount" : 13.33,
"grossAmount" : 14,
"itemDetails" : {
"itemName" : "BUBBLE BUBBLE MILD FOAMING CLEANSER",
"retailDepartmentName" : "SKIN CARE",
}
}
]
}
lineItems is of nested type
The transactions are made by the same customer
I am trying to get the customer who bought 'CLEANSING' but did not buy 'SKIN CARE'. I should get no results.
My query -
{
"aggs": {
"CDID": {
"terms": {
"field": "CDID.keyword",
"size": 10
},
"aggs": {
"lineItems1": {
"filter": {
"nested": {
"path": "lineItems",
"query": {
"bool": {
"must": [
{
"bool": {
"must_not": [
{
"match": {
"lineItems.itemDetails.retailDepartmentName.keyword": "SKIN CARE"
}
}
],
"must": [
{
"match": {
"lineItems.itemDetails.retailDepartmentName.keyword": "CLEANSING"
}
}
]
}
}
]
}
}
}
},
"aggs": {
"nested_path": {
"nested": {
"path": "lineItems"
},
"aggs": {
"sum1": {
"sum": {
"field": "lineItems.quantity"
}
}
}
}
}
}
}
}
}
}
Result -
"aggregations" : {
"CDID" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "11",
"doc_count" : 2,
"lineItems1" : {
"doc_count" : 1,
"nested_path" : {
"doc_count" : 1,
"sum1" : {
"value" : 1.0
}
}
}
}
]
}
}
UPDATE-Still didn't find the answer
You can achieve the result with the query below.
Mapping Query:
PUT /<index_name>
{
"mappings" : {
"properties" : {
"transactionId": {
"type": "text"
},
"CDID": {
"type": "text"
},
"transactionDate": {
"type": "text"
},
"lineItems" : {
"type" : "nested"
}
}
}
}
Sample Data Mapping:
POST /<index_name>/_doc
{
"transactionId": "1211",
"CDID": "11",
"transactionDate": "2019-06-24T09:35:30.2117315Z",
"lineItems": [
{
"description": "BUBBLE BUBBLE MILD FOAMING CLEANSER",
"markdownFlag": "N",
"quantity": 1,
"rate": 14,
"value": 14,
"discount": 0,
"amount": 13.33,
"grossAmount": 14,
"itemDetails": {
"itemName": "BUBBLE BUBBLE MILD FOAMING CLEANSER",
"retailDepartmentName": "CLEANSING"
}
}
]
}
Search Query:
GET /test_trans/_search
{
"query": {
"nested": {
"path": "lineItems",
"query": {
"bool": {
"must": [
{
"match": {
"lineItems.itemDetails.retailDepartmentName": "CLEANSING"
}
}
],
"must_not": [
{
"match": {
"lineItems.itemDetails.retailDepartmentName": "SKIN CARE"
}
}
]
}
},
"score_mode": "avg"
}
},
"aggs": {
"nested_path": {
"nested": {
"path": "lineItems"
},
"aggs": {
"sum1": {
"sum": {
"field": "lineItems.quantity"
}
}
}
}
}
}

Resources