I would like to change the following Elasticsearch query so that the "should" array does not affect the scoring of the results. I want the score to be calculated by the "query_string" on the name property only.
How can I achieve that with minimal changes?
GET customers/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "properties.name",
"query": "Joe*"
}
}
],
"should": [
{
"match": {
"properties.role": "admin"
}
},
{
"match": {
"properties.role": "sysop"
}
},
{
"match": {
"properties.role": "client"
}
},
{
"match": {
"properties.status": "public"
}
},
{
"match": {
"properties.status": "public"
}
}
],
"must_not": [
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
},
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
},
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
}
]
}
},
"size": 30,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "return doc['_index'][0] == 'customers' && doc.containsKey('properties.videoCount')?doc['properties.videoCount'].value:0"
}
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "long timestampNow = new Date().getTime(); return doc['_index'][0] == 'customers' && doc.containsKey('properties.subscriptions.features.allow-application')?(timestampNow < doc['properties.subscriptions.features.first-on-search'].value.getMillis()):false"
}
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "return doc['_index'][0] == 'customers' && doc.containsKey('properties.videoCount')?doc['properties.videoCount'].value:0"
}
}
}
]
}
You need to use a combination of the bool should and filter clauses to achieve the required result.
Adding a working example with index data, search query, and search result
Index Data:
{
"properties":{
"name": "Joe",
"role":"sysop"
}
}
{
"properties":{
"name": "Joe",
"role":"admin"
}
}
{
"properties":{
"name": "Joe",
"role":"student"
}
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "properties.name",
"query": "Joe*"
}
}
],
"should": [
{
"bool": {
"filter": {
"bool": {
"should": [
{
"match": {
"properties.role": "student"
}
},
{
"match": {
"properties.role": "sysop"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "65469210",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "admin"
}
}
},
{
"_index": "65469210",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "student"
}
}
},
{
"_index": "65469210",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "sysop"
}
}
}
]
You can also use the Explain API to see how the score is calculated. Here you can see that the matched should clauses have a value of 0.0; therefore, they do not contribute to the overall score of the query.
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "admin"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
}
]
}
},
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "student"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
},
{
"value": 0.0, // note this
"description": "ConstantScore(properties.role:student properties.role:sysop)^0.0",
"details": []
}
]
}
},
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "sysop"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
},
{
"value": 0.0, // note this
"description": "ConstantScore(properties.role:student properties.role:sysop)^0.0",
"details": []
}
]
}
}
]
}
}
Use a filter: filters only remove documents and won't affect the score:
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html
Related
I've prepared an Elastic Search query in which I'm trying to fetch results from nested objects. The query looks something like this:
{
"from": 0,
"size": 100,
"_source": {
"excludes": [
"#version"
]
},
"query": {
"bool": {
"must": [
{
"term": {
"doc.workflow_id.keyword": "workflow1"
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "price"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*34*"
}
}
]
}
}
]
}
}
}
}
],
"must_not": []
}
}
}
Output:
"hits" : [
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv1",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "weight",
"value" : "10"
},
{
"name" : "color",
"value" : "green"
},
{
"name" : "city",
"value" : "#error"
}
]
}
}
},
{
"_index" : "sample_index",
"_type" : "_doc",
"_id" : "mv2",
"_score" : null,
"_source" : {
"doc" : {
"workflow_id" : "workflow1",
"attributes" : [
{
"name" : "price",
"value" : "34"
},
{
"name" : "color",
"value" : "green"
}
]
}
}
}
]
I've omitted a few trivial details in query and output for simplicity. The attributes array in the response is of type nested and contains name and value fields of type string.
I've put filters on attributes color and price, but as you can see, I'm getting other attributes too in the attributes array. Can I somehow pass specific attribute names to the ES query and get the value of those attributes only?
I tried using inner_hits in both nested queries, but it returns the attribute value only for the passed attribute name in the nested query.
E.g.
{
"nested": {
"path": "doc.attributes",
"query": {
"bool": {
"filter": [
{
"match": {
"doc.attributes.name": "color"
}
},
{
"bool": {
"should": [
{
"wildcard": {
"doc.attributes.value.rawold": "*green*"
}
}
]
}
}
]
}
},
"inner_hits": {
"name": "two",
"_source": [
"doc.product_attributes.name",
"doc.product_attributes.value"
]
}
}
}
gives result
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "weight",
"value": "34"
},
{
"name": "color",
"value": "green"
},
{
"name": "city",
"value": "#ERROR"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
},
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv2",
"_score": null,
"_source": {
"doc": {
"workflow_id": "workflow1",
"attributes": [
{
"name": "price",
"value": "34"
},
{
"name": "color",
"value": "green"
}
]
}
},
"inner_hits": {
"two": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "sample_index",
"_type": "_doc",
"_id": "mv1",
"_nested": {
"field": "doc.attributes",
"offset": 1
},
"_score": 0.0,
"_source": {
"name": "color",
"value": "green"
}
}
]
}
}
}
}
]
}
Note the attribute name and value received inside the inner_hits object.
I also want to get, in the response, the names and values of other attributes on which I'm not applying any filter. For example, if I want to get the attribute names and values for weight, color & city only, how do I do that?
I've checked this thread select matching objects from array in elasticsearch, but it doesn't solve my problem.
I'm using laravel + elasticsearch.
I have an array like this:
[
{
"title": "product_title",
"stocks": [
{
"country": "EN",
"stock": 0
},
{
"country": "IN",
"stock": 1
}
]
},
{
"title": "product_title_2",
"stocks": [
{
"country": "EN",
"stock": 1
},
{
"country": "IN",
"stock": 0
}
]
}
]
Now I want to find all objects that have country equal to EN and stock greater than 1.
updated
my query:
{
"index": "products",
"body": {
"size": 15,
"from": 1,
"sort": [
{
"stock": {
"order": "desc"
}
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "**",
"type": "best_fields",
"fields": [
"erp_id",
"title_en^2",
"translations.title^2",
"erp.title_en",
"erp.title",
"erp.options.title",
"erp.options.title_en"
],
"analyze_wildcard": true,
"allow_leading_wildcard": true
}
}
],
"filter": [
{
"term": {
"is_active": 1
}
},
{
"term": {
"shops.shop_id": 1
}
}
]
}
},
"aggs": {
"max_price": {
"filter": {
"term": {
"erp.price_lists.currency.abbr": "tmn"
}
},
"aggs": {
"result": {
"max": {
"field": "erp.price_lists.pivot.price_tt"
}
}
}
},
"min_price": {
"filter": {
"term": {
"erp.price_lists.currency.abbr": "tmn"
}
},
"aggs": {
"result": {
"min": {
"field": "erp.price_lists.pivot.price_tt"
}
}
}
}
}
}
}
You can use a nested query along with inner_hits to get the objects satisfying the requirements.
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"stocks": {
"type": "nested"
}
}
}
}
Index Data:
{
"title": "product_title_2",
"stocks": [
{
"country": "EN",
"stock": 1
},
{
"country": "IN",
"stock": 0
}
]
}
{
"title": "product_title",
"stocks": [
{
"country": "EN",
"stock": 0
},
{
"country": "IN",
"stock": 1
}
]
}
{
"title": "product_title_3",
"stocks": [
{
"country": "EN",
"stock": 2
},
{
"country": "IN",
"stock": 0
}
]
}
Search Query:
{
"query": {
"nested": {
"path": "stocks",
"query": {
"bool": {
"filter": [
{
"match": {
"stocks.country": "EN"
}
},
{
"range": {
"stocks.stock": {
"gt": 1
}
}
}
]
}
},
"inner_hits":{}
}
}
}
Search Result:
"hits": [
{
"_index": "67294405",
"_type": "_doc",
"_id": "3",
"_score": 0.0,
"_source": {
"title": "product_title_3",
"stocks": [
{
"country": "EN",
"stock": 2
},
{
"country": "IN",
"stock": 0
}
]
},
"inner_hits": {
"stocks": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "67294405",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "stocks",
"offset": 0
},
"_score": 0.0,
"_source": {
"country": "EN",
"stock": 2
}
}
]
}
}
}
}
]
In Elasticsearch I have an index named Menu. Each Menu document has an array of Shop objects, something like this:
{
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "A",
"shop_id: "A",
},
{
"name": "B",
"shop_id: "B",
}
]
}
{
"menu_id": 2,
"name": 2,
"shops": [
{
"name": "C",
"shop_id: "C",
}
]
}
{
"menu_id": 3,
"name": 3,
"shops": [
{
"name": "A",
"shop_id: "A",
}
]
}
{
"menu_id": 4,
"name": 4,
"shops": [
{
"name": "A",
"shop_id: "A",
},
{
"name": "C",
"shop_id: "C",
}
]
}
With my query I want to search for shops that have ID "A" or "C". I want my result to look like this:
{
"name": "A",
"shop_id: "A",
},
{
"name": "C",
"shop_id: "C",
}
I tried with this query.
{
"_source": "shops",
"query": {
"bool": {
"should": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
}
},
"aggs": {
"all_shops": {
"terms": {
"field": "shops.id.keyword",
"min_doc_count": 1
},
"aggs": {
"real_shop": {
"top_hits": {
"_source": [
"shops"
],
"size": 1
}
}
}
}
}
}
And this query.
{
"_source": "shops",
"query": {
"bool": {
"should": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
}
},
"aggs": {
"messages": {
"filters": {
"filters": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
},
"aggs": {
"real_shop": {
"top_hits": {
"_source": [
"shops"
],
"size": 1
}
}
}
}
}
}
I still get many "A", "B" and "C" entries.
How can I get "A" just once and "C" just once?
I cannot search the Shop index directly, because I want to use information from Menu in the search.
The final query is "search shops by the shop's name or the menu's name, together with shop IDs".
You need to map shops as the nested type in order to query each nested object independently. You can use inner_hits to return the nested objects that exactly matched the query. Modify your index mapping as shown below:
{
"mappings": {
"properties": {
"shops": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"nested": {
"path": "shops",
"query": {
"terms": {
"shops.shop_id.keyword": [
"A",
"C"
]
}
},
"inner_hits": {}
}
}
}
Search Result:
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "A",
"shop_id": "A"
},
{
"name": "B",
"shop_id": "B"
}
]
},
"inner_hits": {
"shops": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "A", // note this
"shop_id": "A"
}
}
]
}
}
}
},
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "C",
"shop_id": "C"
}
]
},
"inner_hits": {
"shops": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "C",
"shop_id": "C" // note this
}
}
]
}
}
}
}
]
UPDATE 1:
You can use a filter aggregation along with a nested aggregation to achieve your use case. Try the query below:
{
"size": 0,
"aggs": {
"NAME": {
"nested": {
"path": "shops"
},
"aggs": {
"NAME": {
"filter": {
"terms": {
"shops.shop_id.keyword": ["A","C"]
}
},
"aggs": {
"NAME": {
"terms": {
"field": "shops.shop_id.keyword"
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
}
}
Search Result will be
"aggregations": {
"NAME": {
"doc_count": 6,
"NAME": {
"doc_count": 5,
"NAME": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"top_sales_hits": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "A", // note this
"shop_id": "A"
}
}
]
}
}
},
{
"key": "C",
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "C", // note this
"shop_id": "C"
}
}
]
}
}
}
]
}
}
}
}
How can we fetch candidates which have at least one phone number from the below index data along with other conditions like must and should?
Using elastic version 6.*
{
"_index": "test",
"_type": "docs",
"_id": "1271",
"_score": 1.518617,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
You can use terms query that returns documents that contain one
or more exact terms in a provided field.
Search Query:
{
"query": {
"bool": {
"must": [
{
"terms": {
"record.phoneNumbers.phoneNumber.keyword": [
"7845200448"
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "stof_64388591",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
]
Update 1: For version 7.*
You need to use a script query, to filter documents based on the provided script.
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].length > 0",
"lang": "painless"
}
}
}
}
}
}
For version 6.*
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].values.length > 0",
"lang": "painless"
}
}
}
}
}
}
You can use an exists query for this purpose, as shown below; it is lightweight compared with script queries:
{
"query": {
"exists": {
"field": "record.phoneNumbers.phoneNumber"
}
}
}
Let's say each record has a city field holding an array of city names.
records ex:
record 1:
{
cities : [
{name: city1},
{name : city2},
{name : city3}
]
}
record 2:
{
cities : [
{name: city2},
{name : city3},
{name : city4}
]
}
record 3:
{
cities : [
{name: city3},
{name : city4},
{name : city5}
]
}
requirement:
My filter criterion is to fetch the records that match city1, city2 or city3. Since record 1 matches all three, it should come first; record 2 matches two, so it should come second; and record 3 matches only one, so it should come last.
You don't have to use the nested data type, as you don't have nested properties or complex objects; this is very simple and easy to achieve.
Working example
Index mapping
{
"mappings": {
"properties": {
"cities": {
"type": "text"
}
}
}
}
Index sample docs
{
"cities": [
"tel-aviv", "bangalore", "sf"
]
}
{
"cities": [
"tel-aviv"
]
}
{
"cities": [
"sf"
]
}
Search query
{
"query": {
"bool": {
"should": [
{
"match": {
"cities": "tel-aviv"
}
},
{
"match": {
"cities": "bangalore"
}
},
{
"match": {
"cities": "sf"
}
}
]
}
}
}
And search result with proper expected result and score
"hits": [
{
"_index": "cities",
"_type": "_doc",
"_id": "1",
"_score": 1.850198,
"_source": {
"cities": [
"tel-aviv",
"bangalore",
"sf"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "2",
"_score": 0.9983525,
"_source": {
"cities": [
"tel-aviv"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "3",
"_score": 0.6133945,
"_source": {
"cities": [
"sf"
]
}
}
]
Adding another answer with nested bool queries:
Index Mapping:
{
"mappings": {
"properties":{
"Cities": {
"type": "nested",
"dynamic": "true"
}
}}
}
Index Data:
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Bangalore"
}
}
]
}
}
}
},
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Hyderabad"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "1",
"_score": 3.297317, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
},
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "2",
"_score": 1.6486585, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
}
]