ElasticSearch compound queries - elasticsearch

My index data is
{
"first_name":"Kevin",
"last_name":"John",
"job": "IT"
}
{
"first_name":"John",
"last_name":"Thimothy",
"job": "Accountant"
}
{
"first_name":"Eric",
"last_name":"Villa",
"job": "Driver"
}
{
"first_name":"John",
"last_name":"Villa",
"job": "Student"
}
Could anyone help me build a query that returns the documents whose first_name or last_name is John and whose job is IT or Student?

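You need to use a combination of bool, must, and should clauses: each inner should group expresses one OR condition (name is John; job is IT or Student), and wrapping both groups in must requires both conditions to hold.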
Search Query:
{
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"match": {
"first_name": "John"
}
},
{
"match": {
"last_name": "John"
}
}
]
}
},
{
"bool": {
"should": [
{
"match": {
"job": "IT"
}
},
{
"match": {
"job": "student"
}
}
]
}
}
]
}
}
}
Search Result will be
"hits": [
{
"_index": "66982646",
"_type": "_doc",
"_id": "1",
"_score": 2.4079456,
"_source": {
"first_name": "Kevin",
"last_name": "John",
"job": "IT"
}
},
{
"_index": "66982646",
"_type": "_doc",
"_id": "4",
"_score": 1.89712,
"_source": {
"first_name": "John",
"last_name": "Villa",
"job": "Student"
}
}
]

Related

Search in multiple fields in ElasticSearch returns more results than expected

I'm trying to perform a search by country, or city or both using ElasticSearch.
When I perform a search by country using USA as the search term, I get these results, which are correct.
| email | country | city |
| --- | --- | --- |
| mike#example.com | USA | Portland |
| You Can Also | USA | Chicago |
The query looks like so:
{
"body": {
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"match": {
"country": {
"query": "USA",
"operator": "and"
}
}
}
]
}
}
]
}
}
}
}
The problem is that when I also search by city using the term Portland, I expect to get only one result:
| email | country | city |
| --- | --- | --- |
| mike#example.com | USA | Portland |
but I get both results again, just like when I search only by country.
The query for both fields looks like this:
{
"body": {
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"match": {
"country": {
"query": "USA",
"operator": "and"
}
}
}
]
}
},
{
"bool": {
"should": [
{
"match": {
"country": {
"query": "USA",
"operator": "and"
}
}
},
{
"match": {
"city": {
"query": "Portland",
"operator": "and"
}
}
}
]
}
}
]
}
}
}
}
What am I doing wrong?
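In your case, there is no need for multiple bool/should clauses or for the operator parameter in the match queries. Your second should group matches any document whose country is USA or whose city is Portland, so both USA documents satisfy it; to require both conditions, put the two match queries directly in must.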
Adding a working example
Search by country
{
"query": {
"bool": {
"must": {
"match": {
"country": {
"query": "USA"
}
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "67676851",
"_type": "_doc",
"_id": "1",
"_score": 0.18232156,
"_source": {
"email": "mike#example.com",
"country": "USA",
"city": "Portland"
}
},
{
"_index": "67676851",
"_type": "_doc",
"_id": "2",
"_score": 0.18232156,
"_source": {
"email": "You Can Also",
"country": "USA",
"city": "Chicago"
}
}
]
Search by city:
{
"query": {
"bool": {
"must": {
"match": {
"city": {
"query": "Portland"
}
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "67676851",
"_type": "_doc",
"_id": "1",
"_score": 0.6931471,
"_source": {
"email": "mike#example.com",
"country": "USA",
"city": "Portland"
}
}
]
Search by city and country:
{
"query": {
"bool": {
"must": [
{
"match": {
"city": "Portland"
}
},
{
"match": {
"country": "USA"
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "67676851",
"_type": "_doc",
"_id": "1",
"_score": 0.8754687,
"_source": {
"email": "mike#example.com",
"country": "USA",
"city": "Portland"
}
}
]

"should" query affects scoring; how can I avoid that?

I would like to change the following Elasticsearch query so that the "should" array does not affect the scoring of the results. I want the score to be calculated by the "query_string" on the name property only.
How can I achieve that with minimal changes?
GET customers/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "properties.name",
"query": "Joe*"
}
}
],
"should": [
{
"match": {
"properties.role": "admin"
}
},
{
"match": {
"properties.role": "sysop"
}
},
{
"match": {
"properties.role": "client"
}
},
{
"match": {
"properties.status": "public"
}
},
{
"match": {
"properties.status": "public"
}
}
],
"must_not": [
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
},
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
},
{
"match": {
"properties.status": "hide_from_search_results"
}
},
{
"match": {
"properties.status": "deleted"
}
},
{
"match": {
"properties.status": "banned"
}
}
]
}
},
"size": 30,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "return doc['_index'][0] == 'customers' && doc.containsKey('properties.videoCount')?doc['properties.videoCount'].value:0"
}
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "long timestampNow = new Date().getTime(); return doc['_index'][0] == 'customers' && doc.containsKey('properties.subscriptions.features.allow-application')?(timestampNow < doc['properties.subscriptions.features.first-on-search'].value.getMillis()):false"
}
}
},
{
"_script": {
"type": "string",
"order": "desc",
"script": {
"lang": "painless",
"source": "return doc['_index'][0] == 'customers' && doc.containsKey('properties.videoCount')?doc['properties.videoCount'].value:0"
}
}
}
]
}
You need to wrap the should clauses in a filter clause inside a bool query to achieve the required result; clauses that run in filter context do not contribute to the score.
Adding a working example with index data, search query, and search result
Index Data:
{
"properties":{
"name": "Joe",
"role":"sysop"
}
}
{
"properties":{
"name": "Joe",
"role":"admin"
}
}
{
"properties":{
"name": "Joe",
"role":"student"
}
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "properties.name",
"query": "Joe*"
}
}
],
"should": [
{
"bool": {
"filter": {
"bool": {
"should": [
{
"match": {
"properties.role": "student"
}
},
{
"match": {
"properties.role": "sysop"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "65469210",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "admin"
}
}
},
{
"_index": "65469210",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "student"
}
}
},
{
"_index": "65469210",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "sysop"
}
}
}
]
You can also use the Explain API to see how the score is calculated. Here you can see that the should clause has a value of 0.0 for the documents it matches; therefore, it does not contribute to the overall score of the query.
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "admin"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
}
]
}
},
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "student"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
},
{
"value": 0.0, // note this
"description": "ConstantScore(properties.role:student properties.role:sysop)^0.0",
"details": []
}
]
}
},
{
"_shard": "[65469210][0]",
"_node": "g1iQ5TpzQli7sSx266LDEA",
"_index": "65469210",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"properties": {
"name": "Joe",
"role": "sysop"
}
},
"_explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "properties.name:joe*",
"details": []
},
{
"value": 0.0, // note this
"description": "ConstantScore(properties.role:student properties.role:sysop)^0.0",
"details": []
}
]
}
}
]
}
}
Use filter: a filter only removes non-matching documents and does not affect the score:
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html

Filter elastic data on array count

How can we fetch candidates that have at least one phone number from the index data below, in combination with other conditions such as must and should clauses?
I am using Elasticsearch version 6.*.
{
"_index": "test",
"_type": "docs",
"_id": "1271",
"_score": 1.518617,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
You can use a terms query, which returns documents that contain one or more exact terms in a provided field.
Search Query:
{
"query": {
"bool": {
"must": [
{
"terms": {
"record.phoneNumbers.phoneNumber.keyword": [
"7845200448"
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "stof_64388591",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
]
Update 1: For version 7.*
You need to use a script query, to filter documents based on the provided script.
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].length > 0",
"lang": "painless"
}
}
}
}
}
}
For version 6.*
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].values.length > 0",
"lang": "painless"
}
}
}
}
}
}
You can use an exists query for this purpose, as shown below; it is lightweight compared with script queries:
{
"query": {
"exists": {
"field": "record.phoneNumbers.phoneNumber"
}
}
}

elasticsearch filter on nested array

Let's say the records have a cities field that is an array of city names.
Example records:
record 1:
{
cities : [
{name: city1},
{name : city2},
{name : city3}
]
}
record 2:
{
cities : [
{name: city2},
{name : city3},
{name : city4}
]
}
record 3:
{
cities : [
{name: city3},
{name : city4},
{name : city5}
]
}
requirement:
My filter criterion is to fetch the records that match city1, city2, or city3, ordered by the number of matches: record 1 matches all three, so it should come first; record 2 matches two, so it should come second; and record 3 matches only one, so it should come last.
You don't have to use the nested data type, since you don't have nested properties or complex objects; this is simple and easy to achieve.
Working example
Index mapping
{
"mappings": {
"properties": {
"cities": {
"type": "text"
}
}
}
}
Index sample docs
{
"cities": [
"tel-aviv", "bangalore", "sf"
]
}
{
"cities": [
"tel-aviv"
]
}
{
"cities": [
"sf"
]
}
Search query
{
"query": {
"bool": {
"should": [
{
"match": {
"cities": "tel-aviv"
}
},
{
"match": {
"cities": "bangalore"
}
},
{
"match": {
"cities": "sf"
}
}
]
}
}
}
And the search result, with the expected order and scores:
"hits": [
{
"_index": "cities",
"_type": "_doc",
"_id": "1",
"_score": 1.850198,
"_source": {
"cities": [
"tel-aviv",
"bangalore",
"sf"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "2",
"_score": 0.9983525,
"_source": {
"cities": [
"tel-aviv"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "3",
"_score": 0.6133945,
"_source": {
"cities": [
"sf"
]
}
}
]
Adding another answer with nested bool queries:
Index Mapping:
{
"mappings": {
"properties":{
"Cities": {
"type": "nested",
"dynamic": "true"
}
}}
}
Index Data:
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Bangalore"
}
}
]
}
}
}
},
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Hyderabad"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "1",
"_score": 3.297317, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
},
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "2",
"_score": 1.6486585, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
}
]

ElasticSearch query sub-objects

I wandered through the docs a lot today, but can't find the answer; probably because I'm new to Elastic and don't really know the entire ES-terminology yet.
Say I have a books type containing a bunch of, well - books. Each book has a nested author.
{
"name": "Me and Jane",
"rating": "10",
"author": {
"name": "John Doe",
"alias":"Mark Twain"
}
}
Now, I know we can query the authors fields like this:
"match": {
"author.name": "Doe"
}
But what if I want to search across all the author fields? I tried author._all, which doesn't work.
Another approach is multi_match with wildcard field names: https://www.elastic.co/guide/en/elasticsearch/guide/current/multi-match-query.html#_using_wildcards_in_field_names
Something like this, I think:
"query": {
"nested": {
"path": "author",
"query": {
"multi_match": {
"query": "doe",
"fields": [
"author.*"
]
}
}
}
}
UPDATE: full sample provided
PUT /books
{
"mappings": {
"paper": {
"properties": {
"author": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"alias": {
"type": "string"
}
}
}
}
}
}
}
POST /books/paper/_bulk
{"index":{"_id":1}}
{"author":[{"name":"john doe","alias":"doe"},{"name":"mark twain","alias":"twain"}]}
{"index":{"_id":2}}
{"author":[{"name":"mark doe","alias":"john"}]}
{"index":{"_id":3}}
{"author":[{"name":"whatever","alias":"whatever"}]}
GET /books/paper/_search
{
"query": {
"nested": {
"path": "author",
"query": {
"multi_match": {
"query": "john",
"fields": [
"author.*"
]
}
}
}
}
}
Result is:
"hits": {
"total": 2,
"max_score": 0.5906161,
"hits": [
{
"_index": "books",
"_type": "paper",
"_id": "2",
"_score": 0.5906161,
"_source": {
"author": [
{
"name": "mark doe",
"alias": "john"
}
]
}
},
{
"_index": "books",
"_type": "paper",
"_id": "1",
"_score": 0.5882852,
"_source": {
"author": [
{
"name": "john doe",
"alias": "doe"
},
{
"name": "mark twain",
"alias": "twain"
}
]
}
}
]
}
You can use a query string query. For example:
{
"query": {
"query_string": {
"fields": ["author.*"],
"query": "doe",
"use_dis_max": true
}
}
}

Resources