Query with And & OR in Elastic Search - elasticsearch

I'm new to Elasticsearch. I have documents like the ones shown below.
The mapping of the index is as follows:
Mapping
{
"mappings": {
"properties": {
"age": {
"type": "long"
},
"hobbiles": {
"type": "keyword"
}
}
}
}
Some sample documents are like below :
[{
"_id": "test#domain.com",
"age": 12,
"hobbiles": [{
"name": "Singing",
"level": "begineer"
},
{
"name": "Dancing",
"level": "begineer"
}
]
},
{
"_id": "test1#domain.com",
"age": 7,
"hobbiles": [{
"name": "Coding",
"level": "begineer"
},
{
"name": "Chess",
"level": "begineer"
}
]
},
{
"_id": "test2#domain.com",
"age": 20,
"hobbiles": [{
"name": "Singing",
"level": "begineer"
},
{
"name": "Dancing",
"level": "begineer"
}
]
},
{
"_id": "test3#domain.com",
"age": 21,
"hobbiles": [{
"name": "Coding",
"level": "begineer"
},
{
"name": "Dancing",
"level": "Football"
}
]
}
]
Now I want to fetch documents where id IN (test#domain.com, test1#domain.com) and age is greater than 5. Optionally, the hobbiles level is Football.
My expectation is that I should get three documents. If hobbies do not match, that is fine, but if hobbies match, that document should be on top. Basically, I want to match on hobbies, but it is optional: if nothing matches, I should still get data based on the prior clauses.
[test3#domain.com, test#domain.com, test1#domain.com]
test3 on top because Football matches there, and test and test1 because age and id matches there.

Tldr;
It can be achieved via bool queries.
Solution
PUT /_bulk
{"index":{"_index":"73935795", "_id":"test#domain.com"}}
{"age":12,"hobbiles":[{"name":"Singing","level":"begineer"},{"name":"Dancing","level":"begineer"}]}
{"index":{"_index":"73935795", "_id":"test1#domain.com"}}
{"age":7,"hobbiles":[{"name":"Coding","level":"begineer"},{"name":"Chess","level":"begineer"}]}
{"index":{"_index":"73935795", "_id":"test2#domain.com"}}
{"age":20,"hobbiles":[{"name":"Singing","level":"begineer"},{"name":"Dancing","level":"begineer"}]}
{"index":{"_index":"73935795", "_id":"test3#domain.com"}}
{"age":21,"hobbiles":[{"name":"Coding","level":"begineer"},{"name":"Dancing","level":"Football"}]}
GET 73935795/_search
{
"query": {
"bool": {
"filter": [
{
"range": {
"age": {
"gt": 5
}
}
},
{
"terms": {
"_id": [
"test#domain.com",
"test1#domain.com",
"test3#domain.com"
]
}
}
],
"should": [
{
"query_string": {
"query": "(football) OR (begineer)",
"default_field": "hobbiles.level"
}
}
]
}
}
}

This requires using the should clause. Should is equivalent to "OR", so a document will be returned if it satisfies any one of the conditions in the should query.
For the conditions on id and age I have used the filter clause, which is equivalent to "AND". The filter clause does not calculate a score for matched documents, so any document that matches "hobbiles.level" will be ranked higher.
Query
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"term": {
"hobbiles.level.keyword": {
"value": "Football"
}
}
},
{
"bool": {
"filter": [
{
"terms": {
"id.keyword": [
"test#domain.com",
"test1#domain.com"
]
}
},
{
"range": {
"age": {
"gt": 5
}
}
}
]
}
}
]
}
}
}
Result
"hits" : [
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "qE06noMBfFiM6spcUTo4",
"_score" : 1.3112575,
"_source" : {
"id" : "test3#domain.com",
"age" : 21,
"hobbiles" : [
{
"name" : "Coding",
"level" : "begineer"
},
{
"name" : "Dancing",
"level" : "Football"
}
]
}
},
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "pE03noMBfFiM6spc4jr2",
"_score" : 0.0,
"_source" : {
"id" : "test#domain.com",
"age" : 12,
"hobbiles" : [
{
"name" : "Singing",
"level" : "begineer"
},
{
"name" : "Dancing",
"level" : "begineer"
}
]
}
},
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "pU03noMBfFiM6spc6DqZ",
"_score" : 0.0,
"_source" : {
"id" : "test1#domain.com",
"age" : 7,
"hobbiles" : [
{
"name" : "Coding",
"level" : "begineer"
},
{
"name" : "Chess",
"level" : "begineer"
}
]
}
}
]

Related

Elasticsearch DSL queries - optional should terms & scores

I'm pretty new on Elasticsearch world and I might be missing some concept.
That's the scenario I'm not understanding:
I want to find a doc from the following criteria:
category.level = A
category.name = "John G." OR "Chris T."
approved = yes (optional)
Mappings:
PUT data
{
"mappings": {
"properties": {
"createdAt": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSSZ"
},
"category": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"analyzer": "keyword"
}
}
},
"approved": {
"type": "text",
"analyzer": "keyword"
}
}
}
}
Data:
POST data/_create/1
{
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Mary F.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "yes"
}
POST data/_create/2
{
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "no"
}
POST data/_create/3
{
"category": [
{
"name": "John G.",
"level": "C"
},
{
"name": "Phil C.",
"level": "C"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "no"
}
POST data/_create/4
{
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2020-04-18 19:09:27.527+0200",
"approved": "yes"
}
POST data/_create/5
{
"category": [
{
"name": "Unknown A.",
"level": "A"
},
{
"name": "Unknown B.",
"level": "A"
}
],
"createdBy": "Unknown",
"createdAt": "2020-08-18 19:09:27.527+0200",
"approved": "yes"
}
Query:
GET data/_search
{
"query": {
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{"match": {"category.level": "A"}}
],
"should": [
{"term": {"category.name": "John G."}},
{"term": {"category.name": "Chris T."}},
{"term": {"approved": "yes"}}
],
"minimum_should_match": 1
}
}
}
}
}
Response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.4455402,
"hits" : [
{
"_index" : "data",
"_id" : "2",
"_score" : 1.4455402,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "no"
}
},
{
"_index" : "data",
"_id" : "4",
"_score" : 1.4455402,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2020-04-18 19:09:27.527+0200",
"approved" : "yes"
}
},
{
"_index" : "data",
"_id" : "1",
"_score" : 1.151647,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Mary F.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "yes"
}
}
]
}
}
Questions:
Why is the first document returned one with approved = no? I was expecting that docs with approved = yes would be scored higher.
Why is the doc with _id = 5 (it doesn't meet the category.name criterion, but it does meet approved = yes) not being returned?
The optionality of approved = yes is not being expressed in the above query. How could I create a kind of separate, extra should term with minimum_should_match: 0? Something that would increase the score but would not filter the results.
You need to use the query below, which has a main bool query. Its must clause contains a nested query, which in turn has a bool query with a term clause for the category.level field and another bool query with a should clause for the category.name field.
The main bool query also has a should clause for approved, which is used to boost results that have the value yes (this is outside the nested query).
POST data/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{
"term": {
"category.level": {
"value": "a"
}
}
},
{
"bool": {
"should": [
{
"term": {
"category.name": "John G."
}
},
{
"term": {
"category.name": "Chris T."
}
}
]
}
}
]
}
}
}
}
],
"should": [
{
"term": {
"approved": "yes"
}
}
]
}
}
}
Result:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.9845366,
"hits" : [
{
"_index" : "data",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.9845366,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2020-04-18 19:09:27.527+0200",
"approved" : "yes"
}
},
{
"_index" : "data",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.6906434,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Mary F.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "yes"
}
},
{
"_index" : "data",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.4455402,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "no"
}
}
]
}
}
Why the first document returned is an approval = no? I was expecting
that docs with approval = yes would be better scored.
Because you have the should clause for approved inside the nested query, it does not match any document: approved is outside category, hence it does not change the score.
Why doc with index = 5 (it doesn't attend the criteria category.name,
but it does for approved = yes) is not being returned?
It is removed by your must clause, but if you need the document with _id = 5 as well, you can add two should clauses, one for the nested query and one for approved, and that will resolve your issue.
Your question 3 is also resolved by my answer.
I tried your scenario with your mapping and sample data and found the issue: you are using approved:yes in the nested query context, which is causing the problem. If you change the query to the one below (basically using approved:yes in the should block but outside the nested query), it solves all your issues.
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{
"match": {
"category.level": "A"
}
}
],
"should": [
{
"term": {
"category.name": "John G."
}
},
{
"term": {
"category.name": "Chris T."
}
}
]
}
}
}
},
{
"term": {
"approved": "yes"
}
}
]
}
}
}
And search result
"hits": [
{
"_index": "71967271",
"_id": "4",
"_score": 1.9845366,
"_source": {
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2020-04-18 19:09:27.527+0200",
"approved": "yes"
}
},
{
"_index": "71967271",
"_id": "2",
"_score": 1.4455402,
"_source": {
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "no"
}
},
{
"_index": "71967271",
"_id": "1",
"_score": 1.2437345,
"_source": {
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Mary F.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "yes"
}
},
{
"_index": "71967271",
"_id": "5",
"_score": 0.7968255,
"_source": {
"category": [
{
"name": "Unknown A.",
"level": "A"
},
{
"name": "Unknown B.",
"level": "A"
}
],
"createdBy": "Unknown",
"createdAt": "2020-08-18 19:09:27.527+0200",
"approved": "yes"
}
}
]

Filter document on items in an array ElasticSearch using condition AND

I have data:
[
{
"NAME": "John Doe",
"CLASS":[1,10,30]
},
{
"NAME": "Albert",
"CLASS": [1,10,40]
},
{
"NAME": "XINN",
"CLASS": [10,30]
},
{
"NAME": "UJANG",
"CLASS": [1,40]
},
{
"NAME": "BAMBANG",
"CLASS": [30,40]
}
]
I have the following query DSL:
{
query: {
terms: {
class: [1,10]
}
}
}
and I want the result to be:
[{"NAME": "John Doe","CLASS":[1,10,30]},{"NAME": "Albert","CLASS": [1,10,40]}]
How do I change my search to match the result?
You need to combine multiple term queries in a must clause, so that every listed value has to be present in the document.
Query
{
"query": {
"bool": {
"must": [
{
"term": {
"CLASS": {
"value": 1
}
}
},
{
"term": {
"CLASS": {
"value": 10
}
}
}
]
}
}
}
Result
"hits" : [
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "2EdK6XsBP61bDf9bI3R1",
"_score" : 2.0,
"_source" : {
"NAME" : "John Doe",
"CLASS" : [
1,
10,
30
]
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "2UdK6XsBP61bDf9bMHT5",
"_score" : 2.0,
"_source" : {
"NAME" : "Albert",
"CLASS" : [
1,
10,
40
]
}
}
]

How to group distinct records in Elasticsearch

I have the following data in my Elasticsearch index:
{
"title": "Hello from elastic",
"name": "ABC",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "PQR",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "XYZ",
"j_id": "2",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "MNO",
"j_id": "3",
"date": '2021-03-02T12:29:31.356514'
}
Now I want to get unique records on the basis of the id.
The expected output is:
{
"1": [{
"title": "Hello from elastic",
"name": "ABC",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "PQR",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
}],
"2": [{
"title": "Hello from elastic",
"name": "XYZ",
"j_id": "2",
"date": '2021-03-02T12:29:31.356514'
}],
"3": [{
"title": "Hello from elastic",
"name": "MNO",
"j_id": "3",
"date": '2021-03-02T12:29:31.356514'
}]
}
I tried an aggregate query but it's giving me only the counts.
Also, I want to include latest record in my response.
How can I get sorted, unique records from Elasticsearch grouped by the id?
I want latest inserted data first
Assuming a minimal mapping covering the date and j_id fields:
PUT myindex
{
"mappings": {
"properties": {
"j_id": {
"type": "keyword"
},
"date": {
"type": "date"
}
}
}
}
you can leverage a terms aggregation whose sub-aggregation is an ordered top_hits aggregation:
POST myindex/_search?filter_path=aggregations.*.buckets.key,aggregations.*.buckets.sorted_hits.hits.hits._source
{
"size": 0,
"aggs": {
"by_j_id": {
"terms": {
"field": "j_id",
"size": 10,
"order": {
"max_date": "desc"
}
},
"aggs": {
"max_date": {
"max": {
"field": "date"
}
},
"sorted_hits": {
"top_hits": {
"size": 10,
"sort": [
{
"date": {
"order": "desc"
}
}
]
}
}
}
}
}
}
The URL parameter filter_path reduces the response body to closely mimic your required format:
{
"aggregations" : {
"by_j_id" : {
"buckets" : [
{
"key" : "1",
"sorted_hits" : {
"hits" : {
"hits" : [
{
"_source" : {
"title" : "Hello from elastic",
"name" : "ABC",
"j_id" : "1",
"date" : "2021-03-02T12:29:31.356514"
}
},
{
"_source" : {
"title" : "Hello from elastic",
"name" : "PQR",
"j_id" : "1",
"date" : "2021-03-02T12:29:31.356514"
}
}
]
}
}
},
{
"key" : "2",
"sorted_hits" : {
"hits" : {
"hits" : [
{
"_source" : {
"title" : "Hello from elastic",
"name" : "XYZ",
"j_id" : "2",
"date" : "2021-03-02T12:29:31.356514"
}
}
]
}
}
},
{
"key" : "3",
"sorted_hits" : {
"hits" : {
"hits" : [
{
"_source" : {
"title" : "Hello from elastic",
"name" : "MNO",
"j_id" : "3",
"date" : "2021-03-02T12:29:31.356514"
}
}
]
}
}
}
]
}
}
}

Elastics search query to filter out result having value A in list but not B

I am trying to form a query where the requirement is to return only those documents whose country contains india but not usa.
Sample data.
{
"data": {
"attributes": {
"name": "test",
"country": ["india","usa","japan"]
}
}
}
Since the above example contains both india and usa, it should not be returned in the result,
whereas a document should be returned if its data is in one of the formats shown below.
ex1:
{
"data": {
"attributes": {
"name": "test",
"country": ["india","japan"]
}
}
}
ex2:
{
"data": {
"attributes": {
"name": "test",
"country": ["india"]
}
}
}
You can use the filter (or must) and must_not clauses in a boolean query. In case I understood your requirement correctly, you could use the following.
{
"query": {
"bool" : {
"filter": {
"term" : { "data.attributes.country" : "india" }
},
"must_not" : {
"term" : { "data.attributes.country" : "usa" }
}
}
}
}
The documentation for both the boolean and term queries is quite extensive and contains many examples.
The question looked a bit confusing. I am assuming you don't want India and USA to appear together. Based on that assumption, the query below is shared.
"query": {
"bool": {
"must": [{
"term": {
"tags": {
"value": "india"
}
}
}
],
"must_not": [{
"term": {
"tags": {
"value": "usa"
}
}
}
]
}
}
The query looks like this:
GET countries/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"data.attributes.country.keyword":{
"value": "india"
}
}
}
],
"must_not": [
{
"term": {
"data.attributes.country":{
"value": "usa"
}
}
}
]
}
}
}
Response:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.16786805,
"hits" : [
{
"_index" : "countries",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.16786805,
"_source" : {
"data" : {
"attributes" : {
"name" : "test",
"country" : [
"india"
]
}
}
}
},
{
"_index" : "countries",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.13353139,
"_source" : {
"data" : {
"attributes" : {
"name" : "test",
"country" : [
"india",
"japan"
]
}
}
}
}
]
}
}
Steps to reproduce:
PUT countries
PUT countries/_doc/1
{
"data": {
"attributes": {
"name": "test",
"country": [
"india",
"usa",
"japan"
]
}
}
}
PUT countries/_doc/2
{
"data": {
"attributes": {
"name": "test",
"country": [
"india",
"japan"
]
}
}
}
PUT countries/_doc/3
{
"data": {
"attributes": {
"name": "test",
"country": [
"india"
]
}
}
}

How to search in nested object's array matching all items

I have an index with nested objects houses.
My index contains these documents:
{
"_id": "hello",
"name": "pippos",
"houses": [
{
"address": "garden square",
"id1": 1,
"id2": 5
},
{
"address": "top square",
"id1": 1,
"id2": 5
}
]
},
{
"_id": "hellone",
"name": "pippoone",
"houses": [
{
"address": "central square",
"id1": 1,
"id2": 9
},
{
"address": "minimale square",
"id1": 1,
"id2": 5
}
]
}
Using this query I receive both documents:
GET /pippis/_search
{
"query": {
"nested": {
"path": "houses",
"query": {
"bool": {
"must": [
{ "match": { "houses.id1": 1 }},
{ "match": { "houses.id2": 5 }}
]
}
}
}
}
}
I want only documents having all houses with id1=1 and id2=5
Mapping:
PUT /user
{
"mappings": {
"properties": {
"name": {
"type": "text"
},
"houses": {
"type": "nested",
"properties": {
"address": {
"type": "text"
},
"id1": {
"type": "integer"
},
"id2": {
"type": "integer"
}
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "user",
"_type" : "_doc",
"_id" : "5kQ6-2wBWSK8eKKSSozQ",
"_score" : 1.0,
"_source" : {
"name" : "pippos",
"houses" : [
{
"address" : "garden square",
"id1" : 1,
"id2" : 5
},
{
"address" : "top square",
"id1" : 1,
"id2" : 5
}
]
}
},
{
"_index" : "user",
"_type" : "_doc",
"_id" : "50Q9-2wBWSK8eKKStIzf",
"_score" : 1.0,
"_source" : {
"name" : "pippoone",
"houses" : [
{
"address" : "central square",
"id1" : 1,
"id2" : 9
},
{
"address" : "minimale square",
"id1" : 1,
"id2" : 5
}
]
}
},
{
"_index" : "user",
"_type" : "_doc",
"_id" : "6ERM-2wBWSK8eKKS3IzD",
"_score" : 1.0,
"_source" : {
"name" : "pippoone1",
"houses" : [
{
"address" : "central square",
"id1" : 2,
"id2" : 9
},
{
"address" : "minimale square",
"id1" : 2,
"id2" : 5
}
]
}
}
]
}
Query:
GET /user/_search
{
"query": {
"bool": {
"must_not": [ -----> Not of documents returned in nested query
{
"nested": {
"path": "houses",
"query": {
"bool": {
"should": [ -----> get documents where id1 is not 1 or id2 is not 5
{
"bool": {
"must_not": [
{
"match": {
"houses.id1": 1
}
}
]
}
},
{
"bool": {
"must_not": [
{
"match": {
"houses.id2": 5
}
}
]
}
}
]
}
}
}
}
]
}
}
}
Result:
[
{
"_index" : "user",
"_type" : "_doc",
"_id" : "5kQ6-2wBWSK8eKKSSozQ",
"_score" : 0.0,
"_source" : {
"name" : "pippos",
"houses" : [
{
"address" : "garden square",
"id1" : 1,
"id2" : 5
},
{
"address" : "top square",
"id1" : 1,
"id2" : 5
}
]
}
}
]

Resources