How to build an AND condition between should and must in an Elasticsearch bool query - spring-boot

Here is the sample USER document
{
"id" : "1234567",
"userId" : "testuser01",
"firstName" : "firstname",
"lastName" : "lastname",
"orgId" : "567890",
"phoneNumber" : "1234567890"
}
I want to build a search query that pulls all users that belong to a particular orgId AND that match the search text entered by the user in any of the fields (userId, firstName, etc.).
E.g. if a search is made using the text "first", I want to pull all records that belong to the particular orgId AND have at least one field containing "first".
Sample query I am trying is
"query" : {
"bool" : {
"must" : [
{
"term" : {
"orgId.keyword" : {
"value" : "567890",
"boost" : 1.0
}
}
}
],
"should" : [
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"lastName^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"userId^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"orgId^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"firstName^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"phoneNumber^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
},
{
"simple_query_string" : {
"query" : "first*",
"fields" : [
"id^1.0"
],
"flags" : -1,
"default_operator" : "or",
"lenient" : false,
"analyze_wildcard" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
"sort" : [
{
"userId.keyword" : {
"order" : "asc"
}
}
]
}
Issue I am facing is, I want to have AND condition between MUST and SHOULD.

You don't need to specify a separate simple_query_string query for each field. Rather, you can specify the list of fields in a single query, as below:
{
"query": {
"bool": {
"must": [
{
"term": {
"orgId.keyword": {
"value": "567890",
"boost": 1
}
}
},
{
"simple_query_string": {
"query": "first*",
"fields": [
"lastName^1.0",
"userId^1.0",
"orgId^1.0",
"firstName^1.0",
"phoneNumber^1.0",
"id^1.0"
]
}
}
]
}
},
"sort": [
{
"userId.keyword": {
"order": "asc"
}
}
]
}
Also to answer
How to build an AND condition between should and must in an Elasticsearch bool query?
here is a sample query for this:
{
"query": {
"bool": {
"must": [
{
"term": {
"field1": "someval"
}
},
{
"bool": {
"should": [
{
"terms": {
"field2": [
"v1",
"v2"
]
}
},
{
"query_string": {
"query": "this AND that OR thus"
}
}
]
}
}
]
}
}
}

Related

Elasticsearch: combining multiple Range and Term filters with And and Or operators

I have a filter with multiple date range filters combined with AND and OR operators. I need to get results that satisfy both date range filters or either one of them.
"query":{
"bool" : {
"must" : [
{
"match_phrase_prefix" : {
"searchField" : {
"query" : "Adam",
"slop" : 0,
"max_expansions" : 50,
"boost" : 1.0
}
}
}
],
"filter" : [
{
"term" : {
"srvcType" : {
"value" : "FullTime",
"boost" : 1.0
}
}
},
{"range" : { "or": {"startDt": {"from" : "2010-05-16","to" : "2022-02-18","include_lower": true,"include_upper" : true,"boost" : 1.0}} }},
{"range" : { "or": {"endDt": {"from" : "2015-05-16","to" : "2022-02-18","include_lower" : true,"include_upper" : true,"boost" : 1.0}}}}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
I tried to run the query like above, I got parsing_exception - query does not support StartDt.
{
"query":{
"bool" : {
"must" : [
{
"match_phrase_prefix" : {
"searchField" : {
"query" : "Adam",
"slop" : 0,
"max_expansions" : 50,
"boost" : 1.0
}
}
}
],
"filter" : [
{
"term" : {
"srvcType" : {
"value" : "FullTime",
"boost" : 1.0
}
}
},
{"range" : {"startDt": {"from" : "2010-05-16","to" : "2022-02-18","include_lower": true,"include_upper" : true,"boost" : 1.0}} },
{"range" : {"endDt": {"from" : "2015-05-16","to" : "2022-02-18","include_lower" : true,"include_upper" : true,"boost" : 1.0}}}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
If you need AND semantics for your date range filters, you can leave both range queries in the bool/filter array.
However, if you need OR semantics you can use the bool/should query, like below:
{
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"searchField": {
"query": "Adam",
"slop": 0,
"max_expansions": 50,
"boost": 1
}
}
}
],
"filter": [
{
"term": {
"srvcType": {
"value": "FullTime",
"boost": 1
}
}
}
],
"minimum_should_match": 1,
"should": [
{
"range": {
"startDt": {
"from": "2010-05-16",
"to": "2022-02-18",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
},
{
"range": {
"endDt": {
"from": "2015-05-16",
"to": "2022-02-18",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
}

Elasticsearch - Missing Field Value For Nested Field - Function Score Query | v7.10.2

I have already posted this on the ES group but I got no response and so posted it on SO. Link https://discuss.elastic.co/t/missing-field-value-for-nested-field-function-score-query-v7-10-2/291365
I have been trying for a long time now but the nested field value is always saying missing field value while calculating the score.
Mapping:
{
"doctor_idx" : {
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"service" : {
"type" : "nested",
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"serviceTypeEarliestAvailability" : {
"type" : "nested",
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"earliestAvailableDateTimeByType" : {
"type" : "date",
"format" : "date_hour_minute_second"
},
"serviceType" : {
"type" : "text"
},
"servicesMedium" : {
"type" : "keyword"
}
}
}
}
}
}
}
}
}
For simplicity, I have kept just one record. A glimpse of the record
"serviceTypeEarliestAvailability" : [
{
"serviceType" : "Service Type",
"earliestAvailableDateTimeByType" : "2021-12-09T19:39:16",
"servicesMedium" : [
"MED1",
"MED2",
"MED3",
"MED4"
]
}
],
The following query gives: "A document doesn't have a value for a field! Use doc[].size()==0 to check if a document is missing a field!"
I tried using field_value_factor instead of script_score but it's the same problem and it complains about the missing field value.
Query 1: with script_score
GET /doctor_idx/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type" // no complaints about this
}
},
"weight": 10
},
{
"script_score": {
"script": {
"source": "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())"
}
}
}
]
}
}
}
}
}
}
}
Query 2 : With field_value_factor
GET /doctor_idx/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type"
}
},
"weight": 10
},
{
"field_value_factor": {
"field": "service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType"
}
}
]
}
}
}
}
}
}
}
Error From ES for Query 1:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.get(ScriptDocValues.java:160)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.getValue(ScriptDocValues.java:154)",
"(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
" ^---- HERE"
],
"script" : "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
"lang" : "painless",
"position" : {
"offset" : 79,
"start" : 0,
"end" : 98
}
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "doctor_idx",
"node" : "mvh5k24dQPqM-d7JVeNomQ",
"reason" : {
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.get(ScriptDocValues.java:160)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.getValue(ScriptDocValues.java:154)",
"(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
" ^---- HERE"
],
"script" : "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
"lang" : "painless",
"position" : {
"offset" : 79,
"start" : 0,
"end" : 98
},
"caused_by" : {
"type" : "illegal_state_exception",
"reason" : "A document doesn't have a value for a field! Use doc[<field>].size()==0 to check if a document is missing a field!"
}
}
}
]
},
"status" : 400
}
Error From ES for Query 2:
{
"error" : {
"root_cause" : [
{
"type" : "exception",
"reason" : "Missing value for field [service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType]"
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "doctor_idx",
"node" : "mvh5k24dQPqM-d7JVeNomQ",
"reason" : {
"type" : "exception",
"reason" : "Missing value for field [service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType]"
}
}
]
},
"status" : 500
}
The only record I have in the ES
{
"took" : 32,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "doctor_idx",
"_type" : "_doc",
"_id" : "xM20oH0Bmp1zsT0w8pQe",
"_score" : 1.0,
"_source" : {
"_class" : "com.insig.clinic_apps.services_server.booking.dao.DoctorSchema",
"id" : "xM20oH0Bmp1zsT0w8pQe",
"metadata" : {
"doctorId" : "xhnz2lGvXxelWyVekz82c2v6Srb2",
"fullName" : "Dave Insig Email Test",
"credentials" : [
"MD"
],
"languagesSpoken" : [
"EN",
"NOT_DEFINED"
],
"specialitiesServed" : [ ],
"city" : "VAUGHAN",
"provincesPermitted" : [ ],
"province" : "ON",
"country" : "CANADA"
},
"service" : {
"servicesMedium" : [
"IN_CLINIC",
"VIDEO",
"MESSAGING",
"PHONE"
],
"servicesTypeDuration" : [
{
"serviceType" : "Acne Symptoms",
"duration" : 5,
"servicesMedium" : [
"IN_CLINIC",
"MESSAGING",
"PHONE",
"VIDEO"
]
}
],
"serviceTypeEarliestAvailability" : [
{
"serviceType" : "Acne Symptoms",
"earliestAvailableDateTimeByType" : "2021-12-09T19:39:16",
"servicesMedium" : [
"IN_CLINIC",
"MESSAGING",
"PHONE",
"VIDEO"
]
}
],
"bufferTimeForNextAvailability" : 0
},
"earliestAvailableDateTime" : "2021-12-09T19:39:16",
"patientRating" : 4.384481,
"onTimeRating" : 3.171053
}
}
]
}
}
/!\ Partial solution /!\
Query 1
When I was trying to execute your query I got this error:
{
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "dynamic method [org.elasticsearch.script.JodaCompatibleZonedDateTime, toEpochMilli/0] not found"
}
}
So I did a slight change to your query .toInstant().toEpochMilli()
GET /so_custom_score/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type"
}
},
"weight": 10
},
{
"script_score": {
"script": {
"source": """
def availability = doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value;
return availability.toInstant().toEpochMilli();
"""
}
}
}
]
}
}
}
}
}
}
}
Well, wasted a lot of time here and figured that there was no problem with the queries. Elasticsearch v7.10.2 has an issue when I try to get the explanation of the query.
It works perfectly fine without the explain parameter.
See the stack trace frame: at org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction$1.explainScore(FieldValueFactorFunction.java:103) ~[elasticsearch-7.10.2.jar:7.10.2]
PS: One of the comments in the OG question mentioned that the latest Elasticsearch v7.15 works fine even with explain parameter.

How do I query nested with normal match query together?

I want to run a nestedQuery on addresses and a multiMatchQuery on name in a single query. I tried a few ways, but I am getting "[bool] query does not support [nested]". I don't know whether this is possible or not (ES version: 7.x).
When I run the queries separately (i.e. nestedQuery() and multiMatchQuery()), they work fine.
Please help me with that.
This is the mapping I am using:
{
"employee" : {
"mappings" : {
"properties" : {
"addresses" : {
"type" : "nested",
"properties" : {
"permanentAddress" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"TemporaryAddress" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
1. {
"query": {
"nested": {
"path": "addresses",
"query": {
"bool": {
"must": [
{ "match": { "addresses.permanentAddress": "xxx" } }
]
}
},
"score_mode": "avg"
}
}
}
2. {
"query": {
"bool": {
"must" : [
{
"multi_match" : {
"query" : "xxx",
"fields" : [
"name^1.0"
],
"type" : "best_fields",
"boost" : 1.0
}
}
]
}
}
}
nestedQuery() = looking for xxx value in addresses.permanentAddress
multi_match() = looking for xxx value in name
If value of name or addresses.permanentAddress matches with xxx then returns the result.
"bool" : {
"should" : [
{
"bool" : {
"must" : [
{
"match" : {
"name" : {
"query" : "xxx",
"operator" : "AND",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
{
"nested" : {
"query" : {
"bool" : {
"must" : [
{
"match" : {
"employee.permanentAddress" : {
"query" : "xxx",
"operator" : "AND",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"auto_generate_synonyms_phrase_query" : true,
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
"path" : "employee",
"ignore_unmapped" : false,
"score_mode" : "none",
"boost" : 1.0
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
This bool query with a nested clause worked for me; with it I am able to check the parent's properties as well as the nested ones.

ElasticSearch should query

I want to create ElasticSearch query which would be the same as this SQL query
select *
from main.adverts
where user_id = 4
and
(title ilike '%продать / купить%'
or description ilike '%продать / купить%'
)
My attempt is:
{
"query" :{
"bool" : {
"must" : [
{
"term" : {
"user.id" : {
"value" : 4,
"boost" : 1.0
}
}
}
],
"should" : [
{
"bool" : {
"must" : [
{
"match" : {
"title" : {
"query" : "продать",
"operator" : "OR",
"fuzzy_transpositions" : false,
"boost" : 1.0
}
}
},
{
"wildcard" : {
"title" : {
"wildcard" : "купить*",
"boost" : 1.0
}
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
{
"bool" : {
"must" : [
{
"match" : {
"description" : {
"query" : "продать",
"operator" : "OR",
"fuzzy_transpositions" : false,
"boost" : 1.0
}
}
},
{
"wildcard" : {
"description" : {
"wildcard" : "купить*",
"boost" : 1.0
}
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
But it doesn't work correctly. As a result of this query I get records which don't contain the phrase "продать / купить".
I think the problem is in the "should" part of the Elasticsearch query, but I can't figure out where exactly.
Could you point out where my mistake is?
It seems that I've found a solution:
{
"from" : 0, "size" : 60,
"_source" : ["title", "description"],
"query" :{
"bool" : {
"must" : [
{
"term" : {
"user.id" : {
"value" : 4,
"boost" : 1.0
}
}
},
{
"bool" : {
"must" : [
{
"match" : {
"title" : {
"query" : "продать",
"operator" : "OR",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : false,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
}
},
{
"wildcard" : {
"title" : {
"wildcard" : "купить*",
"boost" : 1.0
}
}
}
],
"should" : [
{
"bool" : {
"must" : [
{
"match" : {
"description" : {
"query" : "продать",
"operator" : "OR",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : false,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
}
},
{
"wildcard" : {
"description" : {
"wildcard" : "купить*",
"boost" : 1.0
}
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}

Elasticsearch bool query formation with multiple must clause

I have a query like the following -
{
"query": {
"bool": {
"must": {
"bool" : { "should": [
{ "match": { "camp_id": "Elasticsearch" }},
{ "match": { "camp_id": "Solr" }} ] }
},
"must": {
"bool" : { "should": [
{ "match": { "ad_id": "Elastic" }},
{ "match": { "ad_id": "dummy" }} ] }
},
"must_not": { "match": {"authors": "radu gheorge" }},
.....
.....
}
}
}
In short, (camp_id = 'Elasticsearch' or camp_id = 'Solr') AND (ad_id = 'Elastic' or ad_id = 'dummy') ....
After a good amount of research, I wrote the following Java code -
final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
final BoolQueryBuilder finalBoolQuery = new BoolQueryBuilder();
BoolQueryBuilder campaignBoolQuery = null;
if (campaignIds != null) {
campaignBoolQuery = QueryBuilders.boolQuery();
for (int campaignId : campaignIds) {
campaignBoolQuery.should(QueryBuilders.matchQuery("camp_id", campaignId));
}
}
BoolQueryBuilder creativeBoolQuery = null;
if (creativeIds != null) {
creativeBoolQuery = QueryBuilders.boolQuery();
for (int creativeId : creativeIds) {
creativeBoolQuery.should(QueryBuilders.matchQuery("ad_id", creativeId));
}
}
finalBoolQuery.must(campaignBoolQuery);
finalBoolQuery.must(creativeBoolQuery);
searchSourceBuilder.query(finalBoolQuery).size(9999);
System.out.println(searchSourceBuilder.toString());
With the above code, I expected that I would have 1 must clause for 'camp_id' and another 1 for 'ad_id' but following is what I got -
{
"size" : 9999,
"query" : {
"bool" : {
"must" : [
{
"bool" : {
"should" : [
{
"match" : {
"camp_id" : {
"query" : 1,
"operator" : "OR",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
}
},
{
"match" : {
"camp_id" : {
"query" : 2,
"operator" : "OR",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
{
"bool" : {
"should" : [
{
"match" : {
"ad_id" : {
"query" : 1,
"operator" : "OR",
"prefix_length" : 0,
"max_expansions" : 50,
"fuzzy_transpositions" : true,
"lenient" : false,
"zero_terms_query" : "NONE",
"boost" : 1.0
}
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
],
"disable_coord" : false,
"adjust_pure_negative" : true,
"boost" : 1.0
}
}
}
There is only one must clause, which wraps both camp_id and ad_id. Can someone please point out what I am missing? I am using Elasticsearch version 5.5.0 and Jest 2.4.0 as my Java client.
Your outer bool sample query contains two must clauses; however, there must be a single must clause that consists of an array of objects. I suppose you are overwriting the first must clause with the second when calling must() twice.

Resources