elasticsearch boost query in feild having multiple value - elasticsearch

I have some document in elasticsearch index. Here is the sample document
DOC1
{
"feild1":["hi","hello","goodmorning"]
"feild2":"some string"
"feild3":{}
}
DOC2
{
"feild1":["hi","goodmorning"]
"feild2":"some string"
"feild3":{}
}
DOC3
{
"feild1":["hi","hello"]
"feild2":"some string"
"feild3":{}
}
I want to query for feild1 having values "hi" and "hello" if both is present then that document should come first if any one is present then it should come after that.
for example:
result should be in order of DOC1, DOC3, DOC2. I tried with boost query. but it is retuning not in the order that I want. Here is the query that I am trying.
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"avail_status": true
}
},
{
"bool": {
"should": [
{
"constant_score": {
"filter": {
"terms": {
"feild1": [
"hi"
]
}
},
"boost": 20
}
},
{
"constant_score": {
"filter": {
"terms": {
"feild1": [
"hello"
]
}
},
"boost": 18
}
}
]
}
}
]
}
}
}
this is returning me first those document having "hi" and then those having "hello". Thanks in advance!

To add extra boost for documents with larger field1, you can put funtion_score script score.
Mappings
{
"mappings": {
"document_type" : {
"properties": {
"field1" : {
"type": "text",
"fielddata": true
},
"field2" : {
"type": "text"
},
"field3" : {
"type": "text"
}
}
}
}
}
Index documents
POST custom_score_index1/document_type
{
"feild1":["hi","hello","goodmorning"],
"feild2":"some string",
"feild3":{}
}
POST custom_score_index1/document_type
{
"feild1":["hi","goodmorning"],
"feild2":"some string",
"feild3":{}
}
POST custom_score_index1/document_type
{
"feild1":["hi","hello"],
"feild2":"some string",
"feild3":{}
}
Query with function score add extra _score for larger size for field1
POST custom_score_index1/document_type/_search
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [{
"match_phrase": {
"avail_status": true
}
},
{
"bool": {
"should": [{
"constant_score": {
"filter": {
"terms": {
"feild1": [
"hi"
]
}
},
"boost": 20
}
},
{
"constant_score": {
"filter": {
"terms": {
"feild1": [
"hello"
]
}
},
"boost": 18
}
}
]
}
}
]
}
},
"functions": [{
"script_score": {
"script": {
"inline": "_score + 10000 * doc['field1'].length"
}
}
}],
"score_mode": "sum",
"boost_mode": "sum"
}
}
}

Related

Querying Specific List Indexes In Elastic Search

So I have an search query in Elasticsearch which queries a field called myList. Inside that list are elements. elm1, elm2, and elm3. I want to be able to query that list such that all the elements must match. For example:
myList: [{
elm1: "value1",
elm2: "value2",
elm3: "value4"
},
{
elm1: "value2"
elm2: "value3"
elm3: "value3"
},
{
elm1: "value3",
elm2: "value4",
elm3: "value5"
}]
If I construct a query such that it searches for the field: elm1 = value1 and elm2 = value2 and elm3=value3,
"query": {
"bool": {
"must": [],
"filter": [
{
"bool": {
"filter": [
{
"bool": {
"should": [
{
"query_string": {
"fields": [
"myList.elm1.keyword"
],
"query": "value1"
}
}
],
"minimum_should_match": 1
}
},"bool": {
"filter": [
{
"bool": {
"should": [
{
"query_string": {
"fields": [
"myList.elm2.keyword"
],
"query": "value2"
}
}
],
"minimum_should_match": 1
}
},"bool": {
"filter": [
{
"bool": {
"should": [
{
"query_string": {
"fields": [
"myList.elm3.keyword"
],
"query": "value3"
}
}
],
"minimum_should_match": 1
}
}
}
]
}
}
It will return true because
myList[0]['elm1']=value1
myList[0]['elm2']=value2
myList[1]['elm3']=value3
This is not what I want.
How do I get it such that
myList[x]['elm1']=value1
myList[y]['elm2']=value2
myList[z]['elm3']=value3
Where x=y=z
When you're deailing with arrays of objects, these objects get flattened and essentially lose the connections between each other.
You should use the nested field type instead:
PUT elms_deep
{
"mappings": {
"properties": {
"myList": {
"type": "nested"
}
}
}
}
then re-add your documents:
POST elms_deep/_doc
{
"myList": [
{
"elm1": "value1",
"elm2": "value2",
"elm3": "value4"
},
{
"elm1": "value2",
"elm2": "value3",
"elm3": "value3"
},
{
"elm1": "value3",
"elm2": "value4",
"elm3": "value5"
}
]
}
and then proceed with the 3 nested term queries -- no need for your original query_string queries when you're targeting .keyword fields:
POST elms_deep/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "myList",
"query": {
"term": {
"myList.elm1.keyword": {
"value": "value1"
}
}
}
}
},
{
"nested": {
"path": "myList",
"query": {
"term": {
"myList.elm2.keyword": {
"value": "value2"
}
}
}
}
},
{
"nested": {
"path": "myList",
"query": {
"term": {
"myList.elm3.keyword": {
"value": "value3"
}
}
}
}
}
]
}
}
}
So Joe's answer is nearly there, he's right you need to use the nested field type. His answer gives the possibility of
myList[x]['elm1']=value1
myList[y]['elm2']=value2
myList[z]['elm3']=value3
Where x,y,z are independent elements.
If you want x=y=z:
{
"query": {
"nested": {
"path": "myList",
"query": {
"bool": {
"filter": [
{
"term": {
"myList.elm1.keyword": {
"value": "value1"
}
}
},
{
"term": {
"myList.elm1.keyword": {
"value": "value1"
}
}
},
{
"term": {
"myList.elm1.keyword": {
"value": "value1"
}
}
}
]
}
}
}
}
}

ElasticSearch should with nested and bool must_not exists

With the following mapping:
"categories": {
"type": "nested",
"properties": {
"category": {
"type": "integer"
},
"score": {
"type": "float"
}
}
},
I want to use the categories field to return documents that either:
have a score above a threshold in a given category, or
do not have the categories field
This is my query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"terms": {
"categories.category": [
<id>
]
}
},
{
"range": {
"categories.score": {
"gte": 0.5
}
}
}
]
}
}
}
},
{
"bool": {
"must_not": [
{
"exists": {
"field": "categories"
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}
It correctly returns documents both with and without the categories field, and orders the results so the ones I want are first, but it doesn't filter the results having score below the 0.5 threshold.
Great question.
That is because categories is not exactly a field from the elasticsearch point of view[a field on which inverted index is created and used for querying/searching] but categories.category and categories.score is.
As a result categories being not found in any document, which is actually true for all the documents, you observe the result what you see.
Modify the query to the below and you'd see your use-case working correctly.
POST <your_index_name>/_search
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"terms": {
"categories.category": [
"100"
]
}
},
{
"range": {
"categories.score": {
"gte": 0.5
}
}
}
]
}
}
}
},
{
"bool": {
"must_not": [ <----- Note this
{
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "categories.category"
}
},
{
"exists": {
"field": "categories.score"
}
}
]
}
}
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}

Elasticsearch Query for getting field with 'AND' relation

I'm having elastic document as below
I want a search query satisfying condition:
how to get the those OPERATIONS and CATEGORY values that has both AREA=Mumbai and AREA=Chennai
So Output should be CATEGORY:Consulting1 , OPERATIONS: Regulatory Operations
Use terms Query :
{
"query": {
"terms": {
"AREA": [
"Mumbai",
"Chennai"
]
}
}
}
May be that works:
{
"query": {
"bool": {
"must": [
{"term": { "AREA" : "Mumbai" }},
{"term": { "AREA" : "Chennai" }}
]
}
}
}
Try this and let me know:
{
"size": 0,
"query": {
"bool": {
"should": [
{
"term": {
"AREA": "mumbai"
}
},
{
"term": {
"AREA": "chennai"
}
}
]
}
},
"aggs": {
"unique_operations": {
"terms": {
"field": "OPERATIONS",
"size": 10
},
"aggs": {
"count_areas": {
"cardinality": {
"field": "AREA"
}
},
"top": {
"top_hits": {
"size": 2,
"_source": {
"include": ["CATEGORY"]
}
}
},
"areas_bucket_filter": {
"bucket_selector": {
"buckets_path": {
"areasCount": "count_areas"
},
"script": "areasCount == 2"
}
}
}
}
}
}
LATER EDIT: added top_hits aggregation to get back sample documents covering the request for the categories.
Please try this one.
{
"query": {
"bool": {
"should": [
{
"query_string": {
"default_field": "AREA",
"query": "mumbai"
}
},
{
"query_string": {
"default_field": "AREA",
"query": "chennai"
}
}
]
}
}
}[![result][1]][1]

Elasticsearch must_not filter not works with a big bunch of values

I have the next query that include some filters:
{
"from": 0,
"query": {
"function_score": {
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"idpais": [
115
]
}
},
{
"term": {
"tipo": [
1
]
}
}
],
"must_not": [
{
"term": {
"idregistro": [
5912471,
3433876,
9814443,
11703069,
6333176,
8288242,
9924922,
6677850,
11852501,
12530205,
4703469,
12776479,
12287659,
11823679,
12456304,
12777457,
10977614,
...
]
}
}
]
}
},
"query": {
"bool": {
"should": [
{
"match_phrase": {
"area": "Coordinator"
}
},
{
"match_phrase": {
"company": {
"boost": 5,
"query": "IBM"
}
}
},
{
"match_phrase": {
"topic": "IT and internet stuff"
}
},
{
"match_phrase": {
"institution": {
"boost": 5,
"query": "University of my city"
}
}
}
]
}
}
}
},
"script_score": {
"params": {
"idpais": 115,
"idprovincia": 0,
"relationships": []
},
"script_id": "ScoreUsuarios"
}
}
},
"size": 24,
"sort": [
{
"_script": {
"order": "desc",
"script_id": "SortUsuarios",
"type": "number"
}
}
]
}
The must_not filter has a big bunch of values to exclude (around 200 values), but it looks like elasticsearch ignores those values and it includes on the result set. If I try to set only a few values (10 to 20 values) then elasticsearch applies the must_not filter.
Exists some restriction a bout the amount of values in the filters? Exists some way to remove a big amount of results from the query?
terms query is used for passing a list of values not term query.You have to use it like below in your must filter.
{
"query": {
"terms": {
"field_name": [
"VALUE1",
"VALUE2"
]
}
}
}

ElasticSearch ignoring sort when filtered

ElasticSearch Version: 0.90.1, JVM: 1.6.0_51(20.51-b01-457)
I'm trying to do two things with my ElasticSearch query: 1) filter the results based on a boolean (searchable) and "open_date < tomorrow" and 2) two sort by the field "open_date" DESC
This produces the following query:
{
"query": {
"bool": {
"should": [
{
"prefix": {
"name": "foobar"
}
},
{
"query_string": {
"query": "foobar"
}
},
{
"match": {
"name": {
"query": "foobar"
}
}
}
],
"minimum_number_should_match": 1
},
"filtered": {
"filter": {
"and": [
{
"term": {
"searchable": true
}
},
{
"range": {
"open_date": {
"lt": "2013-07-16"
}
}
}
]
}
}
},
"sort": [
{
"open_date": "desc"
}
]
}
However, the results that come back are not being sorted by "open_date". If I remove the filter:
{
"query": {
"bool": {
"should": [
{
"prefix": {
"name": "foobar"
}
},
{
"query_string": {
"query": "foobar"
}
},
{
"match": {
"name": {
"query": "foobar"
}
}
}
],
"minimum_number_should_match": 1
}
},
"sort": [
{
"open_date": "desc"
}
]
}
... the results come back as expected.
Any ideas?
I'm not sure about the Tire code, but the JSON does not correctly construct a filtered query. My guess is that this overflows and causes the sort element to also not be correctly parsed.
A filtered query should be constructed like this (see http://www.elasticsearch.org/guide/reference/query-dsl/filtered-query/ ):
{
"query": {
"filtered": { // Note: this contains both query and filter
"query": {
"bool": {
"should": [
{
"prefix": {
"name": "foobar"
}
},
{
"query_string": {
"query": "foobar"
}
},
{
"match": {
"name": {
"query": "foobar"
}
}
}
],
"minimum_number_should_match": 1
}
},
"filter": {
"and": [
{
"term": {
"searchable": true
}
},
{
"range": {
"open_date": {
"lt": "2013-07-16"
}
}
}
]
}
}
},
"sort": [
{
"open_date": "desc"
}
]
}
Cheers,
Boaz

Resources