Elasticsearch is giving unnecessary records on match query - elasticsearch

I want to get All the documents whose dateofbirth is having substring "-11-09".
This is my elasticsearch query :
{ "query": { "bool" : { "must": { "match": { "dobdata": ".*-11-09.*"} } } } }
And the result i am getting is
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 5.0782137,
"hits": [
{
"_index": "userindexv1",
"_type": "usertype",
"_id": "58f9a9d1acf8c47037000038",
"_score": 5.0782137,
"_source": {
"fullname": "Eshwar ",
"fullname_raw": "Eshwar ",
"mobile1": "7222222256",
"uid": "UIDS1010",
"mobile2": "",
"classname": "Class 5",
"classname_raw": "Class 5",
"divid": 63,
"category": "S",
"dobdata": "2010-11-09"
}
},
{
"_index": "userindexv1",
"_type": "usertype",
"_id": "57960b35acf8c4c43000002c",
"_score": 1.259227,
"_source": {
"fullname": "Sindhu ",
"fullname_raw": "Sindhu ",
"mobile1": "9467952335",
"uid": "UIDS1006",
"mobile2": "",
"classname": "class 1s Group for class g",
"classname_raw": "class 1s Group for class g",
"divid": 63,
"category": "S",
"dobdata": "2012-11-08"
}
},
{
"_index": "userindexv1",
"_type": "usertype",
"_id": "58eb62d2acf8c4d43300002f",
"_score": 1.1471639,
"_source": {
"fullname": "Himanshu ",
"fullname_raw": "Himanshu ",
"mobile1": "9898785484",
"uid": "",
"mobile2": "",
"classname": "Play Group",
"classname_raw": "Play Group",
"divid": 63,
"category": "S",
"dobdata": "2012-11-08"
}
},
{
"_index": "userindexv1",
"_type": "usertype",
"_id": "580dbe5bacf8c4b82300002a",
"_score": 1.1471639,
"_source": {
"fullname": "Sai Bhargav ",
"fullname_raw": "Sai Bhargav ",
"mobile1": "9739477159",
"uid": "",
"mobile2": "7396226318",
"classname": "class 1s Group for class g",
"classname_raw": "class 1s Group for class g",
"divid": 63,
"category": "S",
"dobdata": "2012-11-07"
}
}
]
}}
I am getting records whose date of birth does not contain the string "-11-09". I tried to work around it, but I am not able to find the solution.
I am new to elasticsearch. I want only the first record. Can anyone please help me out. Sorry for bad english.

Even I faced same problem and I solved it by doing two things.
1. Changed the format of date of birth from Y-m-d to YMd and made the index as not_analyzed.
2. Used a wildcard query instead of a match query
{
"query": {
"wildcard": {
"dobdata": {
"value": "*Nov09*"
}
}
}
}
It solved my problem. Hope this will solve your problem too.

For getting results with month-date=11-09 use following query.
The mapping for dobdata is
"dobdata": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
Query is:
curl -XGET "http://localhost:9200/userindexv1/_search" -H 'Content-Type:
application/json' -d'
{
"query": {
"wildcard": {
"dobdata.keyword":"*-11-09*"
}
}
}'
Also use multifield mapping instead of _raw fields Refer here.

#K Sathish so you use elasticsearch 2.x? A string type for dates is not very comfortable to work with. I suggest you change the datatype from string to date, so you can also use range queries. Once changed to the date type, you can write your query in elastic 2.x this way:
{
"query": {
"filtered": {
"filter": {
"script": {
"script": "doc.dobdata.date.monthOfYear == 11 && doc.dobdata.date.dayOfMonth == 9"
}
}
}
}
}

Related

ElasticSearch sorting by more conditions

I have index with simple data and I have to filter and sort it like this:
Records are like this:
{
"name": "Product ABC variant XYZ subvariant JKL",
"date": "2023-01-03T10:34:39+01:00"
}
And I'm searching name, where it is: "Product FGH"
Get records with exact match (field name) and sort them by date (field date) DESC
if nothing found in 1) or if there is not exact match, but similar records, then the rest records sort by default score.
Is it possible to do it in one elasticsearch request? And how should the whole query look?
Thanks
What you are looking for is running Elasticsearch queries based on conditions, which is not possible in a single query; you need to fire the first query, and if it doesn't return any hit, fire the second one.
Using script_score, you can do it the way you want. Convert the date to milliseconds and assign it to the "_score" field for an exact match. For a non-exact match, you can simply return the _score field.
For an exact match, it will be sorted by date field desc.
For a non-exact match, it will be sorted by the _score field.
For example:
Mapping:
{
"mappings": {
"properties": {
"name" : {"type": "keyword"},
"date" : {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}
}
}
}
Insert:
PUT func/_doc/1
{
"name" : "Product ABC variant XYZ subvariant JKL",
"date" : "2023-01-03 10:34:39"
}
PUT func/_doc/2
{
"name" : "Product ABC variant XYZ subvariant JKL",
"date" : "2022-12-03 10:33:39"
}
PUT func/_doc/3
{
"name" : "Product ABC",
"date" : "2022-11-03 10:33:39"
}
PUT func/_doc/4
{
"name" : "Product ABC",
"date" : "2023-01-03 10:33:39"
}
Query:
GET /func/_search
{
"query": {
"script_score": {
"query": {
"match_all": {}
},
"script": {
"source": "if (doc['name'].value == params.search_term) { return doc['date'].value.toInstant().toEpochMilli(); } else return _score",
"params": {
"search_term": "Product ABC"
}
}
}
}
}
output:
{
"took": 29,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 4,
"relation": "eq"
},
"max_score": 1672742040000,
"hits": [
{
"_index": "func",
"_id": "4",
"_score": 1672742040000,
"_source": {
"name": "Product ABC",
"date": "2023-01-03 10:33:39"
}
},
{
"_index": "func",
"_id": "3",
"_score": 1667471640000,
"_source": {
"name": "Product ABC",
"date": "2022-11-03 10:33:39"
}
},
{
"_index": "func",
"_id": "1",
"_score": 1,
"_source": {
"name": "Product ABC variant XYZ subvariant JKL",
"date": "2023-01-03 10:34:39"
}
},
{
"_index": "func",
"_id": "2",
"_score": 1,
"_source": {
"name": "Product ABC variant XYZ subvariant JKL",
"date": "2022-12-03 10:33:39"
}
}
]
}
}

How to perform elastic search _update_by_query using painless script - for complex condition

Can you suggest how to update documents (with a script - i guess painless) that based on condition fields?
its purpose is to add/or remove values from the document
so if I have those input documents:
doc //1st
{
"Tags":["foo"],
"flag":"true"
}
doc //2nd
{
"flag":"true"
}
doc //3rd
{
"Tags": ["goo"],
"flag":"false"
}
And I want to perform something like this:
Update all documents that have "flag=true" with:
Added tags: "me", "one"
Deleted tags: "goo","foo"
so expected result should be something like:
doc //1st
{
"Tags":["me","one"],
"flag":"true"
}
doc //2nd
{
"Tags":["me","one"],
"flag":"true"
}
doc //3rd
{
"Tags": ["goo"],
"flag":"false"
}
Create mapping:
PUT documents
{
"mappings": {
"document": {
"properties": {
"tags": {
"type": "keyword",
"index": "not_analyzed"
},
"flag": {
"type": "boolean"
}
}
}
}
}
Insert first doc:
PUT documents/document/1
{
"tags":["foo"],
"flag": true
}
Insert second doc (keep in mind that for empty tags I specified empty tags array because if you don't have field at all you will need to check in script does field exists):
PUT documents/document/2
{
"tags": [],
"flag": true
}
Add third doc:
PUT documents/document/3
{
"tags": ["goo"],
"flag": false
}
And then run _update_by_query which has two arrays as params one for elements to add and one for elements to remove:
POST documents/_update_by_query
{
"script": {
"inline": "for(int i = 0; i < params.add_tags.size(); i++) { if(!ctx._source.tags.contains(params.add_tags[i].value)) { ctx._source.tags.add(params.add_tags[i].value)}} for(int i = 0; i < params.remove_tags.size(); i++) { if(ctx._source.tags.contains(params.remove_tags[i].value)){ctx._source.tags.removeAll(Collections.singleton(params.remove_tags[i].value))}}",
"params": {
"add_tags": [
{"value": "me"},
{"value": "one"}
],
"remove_tags": [
{"value": "goo"},
{"value": "foo"}
]
}
},
"query": {
"bool": {
"must": [
{"term": {"flag": true}}
]
}
}
}
If you then do following search:
GET documents/_search
you will get following result (which I think is what you want):
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [{
"_index": "documents",
"_type": "document",
"_id": "2",
"_score": 1,
"_source": {
"flag": true,
"tags": [
"me",
"one"
]
}
},
{
"_index": "documents",
"_type": "document",
"_id": "1",
"_score": 1,
"_source": {
"flag": true,
"tags": [
"me",
"one"
]
}
},
{
"_index": "documents",
"_type": "document",
"_id": "3",
"_score": 1,
"_source": {
"tags": [
"goo"
],
"flag": false
}
}
]
}
}

Get specific fields from index in elasticsearch

I have an index in elastic-search.
Sample structure :
{
"Article": "Article7645674712",
"Genre": "Genre92231455",
"relationDesc": [
"Article",
"Genre"
],
"org": "user",
"dateCreated": {
"date": "08/05/2015",
"time": "16:22 IST"
},
"dateModified": "08/05/2015"
}
From this index i want to retrieve selected fields: org and dateModified.
I want result like this
{
"took": 265,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 28,
"max_score": 1,
"hits": [
{
"_index": "couchrecords",
"_type": "couchbaseDocument",
"_id": "3",
"_score": 1,
"_source": {
"doc": {
"org": "user",
"dateModified": "08/05/2015"
}
}
},
{
"_index": "couchrecords",
"_type": "couchbaseDocument",
"_id": "4",
"_score": 1,
"_source": {
"doc": {
"org": "user",
"dateModified": "10/05/2015"
}
}
}
]
}
}
How to query elastic-search to get only selected specific fields ?
You can retrieve only a specific set of fields in the result hits using the _source parameter like this:
curl -XGET localhost:9200/couchrecords/couchbaseDocument/_search?_source=org,dateModified
Or in this format:
curl -XPOST localhost:9200/couchrecords/couchbaseDocument/_search -d '{
"_source": ["doc.org", "doc.dateModified"], <---- you just need to add this
"query": {
"match_all":{} <----- or whatever query you have
}
}'
That's easy. Considering any query of this format :
{
"query": {
...
},
}
You'll just need to add the fields field into your query which in your case will result in the following :
{
"query": {
...
},
"fields" : ["org","dateModified"]
}
{
"_source" : ["org","dateModified"],
"query": {
...
}
}
Check ElasticSearch source filtering.

Elastic Search- Fetch Distinct Tags

I have document of following format:
{
_id :"1",
tags:["guava","apple","mango", "banana", "gulmohar"]
}
{
_id:"2",
tags: ["orange","guava", "mango shakes", "apple pie", "grammar"]
}
{
_id:"3",
tags: ["apple","grapes", "water", "gulmohar","water-melon", "green"]
}
Now, I want to fetch unique tag values from the 'tags' field across all documents, starting with the prefix g*, so that these unique tags can be displayed by a tag suggester (the Stackoverflow site is an example).
For example: Whenever user types, 'g':
"guava", "gulmohar", "grammar", "grapes" and "green" should be returned as a result.
ie. the query should returns distinct tags with prefix g*.
I tried everywhere, browsed the whole documentation, and searched the ES forum, but I didn't find any clue, much to my dismay.
I tried aggregations, but aggregations returns the distinct count for whole words/token in tags field. It does not return the unique list of tags starting with 'g'.
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"query_string": {
"allow_leading_wildcard": false,
"fields": [
"tags"
],
"query": "g*",
"fuzziness":0
}
}
]
}
},
"filter": {
//some condition on other field...
}
}
},
"aggs": {
"distinct_tags": {
"terms": {
"field": "tags",
"size": 10
}
}
},
result of above: guava(w), apple(q), mango(1),...
Can someone please suggest me the correct way to fetch all the distinct tags with prefix input_prefix*?
It's a bit of a hack, but this seems to accomplish what you want.
I created an index and added your docs:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"tags":["guava","apple","mango", "banana", "gulmohar"]}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"tags": ["orange","guava", "mango shakes", "apple pie", "grammar"]}
{"index":{"_index":"test_index","_type":"doc","_id":3}}
{"tags": ["guava","apple","grapes", "water", "grammar","gulmohar","water-melon", "green"]}
Then I used a combination of prefix query and highlighting as follows:
POST /test_index/_search
{
"query": {
"prefix": {
"tags": {
"value": "g"
}
}
},
"fields": [ ],
"highlight": {
"pre_tags": [""],
"post_tags": [""],
"fields": {
"tags": {}
}
}
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1,
"highlight": {
"tags": [
"guava",
"gulmohar"
]
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "2",
"_score": 1,
"highlight": {
"tags": [
"guava",
"grammar"
]
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "3",
"_score": 1,
"highlight": {
"tags": [
"guava",
"grapes",
"grammar",
"gulmohar",
"green"
]
}
}
]
}
}
Here is the code I used:
http://sense.qbox.io/gist/c14675ee8bd3934389a6cb0c85ff57621a17bf11
What you're trying to do amounts to autocomplete, of course, and there are perhaps better ways of going about that than what I posted above (though they are a bit more involved). Here are a couple of blog posts we did about ways to set up autocomplete:
http://blog.qbox.io/quick-and-dirty-autocomplete-with-elasticsearch-completion-suggest
http://blog.qbox.io/multi-field-partial-word-autocomplete-in-elasticsearch-using-ngrams
As per #Sloan Ahrens advice, I did following:
Updated the mapping:
"tags": {
"type": "completion",
"context": {
"filter_color": {
"type": "category",
"default": "",
"path": "fruits.color"
},
"filter_type": {
"type": "category",
"default": "",
"path": "fruits.type"
}
}
}
Reference: ES API Guide
Inserted these indexes:
{
_id :"1",
tags:{input" :["guava","apple","mango", "banana", "gulmohar"]},
fruits:{color:'bar',type:'alice'}
}
{
_id:"2",
tags:{["orange","guava", "mango shakes", "apple pie", "grammar"]}
fruits:{color:'foo',type:'bob'}
}
{
_id:"3",
tags:{ ["apple","grapes", "water", "gulmohar","water-melon", "green"]}
fruits:{color:'foo',type:'alice'}
}
I don't need to modify much, my original index. Just added input before tags array.
POST rescu1/_suggest?pretty'
{
"suggest": {
"text": "g",
"completion": {
"field": "tags",
"size": 10,
"context": {
"filter_color": "bar",
"filter_type": "alice"
}
}
}
}
gave me the desired output.
I accepted #Sloan Ahrens answer as his suggestions worked like a charm for me, and he showed me the right direction.

ElasticSearch - prefix with space and filtering

My ElasticSearch server contains documents of the following form:
{
"_index": "xindex",
"_type": "xtype",
"_id": "1100",
"_score": 3.00010,
"_source": {
"_id": "2333345",
"field1": "11111111111111",
"field2": "y",
"name": "hello world",
}
}
I need to get all the documents with name prefix "hello wo" and field2 "y".
Tried a lot of queries and none have worked. There are all kind of solutions for the prefix with space issue, but when adding the filtering/another query for field2, results get corrupted.
Thanks.
You can achieve this in 3 steps :
Change your mapping of field name to not_analyzed
Use a match_phrase_prefix query (documentation here)
Filter this query results by wrapping it in a filtered query and use a term filter on the field2 with value "y"
You can see it working with the following dataset :
PUT test/prefix/_mapping
{
"properties": {
"name":{
"type": "string",
"index": "not_analyzed"
}
}
}
//should match
PUT test/prefix/2333345
{
"field1": "11111111111111",
"field2": "y",
"name": "hello world"
}
//should match
PUT test/prefix/1112223
{
"field1": "22222222222222",
"field2": "y",
"name": "hello wombat"
}
//should not match (field2 value is different)
PUT test/prefix/4445556
{
"field1": "33333333333333",
"field2": "z",
"name": "hello world"
}
//should not match (second word not starting with wo)
PUT test/prefix/4445556
{
"field1": "33333333333333",
"field2": "y",
"name": "hello zombie"
}
Then, the query is :
GET test/prefix/_search
{
"query": {
"filtered": {
"query": {
"match_phrase_prefix" : {
"name" : "hello wo"
}
},
"filter": {
"term": {
"field2": "y"
}
}
}
}
}
which outputs the documents 1112223 and 2333345 as expected :
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1.592944,
"hits": [
{
"_index": "test",
"_type": "prefix",
"_id": "2333345",
"_score": 1.592944,
"_source": {
"field1": "11111111111111",
"field2": "y",
"name": "hello world"
}
},
{
"_index": "test",
"_type": "prefix",
"_id": "1112223",
"_score": 1.592944,
"_source": {
"field1": "22222222222222",
"field2": "y",
"name": "hello wombat"
}
}
]
}
}
use simple_query_string, This approach solved my issue:
{
"query": {
"bool": {
"should": [
{
"simple_query_string": {
"fields": [
"name"
],
"default_operator": "and",
"query": "(hello world*)"
}
}
]
}
}
}

Resources