Query Vs Filter in Elastic Search - elasticsearch

I am trying to index a document which has three fields first_name, last_name, occupation of type "keyword" and has values XYZ, ABC, DEF respectively.
I have written query using filter for an exact match with AND condition as follows,
"query": {
"bool": {
"filter": [
{"term": {"first_name": "XYZ"}},
{"term": {"last_name": "ABC"}}
]
}
}
This has to return one document, but returns nothing.
I have another query for the same operation,
"query": {
"bool": {
"must": [
{"match": {"first_name": "XYZ"}},
{"match": {"last_name": "ABC"}}
]
}
}
This returns one document.
According to Elasticsearch documentation, I understand that the difference between query and filter is that filter does not score the result. I am not sure why the first query does not return any result. Is my understanding correct?

As the documentation states, there is no difference between query and filter except scoring. Of course, this applies only when both the query and the filter use the same query type. Here you are using two different types - term and match. term is designed for exact comparison, while match is analyzed and used for full-text search.
Take a look at the example below.
Your mapping:
PUT /index_53053054
{
"mappings": {
"_doc": {
"properties": {
"first_name": {
"type": "text"
},
"last_name": {
"type": "text"
},
"occupation": {
"type": "keyword"
}
}
}
}
}
Your document:
PUT index_53053054/_doc/1
{
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
filter query:
GET index_53053054/_search
{
"query": {
"bool": {
"filter": [
{
"match": {
"first_name": "XYZ"
}
},
{
"match": {
"last_name": "ABC"
}
},
{
"term": {
"occupation": "DEF"
}
}
]
}
}
}
and result:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": [
{
"_index": "index_53053054",
"_type": "_doc",
"_id": "1",
"_score": 0,
"_source": {
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
}
]
}
}
Similar must query:
GET index_53053054/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"first_name": "XYZ"
}
},
{
"match": {
"last_name": "ABC"
}
},
{
"term": {
"occupation": "DEF"
}
}
]
}
}
}
and response:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.8630463,
"hits": [
{
"_index": "index_53053054",
"_type": "_doc",
"_id": "1",
"_score": 0.8630463,
"_source": {
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
}
]
}
}
As you can see, the hits are almost the same. The only difference is that the score is not calculated in the filter query, while it is calculated in the must query.
Read more: https://www.elastic.co/guide/en/elasticsearch/reference/6.4/query-filter-context.html

Related

Elastic Search query match on keyword 'OR'

I'm using ElasticSearch 7.0
Given the mapping:
{
"searchquestion": {
"mappings": {
"properties": {
"server": {
"properties": {
"hostname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
I have put the following documents into this index:
{
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
{
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
{
"server": {
"hostname": "server1-linux.loc1.uk"
}
}
I would like to query the exact text of the hostname. Luckily, this can be done because there is an additional keyword type field on this field.
Successful query :
{
"query": {
"bool": {
"must": [
{
"match": {
"server.hostname.keyword": {
"query": "server1-windows.loc2.uk"
}
}
}
]
}
}
}
However, I would like to extend this query string, to include another hostname to search for. In my results, I expect to have both documents returned.
My attempt:
{
"query": {
"bool": {
"must": [
{
"match": {
"server.hostname.keyword": {
"query": "server1-windows.loc2.uk server1-linux.loc1.uk",
"operator": "or"
}
}
}
]
}
}
}
This returns no hits, I suspect because the default analyser is splitting this query up into sections, but I'm actually searching the keyword field which is a full string. I cannot add analyzer: keyword to this query search, as server1-windows.loc2.uk server1-linux.loc1.uk as an exact string won't match anything either.
How can I search for both these strings, as their complete selves?
i.e. "query": ["server1-windows.loc2.uk", "server1-linux.loc1.uk"]
I would also like to use wildcards to match any loc. I would expect
"query": ["server1-windows.*.uk"] to match both windows servers, but I get no hits.
What am I missing?
You can use a query_string query to get your desired result.
Case 1:
Query:
GET server/_search
{
"query": {
"query_string": {
"query": "(server1-windows.loc2.uk) OR (server1-linux.loc1.uk)",
"default_field": "server.hostname.keyword"
}
}
}
Output:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.9808291,
"hits": [
{
"_index": "server",
"_id": "3",
"_score": 0.9808291,
"_source": {
"server": {
"hostname": "server1-linux.loc1.uk"
}
}
},
{
"_index": "server",
"_id": "1",
"_score": 0.4700036,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
},
{
"_index": "server",
"_id": "2",
"_score": 0.4700036,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
}
]
}
}
Case 2: with wildcard(*)
Query:
GET server/_search
{
"query": {
"query_string": {
"query": "server1-windows.*.uk",
"default_field": "server.hostname.keyword"
}
}
}
Output:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "server",
"_id": "1",
"_score": 1,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
},
{
"_index": "server",
"_id": "2",
"_score": 1,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
}
]
}
}

Unique search results from ElasticSearch

I am new to ElasticSearch and can't quite figure out whether what I want is possible or not.
I can query like this:
GET entity/_search
{
"query": {
"bool": {
"must": [
{ "match": { "searchField": "searchValue" }}
]
}
},
"aggs" : {
"uniq_Id" : {
"terms" : { "field" : "Id", "size":500 }
}
}
}
and it will return the top search results and the terms aggregation buckets. But ideally, what I would like the search results to return is only one document (perhaps the top one, it does not matter) for each of the unique Ids defined in the terms aggregation.
You can make use of Terms Aggregation along with the Top Hits Aggregation to give you the result you are looking for.
Now once you do that, specify the size as 1 in the Top Hits Aggregation
Based on your query I've created sample mapping,documents, aggregation query and the response for your reference.
Mapping:
PUT mysampleindex
{
"mappings": {
"mydocs": {
"properties": {
"searchField":{
"type": "text"
},
"Id": {
"type": "keyword"
}
}
}
}
}
Sample Documents:
POST mysampleindex/mydocs/1
{
"searchField": "elasticsearch",
"Id": "1000"
}
POST mysampleindex/mydocs/2
{
"searchField": "elasticsearch is awesome",
"Id": "1000"
}
POST mysampleindex/mydocs/3
{
"searchField": "elasticsearch is awesome",
"Id": "1001"
}
POST mysampleindex/mydocs/4
{
"searchField": "elasticsearch is pretty cool",
"Id": "1001"
}
POST mysampleindex/mydocs/5
{
"searchField": "elasticsearch is pretty cool",
"Id": "1002"
}
Query:
POST mysampleindex/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"searchField": "elasticsearch"
}
}
]
}
},
"aggs": {
"myUniqueIds": {
"terms": {
"field": "Id",
"size": 10
},
"aggs": {
"myDocs": {
"top_hits": { <---- Top Hits Aggregation
"size": 1 <---- Note this
}
}
}
}
}
}
Sample Response:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"myUniqueIds": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1000",
"doc_count": 2,
"myDocs": {
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "1",
"_score": 0.2876821,
"_source": {
"searchField": "elasticsearch",
"Id": "1000"
}
}
]
}
}
},
{
"key": "1001",
"doc_count": 2,
"myDocs": {
"hits": {
"total": 2,
"max_score": 0.25316024,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "3",
"_score": 0.25316024,
"_source": {
"searchField": "elasticsearch is awesome",
"Id": "1001"
}
}
]
}
}
},
{
"key": "1002",
"doc_count": 1,
"myDocs": {
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "5",
"_score": 0.2876821,
"_source": {
"searchField": "elasticsearch is pretty cool",
"Id": "1002"
}
}
]
}
}
}
]
}
}
}
Notice that I am not returning any bool results in the above, the search result you are looking for comes in the form of Top Hits Aggregation.
Hope this helps!

ElasticSearch, DELETE documents by exact match (5.1)

I need to delete the documents that exactly match the values given in my Elasticsearch query, but I have a problem with the request: it also deleted documents with the same prefix. What do I have to do to fix my script so that it deletes only the correct ones? (Exact match on two conditions)
curl -X POST elasticDomain/index/_delete_by_query -d '{"query": {
"bool": {
"must": [
{
"term": {
"component.name": {
"query" : "prefix-component-one"
}
}
},
{
"term": {
"enviroment": "qa"
}
}
]
}}}'
Data example, when I want to delete only information about component-one:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "index",
"_type": "event",
"_id": "c04b0f94-4995-11e8-a9f5-a22f517abdda",
"_score": 1,
"_source": {
"component": {
"name": "prefix-component-two",
"qualifier": "TRK"
},
"enviroment": "history",
"timestamp": "2018-04-26T16:06:54.000Z"
}
},
{
"_index": "index",
"_type": "event",
"_id": "bf80d63e-4995-11e8-a9f5-a22f517abdda",
"_score": 1,
"_source": {
"component": {
"name": "prefix-component-one",
"qualifier": "TRK"
},
"enviroment": "qa",
"timestamp": "2018-04-26T16:06:54.000Z"
}
}
]
}
}
I fixed it with match_phrase:
curl -X POST elasticDomain/index/_delete_by_query -d '{"query": {
"bool": {
"must": [
{
"match_phrase": {
"component.name": {
"query" : "prefix-component-one"
}
}
},
{
"term": {
"enviroment": "qa"
}
}
]
}}}

elasticsearch parent/child query logic

elastic version: 5.0.1
define mapping:
PUT test
{
"mappings": {
"my_parent": {
"properties": {
"key": {
"type": "keyword"
}
}
},
"my_child": {
"_parent": {
"type": "my_parent"
},
"properties": {
"key": {
"type": "keyword"
}
}
}
}
}
add demo data:
POST _bulk
{"update": {"_index": "test","_type": "my_parent","_id": "1"}}
{"doc": {"key": 1},"doc_as_upsert": true}
{"update": {"_index": "test","_type": "my_child","_parent": 1,"_id": "11"}}
{"doc": {"key": 11},"doc_as_upsert": true}
{"update": {"_index": "test","_type": "my_child","_parent": 1,"_id": "12"}}
{"doc": {"key": 12},"doc_as_upsert": true}
query:
POST test/my_parent/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"key": 3
}
},
{
"has_child": {
"type": "my_child",
"inner_hits": {
"name": "a"
},
"query": {
"term": {
"key": 11
}
}
}
}
]
}
},
{
"has_child": {
"type": "my_child",
"inner_hits": {
"name": "b"
},
"query": {
"term": {
"key": 12
}
}
}
}
]
}
}
}
}
}
result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": [
{
"_index": "test",
"_type": "my_parent",
"_id": "1",
"_score": 0,
"_source": {
"key": 1
},
"inner_hits": {
"a": {
"hits": {
"total": 1,
"max_score": 0.9808292,
"hits": [
{
"_type": "my_child",
"_id": "11",
"_score": 0.9808292,
"_routing": "1",
"_parent": "1",
"_source": {
"key": 11
}
}
]
}
},
"b": {
"hits": {
"total": 1,
"max_score": 0.9808292,
"hits": [
{
"_type": "my_child",
"_id": "12",
"_score": 0.9808292,
"_routing": "1",
"_parent": "1",
"_source": {
"key": 12
}
}
]
}
}
}
}
]
}
}
question here:
Do the 'must'/'should'/'must_not' clauses have the same meaning in a plain search and in a parent/child search?
Why the result of inner_hits with name 'a' is returned?
The 'must'|'should'|'must_not' clauses have different meanings. Let me explain them with an example of a plain search.
You can understand these clauses through their equivalent SQL queries.
must: The clause (query) must appear in matching documents and will contribute to the score.
SQL: select * from user where country_code = 'US' AND state_code = 'NY'
Query DSL:
POST _search
{
"query": {
"bool": {
"must": [
{"term": {"country_code": "US"}},
{"term": {"state_code": "NY"}}
]
}
}
}
should: At least one of these clauses must match, like logical OR.
SQL: select * from user where country_code = 'US' OR state_code = 'NY'
Query DSL:
POST _search
{
"query": {
"bool": {
"should": [
{"term": {"country_code": "US"}},
{"term": {"state_code": "NY"}}
]
}
}
}
must_not: Condition must not match the documents.
SQL: select * from user where country_code != 'US' AND state_code != 'NY'
Query DSL:
POST _search
{
"query": {
"bool": {
"must_not": [
{"term": {"country_code": "US"}},
{"term": {"state_code": "NY"}}
]
}
}
}
Why the result of inner_hits with name 'a' is returned?
Because you put two has_child conditions inside the should filter. As explained above, it matches documents from (inner_hits.name = a ..) OR (inner_hits.name = b ..)

What is the query required for fetching full-text with delimiter in elasticsearch

Assuming I have a document like this in elasticSearch :
{
"videoName": "taylor.mp4",
"type": "long"
}
I tried full-text search using the DSL query:
{
"query": {
"match":{
"videoName": "taylor"
}
}
}
I need to get the above document, but I don't get it. If I specify taylor.mp4, it returns the document.
So, I would like to know, how to make full-text search with delimiters.
Edit after KARTHEEK answer:
The regexp fetches the taylor.mp4 document. Take the situation, where the document in video index are:
{
"videoName": "Akon - smack that.mp4",
"type": "long"
}
So, the query for retrieving this document can be ,
{
"query": {
"match":{
"videoName": "smack that"
}
}
}
In this case, the document will be retrieved, since we use smack in the query string: match does the full-text search and gets us the document. But say I only know the keyword that; then match doesn't get the document, and I need to use regexp for that.
{
"query": {
"regexp":{
"videoName": "smack.* that.*"
}
}
}
On the other hand, if I switch to regexp and turn all my query strings into smack.* that.*, this will also not retrieve any documents. And we don't know which word will have the suffix .mp4. So my question is: we need to do the full-text search with match, but it should also handle the delimiters. Is there any other way?
Edit after Richa asked the mapping of index
for http://localhost:9200/example/videos/_mapping
{
"example": {
"mappings": {
"videos": {
"properties": {
"query": {
"properties": {
"match": {
"properties": {
"videoName": {
"type": "string"
}
}
}
}
},
"type": {
"type": "string"
},
"videoName": {
"type": "string"
}
}
}
}
}
}
Based on the query you mentioned above, we can use a regular expression to get the result. Please find the result below, and let me know if there is anything else you want.
curl -XGET "http://localhost:9200/test/sample/_search" -d'
{
"query": {
"regexp":{
"videoName": "taylor.*"
}
}
}'
Result:
{
"took": 22,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "sample",
"_id": "1",
"_score": 1,
"_source": {
"videoName": "taylor.mp4",
"type": "long"
}
}
]
}
}
Please use this mapping
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"videoName": {
"type": "string",
"term_vector": "yes"
}
}
}
}
}
After that you need to index a document that you mentioned earlier:
PUT test_index/doc/1
{
"videoName": "Akon - smack that.mp4",
"type": "long"
}
Output:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.15342641,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.15342641,
"_source": {
"videoName": "Akon - smack that.mp4",
"type": "long"
}
}
]
}
}
Query to get results:
GET /test_index/doc/1/_termvector?fields=videoName
Results:
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_version": 1,
"found": true,
"took": 1,
"term_vectors": {
"videoName": {
"field_statistics": {
"sum_doc_freq": 3,
"doc_count": 1,
"sum_ttf": 3
},
"terms": {
"akon": {
"term_freq": 1
},
"smack": {
"term_freq": 1
},
"that.mp4": {
"term_freq": 1
}
}
}
}
}
By using this we will search based on "smack"
POST /test_index/_search
{
"query": {
"match": {
"_all": "smack"
}
}
}
Result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.15342641,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.15342641,
"_source": {
"videoName": "Akon - smack that.mp4",
"type": "long"
}
}
]
}
}

Resources