Elastic Search query match on keyword 'OR' - elasticsearch

I'm using ElasticSearch 7.0
Given the mapping:
{
"searchquestion": {
"mappings": {
"properties": {
"server": {
"properties": {
"hostname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
I have put the following documents into this index:
{
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
{
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
{
"server": {
"hostname": "server1-linux.loc1.uk"
}
}
I would like to query the exact text of the hostname. Luckily, this can be done because there is an additional keyword type field on this field.
Successful query :
{
"query": {
"bool": {
"must": [
{
"match": {
"server.hostname.keyword": {
"query": "server1-windows.loc2.uk"
}
}
}
]
}
}
}
However, I would like to extend this query string, to include another hostname to search for. In my results, I expect to have both documents returned.
My attempt:
{
"query": {
"bool": {
"must": [
{
"match": {
"server.hostname.keyword": {
"query": "server1-windows.loc2.uk server1-linux.loc1.uk",
"operator": "or"
}
}
}
]
}
}
}
This returns no hits. I suspect this is because the default analyser splits the query into separate tokens, whereas I'm actually searching the keyword field, which holds the full string. I cannot add analyzer: keyword to this search either, because server1-windows.loc2.uk server1-linux.loc1.uk taken as one exact string won't match anything.
How can I search for both these strings, as their complete selves?
i.e. "query": ["server1-windows.loc2.uk", "server1-linux.loc1.uk"]
I would also like to use wildcards to match any loc. I would expect
"query": ["server1-windows.*.uk"] to match both windows servers, but I get no hits.
What am I missing?

You can use a query_string query to get your desired result.
Case 1:
Query:
GET server/_search
{
"query": {
"query_string": {
"query": "(server1-windows.loc2.uk) OR (server1-linux.loc1.uk)",
"default_field": "server.hostname.keyword"
}
}
}
Output:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.9808291,
"hits": [
{
"_index": "server",
"_id": "3",
"_score": 0.9808291,
"_source": {
"server": {
"hostname": "server1-linux.loc1.uk"
}
}
},
{
"_index": "server",
"_id": "1",
"_score": 0.4700036,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
},
{
"_index": "server",
"_id": "2",
"_score": 0.4700036,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
}
]
}
}
Case 2: with wildcard(*)
Query:
GET server/_search
{
"query": {
"query_string": {
"query": "server1-windows.*.uk",
"default_field": "server.hostname.keyword"
}
}
}
Output:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "server",
"_id": "1",
"_score": 1,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
},
{
"_index": "server",
"_id": "2",
"_score": 1,
"_source": {
"server": {
"hostname": "server1-windows.loc2.uk"
}
}
}
]
}
}

Related

Elasticsearch returns NullPointerException during inner_hits query

I have an index that stores nested documents. I want to see these nested documents, so I used 'inner_hits' in the request, but Elasticsearch returns a NullPointerException. Has anyone run into this problem?
Request to elasticsearch using Postman:
GET http://localhost/my-index/_search
{
"query": {
"nested": {
"path": "address_object",
"query": {
"bool": {
"must": {
"term": {"address_object.city": "Paris"}
}
}
},
"inner_hits" : {}
}
}
}
Response with status code 200:
{
"took": 161,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 1,
"skipped": 0,
"failed": 1,
"failures": [
{
"shard": 0,
"index": "my-index",
"node": "DWdD83KaTmUiodENQkGDww",
"reason": {
"type": "null_pointer_exception",
"reason": null
}
}
]
},
"hits": {
"total": 6500039,
"max_score": 2.1761138,
"hits": []
}
}
Elasticsearch version: 6.2.4
Lucene version: 7.2.1
Update:
Mapping:
{
"my-index": {
"mappings": {
"mytype": {
"dynamic": "false",
"_source": {
"enabled": false
},
"properties": {
"adverts_count": {
"type": "integer",
"store": true
},
...
"address_object": {
"type": "nested",
"properties": {
"adverts_count": {
"type": "integer",
"store": true
},
"city": {
"type": "keyword",
"store": true
}
}
},
...
Sample document:
{
"_index": "my-index",
"_type": "mytype",
"_id": "XDWrGncBdwNBWGEagAM2",
"_score": 2.1587489,
"fields": {
"is_target_page_shown": [
0
],
"updated_at": [
1612264276
],
"is_shown": [
0
],
"nb_queries": [
1
],
"search_query": [
"phone"
],
"target_category": [
15
],
"adverts_count": [
1
]
}
}
Extra information:
If I remove "inner_hits": {} from the search request, Elasticsearch returns the nested documents (_index, _type, _id, _score), but not the other fields (e.g. city).
Also, as suggested in the comments, I tried setting ignore_unmapped to true, but it didn't help; I get the same NullPointerException.
I tried reproducing your issue, but since you have not provided proper sample documents (the one you provided doesn't have the address_object properties), I used your mapping and the sample documents below.
PUT index-name/_doc/1
{
"address_object" :{
"adverts_count" : 1,
"city": "paris"
}
}
PUT index-name/_doc/2
{
"address_object" :{
"adverts_count" : 1,
"city": "blr"
}
}
When I run the same search that you provided:
POST 71907588/_search
{
"query": {
"nested": {
"path": "address_object",
"query": {
"bool": {
"must": {
"term": {
"address_object.city": "paris"
}
}
}
},
"inner_hits": {}
}
}
}
I get a proper response that matches paris as the city, as shown in the search response below.
"hits": [
{
"_index": "71907588",
"_id": "1",
"_score": 0.6931471,
"_source": {
"address_object": {
"adverts_count": 1,
"city": "paris"
}
},
"inner_hits": {
"address_object": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.6931471,
"hits": [
{
"_index": "71907588",
"_id": "1",
"_nested": {
"field": "address_object",
"offset": 0
},
"_score": 0.6931471,
"_source": {
"city": "paris",
"adverts_count": 1
}
}
]
}
}
}
}
]

Elasticsearch returns documents with a query must_not exists

Elasticsearch: 6.5.4
Issue: I'm executing a bool query (sample to follow) where I'm checking for the existence of a specific field. The issue is, I'm getting results back where the field does exist but has an empty array.
My question is, how do I properly execute a query and only get results where nlp is not added to the document at all.
Sample query:
{
"size": 100,
"sort": [{
"publishedAt": {
"order": "asc"
}
}],
"_source": {
"includes": ["nlp"]
},
"query": {
"bool": {
"must_not": {
"exists": {
"field": "nlp.categories.gcp"
}
}
}
}
}
Sample Mapping:
(This was automatically created by Elastic Search, with the exception of the null_value, I tried adding that).
{
"mapping": {
"article": {
"properties": {
"nlp": {
"properties": {
"categories": {
"properties": {
"gcp": {
"properties": {
"confidence": {
"type": "float"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"null_value": "[]",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
}
}
}
Sample Result:
{
"took": 68,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1126581,
"max_score": null,
"hits": [
{
"_index": "news",
"_type": "article",
"_id": "UTuVmmsBE1H01hY9Rn6i",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": []
}
}
},
"sort": [
1509940860000
]
},
{
"_index": "news",
"_type": "article",
"_id": "2w6PmmsBIpi-jAhhO13F",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": []
}
}
},
"sort": [
1510027260000
]
}
]
}
}
When the nlp.categories.gcp has values in it, a typical response would look like this.
{
"took": 26,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 475690,
"max_score": null,
"hits": [
{
"_index": "news",
"_type": "article",
"_id": "6Q6JmmsBIpi-jAhhAlcm",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": [
{
"confidence": 0.8999999761581421,
"name": "/Travel/Hotels & Accommodations"
}
]
}
}
},
"sort": [
1510215565000
]
},
{
"_index": "news",
"_type": "article",
"_id": "rzunmmsBE1H01hY9sLyE",
"_score": null,
"_source": {
"nlp": {
"categories": {
"gcp": [
{
"confidence": 0.9399999976158142,
"name": "/Travel/Hotels & Accommodations"
}
]
}
}
},
"sort": [
1510228881000
]
}
]
}
}

Query Vs Filter in Elastic Search

I am trying to index a document which has three fields first_name, last_name, occupation of type "keyword" and has values XYZ, ABC, DEF respectively.
I have written query using filter for an exact match with AND condition as follows,
"query": {
"bool": {
"filter": [
{"term": {"first_name": "XYZ"}},
{"term": {"last_name": "ABC"}}
]
}
}
This has to return one document, but returns nothing.
I have another query for the same operation,
"query": {
"bool": {
"must": [
{"match": {"first_name": "XYZ"}},
{"match": {"last_name": "ABC"}}
]
}
}
This returns one document.
According to Elasticsearch documentation, I understand that the difference between query and filter is that filter does not score the result. I am not sure why the first query does not return any result. Is my understanding correct?
As the documentation states, there is no difference between query and filter except scoring. Of course, this applies only when both the query and the filter use the same query type. Here you are using two different types: term and match. term is designed for exact comparison, while match is analyzed and used for full-text search.
Take a look at the example below.
Your mapping:
PUT /index_53053054
{
"mappings": {
"_doc": {
"properties": {
"first_name": {
"type": "text"
},
"last_name": {
"type": "text"
},
"occupation": {
"type": "keyword"
}
}
}
}
}
Your document:
PUT index_53053054/_doc/1
{
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
filter query:
GET index_53053054/_search
{
"query": {
"bool": {
"filter": [
{
"match": {
"first_name": "XYZ"
}
},
{
"match": {
"last_name": "ABC"
}
},
{
"term": {
"occupation": "DEF"
}
}
]
}
}
}
and result:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": [
{
"_index": "index_53053054",
"_type": "_doc",
"_id": "1",
"_score": 0,
"_source": {
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
}
]
}
}
Similar must query:
GET index_53053054/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"first_name": "XYZ"
}
},
{
"match": {
"last_name": "ABC"
}
},
{
"term": {
"occupation": "DEF"
}
}
]
}
}
}
and response:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.8630463,
"hits": [
{
"_index": "index_53053054",
"_type": "_doc",
"_id": "1",
"_score": 0.8630463,
"_source": {
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
}
]
}
}
As you can see, the hits are almost the same. The only difference is that the score is not calculated in the filter query, while it is in the must query.
Read more: https://www.elastic.co/guide/en/elasticsearch/reference/6.4/query-filter-context.html

ElasticSearch, DELETE documents by exact match (5.1)

I need to delete only the documents that exactly match the values given in the Elasticsearch query, but my request has a problem: it also deleted documents that share the same prefix. What do I have to do to fix my script so that it deletes only the correct ones? (Exact match on two conditions)
curl -X POST elasticDomain/index/_delete_by_query -d '{"query": {
"bool": {
"must": [
{
"term": {
"component.name": {
"query" : "prefix-component-one"
}
}
},
{
"term": {
"enviroment": "qa"
}
}
]
}}}'
Data example, when I want to delete only information about component-one:
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "index",
"_type": "event",
"_id": "c04b0f94-4995-11e8-a9f5-a22f517abdda",
"_score": 1,
"_source": {
"component": {
"name": "prefix-component-two",
"qualifier": "TRK"
},
"enviroment": "history",
"timestamp": "2018-04-26T16:06:54.000Z"
}
},
{
"_index": "index",
"_type": "event",
"_id": "bf80d63e-4995-11e8-a9f5-a22f517abdda",
"_score": 1,
"_source": {
"component": {
"name": "prefix-component-one",
"qualifier": "TRK"
},
"enviroment": "qa",
"timestamp": "2018-04-26T16:06:54.000Z"
}
}
]
}
}
I fixed it with match_phrase:
curl -X POST elasticDomain/index/_delete_by_query -d '{"query": {
"bool": {
"must": [
{
"match_phrase": {
"component.name": {
"query" : "prefix-component-one"
}
}
},
{
"term": {
"enviroment": "qa"
}
}
]
}}}'

What is the query required for fetching full-text with delimiter in elasticsearch

Assuming I have a document like this in elasticSearch :
{
"videoName": "taylor.mp4",
"type": "long"
}
I tried full-text search using the DSL query:
{
"query": {
"match":{
"videoName": "taylor"
}
}
}
I need to get the above document, but I don't get it. If I specify taylor.mp4, it returns the document.
So, I would like to know how to do a full-text search with delimiters.
Edit after KARTHEEK answer:
The regexp fetches the taylor.mp4 document. Take the situation where the document in the video index is:
{
"videoName": "Akon - smack that.mp4",
"type": "long"
}
So, the query for retrieving this document can be:
{
"query": {
"match":{
"videoName": "smack that"
}
}
}
In this case, the document will be retrieved, since we use smack in the query string: match does the full-text search and gets us the document. But say I only know the keyword that; then match doesn't get the document, and I need to use regexp for that.
{
"query": {
"regexp":{
"videoName": "smack.* that.*"
}
}
}
On the other hand, if I switch to regexp and turn all my query strings into the form smack.* that.*, this will also not retrieve any documents. And we don't know which word will have the .mp4 suffix. So my question is: we need to do the full-text search with match, and it should also handle the delimiters. Is there any other way?
Edit after Richa asked for the mapping of the index
for http://localhost:9200/example/videos/_mapping
{
"example": {
"mappings": {
"videos": {
"properties": {
"query": {
"properties": {
"match": {
"properties": {
"videoName": {
"type": "string"
}
}
}
}
},
"type": {
"type": "string"
},
"videoName": {
"type": "string"
}
}
}
}
}
}
Based on the query you mentioned above, we can use a regular expression to get the result. Please find the result below for your perusal, and let me know if there is anything else you want.
curl -XGET "http://localhost:9200/test/sample/_search" -d'
{
"query": {
"regexp":{
"videoName": "taylor.*"
}
}
}'
Result:
{
"took": 22,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "sample",
"_id": "1",
"_score": 1,
"_source": {
"videoName": "taylor.mp4",
"type": "long"
}
}
]
}
}
Please use this mapping:
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"videoName": {
"type": "string",
"term_vector": "yes"
}
}
}
}
}
After that you need to index a document that you mentioned earlier:
PUT test_index/doc/1
{
"videoName": "Akon - smack that.mp4",
"type": "long"
}
Output:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.15342641,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.15342641,
"_source": {
"videoName": "Akon - smack that.mp4",
"type": "long"
}
}
]
}
}
Query to get results:
GET /test_index/doc/1/_termvector?fields=videoName
Results:
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_version": 1,
"found": true,
"took": 1,
"term_vectors": {
"videoName": {
"field_statistics": {
"sum_doc_freq": 3,
"doc_count": 1,
"sum_ttf": 3
},
"terms": {
"akon": {
"term_freq": 1
},
"smack": {
"term_freq": 1
},
"that.mp4": {
"term_freq": 1
}
}
}
}
}
By using this we will search based on "smack"
POST /test_index/_search
{
"query": {
"match": {
"_all": "smack"
}
}
}
Result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.15342641,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.15342641,
"_source": {
"videoName": "Akon - smack that.mp4",
"type": "long"
}
}
]
}
}

Resources