Elasticsearch: term query fails

I have a mapping for some documents, and term queries against them fail. I don't understand why:
"mappings":{
"timeslot":{
"properties":{
"FOB_IN":{
"type":"long"
},
"TRIGGER_CODE":{
"type":"long"
},
"FLIGHT_PHASE":{
"type":"long"
},
"REP16_TRIG":{
"type":"long"
},
"fwot":{
"type":"string"
},
"FOB_OUT":{
"type":"long"
},
"FP":{
"type":"long"
},
"FLTNB":{
"type":"string"
},
"Date":{
"format":"strict_date_optional_time||epoch_millis",
"type":"date"
}
}
}
}
I can make a term query against TRIGGER_CODE, for example, and it works fine
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 4.4446826,
"hits": [
{
"_index": "merged-2016-04",
"_type": "timeslot",
"_id": "AVRS8VnirVLwfvMnwpXb",
"_score": 4.4446826,
"_source": {
"Date": "2016-04-03T08:42:44+0000",
"FLIGHT_PHASE": 20,
"TRIGGER_CODE": 4000,
"fwot": "A6-APA"
}
}
]
}
}
Now the same query against fwot fails. What's wrong?
GET merged-2016-04/_search?size=1
{
"query" : {
"term" : { "fwot": "A6-APA"}
}
}
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}

You need fwot to be mapped with "index": "not_analyzed" for that term query to work, and you need to reindex your data for the mapping change to take effect (a _reindex sketch follows the commands below).
Here's the complete list of commands for the mapping change and some test data:
PUT /merged-2016-04
{
"mappings": {
"timeslot": {
"properties": {
"FOB_IN": {
"type": "long"
},
"TRIGGER_CODE": {
"type": "long"
},
"FLIGHT_PHASE": {
"type": "long"
},
"REP16_TRIG": {
"type": "long"
},
"fwot": {
"type": "string",
"index": "not_analyzed"
},
"FOB_OUT": {
"type": "long"
},
"FP": {
"type": "long"
},
"FLTNB": {
"type": "string"
},
"Date": {
"format": "strict_date_optional_time||epoch_millis",
"type": "date"
}
}
}
}
}
POST /merged-2016-04/timeslot
{
"Date": "2016-04-03T08:42:44+0000",
"FLIGHT_PHASE": 20,
"TRIGGER_CODE": 4000,
"fwot": "A6-APA"
}
GET merged-2016-04/_search?size=1
{
"query": {
"term": {
"fwot": "A6-APA"
}
}
}
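If the original index already holds documents, they cannot simply be re-mapped in place; the usual route is to create a new index with the corrected mapping and copy the data across. A minimal sketch using the _reindex API (available from Elasticsearch 2.3 onward; the target name merged-2016-04-v2 is only an example and should first be created with the corrected mapping shown above):
POST /_reindex
{
  "source": { "index": "merged-2016-04" },
  "dest": { "index": "merged-2016-04-v2" }
}
Afterwards, point your searches (or an alias) at the new index.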

See the Query DSL term query documentation page, in particular the note "Why doesn't the term query match my document?", for a detailed explanation.
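The underlying reason is that with the default analyzed string mapping, "A6-APA" is run through the standard analyzer at index time and stored as the lowercase tokens a6 and apa, so the exact term A6-APA never exists in the index. A quick way to verify this, assuming a version that accepts a request body for _analyze, is:
GET merged-2016-04/_analyze
{
  "analyzer": "standard",
  "text": "A6-APA"
}
The response should list a6 and apa as separate tokens, which is why the term query for the unbroken value A6-APA finds nothing.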

We can use the keyword sub-field:
GET merged-2016-04/_search?size=1
{
"query": {
"term": {
"fwot.keyword": "A6-APA"
}
}
}
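Note that this assumes the fwot.keyword sub-field actually exists, which is only the case if the index was created by dynamic mapping on Elasticsearch 5.x or later (where string values get a text field plus a keyword sub-field by default). With the explicit string mapping shown in the question there is no such sub-field, so it is worth checking first:
GET merged-2016-04/_mapping
If fwot shows a keyword (or not_analyzed raw) sub-field in the output, the term query above will work; otherwise the mapping change and reindex described earlier are needed.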

Related

Unable to search the data using query or update _by_query in a newly created index in Elasticsearch

I created an index tr_logintracker in Elasticsearch (version 7.17) using the mapping below. We even tried the Integer data type instead.
{
"mappings": {
"properties": {
"logintime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"logouttime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"logout": {
"type": "long"
},
"vehicleid": {
"type": "long"
},
"driverid": {
"type": "long"
},
"vehicleownerid": {
"type": "long"
}
}
}
}
The index is created:
{
"acknowledged": true,
"shards_acknowledged": true,
"index": "tr_logintracker"
}
A document is inserted into the index; it can be seen using:
{
"query": {
"match_all" : {}
}
}
Response
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "tr_logintracker",
"_type": "_doc",
"_id": "6pJHe4QBPiDyvh1VwkiC",
"_score": 1.0,
"_source": {
"data": {
"vehicleownerid": 17,
"driverid": 21,
"vehicleid": 20,
"logintime": "2022-11-15 18:03:29",
"logout": 0
}
}
}
]
}
}
But when the same document is queried by field, no results are returned.
Query
{
"query": {
"bool": {
"must": [
{ "match": { "driverid" : 21 }}
]
}
}
}
Response
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
}
}
When we check /tr_logintracker/_mapping we see the following, which does not look correct. The second set of entries appears when we insert the document into the index.
{
"tr_logintracker": {
"mappings": {
"properties": {
"data": {
"properties": {
"driverid": {
"type": "long"
},
"logintime": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"logout": {
"type": "long"
},
"vehicleid": {
"type": "long"
},
"vehicleownerid": {
"type": "long"
}
}
},
"driverid": {
"type": "long"
},
"logintime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"logout": {
"type": "long"
},
"logouttime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"vehicleid": {
"type": "long"
},
"vehicleownerid": {
"type": "long"
}
}
}
}
}
We also tried dynamic mapping, letting the inserting program create the index. Even in that case the query does not fetch the result.
TL;DR
It seems you have two entries for driverid:
data.driverid
driverid
The document you showed in the match_all result has data.driverid: 21, but you are querying driverid instead.
Solution
This should work.
{
"query": {
"bool": {
"must": [
{ "match": { "data.driverid" : 21 }}
]
}
}
}
Root Cause
Most likely, you are sending a document like the one below to Elasticsearch
{
"data": {
"vehicleownerid": 17,
"driverid": 21,
"vehicleid": 20,
"logintime": "2022-11-15 18:03:29",
"logout": 0
}
}
Whereas you should be sending it like this:
{
"vehicleownerid": 17,
"driverid": 21,
"vehicleid": 20,
"logintime": "2022-11-15 18:03:29",
"logout": 0
}
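For completeness, a minimal indexing request with the flattened body would look like this on 7.17 (the _doc endpoint with an auto-generated id is just one option):
POST /tr_logintracker/_doc
{
  "vehicleownerid": 17,
  "driverid": 21,
  "vehicleid": 20,
  "logintime": "2022-11-15 18:03:29",
  "logout": 0
}
Documents indexed this way will match the original { "match": { "driverid": 21 } } query without any change to the mapping.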
From the match_all result, I can see that the fields you want to query live inside the data object, so instead of matching driverid you should match data.driverid, as in the Solution query above.

Elasticsearch terms aggregation returns no buckets

New elasticsearch user here and having an issue with a terms aggregation.
I have indexed 187 documents with fields like "name","host","risk" etc.
The field risk has five unique values ("Critical", "High", "Medium", "Low", "Informational").
I am running a terms aggregation like this:
POST http://localhost:9200/{index_name}/_search?size=0
{
"aggs":{
"riskCount":{
"terms":{
"field":"risk.keyword"
}
}
}
}
I was expecting a result stating that I have x of Critical, x of High, etc.
The thing is, I get no buckets back:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 187,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"riskCount": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
}
My Elasticsearch version is 7.12.0. Any ideas?
Edit:
So, here's the mapping:
"findings": {
"mappings": {
"properties": {
"date_uploaded": {
"type": "date"
},
"host": {
"type": "text"
},
"name": {
"type": "text"
},
"risk": {
"type": "text"
}
}
}
}
And here's the document:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 187,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "findings",
"_type": "_doc",
"_id": "f86b6b5b-f09e-4350-9a66-d88a3a78f640",
"_score": 1.0,
"_source": {
"risk": "Informational",
"name": "HTTP Server Type and Version",
"host": "10.10.9.10",
"date_uploaded": "2021-05-07T19:39:10.810663+00:00"
}
}
]
}
}
Since the risk field is of type text with no keyword sub-field, you need to update your index mapping as follows:
PUT /findings/_mapping
{
"properties": {
"risk": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
Then run the _update_by_query API so the existing documents are re-indexed and the new sub-field gets populated.
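A sketch of that call, assuming the index is named findings as in the mapping above (conflicts=proceed is optional and simply keeps the operation from aborting on version conflicts):
POST /findings/_update_by_query?conflicts=proceed
Once it finishes, the terms aggregation on risk.keyword should return buckets.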
You don't have any risk.keyword field in your mapping. Run the following command to update your mapping and create the risk.keyword sub-field:
PUT index-name/_mapping
{
"properties": {
"date_uploaded": {
"type": "date"
},
"host": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"risk": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
Then reindex your data using this command:
POST index-name/_update_by_query
And then your query can be run like this:
{
"aggs":{
"riskCount":{
"terms":{
"field":"risk.keyword"
}
}
}
}

Why does my Elasticsearch query not fetch any records?

I'm running the following query:
{
"size": 50,
"_source" : ["servername", "silo", "packages.displayname", "packages.displayversion","environment"],
"query": {
"bool": {
"must": {
"match": {
"packages.displayname": "Google Chrome"
}
}
,
"must": {
"type": {
"value": "server"
}
}
}
}
}
But it doesn't fetch any records
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
However, the index/type in question does have records where "packages.displayname" = "Google Chrome"; below is a sample of its content:
{
"took": 78,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 994,
"max_score": 1,
"hits": [
{
"_index": "package_conformity-13.02.2019",
"_type": "server",
"_id": "AWjklhaPsoJF1yu58sfg",
"_score": 1,
"_source": {
"environment": "PRD",
"servername": "Zephyr",
"packages": [
{
"displayname": "Google Chrome",
"displayversion": "71.0.3578.80"
},
Here is the index mapping:
{
"package_conformity-13.02.2019": {
"mappings": {
"server": {
"properties": {
"environment": {
"type": "keyword"
},
"farm": {
"type": "keyword"
},
"packages": {
"type": "nested",
"properties": {
"InstallDate": {
"type": "date",
"index": false
},
"InstallLocation": {
"type": "text",
"index": false
},
"comments": {
"type": "text",
"index": false
},
"displayname": {
"type": "keyword"
},
"displayversion": {
"type": "keyword",
"index": false
},
"publisher": {
"type": "text",
"index": false
},
"regkey": {
"type": "keyword",
"index": false
}
}
},
"servername": {
"type": "keyword"
},
"silo": {
"type": "keyword"
},
"timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
}
Is there something wrong with the way I am querying, or with the index structure or content? Please point me in the right direction.
Thanks
If you want multiple constraints inside your must clause, you need to put them in an array (not repeat the must key, since a duplicate JSON key simply overwrites the previous one). The constraint on the document type should be expressed differently, as a term query on _type. Also, since packages is a nested field, the match on packages.displayname has to be wrapped in a nested query. Try this query instead:
{
"size": 50,
"_source": [
"servername",
"silo",
"packages.displayname",
"packages.displayversion",
"environment"
],
"query": {
"bool": {
"must": [
{
"nested": {
"path": "packages",
"query": {
"match": {
"packages.displayname": "Google Chrome"
}
}
}
},
{
"term": {
"_type": "server"
}
}
]
}
}
}

Why doesn't Elasticsearch return the right results?

I'm using Elasticsearch 6.2 configured with one cluster of 2 nodes.
GET _cluster/health:
{
"cluster_name": "cluster_name",
"status": "green",
"timed_out": false,
"number_of_nodes": 2,
"number_of_data_nodes": 2,
"active_primary_shards": 47,
"active_shards": 94,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"delayed_unassigned_shards": 0,
"number_of_pending_tasks": 0,
"number_of_in_flight_fetch": 0,
"task_max_waiting_in_queue_millis": 0,
"active_shards_percent_as_number": 100
}
GET myindex/_settings:
{
"myindex": {
"settings": {
"index": {
"number_of_shards": "3",
"analysis": {
"analyzer": {
"url_split_analyzer": {
"filter": "lowercase",
"tokenizer": "url_split"
}
},
"tokenizer": {
"url_split": {
"pattern": "[^a-zA-Z0-9]",
"type": "pattern"
}
}
},
"number_of_replicas": "1",
"version": {
"created": "6020499"
}
}
}
}
}
Here is a snapshot of the _mappings structure:
"myindex": {
"mappings": {
"mytype": {
"properties": {
"#timestamp": {
"type": "date"
},
............
"active": {
"type": "short"
},
"id_domain": {
"type": "short",
"ignore_malformed": true
},
"url": {
"type": "text",
"similarity": "boolean",
"analyzer": "url_split_analyzer"
}
}
.......
I have stumbled upon documents in my index that I cannot find when I query the index using the id_domain property.
For example:
GET /myindex/mytype/_search
{
"query": {
"bool": {
"must": [
{
"match": { "active": 1 }
}
]
}
}
}
output example:
{
"_index": "myindex",
"_type": "mytype",
"_id": "myurl",
"_score": 1,
"_source": {
"id_domain": "73993",
"active": 1,
"url": "myurl",
"#timestamp": "2018-05-21T10:55:16.247Z"
}
}
....
This returns a list of documents containing id_domain values that I cannot find when querying against that id_domain directly, like this:
GET /myindex/mytype/_search
{
"query": {
"match": {
"id_domain": 73993 // with or without " got the same result
}
}
}
output
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
I cannot understand why this happens.
I also tried to reindex the index, but I got the same result.
I am convinced I'm missing something.
Is there any reason for this behaviour?
Thank you
In your mapping, id_domain has type short, but your document contains a value that is out of bounds for short ([-32,768, 32,767]), namely 73993. Because the field also has "ignore_malformed": true, the document is indexed anyway, but the out-of-range value is silently dropped from the index, which is why the match query finds nothing.
You need to change the type to integer and everything will work.
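Note that an existing field's type cannot be changed in place, so in practice this means creating a new index with id_domain mapped as integer and copying the data over with _reindex. A rough sketch, with illustrative index names and only the changed field shown (the rest of the original mapping, including the url analyzer, would be carried over as well):
PUT /myindex_v2
{
  "mappings": {
    "mytype": {
      "properties": {
        "id_domain": {
          "type": "integer",
          "ignore_malformed": true
        }
      }
    }
  }
}
POST /_reindex
{
  "source": { "index": "myindex" },
  "dest": { "index": "myindex_v2" }
}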

Querying a string for an exact part of its value

I have a field named "lang" which contains values like "en_US", "en_GB", "ru_RU", etc., with this mapping:
"lang": {
"type": "string",
"index": "not_analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
How can I filter for documents by a part of the value, e.g. "US"?
One way to do it is to drop "index": "not_analyzed" on the top-level field and set up a pattern analyzer for it instead. Since you already have the "lang.raw" sub-field, you'll still be able to get the untouched value for faceting and the like.
So, to test it I set up an index like this:
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"analysis": {
"analyzer": {
"whitespace_underscore": {
"type": "pattern",
"pattern": "[\\s_]+",
"lowercase": false
}
}
}
},
"mappings": {
"doc": {
"properties": {
"name": {
"type": "string"
},
"lang": {
"type": "string",
"index_analyzer": "whitespace_underscore",
"search_analyzer": "standard",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
}
}
}
}
And added a few docs:
POST /test_index/doc/_bulk
{"index":{"_id":1}}
{"name":"doc1","lang":"en_US"}
{"index":{"_id":2}}
{"name":"doc2","lang":"en_GB"}
{"index":{"_id":3}}
{"name":"doc3","lang":"ru_RU"}
Now I can filter by "US" like this:
POST /test_index/_search
{
"query": {
"filtered": {
"filter": {
"term": {
"lang": "US"
}
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1,
"_source": {
"name": "doc1",
"lang": "en_US"
}
}
]
}
}
And I can still get a list of values with a terms aggregation on "lang.raw":
POST /test_index/_search?search_type=count
{
"aggs": {
"lang_terms": {
"terms": {
"field": "lang.raw"
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"lang_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "en_GB",
"doc_count": 1
},
{
"key": "en_US",
"doc_count": 1
},
{
"key": "ru_RU",
"doc_count": 1
}
]
}
}
}
Here is the code I used to test it:
http://sense.qbox.io/gist/ac3f3fd66ea649c0c3a8010241d1f6981a7e012c
