If I have a JSON document indexed into Elasticsearch, like the following:
"_source": {
"pid_no": 19321,
"aggregator_id": null,
"inet_family": "ipv4-unicast",
"origin_code": "igp",
"extended_community": null,
"atomic_aggregate": null,
"adv_type": "announce",
"local_preference": 250,
"med_metric": 0,
"time_stamp": 1447534931,
"net_mask": "23",
"prefix4_": {
"last": 222,
"first": 111
},
"counter_no": 69668,
"confederation_path": "",
"as_set": null,
I have successfully filtered on all of the keys of the document,
except for the nested ones.
The query looks like this:
GET /SNIP!/SNIP!/_search?routing=SNIP!
{
"query": {
"bool": {
"must": {
"query": {
"match_all": {}
}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"local_preference": {
"gt": 150,
"lte": 250
}
}
},
>>> If I remove the filter below, the query matches the document.
>>> When I apply the filter, I get 0 hits.
{
"and": [
{
"range": {
"prefix4_.first": {
"lte": 200
}
}
},
{
"range": {
"prefix4_.last": {
"gte": 200
}
}
}
]
}
]
}
}
}
}
}
It goes without saying that the corresponding fields (prefix4_.first, prefix4_.last) are mapped as integers.
Could you please advise on why the filtering does not work?
EDIT: the mapping looks like this:
{
"mappings": {
"_default_": {
"_all": { "enabled": False },
"dynamic": True,
"_routing": { "required": True },
"properties": {
"pid_no": { "type": "string", "index": "not_analyzed", "store": "no" },
"counter_no": { "type": "long", "store": "no" },
"time_stamp": { "type": "date", "format": "epoch_second", "store": "no" },
"host_name": { "type": "string", "index": "not_analyzed", "store": "no" },
"local_ip": { "type": "ip", "store": "no" },
"peer_ip": { "type": "ip", "store": "no" },
"local_asn": { "type": "string", "index": "not_analyzed", "store": "no" },
"peer_asn": { "type": "string", "index": "not_analyzed", "store": "no" },
"inet_family": { "type": "string", "index": "not_analyzed", "store": "no" },
"next_hop": { "type": "ip", "store": "no" },
"net_block": { "type": "string", "index": "analyzed", "store": "no" },
"as_path": { "type": "string", "index": "analyzed", "store": "no" },
"cluster_list": { "type": "string", "index": "not_analyzed", "store": "no" },
"confederation_path": { "type": "string", "index": "not_analyzed", "store": "no" },
"local_preference": { "type": "integer", "store": "no" },
"originator_ip": { "type": "ip", "store": "no" },
"origin_code": { "type": "string", "index": "not_analyzed", "store": "no" },
"community_note": { "type": "string", "index": "analyzed", "store": "no" },
"med_metric": { "type": "long", "store": "no" },
"atomic_aggregate": { "type": "boolean", "store": "no" },
"aggregator_id": { "type": "string", "index": "analyzed", "store": "no" },
"as_set": { "type": "string", "index": "analyzed", "store": "no" },
"extended_community": { "type": "string", "index": "not_analyzed", "store": "no" },
"adv_type": { "type": "string", "index": "not_analyzed", "store": "no" },
"prefix_": { "type": "string", "index": "not_analyzed", "store": "no" },
"net_mask": { "type": "integer", "store": "no" },
"prefix4_": {
"type": "nested",
"properties": {
"first": { "type": "integer", "store": "no" },
"last": { "type": "integer", "store": "no" }
}
},
"prefix6_": {
"type": "nested",
"properties": {
"lofirst": { "type": "long", "store": "no" },
"lolast": { "type": "long", "store": "no" },
"hifirst": { "type": "long", "store": "no" },
"hilast": { "type": "long", "store": "no" }
}
}
}
}
},
"settings" : {
"number_of_shards": 1,
"number_of_replicas": 0,
"index": {
"store.throttle.type": "none",
"memory.index_buffer_size": "20%",
"refresh_interval": "1m",
"merge.async": True,
"merge.scheduler.type": "concurrent",
"merge.policy.type": "log_byte_size",
"merge.policy.merge_factor": 15,
"cache.query.enable": True,
"cache.filter.type": "node",
"fielddata.cache.type": "node",
"cache.field.type": "soft"
}
}
}
Elasticsearch provides multiple ways of mapping nested documents. You are using the nested type, which indexes nested documents as separate documents behind the scenes; as such, querying them requires the use of a nested query.
The simplest way of indexing nested JSON like the one you've shown is to use the object type mapping. This would allow you to query the field the way you were expecting; however, Elasticsearch then flattens the hierarchy, which may not be acceptable for you.
Use nested filters to filter your documents on nested fields.
https://www.elastic.co/guide/en/elasticsearch/reference/1.4/query-dsl-nested-filter.html
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"peer_ip": "pqr",
"_cache": true
}
},
{
"nested": {
"filter": {
"bool": {
"must": [
{
"terms": {
"first": [
"xyz"
],
"_cache": true
}
}
]
}
},
"path": "prefix4_",
"inner_hits": {}
}
},
{
"terms": {
"pid_no": [
"yyu"
],
"_cache": true
}
}
]
}
}
}
}
}
Related
I have a problem with a query that returns no results. When I execute the following query, with either match or term:
{
"size": 1,
"query": {
"bool": {
"must": [
{ "term": { "ALERT_TYPE.raw": "ERROR" }}
],
"filter": [
{ "range": {
"#timestamp": {
"gte": "2018-02-01T00:00:01.000Z",
"lte": "2018-02-28T23:55:55.000Z"
}
}}
]
}
}
}
I always get the following response:
{
"took": 92,
"timed_out": false,
"_shards": {
"total": 215,
"successful": 215,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
But I'm sure the element is present, because when I run a match_all query, the first hit is the following:
{
"took": 269,
"timed_out": false,
"_shards": {
"total": 210,
"successful": 210,
"failed": 0
},
"hits": {
"total": 68292,
"max_score": 1,
"hits": [
{
"_index": "logstash-2018.02.22",
"_type": "alert",
"_id": "AWEdVphtJjppDZ0FiAz-",
"_score": 1,
"_source": {
"#version": "1",
"#timestamp": "2018-02-22T10:07:41.549Z",
"path": "/something",
"host": "host.host",
"type": "alert",
"SERVER_TYPE": "STANDALONE",
"LOG_FILE": "log.log",
"DATE": "2018-02-22 11:02:02,367",
"ALERT_TYPE": "ERROR",
"MESSAGE": "There is an error"
}
}
]
}
}
Here I can see that the field has the value I am expecting. From the mapping I know that the field is analyzed by the default analyzer and that the raw field is not analyzed (thanks to the answer of Glenn Van Schil). The mapping is generated dynamically by Logstash, but it looks like this for the type I'm looking into:
"alert": {
"_all": {
"enabled": true,
"omit_norms": true
},
"dynamic_templates": [
{
"message_field": {
"mapping": {
"index": "analyzed",
"omit_norms": true,
"fielddata": { "format": "disabled" },
"type": "string"
},
"match": "message",
"match_mapping_type": "string"
}
},
{
"string_fields": {
"mapping": {
"index": "analyzed",
"omit_norms": true,
"fielddata": { "format": "disabled" },
"type": "string",
"fields": {
"raw": {
"index": "not_analyzed",
"ignore_above": 256,
"type": "string"
}
}
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"properties": {
"#timestamp": { "type": "date", "format": "strict_date_optional_time||epoch_millis" },
"#version": { "type": "string", "index": "not_analyzed" },
"ALERT_TYPE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"DATE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"LOG_FILE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"MESSAGE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"SERVER_TYPE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": { "type": "ip" },
"latitude": { "type": "float" },
"location": { "type": "geo_point" },
"longitude": { "type": "float" }
}
},
"host": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"path": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"type": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
Does anyone have a clue why this query keeps returning nothing? Maybe there is something in the mapping that I am missing which explains why the match or term query keeps failing? I'm running out of ideas about what is happening, and I'm quite new to Elasticsearch and Logstash.
Versions of tools and environment :
OS: RHEL Server 6.5 (Santiago)
Java: 1.7.0_91
Elasticsearch: 2.4.6
Lucene: 5.5.4
Logstash: 2.4.1
This is not really an answer, but it was too complicated to write as a comment.
from the mapping i know the field is not analysed.
You are searching for ALERT_TYPE, but this one is in fact analyzed with the default analyzer since you did not specify any analyzer directly under your ALERT_TYPE's mapping.
However, your ALERT_TYPE has an internal field named raw that is not analyzed. If you want to search documents using the raw field you'll need to change the query from
"must": [
{ "term": { "ALERT_TYPE": "ERROR" }}
]
to
"must": [
{ "term": { "ALERT_TYPE.raw": "ERROR" }}
]
When I query my index with query_string, I get results.
But when I query using a term query, I don't get any results.
{
"query": {
"bool": {
"must": [],
"must_not": [],
"should": [
{
"query_string": {
"default_field": "Printer.Name",
"query": "HL-2230"
}
}
]
}
},
"from": 0,
"size": 10,
"sort": [],
"aggs": {}
}
I know that a term query is not analyzed and query_string is analyzed, but Name is already stored as "HL-2230", so why doesn't it match with the term query? I also tried searching with "hl-2230", but I still didn't get any results.
EDIT: the mapping looks like the one below. Printer is the child of Product. Not sure if this makes a difference.
{
"state": "open",
"settings": {
"index": {
"creation_date": "1453816191454",
"number_of_shards": "5",
"number_of_replicas": "1",
"version": {
"created": "1070199"
},
"uuid": "TfMJ4M0wQDedYSQuBz5BjQ"
}
},
"mappings": {
"Product": {
"properties": {
"index": "not_analyzed",
"store": true,
"type": "string"
},
"ProductName": {
"type": "nested",
"properties": {
"Name": {
"store": true,
"type": "string"
}
}
},
"ProductCode": {
"type": "string"
},
"Number": {
"index": "not_analyzed",
"store": true,
"type": "string"
},
"id": {
"index": "no",
"store": true,
"type": "integer"
},
"ShortDescription": {
"store": true,
"type": "string"
},
"Printer": {
"_routing": {
"required": true
},
"_parent": {
"type": "Product"
},
"properties": {
"properties": {
"RelativeUrl": {
"index": "no",
"store": true,
"type": "string"
}
}
},
"PrinterId": {
"index": "no",
"store": true,
"type": "integer"
},
"Name": {
"store": true,
"type": "string"
}
}
},
"aliases": []
}
}
As per the mapping you provided above:
"Name": {
"store": true,
"type": "string"
}
Name is analyzed, so HL-2230 is split into two tokens, hl and 2230 (the default standard analyzer also lowercases them). That's why the term query does not work while query_string does: a term query searches for the exact term HL-2230, which is not in the index.
Some documents have a category field, and in some of those documents its value equals "-1". I need a query that returns the documents which have a category field whose value is not equal to "-1".
I tried this:
GET webproxylog/_search
{
"query": {
"filtered": {
"filter": {
"not":{
"filter": {"and": {
"filters": [
{"term": {
"category": "-1"
}
},
{
"missing": {
"field": "category"
}
}
]
}}
}
}
}
}
}
But it does not work: it returns documents that do not have the "category" field.
EDIT
Mapping:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string",
"index": "not_analyzed"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"analyzer": "slash_analyzer"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
If your category field is a string and is analyzed by default, then your -1 will be indexed as 1 (the minus sign is stripped).
You will need that field to be not_analyzed, or you need to add a sub-field which is not analyzed (as in my solution below).
Something like this:
DELETE test
PUT /test
{
"mappings": {
"test": {
"properties": {
"category": {
"type": "string",
"fields": {
"notAnalyzed": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST /test/test/1
{"category": "-1"}
POST /test/test/2
{"category": "2"}
POST /test/test/3
{"category": "3"}
POST /test/test/4
{"category": "4"}
POST /test/test/5
{"category2": "-1"}
GET /test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"category.notAnalyzed": {
"value": "-1"
}
}
},
{
"filtered": {
"filter": {
"missing": {
"field": "category"
}
}
}
}
]
}
}
}
Continuing a previous question.
What am I trying to do?
Thanks to @AndreiStefan, I'm trying to move a murmur3 hash off the heap using doc_values:
"dynamic_templates": [
{
"murmur3_hashed": {
"mapping": {
"index": "not_analyzed",
"norms": {
"enabled": false
},
"fielddata": {
"format": "doc_values"
},
"doc_values": true,
"type": "string",
"fields": {
"hash": {
"index": "no",
"doc_values": true,
"type": "murmur3"
}
}
},
"match_mapping_type": "string",
"match": "my_prop"
}
}
]
I used stream2es for reindexing.
What is the result?
After a reindexing, the result property is:
"my_prop": {
"index": "not_analyzed",
"fielddata": {
"format": "doc_values"
},
"doc_values": true,
"type": "string",
"fields": {
"hash": {
"null_value": -1,
"precision_step": 2147483647,
"type": "murmur3"
}
}
},
What is the problem?
Why are "index": "no" and "doc_values": true missing from the resulting property?
This is the list of commands that I tested in ES 1.6.0:
PUT /test
{
"mappings": {
"_default_": {
"dynamic_templates": [
{
"murmur3_hashed": {
"mapping": {
"index": "not_analyzed",
"norms": {
"enabled": false
},
"fielddata": {
"format": "doc_values"
},
"doc_values": true,
"type": "string",
"fields": {
"hash": {
"index": "no",
"doc_values": true,
"type": "murmur3"
}
}
},
"match_mapping_type": "string",
"match": "my_prop"
}
}
]
}
}
}
POST /test/test_type/1
{
"my_prop": "xxx"
}
GET /test/test_type/_mapping
And I got this as output:
{
"test": {
"mappings": {
"test_type": {
"dynamic_templates": [
{
"murmur3_hashed": {
"mapping": {
"fielddata": {
"format": "doc_values"
},
"norms": {
"enabled": false
},
"index": "not_analyzed",
"type": "string",
"fields": {
"hash": {
"index": "no",
"type": "murmur3",
"doc_values": true
}
},
"doc_values": true
},
"match": "my_prop",
"match_mapping_type": "string"
}
}
],
"properties": {
"my_prop": {
"type": "string",
"index": "not_analyzed",
"doc_values": true,
"fielddata": {
"format": "doc_values"
},
"fields": {
"hash": {
"type": "murmur3",
"index": "no",
"doc_values": true,
"precision_step": 2147483647,
"null_value": -1
}
}
}
}
}
}
}
}
I have a problem with an Elasticsearch range aggregation.
I have a nested object called "prices" inside a nested object called "products".
In this inner nested object, prices, I have different prices for different countries and currencies. Now I want to use a range aggregation, but it loops over all price items and returns one big range aggregation.
So I want to use a script to filter by currency and country, but my if clause never returns a value.
"script": "if(doc['currency']=='GBP') { doc['price']; } else 0"
Here is the code for my query:
"aggs": {
"products": {
"nested": {
"path": "products"
},
"aggs": {
"prices": {
"nested": {
"path": "products.prices"
},
"aggs": {
"range": {
"range": {
"field": "products.prices.price",
"script": "if(doc['currency']=='GBP') { doc['price']; } else 0",
"params": {
"currency": "GBP",
"country": "GB"
},
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100
}
]
}
}
}
}
}
}
}
and my mapping
{
"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 1
},
"analysis": {
"filter": {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": ["letter", "digit", "punctuation", "symbol"]
}
},
"analyzer": {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": ["lowercase", "asciifolding", "nGram_filter"]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": ["lowercase", "asciifolding"]
}
}
}
},
"mappings": {
"program": {
"properties": {
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"products": {
"type": "nested",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "nested",
"index": "not_analyzed"
}
},
"properties": {
"sku": {
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"prices": {
"type": "nested",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "nested",
"index": "not_analyzed"
}
},
"properties": {
"price": {
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"analyzer": "english",
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"price2": {
"include_in_all": false,
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"vat": {
"include_in_all": false,
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"country": {
"include_in_all": false,
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"currency": {
"include_in_all": false,
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
}
}
Can you try this?
{
"filtered" : {
"query" : { "match_all" : {} },
"filter" : {
"nested" : {
"path" : "products",
"filter" : {
"bool" : {
"must" : [
{
"term" : {"prices.currency" : "GBP"}
},
{
"range" : {"range.count" : {"gt" : 5}}
}
]
}
},
"_cache" : true
}
}
}
}
The filtered query is now deprecated in Elasticsearch and has been replaced by the bool query. The new version of this would be the following:
{
"query" : {
"nested" : {
"path" : "products",
"query" : {
"bool" : {
"must" : [
{
"term" : {"prices.currency" : "GBP"}
},
{
"range" : {"range.count" : {"gt" : 5}}
}
]}
}
}
}
}
Here's a reference to the ElasticSearch documentation