elasticsearch nested range filter script - elasticsearch

I have an elasticsearch range aggregation problem.
I have a nested object called "prices" inside another nested object called "products".
In this inner nested object "prices" I have different prices for different countries and currencies. Now I want to use a range aggregation, but it loops over all price items and returns one big range aggregation.
Now I want to use a script to filter by currency and country, but my if clause never returns a value.
"script": "if(doc['currency']=='GBP') { doc['price']; } else 0"
here is my code for query
"aggs": {
"products": {
"nested": {
"path": "products"
},
"aggs": {
"prices": {
"nested": {
"path": "products.prices"
},
"aggs": {
"range": {
"range": {
"field": "products.prices.price",
"script": "if(doc['currency']=='GBP') { doc['price']; } else 0",
"params": {
"currency": "GBP",
"country": "GB"
},
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100
}
]
}
}
}
}
}
}
}
and my mapping
{
"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 1
},
"analysis": {
"filter": {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": ["letter", "digit", "punctuation", "symbol"]
}
},
"analyzer": {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": ["lowercase", "asciifolding", "nGram_filter"]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": ["lowercase", "asciifolding"]
}
}
}
},
"mappings": {
"program": {
"properties": {
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"products": {
"type": "nested",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "nested",
"index": "not_analyzed"
}
},
"properties": {
"sku": {
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"prices": {
"type": "nested",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "nested",
"index": "not_analyzed"
}
},
"properties": {
"price": {
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"analyzer": "english",
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"price2": {
"include_in_all": false,
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"vat": {
"include_in_all": false,
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"country": {
"include_in_all": false,
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"currency": {
"include_in_all": false,
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
}
}

You can try this:
{
"filtered" : {
"query" : { "match_all" : {} },
"filter" : {
"nested" : {
"path" : "products",
"filter" : {
"bool" : {
"must" : [
{
"term" : {"prices.currency" : "GBP"}
},
{
"range" : {"range.count" : {"gt" : 5}}
}
]
}
},
"_cache" : true
}
}
}
}

The "filtered" query is deprecated in Elasticsearch and has been replaced by the "bool" query. The new version of this would be the following:
{
"query" : {
"nested" : {
"path" : "products",
"query" : {
"bool" : {
"must" : [
{
"term" : {"prices.currency" : "GBP"}
},
{
"range" : {"range.count" : {"gt" : 5}}
}
]}
}
}
}
}
Here's a reference to the ElasticSearch documentation

Related

Unable to get similar results with a filter applied specific field

I am working with Elasticsearch 6.4.2. Below are my index settings and mapping. My searchable fields are title and content, and I want to filter the results by the "test" field. The values in the "test" field are abce, ghij, klmn. I want to filter the results on the field "test" with the value "ghij".
PUT /test_index
{
"settings": {
"index": {
"number_of_shards": 4,
"number_of_replicas": 1,
"refresh_interval": "60s",
"analysis" : {
"analyzer" : {
"my_analyzer" : {
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "my_snow"]
},
"blogs_analyzer": {
"type": "stop",
"stopwords": "_english_"
}
} ,
"filter" : {
"my_snow" : {
"type" : "snowball",
"language" : "Lovins"
}
}
}
}
},
"mappings": {
"doc": {
"_source": {
"enabled": true
},
"properties": {
"content": {
"type": "text",
"index": "true",
"store": true,
"analyzer":"my_analyzer",
"search_analyzer": "my_analyzer"
},
"host": {
"type": "keyword",
"index": "true",
"store": true
},
"title": {
"type": "text",
"index": "true",
"store": true,
"analyzer":"my_analyzer",
"search_analyzer": "my_analyzer"
},
"url": {
"type": "text",
"index": "true",
"store": true
},
"test": {
"type": "keyword",
"index": "true",
"store": true
}
}
}
}
}
I tried with the below body in order to get the results.
POST test_index/_search
{
"query": {
"bool": {
"should": [{
"match": {
"content": {
"query": "sports"
}
}
},
{
"match": {
"title": {
"query": "sports"
}
}
}
],
"filter": {
"bool": {
"must": [{
"term": {
"test": "ghij"
}
}]
}
}
}
}
}
If I send the above request I get fewer records, but if I send the direct request GET /test_index/_search?q=sports I get a larger number of results.

ES highlight not showing in results

Can somebody tell me why these fields will not show up in my ES results (within the highlight object) in Sense when I know some of the sites I'm indexing into ES have content for these particular HTML elements?
ES mapping of the fields
"metatag.description": {
"type": "multi_field",
"fields": {
"metatag.description": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"autocomplete": {
"type": "string",
"index_analyzer": "autocomplete_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
},
"metatag.keywords": {
"type": "multi_field",
"fields": {
"metatag.keywords": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"autocomplete": {
"type": "string",
"index_analyzer": "autocomplete_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
},
For some reason only "title.autocomplete" is in the results?
This is my query:
GET /hugetestindex/doc/_search
{
"query": {
"match": {
"_all": {
"query": "ford mu",
"operator": "and"
}
}
},
"highlight": {
"fields": {
"title.autocomplete": {
"pre_tags" : ["<em>"],
"post_tags" : ["</em>"],
"number_of_fragments": 10,
"fragment_size": 300
},
"metatags.keywords.autocomplete": {
"pre_tags" : ["<em>"],
"post_tags" : ["</em>"],
"number_of_fragments": 10,
"fragment_size": 300
},
"metatags.description.autocomplete": {
"pre_tags" : ["<em>"],
"post_tags" : ["</em>"],
"number_of_fragments": 10,
"fragment_size": 300
}
}
}
}
Where am I going wrong?

elastic: HOW-TO search a nested field ? [not working]

I have a JSON document indexed into Elasticsearch, like the following:
"_source": {
"pid_no": 19321,
"aggregator_id": null,
"inet_family": "ipv4-unicast",
"origin_code": "igp",
"extended_community": null,
"atomic_aggregate": null,
"adv_type": "announce",
"local_preference": 250,
"med_metric": 0,
"time_stamp": 1447534931,
"net_mask": "23",
"prefix4_": {
"last": 222,
"first": 111
},
"counter_no": 69668,
"confederation_path": "",
"as_set": null,
and I have successfully managed to filter on all of the keys of the doc,
except the nested ones.
The query looks like this:
GET /SNIP!/SNIP!/_search?routing=SNIP!
{
"query": {
"bool": {
"must": {
"query": {
"match_all": {}
}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"local_preference": {
"gt": 150,
"lte": 250
}
}
},
>>> if i remove the filter below, matches the document.
>>> when i apply the filter, i get 0 hits
{
"and": [
{
"range": {
"prefix4_.first": {
"lte": 200
}
}
},
{
"range": {
"prefix4_.last": {
"gte": 200
}
}
}
]
}
]
}
}
}
}
}
It goes without saying that the corresponding fields (prefix4_.first, prefix4_.last) are mapped as integers.
Could you please advise on why the filtering does not work?
EDIT: the mapping looks like this
{
"mappings": {
"_default_": {
"_all": { "enabled": False },
"dynamic": True,
"_routing": { "required": True },
"properties": {
"pid_no": { "type": "string", "index": "not_analyzed", "store": "no" },
"counter_no": { "type": "long", "store": "no" },
"time_stamp": { "type": "date", "format": "epoch_second", "store": "no" },
"host_name": { "type": "string", "index": "not_analyzed", "store": "no" },
"local_ip": { "type": "ip", "store": "no" },
"peer_ip": { "type": "ip", "store": "no" },
"local_asn": { "type": "string", "index": "not_analyzed", "store": "no" },
"peer_asn": { "type": "string", "index": "not_analyzed", "store": "no" },
"inet_family": { "type": "string", "index": "not_analyzed", "store": "no" },
"next_hop": { "type": "ip", "store": "no" },
"net_block": { "type": "string", "index": "analyzed", "store": "no" },
"as_path": { "type": "string", "index": "analyzed", "store": "no" },
"cluster_list": { "type": "string", "index": "not_analyzed", "store": "no" },
"confederation_path": { "type": "string", "index": "not_analyzed", "store": "no" },
"local_preference": { "type": "integer", "store": "no" },
"originator_ip": { "type": "ip", "store": "no" },
"origin_code": { "type": "string", "index": "not_analyzed", "store": "no" },
"community_note": { "type": "string", "index": "analyzed", "store": "no" },
"med_metric": { "type": "long", "store": "no" },
"atomic_aggregate": { "type": "boolean", "store": "no" },
"aggregator_id": { "type": "string", "index": "analyzed", "store": "no" },
"as_set": { "type": "string", "index": "analyzed", "store": "no" },
"extended_community": { "type": "string", "index": "not_analyzed", "store": "no" },
"adv_type": { "type": "string", "index": "not_analyzed", "store": "no" },
"prefix_": { "type": "string", "index": "not_analyzed", "store": "no" },
"net_mask": { "type": "integer", "store": "no" },
"prefix4_": {
"type": "nested",
"properties": {
"first": { "type": "integer", "store": "no" },
"last": { "type": "integer", "store": "no" }
}
},
"prefix6_": {
"type": "nested",
"properties": {
"lofirst": { "type": "long", "store": "no" },
"lolast": { "type": "long", "store": "no" },
"hifirst": { "type": "long", "store": "no" },
"hilast": { "type": "long", "store": "no" }
}
}
}
}
},
"settings" : {
"number_of_shards": 1,
"number_of_replicas": 0,
"index": {
"store.throttle.type": "none",
"memory.index_buffer_size": "20%",
"refresh_interval": "1m",
"merge.async": True,
"merge.scheduler.type": "concurrent",
"merge.policy.type": "log_byte_size",
"merge.policy.merge_factor": 15,
"cache.query.enable": True,
"cache.filter.type": "node",
"fielddata.cache.type": "node",
"cache.field.type": "soft"
}
}
}
Elasticsearch provides multiple ways of mapping nested documents. You are using the "nested" type, which indexes nested documents as separate documents behind the scenes; as such, querying them requires the use of a nested query.
The simplest way of indexing nested JSON like you've shown is to use the "object" type mapping. This would allow you to query the field the way you were expecting; however, Elasticsearch flattens the hierarchy, which may not be acceptable for you.
Use nested filters to filter your documents on nested fields:
https://www.elastic.co/guide/en/elasticsearch/reference/1.4/query-dsl-nested-filter.html
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"peer_ip": "pqr",
"_cache": true
}
},
{
"nested": {
"filter": {
"bool": {
"must": [
{
"terms": {
"first": [
"xyz"
],
"_cache": true
}
}
]
}
},
"path": "prefix4_",
"inner_hits": {}
}
},
{
"terms": {
"pid_no": [
"yyu"
],
"_cache": true
}
}
]
}
}
}
}
}

elasticsearch "having not" query

Some documents have a category field, and in some of these documents the value of the category field equals "-1". I need a query that returns the documents which have a category field whose value is not equal to "-1".
I tried this:
GET webproxylog/_search
{
"query": {
"filtered": {
"filter": {
"not":{
"filter": {"and": {
"filters": [
{"term": {
"category": "-1"
}
},
{
"missing": {
"field": "category"
}
}
]
}}
}
}
}
}
}
But it does not work: it returns docs that do not have a "category" field.
EDIT
Mapping:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string",
"index": "not_analyzed"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"analyzer": "slash_analyzer"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
If your category field is string and is analyzed by default, then your -1 will be indexed as 1 (stripping the minus sign).
You will need that field to be not_analyzed or to add a sub-field which is not analyzed (as my solution below).
Something like this:
DELETE test
PUT /test
{
"mappings": {
"test": {
"properties": {
"category": {
"type": "string",
"fields": {
"notAnalyzed": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST /test/test/1
{"category": "-1"}
POST /test/test/2
{"category": "2"}
POST /test/test/3
{"category": "3"}
POST /test/test/4
{"category": "4"}
POST /test/test/5
{"category2": "-1"}
GET /test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"category.notAnalyzed": {
"value": "-1"
}
}
},
{
"filtered": {
"filter": {
"missing": {
"field": "category"
}
}
}
}
]
}
}
}

Elasticsearch big data

I'm new to Elasticsearch and I have a problem.
I have 1 million rows of data and the query takes too long to return results.
When I had 150k rows it took 0.5 s; now it takes 10 s.
The amount of data differs each day (one day it can be 150k rows, another day 1 million, etc.).
I need advice on how to make it faster.
Mapping
{
"mappings": {
"Jobs": {
"_ttl": {
"enabled": true,
"default": "1d"
},
"properties": {
"id": {
"type": "integer"
},
"advertiser_id": {
"type": "integer"
},
"company_id": {
"type": "integer"
},
"feed_id": {
"type": "integer"
},
"description_unique": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"city": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"county": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"country": {
"type": "integer"
},
"description": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"store": true
}
}
},
"company": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"url": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"premium": {
"type": "integer"
},
"bid": {
"type": "integer"
},
"created": {
"type": "date",
"format": "dateOptionalTime",
"default": "basic_date"
},
"updated": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
Query
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "Survey Developer",
"type": "best_fields",
"fields": [
"title",
"description"
],
"operator": "and"
}
}
]
}
},
"highlight": {
"boundary_chars": ".,!? \t\n",
"tag_schema": "styled",
"pre_tags": [
"<b>"
],
"post_tags": [
"</b>"
],
"fields": {
"description": {
"fragment_size": 200,
"number_of_fragments": 3
}
}
},
"sort": [
{
"premium": {
"order": "desc"
}
},
{
"bid": {
"order": "desc"
}
}
]
}
Server parameters:
CPU 1 vCPU
RAM 1 GB
System Disk 40 GB
Network 120 Mb/s

Resources