Elasticsearch - Filters not working - elasticsearch

I am trying to build a query to return all products of a genre:
curl -XPOST 'http://172.17.0.2:9200/products/_search?pretty' -d '
{
"query" : {
"filtered" : {
"query" : {
"match_all" : {}
},
"filter" : {
"term" : {
"genre" : "Feminino"
}
}
}
}
}'
but the result is zero products. The query doesn't return anything.
If I remove the filter assignment the query works:
curl -XPOST 'http://172.17.0.2:9200/products/_search?pretty' -d '
{
"query" : {
"filtered" : {
"query" : {
"match_all" : {}
}
}
}
}'
The settings of my index are:
curl -s http://172.17.0.2:9200/products/_settings |python -m json.tool
{
"products": {
"settings": {
"index": {
"creation_date": "1455635219340",
"number_of_replicas": "1",
"number_of_shards": "5",
"products": {
"mappings": {
"properties": {
"avaliability": {
"index": "not_analyzed",
"type": "string"
},
"best_seller": {
"index": "not_analyzed",
"type": "string"
},
"brand": {
"index": "not_analyzed",
"type": "double"
},
"brand_lenses": {
"index": "not_analyzed",
"type": "string"
},
"category": {
"index": "not_analyzed",
"type": "string"
},
"color": {
"index": "not_analyzed",
"type": "string"
},
"color_arm": {
"index": "not_analyzed",
"type": "string"
},
"color_lense": {
"index": "not_analyzed",
"type": "string"
},
"description": {
"type": "string"
},
"duration": {
"index": "not_analyzed",
"type": "string"
},
"final_price": {
"index": "not_analyzed",
"type": "double"
},
"format": {
"index": "not_analyzed",
"type": "string"
},
"front_image": {
"type": "string"
},
"genre": {
"index": "not_analyzed",
"type": "string"
},
"installment_amount": {
"index": "not_analyzed",
"type": "double"
},
"installment_times": {
"index": "not_analyzed",
"type": "integer"
},
"lenses_type": {
"index": "not_analyzed",
"type": "string"
},
"link": {
"type": "string"
},
"manufacturer": {
"index": "not_analyzed",
"type": "string"
},
"material": {
"index": "not_analyzed",
"type": "string"
},
"model": {
"index": "not_analyzed",
"type": "string"
},
"name": {
"type": "string"
},
"new": {
"index": "not_analyzed",
"type": "string"
},
"pathology": {
"index": "not_analyzed",
"type": "string"
},
"price": {
"index": "not_analyzed",
"type": "double"
},
"price_in_cash": {
"index": "not_analyzed",
"type": "double"
},
"qty": {
"index": "not_analyzed",
"type": "integer"
},
"side_image": {
"type": "string"
},
"sku": {
"type": "string"
},
"toast": {
"index": "not_analyzed",
"type": "string"
},
"type": {
"index": "not_analyzed",
"type": "string"
}
}
}
},
"uuid": "jxYCUwUGSHW3Rj-A5Q0Tkg",
"version": {
"created": "2020099"
}
}
}
}
Is there something wrong in my query or in my index?
EDIT: The output of search without the filter:
curl -s -XPOST 'http://172.17.0.2:9200/products/_search?pretty&size=1' -d '
{
"query" : {
"filtered" : {
"query" : {
"match_all" : {}
}
}
}
}' | python -m json.tool
{
"_shards": {
"failed": 0,
"successful": 5,
"total": 5
},
"hits": {
"hits": [
{
"_id": "30-2024-MMBQ1090_C4",
"_index": "products",
"_score": 1.0,
"_source": {
"avaliability": "[out of stock]",
"best_seller": "",
"brand": "1.8.1",
"category": "\u00d3culos de Grau",
"color": "Marrom",
"color_arm": "Marrom",
"color_lense": "",
"description": "Esse charmoso \u00f3culos possui a super tend\u00eancia cor marsala, um vinho mais fechado pro marrom, que transmite eleg\u00e2ncia imediata. Al\u00e9m disso, na lateral da sua haste ele mostra um detalhe met\u00e1lico incr\u00edvel, que enche esse modelo de personalidade. Seu formato retangular d\u00e1 o toque final de estilo contempor\u00e2neo. Muito belo! preencher",
"final_price": 197.0,
"format": "Retangular",
"front_image": "https://media.eotica.com.br/catalog/product/cache/1/small_image/266x120/9df78eab33525d08d6e5fb8d27136e95/o/c/oculos-181-mmbq1090p-c4-1-rn.jpg",
"genre": [
"Feminino",
"Masculino"
],
"id": "30-2024-MMBQ1090_C4",
"installment_amount": 65.67,
"installment_times": "3",
"lenses_type": "",
"link": "https://www.eotica.com.br/oculos-de-grau-181-jacob-mmbq-1090-marrom-c4.html",
"material": "N\u00e3o Metal",
"model": "MMBQ1090 ",
"name": "181 Jacob MMBQ1090 - Marrom - C4 - \u00d3culos de Grau",
"new": "",
"price": 197.0,
"price_in_cash": 187.15,
"qty": 0,
"side_image": "https://media.eotica.com.br/catalog/product/o/c/oculos-181-mmbq1090p-c4-1-rn.jpg",
"sku": "30-2024-MMBQ1090_C4",
"toast": "0"
},
"_type": "product"
}
],
"max_score": 1.0,
"total": 10416
},
"timed_out": false,
"took": 1
}

Resolved.
The problem is the Standard Analyzer.
An analyzer of type standard is built using the Standard Tokenizer with the Standard Token Filter, Lower Case Token Filter, and Stop Token Filter.
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
So, the query:
curl -XPOST 'http://172.17.0.2:9200/products/_search?pretty' -d '
{"query" : {
"filtered" : {
"query" : {
"match_all" : {}
},
"filter" : {
"term" : {
"genre" : "feminino"
}
}
}
}
}'
works very well.

Related

Elasticsearch match or term always failing

I have a problem with a query that returns no results. When I execute the following query, with either match or term:
{
"size": 1,
"query": {
"bool": {
"must": [
{ "term": { "ALERT_TYPE.raw": "ERROR" }}
],
"filter": [
{ "range": {
"#timestamp": {
"gte": "2018-02-01T00:00:01.000Z",
"lte": "2018-02-28T23:55:55.000Z"
}
}}
]
}
}
}
I always get the following response:
{
"took": 92,
"timed_out": false,
"_shards": {
"total": 215,
"successful": 215,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
But I'm sure the element is present, because when I run a match_all query, the first hit is the following:
{
"took": 269,
"timed_out": false,
"_shards": {
"total": 210,
"successful": 210,
"failed": 0
},
"hits": {
"total": 68292,
"max_score": 1,
"hits": [
{
"_index": "logstash-2018.02.22",
"_type": "alert",
"_id": "AWEdVphtJjppDZ0FiAz-",
"_score": 1,
"_source": {
"#version": "1",
"#timestamp": "2018-02-22T10:07:41.549Z",
"path": "/something",
"host": "host.host",
"type": "alert",
"SERVER_TYPE": "STANDALONE",
"LOG_FILE": "log.log",
"DATE": "2018-02-22 11:02:02,367",
"ALERT_TYPE": "ERROR",
"MESSAGE": "There is an error"
}
}
]
}
}
Here I can see that the field has the value I am expecting. And from the mapping I know the field is analyzed by the default analyzer and the raw field is not analyzed (thanks to the answer of Glenn Van Schil). The mapping is generated dynamically by Logstash, but it looks like this for the type I'm looking into:
"alert": {
"_all": {
"enabled": true,
"omit_norms": true
},
"dynamic_templates": [
{
"message_field": {
"mapping": {
"index": "analyzed",
"omit_norms": true,
"fielddata": { "format": "disabled" },
"type": "string"
},
"match": "message",
"match_mapping_type": "string"
}
},
{
"string_fields": {
"mapping": {
"index": "analyzed",
"omit_norms": true,
"fielddata": { "format": "disabled" },
"type": "string",
"fields": {
"raw": {
"index": "not_analyzed",
"ignore_above": 256,
"type": "string"
}
}
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"properties": {
"#timestamp": { "type": "date", "format": "strict_date_optional_time||epoch_millis" },
"#version": { "type": "string", "index": "not_analyzed" },
"ALERT_TYPE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"DATE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"LOG_FILE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"MESSAGE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"SERVER_TYPE": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"geoip": {
"dynamic": "true",
"properties": {
"ip": { "type": "ip" },
"latitude": { "type": "float" },
"location": { "type": "geo_point" },
"longitude": { "type": "float" }
}
},
"host": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"path": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
},
"type": {
"type": "string",
"norms": { "enabled": false },
"fielddata": { "format": "disabled" },
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed",
"ignore_above": 256
}
}
}
Does anyone have a clue about why this query keeps returning nothing? Maybe there is something in the mapping that I am missing which explains why the match or term query keeps failing? I'm running out of ideas about what is happening, and I'm quite new to Elasticsearch and Logstash.
Versions of tools and environment :
OS: RHEL Server 6.5 (Santiago)
Java: 1.7.0_91
Elasticsearch: 2.4.6
Lucene: 5.5.4
Logstash: 2.4.1
This is not really an answer, but it was too complicated to write as a comment.
from the mapping i know the field is not analysed.
You are searching for ALERT_TYPE, but this one is in fact analyzed with the default analyzer since you did not specify any analyzer directly under your ALERT_TYPE's mapping.
However, your ALERT_TYPE has an internal field named raw that is not analyzed. If you want to search documents using the raw field you'll need to change the query from
"must": [
{ "term": { "ALERT_TYPE": "ERROR" }}
]
to
"must": [
{ "term": { "ALERT_TYPE.raw": "ERROR" }}
]

Elastic Search : Expected numeric type on field

All the fields that I need to calculate the SUM on are INT, but when I make the request I get this error:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Expected numeric type on field [mlf16_txservnum], but got [string]"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "bpa_stag_v3",
"node": "zzsbvdwmQ0-d0Ca_b3w0uQ",
"reason": {
"type": "illegal_argument_exception",
"reason": "Expected numeric type on field [mlf16_txservnum], but got [string]"
}
}
]
},
"status": 400
}
Here is the request:
{
"size" : 0,
"query" : {
"bool" : {
"filter" : [ {
"term" : {
"mlf16_cptfou_six" : "095436"
}
}, {
"term" : {
"mlf16_codadres" : "00"
}
}, {
"term" : {
"mlf16_semind" : "24"
}
}, {
"term" : {
"mlf16_annee" : "2017"
}
} ]
}
},
"aggregations" : {
"Sum_Service_Rate_Numerator" : {
"sum" : {
"field" : "mlf16_txservnum"
}
},
"Sum_Service_Rate_Denominator" : {
"sum" : {
"field" : "mlf16_txservden"
}
}
}
}
Here is a document from the index, queried without any aggregation:
{
"_index": "bpa_stag_v3",
"_type": "indic_semaine_yassine",
"_id": "2017-06-11,2017,23,23,0000900156,05,0000241235,00,00,5I,3,0,19,0,2,0,0,3,2,2,0,12,3,241235,2017",
"_score": 1,
"_source": {
"dt_extract": "2017-06-11",
"mlf16_annee": "2017",
"mlf16_semind": "23",
"mlf16_semcal": "23",
"mlf16_cptmag": "0000900156",
"mlf16_codraft": "05",
"mlf16_cptfou": "0000241235",
"mlf16_codadrdl": "00",
"mlf16_codadres": "00",
"mlf16_grpges": "5I",
"mlf16_clasges": "3",
"mlf16_txservnum": 0,
"mlf16_txservden": 19,
"mlf16_txdocnum": 0,
"mlf16_txdocden": 2,
"mlf16_txfiabnum": 0,
"mlf16_txfiabden": 0,
"mlf16_refret2j": 3,
"mlf16_refret7j": 2,
"mlf16_refret21j": 2,
"mlf16_natcouv": "0",
"mlf16_nbligndiff": 12,
"mlf16_nbrefdiff": 3,
"mlf16_cptfou_six": "241235",
"dt_year": 2017,
"es_id": "2017-06-11,2017,23,23,0000900156,05,0000241235,00,00,5I,3,0,19,0,2,0,0,3,2,2,0,12,3,241235,2017"
}
},
Here is the mapping of my index:
{
"bpa_stag_v3": {
"mappings": {
"indic_semaine_yassine": {
"dynamic_templates": [
{
"string_fields": {
"mapping": {
"index": "not_analyzed",
"omit_norms": true,
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"properties": {
"dt_extract": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"dt_year": {
"type": "long"
},
"es_id": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_annee": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_clasges": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_codadrdl": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_codadres": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_codraft": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_cptfou": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_cptfou_six": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_cptmag": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_grpges": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_natcouv": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_nbligndiff": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_nbrefdiff": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"mlf16_refret21j": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_refret2j": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_refret7j": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_semcal": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_semind": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_txdocden": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_txdocnum": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_txfiabden": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_txfiabnum": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_txservden": {
"type": "string",
"index": "not_analyzed"
},
"mlf16_txservnum": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
As your exception says — "reason": "Expected numeric type on field [mlf16_txservnum], but got [string]" — mlf16_txservnum is a string, but a numeric type is expected, because the sum aggregation can only be applied to numbers, not strings. In your mapping, you have specified the datatype of mlf16_txservnum as string:
"mlf16_txservnum": {
"type": "string",
"index": "not_analyzed"
}
Change the type in your mapping to a numeric type (integer, double, etc.) for this field and for the other fields on which you are performing a sum:
"mlf16_txservnum": {
"type": "integer"
}
I think @RA KA explained the issue and the fix. But there is another way to fix it as well: if your data is huge and you can't reindex, you can change the data type of the value at run time.
"aggregations" : {
"Sum_Service_Rate_Numerator" : {
"sum" : {
"field" : 'Integer.parseInt(doc["mlf16_txservnum"].value)'
}
},
"Sum_Service_Rate_Denominator" : {
"sum" : {
"field" : 'Integer.parseInt(doc["mlf16_txservden"].value)'
}
}
}
Hope this Helps..!

Elasticsearch MapperParsingException

I am trying to index the following data into Elasticsearch:
{
"_id": "5619578c1983757a72efef15",
"aseg": {},
"cs": {
"source": "None",
"ss": "In Transit",
"sr": "Weight Captured",
"act": "+B",
"pid": "BAG21678106",
"st": "UD",
"dest": "Bharatpur_DC (Rajasthan)",
"u": "J",
"sl": "Jaipur_Hub (Rajasthan)",
"ud": "2015-10-12T14:59:44.270000",
"sd": "2015-10-12T14:59:44.270000"
},
"nsl": [
{
"dt": [
2015,
10,
10
],
"code": "X-PPONM"
},
{
"dt": [
2015,
10,
11
],
"code": "X-UCI"
},
]
}
but in return I am getting this error:
MapperParsingException[failed to parse [cs.nsl]]; nested: ElasticsearchIllegalArgumentException[unknown property [dt]];
I checked the mapping, mapping is correct, nsl nested inside cs dict has a different mapping than nsl at root level.
"cs": {
"properties": {
"act": {
"type": "string"
},
"add": {
"type": "string"
},
"asr": {
"type": "string"
},
"bucket": {
"type": "string"
},
"dest": {
"type": "string",
"index": "not_analyzed"
},
"dwbn": {
"type": "string"
},
"lcld": {
"type": "string"
},
"lat": {
"type": "string"
},
"lon": {
"type": "string"
},
"loc": {
"type": "double"
},
"nsl": {
"type": "string",
"index": "not_analyzed"
},
"ntd": {
"type": "date",
"format": "dateOptionalTime"
},
"pbs": {
"type": "string"
},
"pid": {
"type": "string"
},
"pupid": {
"type": "string"
},
"sd": {
"type": "date",
"format": "dateOptionalTime"
},
"sl": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"properties": {
"source": {
"type": "string"
},
"source_id": {
"type": "string"
},
"source_type": {
"type": "string"
}
}
},
"sr": {
"type": "string"
},
"ss": {
"type": "string",
"index": "not_analyzed"
},
"st": {
"type": "string"
},
"u": {
"type": "string",
"index": "not_analyzed"
},
"ud": {
"type": "date",
"format": "dateOptionalTime"
},
"vh": {
"type": "string"
}
}
},
and for nsl at the root level the mapping is as follows:
"nsl": {
"properties" : {
"code" : {
"type" : "string",
"index": "not_analyzed"
},
"dt" : {
"type" : "string",
"index": "not_analyzed"
}
}
},
This is happening for only a few records; all the rest are syncing fine.
There aren't any changes in the payload.
Further, nsl is a sparse key inside cs.
In your mapping nsl is as follows -
"nsl": {
"type": "string",
"index": "not_analyzed"
},
As per the mapping, Elasticsearch is expecting a concrete string value for the nsl field, but it's an array of objects in the document you have provided.
Once Elasticsearch has a mapping for a field, it is definite. You can't insert object data into a string field.
I tried your document without pre-setting any mapping as follows:
{
"aseg": {},
"cs": {
"source": "None",
"ss": "In Transit",
"sr": "Weight Captured",
"act": "+B",
"pid": "BAG21678106",
"st": "UD",
"dest": "Bharatpur_DC (Rajasthan)",
"u": "J",
"nsl":"foo",
"sl": "Jaipur_Hub (Rajasthan)",
"ud": "2015-10-12T14:59:44.270000",
"sd": "2015-10-12T14:59:44.270000"
},
"nsl": [
{
"dt": [
2015,
10,
10
],
"code": "X-PPONM"
},
{
"dt": [
2015,
10,
11
],
"code": "X-UCI"
}
]
}
And the ES created the mapping as follows:
"nsl": {
"properties": {
"dt": {
"type": "long"
},
"code": {
"type": "string"
}
}
}
As you can see, ES put the "dt" type as "long", which is the internal representation of a date type. So, maybe you need to change that type?
Also, without seeing the successful document it is difficult to guess but I believe those documents do not have the "dt" field value.
Of course, you are free to put "not_analyzed" as you see fit for any field.

elasticsearch nested range filter script

I have an Elasticsearch range aggregation problem.
I have a nested object called "prices" inside a nested object called "products".
In this nested prices object I have different prices for different countries and currencies. Now I want to use a range aggregation, but it loops over all price items and returns one big range aggregation.
Now I want to use a script to filter by currency and country price, but my if clause never returns a value.
"script": "if(doc['currency']=='GBP') { doc['price']; } else 0"
here is my code for query
"aggs": {
"products": {
"nested": {
"path": "products"
},
"aggs": {
"prices": {
"nested": {
"path": "products.prices"
},
"aggs": {
"range": {
"range": {
"field": "products.prices.price",
"script": "if(doc['currency']=='GBP') { doc['price']; } else 0",
"params": {
"currency": "GBP",
"country": "GB"
},
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100
}
]
}
}
}
}
}
}
}
and my mapping
{
"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 1
},
"analysis": {
"filter": {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": ["letter", "digit", "punctuation", "symbol"]
}
},
"analyzer": {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": ["lowercase", "asciifolding", "nGram_filter"]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": ["lowercase", "asciifolding"]
}
}
}
},
"mappings": {
"program": {
"properties": {
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"products": {
"type": "nested",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "nested",
"index": "not_analyzed"
}
},
"properties": {
"sku": {
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"prices": {
"type": "nested",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "nested",
"index": "not_analyzed"
}
},
"properties": {
"price": {
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"analyzer": "english",
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"price2": {
"include_in_all": false,
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"vat": {
"include_in_all": false,
"type": "float",
"store": true,
"index": "analyzed",
"null_value": 0,
"fields": {
"raw": {
"type": "float",
"index": "not_analyzed"
}
}
},
"country": {
"include_in_all": false,
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"currency": {
"include_in_all": false,
"type": "string",
"store": true,
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
}
}
Can you try this?
{
"filtered" : {
"query" : { "match_all" : {} },
"filter" : {
"nested" : {
"path" : "products",
"filter" : {
"bool" : {
"must" : [
{
"term" : {"prices.currency" : "GBP"}
},
{
"range" : {"range.count" : {"gt" : 5}}
}
]
}
},
"_cache" : true
}
}
}
}
Filtered is deprecated at this point for ElasticSearch and was replaced with bool. The new version of this would be the following:
{
"query" : {
"nested" : {
"path" : "products",
"query" : {
"bool" : {
"must" : [
{
"term" : {"prices.currency" : "GBP"}
},
{
"range" : {"range.count" : {"gt" : 5}}
}
]}
}
}
}
}
Here's a reference to the ElasticSearch documentation

elasticsearch : object mapping and getting data from database

I use elasticsearch and oracle for the database.
Database :
I created a view with all the data I need to index. I have 1-N relations between my "occurrences" table and "determinations" table, and between the "occurrences" table and "multimedias" table, so one occurrence has multiple determinations and multimedias.
Elasticsearch :
I created a mapping and a river to get data from the database view.
The problem is that I need an array of objects for multimedias and determinations, instead of a separate array for each field, in the Elasticsearch result (example below).
mapping
curl -XPUT 'localhost:9200/botanic/' -d '{
"settings": {
"index": {
"analysis": {
"analyzer": {
"keylower": {
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
}
},
"mappings": {
"specimens": {
"_all": {
"enabled": true
},
"_index": {
"enabled": true
},
"_id": {
"index": "not_analyzed",
"store": false
},
"properties": {
"_id": {
"type": "string",
"store": "no",
"index": "not_analyzed"
},
...
"MULTIMEDIA": {
"_id": {
"path": "M_MULTIMEDIAID"
},
"type": "object",
"properties": {
"M_MULTIMEDIAID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"M_CREATOR": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"M_DESCRIPTION": {
"type": "string",
"store": "yes",
"index": "analyzed"
}
...
}
},
"DETERMINATIONS": {
"_id": {
"path": "D_OCCURRENCEID"
},
"type": "object",
"properties": {
"D_OCCURRENCEID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"D_DETERMINATIONID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"D_DATEIDENTIFIED": {
"type": "string",
"store": "yes",
"index": "analyzed"
},
"D_TYPESTATUS": {
"type": "string",
"store": "yes",
"index": "analyzed"
},
"D_CREATED": {
"type": "date",
"store": "yes",
"index": "analyzed"
}
}
},
...
"I_INSTITUTIONID": {
"type": "string",
"store": "yes",
"index": "not_analyzed"
},
"I_INSTITUTIONCODE": {
"type": "string",
"store": "yes",
"index": "analyzed"
}
}
}
}
}'
the river
curl -XPUT 'localhost:9200/_river/botanic_river/_meta' -d '{
"type": "jdbc",
"jdbc": {
"index": "botanic",
"type": "specimens",
"url": "jdbc:oracle:thin:#localhost:1523:database",
"user": "user",
"password": "password",
"sql": "select * from elasticsearchview"
}
}'
the result I get ( multiple fields and for each an array ):
"hits": [
{
"_index": "botanic",
"_type": "specimens",
"_id": "345F5BEA7FDB4B17A7831514E25CD29B",
"_score": 0.4430604,
"_source": {
...
"M_MULTIMEDIAID": [
"0E91818D48DE40C785733F9F3A7932F1",
"833C6E79D7844D568B828DF2D8BA8AC7",
"F76F6766398042D38902DA9165D41514"
],
"M_CREATOR": [
"creator1",
"creator2",
"creator3"
],
"M_DESCRIPTION": [
"descr1",
"descr3",
"descr2"
],
...
}
}
]
but I need something like this ( array of object ) :
"hits": [
{
"_index": "botanic",
"_type": "specimens",
"_id": "345F5BEA7FDB4B17A7831514E25CD29B",
"_score": 0.4430604,
"_source": {
...
"MULTIMEDIA": [
{
"M_MULTIMEDIAID": "0E91818D48DE40C785733F9F3A7932F1",
"M_CREATOR": "creator1",
"M_DESCRIPTION": "descr1"
},
{
"M_MULTIMEDIAID": "833C6E79D7844D568B828DF2D8BA8AC7",
"M_CREATOR": "creator2",
"M_DESCRIPTION": "descr2"
},
{
"M_MULTIMEDIAID": "F76F6766398042D38902DA9165D41514",
"M_CREATOR": "creator3",
"M_DESCRIPTION": "descr3"
}
]
...
}
}
]
I tried "type" : "object" and "type" : "nested" in the mapping, but got the same result.
How can I do this?

Resources