ElasticSearch aggregate information grouped by month - elasticsearch

I am working on an ElasticSearch query that should give me back all documents where a date field is now-1 year ago and then group them all by month (giving me total count for each month), but am failing on writing this query.
This is what I have:
{
"query": {
"bool": {
"must": [
{
"terms": {
"account_id": [
1
]
}
}
]
}
},
"aggs": {
"growth": {
"date_range": {
"field": "member_since",
"format": "YYYY-MM-DD",
"ranges": [
{
"to": "now-1Y/M"
},
{
"from": "now-1Y/M"
}
]
}
}
},
"size": 100
}
I am running the query like so:
POST https://my-es-cluster-url.com, but I keep getting this error:
{
"error": {
"root_cause": [
{
"type": "parse_exception",
"reason": "unit [Y] not supported for date math [-1Y/M]"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query_fetch",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "index0",
"node": "MkypXlGdQamAplca1JIgZQ",
"reason": {
"type": "parse_exception",
"reason": "unit [Y] not supported for date math [-1Y/M]"
}
}
]
},
"status": 400
}

I reproduced your problem with a simpler query on my data.
"query": {
"range": {
"recent_buy_transaction": {
"from": "now-1Y"
}
}
}
I get the same error.
ElasticSearchParseException[unit [Y] not supported for date math [-1Y]]
However, using small y fixes the problem. Hence, you should try using:
now-1y/M
From the documentation
The supported time units are: y (year), M (month), w (week), d (day), h (hour), m (minute), and s (second).

Related

Elastic search array of objects nested range aggregation

I'm trying to make range aggregation on the following data set:
{
"ProductType": 1,
"ProductDefinition": "fc588f8e-14f2-4871-891f-c73a4e3d17ca",
"ParentProduct": null,
"Sku": "074617",
"VariantSku": null,
"Name": "Paraboot Avoriaz/Jannu Marron Brut Marron Brown Hiking Boot Shoes",
"AllowOrdering": true,
"Rating": null,
"ThumbnailImageUrl": "/media/1106/074617.jpg",
"PrimaryImageUrl": "/media/1106/074617.jpg",
"Categories": [
"399d7b20-18cc-46c0-b63e-79eadb9390c7"
],
"RelatedProducts": [],
"Variants": [
"84a7ff9f-edf0-4aab-87f9-ba4efd44db74",
"e2eb2c50-6abc-4fbe-8fc8-89e6644b23ef",
"a7e16ccc-c14f-42f5-afb2-9b7d9aefbc5c"
],
"PriceGroups": [
"86182755-519f-4e05-96ef-5f93a59bbaec"
],
"DisplayName": "Paraboot Avoriaz/Jannu Marron Brut Marron Brown Hiking Boot Shoes",
"ShortDescription": "",
"LongDescription": "<ul><li>Paraboot Avoriaz Mountaineering Boots</li><li>Marron Brut Marron (Brown)</li><li>Full leather inners and uppers</li><li>Norwegien Welted Commando Sole</li><li>Hand made in France</li><li>Style number : 074617</li></ul><p>As featured on Pritchards.co.uk</p>",
"UnitPrices": {
"EUR 15 pct": 343.85
},
"Taxes": {
"EUR 15 pct": 51.5775
},
"PricesInclTax": {
"EUR 15 pct": 395.4275
},
"Slug": "paraboot-avoriazjannu-marron-brut-marron-brown-hiking-boot-shoes",
"VariantsProperties": [
{
"Key": "ShoeSize",
"Value": "8"
},
{
"Key": "ShoeSize",
"Value": "10"
},
{
"Key": "ShoeSize",
"Value": "6"
}
],
"Guid": "0d4f6899-c66a-4416-8f5d-26822c3b57ae",
"Id": 178,
"ShowOnHomepage": true
}
I'm aggregating on VariantsProperties which have the following mapping
"VariantsProperties": {
"type": "nested",
"properties": {
"Key": {
"type": "keyword"
},
"Value": {
"type": "keyword"
}
}
}
Terms aggregations are working fine with following code:
{
"aggs": {
"Nest": {
"nested": {
"path": "VariantsProperties"
},
"aggs": {
"fieldIds": {
"terms": {
"field": "VariantsProperties.Key"
},
"aggs": {
"values": {
"terms": {
"field": "VariantsProperties.Value"
}
}
}
}
}
}
}
}
However when I try to do a range aggregation to get shoes in size between 8 - 12 such as:
{
"aggs": {
"Nest": {
"nested": {
"path": "VariantsProperties"
},
"aggs": {
"fieldIds": {
"range": {
"field": "VariantsProperties.Value",
"ranges": [ { "from": 8, "to": 12 }]
}
}
}
}
}
}
I get the following error:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Field [VariantsProperties.Value] of type [keyword] is not supported for aggregation [range]"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "product-avenueproductindexdefinition-24476f82-en-us",
"node": "ejgN4XecT1SUfgrhzP8uZg",
"reason": {
"type": "illegal_argument_exception",
"reason": "Field [VariantsProperties.Value] of type [keyword] is not supported for aggregation [range]"
}
}
],
"caused_by": {
"type": "illegal_argument_exception",
"reason": "Field [VariantsProperties.Value] of type [keyword] is not supported for aggregation [range]",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "Field [VariantsProperties.Value] of type [keyword] is not supported for aggregation [range]"
}
}
},
"status": 400
}
Is there a way to "transform" the terms aggregation into a range aggregation, without the need of changing the schema? I know I could build the ranges myself by extracting the data from the terms aggregation and building the ranges out of it, however, I would prefer a solution within the elastic itself.
There are two ways to solve this:
Option A: Use a script instead of a field. This option will work without having to reindex your data, but depending on your volume of data, the performance might suffer.
POST test/_search
{
"aggs": {
"Nest": {
"nested": {
"path": "VariantsProperties"
},
"aggs": {
"fieldIds": {
"range": {
"script": "Integer.parseInt(doc['VariantsProperties.Value'].value)",
"ranges": [
{
"from": 8,
"to": 12
}
]
}
}
}
}
}
}
Option B: Add an integer sub-field in your mapping.
PUT my-index/_mapping
{
"properties": {
"VariantsProperties": {
"type": "nested",
"properties": {
"Key": {
"type": "keyword"
},
"Value": {
"type": "keyword",
"fields": {
"numeric": {
"type": "integer",
"ignore_malformed": true
}
}
}
}
}
}
}
Once your mapping is modified, you can run _update_by_query on your index in order to reindex the VariantsProperties.Value data
PUT my-index/_update_by_query
Finally, when this last command is done, you can run the range aggregation on the VariantsProperties.Value.numeric field.
Also note that this second but will be more performant on the long term.

Using Elasticsearch, how do I apply function scores to documents which conditionally have a property

I have a handful of indexes, some of which have a particular date property indicating when it was published (date_publish), and others do not. I am trying to apply a gauss function to decay the score of documents which were published a long time ago. The relevant indexes are correctly configured to recognise the date_publish property as a date.
I have set up my query as follows, specifically filtering documents which do not have the property:
{
"index": "index_contains_prop,index_does_not_contains_prop",
"body": {
"query": {
"function_score": {
"score_mode": "avg",
"query": {
"match_all": {}
},
"functions": [
{
"script_score": {
"script": {
"source": "0"
}
}
},
{
"filter": {
"exists": {
"field": "date_publish"
}
},
"gauss": {
"date_publish": {
"origin": "now",
"scale": "728d",
"offset": "7d",
"decay": 0.5
}
}
}
]
}
},
"from": 0,
"size": 1000
}
}
However, the query errors with the following:
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "unknown field [date_publish]",
"line": 1,
"col": 0
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "index_does_not_contains_prop",
"node": "1hfXZK4TT3-K288nIr0UWA",
"reason": {
"type": "parsing_exception",
"reason": "unknown field [date_publish]",
"line": 1,
"col": 0
}
}
]
},
"status": 400
}
I have RTFM'd many times, and i can't see any discrepancy - I ahve also tried wrapping the exists condition in a bool:must object, to no avail.
Have I misunderstood the purpose of the filter argument?
The exists query will only work on fields that are part of the index mapping. It will return only documents that have a value for this field, but the field itself still needs to be defined in the mapping. This is why you're getting an error - index_does_not_contains_prop does not have date_publish mapped. You can use the put mapping API to add this field to the indexes who don't have it (it won't change any document), and then your query should work.

update a particular field of elasticsearch document

Hi I am trying to update documents a elasticsearch which meets specific criteria. I am using google sense(chrome extension) for making request. The request that I am making is as shown below:
GET styling_rules2/product_line_filters/_update
{
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{"term":{"product_line_attribute": "brand"}}
],
"minimum_should_match": 1
}
},
"filter": {
"term": {
"product_line_name": "women_skirts"
}
}
}
},
"script" : "ctx._source.brand=brands"
}
sample document is as shown below:
{
"product_line_attribute_db_path": "product_filter.brand",
"product_line_attribute": "brand",
"product_line_name": "women_skirts",
"product_line_attribute_value_list": [
"vero moda",
"faballey",
"only",
"rider republic",
"dorothy perkins"
]
}
desired result: update all the document which has product_line_attribute="brand" and product_line_name="women_skirts" to product_line_attribute="brands".
problem: I am getting the error as follows:
{
"error": {
"root_cause": [
{
"type": "search_parse_exception",
"reason": "failed to parse search source. unknown search element [script]",
"line": 18,
"col": 4
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "styling_rules2",
"node": "2ijp1pXwT46FN4on4-JPlg",
"reason": {
"type": "search_parse_exception",
"reason": "failed to parse search source. unknown search element [script]",
"line": 18,
"col": 4
}
}
]
},
"status": 400
}
thanks in advance!
You should use the _update_by_query endpoint and not _update. Also the script section is not correct, which is probably why you're getting a class_cast_exception.
Try this instead:
POST styling_rules2/product_line_filters/_update_by_query
{
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"term": {
"product_line_attribute": "brand"
}
}
],
"minimum_should_match": 1
}
},
"filter": {
"term": {
"product_line_name": "women_skirts"
}
}
}
},
"script": {
"inline": "ctx._source.brand=brands"
}
}

ES giving error when sorting by distance

I'm trying to sort search results by distance. However, when i try i get the following error:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "sort option [location] not supported"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "roeselaredev",
"node": "2UYlfd7sTd6qlJWgdK2wzQ",
"reason": {
"type": "illegal_argument_exception",
"reason": "sort option [location] not supported"
}
}
]
},
"status": 400
}
The query i sent looks like this:
GET _search
{
"query": {
"match_all": []
},
"sort": [
{
"geo_distance": {
"location": {
"lat": 50.9436034,
"long": 3.1242917
},
"order":"asc",
"unit":"km",
"distance_type":"plane"
}
},
{
"_score": {
"order":"desc"
}
}
]
}
As near as i can tell i followed the instructions in the documentation to the letter. I'm not getting a malformed query result. I'm just getting a not supported result for the sort by distance option. Any ideas as to what i'm doing wrong?
The query dsl is invalid the OP is almost-correct :) but missing an under-score.
While sorting by distance it is _geo_distance and not geo_distance.
Example:
GET _search
{
"query": {
"match_all": []
},
"sort": [
{
"_geo_distance": {
"location": {
"lat": 50.9436034,
"long": 3.1242917
},
"order":"asc",
"unit":"km",
"distance_type":"plane"
}
},
{
"_score": {
"order":"desc"
}
}
]
}

elasticsearch how to include many values for the same field in the match phrase

This is my query
GET blablabla/_search
{
"query": {
"bool": {
"must": [
{"match" : {"source" : "balblabla"}},
{"match" :{"blablablab" : "JBR"}},
{"match": {"city" : "blab bla"}},
{"match" : {"something": ["Balcony" , "Gym"]}}
]
}
}
}
I am getting this error:
{
"error": {
"root_cause": [
{
"type": "query_parsing_exception",
"reason": "[match] query parsed in simplified form, with direct field name, but included more options than just the field name, possibly use its 'options' form, with 'query' element?",
"index": "index name goes here",
"line": 8,
"col": 35
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "index name goes here",
"node": "4Qjq5UGPSZO2Qtg-ECI_mQ",
"reason": {
"type": "query_parsing_exception",
"reason": "[match] query parsed in simplified form, with direct field name, but included more options than just the field name, possibly use its 'options' form, with 'query' element?",
"index": "index name goes here",
"line": 8,
"col": 35
}
}
]
},
"status": 400
}
When I remove this line
{"match" : {"something": ["Balcony" , "Gym"]}}
It works fine, but why does that line causes a parsing error, since arrays are fine in json?
My elasticsearch version is 2.2
The error comes from the parser, you must specified with an "OR" when passing multiple needles.
to fix your case you must replace
{
"match": {
"something": [
"Balcony",
"Gym"
]
}
}
WITH
{
"match": {
"something": "Balcony OR Gym"
}
}
so at the end it should look like this:
{
"query": {
"bool": {
"must": [
{
"match": {
"source": "balblabla"
}
},
{
"match": {
"blablablab": "JBR"
}
},
{
"match": {
"city": "blab bla"
}
},
{
"match": {
"something": "Balcony OR Gym"
}
}
]
}
}
}
hope this will help you and point you in the right path :)
regards,
Daniel

Resources