Counting search results in ElasticSearch by a nested property

Counting search results in ElasticSearch by a nested property - elasticsearch

Here is a schema with a nested property.
{
"dynamic": "strict",
"properties" : {
"Id" : {
"type": "integer"
},
"Name_en" : {
"type": "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"normalizer": "cloudbuy_normalizer_alphanumeric"
},
"text" : {
"type" : "text",
"analyzer": "english"
}
}
},
"Menus" : {
"type" : "nested",
"properties" : {
"Id" : {
"type" : "integer"
},
"Name" : {
"type" : "keyword",
"normalizer": "normalizer_alphanumeric"
},
"AncestorsIds" : {
"type" : "integer"
}
}
}
}
}
And here is a document.
{
"Id": 12781279,
"Name": "Thing of purpose made to fit",
"Menus": [
{
"Id": -571057,
"Name": "Top level menu",
"AncestorsIds": [
-571057
]
}
,
{
"Id": 1022313,
"Name": "Other",
"AncestorsIds": [
-571057
,
1022313
]
}
]
}
For any given query I need a list with two columns: the Menu.Id and the number of documents in the result set that have that Menu.Id in their Menus array.
How?
(Is there any documentation for aggs that isn't impenetrable?)

@Richard, does this query suit your needs?
POST yourindex/_search
{
"_source": "false",
"aggs":{
"menus": {
"nested": {
"path": "Menus"
},
"aggs":{
"menu_aggregation": {
"terms": {
"field": "Menus.Id",
"size": 10
}
}
}
}
}
Output :
"aggregations": {
"menus": {
"doc_count": 2,
"menu_aggregation": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": -571057,
"doc_count": 1
},
{
"key": 1022313,
"doc_count": 1
}
]
}
}
Here we specify a nested path and then aggregate on the menu Ids.
You can take a look at this documentation page: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-nested-aggregation.html

Related

Filter aggregation keys with non nested mapping in elasticsearch

I have the following mapping:
{
"Country": {
"properties": {
"State": {
"properties": {
"Name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"Code": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"Lang": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
}
}
}
}
}
}
This is a sample document:
{
"Country": {
"State": [
{
"Name": "California",
"Code": "CA",
"Lang": "EN"
},
{
"Name": "Alaska",
"Code": "AK",
"Lang": "EN"
},
{
"Name": "Texas",
"Code": "TX",
"Lang": "EN"
}
]
}
}
I am querying this index to get a count of states aggregated by name. I am using the following query:
{
"from": 0,
"size": 0,
"query": {
"query_string": {
"query": "Country.State.Name: *Ala*"
}
},
"aggs": {
"counts": {
"terms": {
"field": "Country.State.Name.raw",
"include": ".*Ala.*"
}
}
}
}
I am able to get only the keys matching the query_string by using an include regex in the terms aggregation, but there seems to be no way to make the include regex case-insensitive.
The result I want is:
{
"aggregations": {
"counts": {
"buckets": [
{
"key": "Alaska",
"doc_count": 1
}
]
}
}
}
Is there another solution that returns only the keys matching the query_string without using a nested mapping?

Use a normalizer for the keyword datatype. Below is a sample mapping:
Mapping:
PUT country
{
"settings": {
"analysis": {
"normalizer": {
"my_normalizer": { <---- Note this
"type": "custom",
"filter": ["lowercase"]
}
}
}
},
"mappings": {
"properties": {
"Country": {
"properties": {
"State": {
"properties": {
"Name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword",
"normalizer": "my_normalizer" <---- Note this
}
}
},
"Code": {
"type": "text",
"fields": {
"raw": {
"type": "keyword",
"normalizer": "my_normalizer"
}
}
},
"Lang": {
"type": "text",
"fields": {
"raw": {
"type": "keyword",
"normalizer": "my_normalizer"
}
}
}
}
}
}
}
}
}
}
Document:
POST country/_doc/1
{
"Country": {
"State": [
{
"Name": "California",
"Code": "CA",
"Lang": "EN"
},
{
"Name": "Alaska",
"Code": "AK",
"Lang": "EN"
},
{
"Name": "Texas",
"Code": "TX",
"Lang": "EN"
}
]
}
}
Aggregation Query:
POST country/_search
{
"from": 0,
"size": 0,
"query": {
"query_string": {
"query": "Country.State.Name: *Ala*"
}
},
"aggs": {
"counts": {
"terms": {
"field": "Country.State.Name.raw",
"include": "ala.*"
}
}
}
}
Notice the query pattern in include. All the values of your *.raw fields are stored in lowercase because of the normalizer I've applied, so the include pattern must be written in lowercase as well.
Response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"counts" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "alaska",
"doc_count" : 1
}
]
}
}
}
Hope this helps!

I was able to fix the problem by using an inline script to filter the keys. (It is still a dirty fix, but it solves my use case for now and lets me avoid mapping changes.)
Here is how I am executing query.
{
"from": 0,
"size": 0,
"query": {
"query_string": {
"query": "Country.State.Name: *Ala*"
}
},
"aggs": {
"counts": {
"terms": {
"script": {
"source": "doc['Country.State.Name.raw'].value.toLowerCase().contains('ala') ? doc['Country.State.Name.raw'].value : null",
"lang": "painless"
}
}
}
}
}

Elastic search fuzzy query unexpected results

I have 2 indices, cities and places. The places index has a mapping like this:
{
"mappings": {
"properties": {
"cityId": {
"type": "integer"
},
"cityName": {
"type": "text"
},
"placeName": {
"type": "text"
},
"status": {
"type": "keyword"
},
"category": {
"type": "keyword"
},
"reviews": {
"properties": {
"rating": {
"type": "long"
},
"comment": {
"type": "keyword"
},
"user": {
"type": "nested"
}
}
}
}
}
}
And the cities index is mapped like this:
{
"mappings": {
"properties": {
"state": {
"type": "keyword"
},
"postal": {
"type": "keyword"
},
"phone": {
"type": "keyword"
},
"email": {
"type": "keyword"
},
"notes": {
"type": "keyword"
},
"status": {
"type": "keyword"
},
"cityName": {
"type": "text"
},
"website": {
"type": "keyword"
},
"cityId": {
"type": "integer"
}
}
}
}
Initially we had a single document where cities had places embedded, but I was having trouble searching the nested places array, so I changed the structure to this. I want to be able to search both cityName and placeName in a single query with fuzziness. I have a city including the word Welder's in its name, and some places inside the same location also have the word Welder's in their name; these fields have type: text. However, when I search for welder, neither of the queries below returns these documents, whereas a search for welders or welder's does return them. I am not sure why welder won't match Welder's. I didn't specify any analyzer when creating either index, and I am not explicitly defining one in the query. Can anyone help me out with this query so it behaves as expected?
Query 1: index = places
{
"query": {
"bool": {
"should": [
{
"match": {
"placeName": {
"query": "welder",
"fuzziness": 20
}
}
},
{
"match": {
"cityName": {
"query": "welder",
"fuzziness": 20
}
}
}
]
}
}
}
Query 2: index = places
{
"query": {
"match": {
"placeName": {
"query": "welder",
"fuzziness": 20
}
}
}
}
Can anyone post a query that when passed a word welder would return documents having Welder's in their name (should also work for other terms like these, this is just an example)
Edit 1 :
This is a sample place document I would want to be returned by any of the queries posted above:
{
cityId: 29,
placeName: "Welder's Garage Islamabad",
cityName: "Islamabad",
status: "verified",
category: null,
reviews: []
}

Using your mapping and query with fuzziness set to "20", I do get the document back. Fuzziness: 20 will tolerate an edit distance of 20 between the searched word and welder's, so even "w" will match "welder's". I think this value is different in your actual query.
If you want to search for welder or welders and have welder's returned, you can use a stemmer token filter.
Mapping:
PUT indexfuzzy
{
"mappings": {
"properties": {
"cityId": {
"type": "integer"
},
"cityName": {
"type": "text"
},
"placeName": {
"type": "text",
"analyzer": "my_analyzer"
},
"status": {
"type": "keyword"
},
"category": {
"type": "keyword"
},
"reviews": {
"properties": {
"rating": {
"type": "long"
},
"comment": {
"type": "keyword"
},
"user": {
"type": "nested"
}
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"stem_possessive_english",
"stem_minimal_english"
]
}
},
"filter": {
"stem_possessive_english": {
"type": "stemmer",
"name": "possessive_english"
},
"stem_minimal_english": {
"type": "stemmer",
"name": "minimal_english"
}
}
}
}
}
Query :
GET indexfuzzy/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"placeName": {
"query": "welder"--> welder,welders,welder's will work
}
}
},
{
"match": {
"cityName": {
"query": "welder"
}
}
}
]
}
}
}
Result:
[
{
"_index" : "indexfuzzy",
"_type" : "_doc",
"_id" : "Jc-yx3ABd7NBn_0GTBdp",
"_score" : 0.2876821,
"_source" : {
"cityId" : 29,
"placeName" : "Welder's Garage Islamabad",
"cityName" : "Islamabad",
"status" : "verified",
"category" : null,
"reviews" : [ ]
}
}
]
possessive_english:- removes trailing 's from tokens
minimal_english:- removes plurals
GET <index_name>/_analyze
{
"text": "Welder's Garage Islamabad",
"analyzer": "my_analyzer"
}
returns
{
"tokens" : [
{
"token" : "welder", --> will be matched for welder's, welders
"start_offset" : 0,
"end_offset" : 8,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "garage",
"start_offset" : 9,
"end_offset" : 15,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "islamabad",
"start_offset" : 16,
"end_offset" : 25,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}

Synonyms aggregation in elasticsearch 7 - term based

I am trying to aggregate on fields, but some values are similar, like Med and Medium. I don't want both to appear in my aggregation results; only one of them should. I tried using synonyms, but it doesn't seem to work.
Question is: How can I concatenate or unify similar aggregation results when it is term based?
Below is my work.
Mapping and Setting
{
"settings": {
"index" : {
"analysis" : {
"filter" : {
"synonym_filter" : {
"type" : "synonym",
"synonyms" : [
"medium, m, med",
"large, l",
"extra small, xs, x small"
]
}
},
"analyzer" : {
"synonym_analyzer" : {
"tokenizer" : "standard",
"filter" : ["lowercase", "synonym_filter"]
}
}
}
}
},
"mappings": {
"properties": {
"skus": {
"type": "nested",
"properties": {
"labels": {
"dynamic": "true",
"properties": {
"Color": {
"type": "text",
"fields": {
"synonym": {
"analyzer": "synonym_analyzer",
"type": "text",
"fielddata":true
}
}
},
"Size": {
"type": "text",
"fields": {
"synonym": {
"analyzer": "synonym_analyzer",
"type": "text",
"fielddata":true
}
}
}
}
}
}
}
}
}}
Aggregation
{
"aggs":{
"sizesFilter": {
"aggs": {
"sizes": {
"terms": {
"field": "skus.labels.Size.synonym"
}
}
},
"nested": {
"path": "skus"
}
}
}}
With only one doc my aggregation result is
"aggregations": {
"sizesFilter": {
"doc_count": 1,
"sizes": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "m",
"doc_count": 1
},
{
"key": "med",
"doc_count": 1
},
{
"key": "medium",
"doc_count": 1
}
]
}
}
}

I got it by setting tokenizer in analyzer to "keyword"
{
"analyzer" : {
"synonym_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase", "synonym_filter"]
}
}
}

Elasticsearch mapping document for epoch in seconds

As far as I understand, es only supports epoch in ms. My data source is couchbase and the json documents in there have an insertEpoch that is stored in seconds. I have been struggling to make my mapping document do the seconds to ms conversion for me.
Here is my mapping doc:
{
"template" : "cb*",
"order" : 10,
"mappings" : {
"couchbaseCheckpoint" : {
"_source" : {
"includes" : ["doc.*"]
},
"dynamic_templates": [
{
"store_no_index": {
"match": "*",
"mapping": {
"store" : "no",
"index" : "no",
"include_in_all" : false
}
}
}
]
},
"couchbaseDoc" : {
"_source" : {
"includes" : ["meta.*","doc.*"]
},
"properties" : {
"meta" : {
"type" : "object",
"include_in_all" : false
},
"doc" : {
"type" : "nested",
"include_in_all" : false,
"transform": {
"script": "ctx._source['insertEpoch'] = ctx._source['insertEpoch'] * 1000",
"params": {},
"lang": "groovy"
}
}
}
}
}
}
The transform isn't happening.
New mapping document:
{
"template" : "wheepl",
"order" : 10,
"mappings" : {
"couchbaseCheckpoint" : {
"_source" : {
"includes" : ["doc.*"]
},
"dynamic_templates": [
{
"store_no_index": {
"match": "*",
"mapping": {
"store" : "no",
"index" : "no",
"include_in_all" : false
}
}
}
]
},
"couchbaseDoc" : {
"_timestamp" : {
"enabled" : true,
"store" : true
},
"properties" : {
"meta" : {
"type" : "object",
"include_in_all" : false
},
"doc" : {
"type" : "object",
"include_in_all" : false,
"updateEpoch" : {
"type" : "date",
"format" : "date_time",
"numeric_resolution" : "seconds"
}
}
}
}
}
}
I don't even see the _timestamp field that I should be seeing!
Here's a Kibana screenie:
Thanks

This works in 1.6, as per https://github.com/elastic/elasticsearch/pull/10420.
Even though the date itself is kept internally in milliseconds, you can index it in seconds and retrieve it in seconds, i.e. exactly as you indexed it.
I've tried out a simple test, to see this in action:
PUT /test_dates
{
"mappings": {
"test": {
"properties": {
"time_stamp": {
"type": "date",
"format": "date_time",
"numeric_resolution": "seconds"
}
}
}
}
}
Test data:
POST /test_dates/test/1
{
"time_stamp": "9231200"
}
Retrieving it:
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_dates",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"time_stamp": "9231200"
}
}
]
}
To prove it works, running this aggregation:
GET /test_dates/test/_search?search_type=count
{
"aggs": {
"NAME": {
"date_histogram": {
"field": "time_stamp",
"interval": "second",
"format": "yyyy-MM-dd"
}
}
}
}
returns
"aggregations": {
"NAME": {
"buckets": [
{
"key_as_string": "1970-04-17",
"key": 9231200000,
"doc_count": 1
}
]
}
}
Also, your template is a bit wrong. It should be:
"couchbaseDocument": {
"_timestamp": {
"enabled": true,
"store": true
},
"properties": {
"meta": {
"type": "object",
"include_in_all": false
},
"doc": {
"type": "object",
"include_in_all": false,
"properties": {
"updateEpoch": {
"type": "date",
"format": "date_time",
"numeric_resolution": "seconds"
}
}
}
}
}

How to aggregate sub buckets of each bucket on nested documents

Full sample code:
https://gist.github.com/anonymous/329eaaf5654096c529da
I have a simple, standard product/options mapping like this for a standard ecommerce site:
"mappings": {
"product": {
"properties" : {
"name":
{
"type": "string",
"fields": {
"raw": { "type": "string", "analyzer": "lowercase" }
},
"analyzer": "default"
},
"options" : {
"type": "nested",
"properties": {
"id": {"type": "integer"},
"name": {"type": "string"},
"values": {"type": "nested"}
}
},
"price":{"type": "integer"},
"createdAt": {
"type": "date",
"format": "basic_date_time"
}
}
}
}
Please note that one product has multiple options, and each option can have multiple values (e.g. a shirt with the option Color including blue and red, and the option Size including M and XL).
Currently, after the query to search for products using multiple conditions, I aggregate the result to get a list of all options and options values in the result set:
"aggregations": {
"options": {
"nested": {
"path": "options"
},
"aggs": {
"options_ids": {
"terms": {
"field": "id"
}
},
"aggs": {
"nested": {
"path": "options.values"
},
"aggs": {
"options_values_ids": {
"terms": {
"field": "options.values.id"
}
}
}
}
}
}
}
It all works well, except that I get something like this:
"aggregations": {
"options": {
"doc_count": 4,
"options_ids": {
"buckets": [
{
"key": 1,
"doc_count": 2
},
{
"key": 2,
"doc_count": 2
}
]
},
"aggs": {
"doc_count": 7,
"options_values_ids": {
"buckets": [
{
"key": 1,
"doc_count": 2
},
{
"key": 5,
"doc_count": 2
},
{
"key": 2,
"doc_count": 1
},
{
"key": 3,
"doc_count": 1
},
{
"key": 6,
"doc_count": 1
}
]
}
}
}
}
As you can see, there is no way for me to know from the result which option values belong to which option. It would be much better if the available option values could be listed under each option. Is that possible at all?

You would need to nest your aggregations:
"aggregations": {
"options" : {
"aggs" : {
"options_ids" : {
"aggs" : {
"aggs" : {
"options_values_ids" : {
"terms" : {
"field" : "options.values.id"
}
}
},
"nested" : {
"path" : "options.values"
}
},
"terms" : {
"field" : "id"
}
}
},
"nested" : {
"path" : "options"
}
}
}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Counting search results in ElasticSearch by a nested property - elasticsearch

Related

Filter aggregation keys with non nested mapping in elasticsearch

Elastic search fuzzy query unexpected results

Synonyms aggregation in elasticsearch 7 - term based

Elasticsearch mapping document for epoch in seconds

How to aggregate sub buckets of each bucket on nested documents

Categories

Resources