Return Multi-Term Distinct Values - elasticsearch

Within an Elasticsearch index I am attempting to query by two distinct top-level field values, companyName and productName, ordered by a generatedDate field, and to include the domainModelId field.
The following SQL query returns all existing values; in its response I have highlighted the two unique document rows (in this case) by generatedDate:
{
"query": "SELECT companyName, productName, generatedDate FROM nextware_domain_metaservices_domainmodel ORDER BY generatedDate DESC"
}
The response is as follows:
I tried the following aggregation:
{
  "size": 0,
  "aggs": {
    "companies": {
      "terms": {
        "field": "companyName.keyword"
      },
      "aggs": {
        "products": {
          "terms": {
            "field": "productName.keyword"
          }
        }
      }
    }
  }
}
This returns the correct buckets, as follows:
"aggregations": {
"companies": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "NextWare",
"doc_count": 18,
"products": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ProductPortal",
"doc_count": 16
},
{
"key": "Domain",
"doc_count": 2
}
]
}
}
]
}
}
How can I include the value of the domainModelId.Id field without a second query?

To include the value of domainModelId.Id, you need to use a top_hits aggregation.
Adding a working example with index data, search query, and search result.
Index Data:
{
"companyName":"NextWare",
"productName":"Domain",
"domainModelId.Id":"i"
}
{
"companyName":"NextWare",
"productName":"Domain",
"domainModelId.Id":"c"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"a"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"b"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"d"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"e"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"f"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"g"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"h"
}
Search Query:
{
  "size": 0,
  "aggs": {
    "companies": {
      "terms": {
        "field": "companyName.keyword"
      },
      "aggs": {
        "products": {
          "terms": {
            "field": "productName.keyword"
          },
          "aggs": {
            "top_ids": {
              "top_hits": {
                "_source": {
                  "includes": [
                    "domainModelId.Id"
                  ]
                },
                "size": 10
              }
            }
          }
        }
      }
    }
  }
}
Search Result:
"aggregations": {
"companies": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "NextWare",
"doc_count": 9,
"products": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ProductPortal",
"doc_count": 7,
"top_ids": {
"hits": {
"total": {
"value": 7,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67049816",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"domainModelId.Id": "a"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"domainModelId.Id": "b"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_source": {
"domainModelId.Id": "d"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "5",
"_score": 1.0,
"_source": {
"domainModelId.Id": "e"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "6",
"_score": 1.0,
"_source": {
"domainModelId.Id": "f"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "7",
"_score": 1.0,
"_source": {
"domainModelId.Id": "g"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "8",
"_score": 1.0,
"_source": {
"domainModelId.Id": "h"
}
}
]
}
}
},
{
"key": "Domain",
"doc_count": 2,
"top_ids": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67049816",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"domainModelId.Id": "c"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "9",
"_score": 1.0,
"_source": {
"domainModelId.Id": "i"
}
}
]
}
}
}
]
}
}
]
}
}
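The original question also mentions ordering by generatedDate. The top_hits aggregation accepts a sort option, so each bucket can return only its most recent document. A minimal sketch, replacing the top_ids block in the query above and assuming generatedDate is mapped as a date (it does not appear in the sample index data):
"top_ids": {
  "top_hits": {
    "sort": [
      { "generatedDate": { "order": "desc" } }
    ],
    "_source": {
      "includes": [ "domainModelId.Id", "generatedDate" ]
    },
    "size": 1
  }
}
With "size": 1, each company/product bucket carries exactly one domainModelId.Id, taken from its newest document.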

Related

Create an Elasticsearch query to show 5 random questions by category

I have the fields Category and Questions in the table.
My requirement: for the 3 categories mentioned below, I need the questions tagged against each of them (so I want both the Category and Questions fields in the query result), using an Elasticsearch query.
Category :
OLA
BNA
DRG
GET logstash-sdc-feedback/_search?
{
  "_source": ["Category.keyword"],
  "size": 5,
  "query": {
    "bool": {
      "must": [
        { "match": { "Category.keyword"": "OLA","BNA","DRG" } }
      ],
    }
  },
  "aggs": {
    "MyBuckets": {
      "terms": {
        "field": "questions.keyword","Category.keyword"
        "order": { "_count": "asc" },
        "size": "5"
      }
    }
  }
}
You can use a terms query along with a terms aggregation and a top_hits sub-aggregation to achieve your use case.
Adding a working example
Index Data:
{
"category": "XYZ",
"question": "d"
}
{
"category": "OLA",
"question": "a"
}
{
"category": "BNA",
"question": "b"
}
{
"category": "DRG",
"question": "c"
}
Search Query:
{
  "query": {
    "bool": {
      "must": {
        "terms": {
          "category.keyword": [
            "OLA",
            "BNA",
            "DRG"
          ]
        }
      }
    }
  },
  "aggs": {
    "top_tags": {
      "terms": {
        "field": "category.keyword"
      },
      "aggs": {
        "top_faq_hits": {
          "top_hits": {
            "_source": {
              "includes": [
                "question"
              ]
            },
            "size": 1
          }
        }
      }
    }
  }
}
Search Result:
"aggregations": {
"top_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "BNA", // note this
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"question": "b" // note this
}
}
]
}
}
},
{
"key": "DRG",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"question": "c"
}
}
]
}
}
},
{
"key": "OLA",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"question": "a"
}
}
]
}
}
}
]
}
}
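The question title asks for 5 random questions per category, while top_hits normally returns the highest-scoring documents. One way to randomise the selection (not part of the original answer, just a sketch) is to wrap the terms query in a function_score query with random_score, so every document gets a random score and top_hits, which orders by score, returns a random sample; the top_hits size is also raised to 5 and category is added to the returned _source:
{
  "size": 0,
  "query": {
    "function_score": {
      "query": {
        "terms": {
          "category.keyword": [ "OLA", "BNA", "DRG" ]
        }
      },
      "random_score": {}
    }
  },
  "aggs": {
    "top_tags": {
      "terms": { "field": "category.keyword" },
      "aggs": {
        "top_faq_hits": {
          "top_hits": {
            "_source": { "includes": [ "question", "category" ] },
            "size": 5
          }
        }
      }
    }
  }
}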

How to make a flattened sub-field in a nested field in Elasticsearch?

Here, I have an indexed document like:
doc = {
  "id": 1,
  "content": [
    {
      "txt": "I",
      "time": 0
    },
    {
      "txt": "have",
      "time": 1
    },
    {
      "txt": "a book",
      "time": 2
    },
    {
      "txt": "do not match this block",
      "time": 3
    }
  ]
}
I want to match "I have a book" and return the matched times: 0, 1, 2. Does anyone know how to build the index and the query for this situation?
I think the "content.txt" should be flattened but "content.time" should be nested?
want to match "I have a book", and return the matched time: 0,1,2.
Adding a working example with index mapping, search query, and search result.
Index Mapping:
{
  "mappings": {
    "properties": {
      "content": {
        "type": "nested"
      }
    }
  }
}
Search Query:
{
  "query": {
    "nested": {
      "path": "content",
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "content.txt": "I have a book"
              }
            }
          ]
        }
      },
      "inner_hits": {}
    }
  }
}
Search Result:
"inner_hits": {
"content": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 2.5226097,
"hits": [
{
"_index": "64752029",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "content",
"offset": 2
},
"_score": 2.5226097,
"_source": {
"txt": "a book",
"time": 2
}
},
{
"_index": "64752029",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "content",
"offset": 0
},
"_score": 1.5580825,
"_source": {
"txt": "I",
"time": 0
}
},
{
"_index": "64752029",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "content",
"offset": 1
},
"_score": 1.5580825,
"_source": {
"txt": "have",
"time": 1
}
}
]
}
}
}
}
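If only the matched time values are needed, inner_hits accepts the same _source filtering as a normal search (using the full field path); a minimal variant of the query above:
{
  "query": {
    "nested": {
      "path": "content",
      "query": {
        "match": {
          "content.txt": "I have a book"
        }
      },
      "inner_hits": {
        "_source": {
          "includes": [ "content.time" ]
        }
      }
    }
  }
}
Each inner hit then carries only its time value (0, 1 and 2 for the matching blocks).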

Multiple aggregations in a single query in Elasticsearch

I have log data in the Elasticsearch index.
"hits": [
{
"_index": "event_log",
"_type": "log_type",
"_id": "2-d-kmoBazYRVz7KCQIj",
"_score": 1,
"_source": {
"user_id": 123,
"event": "click",
"category": "abc",
"product_id": 1112,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3ed-kmoBazYRVz7KCQLX",
"_score": 1,
"_source": {
"user_id": 456,
"event": "click",
"category": "abc",
"product_id": 112,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3ud-kmoBazYRVz7KCgIy",
"_score": 1,
"_source": {
"user_id": 1234,
"event": "click",
"category": "abc",
"product_id": 1112,
"bkt": "B"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4Od-kmoBazYRVz7KCgLr",
"_score": 1,
"_source": {
"user_id": 4567,
"event": "click",
"category": "xyz",
"product_id": 1118,
"bkt": "B"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4ud-kmoBazYRVz7KkwL2",
"_score": 1,
"_source": {
"user_id": 123,
"event": "cart",
"category": "xyz",
"product_id": 1,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "2ud-kmoBazYRVz7KCALB",
"_score": 1,
"_source": {
"user_id": 123,
"event": "cart",
"category": "xyz",
"product_id": 11,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3-d-kmoBazYRVz7KCgKP",
"_score": 1,
"_source": {
"user_id": 4567,
"event": "click",
"category": "abc",
"product_id": 111,
"bkt": "B"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3Od-kmoBazYRVz7KCQJ8",
"_score": 1,
"_source": {
"user_id": 456,
"event": "click",
"category": "abc",
"product_id": 111,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4ed-kmoBazYRVz7KCwJH",
"_score": 1,
"_source": {
"user_id": 4567,
"event": "click",
"category": "xyz",
"product_id": 1128,
"bkt": "B"
}
}
]}
I want to get the aggregation by category, bkt, and event. I also want to aggregate user_id by category and bkt. I have two separate queries for that.
Count of records aggregated by category, bkt, and event:
GET event_log/_search
{
  "size": 0,
  "aggs": {
    "category_id": {
      "terms": { "field": "category.keyword" },
      "aggs": {
        "ab_bucket": {
          "terms": { "field": "bkt.keyword" },
          "aggs": {
            "event_type": {
              "terms": { "field": "event.keyword" }
            }
          }
        }
      }
    }
  }
}
The result is
"aggregations": {
"category_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "abc",
"doc_count": 5,
"ab_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"event_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "click",
"doc_count": 3
}
]
}
},
{
"key": "B",
"doc_count": 2,
"event_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "click",
"doc_count": 2
}
]
}
}
]
}
},
Users aggregated by category and bkt:
GET event_log/_search
{
  "size": 0,
  "aggs": {
    "category_id": {
      "terms": { "field": "category.keyword" },
      "aggs": {
        "ab_bucket": {
          "terms": { "field": "bkt.keyword" },
          "aggs": {
            "total_uniq_users": {
              "cardinality": {
                "field": "user_id"
              }
            }
          }
        }
      }
    }
  }
}
The result is
"aggregations": {
"category_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "abc",
"doc_count": 5,
"ab_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"total_uniq_users": {
"value": 2
}
},
{
"key": "B",
"doc_count": 2,
"total_uniq_users": {
"value": 2
}
}
]
}
},
Is there a way to combine both queries and obtain the expected results in a single response?
Yes, you can do it like this:
GET event_log/_search
{
  "size": 0,
  "aggs": {
    "category_id": {
      "terms": {
        "field": "category.keyword"
      },
      "aggs": {
        "ab_bucket": {
          "terms": {
            "field": "bkt.keyword"
          },
          "aggs": {
            "total_uniq_users": {
              "cardinality": {
                "field": "user_id"
              }
            },
            "event_type": {
              "terms": {
                "field": "event.keyword"
              }
            }
          }
        }
      }
    }
  }
}
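Every ab_bucket bucket in the response now contains both total_uniq_users and event_type. Sub-aggregations can be combined at any level, so if a per-category user count is also wanted, a sibling cardinality aggregation can sit next to ab_bucket; a sketch (the name users_per_category is made up here):
{
  "size": 0,
  "aggs": {
    "category_id": {
      "terms": { "field": "category.keyword" },
      "aggs": {
        "users_per_category": {
          "cardinality": { "field": "user_id" }
        },
        "ab_bucket": {
          "terms": { "field": "bkt.keyword" },
          "aggs": {
            "total_uniq_users": { "cardinality": { "field": "user_id" } },
            "event_type": { "terms": { "field": "event.keyword" } }
          }
        }
      }
    }
  }
}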

Elasticsearch: Query the most recent that doesn't contain the field 'X'

I have the following search query:
{
"query": {
"match": {
"name": "testlib"
}
}
}
When I do this query I get the three results below. What I want to do now is only return one result: the newest #timestamp that doesn't contain version_pre. So in this case, only return AV6qvDXDyHw9vNh6Wlpl.
[
{
"_index": "testsoftware",
"_type": "software",
"_id": "AV6qvDXDyHw9vNh6Wlpl",
"_score": 0.2876821,
"_source": {
"#timestamp": "2017-09-21T11:02:15-04:00",
"name": "testlib",
"version_major": 1,
"version_minor": 0,
"version_patch": 1
}
},
{
"_index": "testsoftware",
"_type": "software",
"_id": "AV6qvDF5MtcMTuGknsVs",
"_score": 0.18232156,
"_source": {
"#timestamp": "2017-09-20T17:21:35-04:00",
"name": "testlib",
"version_major": 1,
"version_minor": 0,
"version_patch": 0
}
},
{
"_index": "testsoftware",
"_type": "software",
"_id": "AV6qvDnVyHw9vNh6Wlpn",
"_score": 0.18232156,
"_source": {
"#timestamp": "2017-09-22T13:56:55-04:00",
"name": "testlib",
"version_major": 1,
"version_minor": 0,
"version_patch": 2,
"version_pre": 0
}
}
]
Use sort (https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-sort.html) together with an exists query inside must_not (https://www.elastic.co/guide/en/elasticsearch/reference/2.3/query-dsl-exists-query.html):
{
  "size": 1,
  "sort": [
    { "#timestamp": { "order": "desc" } }
  ],
  "query": {
    "bool": {
      "must_not": {
        "exists": {
          "field": "version_pre"
        }
      }
    }
  }
}
Or even, via query string:
/_search?sort=#timestamp:desc&size=1&q=_missing_:version_pre
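Note that the _missing_ query string syntax was removed in later Elasticsearch versions; on those, an equivalent query string (a sketch) negates _exists_ instead:
/_search?sort=#timestamp:desc&size=1&q=NOT _exists_:version_pre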

How to correctly aggregate when the field is a list in Elasticsearch

Currently the ES logs are indexed in a way that some fields have a list instead of a single value.
For example:
"_source": {
  "field1": ["item1", "item2", "item3"],
  "field2": "something",
  "field3": "something_else"
}
Of course, the length of the list is not always the same. I'm trying to find a way to count the number of logs that contain each item (so some logs will be counted multiple times).
I know I have to use aggs, but how can I form the right query (after -d)?
You can use the query below, which uses a terms aggregation with a script and top_hits.
{
  "size": 0,
  "aggs": {
    "group": {
      "terms": {
        "script": "_source.field1.each{}"
      },
      "aggs": {
        "top_hits_log": {
          "top_hits": {}
        }
      }
    }
  }
}
Output will be:
"buckets": [
{
"key": "item1",
"doc_count": 3,
"top_hits_log": {
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "so",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2",
"item3"
],
"field2": "something1"
}
},
{
"_index": "so",
"_type": "test",
"_id": "2",
"_score": 1,
"_source": {
"field1": [
"item1"
],
"field2": "something2"
}
},
{
"_index": "so",
"_type": "test",
"_id": "3",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2"
],
"field2": "something3"
}
}
]
}
}
},
{
"key": "item2",
"doc_count": 2,
"top_hits_log": {
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "so",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2",
"item3"
],
"field2": "something1"
}
},
{
"_index": "so",
"_type": "test",
"_id": "3",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2"
],
"field2": "something3"
}
}
]
}
}
},
{
"key": "item3",
"doc_count": 1,
"top_hits_log": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "so",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"field1": [
"item1",
"item2",
"item3"
],
"field2": "something1"
}
}
]
}
}
}
]
Make sure to enable dynamic scripting. Set script.disable_dynamic: false
Hope this helps.
There is no need to use scripting. It will be slow, especially the _source parsing. You also need to make sure your field1 is not_analyzed, or you will get weird results, as the terms aggregation is performed on the unique tokens in the inverted index.
{
  "size": 0,
  "aggs": {
    "unique_items": {
      "terms": {
        "field": "field1",
        "size": 100
      },
      "aggs": {
        "documents": {
          "top_hits": {
            "size": 10
          }
        }
      }
    }
  }
}
Here the size is 100 inside the terms aggregation; change this according to how many unique values you expect (the default is 10).
Hope this helps!
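For reference, on current Elasticsearch versions the not_analyzed equivalent is a keyword mapping; a sketch for the field1 field from the question:
{
  "mappings": {
    "properties": {
      "field1": {
        "type": "keyword"
      }
    }
  }
}
With field1 mapped as keyword, the terms aggregation buckets on the whole values ("item1", "item2", ...) rather than on analyzed tokens.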
