Elasticsearch cross-index query with aggregations - elasticsearch

I use: Elasticsearch 7.7 , Kibana 7.7
For example, let's take two indexes:
User index with simple mapping:
PUT /user_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"user_phone": { "type": "text" },
"name": { "type": "text" }
}
}
}
Check index with simple mapping:
PUT /check_index
{
"mappings": {
"properties": {
"user_id": { "type": "text" },
"price": { "type": "integer" },
"goods_count": {"type": "integer"}
}
}
}
I want to build table visualization like that:
________________________________________________________________________
user_id | user_phone | average_price | sum_goods_count |
___________|_______________|_____________________|______________________
1 | 123 | 512 | 64 |
___________|_______________|_____________________|______________________
2 | 456 | 256 | 16 |
___________|_______________|_____________________|______________________
So my questions are:
Is this possible?
Do I understand correctly that I need to query these two indexes, get a list of users, and then in a loop create shopping carts with checks?

First things first: you should de-normalize data in ES as much as possible to get the best performance and capabilities it offers. Having gone through the samples you provided and the comments on the question, it seems this can easily be achieved in your use case by combining the user and check indexes into a single index, as shown in the example below.
Index mapping
{
"mappings": {
"properties": {
"user_id": {
"type": "text",
"fielddata": "true"
},
"price": {
"type": "integer"
},
"goods_count": {
"type": "integer"
}
}
}
}
Index Data:
With the index mapping defined above, index these three documents, where one document has "user_id":"1" and two documents have "user_id":"2".
{
"user_id":"1",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":500,
"goods_count":100
}
{
"user_id":"2",
"price":100,
"goods_count":200
}
Search Query:
Refer to ES official documentation on Terms Aggregation, Top Hits aggregation, Sum aggregation and Avg aggregation to get detailed explanation.
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "user_id"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"user_id"
]
}
}
},
"avg_price": {
"avg": {
"field": "price"
}
},
"goods_count": {
"sum": {
"field": "goods_count"
}
}
}
}
}
}
Search Result:
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
]
},
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"user_id": "2"
}
},
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"user_id": "2"
}
}
]
}
},
"avg_price": {
"value": 300.0
},
"goods_count": {
"value": 300.0
}
},
{
"key": "1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_63925596",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"user_id": "1"
}
}
]
}
},
"avg_price": {
"value": 500.0
},
"goods_count": {
"value": 100.0
}
}
]
}
}
}
As you can see in the search results above, for "user_id":"2" the average price is (500+100)/2 = 300 and sum of goods_count is 100+200 = 300.
Similarly for "user_id":"1" the average price is 500/1 = 500 and sum of goods_count is 100.

Related

Querying array with nested objects in Elasticsearch to get multiple objects

I have data in Elasticsearch in the below format -
"segments": [
{"id": "ABC", "value":123},
{"id": "PQR", "value":345},
{"id": "DEF", "value":567},
{"id": "XYZ", "value":789},
]
I want to retrieve all segments where id is "ABC" or "DEF".
I looked up the docs (https://www.elastic.co/guide/en/elasticsearch/reference/7.9/query-dsl-nested-query.html) and a few examples on YouTube, but they all seem to retrieve only a single object, while I want to retrieve more than one.
Is there a way to do this?
You can use nested query with inner hits as shown here.
I assume your index mapping looks like the one below, with the segments field defined as nested:
"mappings": {
"properties": {
"segments": {
"type": "nested",
"properties": {
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "long"
}
}
}
}
}
You can use below Query:
{
"_source" : false,
"query": {
"nested": {
"path": "segments",
"query": {
"terms": {
"segments.id.keyword": [
"ABC",
"DEF"
]
}
},
"inner_hits": {}
}
}
}
Response:
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_score": 1,
"inner_hits": {
"segments": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 0
},
"_score": 1,
"_source": {
"id": "ABC",
"value": 123
}
},
{
"_index": "73895503",
"_id": "TmM8iYMBrWOLJcwdvQGG",
"_nested": {
"field": "segments",
"offset": 2
},
"_score": 1,
"_source": {
"id": "DEF",
"value": 567
}
}
]
}
}
}
}
]
}

Max and min from all index in query

Is there a way to get the max and min for all documents in the index, not only the max and min from category "game", without making another request to Elasticsearch?
{
"query": {
"bool": {
"must": [
{
"match": {
"category": "game"
}
}
]
}
},
"aggs": {
"maxPoints": {
"max": {
"field": "points"
}
},
"minPoints": {
"min": {
"field": "points"
}
}
}
Here is some of the data I have. With the query above, I want to get the 2 docs from category game, and min 0, max 100 instead of min 10, max 20.
[
{
"id": 1,
"category": "offer",
"points": 0
},
{
"id": 2,
"category": "game",
"points": 10
},
{
"id": 3,
"category": "game",
"points": 20
},
{
"id": 4,
"category": "offer",
"points": 100
}
]
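Yes, just remove the match clause and use a match_all query so that the aggregations are computed over all the documents in your index. Then use post_filter, which is applied to the hits only after the aggregations have been calculated, to restrict the returned documents to the "game" category and get the expected results in a single ES call.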
{
"query": {
"match_all": {}
},
"aggs": {
"maxPoints": {
"max": {
"field": "points"
}
},
"minPoints": {
"min": {
"field": "points"
}
}
},
"post_filter": { // Note this
"term": {
"category": "game"
}
}
}
Output
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65406564",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"id": 2,
"category": "game",
"points": 10
}
},
{
"_index": "65406564",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"id": 3,
"category": "game",
"points": 20
}
}
]
},
"aggregations": {
"maxPoints": {
"value": 100.0
},
"minPoints": {
"value": 0.0
}
}
}

How to get sum value for fields based on input field in elasticsearch (input field and sum output fields are different)

These are the documents present in Elasticsearch. I want output grouped by a given field, returning the sum of high and the sum of medium for each group; the values of high and medium must be greater than 0.
{
"host_id": 1,
"hostname": "Hostname1",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Un",
"Application":"App1"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 1,
"low": 0
},
{
"host_id": 2,
"hostname": "Hostname2",
"businesshierarchy": {
"businessunit": "One Unit",
"Location":"Un",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 2,
"low": 0
},
{
"host_id": 3,
"hostname": "Hostname3",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Uk",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 2,
"medium": 2,
"low": 0
}
Is there any query or method to get output like the following in Elasticsearch?
based on location
Location - Un
High - 2
medium - 3
Location - Uk
High - 2
medium - 2
Based on application
Application - App1
High - 1
medium - 1
Application - App2
High - 3
medium - 4
or based on hostname
hostname - Hostname1
High - 1
medium - 1
hostname - Hostname2
High - 1
medium - 2
hostname - Hostname3
High - 2
medium - 2
Similarly for businessunit. The field name is passed dynamically (businessunit, hostname, application, or location), and based on it I want to get the sum of the high and medium values, as in the output above.
Adding a working example with index mapping, index data(same as that given in question), search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
Search Query:
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Location"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"high",
"medium"
]
}
}
},
"high_sum": {
"sum": {
"field": "high"
}
},
"medium_sum": {
"sum": {
"field": "medium"
}
}
}
}
}
}
Search Result:
Based on the location
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Un",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 3.0
}
},
{
"key": "Uk",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 2.0
}
}
]
}
For querying on the basis of application replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Application"
},
The following search result will be there:
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "App2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 3.0
},
"medium_sum": {
"value": 4.0
}
},
{
"key": "App1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
}
]
}
For querying on the basis of hostname replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "hostname"
},
Search Results will be :
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Hostname1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
},
{
"key": "Hostname2",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 2.0
}
},
{
"key": "Hostname3",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0
},
"medium_sum": {
"value": 2.0
}
}
]
}
We can use this query to get the expected result:
{
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"range": {
"medium": {
"gt": 0
}
}
},
{
"range": {
"high": {
"gt": 0
}
}
}
]
}
}
]
}
},
"aggs": {
"fieldnames": {
"terms": {
"field": "hostname.keyword"
},
"aggs": {
"medium": {
"sum": {
"field": "medium"
}
},
"high": {
"sum": {
"field": "high"
}
}
}
}
},
"size": 0
}
The search result for this looks like this:
"aggregations": {
"fieldnames": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ALL Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 7.0
}
},
{
"key": "Latest Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 5.0
}
},
{
"key": "NO Unit",
"doc_count": 1,
"high": {
"value": 1.0
},
"medium": {
"value": 1.0
}
}
]
}
}
If we need the result for location or application, we just need to change the field used in the terms aggregation:
for Application
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Application.keyword"
}
for Location
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Location.keyword"
}
if the mapping is something like this,
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
There is no need for adding .keyword to
"terms": {
"field": "businesshierarchy.Location"
}

Unique search results from ElasticSearch

I am new to ElasticSearch and can't quite figure out whether what I want is possible or not.
I can query like this:
GET entity/_search
{
"query": {
"bool": {
"must": [
{ "match": { "searchField": "searchValue" }}
]
}
},
"aggs" : {
"uniq_Id" : {
"terms" : { "field" : "Id", "size":500 }
}
}
}
and it will return the top search results and the terms aggregation buckets. But ideally, what I would like the search results to return is only one document (perhaps the top one, it does not matter) for each of the unique Ids defined in the terms aggregation.
You can make use of Terms Aggregation along with the Top Hits Aggregation to give you the result you are looking for.
Now once you do that, specify the size as 1 in the Top Hits Aggregation
Based on your query I've created sample mapping,documents, aggregation query and the response for your reference.
Mapping:
PUT mysampleindex
{
"mappings": {
"mydocs": {
"properties": {
"searchField":{
"type": "text"
},
"Id": {
"type": "keyword"
}
}
}
}
}
Sample Documents:
POST mysampleindex/mydocs/1
{
"searchField": "elasticsearch",
"Id": "1000"
}
POST mysampleindex/mydocs/2
{
"searchField": "elasticsearch is awesome",
"Id": "1000"
}
POST mysampleindex/mydocs/3
{
"searchField": "elasticsearch is awesome",
"Id": "1001"
}
POST mysampleindex/mydocs/4
{
"searchField": "elasticsearch is pretty cool",
"Id": "1001"
}
POST mysampleindex/mydocs/5
{
"searchField": "elasticsearch is pretty cool",
"Id": "1002"
}
Query:
POST mysampleindex/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"searchField": "elasticsearch"
}
}
]
}
},
"aggs": {
"myUniqueIds": {
"terms": {
"field": "Id",
"size": 10
},
"aggs": {
"myDocs": {
"top_hits": { <---- Top Hits Aggregation
"size": 1 <---- Note this
}
}
}
}
}
}
Sample Response:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"myUniqueIds": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1000",
"doc_count": 2,
"myDocs": {
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "1",
"_score": 0.2876821,
"_source": {
"searchField": "elasticsearch",
"Id": "1000"
}
}
]
}
}
},
{
"key": "1001",
"doc_count": 2,
"myDocs": {
"hits": {
"total": 2,
"max_score": 0.25316024,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "3",
"_score": 0.25316024,
"_source": {
"searchField": "elasticsearch is awesome",
"Id": "1001"
}
}
]
}
}
},
{
"key": "1002",
"doc_count": 1,
"myDocs": {
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "mysampleindex",
"_type": "mydocs",
"_id": "5",
"_score": 0.2876821,
"_source": {
"searchField": "elasticsearch is pretty cool",
"Id": "1002"
}
}
]
}
}
}
]
}
}
}
Notice that I am not returning any regular search hits from the bool query in the above (size is set to 0); the search results you are looking for come in the form of the Top Hits Aggregation.
Hope this helps!

ElasticSearch - Return unique result by field values

I have 3 "places" having each a type and a location:
PUT places
{
"mappings": {
"test": {
"properties": {
"type": { "type": "keyword" },
"location": { "type": "geo_point" }
}
}
}
}
POST places/test
{
"type" : "A",
"location": {
"lat": 1.378446,
"lon": 103.763427
}
}
POST places/test
{
"type" : "B",
"location": {
"lat": 1.478446,
"lon": 104.763427
}
}
POST places/test
{
"type" : "A",
"location": {
"lat": 1.278446,
"lon": 102.763427
}
}
I'd like to retrieve only one place per "type": the closest to a given position, let's say "lat": 1.178446, "lon": 101.763427
In my example, the result should consist of exactly 2 elements (one for "type: A" and one for "type: B").
I'd also prefer to avoid "aggregations" as I will need the _source of each place.
Any help would be great.
Without an aggregation, such an operation seems impossible to execute in a single query.
This can be achieved with the top-hits-aggregation.
The following has been tested with elasticsearch 6:
POST /places/_search?size=0
{
"aggs" : {
"group-by-type" : {
"terms" : { "field" : "type" },
"aggs": {
"min-distance": {
"top_hits": {
"sort": {
"_script": {
"type": "number",
"script": {
"source": "def x = doc['location'].lat; def y = doc['location'].lon; return Math.abs(x-1.178446) + Math.abs(y-101.763427)",
"lang": "painless"
},
"order": "asc"
}
},
"_source": {
"includes": [ "type", "location" ]
},
"size" : 1
}
}
}
}
}
}
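Note: I calculated the distance as |location.lat - givenPoint.lat| + |location.lon - givenPoint.lon|, i.e. a Manhattan distance on the raw coordinates in degrees, which is only an approximation and not a true geographic distance.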
This is the response:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"group-by-type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "A",
"doc_count": 2,
"min-distance": {
"hits": {
"total": 2,
"max_score": null,
"hits": [{
"_index": "places",
"_type": "test",
"_id": "3",
"_score": null,
"_source": {
"location": {
"lon": 102.763427,
"lat": 1.278446
},
"type": "A"
},
"sort": [1.1000006934661934]
}]
}
}
}, {
"key": "B",
"doc_count": 1,
"min-distance": {
"hits": {
"total": 1,
"max_score": null,
"hits": [{
"_index": "places",
"_type": "test",
"_id": "2",
"_score": null,
"_source": {
"location": {
"lon": 104.763427,
"lat": 1.478446
},
"type": "B"
},
"sort": [3.3000007411499093]
}]
}
}
}]
}
}
}

Resources