Count part of documents in elastic search - elasticsearch

I want to get count of grade for student index in elastic search as :
for :
grade 1: count = 4,
grade 2: count = 1
grade 3: count = 1
grade 4: count = 1
Used the below query:
{
"aggs":
{
"grade":
{
"terms":
{
"field": "marks.grade"
}
}
}
}
Output:
{
"took": 9,
"timed_out": false,
"_shards":
{
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits":
{
"total":
{
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits":
[
{
"_index": "student",
"_type": "doc",
"_id": "001",
"_score": 1.0,
"_source":
{
"name": "abc",
"marks":
[
{
"grade": 1,
"score": 95
},
{
"grade": 2,
"score": 75
},
{
"grade": 2,
"score": 72
},
{
"grade": 3,
"score": 55
}
]
}
},
{
"_index": "student",
"_type": "doc",
"_id": "002",
"_score": 1.0,
"_source":
{
"name": "xyz",
"marks":
[
{
"grade": 4,
"score": 35
},
{
"grade": 2,
"score": 79
},
{
"grade": 2,
"score": 65
}
]
}
}
]
},
"aggregations":
{
"grade":
{
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets":
[
{
"key": 2,
"doc_count": 2
},
{
"key": 1,
"doc_count": 1
},
{
"key": 3,
"doc_count": 1
},
{
"key": 4,
"doc_count": 1
}
]
}
}
}
Here, it counts grade 2 count as 2 only instead of 4.
Is there any query to get the output grade 2 count as 4?

You can use a combination of terms aggregation with cardinality aggregation
{
"size":0,
"aggs": {
"grade": {
"terms": {
"field": "marks.grade"
},
"aggs": {
"count_of_grade": {
"cardinality": {
"field": "marks.grade"
}
}
}
}
}
}
Response will be
"aggregations": {
"grade": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 2,
"count_of_grade": {
"value": 4
}
},
{
"key": 1,
"doc_count": 1,
"count_of_grade": {
"value": 3
}
},
{
"key": 3,
"doc_count": 1,
"count_of_grade": {
"value": 3
}
},
{
"key": 4,
"doc_count": 1,
"count_of_grade": {
"value": 2
}
}
]
}
}

You can do this by making marks array nested. Here is an example mapping:
{
"mappings": {
"properties": {
"marks": { "type": "nested" }
}
}
}
And you can make a nested aggregation:
{
"size": 0,
"aggs": {
"counts": {
"nested": {
"path": "marks"
},
"aggs": {
"counts": {
"terms": {
"field": "marks.grade",
"size": 10
}
}
}
}
}
}
This is the result of the aggregation:
{
"key" : 2,
"doc_count" : 4
},
{
"key" : 1,
"doc_count" : 1
},
{
"key" : 3,
"doc_count" : 1
},
{
"key" : 4,
"doc_count" : 1
}

Related

Elasticsearch - Count number of occurrence perd field per document

Is it possible to calculate the number of occurence of distinct values in a list field.
For example, let the following data:
[
{
"page":1,
"colors":[
{
"color": red
},
{
"color": white
},
{
"color": red
}
]
},
{
"page":2,
"colors":[
{
"color": yellow
},
{
"color": yellow
}
]
}
]
Is it possible to get a result as the follwing:
{
"page":1,
"colors_count":[
{
"Key": red,
"Count": 2
},
{
"Key": white,
"Count": 1
},
]
},
{
"page":2,
"colors_count":[
{
"Key": yellow,
"Count": 2
}
]
}
I tried using term aggregation but I got the number of distinct values, so for page:1 i got red:1 and white:1.
Yes, you can do it. you will have to use nested_field type and nested_Agg
Mapping:
PUT colors
{
"mappings": {
"properties": {
"page" : { "type": "keyword" },
"colors": {
"type": "nested",
"properties": {
"color": {
"type": "keyword"
}
}
}
}
}
}
Insert Documents:
PUT colors/_doc/1
{
"page": 1,
"colors": [
{
"color": "red"
},
{
"color": "white"
},
{
"color": "red"
}
]
}
PUT colors/_doc/2
{
"page": 2,
"colors": [
{
"color": "yellow"
},
{
"color": "yellow"
}
]
}
Query:
GET colors/_search
{
"size" :0,
"aggs": {
"groupByPage": {
"terms": {
"field": "page"
},
"aggs": {
"colors": {
"nested": {
"path": "colors"
},
"aggs": {
"genres": {
"terms": {
"field": "colors.color"
}
}
}
}
}
}
}
}
Output:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"groupByPage": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1", // page field value
"doc_count": 1,
"colors": {
"doc_count": 3,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "red",
"doc_count": 2
},
{
"key": "white",
"doc_count": 1
}
]
}
}
},
{
"key": "2", // page field value
"doc_count": 1,
"colors": {
"doc_count": 2,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "yellow",
"doc_count": 2
}
]
}
}
}
]
}
}
}

ElasticSearch aggregation return entire sub object, not just the key

newbies with ElasticSearch we have docs indexed with following structure:
{
"Id": 1246761,
"ContentTypeName": "Official Statement",
"Title": "Official statement Title",
"Categories": [
{
"Id": 3,
"Type": 1,
"Name": "Category A",
"ParentId": 0
},
{
"Id": 10,
"Type": 3,
"Name": "Category B",
"ParentId": 0
},
{
"Id": 426,
"Type": 7,
"Name": "Category C",
"ParentId": 0
}
]
}
The requirement is to get the aggregated list of categories + document count matching a keyword search.
So far our query looks like this:
GET _search
{
"query": {
"match_all": {}
},
"size": 0,
"aggs": {
"my-agg-name": {
"terms": {
"field": "Categories.Id"
}
}
}
}
Result is
{
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my-agg-name" : {
"doc_count_error_upper_bound" : 23845,
"sum_other_doc_count" : 1068245,
"buckets" : [
{
"key" : 426,
"doc_count" : 112651
},
{
"key" : 10,
"doc_count" : 91146
},
....
]
}
}
}
Is there a way to get back the entire Category object, not only the Id ?
Or serialize the category object into string as the key ?
You need to use nested aggregation to achieve your required use case
Adding a working example with index mapping, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"Categories": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"match_all": {}
},
"aggs": {
"resellers": {
"nested": {
"path": "Categories"
},
"aggs": {
"my-agg-name": {
"terms": {
"field": "Categories.Id"
},
"aggs": {
"categories-doc": {
"top_hits": {
"_source": {
"includes": [
"Categories.Id",
"Categories.Type",
"Categories.Name",
"Categories.ParentId"
]
},
"size": 1
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"resellers": {
"doc_count": 3,
"my-agg-name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 3, // note this
"doc_count": 1,
"categories-doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65847850",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Categories",
"offset": 0
},
"_score": 1.0,
"_source": {
"ParentId": 0,
"Type": 1,
"Id": 3, // note this
"Name": "Category A"
}
}
]
}
}
},
{
"key": 10,
"doc_count": 1,
"categories-doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65847850",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Categories",
"offset": 1
},
"_score": 1.0,
"_source": {
"ParentId": 0,
"Type": 3,
"Id": 10,
"Name": "Category B"
}
}
]
}
}
},
{
"key": 426,
"doc_count": 1,
"categories-doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65847850",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Categories",
"offset": 2
},
"_score": 1.0,
"_source": {
"ParentId": 0,
"Type": 7,
"Id": 426,
"Name": "Category C"
}
}
]
}
}
}
]
}
}
}

Max and min from all index in query

Is there way to get max and min for all documents in index, not only max and min from category "game" without making another request to elastic?
{
"query": {
"bool": {
"must": [
{
"match": {
"category": "game"
}
}
]
}
},
"aggs": {
"maxPoints": {
"max": {
"field": "points"
}
},
"minPoints": {
"min": {
"field": "points"
}
}
}
Here is some data data i have, with query above I want to get this 2 docs from category game and min 0, max 100 instead of min 10, max 20.
[
{
"id": 1,
"category": "offer",
"points": 0
},
{
"id": 2,
"category": "game",
"points": 10
},
{
"id": 3,
"category": "game",
"points": 20
},
{
"id": 4,
"category": "offer",
"points": 100
}
]
Yeah, just remove the match clause, and add match_all query to include all the documents in your index. Use post_filter to get the expected results in a single ES call.
{
"query": {
"match_all": {}
},
"aggs": {
"maxPoints": {
"max": {
"field": "points"
}
},
"minPoints": {
"min": {
"field": "points"
}
}
},
"post_filter": { // Note this
"term": {
"category": "game"
}
}
}
Output
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65406564",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"id": 2,
"category": "game",
"points": 10
}
},
{
"_index": "65406564",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"id": 3,
"category": "game",
"points": 20
}
}
]
},
"aggregations": {
"maxPoints": {
"value": 100.0
},
"minPoints": {
"value": 0.0
}
}
}

How to get sum value for fields based on input field in elasticsearch (input field and sum output fields are different)

This is document present in elastic search and wants to output based fields in which it returns the sum of the high and medium and which be greater than zero, the value of high and medium must be greater than > 0
{
"host_id": 1,
"hostname": "Hostname1",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Un",
"Application":"App1"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 1,
"low": 0
},
{
"host_id": 2,
"hostname": "Hostname2",
"businesshierarchy": {
"businessunit": "One Unit",
"Location":"Un",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 2,
"low": 0
},
{
"host_id": 3,
"hostname": "Hostname3",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Uk",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 2,
"medium": 2,
"low": 0
}
Is there are any query or method to get output like in elastic search?
based on location
Location - Un
High - 2
medium - 3
Location - Uk
High - 2
medium - 2
Based on application
Application - App1
High - 1
medium - 1
Application - App2
High - 3
medium - 4
or based on hostname
hostname - Hostname1
High - 1
medium - 1
hostname - Hostname2
High - 1
medium - 2
hostname - Hostname3
High - 2
medium - 2
Similarly for businessunit. The field name passed dynamically like businessunit, hostname, application, location-based on it want to get count High and medium value like the above output.
Adding a working example with index mapping, index data(same as that given in question), search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
Search Query:
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Location"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"high",
"medium"
]
}
}
},
"high_sum": {
"sum": {
"field": "high"
}
},
"medium_sum": {
"sum": {
"field": "medium"
}
}
}
}
}
}
Search Result:
Based on the location
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Un",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 3.0
}
},
{
"key": "Uk",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 2.0
}
}
]
}
For querying on the basis of application replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Application"
},
The following search result will be there:
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "App2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 3.0
},
"medium_sum": {
"value": 4.0
}
},
{
"key": "App1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
}
]
}
For querying on the basis of hostname replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "hostname"
},
Search Results will be :
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Hostname1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
},
{
"key": "Hostname2",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 2.0
}
},
{
"key": "Hostname3",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0
},
"medium_sum": {
"value": 2.0
}
}
]
}
we can use this query to get the excepted result
{
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"range": {
"medium": {
"gt": 0
}
}
},
{
"range": {
"high": {
"gt": 0
}
}
}
]
}
}
]
}
},
"aggs": {
"fieldnames": {
"terms": {
"field": "hostname.keyword"
},
"aggs": {
"medium": {
"sum": {
"field": "medium"
}
},
"high": {
"sum": {
"field": "high"
}
}
}
}
},
"size": 0
}
Search result for this look like this
"aggregations": {
"fieldnames": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ALL Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 7.0
}
},
{
"key": "Latest Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 5.0
}
},
{
"key": "NO Unit",
"doc_count": 1,
"high": {
"value": 1.0
},
"medium": {
"value": 1.0
}
}
]
}
}
In case if we need the result for location and application, just need to change
for Location
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Application.keyword"
}
for Application
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Location.keyword"
}
if the mapping is something like this,
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
There is no need for adding .keyword to
"terms": {
"field": "businesshierarchy.Location"
}

Elasticsearch - Count duplicated and unique values for a nested field

Elasticsearch - Count duplicated and unique values
I need also same kind of count but that field is in nested properties as
[{
"firstname": "john",
"lastname": "doe",
"addressList": [{
"addressId": 39640,
"txt": "sdf",
}, {
"addressId": 39641,
"txt": "NEW",
}, {
"addressId": 39640,
"txt": "sdf",
}, {
"addressId": 39641,
"txt": "NEW"
}
]
}, {
"firstname": "jane",
"lastname": "smith",
"addressList": [{
"addressId": 39644,
"txt": "sdf",
}, {
"addressId": 39642,
"txt": "NEW",
}, {
"addressId": 39644,
"txt": "sdf",
}, {
"addressId": 39642,
"txt": "NEW"
}
]
}
]
what would be the query for addressId duplicate counts ? Need you help on this user:3838328
I got the answer for nested field duplicate counts as
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"prop_counts": {
"nested": {
"path": "addressList"
},
"aggs": {
"duplicate_aggs": {
"terms": {
"field": "addressList.addressId",
"min_doc_count": 2,
"size": 100 <----- Note this
}
},
"duplicate_bucketcount": {
"stats_bucket": {
"buckets_path": "duplicate_aggs._count"
}
},
"nonduplicate_aggs": {
"terms": {
"field": "addressList.addressId",
"size": 100 <---- Note this
},
"aggs": {
"equal_one": {
"bucket_selector": {
"buckets_path": {
"count": "_count"
},
"script": "params.count == 1"
}
}
}
},
"nonduplicate_bucketcount": {
"sum_bucket": {
"buckets_path": "nonduplicate_aggs._count"
}
}
}
}
}
}
Response as
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop_counts": {
"doc_count": 8,
"duplicate_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": 39640,
"doc_count": 2
}, {
"key": 39641,
"doc_count": 2
}, {
"key": 39644,
"doc_count": 2
}, {
"key": 39642,
"doc_count": 2
}
]
},
"nonduplicate_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
},
"duplicate_bucketcount": {
"count": 4,
"min": 2,
"max": 2,
"avg": 2,
"sum": 8
},
"nonduplicate_bucketcount": {
"value": 0
}
}
}
}

Resources