Delete documents from a search query - Elasticsearch

I use this request to get all (200) duplicated documents in my index, and I want to delete all of the documents in this result. I tried to use _delete_by_query, but after many attempts and a lot of searching on the internet I failed.
My query is:
GET /index-ip/_search
{
  "aggs": {
    "dup": {
      "terms": {
        "size": 200,
        "field": "ip_commune.keyword",
        "min_doc_count": 2
      }
    }
  }
}
Returned result:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 754,
"max_score" : 1.0,
"hits" : [
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "V64TV2kB0AORAV2GIdMZ",
"_score" : 1.0,
"_source" : {
"ip_commune" : "98.183.164.36",
"#version" : "1",
"tags" : [
"ip-ipam"
],
"#timestamp" : "2019-03-07T07:34:24.874Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "Dq4TV2kB0AORAV2GIdUe",
"_score" : 1.0,
"_source" : {
"ip_commune" : "136.168.48.254",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.979Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "264TV2kB0AORAV2GIdUf",
"_score" : 1.0,
"_source" : {
"ip_commune" : "137.83.124.226",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.889Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "F64TV2kB0AORAV2GIdUe",
"_score" : 1.0,
"_source" : {
"ip_commune" : "136.168.50.1",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.980Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "Iq4TV2kB0AORAV2GIdUe",
"_score" : 1.0,
"_source" : {
"ip_commune" : "136.168.86.91",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:25.003Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "Za4TV2kB0AORAV2GIdYh",
"_score" : 1.0,
"_source" : {
"ip_commune" : "137.69.223.4",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.888Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "Z64TV2kB0AORAV2GIdYh",
"_score" : 1.0,
"_source" : {
"ip_commune" : "137.78.101.254",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.888Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "aa4TV2kB0AORAV2GIdYh",
"_score" : 1.0,
"_source" : {
"ip_commune" : "137.83.29.198",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.890Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "d64TV2kB0AORAV2GIdYh",
"_score" : 1.0,
"_source" : {
"ip_commune" : "98.183.164.55",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.916Z"
}
},
{
"_index" : "index-ip",
"_type" : "doc",
"_id" : "-a4TV2kB0AORAV2GIdUf",
"_score" : 1.0,
"_source" : {
"ip_commune" : "98.183.164.117",
"#version" : "1",
"tags" : [
"ip-glpi"
],
"#timestamp" : "2019-03-07T07:34:24.899Z"
}
}
]
},
"aggregations" : {
"dup" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "98.183.164.124",
"doc_count" : 2
},
{
"key" : "98.183.164.142",
"doc_count" : 2
},
{
"key" : "98.183.164.166",
"doc_count" : 2
},
{
"key" : "98.183.164.172",
"doc_count" : 2
},
{
"key" : "98.183.164.232",
"doc_count" : 2
},
{
"key" : "98.183.164.25",
"doc_count" : 2
},
{
"key" : "98.183.164.27",
"doc_count" : 2
},
{
"key" : "98.183.164.28",
"doc_count" : 2
},
{
"key" : "98.183.164.29",
"doc_count" : 2
},
{
"key" : "98.183.164.35",
"doc_count" : 2
},
{
"key" : "98.183.164.36",
"doc_count" : 2
},
{
"key" : "98.183.164.37",
"doc_count" : 2
},
{
"key" : "98.183.164.38",
"doc_count" : 2
},
{
"key" : "98.183.164.39",
"doc_count" : 2
},
{
"key" : "98.183.164.58",
"doc_count" : 2
},
{
"key" : "98.183.27.21",
"doc_count" : 2
},
{
"key" : "98.183.27.22",
"doc_count" : 2
},
{
"key" : "98.183.27.23",
"doc_count" : 2
},
{
"key" : "98.183.27.24",
"doc_count" : 2
},
{
"key" : "98.183.27.25",
"doc_count" : 2
},
{
"key" : "98.183.27.27",
"doc_count" : 2
},
{
"key" : "98.183.27.28",
"doc_count" : 2
}
]
}
}
}
I don't want to keep one of the duplicate records =) I need to delete both of them.
My goal is to compare two IP lists, ITSM (GLPI) and IPAM, to extract all IP addresses that are not in GLPI.
Any idea? And sorry for my bad English ^^
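One way to do this, sketched against the aggregation output above: take the bucket keys returned by the dup aggregation and feed them into a _delete_by_query request with a terms query on ip_commune.keyword. Because the query matches every document holding one of those IPs, both copies of each duplicate are removed. The three IPs below are just the first keys from the sample response; in practice the full key list would be pasted in (or generated by a small client-side script, since _delete_by_query cannot consume aggregation results directly).
POST /index-ip/_delete_by_query
{
  "query": {
    "terms": {
      "ip_commune.keyword": [
        "98.183.164.124",
        "98.183.164.142",
        "98.183.164.166"
      ]
    }
  }
}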

Related

Elasticsearch override field value in search results

The use case is that I want to hide the product price (change it to 0) if a user is not logged in.
PUT /products
{
  "mappings": {
    "properties": {
      "price": {
        "type": "scaled_float",
        "scaling_factor": 100
      }
    }
  }
}

POST /products/_doc
{
  "price": 101
}

POST /products/_doc
{
  "price": 102
}

POST /products/_doc
{
  "price": 103
}
I tried to use runtime_mappings with the following script, but the result still has the original data.
GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "runtime_mappings": {
    "price": {
      "type": "double",
      "script": "if(0 == 1) {emit(333);} else{emit(222);}"
    }
  }
}
What am I missing? Is the script condition invalid?
Thanks.
-- Edit --
I expect all the prices to be 222, but the original prices are returned.
Expected output:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_id" : "XXphNoMBcFxgyV6Mwe1-",
"_score" : 1.0,
"_source" : {
"price" : 222
}
},
{
"_index" : "products",
"_id" : "XnphNoMBcFxgyV6Mye2c",
"_score" : 1.0,
"_source" : {
"price" : 222
}
},
{
"_index" : "products",
"_id" : "X3phNoMBcFxgyV6M0e2W",
"_score" : 1.0,
"_source" : {
"price" : 222
}
},
{
"_index" : "products",
"_id" : "YHphNoMBcFxgyV6M3u0V",
"_score" : 1.0,
"_source" : {
"price" : 222
}
}
]
}
}
Actual output:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_id" : "XXphNoMBcFxgyV6Mwe1-",
"_score" : 1.0,
"_source" : {
"price" : 101
}
},
{
"_index" : "products",
"_id" : "XnphNoMBcFxgyV6Mye2c",
"_score" : 1.0,
"_source" : {
"price" : 102
}
},
{
"_index" : "products",
"_id" : "X3phNoMBcFxgyV6M0e2W",
"_score" : 1.0,
"_source" : {
"price" : 105
}
},
{
"_index" : "products",
"_id" : "YHphNoMBcFxgyV6M3u0V",
"_score" : 1.0,
"_source" : {
"price" : 0
}
}
]
}
}
After retrying the example in the official docs, I realized that I had missed the fields key in the query body.
GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "runtime_mappings": {
    "price": {
      "type": "double",
      "script": "if(0 == 1) {emit(333);} else{emit(222);}"
    }
  },
  "fields": ["price"]
}
Now I get both the original and script fields.
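If the goal is to return only the script-generated value and hide the stored price completely, _source can be switched off so that only the runtime field comes back in the fields section of each hit. A sketch, assuming the same products index as above:
GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "runtime_mappings": {
    "price": {
      "type": "double",
      "script": "if (0 == 1) { emit(333); } else { emit(222); }"
    }
  },
  "fields": ["price"],
  "_source": false
}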

Elasticsearch query get multiple hits count

I have a scenario where my search query has to find phone numbers and license numbers that start with "861". The query outputs all the phone numbers and license numbers that begin with "861", along with a hits count, but that count is a combined total of license numbers and phone numbers.
I am expecting to get the hits separately for phone numbers and license numbers.
Below is my query and its output, followed by my expected output.
GET emp_details_new/_search
{
  "_source": [],
  "min_score": 0.5,
  "query": {
    "multi_match": {
      "query": "861",
      "fields": ["phone", "licence_num"],
      "type": "phrase_prefix"
    }
  }
}
Output:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 6.5032897,
"hits" : [
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "20",
"_score" : 6.5032897,
"_source" : {
"id" : 20,
"firstname" : "Millard",
"phone" : "1531243932",
"licence_num" : "8616829169"
}
},
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "243",
"_score" : 6.5032897,
"_source" : {
"id" : 243,
"firstname" : "Darbie",
"phone" : "8617323318",
"licence_num" : "9154243943"
}
},
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "252",
"_score" : 6.5032897,
"_source" : {
"id" : 252,
"firstname" : "Angus",
"phone" : "2425841984",
"licence_num" : "8616203799"
}
},
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "777",
"_score" : 6.5032897,
"_source" : {
"id" : 777,
"firstname" : "Julio",
"phone" : "8613789726",
"licence_num" : "1355139833"
}
}
]
}
}
My expected output is to get separate counts for phone numbers and license numbers, as below.
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 6.5032897,
"hits" : [
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "20",
"_score" : 6.5032897,
"_source" : {
"id" : 20,
"licence_num" : "8616829169"
}
},
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "252",
"_score" : 6.5032897,
"_source" : {
"id" : 252,
"licence_num" : "8616203799"
}
}
],
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 6.5032897,
"hits" : [
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "243",
"_score" : 6.5032897,
"_source" : {
"id" : 243,
"phone" : "8617323318"
}
},
{
"_index" : "emp_details_new",
"_type" : "_doc",
"_id" : "777",
"_score" : 6.5032897,
"_source" : {
"id" : 777,
"phone" : "8613789726"
}
}
]
}
}
What I believe to be an option is this:
GET _msearch
{"index": "test"}
{ "_source": ["id", "licence_num"], "min_score": 0.5, "query": { "multi_match": { "query": "861", "fields": ["licence_num"], "type": "phrase_prefix" } }}
{"index": "test"}
{ "_source": ["id", "phone"], "min_score": 0.5, "query": { "multi_match": { "query": "861", "fields": ["phone"], "type": "phrase_prefix" } }}
Response:
"responses" : [
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.2039728,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "eb_apoIBOFCrGsmFSmdS",
"_score" : 1.2039728,
"_source" : {
"licence_num" : "8616829169",
"id" : 20
}
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "e7_apoIBOFCrGsmFVmeW",
"_score" : 1.2039728,
"_source" : {
"licence_num" : "8616203799",
"id" : 252
}
}
]
},
"status" : 200
},
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.2039728,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "er_apoIBOFCrGsmFUGfI",
"_score" : 1.2039728,
"_source" : {
"phone" : "8617323318",
"id" : 243
}
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "fL_apoIBOFCrGsmFXmdO",
"_score" : 1.2039728,
"_source" : {
"phone" : "8613789726",
"id" : 777
}
}
]
},
"status" : 200
}
]
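An alternative that stays within a single request, sketched against the same emp_details_new index: put each field into its own filter aggregation. Each bucket's doc_count then gives the separate totals for phone and licence_num, although the matching documents themselves are not split per field the way the _msearch approach above splits them.
GET emp_details_new/_search
{
  "size": 0,
  "aggs": {
    "phone_matches": {
      "filter": {
        "match_phrase_prefix": { "phone": "861" }
      }
    },
    "licence_matches": {
      "filter": {
        "match_phrase_prefix": { "licence_num": "861" }
      }
    }
  }
}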

How to access the date_histogram key field in the child aggregation in elasticsearch?

I want to apply a filter to the buckets generated by a date_histogram, and that filter depends on the key of each date_histogram output bucket.
Suppose I have documents of the following shape:
{
  "entryTime": "",
  "soldTime": ""
}
The Elasticsearch query is something like this:
{
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "entryTime",
        "interval": "month",
        "keyed": true
      },
      "aggs": {
        "filter_try": {
          "filter": {
            "bool": {
              "must": [
                {
                  "range": {
                    "entryTime": {
                      "lte": 1588840533000
                    }
                  }
                },
                {
                  "bool": {
                    "should": [
                      {
                        "bool": {
                          "must": [
                            {
                              "exists": {
                                "field": "soldTime"
                              }
                            },
                            {
                              "range": {
                                "soldTime": {
                                  "gt": 1588840533000
                                }
                              }
                            }
                          ]
                        }
                      },
                      {
                        "bool": {
                          "must_not": [
                            {
                              "exists": {
                                "field": "soldTime"
                              }
                            }
                          ]
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        }
      }
    }
  }
}
So here, in that bool query, I want to use the date generated for each specific bucket by the date_histogram aggregation in both range clauses, instead of the hard-coded epoch time.
Even if it can only be accessed via a script, that's fine.
For further clarification, this is the boolean logic, and in it I want to replace "DATE" with the date_histogram bucket key:
# (entryTime < DATE)
# AND
# (
# (soldTime != null AND soldTime > DATE)
# OR
# (soldTime == NULL)
# )
Consider the 10 documents below:
"hits" : [
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1577869200000",
"soldTime" : "1578646800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578560400000",
"soldTime" : "1579942800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1579683600000",
"soldTime" : "1581325200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1580893200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "6",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1582189200000",
"soldTime" : "1582362000000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "7",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1582621200000",
"soldTime" : "1584349200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "8",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1583053200000",
"soldTime" : "1583830800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "9",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1584262800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "10",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1585472400000"
}
}
]
Now, the end of January 2020 in epoch milliseconds is 1580515199000.
So if I apply the above-mentioned bool query with that date, I will get the following output:
"hits" : [
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "4",
"_score" : 3.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1579683600000",
"soldTime" : "1581325200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
}
]
The document with ID 4 satisfies (soldTime != null AND soldTime > DATE), and the document with ID 2 satisfies the (soldTime == null) condition from the OR part.
Now, for the same bool request, if I use the date for the end of February 2020 (1583020799000), I get the following hits:
"hits" : [
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "7",
"_score" : 3.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1582621200000",
"soldTime" : "1584349200000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
},
{
"_index" : "vi_test",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1580893200000"
}
}
]
ID 7: entered in Feb but sold in March, so it is in stock for Feb 2020.
ID 2: entered in Jan and not sold yet, so it is in stock.
ID 5: entered in Feb and not sold yet, so it is in stock.
Now I need the same data for the end of each month of a whole year, to plot the trend.
Thank you.
I couldn't find a way using normal queries, as the parent aggregation key is not available in a sub-aggregation. I have written a script for this which selects documents where soldTime is either null or does not fall in the same month as entryTime.
Query:
{
  "query": {
    "script": {
      "script": """
        ZonedDateTime entry = null;
        ZonedDateTime sold = null;
        if (doc['entryTime'].size() > 0) {
          entry = doc['entryTime'].value;
        }
        if (doc['soldTime'].size() > 0) {
          sold = doc['soldTime'].value;
        }
        if (sold == null || entry.getMonthValue() != sold.getMonthValue() || entry.getYear() != sold.getYear()) {
          return true;
        }
        return false;
      """
    }
  },
  "size": 10,
  "aggs": {
    "monthly_trend": {
      "date_histogram": {
        "field": "entryTime",
        "interval": "month"
      },
      "aggs": {
        "docs": {
          "top_hits": {
            "size": 10
          }
        }
      }
    }
  }
}
Result:
"hits" : [
{
"_index" : "index22",
"_type" : "_doc",
"_id" : "55Kv83EB8a54AbXfngYU",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
}
]
},
"aggregations" : {
"monthly_trend" : {
"buckets" : [
{
"key_as_string" : "2020-01-01T00:00:00.000Z",
"key" : 1577836800000,
"doc_count" : 1,
"docs" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index22",
"_type" : "_doc",
"_id" : "55Kv83EB8a54AbXfngYU",
"_score" : 1.0,
"_source" : {
"deaerId" : "4",
"entryTime" : "1578214800000"
}
}
]
}
}
}
]
}
}
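If only the per-month counts are needed to plot the trend (not the matching documents), the top_hits sub-aggregation can be dropped and size set to 0. A sketch, reusing the script query from the answer above against the same index22 index:
GET index22/_search
{
  "size": 0,
  "query": {
    "script": {
      "script": """
        ZonedDateTime entry = null;
        ZonedDateTime sold = null;
        if (doc['entryTime'].size() > 0) { entry = doc['entryTime'].value; }
        if (doc['soldTime'].size() > 0) { sold = doc['soldTime'].value; }
        return sold == null || entry.getMonthValue() != sold.getMonthValue() || entry.getYear() != sold.getYear();
      """
    }
  },
  "aggs": {
    "monthly_trend": {
      "date_histogram": {
        "field": "entryTime",
        "interval": "month"
      }
    }
  }
}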

How to make an Elasticsearch API (query) with counts of a field and subfields

I want to get the counts of SUSPECT and CLEAR for each state via the API using an Elasticsearch query.
The data inside Elasticsearch looks like this.
Sample data:
[
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id1",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "SUSPECT",
      "state_name" : "UTTAR PRADESH"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id2",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "CLEAR",
      "state_name" : "UTTAR PRADESH"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id3",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "SUSPECT",
      "state_name" : "Delhi"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id4",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "CLEAR",
      "state_name" : "Madhya Pradesh"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id5",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "SUSPECT",
      "state_name" : "Rajasthan"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id6",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "CLEAR",
      "state_name" : "Bihar"
    }
  }
]
The fields are state_name and slflag.
In the slflag field we have two categories: "SUSPECT" and "CLEAR".
I want to make a query that returns results like this:
{
"stateName": "UTTAR PRADESH",
"clear": 688,
"suspect": 182
},
{
"stateName": "Bihar",
"clear": 398456,
"suspect": 117110
},
{
"stateName": "Rajasthan",
"clear": 688,
"suspect": 182
},
{
"stateName": "Delhi",
"clear": 12096,
"suspect": 984
}
I don't know how to count slflag for each state.
Thanks in advance.
GET /index returns the following mapping:
{
"index" : {
"aliases" : { },
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"slflag" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"state_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"wl_d_ind" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1587554261571",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "zFKQmxyTSsyoVLRoCC_3IA",
"version" : {
"created" : "7060199"
},
"provided_name" : "index"
}
}
}
}
I tried the following:
GET /index/_search
{
  "size": 0,
  "aggs": {
    "states": {
      "terms": {
        "field": "state_name.keyword",
        "size": 100
      },
      "aggs": {
        "flag": {
          "terms": {
            "field": "slflag.keyword"
          }
        }
      }
    }
  }
}
The above results in:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"states" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "UTTAR PRADESH",
"doc_count" : 5403369,
"flag" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "CLEAR",
"doc_count" : 4540278
},
{
"key" : "SUSPECT",
"doc_count" : 863091
}
]
}
},
{
"key" : "RAJASTHAN",
"doc_count" : 2239768,
"flag" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "CLEAR",
"doc_count" : 1866196
},
{
"key" : "SUSPECT",
"doc_count" : 373572
}
]
}
},
{
"key" : "GOA",
"doc_count" : 12,
"flag" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "CLEAR",
"doc_count" : 12
}
]
}
}
]
}
}
}
You need to first aggregate on stateName and then on slflag, like this:
GET index_name/_search?filter_path=**.key,**.doc_count
{
  "size": 0,
  "aggs": {
    "states": {
      "terms": {
        "field": "state_name.keyword",
        "size": 100
      },
      "aggs": {
        "flag": {
          "terms": {
            "field": "slflag.keyword"
          }
        }
      }
    }
  }
}
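If the two flags should always appear as named buckets, even when one of them has zero documents for a state, a filters sub-aggregation can be used instead of the nested terms aggregation. A sketch, assuming the same slflag.keyword field:
GET index_name/_search
{
  "size": 0,
  "aggs": {
    "states": {
      "terms": {
        "field": "state_name.keyword",
        "size": 100
      },
      "aggs": {
        "flag": {
          "filters": {
            "filters": {
              "clear": { "term": { "slflag.keyword": "CLEAR" } },
              "suspect": { "term": { "slflag.keyword": "SUSPECT" } }
            }
          }
        }
      }
    }
  }
}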

Elasticsearch sort by subtraction of two numeric fields

Is it possible to sort the results by the difference between two fields?
E.g. there are two fields, "price" and "lastPrice", and I would like to sort documents by price - lastPrice.
Can this be done directly, or is the only way to create a new field?
You can use a script sort.
Data:
[
{
"_index" : "index19",
"_type" : "_doc",
"_id" : "LI9zpnEB1jFeu7E5l58r",
"_score" : 1.0,
"_source" : {
"title" : "a",
"price" : 10,
"lastPrice" : 15
}
},
{
"_index" : "index19",
"_type" : "_doc",
"_id" : "LY9zpnEB1jFeu7E5xJ_A",
"_score" : 1.0,
"_source" : {
"title" : "b",
"price" : 15,
"lastPrice" : 20
}
},
{
"_index" : "index19",
"_type" : "_doc",
"_id" : "Lo95pnEB1jFeu7E5Sp9w",
"_score" : 1.0,
"_source" : {
"title" : "b",
"price" : 20,
"lastPrice" : 21
}
}
]
Query:
{
  "sort": [
    {
      "_script": {
        "type": "number",
        "script": {
          "lang": "painless",
          "source": "doc['lastPrice'].value - doc['price'].value"
        },
        "order": "asc"
      }
    }
  ]
}
Result:
[
{
"_index" : "index19",
"_type" : "_doc",
"_id" : "Lo95pnEB1jFeu7E5Sp9w",
"_score" : null,
"_source" : {
"title" : "b",
"price" : 20,
"lastPrice" : 21
},
"sort" : [
1.0
]
},
{
"_index" : "index19",
"_type" : "_doc",
"_id" : "LI9zpnEB1jFeu7E5l58r",
"_score" : null,
"_source" : {
"title" : "a",
"price" : 10,
"lastPrice" : 15
},
"sort" : [
5.0
]
},
{
"_index" : "index19",
"_type" : "_doc",
"_id" : "LY9zpnEB1jFeu7E5xJ_A",
"_score" : null,
"_source" : {
"title" : "b",
"price" : 15,
"lastPrice" : 20
},
"sort" : [
5.0
]
}
]
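If some documents may be missing price or lastPrice, the script above can throw or produce surprising results for those documents. A null-safe variant, sketched with an assumed fallback sort value of 0 for incomplete documents:
{
  "sort": [
    {
      "_script": {
        "type": "number",
        "script": {
          "lang": "painless",
          "source": "if (doc['lastPrice'].size() == 0 || doc['price'].size() == 0) { return 0; } return doc['lastPrice'].value - doc['price'].value;"
        },
        "order": "asc"
      }
    }
  ]
}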

Resources