Average of field in nested mapping for Elasticsearch - elasticsearch

I am unable to get the average of the CpuAverageLoad field.
Mapping:
{
"kpi" : {
"mappings" : {
"kpi" : {
"properties" : {
"date" : {
"type" : "date"
},
"kpi" : {
"type" : "nested",
"properties" : {
"CpuAverageLoad" : {
"type" : "long"
},
"HaGroupId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"LbGroupId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"MemFree" : {
"type" : "long"
},
"MemUsed" : {
"type" : "long"
},
"NodeId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"NodeType" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"State" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Static_limit" : {
"type" : "long"
}
}
}
}
}
}
}
}
Query
curl -XPOST 'localhost:9200/kpi/_search?size=0&pretty' -H 'Content-Type: application/json' -d'
{
"from": 0, "size": 10,
"sort":[
{"date" : {"order" : "desc"}}
],
"stored_fields": [],
"query": {
"nested": {
"path": "kpi",
"query": {
"match": {"kpi.NodeType" : "cs"}
},
"inner_hits": {
},
"aggs" : {
"kpi" : {
"nested" : {
"path" : "kpi"
},
"aggs" : {
"avg_price" : { "avg" : { "field" : "kpi.CpuAverageLoad" } }
}
}
}
}
}
}'
When I execute it, I get the error below:
"[nested] query does not support [aggs]"
type : "parsing_exception",
reason : "[nested] query does not support [aggs]",
line : 16, col : 16
I need to get the average of CpuAverageLoad.

The problem is that you included the aggs inside the nested query; aggs must be a top-level element of the search body, alongside query. The inner_hits is also defined in the wrong position. This should work:
{
"from": 0, "size": 10,
"sort":[{
"date" : {"order" : "desc"}
}],
"stored_fields": [],
"query": {
"nested" : {
"path" : "kpi",
"query" : {
"match" : { "kpi.NodeType" : "type_a" }
},
"inner_hits" : {
}
}
},
"aggs": {
"kpi": {
"nested": {
"path": "kpi"
},
"aggs" : {
"average-agg" : {
"avg" : { "field" : "kpi.CpuAverageLoad" }
}
}
}
}
}
The solution was tested with elasticsearch 5-6. Please let me know if it worked for you.

Related

Search match multiple values in single field in Elasticsearch

I'm using Elasticsearch to search for all documents that contain the strings REQUEST, partnerId=2960 and customerId= in the message field.
I'm using this query, but nothing is returned:
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"message": "REQUEST"
}
},
{
"match_phrase": {
"message": "partnerId=2960"
}
},
{
"match_phrase": {
"message": "customerId="
}
}
]
}
}
}
response:
{
"took" : 213,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
My data:
{
"_index": "demoapp-log_2021.05.31",
"_type": "_doc",
"_id": "epU5wXkBQRhN_CfKNayH",
"_version": 1,
"_score": null,
"_source": {
"tags": [
"beats_input_codec_plain_applied",
"demoapp-log"
],
"message": [
"2021-05-31 14:00:21,433 INFO c.m.v.w.r.CollectionService_v_2_0_0 Line 434 - Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}",
"Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}"
],
"type": "beats",
"agent": {
"hostname": "web01"
},
"host": {},
"loglevel": "INFO",
"logger": "c.m.v.w.r.CollectionService_v_2_0_0",
"line": "434",
"ecs": {},
"input": {
"type": "log"
},
"#version": "1",
"fields": {
"service": "payapp",
"app_id": "demoapp-log"
},
"log": {
"file": {
"path": "/home/collection/live/logs/c_api.log"
}
},
"#timestamp": "2021-05-31T07:00:21.433Z",
"index_day": "2021.05.31"
},
"fields": {
"#timestamp": [
"2021-05-31T07:00:21.433Z"
]
},
"sort": [
1622444421433
]
}
(https://pastebin.com/UZWmXLpq )
My mapping for index
{
"demoapp-log_2021.05.31" : {
"mappings" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"agent" : {
"properties" : {
"ephemeral_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"hostname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"ecs" : {
"properties" : {
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"fields" : {
"properties" : {
"app_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"max_bytes" : {
"type" : "long"
},
"service" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"host" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"index_day" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"input" : {
"properties" : {
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"line" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log" : {
"properties" : {
"file" : {
"properties" : {
"path" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"flags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"offset" : {
"type" : "long"
}
}
},
"logger" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"loglevel" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"tags" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
(https://pastebin.com/8CMm94MY )
(I updated my answer below based on your comment)
Your search doesn't work because "REQUEST" doesn't appear in your message as a standalone word; it only appears attached to other words, as in "partnerRequestId" or "_REQUEST_".
So if you want "REQUEST" to match "_REQUEST_" in a case-sensitive manner, you must change the analyzer of the message field.
By default, the analyzer doesn't split on underscores.
First, you need to create a new index with a custom mapping (you can later reindex your existing index into this new one).
Example of an index with only the "message" field and an analyzer that splits on underscores and non-word characters (see the documentation for more on tokenizers and analyzers):
PUT my-index
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "underscore"
}
},
"tokenizer": {
"underscore": {
"type": "pattern",
"pattern": "[_\\W]+"
}
}
}
},
"mappings" : {
"properties" : {
"message" : {
"type" : "text",
"analyzer":"my_analyzer"
}
}
}
}
Then you can add your document to this new index:
POST my-index/_doc
{
"message": [
"2021-05-31 14:00:21,433 INFO c.m.v.w.r.CollectionService_v_2_0_0 Line 434 - Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}",
"Inquire to business [b57add0e-4819-4130-9051-0628979b468f] CollectionInquiryRequestMsg_V_1_0_6{action=INQUIRE, transactionId=b57add0e-4819-4130-9051-0628979b468f, partnerId=2960, merchantId=VNP, customerId=091**59, billMonth=, partnerRequestId=6762e6fc-544b-4821-95f0-f4b81b547f3f, additionalInfo={\"ACTION\":\"INQUIRE\",\"VERSION\":\"2.0.0\",\"PARTNER_ID\":2960,\"PROVINCE_ID\":\"VNP\",\"CUSTOMER_ID\":\"091**59\",\"BILL_MONTH\":\"\",\"TRANS_REQUEST_ID\":\"6762e6fc-544b-4821-95f0-f4b81b547f3f\",\"CHANNEL_ID\":\"1\",\"TRANS_DATE_TIME\":\"20210531140021\",\"SECURE_CODE\":\"3354327f5af3ec9e0285c81bf5a4f72e3d2dd3afce703fffd9e2fbec1666e93d\"}, owner=, provinceCode=, paymentCode=, channelId=1, clientIp=, createDate=Mon May 31 14:00:21 ICT 2021, lastUpdate=Mon May 31 14:00:21 ICT 2021, collectionPartnerDate=Mon May 31 14:00:21 ICT 2021, merchantDate=null}"
]
}
And finally run this search that does the job:
GET my-index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"message": "partnerId=2960"
}
},
{
"match": {
"message": "customerId"
}
},
{
"match":{
"message": "REQUEST"
}
}
]
}
}
}
Also, I don't know whether it is intentional, but the "message" field is indexed as both text and keyword. It looks to me like you don't need the keyword sub-field on this field.

Apply query in elasticsearch to get true/false value without filtering

I have got this kind of mapping on my ES index
{
"vabaco_dhp_development_persons" : {
"mappings" : {
"person" : {
"properties" : {
"active" : {
"type" : "boolean"
},
"booking_resources" : {
"type" : "nested",
"properties" : {
"available_days" : {
"type" : "nested",
"properties" : {
"available_date" : {
"type" : "text"
},
"last_slot_time" : {
"type" : "date"
}
}
},
"booking_resource_detail" : {
"properties" : {
"from_age" : {
"type" : "long"
},
"to_age" : {
"type" : "long"
}
}
},
"booking_resource_price" : {
"type" : "float"
},
"booking_resource_restriction" : {
"properties" : {
"insurer_restrictions" : {
"properties" : {
"insurer_ic" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"booking_resource_service_restrictions" : {
"type" : "nested",
"properties" : {
"insurance_service_code" : {
"type" : "text"
},
"insurer_provider_ic" : {
"type" : "text"
},
"location_id" : {
"type" : "integer"
}
}
},
"city_id" : {
"type" : "integer"
},
"doctor_languages" : {
"properties" : {
"language_id" : {
"type" : "integer"
}
}
},
"doctor_speciality" : {
"properties" : {
"id" : {
"type" : "integer"
},
"name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
},
"location" : {
"properties" : {
"address" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"brand_name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"district" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"id" : {
"type" : "integer"
},
"name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"user_profile_city" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
},
"searchable_text" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"service_provision_type" : {
"properties" : {
"id" : {
"type" : "integer"
},
"id_name" : {
"type" : "text"
},
"name" : {
"properties" : {
"en" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
},
"covid_19" : {
"type" : "boolean"
},
"first_name" : {
"properties" : {
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"id" : {
"type" : "integer"
},
"last_name" : {
"properties" : {
"ka" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"person_star" : {
"properties" : {
"avg" : {
"type" : "float"
},
"disable_rating" : {
"type" : "boolean"
},
"star_count" : {
"type" : "integer"
}
}
},
"priority_city" : {
"properties" : {
"id" : {
"type" : "integer"
},
"name" : {
"type" : "object"
},
"priority" : {
"type" : "integer"
}
}
},
"uuid" : {
"type" : "text"
},
"views" : {
"type" : "integer"
}
}
}
}
}
}
I want to apply a query to filter based on the "booking_resource_service_restrictions" field, but in some cases I just need to know whether this query, if applied, would filter a particular record. This means I need to get a true/false value for every record based on this filter (but the data should not be filtered). Is there any way to extract this kind of information for every record?
A sample query looks like this. This is how I filter data based on the "booking_resource_service_restrictions" field, but as I already mentioned, in some cases I want to know whether this filter would work if applied, and get true/false values for every record without actually filtering them:
GET vabaco_dhp_development_persons/_search
{
"query": {
"bool":{
"must":[
{
"term":{
"active": true
}
},
{
"nested":{
"path":"booking_resources",
"query":{
"bool":{
"must":[
{
"nested":{
"path":"booking_resources.booking_resource_service_restrictions",
"query":{
"bool":{
"should":[
{
"bool":{
"must":[
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurer_provider_ic":"204919008"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurance_service_code":"11111"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.location_id": 1
}
}
]
}
},
{
"bool":{
"must":[
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurer_provider_ic":"204919008"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.insurance_service_code":"33333"
}
},
{
"term":{
"booking_resources.booking_resource_service_restrictions.location_id": 1
}
}
]
}
}
],
"minimum_should_match": 1
}
}
}
}
]
}
}
}
}
]
}
}
}
In general, what I need is a way to know whether a record satisfies some filtering requirements and, based on this, get true/false values (or data equivalent to true/false).
You can use named queries to give names to your queries and filters using "_name" field.
Add your query in a should clause so that it does not affect the documents returned.
When you search, you need to look for "matched_queries" array in the result to find names of all queries matched for that particular document.
{
"_index": "testindex",
"_type": "employee",
"_id": "2",
"_score": 0.19178301,
"_source": {
"name": "Barkha Jain"
},
"matched_queries": [
"query on name field"
]
}
Read about named queries here https://www.elastic.co/guide/en/elasticsearch/reference/7.11/query-dsl-bool-query.html#named-queries

Fuzzy query doesn't work on text type, but works on keyword type

I have a query that works only on the keyword type, and I can't figure out why.
However, if I use a Match query plus a fuzziness parameter I can make it work with the text type.
Why is this happening?
Please find the queries below
(the working query should return Eddie's documents.)
1) FUZZY QUERY TEXT TYPE -> NOT WORKING
GET kibana_sample_data_ecommerce/_search
{
"query": {
"fuzzy": {
"customer_first_name": {
"value": "Eddi",
"fuzziness": "AUTO"
}
}
}
}
2) FUZZY QUERY KEYWORD TYPE - WORKING
GET kibana_sample_data_ecommerce/_search
{
"query": {
"fuzzy": {
"customer_first_name.keyword": {
"value": "Eddi",
"fuzziness": "AUTO"
}
}
}
}
3) MATCH QUERY + FUZZINESS -> WORKING
GET kibana_sample_data_ecommerce/_search
{
"query": {
"match": {
"customer_first_name.keyword": {
"query": "Eddi",
"fuzziness": "Auto"
}
}
}
}
INDEX SETTINGS
{
"kibana_sample_data_ecommerce" : {
"aliases" : { },
"mappings" : {
"properties" : {
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"currency" : {
"type" : "keyword"
},
"customer_birth_date" : {
"type" : "date"
},
"customer_first_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"customer_full_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"customer_gender" : {
"type" : "keyword"
},
"customer_id" : {
"type" : "keyword"
},
"customer_last_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"customer_phone" : {
"type" : "keyword"
},
"day_of_week" : {
"type" : "keyword"
},
"day_of_week_i" : {
"type" : "integer"
},
"email" : {
"type" : "keyword"
},
"geoip" : {
"properties" : {
"city_name" : {
"type" : "keyword"
},
"continent_name" : {
"type" : "keyword"
},
"country_iso_code" : {
"type" : "keyword"
},
"location" : {
"type" : "geo_point"
},
"region_name" : {
"type" : "keyword"
}
}
},
"manufacturer" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"order_date" : {
"type" : "date"
},
"order_id" : {
"type" : "keyword"
},
"products" : {
"properties" : {
"_id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"base_price" : {
"type" : "half_float"
},
"base_unit_price" : {
"type" : "half_float"
},
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"created_on" : {
"type" : "date"
},
"discount_amount" : {
"type" : "half_float"
},
"discount_percentage" : {
"type" : "half_float"
},
"manufacturer" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"min_price" : {
"type" : "half_float"
},
"price" : {
"type" : "half_float"
},
"product_id" : {
"type" : "long"
},
"product_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "english"
},
"quantity" : {
"type" : "integer"
},
"sku" : {
"type" : "keyword"
},
"tax_amount" : {
"type" : "half_float"
},
"taxful_price" : {
"type" : "half_float"
},
"taxless_price" : {
"type" : "half_float"
},
"unit_discount_amount" : {
"type" : "half_float"
}
}
},
"sku" : {
"type" : "keyword"
},
"taxful_total_price" : {
"type" : "half_float"
},
"taxless_total_price" : {
"type" : "half_float"
},
"total_quantity" : {
"type" : "integer"
},
"total_unique_products" : {
"type" : "integer"
},
"type" : {
"type" : "keyword"
},
"user" : {
"type" : "keyword"
}
}
},
"settings" : {
"index" : {
"number_of_shards" : "1",
"auto_expand_replicas" : "0-1",
"provided_name" : "kibana_sample_data_ecommerce",
"creation_date" : "1579684918696",
"number_of_replicas" : "0",
"uuid" : "Ga3UfyyAQjGpa5JDbJB7Sw",
"version" : {
"created" : "7050299"
}
}
}
}
}
Fuzzy Query is a term-level query, which means that the query value is not analyzed.
WHY QUERY 1) FAILS
If you query for "Eddi", it will be compared with the analyzed text, in this case "eddie".
And from 'Eddi' to 'eddie' is an edit distance of 2.
Hence the query will not succeed, because terms between 3 and 5 characters long have a maximum edit distance of 1 (with the "fuzziness: AUTO" configuration).
WHY QUERY 2) SUCCEEDS
On the other hand, if you use the keyword field, keywords are stored without being analyzed, and therefore "Eddi" is an edit distance of 1 from "Eddie".

How can I get distinct values of nested fields in elasticsearch?

I have below document structure in elasticsearch:
root
|-- userid: string (nullable = true)
|-- name: string (nullable = true)
|-- applications: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- applicationid: string (nullable = true)
| | |-- createdat: string (nullable = true)
| | |-- source_name: string (nullable = true)
| | |-- accounts: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- applicationcreditreportaccountid: string (nullable = true)
| | | | |-- account_type: integer (nullable = true)
| | | | |-- account_department: string (nullable = true)
Below is the mapping of my index:
{
"bureau_data" : {
"mappings" : {
"dynamic_date_formats" : [
"yyyy-MM-dd"
],
"dynamic_templates" : [
{
"objects" : {
"match_mapping_type" : "object",
"mapping" : {
"type" : "nested"
}
}
}
],
"properties" : {
"raw_derived" : {
"type" : "nested",
"properties" : {
"applications" : {
"type" : "nested",
"properties" : {
"accounts" : {
"type" : "nested",
"properties" : {
"account_type_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"accounttypeid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"applicationcreditreportaccountid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"currentbalance" : {
"type" : "long"
},
"dayspastdue" : {
"type" : "long"
},
"institution_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"institutionid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"applicationcreditreportid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"applicationid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"createdat" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"creditbureautypeid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"dateofbirth" : {
"type" : "date",
"format" : "yyyy-MM-dd"
},
"firstname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lastname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"score" : {
"type" : "long"
},
"source_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"status" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"updatedat" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"dob" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"firstname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lastname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"middlename" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"mobilephone" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"source" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"userid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
},
"fielddata" : true
}
}
}
}
}
I want the distinct values of the account_type field, which is a nested field. I have tried a query, but it gives me only the distinct count.
GET /my_index/_search?size=0
{
"aggs": {
"nested_path": {
"nested": {
"path": "raw_derived.applications.accounts"
},
"aggs": {
"distinct_values": {
"cardinality": {
"field": "raw_derived.applications.accounts.account_type.keyword"
}
}
}
}
}
}
I expected the output to contain the distinct values of account_type, but the output is only a count. Below is my output snippet:
"hits" : {
"total" : {
"value" : 50,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"nested_path" : {
"doc_count" : 828,
"distinct_values" : {
"value" : 70
}
}
}
}
Below is a query I have tried that is working:
GET /bureau_data/_search?size=0
{
"_source": "{aggregations}",
"aggs": {
"unique": {
"nested": {
"path": "raw_derived.applications"
},
"aggs": {
"score_unq": {
"terms": {
"field": "raw_derived.applications.source_name.keyword"
}
}
}
}
}
}
Any suggestion would be helpful
From the official documentation -
Cardinality Aggregation :-
A single-value metrics aggregation that calculates an approximate count of distinct values. Values can be extracted either from specific fields in the document or generated by a script.
Instead of aggregating by "cardinality", try a terms aggregation as below:
{
"size":0,
"aggregations": {
"distinct_values": {
"terms": {
"field": "raw_derived.applications.accounts.account_type.keyword",
"size": 1000,
"min_doc_count": 1,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
}
}

How to fetch field inside a nested ES document

I have a curl command to fetch data from nested ES documents by date.
Currently it is not working.
Refer to the following for the mapping:
{
"test" : {
"mappings" : {
"doc" : {
"properties" : {
"#timestamp" : {
"type" : "date"
},
"#version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"_APIName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"_parameters" : {
"properties" : {
"event" : {
"properties" : {
"body_json" : {
"properties" : {
"apps" : {
"properties" : {
"bundle" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"model_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"serial_number" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"version" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
},
"_stackName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"beat" : {
"type" : "object"
},
"category" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"log" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"log_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"message" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"offset" : {
"type" : "long"
},
"prospector" : {
"properties" : {
"type" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"source" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"stack" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
The following is a sample document in ES.
{
"_index": "test",
"_type": "doc",
"_id": "odUvZFjNxoBJGtXhSoBA",
"_version": 1,
"_score": null,
"_source": {
"log.name": "information",
"offset": 8106321,
"prospector": {
"type": "log"
},
"#version": "1",
"beat": {},
"_stackName": "test",
"_APIName": "Information",
"category": "lambda",
"#timestamp": "2019-04-16T02:22:32.000Z",
"_parameters": {
"event": {
"body_json": {
"model_name": "model-01",
"serial_number": "1234567890",
"version": "1.2",
"apps": [
{
"name": "app1",
"version": "1.0.14"
},
{
"name": "app2",
"version": "1.0.15"
}
]
}
}
},
"stack": "test"
},
"fields": {
"#timestamp": [
"2019-04-16T02:22:32.000Z"
]
}
}
This is my curl command:
#!/bin/bash
curl -XGET "http://localhost:9200/test*/_search?pretty" -H 'Content-Type: application/json' -d' {
"query": {
"bool":{
"must":[
{
"range": {
"#timestamp": {
"gte": 1546837215000,
"lte": 1552712415000,
"format": "epoch_millis"
}
}
}
]
}
},
"aggs": {
"source_bucket": {
"nested": {
"path": "_source._parameters.event.body_json"
},
"aggs": {
"model_name": {
"terms": {
"script": {
"inline": "def model = doc['_source._parameters.event.body_json.model_name'].value;\n def serial = doc['_source._parameters.event.body_json.serial_number'].value;\nreturn \"model + serial\";",
"lang": "painless"
}
}
}
}
}
}
}'
As of now, it returns this error:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "compile error",
"script_stack" : [
"def model = doc[_parameters.event.body_js ...",
" ^---- HERE"
],
"script" : "def model = doc[_parameters.event.body_json.model_name.keyword].value;\n def serial = doc[_parameters.event.body_json.serial_number.keyword].value;\nreturn model + serial;",
"lang" : "painless"
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "test",
"node" : "-OHA7hfMTBGqlTNwjOOngg",
"reason" : {
"type" : "script_exception",
"reason" : "compile error",
"script_stack" : [
"def model = doc[_parameters.event.body_js ...",
" ^---- HERE"
],
"script" : "def model = doc[_parameters.event.body_json.model_name.keyword].value;\n def serial = doc[_parameters.event.body_json.serial_number.keyword].value;\nreturn model + serial;",
"lang" : "painless",
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Variable [_parameters] is not defined."
}
}
}
]
},
"status" : 500
}
How can I effectively get model_name and serial_number, concatenate them, and return the result?
Ok, you don't have any nested fields in your mapping, so your query should look like this instead:
#!/bin/bash
curl -XGET "http://localhost:9200/test*/_search?pretty" -H 'Content-Type: application/json' -d'{
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": 1546837215000,
"lte": 1552712415000,
"format": "epoch_millis"
}
}
},
{
"exists": {
"field": "_parameters.event.body_json"
}
}
]
}
},
"aggs": {
"model_name": {
"terms": {
"script": {
"source": "def model = doc['_parameters.event.body_json.model_name.keyword'].value;\n def serial = doc['_parameters.event.body_json.serial_number.keyword'].value;\nreturn model + serial;",
"lang": "painless"
}
}
}
}
}'

Resources