Query multiple fields by date and IP in Elasticsearch - elasticsearch

In Elasticsearch, my data is loaded from JSON documents like the one below. I want to get the max value of cpu0 and in_eth1 for every IP, sorted by date. Can someone help me with the following query?
{
"ip":"10.235.13.172",
"date":"2015-11-09",
"time":"18:30:00",
"cpu0":7"cpu13":2,
"cpu14":1,
"diskio(%)":0,
"memuse(MB)":824,
"in_eth1(Mbps)":34
}
"aggs": {
"events_by_date": {
"date_histogram": {
"field": "date",
"interval": "day"
},
"aggs" : {
"genders" : {
"terms" : {
"field" : "ip",
"size": 100000,
"order" : { "_count" : "asc" }
},
"aggs" : {
"maxcpu" : { "max" : { "field" : "cpu(%)" } },
"maxin" : { "max" : { "field" : "in_eth1(Mbps)" } },
}
}
}
}
}
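For reference, a minimal corrected sketch, not the original poster's working query: it assumes an index named metrics, date mapped as a date type, ip mapped as an exact-value (not_analyzed/keyword) field, and it uses the field names from the sample document (cpu0 and in_eth1(Mbps)) rather than cpu(%). The date_histogram buckets come back in date order, and each IP bucket carries the two max values:
GET metrics/_search
{
  "size": 0,
  "aggs": {
    "events_by_date": {
      "date_histogram": {
        "field": "date",
        "interval": "day"
      },
      "aggs": {
        "by_ip": {
          "terms": {
            "field": "ip",
            "size": 100000
          },
          "aggs": {
            "maxcpu": { "max": { "field": "cpu0" } },
            "maxin": { "max": { "field": "in_eth1(Mbps)" } }
          }
        }
      }
    }
  }
}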

Related

Getting avg sub aggregation

I'd like to get the average of a sub-aggregation. For example, I have the daily profit of each branch. I want to sum them so that I can get the total daily profit, and then I want to get the monthly or weekly average of that daily profit. So far I have done this:
{
"size" : 0,
"aggs" : {
"group_by_month": {
"date_histogram": {
"field": "Profit_Day",
"interval": "month",
"format" : "MM-yyyy"
},
"aggs": {
"avgProf": {
"avg": {
"field": "ProfitValue"
}
},
"group_by_day": {
"date_histogram": {
"field": "Profit_Day",
"interval": "day",
"format" : "yyyy-MM-dd"
},
"aggs": {
"prof": {
"sum": {
"field": "ProfitValue"
}
}
}
}
}
}
}
}
The issue is that I am getting the daily sum, which is correct, but instead of the monthly average of the daily sums, I am getting the monthly average of the profit from each branch.
You need to use the avg_bucket pipeline aggregation.
Query:
GET sales1/_search
{
"size": 0,
"aggs": {
"group_by_month": {
"date_histogram": {
"field": "proffit_day",
"interval": "month",
"format": "MM-yyyy"
},
"aggs": {
"group_by_day": {
"date_histogram": {
"field": "proffit_day",
"interval": "day",
"format": "yyyy-MM-dd"
},
"aggs": {
"prof": {
"sum": {
"field": "proffit_value"
}
}
}
},
"avg_monthly_sales": {
"avg_bucket": {
"buckets_path": "group_by_day>prof"
}
}
}
}
}
}
Response:
{
"group_by_month" : {
"buckets" : [
{
"key_as_string" : "09-2019",
"key" : 1567296000000,
"doc_count" : 2,
"group_by_day" : {
"buckets" : [
{
"key_as_string" : "2019-09-25",
"key" : 1569369600000,
"doc_count" : 2,
"prof" : {
"value" : 15.0
}
}
]
},
"avg_monthly_sales" : {
"value" : 15.0
}
},
{
"key_as_string" : "10-2019",
"key" : 1569888000000,
"doc_count" : 2,
"group_by_day" : {
"buckets" : [
{
"key_as_string" : "2019-10-01",
"key" : 1569888000000,
"doc_count" : 1,
"prof" : {
"value" : 10.0
}
},
{
"key_as_string" : "2019-10-02",
"key" : 1569974400000,
"doc_count" : 0,
"prof" : {
"value" : 0.0
}
},
{
"key_as_string" : "2019-10-03",
"key" : 1570060800000,
"doc_count" : 1,
"prof" : {
"value" : 15.0
}
}
]
},
"avg_monthly_sales" : {
"value" : 12.5
}
}
]
}
}
}
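Note how avg_monthly_sales is derived in the response above: for the 10-2019 bucket it is the average of the daily prof sums, (10.0 + 15.0) / 2 = 12.5. The empty 2019-10-02 bucket (doc_count 0) is evidently skipped by the pipeline's default gap_policy rather than dragging the average down to about 8.3.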

Count the percentage of character fields

I want to count the percentage of a specified field's values.
This is my REST request:
GET _search
{
"_source": {
"includes": [ "FIRST_SWITCHED","LAST_SWITCHED","IPV4_DST_ADDR","L4_DST_PORT","IPV4_SRC_ADDR","L7_PROTO_NAME","IN_BYTES","IN_PKTS","OUT_BYTES","OUT_PKTS"]
},
"from" : 0, "size" : 10000,
"query": {
"bool": {
"must": [
{
"match" : { "_index" : "logstash-2017.12.22" }
},
{
"match_phrase":{"IPV4_SRC_ADDR":"192.168.0.159"}
},
{
"range" : {
"LAST_SWITCHED" : {
"gte" : 1513683600
}
}
}
]
}
},
"aggs": {
"IN_PKTS": {
"sum": {
"field": "IN_PKTS"
}
},
"IN_BYTES": {
"sum": {
"field": "IN_BYTES"
}
},
"OUT_BYTES": {
"sum": {
"field": "OUT_BYTES"
}
},
"OUT_PKTS": {
"sum": {
"field": "OUT_PKTS"
}
},
"percent":{
"significant_terms" : {
"field" : "L7_PROTO_NAME",
"percentage":{}
}},
"protocol" : {
"terms" : {
"field" : "PROTOCOL",
"include" : ["17", "6"]
}
},
"Using_port_count" : {
"cardinality" : {
"field" : "L4_SRC_PORT"
}
}
}
}
But there are some errors. This is the error message:
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [L7_PROTO_NAME] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
Thank you in advance!
OK, I found the answer! Just add .keyword here and it runs:
"field" : "L7_PROTO_NAME.keyword"

Multiple aggregations in Elasticsearch

I want to do a terms aggregation on two fields. I don't want sub-aggregations; I want results in two different bucket groups, as if I had run two separate queries for the two fields. Is it possible to combine these two queries into one?
First query:
{
"size" : 0,
"aggs" : {
"brands" : {
"terms" : {
"field" : "my_field1",
"size" : 15
},
"aggs" : {
"my_field_top_hits1" : {
"top_hits" : {
"size" : 1
}
}
}
}
}
}
Second query:
{
"size" : 0,
"aggs" : {
"brands" : {
"terms" : {
"field" : "my_field2",
"size" : 15
},
"aggs" : {
"my_field_top_hits2" : {
"top_hits" : {
"size" : 1
}
}
}
}
}
}
Unless I'm missing something obvious, you just need to do:
{
"size": 0,
"aggs": {
"brands_field1": {
"terms": {
"field": "my_field1",
"size": 15
},
"aggs": {
"my_field_top_hits1": {
"top_hits": {
"size": 1
}
}
}
},
"brands_field2": {
"terms": {
"field": "my_field2",
"size": 15
},
"aggs": {
"my_field_top_hits1": {
"top_hits": {
"size": 1
}
}
}
}
}
}

elasticsearch returns null on stats aggregation

I have a small dataset of 1200 entries in Elasticsearch that is automatically indexed into mapped fields of the document types: floats go into float fields and doubles into double fields.
When running a stats aggregation over the data, like:
GET /statsd-2015.09.28/timer_data/_search
{
"query" : {
"filtered" : {
"query" : { "match_all" : {}},
"filter" : {
"range" : { "ns" : { "lte" : "gunicorn" }}
}
}
},
"aggs" : {
"value_val" : { "stats" : { "field" : "u'count_90'" } }
}
}
I get null back in the response, like this:
...
"aggregations": {
"value_val": {
"count": 0,
"min": null,
"max": null,
"avg": null,
"sum": null
}
}
...
Here is my mapping of fields:
{"statsd-2015.09.28":{"mappings":{"timer":{"properties":{"#timestamp":{"type":"string"},"act":{"type":"string"},"grp":{"type":"string"},"ns":{"type":"string"},"tgt":{"type":"string"},"val":{"type":"float"}}},"gauge":{"properties":{"#timestamp":{"type":"string"},"act":{"type":"string"},"grp":{"type":"string"},"ns":{"type":"string"},"tgt":{"type":"string"},"val":{"type":"float"}}},"counter":{"properties":{"#timestamp":{"type":"string"},"act":{"type":"string"},"grp":{"type":"string"},"ns":{"type":"string"},"tgt":{"type":"string"},"val":{"type":"float"}}},"timer_data":{"properties":{"#timestamp":{"type":"double"},"act":{"type":"string"},"count":{"type":"float"},"count_90":{"type":"float"},"count_ps":{"type":"float"},"grp":{"type":"string"},"lower":{"type":"float"},"mean":{"type":"float"},"mean_90":{"type":"float"},"median":{"type":"float"},"ns":{"type":"string"},"std":{"type":"float"},"sum":{"type":"float"},"sum_90":{"type":"float"},"sum_squares":{"type":"float"},"sum_squares_90":{"type":"float"},"tgt":{"type":"string"},"upper":{"type":"float"},"upper_90":{"type":"float"}}}}}}
What I want to ask is: why is my output not what I expect, and how can I get it?
GET /statsd-2015.09.28/timer_data/_search
{
"query" : {
"filtered" : {
"query" : { "match_all" : {}},
"filter" : {
"range" : { "ns" : { "lte" : "gunicorn" }}
}
}
},
"aggs" : {
"value_val" : { "stats" : { "field" : "count_90" } }
}
}
I am new to this, but I realized the field name I was using (u'count_90') was not the actual field name (count_90). After fixing that, everything became clear.
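As a quick sanity check before aggregating, the real field names can be read straight from the mapping (this is how the mapping shown above can be retrieved):
GET /statsd-2015.09.28/_mapping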

Post filter on subaggregation in elasticsearch

I am trying to run a post filter on the aggregated data, but it is not working as I expected. Can someone review my query and tell me whether I am doing anything wrong here?
"query" : {
"bool" : {
"must" : {
"range" : {
"versionDate" : {
"from" : null,
"to" : "2016-04-22T23:13:50.000Z",
"include_lower" : false,
"include_upper" : true
}
}
}
}
},
"aggregations" : {
"associations" : {
"terms" : {
"field" : "association.id",
"size" : 0,
"order" : {
"_term" : "asc"
}
},
"aggregations" : {
"top" : {
"top_hits" : {
"from" : 0,
"size" : 1,
"_source" : {
"includes" : [ ],
"excludes" : [ ]
},
"sort" : [ {
"versionDate" : {
"order" : "desc"
}
} ]
}
},
"disabledDate" : {
"filter" : {
"missing" : {
"field" : "disabledDate"
}
}
}
}
}
}
}
Steps in the query:
1. Filter by versionDate less than or equal to a given date.
2. Aggregate on association.id, forming one bucket per id.
3. Sort each bucket by versionDate descending and return the top hit per bucket.
4. Run a sub-aggregation filter after the top_hits sub-aggregation and remove from each bucket all documents whose disabledDate is not null (this is the part that is not working).
The whole purpose of post_filter is to run after aggregations have been computed. As such, post_filter has no effect whatsoever on aggregation results.
What you can do in your case is to apply a top-level filter aggregation so that documents with no disabledDate are not taken into account in aggregations, i.e. consider only documents with disabledDate.
{
"query": {
"bool": {
"must": {
"range": {
"versionDate": {
"from": null,
"to": "2016-04-22T23:13:50.000Z",
"include_lower": true,
"include_upper": true
}
}
}
}
},
"aggregations": {
"with_disabled": {
"filter": {
"exists": {
"field": "disabledDate"
}
},
"aggs": {
"form.id": {
"terms": {
"field": "form.id",
"size": 0
},
"aggregations": {
"top": {
"top_hits": {
"size": 1,
"_source": {
"includes": [],
"excludes": []
},
"sort": [
{
"versionDate": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}
}
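Conversely, if the goal is really step 4 as stated in the question (keep only documents whose disabledDate is null), the same top-level pattern works with the filter inverted; a sketch of just the outer filter aggregation, with the same form.id / top sub-aggregations nested inside it as above:
"without_disabled": {
  "filter": {
    "bool": {
      "must_not": { "exists": { "field": "disabledDate" } }
    }
  },
  "aggs": { ... }
}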
