Elasticsearch query to extract namespace and log fields?

Using match_all I can get the following from a local es cluster:
$ curl "http://127.0.0.1:9200/_search?pretty" -H 'Content-Type: application/json' -d'{ "query": { "match_all": {} }}'
{
"took" : 9,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "GSh-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"#timestamp" : "2021-08-30T14:37:05.020Z",
"time" : "2021-08-30T14:37:05.020460752Z",
"stream" : "stderr",
"_p" : "F",
"log" : "[2021/08/30 14:37:05] [error] [net] TCP connection failed: elasticsearch-master.elk.svc.cluster.local:9200 (Connection refused)",
"kubernetes" : {
"pod_name" : "fluent-bit-49z9h",
"namespace_name" : "logging",
"pod_id" : "02428324-c3e0-459e-bcc5-0c33af8db989",
"labels" : {
"app_kubernetes_io/instance" : "fluent-bit",
"app_kubernetes_io/name" : "fluent-bit",
"controller-revision-hash" : "74556bf9cf",
"pod-template-generation" : "1"
},
"annotations" : {
"checksum/config" : "f4a875e2e4705ad60e5dcc5c306e94891f9200db72649cff4020642d9df2ecf1",
"checksum/luascripts" : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"host" : "my-kind-worker",
"container_name" : "fluent-bit",
"docker_id" : "46f6f349cc5bed659c50f9d29a94a76376f9243c076cdd29dfbd7cc60c238149",
"container_hash" : "docker.io/fluent/fluent-bit#sha256:10ea2709cef6e7059d980b4969d5f9d753ef97278a817c214cbe9120b1152082",
"container_image" : "docker.io/fluent/fluent-bit:1.8.3"
}
}
},
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "Gih-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"#timestamp" : "2021-08-30T14:37:05.020Z",
"time" : "2021-08-30T14:37:05.020491241Z",
"stream" : "stderr",
"_p" : "F",
"log" : "[2021/08/30 14:37:05] [error] [net] socket #64 could not connect to elasticsearch-master.elk.svc.cluster.local:9200",
"kubernetes" : {
"pod_name" : "fluent-bit-49z9h",
"namespace_name" : "logging",
"pod_id" : "02428324-c3e0-459e-bcc5-0c33af8db989",
"labels" : {
"app_kubernetes_io/instance" : "fluent-bit",
"app_kubernetes_io/name" : "fluent-bit",
"controller-revision-hash" : "74556bf9cf",
"pod-template-generation" : "1"
},
"annotations" : {
"checksum/config" : "f4a875e2e4705ad60e5dcc5c306e94891f9200db72649cff4020642d9df2ecf1",
"checksum/luascripts" : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"host" : "my-kind-worker",
"container_name" : "fluent-bit",
"docker_id" : "46f6f349cc5bed659c50f9d29a94a76376f9243c076cdd29dfbd7cc60c238149",
"container_hash" : "docker.io/fluent/fluent-bit#sha256:10ea2709cef6e7059d980b4969d5f9d753ef97278a817c214cbe9120b1152082",
"container_image" : "docker.io/fluent/fluent-bit:1.8.3"
}
}
}
]
}
}
For each hit I would like to just print the log and namespace_name. I have tried with:
$ curl -X GET "http://127.0.0.1:9200/_search?pretty" -H 'Content-Type: application/json' -d'
{
"_source": {
"includes": [ "log", "kubernetes.namespace_name" ],
"excludes": [ "_type" ]
}
}
'
{
"took" : 293,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "GSh-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"kubernetes" : {
"namespace_name" : "logging"
},
"log" : "[2021/08/30 14:37:05] [error] [net] TCP connection failed: elasticsearch-master.elk.svc.cluster.local:9200 (Connection refused)"
}
},
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "Gih-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"kubernetes" : {
"namespace_name" : "logging"
},
"log" : "[2021/08/30 14:37:05] [error] [net] socket #64 could not connect to elasticsearch-master.elk.svc.cluster.local:9200"
}
}
]
}
}
But even though I have "excludes": [ "_type" ], it's still part of the output.
How do I trim those unwanted fields from the output?

You can use filter_path to trim the response. Source filtering cannot remove _type because it is document metadata rather than part of _source, but filter_path applies to the whole response body:
POST http://localhost:9200/index-name/_search?filter_path=hits.hits._source
{
"_source": {
"includes": [
"log",
"kubernetes.namespace_name"
]
}
}
The search result will be
{
"hits": {
"hits": [
{
"_source": {
"kubernetes": {
"namespace_name": "logging"
},
"log": "[2021/08/30 14:37:05] [error] [net] socket #64 could not connect to elasticsearch-master.elk.svc.cluster.local:9200"
}
}
]
}
}
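Applied to the cluster from the question, the full request would look roughly like this (a sketch that reuses the same local address and field names shown above):
$ curl "http://127.0.0.1:9200/_search?pretty&filter_path=hits.hits._source" -H 'Content-Type: application/json' -d'
{
  "_source": {
    "includes": [ "log", "kubernetes.namespace_name" ]
  }
}'
And if the goal is literally to print just the two values per hit, one option (assuming jq is installed) is to drop ?pretty and pipe the response through jq:
$ curl -s "http://127.0.0.1:9200/_search?filter_path=hits.hits._source" -H 'Content-Type: application/json' -d'{ "_source": [ "log", "kubernetes.namespace_name" ] }' | jq -r '.hits.hits[]._source | [.kubernetes.namespace_name, .log] | @tsv'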

Related

Null field in elasticsearch need to be replaced

How can I replace "build_duration" : "null" with the value 21600000 in Elasticsearch?
DevTools > Console
GET myindex/_search
{
"query": {
"term": {
"build_duration": "null"
}
}
}
Output:
{
"took" : 10,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 9.658761,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "40324749",
"_score" : 9.658761,
"_source" : {
"build_duration" : "null",
"build_end_time" : "2021-05-20 04:00:36",
"build_requester" : "daniel.su",
"build_site" : "POL",
"build_id" : "40324749",
"#version" : "1"
}
}
]
}
}
The following query replaces the field value for a single document:
POST /myindex/_update/mydocid
{
"doc" : {
"build_duration": "21600000"
}
}
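If more than one document carries the "null" placeholder, the same change can be applied to all of them in one call with _update_by_query. A minimal sketch, assuming the same index and the same literal string value as above:
POST /myindex/_update_by_query
{
  "query": {
    "term": {
      "build_duration": "null"
    }
  },
  "script": {
    "lang": "painless",
    "source": "ctx._source.build_duration = '21600000'"
  }
}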

How do I apply reindex to new data values through filters?

This is the (example) output of my basic data:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 163,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "0513_final_test_instgram",
"_type" : "_doc",
"_id" : "6uShY3kBEkIlakOYovrR",
"_score" : 1.0,
"_source" : {
"host" : "DESKTOP-7MDCA36",
"path" : "C:/python_file/20210513_114123_instargram.csv",
"#version" : "1",
"message" : "hello",
"#timestamp" : "2021-05-13T02:50:05.962Z"
}
},
{
"_index" : "0513_final_test_instgram",
"_type" : "_doc",
"_id" : "EeShY3kBEkIlakOYovvm",
"_score" : 1.0,
"_source" : {
"host" : "DESKTOP-7MDCA36",
"path" : "C:/python_file/20210513_114123_instargram.csv",
"#version" : "1",
"message" : "python,
"#timestamp" : "2021-05-13T02:50:05.947Z"
}
First of all, out of the various fields, I have extracted only the message values (code example below):
GET 0513_final_test_instgram/_search?_source=message&filter_path=hits.hits._source
{
"hits" : {
"hits" : [
{
"_source" : {
"message" : "hello"
}
},
{
"_source" : {
"message" : "python"
}
I learned about the reindex API, which stores documents into a new index:
https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-reindex.html
However, even after reading the documentation I don't understand how to do it.
My attempt:
POST _reindex
{
"source": {
"index": "0513_final_test_instgram"
},
"dest": {
"index": "new_data_index"
}
}
How do I use reindex to store only the extracted message values in a new index?
Update: after trying the suggestion from the comments, the output is:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 163,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "new_data_index",
"_type" : "_doc",
"_id" : "6uShY3kBEkIlakOYovrR",
"_score" : 1.0,
"_source" : {
"message" : "hello"
}
},
{
"_index" : "new_data_index",
"_type" : "_doc",
"_id" : "EeShY3kBEkIlakOYovvm",
"_score" : 1.0,
"_source" : {
"message" : "python"
}
}
You simply need to specify which fields you want to reindex into the new index:
POST _reindex
{
"source": {
"index": "0513_final_test_instgram",
"_source": ["message"]
},
"dest": {
"index": "new_data_index"
}
}
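If you also need to restrict which documents get copied (not only which fields), the source section of _reindex accepts a query as well. For illustration, a sketch that copies only documents whose message matches "python" (the match clause here is just an assumed example, not something from the question):
POST _reindex
{
  "source": {
    "index": "0513_final_test_instgram",
    "_source": ["message"],
    "query": {
      "match": {
        "message": "python"
      }
    }
  },
  "dest": {
    "index": "new_data_index"
  }
}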

ElasticSearch - Filter Buckets

My Elasticsearch query looks like this:
{
"size": 0,
"aggs": {
"group_by_id": {
"terms": {
"field": "Infos.InstanceInfo.ID.keyword",
"size": 1000
},
"aggs": {
"tops": {
"top_hits": {
"size": 100,
"sort": {
"Infos.InstanceInfo.StartTime": "asc"
}
}
}
}
}
}
}
It works fine, I have a result of this form:
aggregations
=========>group_by_id
==============>buckets
{key:id1}
===============>docs
{doc1.Status:"KO"}
{doc2.Status:"KO"}
{key:id2}
===============>docs
{doc1.Status:"KO"}
{doc2.Status:"OK"}
{key:id3}
===============>docs
{doc1.Status:"KO"}
{doc2.Status:"OK"}
I'm trying to add a filter so that, when filtering on "OK", the result looks like this:
aggregations
=========>group_by_id
==============>buckets
{key:id2}
===============>docs
{doc1.Status:"KO"}
{doc2.Status:"OK"}
{key:id3}
===============>docs
{doc1.Status:"KO"}
{doc2.Status:"OK"}
and for "KO" :
aggregations
=========>group_by_id
==============>buckets
{key:id1}
===============>docs
{doc1.Status:"KO"}
{doc2.Status:"KO"}
Fields "Startime" & "Status" are at the same level "Infos.InstanceInfo.[...]"
Any idea?
EDIT
Sample docs:
{
"took" : 794,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"group_by_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 143846,
"buckets" : [
{
"key" : "1000",
"doc_count" : 6,
"tops" : {
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "vHFvoXYBVWrYChNi7hB7",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "1000",
"StartTime" : "2020-12-27T00:43:56.011+01:00",
"status" : "KO"
}
}
},
"sort" : [
1609026236011
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "xHFvoXYBVWrYChNi7xAB",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "1000",
"StartTime" : "2020-12-27T00:43:56.145+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609026236145
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "xXFvoXYBVWrYChNi7xAC",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "1000",
"StartTime" : "2020-12-27T00:43:56.147+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609026236147
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "x3FvoXYBVWrYChNi7xAs",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "1000",
"StartTime" : "2020-12-27T00:43:56.188+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609026236188
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "yHFvoXYBVWrYChNi7xAs",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "1000",
"StartTime" : "2020-12-27T00:43:56.19+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609026236190
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "ynFvoXYBVWrYChNi7xBd",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "1000",
"StartTime" : "2020-12-27T00:43:56.236+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609026236236
]
}
]
}
}
},
{
"key" : "2000",
"doc_count" : 2,
"tops" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "7HL_onYBVWrYChNij4Is",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "2000",
"StartTime" : "2020-12-27T08:00:26.011+01:00",
"status" : "KO"
}
}
},
"sort" : [
1609052426011
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "9HL_onYBVWrYChNij4Kz",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "2000",
"StartTime" : "2020-12-27T08:00:26.146+01:00",
"status" : "KO"
}
}
},
"sort" : [
1609052426146
]
}
]
}
}
},
{
"key" : "3000",
"doc_count" : 6,
"tops" : {
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "7nNRpHYBVWrYChNiiruh",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "3000",
"StartTime" : "2020-12-27T14:09:36.015+01:00",
"status" : "KO"
}
}
},
"sort" : [
1609074576015
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "9nNRpHYBVWrYChNii7s5",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "3000",
"StartTime" : "2020-12-27T14:09:36.166+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609074576166
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "93NRpHYBVWrYChNii7s5",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "3000",
"StartTime" : "2020-12-27T14:09:36.166+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609074576166
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "-XNRpHYBVWrYChNii7ti",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "3000",
"StartTime" : "2020-12-27T14:09:36.209+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609074576209
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "-nNRpHYBVWrYChNii7ts",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "3000",
"StartTime" : "2020-12-27T14:09:36.219+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609074576219
]
},
{
"_index" : "azerty",
"_type" : "_doc",
"_id" : "_HNRpHYBVWrYChNii7ud",
"_score" : null,
"_source" : {
"Infos" : {
"InstanceInfo" : {
"ID" : "3000",
"StartTime" : "2020-12-27T14:09:36.269+01:00",
"status" : "OK"
}
}
},
"sort" : [
1609074576269
]
}
]
}
}
}
]
}
}
}
Assuming the status field is under Infos.InstanceInfo and is mapped as keyword, you can use the filter aggregation:
{
"size": 0,
"aggs": {
"status_KO_only": {
"filter": { <--
"term": {
"Infos.InstanceInfo.Status": "KO"
}
},
"aggs": {
"group_by_id": {
"terms": {
"field": "Infos.InstanceInfo.ID.keyword",
"size": 1000
},
"aggs": {
"tops": {
"top_hits": {
"size": 100,
"sort": {
"Infos.InstanceInfo.StartTime": "asc"
}
}
}
}
}
}
}
}
}
In this particular case you could've applied the same term query in the query part of the search request without having to use a filter aggregation.
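For reference, that alternative would look roughly like this (same assumption about the Infos.InstanceInfo.Status field name and keyword mapping as above):
{
  "size": 0,
  "query": {
    "term": {
      "Infos.InstanceInfo.Status": "KO"
    }
  },
  "aggs": {
    "group_by_id": {
      "terms": {
        "field": "Infos.InstanceInfo.ID.keyword",
        "size": 1000
      },
      "aggs": {
        "tops": {
          "top_hits": {
            "size": 100,
            "sort": {
              "Infos.InstanceInfo.StartTime": "asc"
            }
          }
        }
      }
    }
  }
}
The difference is that the query narrows the documents before aggregation, whereas the filter aggregation keeps the query scope intact and only restricts that one branch of the aggregation tree.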
If you want to get both OK and KO in the same request, you can copy/paste the whole status_KO_only aggregation, rename the 2nd one, and voila -- you now have both groups in one request. You can of course have as many differently named (top-level) filter aggs as you like.
Now, when you indeed need multiple filter aggs at once, there's a more elegant way that does not require copy-pasting -- enter the filters aggregation:
{
"size": 0,
"aggs": {
"by_statuses": {
"filters": { <--
"filters": {
"status_KO": {
"term": {
"Infos.InstanceInfo.Status": "KO"
}
},
"status_OK": {
"term": {
"Infos.InstanceInfo.Status": "OK"
}
}
}
},
"aggs": {
"group_by_id": {
"terms": {
"field": "Infos.InstanceInfo.ID.keyword",
"size": 1000
},
"aggs": {
"tops": {
"top_hits": {
"size": 100,
"sort": {
"Infos.InstanceInfo.StartTime": "asc"
}
}
}
}
}
}
}
}
}
The child sub-aggregations are automatically applied within each bucket of the explicitly declared filters.
I personally find the copy/paste approach more readable, especially when constructing such requests dynamically (based on UI dropdowns and such).

How to perform an arithmetic operation on data from Elasticsearch

I need the average of CpuAverageLoad for a specific NodeType. For example, if I give the NodeType as tpt, it should return the average CpuAverageLoad across all available tpt nodes. I have tried different methods, but in vain.
My data in Elasticsearch is below:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 1.0,
"hits" : [
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0003",
"_score" : 1.0,
"_source" : {
"kpi" : {
"CpuAverageLoad" : 13,
"NodeId" : "kishan",
"NodeType" : "Tpt",
"State" : "online",
"Static_limit" : 0
}
}
},
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0005",
"_score" : 1.0,
"_source" : {
"kpi" : {
"CpuAverageLoad" : 15,
"NodeId" : "kishan1",
"NodeType" : "tpt",
"State" : "online",
"Static_limit" : 0
}
}
},
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0004",
"_score" : 1.0,
"_source" : {
"kpi" : {
"MaxLbCapacity" : "700000",
"NodeId" : "kishan2",
"NodeType" : "bang",
"OnlineCSCF" : [
"001",
"002"
],
"State" : "Online",
"TdbGroup" : 1,
"TdGroup" : 0
}
}
},
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0002",
"_score" : 1.0,
"_source" : {
"kpi" : {
"MaxLbCapacity" : "700000",
"NodeId" : "kishan3",
"NodeType" : "bang",
"OnlineCSCF" : [
"001",
"002"
],
"State" : "Online",
"TdLGroup" : 1,
"TGroup" : 0
}
}
}
]
}
}
And my query is
curl -XGET 'localhost:9200/_search?pretty' -H 'Content-Type: application/json' -d'
{
"query": {
"bool" : {
"must" : {
"script" : {
"script" : {
"source" : "kpi[CpuAverageLoad].value > params.param1",
"lang" : "painless",
"params" : {
"param1" : 5
}
}
}
}
}
}
}'
but it is failing, as it does not accept the script's "source" field:
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "[script] unknown field [source], parser not found"
}
],
"type" : "illegal_argument_exception",
"reason" : "[script] unknown field [source], parser not found"
},
"status" : 400
}
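The error itself suggests an older Elasticsearch version in which the script body is passed as "inline" rather than "source". As for the average the question describes, it is normally computed with an avg aggregation rather than a script query. A minimal sketch, assuming kpi.NodeType received a .keyword sub-field from dynamic mapping (and noting that the sample data mixes "Tpt" and "tpt", which a term query treats as different values):
curl -XGET 'localhost:9200/kpi/_search?pretty' -H 'Content-Type: application/json' -d'
{
  "size": 0,
  "query": {
    "term": {
      "kpi.NodeType.keyword": "tpt"
    }
  },
  "aggs": {
    "avg_cpu_load": {
      "avg": {
        "field": "kpi.CpuAverageLoad"
      }
    }
  }
}'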

Search by text field

Here is my index:
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"match_all":{}}}'
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test",
"_type" : "mytype",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Dio",
"age" : 10
}
},
{
"_index" : "test",
"_type" : "mytype",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "Paul",
"pro" : {
"f" : "Cris",
"t" : "So"
}
}
}
]
}
}
Here is a default mapping:
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_mapping?pretty
{
"test" : {
"mappings" : {
"mytype" : {
"properties" : {
"age" : {
"type" : "long"
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
I can search by the age field, but not by the name field. Why?
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"age":10}}}'
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test",
"_type" : "mytype",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Dio",
"age" : 10
}
}
]
}
}
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"name":"Paul"}}}'
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
The problem is that your name field is analyzed by default with the standard analyzer, which lowercases the terms. You can either search for paul, or search the name.keyword field with Paul.
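A quick sketch of both options against the same index (assuming the mapping shown above, where name is text with a keyword sub-field):
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"name":"paul"}}}'
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"name.keyword":"Paul"}}}'
A match query on name would also work with the original casing, because the query string goes through the same analyzer as the indexed text.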
