How to perform arithmetic operations on data from Elasticsearch

I need the average of CpuAverageLoad for a specific NodeType. For example, if I give the NodeType as tpt, it should return the average CpuAverageLoad across all available tpt nodes. I tried different methods, but in vain.
My data in Elasticsearch is below:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 1.0,
"hits" : [
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0003",
"_score" : 1.0,
"_source" : {
"kpi" : {
"CpuAverageLoad" : 13,
"NodeId" : "kishan",
"NodeType" : "Tpt",
"State" : "online",
"Static_limit" : 0
}
}
},
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0005",
"_score" : 1.0,
"_source" : {
"kpi" : {
"CpuAverageLoad" : 15,
"NodeId" : "kishan1",
"NodeType" : "tpt",
"State" : "online",
"Static_limit" : 0
}
}
},
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0004",
"_score" : 1.0,
"_source" : {
"kpi" : {
"MaxLbCapacity" : "700000",
"NodeId" : "kishan2",
"NodeType" : "bang",
"OnlineCSCF" : [
"001",
"002"
],
"State" : "Online",
"TdbGroup" : 1,
"TdGroup" : 0
}
}
},
{
"_index" : "kpi",
"_type" : "kpi",
"_id" : "\u0002",
"_score" : 1.0,
"_source" : {
"kpi" : {
"MaxLbCapacity" : "700000",
"NodeId" : "kishan3",
"NodeType" : "bang",
"OnlineCSCF" : [
"001",
"002"
],
"State" : "Online",
"TdLGroup" : 1,
"TGroup" : 0
}
}
}
]
}
}
And my query is
curl -XGET 'localhost:9200/_search?pretty' -H 'Content-Type: application/json' -d'
{
"query": {
"bool" : {
"must" : {
"script" : {
"script" : {
"source" : "kpi[CpuAverageLoad].value > params.param1",
"lang" : "painless",
"params" : {
"param1" : 5
}
}
}
}
}
}
}'
but it is failing, as it does not recognize the script's source field:
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "[script] unknown field [source], parser not found"
}
],
"type" : "illegal_argument_exception",
"reason" : "[script] unknown field [source], parser not found"
},
"status" : 400
}
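Two issues stand out here. First, the "[script] unknown field [source]" error suggests an older Elasticsearch version that expects the script body under inline rather than source. Second, inside a Painless script the field would normally be accessed as doc['kpi.CpuAverageLoad'].value rather than kpi[CpuAverageLoad].value. That said, for computing the average itself a filtered avg aggregation is usually simpler than a script. The following is a minimal sketch, assuming kpi.NodeType is queryable as shown in the sample documents (the exact field name and casing depend on your mapping):
curl -XGET 'localhost:9200/kpi/_search?pretty' -H 'Content-Type: application/json' -d'
{
  "size": 0,
  "query": {
    "term": { "kpi.NodeType": "tpt" }
  },
  "aggs": {
    "avg_cpu_load": {
      "avg": { "field": "kpi.CpuAverageLoad" }
    }
  }
}'
The average is returned under aggregations.avg_cpu_load.value. Note that the sample data contains both "Tpt" and "tpt", so depending on the mapping the term query may match only one of them.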

Related

elasticsearch query to extract namespace and log fields?

Using match_all I can get the following from a local es cluster:
$ curl "http://127.0.0.1:9200/_search?pretty" -H 'Content-Type: application/json' -d'{ "query": { "match_all": {} }}'
{
"took" : 9,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "GSh-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"#timestamp" : "2021-08-30T14:37:05.020Z",
"time" : "2021-08-30T14:37:05.020460752Z",
"stream" : "stderr",
"_p" : "F",
"log" : "[2021/08/30 14:37:05] [error] [net] TCP connection failed: elasticsearch-master.elk.svc.cluster.local:9200 (Connection refused)",
"kubernetes" : {
"pod_name" : "fluent-bit-49z9h",
"namespace_name" : "logging",
"pod_id" : "02428324-c3e0-459e-bcc5-0c33af8db989",
"labels" : {
"app_kubernetes_io/instance" : "fluent-bit",
"app_kubernetes_io/name" : "fluent-bit",
"controller-revision-hash" : "74556bf9cf",
"pod-template-generation" : "1"
},
"annotations" : {
"checksum/config" : "f4a875e2e4705ad60e5dcc5c306e94891f9200db72649cff4020642d9df2ecf1",
"checksum/luascripts" : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"host" : "my-kind-worker",
"container_name" : "fluent-bit",
"docker_id" : "46f6f349cc5bed659c50f9d29a94a76376f9243c076cdd29dfbd7cc60c238149",
"container_hash" : "docker.io/fluent/fluent-bit#sha256:10ea2709cef6e7059d980b4969d5f9d753ef97278a817c214cbe9120b1152082",
"container_image" : "docker.io/fluent/fluent-bit:1.8.3"
}
}
},
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "Gih-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"#timestamp" : "2021-08-30T14:37:05.020Z",
"time" : "2021-08-30T14:37:05.020491241Z",
"stream" : "stderr",
"_p" : "F",
"log" : "[2021/08/30 14:37:05] [error] [net] socket #64 could not connect to elasticsearch-master.elk.svc.cluster.local:9200",
"kubernetes" : {
"pod_name" : "fluent-bit-49z9h",
"namespace_name" : "logging",
"pod_id" : "02428324-c3e0-459e-bcc5-0c33af8db989",
"labels" : {
"app_kubernetes_io/instance" : "fluent-bit",
"app_kubernetes_io/name" : "fluent-bit",
"controller-revision-hash" : "74556bf9cf",
"pod-template-generation" : "1"
},
"annotations" : {
"checksum/config" : "f4a875e2e4705ad60e5dcc5c306e94891f9200db72649cff4020642d9df2ecf1",
"checksum/luascripts" : "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"host" : "my-kind-worker",
"container_name" : "fluent-bit",
"docker_id" : "46f6f349cc5bed659c50f9d29a94a76376f9243c076cdd29dfbd7cc60c238149",
"container_hash" : "docker.io/fluent/fluent-bit#sha256:10ea2709cef6e7059d980b4969d5f9d753ef97278a817c214cbe9120b1152082",
"container_image" : "docker.io/fluent/fluent-bit:1.8.3"
}
}
}
]
}
}
For each hit I would like to just print the log and namespace_name. I have tried with:
$ curl -X GET "http://127.0.0.1:9200/_search?pretty" -H 'Content-Type: application/json' -d'
{
"_source": {
"includes": [ "log", "kubernetes.namespace_name" ],
"excludes": [ "_type" ]
}
}
'
{
"took" : 293,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "GSh-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"kubernetes" : {
"namespace_name" : "logging"
},
"log" : "[2021/08/30 14:37:05] [error] [net] TCP connection failed: elasticsearch-master.elk.svc.cluster.local:9200 (Connection refused)"
}
},
{
"_index" : "kubernetes-2021.08.30",
"_type" : "_doc",
"_id" : "Gih-l3sBkJvTF8SCKo5f",
"_score" : 1.0,
"_source" : {
"kubernetes" : {
"namespace_name" : "logging"
},
"log" : "[2021/08/30 14:37:05] [error] [net] socket #64 could not connect to elasticsearch-master.elk.svc.cluster.local:9200"
}
}
]
}
}
But even though I have "excludes": [ "_type" ], it's still part of the output.
How do I trim those unwanted fields from the output?
The excludes option only applies to fields inside _source; _type is response metadata, so source filtering cannot remove it. You can instead use filter_path to trim the response:
POST http://localhost:9200/index-name/_search?filter_path=hits.hits._source
{
"_source": {
"includes": [
"log",
"kubernetes.namespace_name"
]
}
}
The search result will be
{
"hits": {
"hits": [
{
"_source": {
"kubernetes": {
"namespace_name": "logging"
},
"log": "[2021/08/30 14:37:05] [error] [net] socket #64 could not connect to elasticsearch-master.elk.svc.cluster.local:9200"
}
}
]
}
}
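For completeness, the same request as a single curl call against the cluster from the question (host and body are taken from above; adjust as needed):
curl "http://127.0.0.1:9200/_search?filter_path=hits.hits._source&pretty" -H 'Content-Type: application/json' -d'
{
  "_source": [ "log", "kubernetes.namespace_name" ],
  "query": { "match_all": {} }
}'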

Elasticsearch suggestion scoring not working with fuzzy search

When the following Elasticsearch query is used to fetch autocomplete data, the returned suggestions are not relevant and the scoring does not work as expected.
GET quick_search/_search
{
"suggest": {
"name-suggest": {
"text": "Clic",
"completion": {
"field": "Name",
"size": 25,
"skip_duplicates": true,
"fuzzy" : {
"fuzziness": 1,
"prefix_length": 1,
"min_length": 4,
"unicode_aware": true
}
}
}
}
}
The search text is "Clic", but the fuzzy search does not return the most relevant results first. How can I boost results for maximum relevancy, so that a word like "CLIC7000" (which is closer to my query) ranks above "CLI36"?
{
"took" : 706,
"timed_out" : false,
"_shards" : {
"total" : 15,
"successful" : 15,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : 0.0,
"hits" : [ ]
},
"suggest" : {
"name-suggest" : [
{
"text" : "Clic",
"offset" : 0,
"length" : 4,
"options" : [
{
"text" : "CLI36",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "330719",
"_score" : 3.0,
"_source" : {
"ID" : "330719",
"Name" : "CLI36"
}
},
{
"text" : "CLI361511B001",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "330717",
"_score" : 3.0,
"_source" : {
"ID" : "330717",
"Name" : "CLI361511B001"
}
},
{
"text" : "CLI42C6385B001",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "185340",
"_score" : 3.0,
"_source" : {
"ID" : "185340",
"Name" : "CLI42C6385B001"
}
},
{
"text" : "CLI42PM",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "185345",
"_score" : 3.0,
"_source" : {
"ID" : "185345",
"Name" : "CLI42PM",
}
},
{
"text" : "CLI42PM6389B001",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "185343",
"_score" : 3.0,
"_source" : {
"ID" : "185343",
"Name" : "CLI42PM6389B001"
}
},
{
"text" : "CLI441",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "233554",
"_score" : 3.0,
"_source" : {
"ID" : "233554",
"Name" : "CLI441"
}
},
{
"text" : "CLI451BK",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "185334",
"_score" : 3.0,
"_source" : {
"ID" : "185334",
"Name" : "CLI451BK"
}
},
{
"text" : "CLI451BK6523B001",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "185332",
"_score" : 3.0,
"_source" : {
"ID" : "185332",
"Name" : "CLI451BK6523B001"
}
},
{
"text" : "CLI451C",
"_index" : "quick_search",
"_type" : "quick_search",
"_id" : "185331",
"_score" : 3.0,
"_source" : {
"ID" : "185331",
"Name" : "CLI451C"
}
}
]
}
]
}
}
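One common workaround (a sketch, not a verified fix for this particular index) is to send two named suggesters in the same request, one exact-prefix and one fuzzy, and prefer the exact suggester's options on the client side whenever it returns results. Both use the same completion suggester API as the query above; only the fuzzy block differs:
GET quick_search/_search
{
  "suggest": {
    "name-exact": {
      "text": "Clic",
      "completion": {
        "field": "Name",
        "size": 25,
        "skip_duplicates": true
      }
    },
    "name-fuzzy": {
      "text": "Clic",
      "completion": {
        "field": "Name",
        "size": 25,
        "skip_duplicates": true,
        "fuzzy": {
          "fuzziness": 1,
          "prefix_length": 1,
          "min_length": 4,
          "unicode_aware": true
        }
      }
    }
  }
}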

Query with match to get all values for a given field! ElasticSearch

I'm pretty new to Elasticsearch and would like to write a query that lists all values of a specific field. For example, given the fields "Number" and "change_manager_group", is there a query that lists all Numbers for which "change_manager_group" = "Change Managers - 2"?
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 10,
"successful" : 10,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1700,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test-tem-changes",
"_type" : "_doc",
"_id" : "CHG0393073_1554800400000",
"_score" : 1.0,
"_source" : {
"work_notes" : "",
"priority" : "4 - Low",
"planned_start" : 1554800400000,
"Updated_by" : "system",
"Updated" : 1554819333000,
"phase" : "Requested",
"Number" : "CHG0312373",
"change_manager_group" : "Change Managers - 1",
"approval" : "Approved",
"downtime" : "false",
"close_notes" : "",
"Standard_template_version" : "",
"close_code" : null,
"actual_start" : 1554819333000,
"closed_by" : "",
"Type" : "Normal"
}
},
{
"_index" : "test-tem-changes",
"_type" : "_doc",
"_id" : "CHG0406522_0",
"_score" : 1.0,
"_source" : {
"work_notes" : "",
"priority" : "4 - Low",
"planned_start" : 0,
"Updated_by" : "svcmdeploy_automation",
"Updated" : 1553320559000,
"phase" : "Requested",
"Number" : "CHG041232",
"change_manager_group" : "Change Managers - 2",
"approval" : "Approved",
"downtime" : "false",
"close_notes" : "Change Installed",
"Standard_template_version" : "",
"close_code" : "Successful",
"actual_start" : 1553338188000,
"closed_by" : "",
"Type" : "Automated"
}
},
{
"_index" : "test-tem-changes",
"_type" : "_doc",
"_id" : "CHG0406526_0",
"_score" : 1.0,
"_source" : {
"work_notes" : "",
"priority" : "4 - Low",
"planned_start" : 0,
"Updated_by" : "svcmdeploy_automation",
"Updated" : 1553321854000,
"phase" : "Requested",
"Number" : "CHG0412326",
"change_manager_group" : "Change Managers - 2",
"approval" : "Approved",
"downtime" : "false",
"close_notes" : "Change Installed",
"Standard_template_version" : "",
"close_code" : "Successful",
"actual_start" : 1553339629000,
"closed_by" : "",
"Type" : "Automated"
}
},
I tried this after a bit of googling, but it errors out:
curl -XGET "http://localhost:9200/test-tem-changes/_search?pretty=true" -H 'Content-Type: application/json' -d '
{
"query" : { "Number" : {"query" : "*"} }
}
'
What am I missing here?
To get all the documents where change_manager_group == "Change Managers - 2" you want to use a term query. Below I am wrapping it in a filter context so that it is faster (it does not score relevance).
If change_manager_group is not a keyword-mapped field, you may have to use change_manager_group.keyword, depending on your mapping.
GET test-tem-changes/_search
{
"query": {
"bool": {
"filter": {
"term": {
"change_manager_group": "Change Managers - 2"
}
}
}
}
}
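As a usage example, here is the same query as a curl call, returning only the relevant fields and using the .keyword sub-field (a sketch; whether .keyword is needed depends on your actual mapping):
curl -XGET "http://localhost:9200/test-tem-changes/_search?pretty" -H 'Content-Type: application/json' -d'
{
  "_source": [ "Number", "change_manager_group" ],
  "query": {
    "bool": {
      "filter": {
        "term": { "change_manager_group.keyword": "Change Managers - 2" }
      }
    }
  }
}'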

Search by text field

Here is my index:
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"match_all":{}}}'
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test",
"_type" : "mytype",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Dio",
"age" : 10
}
},
{
"_index" : "test",
"_type" : "mytype",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"name" : "Paul",
"pro" : {
"f" : "Cris",
"t" : "So"
}
}
}
]
}
}
Here is a default mapping:
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_mapping?pretty
{
"test" : {
"mappings" : {
"mytype" : {
"properties" : {
"age" : {
"type" : "long"
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
I can search by the age field, but not by the name field. Why?
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"age":10}}}'
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test",
"_type" : "mytype",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"name" : "Dio",
"age" : 10
}
}
]
}
}
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"name":"Paul"}}}'
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
The problem is that your name field is analyzed by default with the standard analyzer, which lowercases the terms. You can either run a term query for paul, or query the name.keyword sub-field with Paul.
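For illustration, both variants against the index from the question (a sketch based on the default mapping shown above):
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"name":"paul"}}}'
λ curl -XGET -u elastic:elasticpassword http://192.168.1.71:9200/test/mytype/_search?pretty -d'{"query":{"term":{"name.keyword":"Paul"}}}'
A match query on name (e.g. {"query":{"match":{"name":"Paul"}}}) also works, because the query text goes through the same analyzer as the indexed field.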

How to read the JSON output of a faceted search query?

I have movies that belong to a genre and have multiple ratings. With Elasticsearch, I want to do a faceted search on genres first, and then ratings.
I was reading about the idea here: http://www.elasticsearch.org/guide/reference/api/search/facets/
But I am confused about how to interpret the output of this curl query:
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
"query" : { "query_string" : {"query" : "T*"} },
"facets" : {
"categories" : { "terms" : {"field" : "categories"} }
}
}
'
{
"took" : 35,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 1.0,
"hits" : [ {
"_index" : "movies",
"_type" : "movie",
"_id" : "13",
"_score" : 1.0, "_source" : {"category_id":2,"created_at":"2013-05-03T16:40:21Z","description":null,"title":"Tiny Plastic Men","updated_at":"2013-05-03T16:40:21Z","user_id":null}
}, {
"_index" : "movies",
"_type" : "movie",
"_id" : "32",
"_score" : 1.0, "_source" : {"category_id":14,"created_at":"2013-05-03T16:55:02Z","description":null,"title":"The Extreme Truth","updated_at":"2013-05-03T16:55:02Z","user_id":null}
}, {
"_index" : "movies",
"_type" : "movie",
"_id" : "39",
"_score" : 1.0, "_source" : {"category_id":7,"created_at":"2013-05-03T16:55:02Z","description":null,"title":"A Time of Day","updated_at":"2013-05-03T16:55:02Z","user_id":null}
} ]
},
"facets" : {
"categories" : {
"_type" : "terms",
"missing" : 3,
"total" : 0,
"other" : 0,
"terms" : [ ]
}
}
I have some movies that start with a 'T', but I would additionally expect movies from the genre/category 'Thriller'.
Therefore, what can I read from the JSON above?
It seems like your facet does not match any field in your documents. You should probably use:
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
"query" : { "query_string" : {"query" : "T*"} },
"facets" : {
"categories" : { "terms" : {"field" : "category_id"} }
}
}
'
Then you should get a list of category_id values and a count of documents for each category_id.
Facets are deprecated; see https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-facets.html
A better alternative is to use aggregations: https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-aggregations.html
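For reference, the aggregation equivalent of the facet above would look roughly like this (a sketch; the aggregation name is arbitrary):
curl -X POST "http://localhost:9200/movies/_search?pretty=true" -d '
{
  "query" : { "query_string" : {"query" : "T*"} },
  "aggs" : {
    "categories" : { "terms" : { "field" : "category_id" } }
  }
}
'
The per-category counts then appear under aggregations.categories.buckets in the response.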
