Elasticsearch 1.7.3: doc_values treated as fielddata - elasticsearch

I'm new to ElasticSearch, started working with ElasticSearch 1.7.3 as part of a Logstash-ElasticSearch-Kibana deployment.
I've defined a mapping template for my log messages, this is the interesting part:
{
"template" : "logstash-*",
"settings" : { "index.refresh_interval" : "5s" },
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true, "omit_norms" : true},
"dynamic_templates" : [ {
"date_fields" : {
"match" : "*",
"match_mapping_type" : "date",
"mapping" : { "type" : "date", "doc_values" : true }
}
}],
"properties" : {
"#version" : { "type" : "string", "index" : "not_analyzed" },
"#timestamp" : { "type" : "date", "format" : "dateOptionalTime" },
"message" : { "type" : "string" }
}
} ,
"my_log" : {
"_all" : { "enabled" : true, "omit_norms" : true },
"dynamic_templates" : [ {
"date_fields" : {
"match" : "*",
"match_mapping_type" : "date",
"mapping" : { "type" : "date", "doc_values" : true }
}
}],
"properties" : {
"#timestamp" : { "type" : "date", "format" : "dateOptionalTime" },
"file" : { "type" : "string" },
"message" : { "type" : "string" }
"geolocation" : { "type" : "string" },
}
}
}
}
Although the #timestamp field is defined as doc_value:true I have an error of MemoryException because it is a fielddata:
[FIELDDATA] Data too large, data for [#timestamp] would be larger than
limit of [633785548/604.4 mb]
NOTE:
I know I can change the memory or add more nodes to the cluster, but in my point of view this is a design problem where this field should not be indexed in memory.

Related

Kibana index pattern mapping conflict

I am tired of reindexing every 2 3 weeks i have to do reindex.
{
"winlogbeat_sysmon" : {
"order" : 0,
"index_patterns" : [
"log-wlb-sysmon-*"
],
"settings" : {
"index" : {
"lifecycle" : {
"name" : "winlogbeat_sysmon_policy",
"rollover_alias" : "log-wlb-sysmon"
},
"refresh_interval" : "1s",
"number_of_shards" : "1",
"number_of_replicas" : "1"
}
},
"mappings" : {
"properties" : {
"thread_id" : {
"type" : "long"
},
"z_elastic_ecs.event.code" : {
"type" : "long"
},
"geoip" : {
"type" : "object",
"properties" : {
"ip" : {
"type" : "ip"
},
"latitude" : {
"type" : "half_float"
},
"location" : {
"type" : "geo_point"
},
"longitude" : {
"type" : "half_float"
}
}
},
"dst_ip_addr" : {
"type" : "ip"
}
}
},
"aliases" : { }
}
}
this is the template i set earlier from then i didn't change anything
in current and previous indices of log-wlb-sysmon has dst_ip_addr has ip field and older indices of log-wlb-sysmon has text field in logstash i didn't see any warnning for this issue

How to do ES Moving Avearge Prediction with Logstash?

I am using Elasticsearch 2.3.2, and Logstash 2.3.3. I have found from https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-movavg-aggregation.html which states that moving average can do predictions. I know it is possible to only make query in ES, but I am not sure how should I do that with logstash.
I have a logstash file which reads a csv log file storing CPU usage for every 15 seconds. Should I just include the following into the logstash output json file for the related index as an output mapping?
{
"the_movavg":{
"moving_avg":{
"buckets_path": "the_sum",
"window" : 30,
"model" : "holt_winters",
"settings" : {
"type" : "mult",
"alpha" : 0.5,
"beta" : 0.5,
"gamma" : 0.5,
"period" : 7,
"pad" : true
}
}
}
This is my json file for logstash
{
"template" : "linux_cpu-*",
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true, "omit_norms" : true},
"dynamic_templates" : [ {
"message_field" : {
"match" : "message",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fielddata" : { "format" : "disabled" }
}
}
}, {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fielddata" : { "format" : "disabled" },
"fields" : {
"raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
}
}
}
} ],
"properties" : {
"#timestamp": { "type": "date" },
"#version": { "type": "string", "index": "not_analyzed" },
"geoip" : {
"dynamic": true,
"properties" : {
"ip": { "type": "ip" },
"location" : { "type" : "geo_point" },
"latitude" : { "type" : "float" },
"longitude" : { "type" : "float" }
}
}
}
}
}
}
And is it possible to have it as a graph as to be shown in Kibana?

aggregation fails on nested aggregation field

I've this mapping for fuas type:
curl -XGET 'http://localhost:9201/living_team/_mapping/fuas?pretty'
{
"living_v1" : {
"mappings" : {
"fuas" : {
"properties" : {
"backlogStatus" : {
"type" : "long"
},
"comment" : {
"type" : "string"
},
"dueTimestamp" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"matter" : {
"type" : "string"
},
"metainfos" : {
"properties" : {
"category 1" : {
"type" : "string"
},
"key" : {
"type" : "string"
},
"null" : {
"type" : "string"
},
"processos" : {
"type" : "string"
}
}
},
"resources" : {
"properties" : {
"noteId" : {
"type" : "string"
},
"resourceId" : {
"type" : "string"
}
}
},
"status" : {
"type" : "long"
},
"timestamp" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"user" : {
"type" : "string",
"index" : "not_analyzed"
}
}
}
}
}
}
I'm trying to perform this aggregation:
curl -XGET 'http://ESNode01:9201/living_team/fuas/_search?pretty' -d '
{
"aggs" : {
"demo" : {
"nested" : {
"path" : "metainfos"
},
"aggs" : {
"key" : { "terms" : { "field" : "metainfos.key" } }
}
}
}
}
'
ES realizes me:
"error" : {
"root_cause" : [ {
"type" : "aggregation_execution_exception",
"reason" : "[nested] nested path [metainfos] is not nested"
} ],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query_fetch",
"grouped" : true,
"failed_shards" : [ {
"shard" : 3,
"index" : "living_v1",
"node" : "HfaFBiZ0QceW1dpqAnv-SA",
"reason" : {
"type" : "aggregation_execution_exception",
"reason" : "[nested] nested path [metainfos] is not nested"
}
} ]
},
"status" : 500
}
Any ideas?
You're missing "type":"nested" from your metainfos mapping.
Should have been:
"metainfos" : {
"type":"nested",
"properties" : {
"category 1" : {
"type" : "string"
},
"key" : {
"type" : "string"
},
"null" : {
"type" : "string"
},
"processos" : {
"type" : "string"
}
}
}

How can i map custom date format in elasticsearch and Kibana4

I have nginx logs and i have this date format [02/Mar/2015:13:02:51 +0000]
What should i use in elasticsearch and what i should put in the dateformat field of Kibana4?
curl -XGET 'http://localhost:9200/_mapping?pretty'
{
"nginx" : {
"mappings" : {
"t07_nginx" : {
"properties" : {
"#timestamp" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"body_bytes_sent" : {
"type" : "string"
},
"geoip_country_code" : {
"type" : "string"
},
"host" : {
"type" : "string"
},
"http_host" : {
"type" : "string"
},
"http_referer" : {
"type" : "string"
},
"http_user_agent" : {
"type" : "string",
"index" : "not_analyzed"
},
"http_x_forwarded_for" : {
"type" : "string"
},
"message" : {
"type" : "string"
},
"msec request_time" : {
"type" : "string"
},
"remote_addr" : {
"type" : "string"
},
"request_http_protocol" : {
"type" : "string"
},
"request_time" : {
"type" : "string"
},
"request_type" : {
"type" : "string"
},
"request_url" : {
"type" : "string"
},
"status" : {
"type" : "string"
},
"upstream_addr" : {
"type" : "string"
},
"upstream_response_time" : {
"type" : "string"
}
}
}
}
}
with the above i can't see any data(events) in Kibana
Thanks
What does the input plugin for nginx/output plugin for elasticsearch in your fluentd config file look like?
Also, make sure you have your time range setup correctly in kibana. I believe it defaults to 15 minutes.

How to use _timestamp in logstash elasticsearch

I am trying to figure out how to use the _timestamp with logstash.
I have tried to add to the mapping:
"_timestamp" : {
"enabled" : true,
"path" : "#timestamp"
},
But that does not have the expected effect. I did this in the elasticsearch-template.json file (I tried with and without the "store"=true):
{
"template" : "logstash-*",
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"_timestamp" : {
"enabled" : true,
"store" : true,
"path" : "#timestamp"
},
"_all" : {"enabled" : true},
"dynamic_templates" : [ {
.....
And I added the modified file to the output filter
output {
elasticsearch_http {
template => '/tmp/elasticsearch-template.json'
host => '127.0.0.1'
port=>9200
}
}
In order to make sure the database is clean I repeatedly do:
curl -XDELETE http://localhost:9200/logstash*
curl -XDELETE http://localhost:9200/_template/logstash
rm ~/.sincedb_*
and then I try to import my logfile. But for some reasons, the _timestamp is not set.
The mapping seems to be ok
{
"logstash-2014.03.24" : {
"_default_" : {
"dynamic_templates" : [ {
"string_fields" : {
"mapping" : {
"index" : "analyzed",
"omit_norms" : true,
"type" : "string",
"fields" : {
"raw" : {
"index" : "not_analyzed",
"ignore_above" : 256,
"type" : "string"
}
}
},
"match" : "*",
"match_mapping_type" : "string"
}
} ],
"_timestamp" : {
"enabled" : true,
"store" : true,
"path" : "#timestamp"
},
"properties" : {
"#version" : {
"type" : "string",
"index" : "not_analyzed",
"omit_norms" : true,
"index_options" : "docs"
},
"geoip" : {
"dynamic" : "true",
"properties" : {
"location" : {
"type" : "geo_point"
}
}
}
}
},
"logs" : {
"dynamic_templates" : [ {
"string_fields" : {
"mapping" : {
"index" : "analyzed",
"omit_norms" : true,
"type" : "string",
"fields" : {
"raw" : {
"index" : "not_analyzed",
"ignore_above" : 256,
"type" : "string"
}
}
},
"match" : "*",
"match_mapping_type" : "string"
}
} ],
"_timestamp" : {
"enabled" : true,
"store" : true,
"path" : "#timestamp"
},
"properties" : {
"#timestamp" : {
"type" : "date",
"format" : "dateOptionalTime"
},
The documents in the database look like
{
"_id": "Cps2Lq1nTIuj_VysOwwcWw",
"_index": "logstash-2014.03.25",
"_score": 1.0,
"_source": {
"#timestamp": "2014-03-25T00:47:09.703Z",
"#version": "1",
"created": "2014-03-25 01:47:09,703",
"host": "macbookpro.fritz.box",
"message": "2014-03-25 01:47:09,703 - Starting new HTTP connection (1): localhost",
"path": "/Users/scharf/git/ckann/annotator-store/logs/requests.log",
"text": "Starting new HTTP connection (1): localhost"
},
"_type": "logs"
},
why is the _timestamp not set???
In short, it does work.
I tested your exact scenario and here's what I found:
When using _source enabled and specifying _timestamp from some path in the _source,
you will never see _timestamp as part of the document, but if however, you add the ?fields query string part, for example:
http://<localhost>:9200/es_test_logs/ESTest1/ilq4PU3tR9SeoLo794wZlg?fields=_timestamp
you will get the correct _timestamp value.
If, instead of using path, you pass _timestamp externally (in the _source document), you will see _timestamp under the _source property in the document as normal.
If you disable the _source field, you will not see ANY property at all in the document, even those you set as "store" : true. You will only see them when specifying ?fields, or when building a query that returns those fields.

Resources