Is it possible to set custom mapping for index in logstash but not in elasticsearch? - elasticsearch

There's input, filter and then output in Logstash main coding.
Is it possible to set custom mapping in
output
{ elasticsearch {
}
If it is possible, how do I set it?
With this example:
"mappings" : {
"_default_" : {
"properties" : {
"service" : { "type" : "integer" },
"rule" : { "type" : "integer" },
"ICMP Type" : { "type" : "integer" },
"ICMP Code" : { "type" : "integer" },
"ip_offset" : { "type" : "integer" },
"ip_id" : { "type" : "integer" },
"ip_len" : { "type" : "integer" },
"Confidence Level" : { "type" : "integer" },
"fragments_dropped" : { "type" : "integer" },
"Severity" : { "type" : "integer" },
"serial_num" : { "type" : "integer" },
"during_sec" : { "type" : "integer" },
"Attack info" : {"type": "string", "index" : "not_analyzed" },
"peer gateway" : {"type": "string", "index" : "not_analyzed" }

Logstash comes with a default template that is used when writing documents to elasticsearch.
If you'd like to change the default, you can update your config and pass it the location of a template file.
https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html#plugins-outputs-elasticsearch-template

You can use template and template_overwrite fields like that :
elasticsearch {
template => "/tttttttttttt/elasticsearch-logstash-template.json"
index => "logstash-%{+YYYY.MM.dd}"
cluster=>"cluster"
template_overwrite => true
}

Related

Ingesting data from Spark to Elasticsearch with index template

In our existing design we are using logstash to fetch data from Kafka (JSON) and put it in ElasticSearch.
We are also using index template mapping while inserting data from logstash to ES and this could be done by setting 'template' property of ES output plugin of logstash, e.g.,
output {
elasticsearch {
template => "elasticsearch-template.json", //template file path
hosts => "localhost:9200"
template_overwrite => true
manage_template => true
codec=>plain
}
}
elasticsearch-template.json looks like below,
{
"template" : "logstash-*",
"settings" : {
"index.refresh_interval" : "3s"
},
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true},
"dynamic_templates" : [ {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fields" : {
"raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256, "doc_values":true}
}
}
}
} ],
"properties" : {
"#version": { "type": "string", "index": "not_analyzed" },
"geoip" : {
"type" : "object",
"dynamic": true,
"properties" : {
"location" : { "type" : "geo_point" }
}
}
}
}
}
}
Now we are going to replace logstash with Apache Spark and I want to use similar kind of usage of index template in Spark while inserting data to ES.
I am using elasticsearch-spark_2.11 library for this implementation.
Thanks.

Logstash issues in creating index remove .raw field in kibana

I have written a logstash conf filefor reading logs. If I use the default index, that is logstash-*, I could see .raw field in kibana. However, if I create a new index in conf file in logstash like
output{
elasticsearch {
hosts => "localhost"
index => "batchjob-*"}
}
Then the new index cant configure .raw field. Is there any resolve ways to solve it? Great Thanks.
The raw fields are created by a specific index template that the Logstash elasticsearch output creates in Elasticsearch.
What you can do is simply copy that template to a file named batchjob.json and change the template name to batchjob-* (see below)
{
"template" : "batchjob-*",
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true, "omit_norms" : true},
"dynamic_templates" : [ {
"message_field" : {
"match" : "message",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fielddata" : { "format" : "disabled" }
}
}
}, {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fielddata" : { "format" : "disabled" },
"fields" : {
"raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
}
}
}
} ],
"properties" : {
"#timestamp": { "type": "date" },
"#version": { "type": "string", "index": "not_analyzed" },
"geoip" : {
"dynamic": true,
"properties" : {
"ip": { "type": "ip" },
"location" : { "type" : "geo_point" },
"latitude" : { "type" : "float" },
"longitude" : { "type" : "float" }
}
}
}
}
}
}
Then you can modify your elasticsearch output like this:
output {
elasticsearch {
hosts => "localhost"
index => "batchjob-*"
template_name => "batchjob"
template => "/path/to/batchjob.json"
}
}

how to change type of a value in elasticsearch

I am trying to do geomap of a value in Elasticsearch but the value type of the client_location is set as a string and I would like to change it to geo_point. When I run the following I am getting:
#curl -XGET "http://core.z0z0.tk:9200/_all/_mappings/http?pretty"
{
"packetbeat-2015.12.04" : {
"mappings" : {
"http" : {
"properties" : {
"#timestamp" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"beat" : {
"properties" : {
"hostname" : {
"type" : "string"
},
"name" : {
"type" : "string"
}
}
},
"bytes_in" : {
"type" : "long"
},
"bytes_out" : {
"type" : "long"
},
"client_ip" : {
"type" : "string"
},
"client_location" : {
"type" : "string"
},
"client_port" : {
"type" : "long"
},
"client_proc" : {
"type" : "string"
},
"client_server" : {
"type" : "string"
},
"count" : {
"type" : "long"
},
"direction" : {
"type" : "string"
},
"http" : {
"properties" : {
"code" : {
"type" : "long"
},
"content_length" : {
"type" : "long"
},
"phrase" : {
"type" : "string"
}
}
},
"ip" : {
"type" : "string"
},
"method" : {
"type" : "string"
},
"notes" : {
"type" : "string"
},
"params" : {
"type" : "string"
},
"path" : {
"type" : "string"
},
"port" : {
"type" : "long"
},
"proc" : {
"type" : "string"
},
"query" : {
"type" : "string"
},
"responsetime" : {
"type" : "long"
},
"server" : {
"type" : "string"
},
"status" : {
"type" : "string"
},
"type" : {
"type" : "string"
}
}
}
}
}
}
When I run the following command to change the type of the value from string to geo_point I am getting the following error:
# curl -XPUT "http://localhost:9200/_all/_mappings/http" -d '
> {
> "http" : {
> "properties" : {
> "client_location" : {
> "type" : "geo_point"
> }
> }
> }
> }
> '
{"error":{"root_cause":[{"type":"merge_mapping_exception","reason":"Merge failed with failures {[mapper [client_location] of different type, current_type [string], merged_type[geo_point]]}"}],"type":"merge_mapping_exception","reason":"Merge failed with failures {[mapper [client_location] of different type, current_type [string], merged_type [geo_point]]}"},"status":400}
Any suggestion how should I correctly change the type?
Thanks in advance.
Unfortunately, once you've created a field you cannot change its type anymore. The best thing to do is to delete the index and recreate it properly with the adequate mapping.
Another temporary solution if you don't want to delete your index immediately, is to create a sub-field of your existing field:
# curl -XPUT "http://localhost:9200/_all/_mappings/http" -d '{
"http": {
"properties": {
"client_location": {
"type": "string",
"fields": {
"geo": {
"type": "geo_point"
}
}
}
}
}
}'
And then you can access it in your queries using client_location.geo.
Also note that you have to re-index your data in order to populate that new sub-field... which means you might just as well delete your index and re-create it properly.
UPDATE
After installing Packetbeat you need to make sure to install the packetbeat template yourself as described here (i.e. it is not done automatically):
https://www.elastic.co/guide/en/beats/packetbeat/current/packetbeat-getting-started.html#packetbeat-template
curl -XPUT 'http://localhost:9200/_template/packetbeat' -d#/etc/packetbeat/packetbeat.template.json

How to map geoip field in logstash with elasticsearch in order to display it in tile map of Kibana4

I'd like to display geoip fields in tile map of Kibana4.
Using the standard / automatic logstash geoip mapping to elasticsearch it all works fine.
However when creating a non-standard geoip field, I am not quite sure how to customize the elasticsearch-template.json in logstash in order to represent this field correctly in elasticsearch so that it can be chosen in Kibana4 for tile map creation.
Sure, customizing the standard template is not the best way - better create a custom template and point to it in elasticsearch output of logstash.conf. I just quickly wanted to check how the mapping has to be defined, so I modified the standard template.
My logstash.conf:
input {
tcp {
port => 514
type => syslog
}
udp {
port => 514
type => syslog
}
}
filter {
# Standard geoip field is automatically mapped by logstash to
# elastic search by using the elasticsearch-template.json file
geoip { source => "host" }
grok {
match => [
"message", "<%{POSINT:syslog_pri}>%{YEAR} %{SYSLOGTIMESTAMP:syslog_timestamp} %{DATA:device} <%{POSINT:status}> %{WORD:activity} %{DATA:inout} \(%{DATA:msg}\) Src:%{IPV4:src} SPort:%{INT:sport} Dst:%{IPV4:dst} DPort:%{INT:dport} IPP:%{INT:ipp} Rule:%{INT:rule} Interface:%{WORD:iface}",
"message", "<%{POSINT:syslog_pri}>%{YEAR} %{SYSLOGTIMESTAMP:syslog_timestamp} %{DATA:device} <%{POSINT:status}> %{WORD:activity} %{DATA:inout} \(%{DATA:msg}\) Src:%{IPV4:src} Dst:%{IPV4:dst} IPP:%{INT:ipp} Rule:%{INT:rule} Interface:%{WORD:iface}",
"message", "<%{POSINT:syslog_pri}>%{YEAR} %{SYSLOGTIMESTAMP:syslog_timestamp} %{DATA:device} <%{POSINT:status}> %{WORD:activity} %{DATA:inout} \(%{DATA:msg}\) Src:%{IPV4:src} Dst:%{IPV4:dst} Type:%{POSINT:type} Code:%{INT:code} IPP:%{INT:ipp} Rule:%{INT:rule} Interface:%{WORD:iface}"
]
}
# Is not mapped automatically by logstash in that it can be
# chosen in Kibana4 for tile map creation
geoip {
source => "src"
target => "src_geoip"
}
}
output {
elasticsearch {
host => "localhost"
protocol => "http"
}
}
My ...logstash-1.4.2\lib\logstash\outputs\elasticsearch\elasticsearch-template.json:
{
"template" : "logstash-*",
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"_all" : {"enabled" : true},
"dynamic_templates" : [ {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "string", "index" : "analyzed", "omit_norms" : true,
"fields" : {
"raw" : {"type": "string", "index" : "not_analyzed", "ignore_above" : 256}
}
}
}
} ],
"properties" : {
"#version": { "type": "string", "index": "not_analyzed" },
"geoip" : {
"type" : "object",
"dynamic": true,
"path": "full",
"properties" : {
"location" : { "type" : "geo_point" }
}
},
"src_geoip" : {
"type" : "object",
"dynamic": true,
"path": "full",
"properties" : {
"location" : { "type" : "geo_point" }
}
}
}
}
}
}
UPDATE: I havent figured out yet when this json file gets applied in elasticsearch. I followed the hints outlined in this question and copied the json file to a config/templates folder in elasticsearch directory. After deleting the indizes and restart of elasticsearch, the template was applied successfully.
Anyway, the field "src_geoip.location" still does not show up in the tile map creation form of Kibana4 (only the standard geoip.location field does).
Try overwrite template after editing template. Re-create indexes in Kibana after config change.
output {
elasticsearch {
template_overwrite => "true"
...
}
}
You also need to add objects for the src_geoip object in the index template on your elasticsearch instance. To set the default template for all indexes that match "logstash-netflow-*", execute the following on your elasticsearch instance:
curl -XPUT localhost:9200/_template/logstash-netflow -d '{
"template" : "logstash-netflow-*",
"mappings" : {
"_default_" : {
"_all" : {
"enabled" : false
},
"properties" : {
"#timestamp" : { "index" : "analyzed", "type" : "date" },
"#version" : { "index" : "analyzed", "type" : "integer" },
"src_geoip" : {
"dynamic" : true,
"type" : "object",
"properties" : {
"area_code" : { "type" : "long" },
"city_name" : { "type" : "string" },
"continent_code" : { "type" : "string" },
"country_code2" : { "type" : "string" },
"country_code3" : { "type" : "string" },
"country_name" : { "type" : "string" },
"dma_code" : { "type" : "long" },
"ip" : { "type" : "string" },
"latitude" : { "type" : "double" },
"location" : { "type" : "double" },
"longitude" : { "type" : "double" },
"postal_code" : { "type" : "string" },
"real_region_name" : { "type" : "string" },
"region_name" : { "type" : "string" },
"timezone" : { "type" : "string" }
}
},
"netflow" : { ....snipped......
}
}
}
}}'

How can i map custom date format in elasticsearch and Kibana4

I have nginx logs and i have this date format [02/Mar/2015:13:02:51 +0000]
What should i use in elasticsearch and what i should put in the dateformat field of Kibana4?
curl -XGET 'http://localhost:9200/_mapping?pretty'
{
"nginx" : {
"mappings" : {
"t07_nginx" : {
"properties" : {
"#timestamp" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"body_bytes_sent" : {
"type" : "string"
},
"geoip_country_code" : {
"type" : "string"
},
"host" : {
"type" : "string"
},
"http_host" : {
"type" : "string"
},
"http_referer" : {
"type" : "string"
},
"http_user_agent" : {
"type" : "string",
"index" : "not_analyzed"
},
"http_x_forwarded_for" : {
"type" : "string"
},
"message" : {
"type" : "string"
},
"msec request_time" : {
"type" : "string"
},
"remote_addr" : {
"type" : "string"
},
"request_http_protocol" : {
"type" : "string"
},
"request_time" : {
"type" : "string"
},
"request_type" : {
"type" : "string"
},
"request_url" : {
"type" : "string"
},
"status" : {
"type" : "string"
},
"upstream_addr" : {
"type" : "string"
},
"upstream_response_time" : {
"type" : "string"
}
}
}
}
}
with the above i can't see any data(events) in Kibana
Thanks
What does the input plugin for nginx/output plugin for elasticsearch in your fluentd config file look like?
Also, make sure you have your time range setup correctly in kibana. I believe it defaults to 15 minutes.

Resources