Logstash changes the original @timestamp value received from Filebeat - elasticsearch

I noticed that the @timestamp field, which is correctly set by Filebeat, is automatically changed by Logstash: its value gets replaced with the log timestamp value (field name a_timeStamp).
Here is part of the Logstash debug log:
[2017-07-18T11:55:03,598][DEBUG][logstash.pipeline ] filter received {"event"=>{"@timestamp"=>2017-07-18T09:54:53.507Z, "offset"=>498, "@version"=>"1", "input_type"=>"log", "beat"=>{"hostname"=>"centos-ea", "name"=>"filebeat_shipper_kp", "version"=>"5.5.0"}, "host"=>"centos-ea", "source"=>"/home/elastic/ELASTIC_NEW/log_bw/test.log", "message"=>"2017-06-05 19:02:46.779 INFO [bwEngThread:In-Memory Process Worker-4] psg.logger - a_applicationName=\"PieceProxy\", a_processName=\"piece.PieceProxy\", a_jobId=\"bw0a10ao\", a_processInstanceId=\"bw0a10ao\", a_level=\"Info\", a_phase=\"ProcessStart\", a_activityName=\"SetAndLog\", a_timeStamp=\"2017-06-05T19:02:46.779\", a_sessionId=\"\", a_sender=\"PCS\", a_cruid=\"37d7e225-bbe5-425b-8abc-f4b44a5a1560\", a_MachineCode=\"CFDM7757\", a_correlationId=\"fa10f\", a_trackingId=\"9d3b8\", a_message=\"START=piece.PieceProxy\"", "type"=>"log", "tags"=>["beats_input_codec_plain_applied"]}}
[2017-07-18T11:55:03,629][DEBUG][logstash.pipeline ] output received {"event"=>{"a_message"=>"START=piece.PieceProxy", "log"=>"INFO ", "bwthread"=>"[bwEngThread:In-Memory Process Worker-4]", "logger"=>"psg.logger ", "a_correlationId"=>"fa10f", "source"=>"/home/elastic/ELASTIC_NEW/log_bw/test.log", "a_trackingId"=>"9d3b8", "type"=>"log", "a_sessionId"=>"\"\"", "a_sender"=>"PCS", "@version"=>"1", "beat"=>{"hostname"=>"centos-ea", "name"=>"filebeat_shipper_kp", "version"=>"5.5.0"}, "host"=>"centos-ea", "a_level"=>"Info", "a_processName"=>"piece.PieceProxy", "a_cruid"=>"37d7e225-bbe5-425b-8abc-f4b44a5a1560", "a_activityName"=>"SetAndLog", "offset"=>498, "a_MachineCode"=>"CFDM7757", "input_type"=>"log", "message"=>"2017-06-05 19:02:46.779 INFO [bwEngThread:In-Memory Process Worker-4] psg.logger - a_applicationName=\"PieceProxy\", a_processName=\"piece.PieceProxy\", a_jobId=\"bw0a10ao\", a_processInstanceId=\"bw0a10ao\", a_level=\"Info\", a_phase=\"ProcessStart\", a_activityName=\"SetAndLog\", a_timeStamp=\"2017-06-05T19:02:46.779\", a_sessionId=\"\", a_sender=\"PCS\", a_cruid=\"37d7e225-bbe5-425b-8abc-f4b44a5a1560\", a_MachineCode=\"CFDM7757\", a_correlationId=\"fa10f\", a_trackingId=\"9d3b8\", a_message=\"START=piece.PieceProxy\"", "a_phase"=>"ProcessStart", "tags"=>["beats_input_codec_plain_applied", "_dateparsefailure", "kv_ok", "taskStarted"], "a_processInstanceId"=>"bw0a10ao", "@timestamp"=>2017-06-05T17:02:46.779Z, "my_index"=>"bw_logs", "a_timeStamp"=>"2017-06-05T19:02:46.779", "a_jobId"=>"bw0a10ao", "a_applicationName"=>"PieceProxy", "TMS"=>"2017-06-05 19:02:46.779"}}
NB:
I noticed that this doesn't happen with a simple pipeline (one without the grok, kv and other plugins I use in my custom pipeline).
I changed Filebeat's json.overwrite_keys property to true, but with no success.
Can you explain why @timestamp gets changed, and what is happening? I don't expect it to be changed automatically (I've seen many posts from people asking how to do exactly that), because @timestamp is a system field. What's wrong here?
Here is my pipeline:
input {
  beats {
    port => "5043"
    type => json
  }
}
filter {
  #date {
  #  match => [ "@timestamp", "ISO8601" ]
  #  target => "@timestamp"
  #}
  if "log_bw" in [source] {
    grok {
      patterns_dir => ["/home/elastic/ELASTIC_NEW/logstash-5.5.0/config/patterns/extrapatterns"]
      match => { "message" => "%{CUSTOM_TMS:TMS}\s*%{CUSTOM_LOGLEVEL:log}\s*%{CUSTOM_THREAD:bwthread}\s*%{CUSTOM_LOGGER:logger}-%{CUSTOM_TEXT:text}" }
      tag_on_failure => ["no_match"]
    }
    if "no_match" not in [tags] {
      if "Payload for Request is" in [text] {
        mutate {
          add_field => { "my_index" => "json_request" }
        }
        grok {
          patterns_dir => ["/home/elastic/ELASTIC_NEW/logstash-5.5.0/config/patterns/extrapatterns"]
          match => { "text" => "%{CUSTOM_JSON:json_message}" }
        }
        json {
          source => "json_message"
          tag_on_failure => ["errore_parser_json"]
          target => "json_request"
        }
        mutate {
          remove_field => [ "json_message", "text" ]
        }
      }
      else {
        mutate {
          add_field => { "my_index" => "bw_logs" }
        }
        kv {
          source => "text"
          trim_key => "\s"
          field_split => ","
          add_tag => [ "kv_ok" ]
        }
        if "kv_ok" not in [tags] {
          drop { }
        }
        else {
          mutate {
            remove_field => [ "text" ]
          }
          if "ProcessStart" in [a_phase] {
            mutate {
              add_tag => [ "taskStarted" ]
            }
          }
          if "ProcessEnd" in [a_phase] {
            mutate {
              add_tag => [ "taskTerminated" ]
            }
          }
          date {
            match => [ "a_timeStamp", "yyyy'-'MM'-'dd'T'HH:mm:ss.SSS" ]
          }
          elapsed {
            start_tag => "taskStarted"
            end_tag => "taskTerminated"
            unique_id_field => "a_cruid"
          }
        }
      }
    }
  }
  else {
    mutate {
      add_field => { "my_index" => "other_products" }
    }
  }
}
output {
  elasticsearch {
    index => "%{my_index}"
    hosts => ["localhost:9200"]
  }
  stdout { codec => rubydebug }
  file {
    path => "/tmp/loggata.tx"
    codec => json
  }
}
Thank you very much,
Andrea

This was the error (a typo from previous tests):
date {
  match => [ "a_timeStamp", "yyyy'-'MM'-'dd'T'HH:mm:ss.SSS" ]
}
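In other words, the date filter writes its result to @timestamp by default when no target is set, so matching a_timeStamp replaced the value coming from Filebeat. A minimal sketch that keeps @timestamp intact by parsing a_timeStamp into a separate field (the target name event_time is just an illustration):
date {
  match  => [ "a_timeStamp", "yyyy-MM-dd'T'HH:mm:ss.SSS" ]
  # parse into a separate field so the original @timestamp is preserved
  target => "event_time"
}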
Thank you guys, anyway!

Related

Change a specific "Value" inside a "Field" in Logstash Config file

I want to change a value inside a field in my Logstash config file.
My Logstash config file looks like this:
# Read input from filebeat by listening to port 5044 on which filebeat will send the data
input {
beats {
port => "5044"
}
}
filter {
######################################### For Solr ##############################################
if "solr" in [log][file][path] {
grok {
match => {"message" => "%{DATA:timestamp}%{SPACE}%{LOGLEVEL:log-level}%{SPACE}%{GREEDYDATA:log-message}"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
############################################## For Server ##############################################
if "server.log" in [log][file][path] {
grok {
match => {"message" => "%{DATA:timestamp}%{SPACE}%{LOGLEVEL:log-level}%{SPACE}\[%{DATA}\]%{SPACE}\(%{DATA:thread}\)%{SPACE}%{GREEDYDATA:log-message}"}
#match => { "[log][file][path]" => "%{GREEDYDATA}/%{GREEDYDATA:jboss-log}.log"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
############################################## For Mongo ##############################################
else if "mongos" in [log][file][path] or "config" in [log][file][path] or "shard" in [log][file][path] or "metrics_" in [log][file][path]{
grok {
match => {"message" => "%{TIMESTAMP_ISO8601:timestamp}%{SPACE}%{GREEDYDATA:log-message}"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
############################################## For mongo.log #####################################################
else if "mongo" in [log][file][path] {
grok {
match => {"message" => "\[%{DATA:timestamp}\]%{SPACE}%{LOGLEVEL:log-level}%{SPACE}\[%{DATA:class}\]%{SPACE}%{GREEDYDATA:log-message}"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
############################################## For Kafka ##############################################
else if "kafka" in [log][file][path] {
grok {
match => {"message" => "\[%{DATA:timestamp}\]%{SPACE}%{LOGLEVEL:log-level}%{SPACE}\[%{DATA:class}\]%{SPACE}%{GREEDYDATA:log-message}"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
############################################## For mongodb_output & mongodb_exception ##############################################
else if "mongodb_exception" in [log][file][path] or "mongodb_output" in [log][file][path]{
grok {
match => {"message" => "\[%{DATA:timestamp}\]%{SPACE}%{LOGLEVEL:log-level}%{SPACE}\[%{DATA:class}\]%{SPACE}%{GREEDYDATA:log-message}"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
############################################## Other Logs ##############################################
else {
grok {
#match => {"message" => "\[%{MONTHDAY:day}%{SPACE}%{MONTH:month}%{SPACE}%{YEAR:year},%{SPACE}%{TIME:time}\]%{SPACE}%{LOGLEVEL:log-level}%{SPACE}\[%{DATA:class}\]\[%{DATA:thread}\]%{SPACE}%{GREEDYDATA:log-message}"}
match => {"message" => "\[%{DATA:timestamp}\]%{SPACE}%{LOGLEVEL:log-level}%{SPACE}\[%{DATA:class}\]\[%{DATA:thread}\]%{SPACE}%{GREEDYDATA:log-message}"}
#remove_field => ["message"]
#add_field => {"message" => "%{log-message}"}
}
}
################################################################
grok {
match => { "[log][file][path]" => ["%{GREEDYDATA}/%{GREEDYDATA:component}.log" , "%{PATH}\\%{GREEDYDATA:component}\_%{GREEDYDATA}.log" ]}
}
if [component] =~ "^server" {
mutate {
rename => { "%{server}" => "renamed_server" }
}
}
}
output {
# sending properly parsed log events to elasticsearch
elasticsearch {
hosts => ["localhost:9200"]
}
}
I am getting the value server in the component field, but I want to change that value from server to renamed_server.
I have tried the above, but I am not getting any output.
Please help me find the required solution.
I guess the problem is with this block:
if [component] =~ "^server" {
mutate {
rename => { "%{server}" => "renamed_server" }
}
}
.. since it doesn't do what you want, i.e.
I want to change the value of component field server to renamed_server.
The rename option of mutate doesn't change values; it renames fields.
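For illustration, here is roughly what rename does (a hypothetical sketch): it would move the contents of component into a new field called renamed_server while leaving the value "server" unchanged, which is not what you're after:
mutate {
  rename => { "component" => "renamed_server" }
}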
If you want to change the value, you can use gsub. And since you want to change an exact value, you may be able to do it without the conditional altogether. E.g.:
mutate {
  gsub => [
    # replace `server` value with `renamed_server` in component field
    "component", "^server$", "renamed_server"
  ]
}
I modified this field with gsub and it worked as well:
mutate {
  gsub => [
    "component", "^server$", "renamed_server",
    "component", "^[0-9]{3}.*[0-9]{3}.*[0-9]{2}.*[0-9]{2}.*[0-9]{5}.*output$", "client_output"
  ]
}

How to parse the field value in logstash?

I have Logstash getting data from an HTTP API, and there is a field whose value I need to parse from this
"ServiceProvider": "T:ISP | CIR:450Mbps BR:1Gbps | VD:Beq | CID:124"
to this
"ServiceProvider": "T:ISP"
"CIR": "450Mbps"
"BR": "1Gbps"
"VD": "Beq"
"CID": "124"
My config file for Logstash is:
input{
http_poller {
urls => {
"ISP" => {
method => get
url => "http://xyz:8080/api"
headers => {
Accept => "application/json"
}
}
}
request_timeout => 60
tags => "hourly"
schedule => { cron => "30 * * * *"}
codec => "json"
metadata_target => "meta"
}
}
filter {
mutate {
remove_field => [ "[meta][request][auth][user]", "[meta][request][auth][pass]","[meta][request][headers][Accept]" ]
}
}
output {
elasticsearch {
hosts => ["http://xyz:9100"]
index => "xyz"
}
}
Thanks in advance!!
I would leverage the dissect filter and kv filter like this:
filter {
  dissect {
    mapping => {
      "message" => "%{ServiceProvider} | %{[@metadata][kv]}"
    }
  }
  kv {
    source => "[@metadata][kv]"
    field_split => "| "
    value_split => ":"
  }
  mutate {
    remove_field => [ "[meta][request][auth][user]", "[meta][request][auth][pass]", "[meta][request][headers][Accept]" ]
  }
}
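One thing to double-check, since the http_poller input uses the json codec: if the value arrives in a ServiceProvider field rather than in message, the dissect source would presumably need to point at that field instead. A sketch of that variant (the provider_type field name is just illustrative):
dissect {
  mapping => {
    # take the first token from the existing field, keep the rest for kv
    "ServiceProvider" => "%{provider_type} | %{[@metadata][kv]}"
  }
}
With the sample value above, the kv filter should then yield roughly CIR => 450Mbps, BR => 1Gbps, VD => Beq and CID => 124.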

How to create field using Logstash and grok plugin

I have a tomcat log of below format
10.0.6.35 - - [21/Oct/2019:00:00:04 +0000] "GET /rest/V1/productlist/category/4259/ar/final_price/asc/4/20 HTTP/1.1" 200 14970 12
I want to create fields from the last two columns, which are bytes and duration, and analyze them using Kibana. I used Filebeat and Logstash to transfer the data to Elasticsearch.
My Logstash configuration file is below.
I tried this configuration but am not able to see the fields in Kibana:
input {
beats {
port => 5044
}
}
filter {
grok {
match => ["message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes}(?m) %{NUMBER:duration}" ]
#match=>{"duration"=> "%{NUMBER:duration}"}
# match => { "message" => "%{COMBINEDAPACHELOG}" }
}
# mutate {
# remove_field => ["#version", "#timestamp"]
# }
date {
match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ]
}
}
output {
if [fields][log_type] == "access-log"
{
elasticsearch {
hosts => ["172.31.30.73:9200"]
index => "%{[fields][service]}-%{+YYYY.MM.dd}"
}
}
if [fields][log_type] == "application-log"
{
elasticsearch {
hosts => ["172.31.30.73:9200"]
index => "%{[fields][service]}-%{+YYYY.MM.dd}"
}
}
else
{
elasticsearch {
hosts => ["172.31.30.73:9200"]
index => "logstashhh-%{+YYYY.MM.dd}"
}
I want duration and bytes to become fields in Kibana for visualization.
Try this as your logstash configuration:
input {
  beats {
    port => 5044
  }
}
filter {
  grok {
    match => { "message" => "%{NUMBER:bytes}(?m) %{NUMBER:duration}$" }
    #match => { "duration" => "%{NUMBER:duration}" }
    # match => { "message" => "%{COMBINEDAPACHELOG}" }
  }
  # mutate {
  #   remove_field => ["@version", "@timestamp"]
  # }
  date {
    match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ]
  }
}
output {
  if [fields][log_type] == "access-log" {
    elasticsearch {
      hosts => ["172.31.30.73:9200"]
      index => "%{[fields][service]}-%{+YYYY.MM.dd}"
    }
  }
  if [fields][log_type] == "application-log" {
    elasticsearch {
      hosts => ["172.31.30.73:9200"]
      index => "%{[fields][service]}-%{+YYYY.MM.dd}"
    }
  }
  else {
    elasticsearch {
      hosts => ["172.31.30.73:9200"]
      index => "logstashhh-%{+YYYY.MM.dd}"
    }
  }
}
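Note that the pattern above only captures the last two numbers, so the date filter has no timestamp field to parse. A hedged sketch of a fuller pattern for the sample line, built from stock grok patterns (field names other than bytes and duration are illustrative):
grok {
  match => { "message" => "%{IPORHOST:client} - - \[%{HTTPDATE:timestamp}\] \"%{WORD:method} %{URIPATHPARAM:request} HTTP/%{NUMBER:httpversion}\" %{NUMBER:response} %{NUMBER:bytes} %{NUMBER:duration}" }
}
With timestamp captured this way, the existing date filter (dd/MMM/yyyy:HH:mm:ss Z) should be able to populate @timestamp from it.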

Match multiple patterns in Logstash?

I have two types of error messages in the below format:
[2017-05-25 01:00:00,647][ERROR][marvel.agent.exporter.local] local exporter [default_local] - failed to delete indices
RemoteTransportException[[data-0][10.0.0.8:9300][indices:admin/delete]]; nested: IndexNotFoundException[no such index];
[2017-05-18 00:00:06,339][DEBUG][action.admin.indices.create] [data-2] [data-may-2017,data-apr-2017,data-mar-2017] failed to create
[data-may-2017,data-apr-2017,data-mar-2017]
My logstash configuration is like this:
input {
file {
path => "D:\logstash\logstash-2.4.0\bin\logs.txt"
start_position => "beginning"
codec => multiline {
pattern => "^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]"
negate => true
what => "previous"
}
}
}
filter {
grok {
match => [ "message", "(?m)^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]\[%{LOGLEVEL:LEVEL}%{SPACE}\]\[%{DATA:ERRORTYPE}\]%{SPACE}\[%{DATA:SERVERNAME}\]%{SPACE}(?<ERRORMESSAGE>(.|\r|\n)*)", "message", "(?m)^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]\[%{LOGLEVEL:LEVEL}%{SPACE}\]\[%{DATA:ERRORTYPE}%{SPACE}\]%{SPACE}(?<ERRORMESSAGE>(.|\r|\n)*)"]
}
}
output {
stdout { codec => rubydebug }
}
For both types of logs it only ever applies the first grok pattern. Why is it not using the second one?
It seems my first grok pattern matches all the logs, which is why Logstash only applies the first pattern. So I used the config below with an if condition, which works fine:
input {
file {
path => "D:\logstash\logstash-2.4.0\bin\logs.txt"
start_position => "beginning"
type => "log"
codec => multiline {
pattern => "^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]"
negate => true
what => "previous"
}
}
}
filter {
if [type] == "log" {
grok {
match => [ "message", "(?m)^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]\[%{LOGLEVEL:LEVEL}%{SPACE}\]\[%{DATA:ERRORTYPE}%{SPACE}\]%{SPACE}(?<ERRORMESSAGE>(.|\r|\n)*)"]
}
# DEBUG Logs
if "grokked" not in [tags] and "DEBUG" == [LEVEL] {
grok { match => [ "ERRORMESSAGE", "(?m)^\[%{DATA:SERVERNAME}\]" ]
add_tag => [ "Debug Logs", "grokked" ]
tag_on_failure => [ ]
}
}
}
}
output {
stdout { codec => rubydebug }
}
Your question was:
Why is it not using the second one?
Answer is here:
filter {
grok {
match => [ "message", "(?m)^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]\[%{LOGLEVEL:LEVEL}%{SPACE}\]\[%{DATA:ERRORTYPE}\]%{SPACE}\[%{DATA:SERVERNAME}\]%{SPACE}(?<ERRORMESSAGE>(.|\r|\n)*)", "message", "(?m)^\[%{TIMESTAMP_ISO8601:TIMESTAMP}\]\[%{LOGLEVEL:LEVEL}%{SPACE}\]\[%{DATA:ERRORTYPE}%{SPACE}\]%{SPACE}(?<ERRORMESSAGE>(.|\r|\n)*)"]
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------^
}
}
You don't have to specify the source multiple times, just once.
What you did was:
["message", "pattern", "message", "pattern"]
while it should be:
["message", "pattern", "pattern", ..., "pattern"]

:reason=>"Something is wrong with your configuration." GeoIP.dat Mutate Logstash

I have the following configuration for Logstash.
There are three parts to it. The first is a general log, which we use for all applications; they all land in here.
The second part is the application stats, for which we have a specific logger configured to push the application statistics.
The third is the click stats: whenever an event occurs on the client side, we may want to push it to Logstash on the UDP address.
All three are UDP based, and we also use log4net to send the logs to Logstash.
The base install did not have a GeoIP.dat file, so I downloaded the file from https://dev.maxmind.com/geoip/legacy/geolite/ and put it in /opt/logstash/GeoIPDataFile with 777 permissions on the file and folder.
Second, I have a country name and I need a way to show how many users from each country have viewed the application in the last 24 hours, so for that reason we also capture the country name as it appears in their profile in the application.
Now I need a way to get the geo coordinates to use the tile map in Kibana.
What am I doing wrong? If I take out the geoip { source => "country" } section, Logstash works fine.
When I check with
/opt/logstash/bin/logstash -t -f /etc/logstash/conf.d/logstash.conf
I get "The configuration file is ok". Where am I going wrong?
Any help would be great.
input {
udp {
port => 5001
type => generallog
}
udp {
port => 5003
type => applicationstats
}
udp {
port => 5002
type => clickstats
}
}
filter {
if [type] == "generallog" {
grok {
remove_field => message
match => { message => "(?m)%{TIMESTAMP_ISO8601:sourcetimestamp} \[%{NUMBER:threadid}\] %{LOGLEVEL:loglevel} +- %{IPORHOST:requesthost} - %{WORD:applicationname} - %{WORD:envname} - %{GREEDYDATA:logmessage}" }
}
if !("_grokparsefailure" in [tags]) {
mutate {
replace => [ "message" , "%{logmessage}" ]
replace => [ "host" , "%{requesthost}" ]
add_tag => "generalLog"
}
}
}
if [type] == "applicationstats" {
grok {
remove_field => message
match => { message => "(?m)%{TIMESTAMP_ISO8601:sourceTimestamp} \[%{NUMBER:threadid}\] %{LOGLEVEL:loglevel} - %{WORD:envName}\|%{IPORHOST:actualHostMachine}\|%{WORD:applicationName}\|%{NUMBER:empId}\|%{WORD:regionCode}\|%{DATA:country}\|%{DATA:applicationName}\|%{NUMBER:staffapplicationId}\|%{WORD:applicationEvent}" }
}
geoip {
source => "country"
target => "geoip"
database => "/opt/logstash/GeoIPDataFile/GeoIP.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float"]
}
if !("_grokparsefailure" in [tags]) {
mutate {
add_tag => "applicationstats"
add_tag => [ "eventFor_%{applicationName}" ]
}
}
}
if [type] == "clickstats" {
grok {
remove_field => message
match => { message => "(?m)%{TIMESTAMP_ISO8601:sourceTimestamp} \[%{NUMBER:threadid}\] %{LOGLEVEL:loglevel} - %{IPORHOST:remoteIP}\|%{IPORHOST:fqdnHost}\|%{IPORHOST:actualHostMachine}\|%{WORD:applicationName}\|%{WORD:envName}\|(%{NUMBER:clickId})?\|(%{DATA:clickName})?\|%{DATA:clickEvent}\|%{WORD:domainName}\\%{WORD:userName}" }
}
if !("_grokparsefailure" in [tags]) {
mutate {
add_tag => "clicksStats"
add_tag => [ "eventFor_%{clickName}" ]
}
}
}
}
output {
if [type] == "applicationstats" {
elasticsearch {
hosts => "localhost:9200"
index => "applicationstats-%{+YYYY-MM-dd}"
template => "/opt/logstash/templates/udp-applicationstats.json"
template_name => "applicationstats"
template_overwrite => true
}
}
else if [type] == "clickstats" {
elasticsearch {
hosts => "localhost:9200"
index => "clickstats-%{+YYYY-MM-dd}"
template => "/opt/logstash/templates/udp-clickstats.json"
template_name => "clickstats"
template_overwrite => true
}
}
else if [type] == "generallog" {
elasticsearch {
hosts => "localhost:9200"
index => "generallog-%{+YYYY-MM-dd}"
template => "/opt/logstash/templates/udp-generallog.json"
template_name => "generallog"
template_overwrite => true
}
}
else{
elasticsearch {
hosts => "localhost:9200"
index => "logstash-%{+YYYY-MM-dd}"
}
}
}
As per the error message, the mutation which you're trying to do could be wrong. Could you please change your mutate as below:
mutate {
  convert => { "geoip" => "float" }
  convert => { "coordinates" => "float" }
}
I guess you've written the mutate as an array, while it is a hash type by origin. Try converting both values individually. Your database path for geoip seems fine in your filter. Is that the whole error you mentioned in the question? If not, update the question with the whole error if possible.
Refer to the Logstash mutate filter documentation for in-depth explanations.
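If the field that actually needs converting is the nested one built in the original config, the hash form would presumably reference the full path, for example:
mutate {
  convert => { "[geoip][coordinates]" => "float" }
}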
