logstash elasticsearch output plugin script example to add value to array field? - elasticsearch

Hello, I am getting an error when I try to add a value to an existing array field in Elasticsearch. My Logstash output configuration is:
elasticsearch {
  document_id => 1
  action => "update"
  hosts => ["X.X.X.X:9200"]
  index => "test"
  script_lang => "painless"
  script_type => "inline"
  script => 'ctx._source.arrat.add(event("[file][fuid]"))'
}
The error I am getting is:
error"=>{"type"=>"illegal_argument_exception", "reason"=>"failed to execute script", "caused_by"=>{"type"=>"script_exception", "reason"=>"compile error", "script_stack"=>["ctx._source.arrat.add(event(\"[file][fuid]\"))", " ^---- HERE"], "script"=>"ctx._source.arrat.add(event(\"[file][fuid]\"))", "lang"=>"painless", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"Unknown call [event] with [1] arguments."}}}}}}.
Below is the full Logstash configuration:
input {
  beats {
    port => "12109"
  }
}
filter {
  mutate {
    id => "brolog-files-rename-raw-fields"
    rename => {
      "[ts]" => "[file][ts]"
      "[fuid]" => "[file][fuid]"
      "[tx_hosts]" => "[file][tx_hosts]"
      "[rx_hosts]" => "[file][rx_hosts]"
      "[conn_uids]" => "[file][conn_uids]"
      "[source]" => "[file][source]"
      "[depth]" => "[file][depth]"
      "[analyzers]" => "[file][analyzers]"
      "[mime_type]" => "[file][mime_type]"
      "[duration]" => "[file][duration]"
      "[is_orig]" => "[file][is_orig]"
      "[seen_bytes]" => "[file][seen_bytes]"
      "[missing_bytes]" => "[file][missing_bytes]"
      "[overflow_bytes]" => "[file][overflow_bytes]"
      "[timedout]" => "[file][timedout]"
      "[md5]" => "[file][md5]"
      "[sha1]" => "[file][sha1]"
    }
  }
}
output {
  stdout { codec => rubydebug }
  elasticsearch {
    document_id => 1
    action => "update"
    doc_as_upsert => "true"
    hosts => ["X.X.X.X:9200"]
    index => "test"
    script_lang => "painless"
    script_type => "inline"
    script => 'ctx._source.arrat.add(event.[file][fuid])'
  }
}
I am getting the data in JSON format.
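For reference, Painless has no event() function; the Elasticsearch output plugin passes the Logstash event to the update script as a script parameter, under the name set by script_var_name (default "event"), so it should be reachable through params. A hedged sketch of the output block along those lines, assuming the target field really is called arrat (not verified against this exact setup):
elasticsearch {
  hosts => ["X.X.X.X:9200"]
  index => "test"
  document_id => "1"
  action => "update"
  doc_as_upsert => true
  script_lang => "painless"
  script_type => "inline"
  # create the array if it is missing, then append the value passed in via params
  script => "if (ctx._source.arrat == null) { ctx._source.arrat = [] } ctx._source.arrat.add(params.event['file']['fuid'])"
}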

Related

Logstash Input -> JDBC settings in a properties or parameterizable file?

I am using Logstash to ingest data into Elasticsearch. I am using the jdbc input, and I need to parameterize the jdbc input settings, such as the connection string, password, etc., since I have 10 .conf files where each one has 30 jdbc inputs and 30 outputs inside.
So, since each file has the same settings, I would like to know if it is possible to do something generic or reference that information from somewhere.
I have this 30 times:
input {
  # Number 1
  jdbc {
    jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
    jdbc_driver_class => "com.informix.jdbc.IfxDriver"
    jdbc_connection_string => "jdbc:informix-sqli://xxxxxxx/schema:informixserver=server"
    jdbc_user => "xxx"
    jdbc_password => "xxx"
    schedule => "*/1 * * * *"
    statement => "SELECT * FROM public.test ORDER BY id ASC"
    tags => "001"
  }
  # Number 2
  jdbc {
    jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
    jdbc_driver_class => "com.informix.jdbc.IfxDriver"
    jdbc_connection_string => "jdbc:informix-sqli://xxxxxxx/schema:informixserver=server"
    jdbc_user => "xxx"
    jdbc_password => "xxx"
    schedule => "*/1 * * * *"
    statement => "SELECT * FROM public.test2 ORDER BY id ASC"
    tags => "002"
  }
  [.........]
  # Number X
  jdbc {
    jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
    jdbc_driver_class => "com.informix.jdbc.IfxDriver"
    jdbc_connection_string => "jdbc:informix-sqli://xxxxxxx/schema:informixserver=server"
    jdbc_user => "xxx"
    jdbc_password => "xxx"
    schedule => "*/1 * * * *"
    statement => "SELECT * FROM public.testx ORDER BY id ASC"
    tags => "00x"
  }
}
filter {
  mutate {
    add_field => { "[@metadata][mitags]" => "%{tags}" }
  }
  # Number 1
  if "001" in [@metadata][mitags] {
    mutate {
      rename => [ "codigo", "[properties][codigo]" ]
    }
  }
  # Number 2
  if "002" in [@metadata][mitags] {
    mutate {
      rename => [ "codigo", "[properties][codigo]" ]
    }
  }
  [......]
  # Number x
  if "00x" in [@metadata][mitags] {
    mutate {
      rename => [ "codigo", "[properties][codigo]" ]
    }
  }
  mutate {
    remove_field => [ "@version", "@timestamp", "tags" ]
  }
}
output {
  # Number 1
  if "001" in [@metadata][mitags] {
    # For ELK
    elasticsearch {
      hosts => "localhost:9200"
      index => "001"
      document_type => "001"
      document_id => "%{id}"
      manage_template => true
      template => "/home/user/logstash/templates/001.json"
      template_name => "001"
      template_overwrite => true
    }
  }
  # Number 2
  if "002" in [@metadata][mitags] {
    # For ELK
    elasticsearch {
      hosts => "localhost:9200"
      index => "002"
      document_type => "002"
      document_id => "%{id}"
      manage_template => true
      template => "/home/user/logstash/templates/002.json"
      template_name => "002"
      template_overwrite => true
    }
  }
  [....]
  # Number x
  if "00x" in [@metadata][mitags] {
    # For ELK
    elasticsearch {
      hosts => "localhost:9200"
      index => "00x"
      document_type => "00x"
      document_id => "%{id}"
      manage_template => true
      template => "/home/user/logstash/templates/00x.json"
      template_name => "00x"
      template_overwrite => true
    }
  }
}
You will still need one jdbc input for each query you need to run, but you can improve your filter and output blocks, and you can pull the repeated connection settings out of the .conf files with environment variables (see the sketch below).
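For the parameterization itself, Logstash supports ${VAR} environment-variable substitution (and the keystore for secrets) anywhere in the pipeline configuration, so the repeated connection settings can live outside the .conf files. A sketch, assuming JDBC_URL, JDBC_USER and JDBC_PASS are exported in the environment Logstash runs in:
jdbc {
  jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
  jdbc_driver_class => "com.informix.jdbc.IfxDriver"
  # these values are resolved from the environment when the pipeline is loaded
  jdbc_connection_string => "${JDBC_URL}"
  jdbc_user => "${JDBC_USER}"
  jdbc_password => "${JDBC_PASS}"
  schedule => "*/1 * * * *"
  statement => "SELECT * FROM public.test ORDER BY id ASC"
  tags => "001"
}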
In your filter block you are using the field [@metadata][mitags] to filter your inputs, but you are applying the same mutate filter to each one of the inputs. If that is the case, you don't need the conditionals; the same mutate filter can be applied to all your inputs without any filtering.
Your filter block could be reduced to something like this:
filter {
  mutate {
    add_field => { "[@metadata][mitags]" => "%{tags}" }
  }
  mutate {
    rename => [ "codigo", "[properties][codigo]" ]
  }
  mutate {
    remove_field => [ "@version", "@timestamp", "tags" ]
  }
}
In your output block you use the tag only to change the index, document_type, and template. You don't need conditionals for that; you can use the value of the field as a parameter.
output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "%{[@metadata][mitags]}"
    document_type => "%{[@metadata][mitags]}"
    document_id => "%{id}"
    manage_template => true
    template => "/home/unitech/logstash/templates/%{[@metadata][mitags]}.json"
    template_name => "iol-fue"
    template_overwrite => true
  }
}
But this only works if you have a single value in the field [@metadata][mitags], which seems to be the case.
EDIT:
Edited for historical reasons: as noted in the comments, the template option does not allow dynamic parameters, since the template is only loaded when Logstash starts. The other options work fine.
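If per-index templates are still required, one possible workaround (my suggestion, not part of the original answer) is to install each template once directly in Elasticsearch and let the output skip template management, so the dynamic index name keeps working:
# install each template once, outside Logstash, for example:
# curl -XPUT 'localhost:9200/_template/001' -H 'Content-Type: application/json' -d @/home/user/logstash/templates/001.json
elasticsearch {
  hosts => "localhost:9200"
  index => "%{[@metadata][mitags]}"
  document_id => "%{id}"
  # the templates are already in the cluster, so Logstash does not need to manage them
  manage_template => false
}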

logstash doesn't report all the events

I can see that some events are missing while reporting logs to Elasticsearch. For example, if I send 5 log events, only 4 or 3 are reported.
Basically, I am using Logstash 7.4 to read my log messages and store the information in Elasticsearch 7.4. Below is my Logstash configuration:
input {
  file {
    type => "web"
    path => ["/Users/a0053/Downloads/logs/**/*-web.log"]
    start_position => "beginning"
    sincedb_path => "/tmp/sincedb_file"
    codec => multiline {
      pattern => "^(%{MONTHDAY}-%{MONTHNUM}-%{YEAR} %{TIME}) "
      negate => true
      what => previous
    }
  }
}
filter {
  if [type] == "web" {
    grok {
      match => [ "message", "(?<frontendDateTime>%{MONTHDAY}-%{MONTHNUM}-%{YEAR} %{TIME})%{SPACE}(\[%{DATA:thread}\])?( )?%{LOGLEVEL:level}%{SPACE}%{USERNAME:zhost}%{SPACE}%{JAVAFILE:javaClass} %{USERNAME:orgId} (?<loginId>[\w.+=:-]+@[0-9A-Za-z][0-9A-Za-z-]{0,62}(?:[.](?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*) %{GREEDYDATA:jsonstring}" ]
    }
    json {
      source => "jsonstring"
      target => "parsedJson"
      remove_field => ["jsonstring"]
    }
    mutate {
      add_field => {
        "actionType" => "%{[parsedJson][actionType]}"
        "errorMessage" => "%{[parsedJson][errorMessage]}"
        "actionName" => "%{[parsedJson][actionName]}"
        "Payload" => "%{[parsedJson][Payload]}"
        "pageInfo" => "%{[parsedJson][pageInfo]}"
        "browserInfo" => "%{[parsedJson][browserInfo]}"
        "dateTime" => "%{[parsedJson][dateTime]}"
      }
    }
  }
}
output {
  if "_grokparsefailure" in [tags] {
    elasticsearch {
      hosts => "localhost:9200"
      index => "grokparsefailure-%{+YYYY.MM.dd}"
    }
  } else {
    elasticsearch {
      hosts => "localhost:9200"
      index => "zindex"
    }
  }
  stdout { codec => rubydebug }
}
As new logs keep being written to the log files, I can see a difference in the log counts.
Any suggestions would be appreciated.
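One thing worth checking (a guess, not part of the original question): the multiline codec keeps the lines of the current event buffered until a line that starts a new event arrives, so the last event of a file can appear to be missing until more logs are written. Setting auto_flush_interval makes the codec emit a pending event after a period of inactivity, for example:
codec => multiline {
  pattern => "^(%{MONTHDAY}-%{MONTHNUM}-%{YEAR} %{TIME}) "
  negate => true
  what => previous
  # flush a buffered event if no new line arrives within 5 seconds (value assumed)
  auto_flush_interval => 5
}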

How to use script upsert in logstash for updating document

I am using the output block below to upsert a document and increment a counter (a partial update) for an existing document with a matching ID.
Currently, after the first document is indexed into Elasticsearch, the "script" has no effect on subsequent update calls; it is not incrementing the counter value.
Below is the output block of Logstash using upsert and script:
output {
  stdout { }
  elasticsearch {
    hosts => "localhost"
    index => "test_dest"
    script => "ctx._source.views+=1"
    script_lang => "painless"
    script_type => "inline"
    # scripted_upsert => true
    doc_as_upsert => true
    document_id => "%{[userId]}"
  }
  stdout {
    codec => "json"
  }
}
Try setting the action to "update" and scripted_upsert to true, as below:
output {
  elasticsearch {
    action => "update"
    hosts => "localhost"
    index => "test_dest"
    script => "ctx._source.views+=1"
    script_lang => "painless"
    script_type => "inline"
    scripted_upsert => true
    document_id => "%{[userId]}"
  }
}
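One caveat worth adding (my note, not from the original answer): with scripted_upsert => true the script also runs when the document does not exist yet, so a bare ctx._source.views+=1 can fail on the first insert because views is missing. A defensive variant of the script:
# initialize the counter on the first insert, increment it afterwards
script => "if (ctx._source.views == null) { ctx._source.views = 1 } else { ctx._source.views += 1 }"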

Logstash code to upload CSV file content into Elasticsearch

I have a CSV file and I want to import it into Elasticsearch 5.0.0 using Logstash.
These are the first 2 lines of the CSV file:
Id,CameraId,ZoneId,Latitude,Longitude,Number,OriginalNumber,Country,OriginalCountry,CapturedOn,Status,Direction,Speed,Confidence,AvgDigitsHeight,MultiplateRate,ProcessingTimeOCR,Signaled,OcrImageId,EnvImageIds,CapturerId,CapturerType,IsAlarm,AlarmListIds,ReplicationId,ImagesUploaded
111,24,1,42.8,3.5,XXDFR,XXDFR,DE,DE,2017-03-04 12:06:20.0,0,1,0,99,21.0,3,16.0193003809306,0,0,[],null,null,0,[],0,0
I run this Logstash configuration:
input {
  file {
    path => ["/usr/develop/test.csv"]
    type => "core2"
    start_position => "beginning"
  }
}
filter {
  csv {
    columns => [
      "Id","CameraId","ZoneId","Latitude","Longitude,"Number","OriginalNumber","Country","OriginalCountry","CapturedOn","Status","Direction","Speed","Confidence","AvgDigitsHeight","MultiplateRate","ProcessingTimeOCR","Signaled","OcrImageId","EnvImageIds","CapturerId","CapturerType","IsAlarm","AlarmListIds","ReplicationId","ImagesUploaded"
    ]
  }
}
output {
  stdout { codec => rubydebug }
  elasticsearch {
    action => "index"
    hosts => ["127.0.0.1:9200"]
    index => "mytest"
    document_type => "doc"
    document_id => "%{Id}"
    workers => 1
  }
}
I get this error:
logstash.agent ] fetched an invalid config {:config=>"input
{\nfile {\npath => [\"/usr/develop/test.csv\"]\ntype =>
\"core2\"\nstart_position => \"beginning\" \n}\n}\nfilter {\ncsv
{\nseparator => \",\"\ncolumns =>
[\"Id\",\"CameraId\",\"ZoneId\",\"Latitude\",\"Longitude,\"Number\",\"OriginalNumber\",\"Country\",\"OriginalCountry\",\"CapturedOn\"]\n}\n}\noutput
{\nelasticsearch {\naction => \"index\"\nhosts =>
[\"localhost:9200\"]\nindex => \"test\"\ndocument_type =>
\"doc\"\ndocument_id => \"%{Id}\"\nworkers => 1\n}\nstdout { codec =>
rubydebug}\n}\n\n", :reason=>"Expected one of #, {, ,, ] at line 11,
column 61 (byte 225) after filter {\ncsv {\nseparator =>
\",\"\ncolumns =>
[\"Id\",\"CameraId\",\"ZoneId\",\"Latitude\",\"Longitude,\""}
Not sure if you caught this yet, but it's because you are missing a closing " for the column name "Longitude".
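In other words, "Longitude, should read "Longitude", so the csv filter becomes:
filter {
  csv {
    separator => ","
    columns => [
      "Id","CameraId","ZoneId","Latitude","Longitude","Number","OriginalNumber","Country","OriginalCountry","CapturedOn","Status","Direction","Speed","Confidence","AvgDigitsHeight","MultiplateRate","ProcessingTimeOCR","Signaled","OcrImageId","EnvImageIds","CapturerId","CapturerType","IsAlarm","AlarmListIds","ReplicationId","ImagesUploaded"
    ]
  }
}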

Logstash Duplicate Data

I have duplicate data in Logstash. How can I remove this duplication?
My input is:
input {
  file {
    path => "/var/log/flask/access*"
    type => "flask_access"
    max_open_files => 409599
  }
  stdin {}
}
The filter for the files is:
filter {
  mutate { replace => { "type" => "flask_access" } }
  grok {
    match => { "message" => "%{FLASKACCESS}" }
  }
  mutate {
    add_field => {
      "temp" => "%{uniqueid} %{method}"
    }
  }
  if "Entering" in [api_status] {
    aggregate {
      task_id => "%{temp}"
      code => "map['blockedprocess'] = 2"
      map_action => "create"
    }
  }
  if "Entering" in [api_status] or "Leaving" in [api_status] {
    aggregate {
      task_id => "%{temp}"
      code => "map['blockedprocess'] -= 1"
      map_action => "update"
    }
  }
  if "End Task" in [api_status] {
    aggregate {
      task_id => "%{temp}"
      code => "event['blockedprocess'] = map['blockedprocess']"
      map_action => "update"
      end_of_task => true
      timeout => 120
    }
  }
}
Take a look at the image: it shows the same log data, at the same time, even though I only sent one log request.
I solved it.
I created a unique id via document_id in the output section. document_id points to my temp field, and temp is the unique id in my project.
My output changed to:
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    document_id => "%{temp}"
    # sniffing => true
    # manage_template => false
    # index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
    # document_type => "%{[@metadata][type]}"
  }
  stdout { codec => rubydebug }
}
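A related option (not from the original answer) is to derive the document id from the event content with the fingerprint filter, so identical events always map onto the same Elasticsearch document. A sketch, assuming uniqueid and method are the fields that identify an event:
filter {
  fingerprint {
    # hash the identifying fields into a stable id (key is an arbitrary HMAC secret)
    source => ["uniqueid", "method"]
    concatenate_sources => true
    method => "SHA256"
    key => "some-static-key"
    target => "[@metadata][fingerprint]"
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    document_id => "%{[@metadata][fingerprint]}"
  }
}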
Running tests in my local lab, I've just found out that Logstash is sensitive to the number of config files kept in the /etc/logstash/conf.d directory: it concatenates all of them into a single pipeline. If there is more than one config file, you can see duplicates of the same record, because every event passes through every output block. So try removing all backup configs from /etc/logstash/conf.d and restart Logstash.
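If the extra files in /etc/logstash/conf.d are not just backups but genuinely separate pipelines, they can also be isolated from each other in pipelines.yml instead of being merged into one pipeline (a sketch, file names assumed):
# /etc/logstash/pipelines.yml
- pipeline.id: flask-access
  path.config: "/etc/logstash/conf.d/flask_access.conf"
- pipeline.id: other-pipeline
  path.config: "/etc/logstash/conf.d/other.conf"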
