Elastic's Logstash Mutate Split not working - elasticsearch

I'm having trouble splitting the http.request.referrer field in Logstash. The data is coming from Packetbeat, and I only want the domain, not the full path. With the following filter, as suggested at https://www.elastic.co/guide/en/logstash/current/plugins-filters-mutate.html, I get this error:
[WARN ][logstash.filters.mutate ] Exception caught while applying mutate filter {:exception=>"Invalid FieldReference: `sfa[2]`"}
But if I don't try to retrieve the second element and just use the whole sfa field to populate sfa_ref, it works, only with the forward slashes replaced by commas.
filter {
  mutate {
    add_field => {"sfa" => "%{[http][request][referrer]}"}
  }
  mutate {
    split => ["sfa", "/"]
    add_field => {"sfa_ref" => "%{sfa[2]}"}
  }
}
The input is as follows:
{
  "http": {
    "request": {
      "bytes": 727,
      "method": "get",
      "headers": {
        "content-length": 0
      },
      "referrer": "https://example.domain.com/web/font-awesome/css/font-awesome.min.css"
    },
    "response": {
      "bytes": 66989,
      "status_code": 200,
      "body": {
        "bytes": 66624
      },
      "headers": {
        "content-length": 66624,
        "content-type": "application/font-woff2"
      }
    },
    "version": "1.1"
  },
  "status": "OK"
}
After the split, the sfa field becomes:
"sfa": [ "https:", "", "example.domain.com", "web", "font-awesome", "css", "font-awesome.min.css" ]

The documentation I followed seems to be outdated. In newer versions of Logstash, the proper way to reference an array element is %{[field_name][index]}, so the field name needs square brackets as well:
mutate {
  split => ["sfa", "/"]
  add_field => {"sfa_ref" => "%{[sfa][2]}"}
}
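Putting both pieces together, a minimal sketch of the corrected filter, with the expected result for the sample referrer noted as a comment:

filter {
  mutate {
    add_field => {"sfa" => "%{[http][request][referrer]}"}
  }
  mutate {
    split => ["sfa", "/"]
    # with the sample referrer above, element 2 of sfa is "example.domain.com"
    add_field => {"sfa_ref" => "%{[sfa][2]}"}
  }
}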

Related

Incorrect document_id for Logstash elastic search output

I'm using Logstash to read JSON messages from a Solace queue and write them to Elasticsearch. I'm using doc_as_upsert => true along with the document_id parameter in the output. This is what my Logstash configuration looks like:
logstash.conf
input {
  jms {
    include_header => false
    include_properties => false
    include_body => true
    use_jms_timestamp => false
    destination => 'SpringBatchTestQueue'
    pub_sub => false
    jndi_name => '/JMS/CF/MDM'
    jndi_context => {
      'java.naming.factory.initial' => 'com.solacesystems.jndi.SolJNDIInitialContextFactory'
      'java.naming.security.principal' => 'EDM_Test_User#NovartisDevVPN'
      'java.naming.provider.url' => 'tcp://localhost:55555'
      'java.naming.security.credentials' => 'EDM_Test_User'
    }
    require_jars => ['/app/elasticsearch/jms/commons-lang-2.6.jar',
                     '/app/elasticsearch/jms/sol-jms-10.10.0.jar',
                     '/app/elasticsearch/jms/geronimo-jms_1.1_spec-1.1.1.jar']
  }
}
output {
  elasticsearch {
    hosts => ["https://glchbs-sd220240.eu.novartis.net:9200/"]
    index => "test-%{+YYYY.MM.dd}"
    document_id => "%{customerId}"
    doc_as_upsert => true
    ssl => true
    ssl_certificate_verification => true
    cacert => "/app/elasticsearch/config/ssl/Novartis_Silver_Three_Chain.pem"
  }
}
JSON message:
{
  "customerId": "N-CA-Z9II2YJ1YJ",
  "name": "Alan Birch",
  "customerRecordType": "Health Care Professional",
  "country": "CA",
  "language": "EN",
  "privacyLawStatus": false,
  "salutation": "Mr.",
  "firstName": "Alan",
  "lastName": "Birch",
  "customerType": "Non Prescriber",
  "hcpType": "Pharmacist Assistant",
  "isMedicalExpert": false,
  "customerAddresses": [
    {
      "addressType": "Primary Address",
      "addressLine1": "4001 Leslie Street"
    },
    {
      "addressType": "Other",
      "addressLine1": "3004 Center St"
    }
  ],
  "meansOfContact": [
    {
      "type": "Email1",
      "value": "alab#noname.com",
      "status": "Active"
    },
    {
      "type": "Email2",
      "value": "balan#gmail.com",
      "status": "Active"
    }
  ],
  "specialities": [
    {
      "specialtyType": "Primary Specialty",
      "specialty": "Pharmacy Technician",
      "status": "Active"
    }
  ]
}
As you can see, I'm trying to use the customerId field of the JSON message as the document id for Elasticsearch. But when I look at a document inserted into Elasticsearch, the document_id is not mapped to the customerId field; the document is being inserted with a literal _id of %{customerId}.
How can I fix this? I'd appreciate your help.
That is telling you that the [customerId] field does not exist on that event. If the [message] field is JSON then you should add a json filter to parse it. That will create the [customerId] field, which you can then use as the document_id.
json { source => "message" }
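A minimal sketch of where that filter could sit in the configuration above, assuming the JMS input delivers the raw JSON document in the message field:

filter {
  json {
    # parse the JSON text in [message] into top-level fields such as customerId,
    # which the elasticsearch output can then reference as %{customerId}
    source => "message"
  }
}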

Json Array splitting issue Logstash configuration : Unexpected end-of-input: expected close marker for Array (start marker at [Source: (S

This is what my JSON object looks like; I have verified that the JSON I am getting is valid. I tried setting up configuration files for it, but I always get the same error:
JSON parse error, original data now in message field {:error=>#, :data=>"{\"total_rows\":15587,\"offset\":0,\"rows\":[\r"}
[2019-08-05T21:07:49,799][WARN ][logstash.filters.split ] Only String and Array types are splittable. field:[doc][serversGroups] is of type = NilClass
[2019-08-05T21:07:50,584][WARN ][logstash.filters.split ] Only String and Array types are splittable. field:[doc][serversGroups][ActiveUsers] is of type = NilClass
This is the source config file I am using for Logstash:
filter {
  json {
    source => "message"
    skip_on_invalid_json => "true"
    target => "doc"
  }
  split {
    field => "[doc][serversGroups]"
  }
  split {
    field => "[doc][serversGroups][ActiveUsers]"
  }
  date {
    match => [ "[doc][date]", "UNIX" ]
    target => "unix_time"
  }
  mutate {
    convert => { "[doc][serversGroups][ActiveUsers][handle]" => "integer"
                 "[doc][serversGroups][list][UsedLicenses]" => "integer"
                 "[doc][serversGroups][list][issuedLicenses]" => "integer"
    }
  }
  fingerprint {
    concatenate_all_fields => "true"
    method => "SHA256"
    target => "fingerprint"
  }
}
output {
  stdout {
    codec => "rubydebug"
  }
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "pyyython"
    codec => "json"
    document_id => "%{[fingerprint]}"
  }
}
This is my source JSON:
{
  "total_rows": 156122,
  "offset": 12,
  "rows": [
    {
      "id": "12345",
      "key": "12345",
      "value": {
        "rev": "1-12345"
      },
      "doc": {
        "_id": "12345",
        "_rev": "1-12345",
        "date": "15645348122",
        "HostServerName": "abc.com",
        "serversGroups": [
          {
            "ServiceName": "--- ",
            "list": {
              "issuedLicenses": "123",
              "UsedLicenses": "12"
            },
            "ActiveUsers": [
              {}
            ]
          },
          {
            "ServiceName": "--- ",
            "list": {
              "issuedLicenses": "123",
              "UsedLicenses": "12"
            },
            "ActiveUsers": [
              {}
            ]
          },
          {
            "ServiceName": "--- ",
            "list": {
              "issuedLicenses": "123",
              "UsedLicenses": "12"
            },
            "ActiveUsers": [
              {}
            ]
          },
          {
            "ServiceName": "--- ",
            "list": {
              "issuedLicenses": "123",
              "UsedLicenses": "1"
            },
            "ActiveUsers": [
              {
                "user": "me",
                "user_host": "myself",
                "dispay": "andI",
                "version": "v1.1",
                "server_host": "testing.abc.com",
                "handle": "12345",
                "last_date_license_check": "7/7",
                "last_time_license_check": "12:12"
              }
            ]
          }
        ]
      }
    }
  ]
}
I keep getting this error:
JSON parse error, original data now in message field {:error=>#<LogStash::Json::ParserError: Unexpected end-of-input: expected close marker for Array (start marker at [Source: (S"; line: 1, column: 39])87,"offset":0,"rows":[
"; line: 2, column: 41]>, :data=>"{\"total_rows\":15587,\"offset\":0,\"rows\":[\r"}
[2019-08-05T21:07:49,799][WARN ][logstash.filters.split ] Only String and Array types are splittable. field:[doc][serversGroups] is of type = NilClass
[2019-08-05T21:07:50,584][WARN ][logstash.filters.split ] Only String and Array types are splittable. field:[doc][serversGroups][ActiveUsers] is of type = NilClass
I'm not sure if my splitting is wrong!
The source JSON that you show is clearly invalid, since it ends with a comma. If I replace the comma with
]
}
}
]
}
then it is valid. With that change made, it can be split using:
split { field => "[doc][rows][0][doc][serversGroups]" }
split { field => "[doc][rows][0][doc][serversGroups][ActiveUsers]" }
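For context, a sketch of how those split fields would slot into the filter from the question, keeping the json filter's target => "doc" as posted:

filter {
  json {
    source => "message"
    skip_on_invalid_json => "true"
    target => "doc"
  }
  # the parsed document lives under [doc], so the rows array is addressed as [doc][rows]
  split { field => "[doc][rows][0][doc][serversGroups]" }
  split { field => "[doc][rows][0][doc][serversGroups][ActiveUsers]" }
}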

Using multiple config files for logstash

I am just learning Elasticsearch and I need to know how to correctly split a configuration file into multiple files. I'm using the official Logstash Docker image with ports 9600 and 5044 bound. Originally I had a working single Logstash file without conditionals, like so:
input {
  beats {
    port => '5044'
  }
}
filter {
  grok {
    match => {
      "message" => "%{TIMESTAMP_ISO8601:timestamp} \[(?<event_source>[\w\s]+)\]:\[(?<log_type>[\w\s]+)\]:\[(?<id>\d+)\] %{GREEDYDATA:details}"
      "source" => "%{GREEDYDATA}\\%{GREEDYDATA:app}.log"
    }
  }
  mutate {
    convert => { "id" => "integer" }
  }
  date {
    match => [ "timestamp", "ISO8601" ]
    locale => en
    remove_field => "timestamp"
  }
}
output {
  elasticsearch {
    hosts => ["http://elastic:9200"]
    index => "logstash-supportworks"
  }
}
When I wanted to add Metricbeat I decided to split that configuration into a new file, so I ended up with three files:
__input.conf
input {
  beats {
    port => '5044'
  }
}
metric.conf
# for testing I'm adding no filters just to see what the data looks like
output {
  if ['@metadata']['beat'] == 'metricbeat' {
    elasticsearch {
      hosts => ["http://elastic:9200"]
      index => "%{[@metadata][beat]}-%{[@metadata][version]}"
    }
  }
}
supportworks.conf
filter {
  if ["source"] =~ /Supportwork Server/ {
    grok {
      match => {
        "message" => "%{TIMESTAMP_ISO8601:timestamp} \[(?<event_source>[\w\s]+)\]:\[(?<log_type>[\w\s]+)\]:\[(?<id>\d+)\] %{GREEDYDATA:details}"
        "source" => "%{GREEDYDATA}\\%{GREEDYDATA:app}.log"
      }
    }
    mutate {
      convert => { "id" => "integer" }
    }
    date {
      match => [ "timestamp", "ISO8601" ]
      locale => en
      remove_field => "timestamp"
    }
  }
}
output {
  if ["source"] =~ /Supportwork Server/ {
    elasticsearch {
      hosts => ["http://elastic:9200"]
      index => "logstash-supportworks"
    }
  }
}
Now no data is being sent to the ES instance. I have verified that Filebeat at least is running and publishing messages, so I'd expect to at least see that much going to ES. Here's a published message from my server running Filebeat:
2019-03-06T09:16:44.634-0800 DEBUG [publish] pipeline/processor.go:308 Publish event: {
  "@timestamp": "2019-03-06T17:16:44.634Z",
  "@metadata": {
    "beat": "filebeat",
    "type": "doc",
    "version": "6.6.1"
  },
  "source": "C:\\Program Files (x86)\\Hornbill\\Supportworks Server\\log\\swserver.log",
  "offset": 4773212,
  "log": {
    "file": {
      "path": "C:\\Program Files (x86)\\Hornbill\\Supportworks Server\\log\\swserver.log"
    }
  },
  "message": "2019-03-06 09:16:42 [COMMS]:[INFO ]:[4924] Helpdesk API (5005) Socket error while idle - 10053",
  "prospector": {
    "type": "log"
  },
  "input": {
    "type": "log"
  },
  "beat": {
    "name": "WIN-22VRRIEO8LM",
    "hostname": "WIN-22VRRIEO8LM",
    "version": "6.6.1"
  },
  "host": {
    "name": "WIN-22VRRIEO8LM",
    "architecture": "x86_64",
    "os": {
      "platform": "windows",
      "version": "6.3",
      "family": "windows",
      "name": "Windows Server 2012 R2 Standard",
      "build": "9600.0"
    },
    "id": "e5887ac2-6fbf-45ef-998d-e40437066f56"
  }
}
I got this working by adding a mutate filter to __input.conf to replace backslashes with forward slashes in the source field:
filter {
  mutate {
    gsub => [ "source", "[\\]", "/" ]
  }
}
and by removing the quotation marks from the field references in my conditionals. So
if ["source"] =~ /Supportwork Server/
Became
if [source] =~ /Supportwork Server/
Both changes seemed to be necessary to get this configuration working.
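For reference, a sketch of how the supportworks.conf output block looks with the unquoted field reference applied (the gsub mutate lives in __input.conf as shown above):

output {
  if [source] =~ /Supportwork Server/ {
    elasticsearch {
      hosts => ["http://elastic:9200"]
      index => "logstash-supportworks"
    }
  }
}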

Logstash split filter

Recently I discovered that I am able to pull data directly into Logstash by providing URLs. Fetching the input works very well; however, it downloads each document and loads it into ES whole.
I would like to create a new record in Elasticsearch for every line. By default the whole file is loaded into the message field, which slows down Kibana in the Discover tab, etc.
Kibana output:
{
  "_index": "blacklists",
  "_type": "default",
  "_id": "pf3k_2QB9sEBYW4CK4AA",
  "_version": 1,
  "_score": null,
  "_source": {
    "@timestamp": "2018-08-03T13:05:00.569Z",
    "tags": [
      "_jsonparsefailure",
      "c2_info",
      "ipaddress"
    ],
    "@version": "1",
    "message": "#############################################################\n## Master Feed of known, active and non-sinkholed C&Cs IP \n## addresses\n## \n## HIGH-CONFIDENCE FAMILIES ONLY\n## \n## Feed generated at: 2018-08-03 12:13 \n##\n## Feed Provided By: John Bambenek of Bambenek Consulting\n## jcb#bambenekconsulting.com // http://bambenekconsulting.com\n## Use of this feed is governed by the license here: \n## http://osint.bambenekconsulting.com/license.txt,
    "client": "204.11.56.48",
    "http_poller_metadata": {
      "name": "bembenek_c2",
      "host": "node1",
      "request": {
        "method": "get",
        "url": "http://osint.bambenekconsulting.com/feeds/c2-ipmasterlist-high.txt"
      },
      "response_message": "OK",
      "runtime_seconds": 0.27404,
      "response_headers": {
        "content-type": "text/plain",
        "accept-ranges": "bytes",
        "cf-ray": "4448fe69e02197ce-FRA",
        "date": "Fri, 03 Aug 2018 13:05:05 GMT",
        "connection": "keep-alive",
        "last-modified": "Fri, 03 Aug 2018 12:13:44 GMT",
        "server": "cloudflare",
        "vary": "Accept-Encoding",
        "etag": "\"4bac-57286dbe759e4-gzip\""
      },
      "code": 200,
      "times_retried": 0
    }
  },
  "fields": {
    "@timestamp": [
      "2018-08-03T13:05:00.569Z"
    ]
  },
  "sort": [
    1533301500569
  ]
}
Logstash config:
input {
  http_poller {
    urls => {
      bembenek_c2 => "http://osint.bambenekconsulting.com/feeds/c2-ipmasterlist-high.txt"
      bembenek_c2dom => "http://osint.bambenekconsulting.com/feeds/c2-dommasterlist-high.txt"
      blocklists_all => "http://lists.blocklist.de/lists/all.txt"
    }
    request_timeout => 30
    codec => "json"
    tags => c2_info
    schedule => { cron => "*/10 * * * *"}
    metadata_target => "http_poller_metadata"
  }
}
filter {
  grok {
    match => { "message" => [ "%{IPV4:ipaddress}" ] }
    add_tag => [ "ipaddress" ]
  }
}
output {
  stdout { codec => dots }
  elasticsearch {
    hosts => ["10.0.50.51:9200"]
    index => "blacklists"
    document_type => "default"
    template_overwrite => true
  }
  file {
    path => "/tmp/blacklists.json"
    codec => json {}
  }
}
Does anyone know how to split the loaded file on "\n"? I have tried:
filter {
  split {
    terminator => "\n"
  }
}
Documentation and examples showing how to use this filter are not that plentiful.
The missing filter was:
filter {
  split {
    field => "[message]"
  }
}
We do not have to specify the terminator, as it defaults to "\n" per the Logstash 6.3 documentation.
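As a rough sketch, the split could be combined with the grok from the original config so that each line of the feed becomes its own event before the IP address is extracted (field names as in the question):

filter {
  split {
    # default terminator is "\n", so each line becomes a separate event
    field => "message"
  }
  grok {
    match => { "message" => [ "%{IPV4:ipaddress}" ] }
    add_tag => [ "ipaddress" ]
  }
}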

logstash - geoip in Kibana can not show any information using the IP addresses

I want to display the number of users accessing my app on a world map using Elasticsearch, Kibana and Logstash.
Here is my log (JSON format):
{
  "device": "",
  "public_ip": "70.90.17.210",
  "mac": "00:01:02:03:04:05",
  "ip": "192.16.1.10",
  "event": {
    "timestamp": "2014-08-15T00:00:00.000Z",
    "source": "system",
    "name": "status"
  },
  "status": {
    "channel": "channelname",
    "section": "pictures",
    "downlink": 1362930,
    "network": "Wi-Fi"
  }
}
And this is my config file:
input {
  file {
    path => ["/mnt/logs/stb.events"]
    codec => "json"
    type => "event"
  }
}
filter {
  date {
    match => [ "timestamp", "yyyy-MM-dd HH:mm:ss", "ISO8601" ]
  }
}
filter {
  mutate {
    convert => [ "downlink", "integer" ]
  }
}
filter {
  geoip {
    add_tag => [ "geoip" ]
    database => "/opt/logstash/vendor/geoip/GeoLiteCity.dat"
    source => "public_ip"
    target => "geoip"
    add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
    add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
  }
  mutate {
    convert => [ "[geoip][coordinates]", "float" ]
  }
}
output {
  elasticsearch {
    host => localhost
  }
}
At the end, in Kibana, I see only an empty geoip tag.
Can someone help me and point out where my mistake is?
Since Logstash 1.3.0 you can use the geoip.location field that is created automatically, instead of creating the coordinates field and converting it to float manually.
One curly bracket seems to be missing from your log; I guess this is the correct format:
{
  "device": {
    "public_ip": "70.90.17.210",
    "mac": "00:01:02:03:04:05",
    "ip": "192.16.1.10"
  },
  "event": {
    "timestamp": "2014-08-15T00:00:00.000Z",
    "source": "system",
    "name": "status"
  },
  "status": {
    "channel": "channelname",
    "section": "pictures",
    "downlink": 1362930,
    "network": "Wi-Fi"
  }
}
In this case I would suggest you try the following configuration for the filter (without mutate):
filter {
  geoip {
    source => "[device][public_ip]"
  }
}
Then you should be able to use "geoip.location" in your map. It took quite some research and debugging to find out that nested fields need to be surrounded by [ ] when used as the source in order to be resolved correctly.
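If the log really is nested as above, the other filters would presumably need nested references too. A minimal sketch under that assumption (this goes beyond the original answer and is not something it states):

filter {
  date {
    # assuming the timestamp lives under [event] as in the corrected log
    match => [ "[event][timestamp]", "ISO8601" ]
  }
  mutate {
    # assuming downlink lives under [status]
    convert => [ "[status][downlink]", "integer" ]
  }
  geoip {
    source => "[device][public_ip]"
  }
}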
