Script fails in upsert, with logstash output elasticsearch plugin - elasticsearch

Environment
DB: Sybase
Logstash: 2.2.0 with JDBC Plugin, Elasticsearch Output plugin
SQL Query:
select res.id as 'res.id', res.name as 'res.name', tag.name as 'tag.name'
from Res res, ResTags rt, Tags tag
where res.id *= rt.resrow and rt.tagid *= tag.id
SQL Result:
res.id | res.name | tag.name
0 | result0 | null
0 | result0 | tagA
1 | result1 | tagA
1 | result1 | tagB
2 | result2 | tagA
2 | result2 | tagC
Index Mapping:
{
"mappings": {
"res": {
"properties": {
"id": { "type": "long"},
"name": { "type": "string" },
"tags": {
"type": "nested",
"properties": { "tagname": { "type": "string" }}
}
}
}
}
Conf File:
input {
jdbc {
jdbc_driver_library => "jtds-1.3.1.jar"
jdbc_driver_class => "Java::net.sourceforge.jtds.jdbc.Driver"
jdbc_connection_string => "jdbc:jtds:sybase://hostname.com:1234/schema"
jdbc_user => "george"
jdbc_password => "monkey"
jdbc_fetch_size => 100
statement_filepath => "/home/george/sql"
}
}
output {
elasticsearch {
action => "update"
index => "myres"
document_type => "res"
document_id => "%{res.id}"
script_lang => "groovy"
hosts => [ "my.other.host.com:5921" ]
upsert => ' {
"id" : %{res.id},
"name" : "%{res.name}",
"tags" :[{ "tagname": "%{tag.name}" }]
}'
script => '
if (ctx._source.res.tags.containsValue(null)) {
// if null has been added replace it with actual value
cts._source.res.tags = [{"tagname": "%{tag.name}" }];
else {
// if you find the tag, then do nothing
if (ctx._source.res.tags.containsValue("%{tag.name}")) {}
else {
// if the value you try to add is not null
if (%{tag.name} != null)
// add it as a new object into the tag array
ctx._source.res.tags += {"tagname": "%{tag.name}"};
}
}
'
}
}
The GOAL is to add the multiple rows returned from the database into ES, concatenating the tags as new objects. (This is a simplified example; add_tag and filters do not do the job, because my JSON structure is deeper than 2 levels: nested of nested, etc.)
The desired outcome after the bulk upload into ES would be:
{
"hits": {
"total": 3,
"max_score": 1,
"hits": [ {
"_index": "myres",
"_type": "res",
"_id": 0,
"_score": 1,
"_source": {
"res": {
"id":0,
"name": "result0",
"tags": [{"tagname": "tagA"}],
"#version": "2",
"#timestamp": "2016-xx-yy..."
}
},{
"_index": "myres",
"_type": "res",
"_id": 1,
"_score": 1,
"_source": {
"res": {
"id":1,
"name": "result1",
"tags": [{"tagname": "tagA"},{"tagname": "tagB"}],
"#version": "2",
"#timestamp": "2016-xx-yy..."
}
}{
"_index": "myres",
"_type": "res",
"_id": 2,
"_score": 1,
"_source": {
"res": {
"id":2,
"name": "result2",
"tags": [{"tagname": "tagA"},{"tagname": "tagC"],
"#version": "2",
"#timestamp": "2016-xx-yy..."
}
}
}
...
ISSUE: if the script in the output section of the conf is not commented out, the error below is raised. If the script is not included, then only the initial tags (as expected) are imported, and the second ones are not.
It looks like script is not working within elasticsearch output.
ERROR message:
[400] {"error":"ActionRequestValidationException[Validation Failed:
1: script or doc is missing;
2: script or doc is missing;
3: script or doc is missing;],"status":400]} {:class=> ... bla bla ...}
NOTES
To avoid wasting people's time: doc_as_upsert => true also does not work as expected. It just keeps on updating / overwriting and only keeps the latest row of the db.
Also, the river plugin for jdbc to ES does not support nested-of-nested structures, so that does not work either.

Related

Logstash config: how to transfer an AWS S3 CSV without a header to Elasticsearch

I have a sample CSV file in S3 with 3 columns and no header. During the data transfer from the S3 CSV to Elasticsearch, I want to give a name to each column (in my case id, name, age for columns 0 to 2 respectively).
Input Sample.csv
1,myname,23
2,myname2,24
The expected output is the following docs in the ES index:
[{
"_index": "user_detail",
"_type": "user_detail_type",
"_id": "1",
"_score": 1.0,
"_source": {
"id": "1",
"name": "myname",
"age": "23"
}
},
{
"_index": "user_detail",
"_type": "user_detail_type",
"_id": "2",
"_score": 1.0,
"_source": {
"id": "2",
"name": "myname2",
"age": "24"
}
}]
Logstash config that I have written is:
input {
s3 {
bucket => "users"
region => "us-east-1"
watch_for_new_files => false
prefix => "user.csv"
}
}
filter {
// Need help here
}
output {
elasticsearch {
hosts => "localhost:9200"
index => "user_detail"
document_type => "user_detail_type"
document_id => "%{id}"
}
}
Doubt:
What should I write in the filter section (or what other change to the config is needed) to map column[0] => id, column[1] => name, column[2] => age during Elasticsearch insertion?

How to get fields inside message array from Logstash?

I've been trying to configure a Logstash pipeline whose input type is snmptrap, along with yamlmibdir. Here's the code:
input {
snmptrap {
host => "abc"
port => 1062
yamlmibdir => "/usr/share/logstash/vendor/bundle/jruby/2.5.0/gems/snmp-1.3.2/data/ruby/snmp/mibs"
}
}
filter {
mutate {
gsub => ["message","^\"{","{"]
gsub => ["message","}\"$","}"]
gsub => ["message","[\\]",""]
}
json { source => "message" }
split {
field => "message"
target => "evetns"
}
}
output {
elasticsearch {
hosts => "xyz"
index => "logstash-%{+YYYY.MM.dd}"
}
stdout { codec => rubydebug }
}
and the result shown in Kibana (JSON format)
{
"_index": "logstash-2019.11.18-000001",
"_type": "_doc",
"_id": "Y_5zjG4B6M9gb7sxUJwG",
"_version": 1,
"_score": null,
"_source": {
"#version": "1",
"#timestamp": "2019-11-21T05:33:07.675Z",
"tags": [
"_jsonparsefailure"
],
"1.11.12.13.14.15": "teststring",
"message": "#<SNMP::SNMPv1_Trap:0x244bf33f #enterprise=[1.2.3.4.5.6], #timestamp=#<SNMP::TimeTicks:0x196a1590 #value=55>, #varbind_list=[#<SNMP::VarBind:0x21f5e155 #name=[1.11.12.13.14.15], #value=\"teststring\">], #specific_trap=99, #source_ip=\"xyz\", #agent_addr=#<SNMP::IpAddress:0x5a5c3c5f #value=\"xC0xC1xC2xC3\">, #generic_trap=6>",
"host": "xyz"
},
"fields": {
"#timestamp": [
"2019-11-21T05:33:07.675Z"
]
},
"sort": [
1574314387675
]
}
As you can see in the message field, it's an array, so how can I get all the fields inside the array and also be able to select these fields to display in Kibana?
ps1. I still get the _jsonparsefailure tag if I select type 'Table' in the expanded document.
ps2. Even though I use gsub to remove '\' from the expected JSON result, why do I still get a result containing '\'?

Outputting document metadata from ElasticSearch using Logstash output csv plugin

I am attempting to output the _id metadata field from ES into a CSV file using Logstash.
{
"_index": "data",
"_type": "default",
"_id": "vANfNGYB9XD0VZRJUFfy",
"_version": 1,
"_score": null,
"_source": {
"vulnid": "CVE-2018-1000060",
"product": [],
"year": "2018",
"month": "02",
"day": "09",
"hour": "23",
"minute": "29",
"published": "2018-02-09T18:29:02.213-05:00",
},
"sort": [
1538424651203
]
}
My logstash output filter is:
output { csv { fields => [ "_id", "vulnid", "published"] path =>
"/tmp/export.%{+YYYY-MM-dd-hh-mm}.csv" } }
I get output:
,CVE-2018-1000060,2018-02-09T18:29:02.213-05:00
But I would like to get:
vANfNGYB9XD0VZRJUFfy,CVE-2018-1000060,2018-02-09T18:29:02.213-05:00
How to output the metadata _id into the csv file?
It does not matter if I specify the field like "_id" or "@_id" or "@id".
When we query ES we have to enable docinfo => true. By default it is false.
input {
elasticsearch {
hosts => [ your hosts ]
index => "ti"
query => '{your query}'
size => 1000
scroll => "1s"
docinfo => true
schedule => "14 * * * *"
}
}
Logstash is not able to get the "_id" field from your input, most likely because you have not set the docinfo option to true.
docinfo includes Elasticsearch document information such as index, type, _id, etc. Please have a look here for more info: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-elasticsearch.html#plugins-inputs-elasticsearch-docinfo
Use your input plugin like this:
input {
elasticsearch {
hosts => "hostname"
index => "yourIndex"
query => '{ "query": { "query_string": { "query": "*" } } }' //optional
size => 500 //optional
scroll => "5m" //optional
docinfo => true
}
}

Wrong data shown when searched

I have a dataset of more than a million rows. I have integrated Elasticsearch with MySQL using Logstash.
When I request the following URL in Postman,
http://localhost:9200/persondetails/Document/_search?q=*
i get the following:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "persondetails",
"_type": "Document",
"_id": "%{idDocument}",
"_score": 1,
"_source": {
"iddocument": 514697,
"#timestamp": "2017-08-31T05:18:46.916Z",
"author": "vaibhav",
"expiry_date": null,
"#version": "1",
"description": "ly that",
"creation_date": null,
"type": 1
}
},
{
"_index": "persondetails",
"_type": "Document_count",
"_id": "AV4o0J3OJ5ftvuhV7i0H",
"_score": 1,
"_source": {
"query": {
"term": {
"author": "rishav"
}
}
}
}
]
}
}
This is wrong, as the number of rows in my table is more than 1 million, yet this shows that the total is only 2. I am unable to find the mistake here.
When I request http://localhost:9200/_cat/indices?v
it shows this:
health:yellow
status:open
index:persondetails
uuid:4FiGngZcQfS0Xvu6IeHIfg
pri:5
rep : 1
docs.count : 2
docs.deleted :1054
store.size : 125.4kb
pri.store.size : 125.4kb
This is my logstash.conf file
input {
jdbc {
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/persondetails"
jdbc_user => "root"
jdbc_password => ""
schedule => "* * * * *"
jdbc_validate_connection => true
jdbc_driver_library => "/usr/local/Cellar/logstash/5.5.2/mysql-connector-java-3.1.14/mysql-connector-java-3.1.14-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
statement => "SELECT * FROM Document"
type => "persondetails"
}
}
output {
elasticsearch {
#protocol=>http
index =>"persondetails"
document_type => "Document"
document_id => "%{idDocument}"
hosts => ["http://localhost:9200"]
stdout{ codec => rubydebug}
}
}
From your result, it looks like there is an issue with your Logstash configuration that causes your documents to be overwritten: the document_id is not being generated, so effectively there is only one document in your index, with the document ID "%{idDocument}".
See the following _source snippet from the result to the search query you provided:
"_source": {
"iddocument": 514697,
"#timestamp": "2017-08-31T05:18:46.916Z",
"author": "vaibhav",
"expiry_date": null,
"#version": "1",
"description": "ly that",
"creation_date": null,
"type": 1
}
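Even looking at the small size of the index, it doesn't look like there are more documents. You should check whether your jdbc input is actually providing the "idDocument" field: the _source above shows the field as "iddocument" (the jdbc input lowercases column names by default), and field references are case-sensitive, so "%{idDocument}" is never substituted.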

using elasticsearch filter in logstash pipeline

I'm using the elasticsearch filter in my Logstash pipeline. I correctly find the result using:
filter{
if [class] == "DPAPIINTERNAL" {
elasticsearch {
hosts => "10.1.10.16"
index => "dp_audit-2017.02.16"
query_template => "/home/vittorio/Documents/elastic-queries/matching-requestaw.json"
}
}
}
As you can see, I'm using "query_template", which is:
{
"query": {
"query_string": {
"query": "class:DPAPI AND request.aw:%{[aw]}"
}
},
"_source": ["end_point", "vittorio"]
}
That tells Elasticsearch to look up the log with that specific class whose "aw" matches the one in the DPAPIINTERNAL log.
Perfect! But now that I have found the result, I want to take some fields from it and attach them to my DPAPIINTERNAL log; for instance, I want to take "end_point" and add it under the new key "vittorio" inside my log.
This is not happening and I don't understand why.
here is the log that i'm looking at using the query:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "dp_audit-2017.02.16",
"_type": "logs",
"_id": "AVpHoPHPuEPlW12Qu",
"_score": 1,
"_source": {
"svc": "dp-1.1",
"request": {
"method": "POST|PATCH|DELETE",
"aw": "prova",
"end_point": "/bank/6311",
"app_instance": "7D1-D233-87E1-913"
},
"path": "/home/vittorio/Documents/dpapi1.json",
"#timestamp": "2017-02-16T15:53:33.214Z",
"#version": "1",
"host": "Vito",
"event": "bank.add",
"class": "DPAPI",
"ts": "2017-01-16T19:20:30.125+01:00"
}
}
]
}
}
You need to specify the fields parameter in your elasticsearch filter, like this:
elasticsearch {
hosts => "10.1.10.16"
index => "dp_audit-2017.02.16"
query_template => "/home/vittorio/Documents/elastic-queries/matching-requestaw.json"
fields => { "[request][end_point]" => "vittorio" }
}
Note that since end_point is a nested field, you need to modify the _source in your query template like this:
"_source": ["request.end_point"]
The problem is simply that you should not list the "new" field in the query_template's _source.
"_source": ["request"] # here you specify the field you want from the query result.
and then
filter{
if [class] == "DPAPIINTERNAL" {
elasticsearch {
hosts => "10.1.10.16"
index => "dp_audit-2017.02.16"
query_template => "/home/vittorio/Documents/elastic-queries/matching-requestaw.json"
fields => {"request" => "new_key"} # here you add the fields and will tell elastich filter to put request inside new_key
}
}
}
That worked for me!

Resources