Elasticsearch array of objects using Logstash

I have a MySQL database working as my primary database, and I'm ingesting data into Elasticsearch from MySQL using Logstash. I have successfully indexed the users table into Elasticsearch and it is working fine. However, my users table has the fields interest_id and interest_name, which contain the IDs and names of user interests as comma-separated strings:
"interest_id" : "1,2",
"interest_name" : "Business,Farming"
What I'm trying to achieve:
I want to build an interests field that holds an array of objects, each containing one interest_id and one interest_name, like so:
"interests" : [
  {
    "interest_name" : "Business",
    "interest_id" : "1"
  },
  {
    "interest_name" : "Farming",
    "interest_id" : "2"
  }
]
Please let me know if this is possible and what the best approach is to achieve it.
My conf:
input {
jdbc {
jdbc_driver_library => "/home/logstash-7.16.3/logstash-core/lib/jars/mysql-connector-java-8.0.22.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/"
jdbc_user => "XXXXX"
jdbc_password => "XXXXXXX"
sql_log_level => "debug"
clean_run => true
record_last_run => false
statement_filepath => "/home/logstash-7.16.3/config/queries/query.sql"
}
}
filter {
mutate {
remove_field => ["#version", "#timestamp",]
}
}
output {
elasticsearch {
hosts => ["https://XXXXXXXXXXXX:443"]
index => "users"
action => "index"
user => "XXXX"
password => "XXXXXX"
template_name => "myindex"
template => "/home/logstash-7.16.3/config/my_mapping.json"
template_overwrite => true
}
}
I have tried doing this by creating a nested field interests in my mapping and then adding a mutate filter to my conf file like this:
mutate {
rename => {
"interest_id" => "[interests][interest_id]"
"interest_name" => "[interests][interest_name]"
}
}
With this I'm only able to get this output:
"interests" : {
"interest_id" : "1,2",
"interest_name" : "Business,Farming"
}
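One way to achieve this (a minimal sketch, assuming interest_id and interest_name always hold the same number of comma-separated values, and using the field names shown above) is a ruby filter that splits both strings and zips them into an array of objects:
filter {
  ruby {
    code => "
      ids   = event.get('interest_id').to_s.split(',')
      names = event.get('interest_name').to_s.split(',')
      # build one { interest_id, interest_name } object per id/name pair
      event.set('interests', ids.zip(names).map { |id, name|
        { 'interest_id' => id, 'interest_name' => name }
      })
      event.remove('interest_id')
      event.remove('interest_name')
    "
  }
}
With the interests field mapped as nested (as already attempted above), Elasticsearch then indexes each id/name pair as its own object instead of the two concatenated strings.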

Related

Logstash Input -> JDBC in some properties or parameterizable file?

I am using Logstash to ingest data into Elasticsearch. I am using the jdbc input, and I need to parameterize the jdbc input settings, such as the connection string, password, etc., since I have 10 .conf files where each one has 30 jdbc blocks and 30 outputs inside.
So, since each file has the same settings, I would like to know if it is possible to do something generic, or to reference that information from somewhere else.
I have this 30 times:...
input {
# Number 1
jdbc {
jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
jdbc_driver_class => "com.informix.jdbc.IfxDriver"
jdbc_connection_string => "jdbc:informix-sqli://xxxxxxx/schema:informixserver=server"
jdbc_user => "xxx"
jdbc_password => "xxx"
schedule => "*/1 * * * *"
statement => "SELECT * FROM public.test ORDER BY id ASC"
tags => "001"
}
# Number 2
jdbc {
jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
jdbc_driver_class => "com.informix.jdbc.IfxDriver"
jdbc_connection_string => "jdbc:informix-sqli://xxxxxxx/schema:informixserver=server"
jdbc_user => "xxx"
jdbc_password => "xxx"
schedule => "*/1 * * * *"
statement => "SELECT * FROM public.test2 ORDER BY id ASC"
tags => "002"
}
[.........]
# Number X
jdbc {
jdbc_driver_library => "/usr/share/logstash/logstash-core/lib/jars/ifxjdbc-4.50.3.jar"
jdbc_driver_class => "com.informix.jdbc.IfxDriver"
jdbc_connection_string => "jdbc:informix-sqli://xxxxxxx/schema:informixserver=server"
jdbc_user => "xxx"
jdbc_password => "xxx"
schedule => "*/1 * * * *"
statement => "SELECT * FROM public.testx ORDER BY id ASC"
tags => "00x"
}
}
filter {
mutate {
add_field => { "[#metadata][mitags]" => "%{tags}" }
}
# Number 1
if "001" in [#metadata][mitags] {
mutate {
rename => [ "codigo", "[properties][codigo]" ]
}
}
# Number 2
if "002" in [#metadata][mitags] {
mutate {
rename => [ "codigo", "[properties][codigo]" ]
}
}
[......]
# Number x
if "002" in [#metadata][mitags] {
mutate {
rename => [ "codigo", "[properties][codigo]" ]
}
}
mutate {
remove_field => [ "#version","#timestamp","tags" ]
}
}
output {
# Number 1
if "001" in [#metadata][mitags] {
# Para ELK
elasticsearch {
hosts => "localhost:9200"
index => "001"
document_type => "001"
document_id => "%{id}"
manage_template => true
template => "/home/user/logstash/templates/001.json"
template_name => "001"
template_overwrite => true
}
}
# Number 2
if "002" in [#metadata][mitags] {
# Para ELK
elasticsearch {
hosts => "localhost:9200"
index => "002"
document_type => "002"
document_id => "%{id}"
manage_template => true
template => "/home/user/logstash/templates/002.json"
template_name => "002"
template_overwrite => true
}
}
[....]
# Number x
if "00x" in [#metadata][mitags] {
# Para ELK
elasticsearch {
hosts => "localhost:9200"
index => "002"
document_type => "00x"
document_id => "%{id}"
manage_template => true
template => "/home/user/logstash/templates/00x.json"
template_name => "00x"
template_overwrite => true
}
}
}
You will still need one jdbc input for each query you need to run, but you can improve your filter and output blocks.
In your filter block you are using the field [@metadata][mitags] to route your inputs, but you are applying the same mutate filter to every one of them. If that is the case, you don't need the conditionals; the same mutate filter can be applied to all your inputs without any filtering.
Your filter block could be reduced to something like this:
filter {
mutate {
add_field => { "[#metadata][mitags]" => "%{tags}" }
}
mutate {
rename => [ "codigo", "[properties][codigo]" ]
}
mutate {
remove_field => [ "#version","#timestamp","tags" ]
}
}
In your output block you use the tag only to change the index, document_type and template. You don't need conditionals for that; you can use the value of the field as a parameter.
output {
elasticsearch {
hosts => "localhost:9200"
index => "%{[#metadata][mitags]}"
document_type => "%{[#metadata][mitags]}"
document_id => "%{id}"
manage_template => true
template => "/home/unitech/logstash/templates/%{[#metadata][mitags]}.json"
template_name => "iol-fue"
template_overwrite => true
}
}
But this only works if you have a single value in the field [@metadata][mitags], which seems to be the case.
EDIT:
Edited just for history reasons: as noted in the comments, the template option does not accept dynamic parameters, because the template is only loaded when Logstash is starting. The other settings work fine.
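Given that limitation, a possible workaround (a sketch, not part of the original answer) is to install each index template in Elasticsearch ahead of time (for example with the legacy PUT _template/001 API) and let the single parameterized output skip template management entirely:
output {
  elasticsearch {
    hosts           => "localhost:9200"
    index           => "%{[@metadata][mitags]}"
    document_type   => "%{[@metadata][mitags]}"
    document_id     => "%{id}"
    # templates are pre-installed in Elasticsearch, so Logstash does not manage them
    manage_template => false
  }
}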

How to delete all documents in elasticsearch with logstash from a search

I am using Logstash to pass data to Elasticsearch, and I would like to know how to delete documents in bulk.
I already handle deletes for rows that come with an id, but what I need now is to delete all documents that match a fixed value, for example fixedField = "Base1", regardless of whether the id obtained from the jdbc input exists or not.
The idea is to delete all the documents in Elasticsearch where fixedField = "Base1" and then insert the new documents that I get from the jdbc input; this way I avoid leaving behind documents that no longer exist in my source (the jdbc input).
A more complete example:
My document_id values are 001, 002, 003, etc.
My fixed field contains "Base1" for all three document_ids.
Any ideas?
input {
jdbc {
jdbc_driver_library => ""
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://xxxxx;databaseName=xxxx;"
statement => "Select * from public.test"
}
}
filter {
if [is_deleted] {
mutate {
add_field => {
"[#metadata][elasticsearch_action]" => "delete"
}
}
mutate {
remove_field => [ "is_deleted","#version","#timestamp" ]
}
} else {
mutate {
add_field => {
"[#metadata][elasticsearch_action]" => "index"
}
}
mutate {
remove_field => [ "is_deleted","#version","#timestamp" ]
}
}
}
output {
elasticsearch {
hosts => "xxxxx"
user => "xxxxx"
password => "xxxxx"
index => "xxxxx"
document_type => "_doc"
document_id => "%{id}"
}
stdout { codec => rubydebug }
}
I finally managed to delete documents, but there is a problem: apparently when the input starts, it counts the number of records it gets, and when it continues to the output, it deletes on the first round, while on the following n-1 rounds this error message is displayed:
[HTTP Output Failure] Encountered non-2xx HTTP code 409
{:response_code=>409,
:url=>"http://localhost:9200/my_index/_delete_by_query",
The other thing that I think may be happening is that _delete_by_query is not a single bulk deletion, but rather a query-then-delete, which would lead to the query returning n results and therefore trying to delete n times.
Any ideas how I could make it run only once, or how to avoid that error?
To clarify, the error is not displayed only once; it is displayed n-1 times, once for each document to be deleted.
input {
jdbc {
jdbc_driver_library => ""
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://xxxxx;databaseName=xxxx;"
statement => "Select * from public.test"
}
}
output {
stdout { codec => json_lines }
elasticsearch {
hosts => "localhost:9200"
index => "%{[#metadata][miEntidad]}"
document_type => "%{[#metadata][miDocumento]}"
document_id => "%{id}"
}
http {
url => "http://localhost:9200/my_index/_delete_by_query"
http_method => "post"
format => "message"
content_type => "application/json; charset=UTF-8"
message => '{"query": { "term": { "properties.codigo.keyword": "TEX_FOR_SEARCH_AND_DELETE" } }}'
}
}
Finally it worked like this:
output {
http {
url => "http://localhost:9200/%{[#metadata][miEntidad]}/_delete_by_query?conflicts=proceed"
http_method => "post"
format => "message"
content_type => "application/json; charset=UTF-8"
message => '{"query": { "term": { "properties.code.keyword": "%{[properties][code]}" } }}'
}
jdbc {
connection_string => 'xxxxxxxx'
statement => ["UPDATE test SET estate = 'A' WHERE entidad = ? ","%{[#metadata][miEntidad]}"]
}
}

Need clarification on sql_last_value used in Logstash configuration

Hi all, I am using the code below for indexing data from MS SQL Server into Elasticsearch, but I am not clear about this sql_last_value.
input {
jdbc {
jdbc_driver_library => ""
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://xxxx:1433;databaseName=xxxx;"
jdbc_user => "xxxx"
jdbc_paging_enabled => true
tracking_column => modified_date
tracking_column_type => "timestamp"
use_column_value => true
jdbc_password => "xxxx"
clean_run => true
schedule => "*/1 * * * *"
statement => "Select * from [dbo].[xxxx] where modified_date >:sql_last_value"
}
}
filter {
if [is_deleted] {
mutate {
add_field => {
"[#metadata][elasticsearch_action]" => "delete"
}
}
mutate {
remove_field => [ "is_deleted","#version","#timestamp" ]
}
} else {
mutate {
add_field => {
"[#metadata][elasticsearch_action]" => "index"
}
}
mutate {
remove_field => [ "is_deleted","#version","#timestamp" ]
}
}
}
output {
elasticsearch {
hosts => "xxxx"
user => "xxxx"
password => "xxxx"
index => "xxxx"
action => "%{[#metadata][elasticsearch_action]}"
document_type => "_doc"
document_id => "%{id}"
}
stdout { codec => rubydebug }
}
Where is this sql_last_value stored, and how can I view it physically?
Is it possible to set a customized value for sql_last_value?
Could anyone please clarify the above queries?
The sql_last_value is stored in a file called .logstash_jdbc_last_run which, according to the docs, lives in $HOME/.logstash_jdbc_last_run. The file itself contains the value of the last run (here, a timestamp), and it can be set to a specific value.
You should define the last_run_metadata_path parameter for each jdbc input and point it to a more specific location, because all running jdbc input instances share the same .logstash_jdbc_last_run file by default, which can lead to unwanted results.
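For illustration, a per-pipeline metadata path could look like the sketch below; the file path is hypothetical and the remaining jdbc settings stay as in the question:
input {
  jdbc {
    # ... driver, connection string and credentials as in the question ...
    tracking_column        => "modified_date"
    tracking_column_type   => "timestamp"
    use_column_value       => true
    # each pipeline gets its own state file instead of the shared default
    last_run_metadata_path => "/usr/share/logstash/.users_jdbc_last_run"
    schedule               => "*/1 * * * *"
    statement              => "Select * from [dbo].[xxxx] where modified_date > :sql_last_value"
  }
}
The state file holds a single YAML value, typically something like --- 2022-01-15 10:30:00.000000000 Z for a timestamp tracking column; editing that value (or deleting the file) is how you seed or reset sql_last_value, and clean_run => true resets it as well.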

Logstash error when converting MySQL value to nested elasticsearch property on suggestion field

A huge cry for help here: when I try to convert a MySQL value to a nested Elasticsearch field using Logstash, I get the following error:
{"exception"=>"expecting List or Map, found class org.logstash.bivalues.StringBiValue", "backtrace"=>["org.logstash.Accessors.newCollectionException(Accessors.java:195)"
Using the following config file:
input {
jdbc {
jdbc_driver_library => "/logstash/mysql-connector-java-5.1.42-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/data"
jdbc_user => "username"
jdbc_password => "password"
statement => "SELECT id, suggestions, address_count FROM `suggestions` WHERE id <= 100"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
}
}
filter {
mutate {
rename => { 'address_count' => '[suggestions][payload][count]' }
}
}
output {
elasticsearch {
hosts => [
"localhost:9200"
]
index => "dev_suggestions"
document_type => "address"
}
}
However, if I rename address_count to a field that is not already in my mapping, then it works just fine and correctly adds the value as a nested property. I have tried other fields in my index, not just suggestions.payloads.address_count, and I get the same issue; it only works if the field has not been defined in the mapping.
This has caused me some serious headaches, and if anyone could help me overcome this issue I would greatly appreciate it, as I've spent the last 48 hours banging my head on the table!
I initially assumed I could do the following with a MySQL query:
SELECT id, suggestion, '[suggestions][payload][count]' FROM `suggestions` WHERE id <= 100
Then I also tried:
SELECT id, suggestion, 'suggestions.payload.count' FROM `suggestions` WHERE id <= 100
Both failed to insert the value, with the latter option giving an error that a field name cannot contain dots.
And finally the mapping:
{
"mappings": {
"address": {
"properties": {
"suggestions": {
"type": "completion",
"payloads" : true
}
}
}
}
}
Thanks to Val, and for future users in the same situation as me who need to convert MySQL data into nested Elasticsearch objects using Logstash, here is a working solution using Logstash 5 and Elasticsearch 2.*:
input {
jdbc {
jdbc_driver_library => "/logstash/mysql-connector-java-5.1.42-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/data"
jdbc_user => "username"
jdbc_password => "password"
statement => "SELECT addrid, suggestion, address_count FROM `suggestions` WHERE id <= 20"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
}
}
filter {
ruby {
code => "
event.set('[suggestions][input]', event.get('suggestion'))
event.set('[suggestions][payload][address_count]', event.get('address_count'))
event.set('[v][payload][id]', event.get('addrid'))
"
remove_field => [ 'suggestion', 'address_count', 'addrid' ]
}
}
output {
elasticsearch {
hosts => [
"localhost:9200"
]
index => "dev_suggestions"
document_type => "address"
}
}
I think you need to proceed differently. First, I would rename the suggestions field in your SQL query to something else and then build the suggestions object from the values you get from your SQL query.
statement => "SELECT id, suggestion, address_count FROM `suggestions` WHERE id <= 100"
Then you could use a ruby filter (and remove your mutate one) in order to build your suggestions field, like this:
Logstash 2.x code:
ruby {
code => "
event['suggestions']['input'] = event['suggestion']
event['suggestions']['payload']['count'] = event['address_count']
"
remove_field => [ 'suggestion', 'address_count' ]
}
Logstash 5.x code:
ruby {
code => "
event.set('[suggestions][input]', event.get('suggestion'))
event.set('[suggestions][payload][count]', event.get('address_count'))
"
remove_field => [ 'suggestion', 'address_count' ]
}
PS: All this assumes you're using ES 2.x since the payload field has disappeared in ES 5.x
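For completeness (a sketch of my own, not part of the original answer): on ES 5.x, where payloads are gone, the usual approach is to keep address_count as a regular field and rely on the suggester returning the matching document, so the ruby filter shrinks to:
filter {
  ruby {
    code => "
      # only the suggestion text goes into the completion field;
      # address_count stays as a normal top-level field in _source
      event.set('[suggestions][input]', event.get('suggestion'))
    "
    remove_field => [ 'suggestion' ]
  }
}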

How to define mapping in logstash for SQL attributes

I use Logstash to index data from a database (in this case Postgres) and put it into an Elasticsearch index. This is my config:
input {
jdbc {
jdbc_driver_library => "/path/to/driver"
jdbc_driver_class => "org.postgresql.Driver"
jdbc_connection_string => "jdbc:postgresql://POSTGRE_HOST:5432/db"
jdbc_user => "postgres"
jdbc_password => "top-secret"
statement => "SELECT id, title, description, username FROM products"
add_field => [ "type", "product" ]
}
}
output {
if [type] == "product" {
elasticsearch {
action => "index"
hosts => "localhost:9200"
index => "products"
document_id => "%{id}"
document_type => "%{type}"
workers => 1
}
}
}
Question: How can I define a mapping for my SQL query so that, e.g., title and description are indexed as the text data type, but username is indexed as the keyword data type?
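The mapping is not defined in the jdbc input; it lives in Elasticsearch. One option (a sketch assuming an Elasticsearch version with the text and keyword types, i.e. 5.x or later) is to put the field types into an index template file and point the elasticsearch output's template, template_name and manage_template options at it, much like the first question's config does. A minimal template could look like this (older Elasticsearch versions use "template": "products" instead of index_patterns):
{
  "index_patterns": ["products"],
  "mappings": {
    "product": {
      "properties": {
        "title":       { "type": "text" },
        "description": { "type": "text" },
        "username":    { "type": "keyword" }
      }
    }
  }
}
Alternatively, create the products index with this mappings block directly (PUT products) before the first Logstash run; the documents indexed by the output above will then pick up the declared types, and any field not listed falls back to dynamic mapping.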
