Synchronize logstash jdbc inputs - elasticsearch

I am using multiple Logstash jdbc inputs:
jdbc {
  jdbc_driver_library => "../vendor/oracle/ojdbc7.jar"
  jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
  connection_retry_attempts => 10
  connection_retry_attempts_wait_time => 5
  jdbc_validate_connection => "true"
  jdbc_connection_string => "connectionString/myDataBase"
  jdbc_user => "USER_NAME"
  jdbc_password => "PASSWORD"
  schedule => "* * * * *"
  statement_filepath => "myPath/queryA.sql"
  tracking_column => "myTrackingcolumn"
  last_run_metadata_path => "myPath/.logstash_jdbc_last_run"
  type => "documentType"
  add_field => {
    "tag" => "myFirstTag"
  }
}
jdbc {
  jdbc_driver_library => "../vendor/oracle/ojdbc7.jar"
  jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
  connection_retry_attempts => 10
  connection_retry_attempts_wait_time => 5
  jdbc_validate_connection => "true"
  jdbc_connection_string => "connectionString/myDataBase"
  jdbc_user => "USER_NAME"
  jdbc_password => "PASSWORD"
  schedule => "* * * * *"
  statement_filepath => "myPath/queryB.sql"
  tracking_column => "myTrackingcolumn"
  last_run_metadata_path => "myPath/.logstash_jdbc_last_run"
  type => "documentType"
  add_field => {
    "tag" => "mySecondTag"
  }
}
jdbc {
  jdbc_driver_library => "../vendor/oracle/ojdbc7.jar"
  jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
  connection_retry_attempts => 10
  connection_retry_attempts_wait_time => 5
  jdbc_validate_connection => "true"
  jdbc_connection_string => "connectionString/myDataBase"
  jdbc_user => "USER_NAME"
  jdbc_password => "PASSWORD"
  schedule => "* * * * *"
  statement_filepath => "myPath/queryC.sql"
  tracking_column => "myTrackingcolumn"
  last_run_metadata_path => "myPath/.logstash_jdbc_last_run"
  type => "documentType"
  add_field => {
    "tag" => "myThirdTag"
  }
}
As there is a SESSIONS_PER_USER limit defined for the database I am querying, this provokes the following error:
Pipeline aborted due to error {:exception=>#<Sequel::DatabaseConnectionError: Java::JavaSql::SQLException: ORA-02391:
exceeded simultaneous SESSIONS_PER_USER limit>, :backtrace=>["oracle.jdbc.driver.T4CTTIoer.processError(oracle/jdbc/driver/T4CTTIoer.java:450)", "oracle.jdbc.driver.
T4CTTIoer.processError(oracle/jdbc/driver/T4CTTIoer.java:392)", "oracle.jdbc.driver.T4CTTIoer.processError(oracle/jdbc/driver/T4CTTIoer.java:385)",
"oracle.jdbc.driver.T4CTTIfun.processError(oracle/jdbc/driver/T4CTTIfun.java:938)", "oracle.dbc.driver.T4CTTIoauthenticate.processError(oracle/jdbc/driver/T4CTTIoauthenticate.java:480)",
"oracle.jdbc.driver.T4CTTIfun.receive(oracle/jdbc/driver/T4CTTIfun.java:655)", "oracle.jdbc.driver.T4CTTIfun.doRPC(oracle/jdbc/driver/T4CTTIfun.java:249)",
"oracle.jdbc.driver.T4CTTIoauthenticate.doOAUTH(oracle/jdbc/driver/T4CTTIoauthenticate.java:416)", "oracle.jdbc.driver.T4CTTIoauthenticate.doOAUTH(oracle/jdbc/driver/T4CTTIoauthenticate.java:825)",
"oracle.jdbc.driver.T4CConnection.logon(oracle/jdbc/driver/T4CConnection.java:596)", "oracle.jdbc.driver.PhysicalConnection.<init>(oracle/jdbc/driver/PhysicalConnection.java:715)",
"oracle.jdbc.driver.T4CConnection.<init>(oracle/jdbc/driver/T4CConnection.java:385)", "oracle.jdbc.driver.T4CDriverExtension.getConnection(oracle/jdbc/driver/T4CDriverExtension.java:30)",
"oracle.jdbc.driver.OracleDriver.connect(oracle/jdbc/driver/OracleDriver.java:564)", "RUBY.connect(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/adapters/jdbc.rb:222)",
"RUBY.make_new(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool.rb:110)", "RUBY.make_new(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:226)",
"RUBY.available(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:199)", "RUBY._acquire(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:135)",
"RUBY.acquire(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:149)", "RUBY.sync(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:280)",
"org.jruby.ext.thread.Mutex.synchronize(org/jruby/ext/thread/Mutex.java:149)", "RUBY.sync(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:280)",
"RUBY.acquire(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:148)", "RUBY.acquire(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/extensions/connection_validator.rb:98)",
"RUBY.hold(D:myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/connection_pool/threaded.rb:106)", "RUBY.synchronize(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/database/connecting.rb:256)",
"RUBY.test_connection(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/sequel-4.36.0/lib/sequel/database/connecting.rb:266)", "RUBY.prepare_jdbc_connection(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-input-jdbc-3.1.0/lib/logstash/plugin_mixins/jdbc.rb:173)",
"RUBY.register(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-input-jdbc-3.1.0/lib/logstash/inputs/jdbc.rb:187)", "RUBY.start_inputs(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-core-2.3.4-java/lib/logstash/pipeline.rb:330)", "org.jruby.RubyArray.each(org/jruby/RubyArray.java:1613)",
"RUBY.start_inputs(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-core-2.3.4-java/lib/logstash/pipeline.rb:329)", "RUBY.start_workers(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-core-2.3.4-java/lib/logstash/pipeline.rb:180)",
"RUBY.run(myPath/Env/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-core-2.3.4-java/lib/logstash/pipeline.rb:136)",
"RUBY.start_pipeline(myPath/logstash-2.3.4/vendor/bundle/jruby/1.9/gems/logstash-core-2.3.4-java/lib/logstash/agent.rb:473)"], :level=>:error}?[0mstopping pipeline {:id=>"main"}
How can I configure these inputs so that Logstash performs the SQL queries sequentially and avoids exceeding the permitted session limit?

I don't think there is a way to run the inputs sequentially.
However, the schedule option of the jdbc input can reduce the frequency of the queries, in order to stay under the SESSIONS_PER_USER limit.
As it is, schedule => "* * * * *" makes each plugin connect to the database every minute. You can instead use schedule => "*/15 * * * *", which connects every 15 minutes.
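If lowering the frequency alone is not enough, you can also stagger the cron schedules so the three inputs never open their Oracle sessions in the same minute. A rough sketch, assuming everything else in the inputs above stays the same (the 5-minute offsets are arbitrary):

# input for queryA.sql: runs at minutes 0, 15, 30, 45
schedule => "0,15,30,45 * * * *"
# input for queryB.sql: runs at minutes 5, 20, 35, 50
schedule => "5,20,35,50 * * * *"
# input for queryC.sql: runs at minutes 10, 25, 40, 55
schedule => "10,25,40,55 * * * *"

This does not make the queries strictly sequential, but it spreads the connections out so fewer sessions are open at the same time.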

Related

How to insert multiple table values into each table?

Using Logstash, I want to index multiple tables into Elasticsearch.
I have used Logstash with the jdbc input several times,
but only one value gets saved per table.
I tried the answer from this Stack Overflow question, but it didn't solve my problem:
-> multiple inputs on logstash jdbc
This is my conf file, the one I actually ran.
input {
  jdbc {
    jdbc_driver_library => "/usr/share/java/mysql-connector-java-8.0.23.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/db_name?useSSL=false&user=root&password=1234"
    jdbc_user => "root"
    jdbc_password => "1234"
    schedule => "* * * * *"
    statement => "select * from table_name1"
    tracking_column => "table_name1"
    use_column_value => true
    clean_run => true
  }
  jdbc {
    jdbc_driver_library => "/usr/share/java/mysql-connector-java-8.0.23.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/db_name?useSSL=false&user=root&password=1234"
    jdbc_user => "root"
    jdbc_password => "1234"
    schedule => "* * * * *"
    statement => "select * from table_name2"
    tracking_column => "table_name2"
    use_column_value => true
    clean_run => true
  }
  jdbc {
    jdbc_driver_library => "/usr/share/java/mysql-connector-java-8.0.23.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/db_name?useSSL=false&user=root&password=1234"
    jdbc_user => "root"
    jdbc_password => "1234"
    schedule => "* * * * *"
    statement => "select * from table_name3"
    tracking_column => "table_name3"
    use_column_value => true
    clean_run => true
  }
}
output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "aws_05181830_2"
    document_type => "%{type}"
    document_id => "{%[@metadata][document_id]}"
  }
  stdout {
    codec => rubydebug
  }
}
Problem
1. As the picture shows, only one value is saved per table.
2. When a new table's data comes in, the existing table's values disappear.
My goals
How can I save each table's data properly, without duplicates?
You are setting the document_id of the document in elasticsearch using
document_id => "{%[@metadata][document_id]}"
This is not a valid sprintf reference, so it uses the literal value {%[@metadata][document_id]}. As a result, every document you index overwrites the previous document. I suggest you remove this option.
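For reference, a corrected output could look like the sketch below. Either omit document_id entirely and let Elasticsearch generate ids, or, if you want deterministic ids, use proper %{...} sprintf syntax; the commented-out line assumes a filter actually populates [@metadata][document_id], which the config above does not do:

output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "aws_05181830_2"
    document_type => "%{type}"
    # document_id => "%{[@metadata][document_id]}"   # only if a filter sets this metadata field
  }
  stdout {
    codec => rubydebug
  }
}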

Trying to get the data from oracle database through logstash but data is not coming to elasticsearch

I am trying to get data from an Oracle database through Logstash, but the data is not arriving in Elasticsearch. I am not sure where I went wrong; I don't see any error in the Logstash log file. Below is the Logstash conf file.
input {
  jdbc {
    jdbc_validate_connection => "true"
    jdbc_connection_string => "jdbc:oracle:thin:@//server:1521/db"
    jdbc_user => "user"
    jdbc_password => "pass"
    jdbc_driver_library => "/etc/logstash/files/ojdbc7.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_paging_enabled => "true"
    schedule => "* * * * *"
    statement_filepath => "/etc/logstash/files/keycount.sql"
    use_column_value => "true"
    tracking_column => "timestamp"
    last_run_metadata_path => "/etc/logstash/files/.logstash_jdbc_last_run"
  }
}
output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "keyinventory-%{+YYYY}"
  }
  stdout {
    codec => rubydebug
  }
}
Please, someone, help me.

Logstash Scheduling first run

I have a Logstash pipeline running every 5 minutes with the jdbc input config below. The issue is that, upon starting the pipeline for the first time, it also waits 5 minutes before the first run. Is there any way to specify that the query/statement is executed as soon as the Logstash pipeline starts, instead of also waiting 5 minutes for the first run?
input {
  jdbc {
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://${DB_HOST}/${DB_NAME}?useSSL=false"
    jdbc_user => "${DB_USER_NAME}"
    jdbc_password => "${DB_PASSWORD}"
    schedule => "*/5 * * * *"
    statement => "Select * from students"
  }
}
A workaround for this is to have two jdbc inputs: one without a schedule, so it runs once at startup, and one with the schedule. It requires a bit of copy/paste but is not too bad.
input {
  jdbc {
    id => "index_name_startup"
    jdbc_connection_string => "${JDBC_STRING}"
    jdbc_user => "${JDBC_USER}"
    jdbc_password => "${JDBC_PASSWORD}"
    jdbc_driver_library => "/opt/logstash/postgresql-42.2.5.jre7.jar"
    jdbc_driver_class => "org.postgresql.Driver"
    add_field => { "[@metadata][project_id]" => "index_name" }
    statement_filepath => "/mnt/elastic-search-config/sql-scripts/assets-index_name.sql"
  }
  jdbc {
    id => "index_name"
    jdbc_connection_string => "${JDBC_STRING}"
    jdbc_user => "${JDBC_USER}"
    jdbc_password => "${JDBC_PASSWORD}"
    jdbc_driver_library => "/opt/logstash/postgresql-42.2.5.jre7.jar"
    jdbc_driver_class => "org.postgresql.Driver"
    add_field => { "[@metadata][project_id]" => "index_name" }
    statement_filepath => "/mnt/elastic-search-config/sql-scripts/assets-index_name.sql"
    schedule => "0 * * * *"
  }
}
filter {
  mutate {
    gsub => [
      "name", "_", " ",
      "name", "-", " "
    ]
  }
}
output {
  if [@metadata][project_id] == "index_name" {
    elasticsearch {
      index => "index_name"
      document_id => "%{geom_id}"
      hosts => "localhost:9200"
      template_name => "assets_template"
      id => "index_name_es"
    }
  }
}
No, that is not the way rufus cron schedules work (and that is what the jdbc input uses). There is an open issue that includes a link to a patch that adds this.

Logstash 6.2.3 Detected a 6.x and above cluster: the `type` event field won't be used to determine the document _type

I'm getting this warning when I start Logstash with the config given below.
If type has been removed, how do I map multiple jdbc inputs to separate indices called "agency" and "subscriber", and how do I define the output to Elasticsearch?
input {
  jdbc {
    jdbc_driver_library => "mysql-connector-java-5.1.44.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/dbname"
    jdbc_user => "XXXX"
    jdbc_password => "XXXX"
    jdbc_paging_enabled => "true"
    jdbc_fetch_size => 500
    lowercase_column_names => "false"
    schedule => "* * * * * *"
    last_run_metadata_path => "\RunConfig\logpos\agency_last_run"
    statement_filepath => "\RunConfig\sql\agency.sql"
    type => "agencydetails"
  }
  jdbc {
    type => "subscriberdetails"
    jdbc_driver_library => "mysql-connector-java-5.1.44.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/dbname"
    jdbc_user => "XXXX"
    jdbc_password => "XXXX"
    jdbc_paging_enabled => "true"
    jdbc_fetch_size => 500
    lowercase_column_names => "false"
    schedule => "* * * * * *"
    last_run_metadata_path => "RunConfig\logpos\subscriber_last_run"
    statement_filepath => "\RunConfig\sql\subscriber.sql"
  }
}
You can use two separate configuration files to define two pipelines: each pipeline fetches from only one jdbc input and inserts into its own index; in this case, you will need two running Logstash instances.
Otherwise, you can also use a single instance, with an if/else conditional to route the data to the preferred index:
output
{
  if [type] == "agencydetails"
  {
    elasticsearch
    {
      hosts => "localhost:9200"
      user => "xxx"
      password => "xxx"
      index => "agencydetails"
    }
  }
  else
  {
    elasticsearch
    {
      hosts => "localhost:9200"
      user => "xxx"
      password => "xxx"
      index => "subscriberdetails"
    }
  }
}
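If you go with the two-separate-files approach instead, a minimal way to run them as two instances could look like this (the file names and path.data directories are only examples; each instance needs its own path.data so they don't lock each other's data directory):

bin/logstash -f agency.conf --path.data /var/lib/logstash-agency
bin/logstash -f subscriber.conf --path.data /var/lib/logstash-subscriber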

multiple inputs on logstash jdbc

I am using the Logstash jdbc input to keep things synced between MySQL and Elasticsearch. It works fine for one table, but now I want to do it for multiple tables. Do I need to open multiple terminals, each running
logstash agent -f /Users/logstash/logstash-jdbc.conf
with its own select query, or is there a better way of doing it so that multiple tables are kept updated?
My config file:
input {
  jdbc {
    jdbc_driver_library => "/Users/logstash/mysql-connector-java-5.1.39-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database_name"
    jdbc_user => "root"
    jdbc_password => "password"
    schedule => "* * * * *"
    statement => "select * from table1"
  }
}
output {
  elasticsearch {
    index => "testdb"
    document_type => "table1"
    document_id => "%{table_id}"
    hosts => "localhost:9200"
  }
}
You can definitely have a single config with multiple jdbc inputs and then parameterize the index and document_type in your elasticsearch output depending on which table the event is coming from.
input {
  jdbc {
    jdbc_driver_library => "/Users/logstash/mysql-connector-java-5.1.39-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database_name"
    jdbc_user => "root"
    jdbc_password => "password"
    schedule => "* * * * *"
    statement => "select * from table1"
    type => "table1"
  }
  jdbc {
    jdbc_driver_library => "/Users/logstash/mysql-connector-java-5.1.39-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database_name"
    jdbc_user => "root"
    jdbc_password => "password"
    schedule => "* * * * *"
    statement => "select * from table2"
    type => "table2"
  }
  # add more jdbc inputs to suit your needs
}
output {
  elasticsearch {
    index => "testdb"
    document_type => "%{type}" # <- use the type from each input
    hosts => "localhost:9200"
  }
}
This will not create duplicate data, and it is compatible with Logstash 6.x.
# YOUR_DATABASE_NAME : test
# FIRST_TABLE : place
# SECOND_TABLE : things
# SET_DATA_INDEX : test_index_1, test_index_2
input {
  jdbc {
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "/mysql-connector-java-5.1.44-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    # MySQL jdbc connection string to our database, YOUR_DATABASE_NAME
    jdbc_connection_string => "jdbc:mysql://localhost:3306/test"
    # The user we wish to execute our statement as
    jdbc_user => "root"
    jdbc_password => ""
    schedule => "* * * * *"
    statement => "SELECT @slno:=@slno+1 aut_es_1, es_qry_tbl.* FROM (SELECT * FROM `place`) es_qry_tbl, (SELECT @slno:=0) es_tbl"
    type => "place"
    add_field => { "queryFunctionName" => "getAllDataFromFirstTable" }
    use_column_value => true
    tracking_column => "aut_es_1"
  }
  jdbc {
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "/mysql-connector-java-5.1.44-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    # MySQL jdbc connection string to our database, YOUR_DATABASE_NAME
    jdbc_connection_string => "jdbc:mysql://localhost:3306/test"
    # The user we wish to execute our statement as
    jdbc_user => "root"
    jdbc_password => ""
    schedule => "* * * * *"
    statement => "SELECT @slno:=@slno+1 aut_es_2, es_qry_tbl.* FROM (SELECT * FROM `things`) es_qry_tbl, (SELECT @slno:=0) es_tbl"
    type => "things"
    add_field => { "queryFunctionName" => "getAllDataFromSecondTable" }
    use_column_value => true
    tracking_column => "aut_es_2"
  }
}
# install the uuid plugin: 'bin/logstash-plugin install logstash-filter-uuid'
# The uuid filter allows you to generate a UUID and add it as a field to each processed event.
filter {
  mutate {
    add_field => {
      "[@metadata][document_id]" => "%{aut_es_1}%{aut_es_2}"
    }
  }
  uuid {
    target => "uuid"
    overwrite => true
  }
}
output {
  stdout { codec => rubydebug }
  if [type] == "place" {
    elasticsearch {
      hosts => "localhost:9200"
      index => "test_index_1_12"
      # document_id => "%{aut_es_1}"
      document_id => "%{[@metadata][document_id]}"
    }
  }
  if [type] == "things" {
    elasticsearch {
      hosts => "localhost:9200"
      index => "test_index_2_13"
      document_id => "%{[@metadata][document_id]}"
      # document_id => "%{aut_es_2}"
      # you can set document_id; otherwise ES will generate a unique id.
    }
  }
}
If you need to run more than one pipeline in the same process, Logstash provides a way to do this through a configuration file called pipelines.yml (see the multiple pipelines documentation).
Using multiple pipelines is especially useful if your current configuration has event flows that don't share the same inputs/filters and outputs and are being separated from each other using tags and conditionals.
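A minimal pipelines.yml sketch for the two-table case might look like this (the pipeline ids and config paths are only examples):

# config/pipelines.yml
- pipeline.id: table1
  path.config: "/Users/logstash/table1.conf"
- pipeline.id: table2
  path.config: "/Users/logstash/table2.conf"

Each pipeline then gets its own jdbc input and elasticsearch output, without any conditionals or type fields.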
