I want to import data from Oracle and pass one of the fields of the imported data to Elasticsearch to fetch some other details.
For example, if I get an employee ID from the Oracle DB for, say, 100 rows, I want to pass all 100 employee IDs to Elasticsearch and get back the employee name and salary.
I am able to retrieve the data from Oracle now, but I am unable to connect to Elasticsearch. I am also not sure what the better approach would be.
I am using Logstash 2.3.3 and the Elasticsearch Logstash filter plugin.
input {
  jdbc {
    jdbc_connection_string => "jdbc:oracle:thin:@<dbhost>:<port>:<sid>"
    # The user we wish to execute our statement as
    jdbc_user => "user"
    jdbc_password => "pass"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "<path>"
    # The name of the driver class for Oracle
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    # our query
    statement => "SELECT empId, desg from Employee"
  }
  elasticsearch {
    hosts => "https://xx.corp.com:9200"
    index => "empdetails"
  }
}
output {
  stdout { codec => json_lines }
}
I am getting the error below from the Elasticsearch input.
A plugin had an unrecoverable error. Will restart this plugin.
Plugin: ["https://xx.corp.com:9200"], index=>"empdetails", query=>"empId:'1001'", codec=>"UTF-8">, scan=>true, size=>1000, scroll=>"1m", docinfo=>false, docinfo_target=>"@metadata", docinfo_fields=>["_index", "_type", "_id"], ssl=>false>
Error: [401] {:level=>:error}
You need to use the elasticsearch filter, not the elasticsearch input. The input plugin runs its own query against Elasticsearch as an event source, while the filter enriches each event produced by the jdbc input; the 401 error above simply means your request was not authenticated. For example:
input {
  jdbc {
    jdbc_connection_string => "jdbc:oracle:thin:@<dbhost>:<port>:<sid>"
    # The user we wish to execute our statement as
    jdbc_user => "user"
    jdbc_password => "pass"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "<path>"
    # The name of the driver class for Oracle
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    # our query
    statement => "SELECT empId, desg from Employee"
  }
}
filter {
  elasticsearch {
    hosts => ["xx.corp.com:9200"]
    query => "empId:%{empId}"
    user => "admin"
    password => "admin"
    sort => "empName:desc"
    fields => {
      "empName" => "empName"
      "salary" => "salary"
    }
  }
}
output {
  stdout { codec => json_lines }
}
As a result, each record fetched via JDBC will be enriched with the corresponding data found in Elasticsearch.
Is there any way I can configure Logstash so that it picks up delta records in real time automatically? If not, is there any open-source plugin/tool available to achieve this? Thanks for the help.
Try the configuration below for MS SQL Server. You need to add a schedule period and a statement, i.e. the query that fetches the data from your database; here the statement uses :sql_last_value together with tracking_column so that only new (delta) rows are picked up on each scheduled run.
input {
  jdbc {
    jdbc_connection_string => "jdbc:sqlserver://localhost:1433;databaseName=test"
    # The user we wish to execute our statement as
    jdbc_user => "sa"
    jdbc_password => "sasa"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "C:\Users\abhijitb\.m2\repository\com\microsoft\sqlserver\mssql-jdbc\6.2.2.jre8\mssql-jdbc-6.2.2.jre8.jar"
    jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    #clean_run => true
    schedule => "* * * * *"
    # query
    statement => "SELECT * FROM Student where studentid > :sql_last_value"
    use_column_value => true
    tracking_column => "studentid"
  }
}
output {
  #stdout { codec => json_lines }
  elasticsearch {
    hosts => "localhost:9200"
    index => "student"
    document_type => "data"
    document_id => "%{studentid}"
  }
}
I'm trying to import a MySQL table into Elasticsearch via Logstash. One column is of type "varbinary", which causes the following error:
[2018-10-10T12:35:54,922][ERROR][logstash.outputs.elasticsearch] An unknown error occurred sending a bulk request to Elasticsearch. We will retry indefinitely {:error_message=>"\"\\xC3\" from ASCII-8BIT to UTF-8", :error_class=>"LogStash::Json::GeneratorError", :backtrace=>["/usr/share/logstash/logstash-core/lib/logstash/json.rb:27:in `jruby_dump'", "/usr/share/logstash/vendor/$
My Logstash config:
input {
  jdbc {
    jdbc_connection_string => "jdbc:mysql://localhost:3306/xyz"
    # The user we wish to execute our statement as
    jdbc_user => "test"
    jdbc_password => "test"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "/mysql-connector-java-5.1.47/mysql-connector-java-5.1.47.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    # our query
    statement => "SELECT * FROM x"
  }
}
output {
  stdout { codec => json_lines }
  elasticsearch {
    hosts => "localhost:9200"
    index => "x"
    document_type => "data"
  }
}
How can I convert the varbinary to UTF-8? Do I have to use a special filter?
Alright...after spending hours on this I found the solution right after posting this question:
columns_charset => { "column0" => "UTF8" }
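For context, columns_charset is an option of the jdbc input itself, so it belongs inside the jdbc block; "column0" is a placeholder for the actual name of the varbinary column. A minimal sketch based on the config above:
input {
  jdbc {
    jdbc_connection_string => "jdbc:mysql://localhost:3306/xyz"
    jdbc_user => "test"
    jdbc_password => "test"
    jdbc_driver_library => "/mysql-connector-java-5.1.47/mysql-connector-java-5.1.47.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    statement => "SELECT * FROM x"
    # Read the problematic column as UTF-8; replace "column0" with the real
    # name of the varbinary column.
    columns_charset => { "column0" => "UTF8" }
  }
}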
Try using an optional parameter in the connection string (characterEncoding=utf8):
jdbc_connection_string => "jdbc:mysql://localhost:3306/xyz?useSSL=false&useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&autoReconnect=true"
I am trying to get data from MongoDB into Elasticsearch using Logstash, but I get the error below:
Exception when executing JDBC query {:exception=>#<Sequel::DatabaseError: Java::OrgLogstash::MissingConverterException:
Below is my config file:
input {
  jdbc {
    jdbc_driver_library => "D:/mongojdbc1.2.jar"
    jdbc_driver_class => "com.dbschema.MongoJdbcDriver"
    jdbc_connection_string => "jdbc:mongodb://localhost:27017/users"
    jdbc_user => ""
    jdbc_validate_connection => true
    statement => "db.user_details.find({})"
  }
}
output {
  elasticsearch {
    hosts => "http://localhost:9200"
    index => "person_data"
    document_type => "person_data"
  }
  stdout { codec => rubydebug }
}
This possibly happens because a certain data type in Mongo is not convertible to a data type Logstash can send to Elasticsearch. Maybe you should try selecting only a few fields and then see which one is failing.
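If the DBSchema driver accepts a standard find() projection, a sketch like the following can help narrow down the failing field; "name" and "email" are hypothetical field names to re-add one by one:
input {
  jdbc {
    jdbc_driver_library => "D:/mongojdbc1.2.jar"
    jdbc_driver_class => "com.dbschema.MongoJdbcDriver"
    jdbc_connection_string => "jdbc:mongodb://localhost:27017/users"
    jdbc_user => ""
    jdbc_validate_connection => true
    # Project only a couple of fields, then add more until the error reappears.
    statement => "db.user_details.find({}, {name: 1, email: 1})"
  }
}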
Using Logstash, I would like to know how to send data to ES without creating duplicates. That is, I want to send only data that is not yet present in the ES instance, not data that is already there.
Today I delete all the data in the specific index in ES and then resend everything from the database. This prevents duplicates but is not ideal, since I have to delete the data manually.
This is the .config I am currently using:
input {
  jdbc {
    jdbc_driver_library => "/Users/Carl/Progs/logstash-6.3.0/mysql-connector-java/mysql-connector-java-5.1.46-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://*****"
    jdbc_user => "****"
    jdbc_password => "*****"
    schedule => "0 * * * *"
    statement => "SELECT * FROM carl.customer"
  }
}
filter {
  mutate { convert => { "long" => "float" } }
}
output {
  #stdout { codec => json_lines }
  elasticsearch {
    hosts => "localhost"
    index => "customers"
  }
}
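One common approach, assuming carl.customer has a unique key column (hypothetically called customer_id here), is to derive the Elasticsearch document_id from that key so re-sent rows overwrite their existing documents instead of creating duplicates. A minimal sketch of the output section:
output {
  elasticsearch {
    hosts => "localhost"
    index => "customers"
    # Use the table's primary key as the document id; "customer_id" is a
    # hypothetical column name, replace it with the real key.
    document_id => "%{customer_id}"
  }
}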
I have a Logstash pipeline pulling records from PostgreSQL and creating documents in Elasticsearch, but whenever I update a record in Postgres the change is not reflected in Elasticsearch. Here are my input and output configs; let me know if I am missing anything here.
input {
  jdbc {
    # Postgres jdbc connection string to our database, mydb
    jdbc_connection_string => "jdbc:postgresql://127.0.0.1:5009/data"
    # The user we wish to execute our statement as
    jdbc_user => "data"
    jdbc_password => "data"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "/postgresql-9.4.1209.jar"
    # The name of the driver class for Postgresql
    jdbc_driver_class => "org.postgresql.Driver"
    #sql_log_level => "debug"
    jdbc_paging_enabled => "true"
    jdbc_page_size => "5000"
    schedule => "* * * * *"
    # our query
    clean_run => true
    last_run_metadata_path => "/logstash/.test_metadata"
    #use_column_value => true
    #tracking_column => id
    statement => "SELECT id,name,update_date from data where update_date > :sql_last_value"
  }
}
output {
  elasticsearch {
    hosts => ["127.0.0.1"]
    index => "test_data"
    action => "index"
    document_type => "data"
    document_id => "%{id}"
    upsert => ' {
      "name" : "%{data.name}",
      "update_date" : "%{data.update_date}"
    } '
  }
}
I think you need to track a date/timestamp column instead of the id column. If you have an UPDATE_DATE column that changes on each update, that would work well.
Your SELECT statement will only grab new records (i.e. id > last_id), and since updating a record does not change its id, the updated record won't be picked up by the jdbc input the next time it runs.
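A sketch of what that could look like in the jdbc input, assuming your jdbc input version supports tracking_column_type and that update_date is the column you want to track (clean_run is left off here so the last tracked value is persisted between runs):
input {
  jdbc {
    jdbc_connection_string => "jdbc:postgresql://127.0.0.1:5009/data"
    jdbc_user => "data"
    jdbc_password => "data"
    jdbc_driver_library => "/postgresql-9.4.1209.jar"
    jdbc_driver_class => "org.postgresql.Driver"
    schedule => "* * * * *"
    last_run_metadata_path => "/logstash/.test_metadata"
    # Track the timestamp column so updated rows are picked up again.
    use_column_value => true
    tracking_column => "update_date"
    tracking_column_type => "timestamp"
    statement => "SELECT id,name,update_date from data where update_date > :sql_last_value"
  }
}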