Logstash JDBC adapter: varbinary to UTF-8? (MySQL to Elasticsearch import)

I'm trying to import a MySQL table into Elasticsearch via Logstash. One column is of type varbinary, which causes the following error:
[2018-10-10T12:35:54,922][ERROR][logstash.outputs.elasticsearch] An unknown error occurred sending a bulk request to Elasticsearch. We will retry indefinitely {:error_message=>"\"\\xC3\" from ASCII-8BIT to UTF-8", :error_class=>"LogStash::Json::GeneratorError", :backtrace=>["/usr/share/logstash/logstash-core/lib/logstash/json.rb:27:in `jruby_dump'", "/usr/share/logstash/vendor/$
My Logstash config:
input {
  jdbc {
    jdbc_connection_string => "jdbc:mysql://localhost:3306/xyz"
    # The user we wish to execute our statement as
    jdbc_user => "test"
    jdbc_password => "test"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "/mysql-connector-java-5.1.47/mysql-connector-java-5.1.47.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    # our query
    statement => "SELECT * FROM x"
  }
}
output {
  stdout { codec => json_lines }
  elasticsearch {
    hosts => "localhost:9200"
    index => "x"
    document_type => "data"
  }
}
How can I convert the varbinary to UTF-8? Do I have to use a special filter?

Alright... after spending hours on this, I found the solution right after posting this question:
columns_charset => { "column0" => "UTF8" }
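For context, here is roughly where the option goes; a minimal sketch assuming the varbinary column is literally named column0 (substitute the real column name):
input {
  jdbc {
    # ... connection settings as in the config above ...
    statement => "SELECT * FROM x"
    # reinterpret this column's raw bytes as UTF-8 before the event is serialized to JSON
    columns_charset => { "column0" => "UTF8" }
  }
}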

Try adding the optional characterEncoding=utf8 parameter to the connection string:
jdbc_connection_string => "jdbc:mysql://localhost:3306/xyz?useSSL=false&useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&autoReconnect=true"
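With Connector/J, useUnicode=true together with characterEncoding=utf8 tells the driver itself to decode column data as UTF-8, so string fields should already arrive in Logstash with the correct encoding and no columns_charset mapping is needed.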

Related

MySQL to Elasticsearch via Logstash problem: incompatible encodings: CP850 and UTF-8

I am running an ELK stack via docker-compose, with ES version 8.4.0.
My goal is to use Logstash to copy an entire table from my MySQL DB to ES.
The connection works and Logstash copies about 30 entries with no problems, but then I get a long error message.
[2022-09-10T18:41:26,318][ERROR][logstash.outputs.elasticsearch][main][757e3825fce0788f949869472d03e028630de9d063200717b56bc9ceefe29d81] An unknown error occurred sending a bulk request to Elasticsearch (will retry indefinitely) {:message=>"incompatible encodings: CP850 and UTF-8", :exception=>Encoding::CompatibilityError, :backtrace=>["org/jruby/ext/stringio/StringIO.java:1162:in `write'", "D:/logstash/vendor/bundle/jruby/2.6.0/gems/logstash-output-elasticsearch-11.6.0-java/lib/logstash/outputs/elasticsearch/http_client.rb:142:in `block in bulk'", "org/jruby/RubyArray.java:1865:in `each'", "org/jruby/RubyEnumerable.java:1143:in `each_with_index'", "D:/logstash/vendor/bundle/jruby/2.6.0/gems/logstash-output-elasticsearch-11.6.0-java/lib/logstash/outputs/elasticsearch/http_client.rb:125:in `bulk'", "D:/logstash/vendor/bundle/jruby/2.6.0/gems/logstash-output-elasticsearch-11.6.0-java/lib/logstash/plugin_mixins/elasticsearch/common.rb:296:in `safe_bulk'", "D:/logstash/vendor/bundle/jruby/2.6.0/gems/logstash-output-elasticsearch-11.6.0-java/lib/logstash/plugin_mixins/elasticsearch/common.rb:228:in `submit'", "D:/logstash/vendor/bundle/jruby/2.6.0/gems/logstash-output-elasticsearch-11.6.0-java/lib/logstash/plugin_mixins/elasticsearch/common.rb:177:in `retrying_submit'", "D:/logstash/vendor/bundle/jruby/2.6.0/gems/logstash-output-elasticsearch-11.6.0-java/lib/logstash/outputs/elasticsearch.rb:342:in `multi_receive'", "org/logstash/config/ir/compiler/AbstractOutputDelegatorExt.java:121:in `multi_receive'", "D:/logstash/logstash-core/lib/logstash/java_pipeline.rb:300:in `block in start_workers'"]}
I suspect this error is the reason:
{:message=>"incompatible encodings: CP850 and UTF-8", :exception=>Encoding::CompatibilityError
My config file looks like this:
input {
  jdbc {
    clean_run => true
    jdbc_driver_library => "D:\logstash\mysql-connector-java-8.0.30.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/posts"
    jdbc_user => "sqluser"
    jdbc_password => "sqlpassword"
    schedule => "* * * * *"
    statement => "SELECT id, id_post, url, id_subforum, author, text, spread, date, added
                  FROM telegram.channel_results WHERE id > :sql_last_value;"
    use_column_value => true
    tracking_column => "id"
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "posts"
    user => "username"
    password => "password"
  }
  stdout {
    codec => rubydebug
  }
}
I noticed that if I remove the text column from the query, the process runs without any problems. In my database, the text column is of SQL type text. I suspect an encoding problem, because the texts include Russian text and emoji. I need a solution that also copies these texts into ES. Maybe it is an encoding problem with the emoji and other characters in the text?
Try the columns_charset option on the jdbc input, as below.
jdbc {
  clean_run => true
  jdbc_driver_library => "D:\logstash\mysql-connector-java-8.0.30.jar"
  jdbc_driver_class => "com.mysql.jdbc.Driver"
  jdbc_connection_string => "jdbc:mysql://localhost:3306/posts"
  jdbc_user => "sqluser"
  jdbc_password => "sqlpassword"
  schedule => "* * * * *"
  statement => "SELECT id, id_post, url, id_subforum, author, text, spread, date, added
                FROM telegram.channel_results WHERE id > :sql_last_value;"
  use_column_value => true
  tracking_column => "id"
  columns_charset => {
    "text" => "ISO-8859-5"
  }
}
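Note that ISO-8859-5 covers Cyrillic but contains no emoji, so if the column really holds emoji it may be worth forcing UTF-8 at the connection level instead, along the lines of the first answer above; a sketch adapted to this connection string:
jdbc_connection_string => "jdbc:mysql://localhost:3306/posts?useUnicode=true&characterEncoding=utf8"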

I am trying to get data from MongoDB to Elasticsearch using Logstash

I am trying to get data from MongoDB to Elasticsearch using Logstash, but I get the error below:
Exception when executing JDBC query {:exception=>#<Sequel::DatabaseError: Java::OrgLogstash::MissingConverterException:
Below is my config file:
input {
  jdbc {
    jdbc_driver_library => "D:/mongojdbc1.2.jar"
    jdbc_driver_class => "com.dbschema.MongoJdbcDriver"
    jdbc_connection_string => "jdbc:mongodb://localhost:27017/users"
    jdbc_user => ""
    jdbc_validate_connection => true
    statement => "db.user_details.find({})"
  }
}
output {
  elasticsearch {
    hosts => 'http://localhost:9200'
    index => 'person_data'
    document_type => "person_data"
  }
  stdout { codec => rubydebug }
}
This possibly happens because certain data types in MongoDB might not be convertible to data types in Elasticsearch. Maybe you should try to select only a few fields at a time and see which one is failing.
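For example, you could narrow the statement with a projection and add fields back one at a time until the failing one shows up. A sketch with hypothetical field names, assuming the DBSchema driver passes the projection through to MongoDB:
statement => "db.user_details.find({}, {'name': 1, 'age': 1})"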

Error in JDBC connection using Logstash

I am trying to get my SQL Server table into Elasticsearch using Logstash. For that I have created the configuration file below.
input {
  jdbc {
    jdbc_connection_string => "jdbc:sqlserver://xxx.xxx.x.xxx:1433/DB_name"
    jdbc_user => "devuser"
    jdbc_password => "devuser"
    jdbc_driver_library => "D:/Mssqljdbc/sqljdbc4-2.0.jar"
    jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    statement => "SELECT * FROM sample"
  }
}
output {
  stdout { codec => json_lines }
  elasticsearch {
    hosts => "localhost"
    index => "testmigrate"
    document_type => "data"
  }
}
Then I run bin\logstash -f sqltable.conf to execute it.
But I get:
Error: Java::ComMicrosoftSqlserverJdbc::SQLServerException: The port number 1433/DB_name is not valid.
I checked that I can ping the IP address and the port is open, but I still get the same error. Please help.
After a bit of digging I made a small change and it worked for me: I put databaseName= in front of DB_name, separated from the port by a semicolon.
input {
  jdbc {
    jdbc_connection_string => "jdbc:sqlserver://xxx.xxx.x.xxx:1433;databaseName=DB_name"
    jdbc_user => "devuser"
    jdbc_password => "devuser"
    jdbc_driver_library => "D:/Mssqljdbc/sqljdbc4-2.0.jar"
    jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    statement => "SELECT * FROM sample"
  }
}
output {
  stdout { codec => json_lines }
  elasticsearch {
    hosts => "localhost"
    index => "testmigrate"
    document_type => "data"
  }
}
Strangely, I didn't find this in any of the documentation.
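For what it's worth, the Microsoft JDBC driver separates connection properties with semicolons, with the general form jdbc:sqlserver://serverName[:portNumber][;property=value], which explains why 1433/DB_name was parsed as one invalid port number.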

Exception: LogStash::ConfigurationError

I am trying to connect to an Oracle database via Logstash and am getting the error below.
Error: oracle.jdbc.OracleDriver not loaded. Are you sure you've included the correct jdbc driver in :jdbc_driver_library?
Exception: LogStash::ConfigurationError
Stack: D:/softwares/logstash-6.2.4/logstash-6.2.4/vendor/bundle/jruby/2.3.0/gems/logstash-input-jdbc-4.3.9/lib/logstash/plugin_mixins/jdbc.rb:162:in `open_jdbc_connection'
My Logstash config file:
input {
  jdbc {
    jdbc_driver_library => "D:\data\ojdbc14.jar"
    jdbc_driver_class => "oracle.jdbc.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@localhost:1521:xe"
    jdbc_user => "user_0ne"
    jdbc_password => "xxxyyyzzz"
    statement => "SELECT * FROM PRODUCT"
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "my_index"
  }
}
Corrected Logstash config file:
input {
  jdbc {
    jdbc_driver_library => "D:\Karthikeyan\data\ojdbc14.jar"
    jdbc_driver_class => "Java::oracle.jdbc.OracleDriver"   # the problem was in this line; note the Java:: prefix
    jdbc_connection_string => "jdbc:oracle:thin:@localhost:1521:xe"
    jdbc_user => "vb"
    jdbc_password => "123456"
    statement => "SELECT * FROM VB_PRODUCT"
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "my_index"
  }
}
You can validate the configuration file with:
/usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/sample.conf --config.test_and_exit
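With --config.test_and_exit, Logstash parses and validates the configuration and then exits without starting the pipeline; note that this checks syntax and plugin options, not whether the JDBC driver or database connection actually works.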

Issue connecting to Elasticsearch from Logstash input

I want to import data from Oracle and pass one of the fields of the imported data to Elasticsearch to fetch some other details.
For example, if I get 100 employee IDs from the Oracle DB, I want to pass all 100 employee IDs to Elasticsearch and get back each employee's name and salary.
I am able to retrieve the data from Oracle now, but I am unable to connect to Elasticsearch. I am also not sure what the better approach would be.
I am using Logstash 2.3.3 and the Elasticsearch Logstash filter plugin.
input {
  jdbc {
    jdbc_connection_string => "jdbc:oracle:thin:@<dbhost>:<port>:<sid>"
    # The user we wish to execute our statement as
    jdbc_user => "user"
    jdbc_password => "pass"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "<path>"
    # The name of the driver class for oracle
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    # our query
    statement => "SELECT empId, desg from Employee"
  }
  elasticsearch {
    hosts => "https://xx.corp.com:9200"
    index => "empdetails"
  }
}
output {
  stdout { codec => json_lines }
}
I am getting the error below, caused by the elasticsearch input.
A plugin had an unrecoverable error. Will restart this plugin.
Plugin: ["https://xx.corp.com:9200"], index=>"empdetails ", query=>"empId:'1001'", codec=>"UTF-8">, scan=>true, size=>1000, scroll=>"1m", docinfo=>false, docinfo_target=>"@metadata", docinfo_fields=>["_index", "_type", "_id"], ssl=>false>
Error: [401] {:level=>:error}
You need to use the elasticsearch filter, not the elasticsearch input. The Error: [401] also means the request was rejected as unauthorized, which is why the filter below passes user and password.
input {
  jdbc {
    jdbc_connection_string => "jdbc:oracle:thin:@<dbhost>:<port>:<sid>"
    # The user we wish to execute our statement as
    jdbc_user => "user"
    jdbc_password => "pass"
    # The path to our downloaded jdbc driver
    jdbc_driver_library => "<path>"
    # The name of the driver class for oracle
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    # our query
    statement => "SELECT empId, desg from Employee"
  }
}
filter {
  elasticsearch {
    hosts => ["xx.corp.com:9200"]
    query => "empId:%{empId}"
    user => "admin"
    password => "admin"
    sort => "empName:desc"
    fields => {
      "empName" => "empName"
      "salary" => "salary"
    }
  }
}
output {
  stdout { codec => json_lines }
}
As a result, each record fetched via JDBC is enriched with the corresponding data found in ES.
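Concretely, for each event the %{empId} placeholder in query is expanded to that row's empId value before the lookup runs, and the fields mapping copies empName and salary from the matching ES document onto the event.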
