Nested document to elasticsearch using logstash - elasticsearch

Hi all, I am trying to index documents from MSSQL Server into Elasticsearch using Logstash. I want my documents to be ingested as nested documents, but I am getting an aggregate exception error.
Here is all my code.
Create table department(
ID Int identity(1,1) not null,
Name varchar(100)
)
Insert into department(Name)
Select 'IT Application development'
union all
Select 'HR & Marketing'
Create table Employee(
ID Int identity(1,1) not null,
emp_Name varchar(100),
dept_Id int
)
Insert into Employee(emp_Name,dept_Id)
Select 'Mohan',1
union all
Select 'parthi',1
union all
Select 'vignesh',1
Insert into Employee(emp_Name,dept_Id)
Select 'Suresh',2
union all
Select 'Jithesh',2
union all
Select 'Venkat',2
Final select statement
SELECT
De.id AS id,De.name AS deptname,Emp.id AS empid,Emp.emp_name AS empname
FROM department De LEFT JOIN employee Emp ON De.id = Emp.dept_Id
ORDER BY De.id
The result should look like this:
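(The original screenshot is not included; based on the insert statements above the rows would be roughly:)
id | deptname                    | empid | empname
1  | IT Application development  | 1     | Mohan
1  | IT Application development  | 2     | parthi
1  | IT Application development  | 3     | vignesh
2  | HR & Marketing              | 4     | Suresh
2  | HR & Marketing              | 5     | Jithesh
2  | HR & Marketing              | 6     | Venkat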
My Elasticsearch mapping:
PUT /departments
{
"mappings": {
"properties": {
"id":{
"type":"integer"
},
"deptname":{
"type":"text"
},
"employee_details":{
"type": "nested",
"properties": {
"empid":{
"type":"integer"
},
"empname":{
"type":"text"
}
}
}
}
}
}
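(For reference, once documents are indexed with this mapping, the nested employees can be queried with a standard nested query; a minimal sketch:)
GET /departments/_search
{
  "query": {
    "nested": {
      "path": "employee_details",
      "query": {
        "match": { "employee_details.empname": "mohan" }
      }
    }
  }
}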
My Logstash config file:
input {
jdbc {
jdbc_driver_library => ""
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxxx;"
jdbc_user => "xxxx"
jdbc_password => "xxxx"
statement => "SELECT
De.id AS id,De.name AS deptname,Emp.id AS empid,Emp.emp_name AS empname
FROM department De LEFT JOIN employee Emp ON De.id = Emp.dept_Id
ORDER BY De.id"
}
}
filter{
aggregate {
task_id => "%{id}"
code => "
map['id'] = event['id']
map['deptname'] = event['deptname']
map['employee_details'] ||= []
map['employee_details'] << {'empId' => event['empid'], 'empname' => event['empname'] }
"
push_previous_map_as_event => true
timeout => 5
timeout_tags => ['aggregated']
}
}
output{
stdout{ codec => rubydebug }
elasticsearch{
hosts => "https://d9bc7cbca5ec49ea96a6ea683f70caca.eastus2.azure.elastic-cloud.com:4567"
user => "elastic"
password => "****"
index => "departments"
action => "index"
document_type => "departments"
document_id => "%{id}"
}
}
While running Logstash I am getting the below error.
Elasticsearch screenshot for reference.
My Elasticsearch output should be something like this:
{
"took" : 398,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "departments",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"id" : 1,
"deptname" : "IT Application development"
"employee_details" : [
{
"empid" : 1,
"empname" : "Mohan"
},
{
"empid" : 2,
"empname" : "Parthi"
},
{
"empid" : 3,
"empname" : "Vignesh"
}
]
}
}
]
}
}
Could anyone please help me resolve this issue? I want the empname and empid of all employees to be inserted as nested documents under their respective department. Thanks in advance.
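(For what it's worth, the aggregate exception is most likely the event['field'] accessor syntax, which newer Logstash versions replaced with event.get/event.set, combined with the jdbc input lowercasing column aliases by default. A minimal sketch of the filter with those two changes, not a verified fix for this exact setup:)
filter {
  aggregate {
    task_id => "%{id}"
    code => "
      map['id'] = event.get('id')
      map['deptname'] = event.get('deptname')
      map['employee_details'] ||= []
      # column aliases arrive lowercased by the jdbc input (lowercase_column_names defaults to true)
      map['employee_details'] << { 'empid' => event.get('empid'), 'empname' => event.get('empname') }
      # drop the per-row event so only the aggregated map is emitted
      event.cancel()
    "
    push_previous_map_as_event => true
    timeout => 5
    timeout_tags => ['aggregated']
  }
}
The aggregate filter also needs a single pipeline worker (-w 1) so that all rows for one id reach the same map.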

Instead of the aggregate filter I used JDBC_STREAMING, and it is working fine. It might be helpful to someone looking at this post.
input {
jdbc {
jdbc_driver_library => "D:/Users/xxxx/Desktop/driver/mssql-jdbc-7.4.1.jre12-shaded.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxx;"
jdbc_user => "xxx"
jdbc_password => "xxxx"
statement => "Select Policyholdername,Age,Policynumber,Dob,Client_Address,is_active from policy"
}
}
filter{
jdbc_streaming {
jdbc_driver_library => "D:/Users/xxxx/Desktop/driver/mssql-jdbc-7.4.1.jre12-shaded.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxxx;"
jdbc_user => "xxxx"
jdbc_password => "xxxx"
statement => "select claimnumber,claimtype,is_active from claim where policynumber = :policynumber"
parameters => {"policynumber" => "policynumber"}
target => "claim_details"
}
}
output {
elasticsearch {
hosts => "https://e5a4a4a4de7940d9b12674d62eac9762.eastus2.azure.elastic-cloud.com:9243"
user => "elastic"
password => "xxxx"
index => "xxxx"
action => "index"
document_type => "_doc"
document_id => "%{policynumber}"
}
stdout { codec => rubydebug }
}
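With this configuration each policy event ends up with a claim_details array holding one hash per matching claim row, roughly as below (values are hypothetical and key casing can vary by driver). If claim_details needs to behave as a nested type in queries, the index mapping still has to declare it as nested, as in the mapping earlier in the question.
{
  "policynumber" => "PN-1001",        # hypothetical sample values
  "policyholdername" => "Mohan",
  "claim_details" => [
    { "claimnumber" => "CL-1", "claimtype" => "health", "is_active" => true },
    { "claimnumber" => "CL-2", "claimtype" => "motor", "is_active" => false }
  ]
}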

You can also try to make use of the aggregate filter plugin in Logstash. Check this:
Inserting Nested Objects using Logstash
https://xyzcoder.github.io/2020/07/29/indexing-documents-using-logstash-and-python.html
I am just showing a single object here, but we can also have multiple arrays of items.
input {
jdbc {
jdbc_driver_library => "/usr/share/logstash/javalib/mssql-jdbc-8.2.2.jre11.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://host.docker.internal;database=StackOverflow2010;user=pavan;password=pavankumar#123"
jdbc_user => "pavan"
jdbc_password => "pavankumar#123"
statement => "select top 500 p.Id as PostId,p.AcceptedAnswerId,p.AnswerCount,p.Body,u.Id as userid,u.DisplayName,u.Location
from StackOverflow2010.dbo.Posts p inner join StackOverflow2010.dbo.Users u
on p.OwnerUserId=u.Id"
}
}
filter {
aggregate {
task_id => "%{postid}"
code => "
map['postid'] = event.get('postid')
map['accepted_answer_id'] = event.get('acceptedanswerid')
map['answer_count'] = event.get('answercount')
map['body'] = event.get('body')
map['user'] = {
'id' => event.get('userid'),
'displayname' => event.get('displayname'),
'location' => event.get('location')
}
event.cancel()"
push_previous_map_as_event => true
timeout => 30
}
}
output {
elasticsearch {
hosts => ["http://elasticsearch:9200", "http://elasticsearch:9200"]
index => "stackoverflow_top"
}
stdout {
codec => rubydebug
}
}
So in that post I cover multiple ways of inserting data, such as aggregate, JDBC streaming, and other scenarios.

Related

aggregate multiple recursive logstash

I am using Logstash with the jdbc input, and would like to embed one object inside another with aggregate.
How can I add objects recursively, i.e. add an object inside another object?
This would be an example:
{
"_index": "my-index",
"_type": "test",
"_id": "1",
"_version": 1,
"_score": 1,
"_source": {
"id": "1",
"properties": {
"nested_1": [
{
"A": 0,
"B": "true",
"C": "PEREZ, MATIAS ROGELIO Y/O",
"Nested_2": [
{
"Z1": "true",
"Z2": "99999"
}
]
},
{
"A": 0,
"B": "true",
"C": "SALVADOR MATIAS ROMERO",
"Nested_2": [
{
"Z1": "true",
"Z2": "99999"
}
]
}
]
}
}
}
I'm using something like this, but it doesn't work:
aggregate {
task_id => "%{id}"
code => "
map['id'] = event.get('id')
map['nested_1_list'] ||= []
map['nested_1'] ||= []
if (event.get('id') != nil)
if !( map['nested_1_list'].include?event.get('id') )
map['nested_1_list'] << event.get('id')
map['nested_1'] << {
'A' => event.get('a'),
'B' => event.get('b'),
'C' => event.get('c'),
map['nested_2_list'] ||= []
map['nested_2'] ||= []
if (event.get('id_2') != nil)
if !( map['nested_2_list'].include?event.get('id_2') )
map['nested_2_list'] << event.get('id_2')
map['nested_2'] << {
'Z1' => event.get('z1'),
'Z2' => event.get('z2')
}
end
end
}
end
end
event.cancel()
"
push_previous_map_as_event => true
timeout => 3
}
Any idea how to implement this?
In the end what I did was generate the JSON on the input side, that is, in a stored procedure consumed through a view (vw) by the Logstash input statement.
Once consumed, I process it as JSON and then have that JSON available to work with as just another field.
# Convert the string into real JSON (removes quotes and backslashes)
ruby {
code => "
require 'json'
json_value = JSON.parse(event.get('field_db').to_s)
event.set('field_convert_to_json',json_value)
"
}
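For completeness, a hedged sketch of how the two-level aggregate from the question could be written so the inner array is built outside the hash literal (column names a, b, c, id_2, z1, z2 assumed from the snippet above; not tested against the real data):
aggregate {
  task_id => "%{id}"
  code => "
    map['id'] = event.get('id')
    map['nested_1'] ||= []
    # find or create the first-level entry for this row (keyed on column c purely for illustration)
    parent = map['nested_1'].find { |n| n['C'] == event.get('c') }
    if parent.nil?
      parent = { 'A' => event.get('a'), 'B' => event.get('b'), 'C' => event.get('c'), 'Nested_2' => [] }
      map['nested_1'] << parent
    end
    # append the second-level object only when the inner id is present
    parent['Nested_2'] << { 'Z1' => event.get('z1'), 'Z2' => event.get('z2') } unless event.get('id_2').nil?
    event.cancel()
  "
  push_previous_map_as_event => true
  timeout => 3
}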
Maybe you can try this. Note: this is applicable only when you want a single nested object and not an array of objects.
Please do visit my blog for other formats:
https://xyzcoder.github.io/2020/07/29/indexing-documents-using-logstash-and-python.html
input {
jdbc {
jdbc_driver_library => "/usr/share/logstash/javalib/mssql-jdbc-8.2.2.jre11.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://host.docker.internal;database=StackOverflow2010;user=pavan;password=pavankumar#123"
jdbc_user => "pavan"
jdbc_password => "pavankumar#123"
statement => "select top 500 p.Id as PostId,p.AcceptedAnswerId,p.AnswerCount,p.Body,u.Id as userid,u.DisplayName,u.Location
from StackOverflow2010.dbo.Posts p inner join StackOverflow2010.dbo.Users u
on p.OwnerUserId=u.Id"
}
}
filter {
aggregate {
task_id => "%{postid}"
code => "
map['postid'] = event.get('postid')
map['accepted_answer_id'] = event.get('acceptedanswerid')
map['answer_count'] = event.get('answercount')
map['body'] = event.get('body')
map['user'] = {
'id' => event.get('userid'),
'displayname' => event.get('displayname'),
'location' => event.get('location')
}
map['user']['test'] = {
'test_body' => event.get('postid')
}
event.cancel()"
push_previous_map_as_event => true
timeout => 30
}
}
output {
elasticsearch {
hosts => ["http://elasticsearch:9200", "http://elasticsearch:9200"]
index => "stackoverflow_top"
}
stdout {
codec => rubydebug
}
}
and my output is
{
"_index" : "stackoverflow_top",
"_type" : "_doc",
"_id" : "S8WEmnMBrXsRTNbKO0JJ",
"_score" : 1.0,
"_source" : {
"#version" : "1",
"body" : """<p>How do I store binary data in MySQL?</p>
""",
"#timestamp" : "2020-07-29T12:20:22.649Z",
"answer_count" : 10,
"user" : {
"displayname" : "Geoff Dalgas",
"location" : "Corvallis, OR",
"test" : {
"test_body" : 17
},
"id" : 2
},
"postid" : 17,
"accepted_answer_id" : 26
}
}
Here the test object is nested inside the user object.

Logstash 2.3.4 How to load nested document in elasticsearch using logstash-jdbc plugin

I am currently using Elasticsearch 2.3.4 and Logstash 2.3.4 to load relational data from an Oracle DB into my Elasticsearch index using the logstash-jdbc plugin. As suggested in various posts, I am using the aggregate filter for this. Still, I am not able to load the inner nested object into the document: the values are not getting mapped to the fields and are displayed as NULL.
I have two related entities with the following data:
CREATE TABLE DEPARTMENT (
id NUMBER PRIMARY KEY,
name VARCHAR2(4000) NOT NULL
)
CREATE TABLE EMPLOYEE (
id NUMBER PRIMARY KEY,
name VARCHAR2(4000) NOT NULL,
departmentid NUMBER,
CONSTRAINT EMPLOYEE_FK FOREIGN KEY (departmentid) REFERENCES DEPARTMENT(id)
)
insert into DEPARTMENT values (1, 'dept1');
insert into DEPARTMENT values (2, 'dept2');
insert into DEPARTMENT values (3, 'dept3');
insert into DEPARTMENT values (4, 'dept4');
insert into EMPLOYEE values (1, 'emp1', 1);
insert into EMPLOYEE values (2, 'emp2', 1);
insert into EMPLOYEE values (3, 'emp3', 1);
insert into EMPLOYEE values (4, 'emp4', 2);
insert into EMPLOYEE values (5, 'emp5', 2);
insert into EMPLOYEE values (6, 'emp6', 3);
Here is my mapping:
{
"mappings": {
"departments": {
"properties": {
"id": {
"type": "integer"
},
"deptName": {
"type": "string"
},
"employee_details": {
"type": "nested",
"properties": {
"empId": {
"type": "integer"
},
"empName": {
"type": "string"
}
}
}
}
}
}
}
And this is my logstash configuration:
input{
jdbc{
jdbc_validate_connection => true
jdbc_connection_string => "jdbc:oracle:thin:#host:port:db"
jdbc_user => "user"
jdbc_password => "pwd"
jdbc_driver_library => "../vendor/jar/ojdbc14.jar"
jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
statement => "SELECT
department.id AS id,
department.name AS deptName,
employee.id AS empId,
employee.name AS empName
FROM department LEFT JOIN employee
ON department.id = employee.departmentid
ORDER BY id"
}
}
filter{
aggregate {
task_id => "%{id}"
code => "
map['id'] = event['id']
map['deptName'] = event['deptName'] #solution - deptName should be in smaller case and other fields too.
map['employee_details'] ||= []
map['employee_details'] << {'empId' => event['empId'], 'empName' => event['empName'] }
"
push_previous_map_as_event => true
timeout => 5
timeout_tags => ['aggregated']
}
}
output{
stdout{ codec => rubydebug }
elasticsearch{
action => "index"
index => "my_index"
document_type => "departments"
document_id => "%{id}"
hosts => "localhost:9200"
}
}
When I perform a GET on all documents:
curl -XGET 'localhost:9200/my_index/_search/?pretty=true&q=*:*'
The values are not mapped to the fields and are displayed as NULL:
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "my_index",
"_type": "departments",
"_id": "2",
"_score": 1,
"_source": {
"id": 2,
"deptName": null,
"employee_details": [
{
"empId": null,
"empName": null
},
{
"empId": null,
"empName": null
}
],
"#version": "1",
"#timestamp": "2019-05-14T10:47:33.477Z",
"tags": [
"aggregated"
]
}
},
{
"_index": "my_index",
"_type": "departments",
"_id": "4",
"_score": 1,
"_source": {
"id": 4,
"deptname": "dept4",
"empid": null,
"empname": null,
"#version": "1",
"#timestamp": "2019-05-14T10:47:33.367Z",
"deptName": null,
"employee_details": [
{
"empId": null,
"empName": null
}
]
}
},
{
"_index": "my_index",
"_type": "departments",
"_id": "1",
"_score": 1,
"_source": {
"id": 1,
"deptName": null,
"employee_details": [
{
"empId": null,
"empName": null
},
{
"empId": null,
"empName": null
},
{
"empId": null,
"empName": null
}
],
"#version": "1",
"#timestamp": "2019-05-14T10:47:33.477Z",
"tags": [
"aggregated"
]
}
},
{
"_index": "my_index",
"_type": "departments",
"_id": "3",
"_score": 1,
"_source": {
"id": 3,
"deptName": null,
"employee_details": [
{
"empId": null,
"empName": null
}
],
"#version": "1",
"#timestamp": "2019-05-14T10:47:33.492Z",
"tags": [
"aggregated"
]
}
}
]
}
}
rubydebug suggests the values are set to 'nil'. Could anyone please help me with what I am doing wrong here?
Here is a snippet from stdout for document with id = 1:
{
"id" => 1.0,
"deptname" => "dept1",
"empid" => 1.0,
"empname" => "emp1",
"#version" => "1",
"#timestamp" => "2019-05-14T12:32:14.272Z"
}
{
"id" => 1.0,
"deptname" => "dept1",
"empid" => 2.0,
"empname" => "emp2",
"#version" => "1",
"#timestamp" => "2019-05-14T12:32:15.272Z"
}
{
"id" => 1.0,
"deptname" => "dept1",
"empid" => 3.0,
"empname" => "emp3",
"#version" => "1",
"#timestamp" => "2019-05-14T12:32:15.272Z"
}
{
"id" => 1.0,
"deptName" => nil,
"employee_details" => [
[0] {
"empId" => nil,
"empName" => nil
},
[1] {
"empId" => nil,
"empName" => nil
},
[2] {
"empId" => nil,
"empName" => nil
}
],
"#version" => "1",
"#timestamp" => "2019-05-14T12:32:15.381Z",
"tags" => [
[0] "aggregated"
]
}
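(Judging by the rubydebug output above, the likely cause is that the jdbc input lowercases column aliases by default (lowercase_column_names), so the events carry deptname/empid/empname while the aggregate code reads event['deptName'] and event['empId'], which yields nil. A sketch of the code block with the lowercased keys:)
code => "
  map['id'] = event['id']
  map['deptName'] = event['deptname']
  map['employee_details'] ||= []
  map['employee_details'] << {'empId' => event['empid'], 'empName' => event['empname'] }
"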
The following code works for me.
input {
jdbc{
jdbc_validate_connection => true
jdbc_connection_string => "----/employees"
jdbc_user => "---"
jdbc_password => "--"
jdbc_driver_library => "/home/ilsa/mysql-connector-java-5.1.36-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
statement => "SELECT
e.emp_no as employee_number,
birth_date, first_name, last_name, gender, hire_date, t.title AS titlename,
t.from_date AS titlefrom_date, t.to_date AS titleto_date, d.dept_no AS departmentnumber,
ds.dept_name AS departmentname, d.from_date AS departmentfrom_date, d.to_date AS departmentto_date
FROM employees e
LEFT JOIN(titles t, dept_emp d, departments ds)
ON(e.emp_no = t.emp_no AND e.emp_no = d.emp_no AND d.dept_no = ds.dept_no AND t.from_date < d.to_date AND t.to_date > d.from_date)
ORDER BY e.emp_no ASC"
}
}
filter {
aggregate {
task_id => "%{employee_number}"
code => "
map['employee_number'] = event.get('employee_number')
map['birth_date'] = event.get('birth_date')
map['first_name'] = event.get('first_name')
map['last_name'] = event.get('last_name')
map['gender'] = event.get('gender')
map['hire_date'] = event.get('hire_date')
map['roles'] ||= []
map['roles'] << {
'title.name' => event.get('titlename'),
'title.from_date' => event.get('titlefrom_date'),
'title.to_date' => event.get('titleto_date'),
'department.number' => event.get('departmentnumber'),
'department.name' => event.get('departmentname'),
'department.from_date' => event.get('departmentfrom_date'),
'department.to_date' => event.get('departmentto_date')
}
event.cancel()"
push_previous_map_as_event => true
timeout => 30
}
}
output {
stdout{ codec => rubydebug }
elasticsearch{
action => "index"
index => "employees"
document_type => "employee"
document_id => "%{employee_number}"
hosts => "localhost:9200"
}
}
You can also try to make use of the jdbc_streaming filter plugin in Logstash.
Check this post:
Inserting Nested Objects using Logstash
For example, I am taking the Stack Overflow Posts and Users tables. Here Posts is the parent table and it is associated with the Users table on OwnerUserId. So my plugin configuration is:
input {
jdbc {
jdbc_driver_library => "/usr/share/logstash/javalib/mssql-jdbc-8.2.2.jre11.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://host.docker.internal;database=StackOverflow2010;user=pavan;password=pavankumar#123"
jdbc_user => "pavan"
jdbc_password => "pavankumar#123"
statement => "select top 500 * from StackOverflow2010.dbo.Posts p "
}
}
filter{
jdbc_streaming {
jdbc_driver_library => "/usr/share/logstash/javalib/mssql-jdbc-8.2.2.jre11.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://host.docker.internal;database=StackOverflow2010;user=pavan;password=pavankumar#123"
jdbc_user => "pavan"
jdbc_password => "pavankumar#123"
statement => "select * from StackOverflow2010.dbo.Users u where u.Id = :owneruserid"
parameters => {"owneruserid" => "owneruserid"}
target => "user_details"
}
}
output {
elasticsearch {
hosts => ["http://elasticsearch:9200", "http://elasticsearch:9200"]
index => "stackoverflow_top_user"
}
stdout {
codec => rubydebug
}
}
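With this, each post event gets a user_details field containing the matching Users row(s) as an array of hashes, roughly like the following (sample values taken from the output shown earlier; key casing can vary by driver):
"user_details" => [
  {
    "id" => 2,
    "displayname" => "Geoff Dalgas",
    "location" => "Corvallis, OR"
  }
]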

How to preprocess a document before indexation?

I'm using Logstash and Elasticsearch to collect tweets using the Twitter plugin. My problem is that I receive a document from Twitter and I would like to do some preprocessing before indexing it. Let's say that I have this as a document result from Twitter:
{
"tweet": {
"tweetId": 1025,
"tweetContent": "Hey this is a fake document for stackoverflow #stackOverflow #elasticsearch",
"hashtags": ["stackOverflow", "elasticsearch"],
"publishedAt": "2017 23 August",
"analytics": {
"likeNumber": 400,
"shareNumber": 100,
}
},
"author":{
"authorId": 819744,
"authorAt": "the_expert",
"authorName": "John Smith",
"description": "Haha it's a fake description"
}
}
Now, out of this document that Twitter is sending me, I would like to generate two documents:
The first one will be indexed in twitter/tweet/1025:
# The id for this document should be the one from tweetId `"tweetId": 1025`
{
"content": "Hey this is a fake document for stackoverflow #stackOverflow #elasticsearch", # this field has been renamed
"hashtags": ["stackOverflow", "elasticsearch"],
"date": "2017/08/23", # the date has been formated
"shareNumber": 100 # This field has been flattened
}
The second one will be indexed in twitter/author/819744:
# The id for this document should be the one from authorId `"authorId": 819744 `
{
"authorAt": "the_expert",
"description": "Haha it's a fake description"
}
I have defined my output as follows:
output {
stdout { codec => dots }
elasticsearch {
hosts => [ "localhost:9200" ]
index => "twitter"
document_type => "tweet"
}
}
How can I process the information from twitter?
EDIT:
So my full config file should look like:
input {
twitter {
consumer_key => "consumer_key"
consumer_secret => "consumer_secret"
oauth_token => "access_token"
oauth_token_secret => "access_token_secret"
keywords => [ "random", "word"]
full_tweet => true
type => "tweet"
}
}
filter {
clone {
clones => ["author"]
}
if([type] == "tweet") {
mutate {
remove_field => ["authorId", "authorAt"]
}
} else {
mutate {
remove_field => ["tweetId", "tweetContent"]
}
}
}
output {
stdout { codec => dots }
if [type] == "tweet" {
elasticsearch {
hosts => [ "localhost:9200" ]
index => "twitter"
document_type => "tweet"
document_id => "%{[tweetId]}"
}
} else {
elasticsearch {
hosts => [ "localhost:9200" ]
index => "twitter"
document_type => "author"
document_id => "%{[authorId]}"
}
}
}
You could use the clone filter plugin in Logstash.
Here is a sample Logstash configuration file that takes JSON input from stdin and simply shows the output on stdout:
input {
stdin {
codec => json
type => "tweet"
}
}
filter {
mutate {
add_field => {
"tweetId" => "%{[tweet][tweetId]}"
"content" => "%{[tweet][tweetContent]}"
"date" => "%{[tweet][publishedAt]}"
"shareNumber" => "%{[tweet][analytics][shareNumber]}"
"authorId" => "%{[author][authorId]}"
"authorAt" => "%{[author][authorAt]}"
"description" => "%{[author][description]}"
}
}
date {
match => ["date", "yyyy dd MMMM"]
target => "date"
}
ruby {
code => '
event.set("hashtags", event.get("[tweet][hashtags]"))
'
}
clone {
clones => ["author"]
}
mutate {
remove_field => ["author", "tweet", "message"]
}
if([type] == "tweet") {
mutate {
remove_field => ["authorId", "authorAt", "description"]
}
} else {
mutate {
remove_field => ["tweetId", "content", "hashtags", "date", "shareNumber"]
}
}
}
output {
stdout {
codec => rubydebug
}
}
Using as input:
{"tweet": { "tweetId": 1025, "tweetContent": "Hey this is a fake document", "hashtags": ["stackOverflow", "elasticsearch"], "publishedAt": "2017 23 August","analytics": { "likeNumber": 400, "shareNumber": 100 } }, "author":{ "authorId": 819744, "authorAt": "the_expert", "authorName": "John Smith", "description": "fake description" } }
You would get these two documents:
{
"date" => 2017-08-23T00:00:00.000Z,
"hashtags" => [
[0] "stackOverflow",
[1] "elasticsearch"
],
"type" => "tweet",
"tweetId" => "1025",
"content" => "Hey this is a fake document",
"shareNumber" => "100",
"#timestamp" => 2017-08-23T20:36:53.795Z,
"#version" => "1",
"host" => "my-host"
}
{
"description" => "fake description",
"type" => "author",
"authorId" => "819744",
"#timestamp" => 2017-08-23T20:36:53.795Z,
"authorAt" => "the_expert",
"#version" => "1",
"host" => "my-host"
}
You could alternatively use a ruby script to flatten the fields, and then use rename on mutate, when necessary.
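A hedged sketch of that alternative, using the field names from the sample tweet (the ruby copy preserves the original value types, unlike add_field with sprintf, which produces strings):
filter {
  ruby {
    # copy nested values to the top level, keeping their original types
    code => '
      event.set("tweetId", event.get("[tweet][tweetId]"))
      event.set("shareNumber", event.get("[tweet][analytics][shareNumber]"))
    '
  }
  mutate {
    # rename moves a nested field directly to a new top-level name
    rename => { "[tweet][tweetContent]" => "content" }
  }
}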
If you want Elasticsearch to use authorId and tweetId instead of the default ID, you could probably configure the elasticsearch output with document_id:
output {
stdout { codec => dots }
if [type] == "tweet" {
elasticsearch {
hosts => [ "localhost:9200" ]
index => "twitter"
document_type => "tweet"
document_id => "%{[tweetId]}"
}
} else {
elasticsearch {
hosts => [ "localhost:9200" ]
index => "twitter"
document_type => "tweet"
document_id => "%{[authorId]}"
}
}
}

load array data mysql to ElasticSearch using logstash jdbc

Hi, I am new to ES and I am trying to load data from MySQL into Elasticsearch.
I am getting the below error when trying to load data in array format. Any help?
Here is the MySQL data; I need array data for the new & hex value columns:
cid  color     new   hex      create          modified
1    100 euro  abcd  #86c67c  5/5/2016 15:48  5/13/2016 14:15
1    100 euro  1234  #fdf8ff  5/5/2016 15:48  5/13/2016 14:15
Here is the Logstash config:
input {
jdbc {
jdbc_driver_library => "/etc/logstash/mysql/mysql-connector-java-5.1.39-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/test"
jdbc_user => "root"
jdbc_password => "*****"
schedule => "* * * * *"
statement => "select cid,color, new as 'cvalue.new',hexa_value as 'cvalue.hexa',created,modified from colors_hex_test order by cid"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
}
}
output {
elasticsearch {
index => "colors_hexa"
document_type => "colors"
document_id => "%{cid}"
hosts => "localhost:9200"
Need array data for cvalue (new, hexa) like
{
"_index": "colors_hexa",
"_type": "colors",
"_id": "1",
"_version": 218,
"found": true,
"_source": {
"cid": 1,
"color": "100 euro",
"cvalue" : {
"new": "1234",
"hexa_value": "#fdf8ff",
}
"created": "2016-05-05T10:18:51.000Z",
"modified": "2016-05-13T08:45:30.000Z",
"#version": "1",
"#timestamp": "2016-05-14T01:30:00.059Z"
}
}
This is the error I am getting while running Logstash:
"status"=>400, "error"=>{"type"=>"mapper_parsing_exception",
"reason"=>"Field name [cvalue.hexa] cannot contain '.'"}}}, :level=>:warn}
You can't have a dot in a field name. But you can try to add:
filter {
mutate {
rename => { "new" => "[cvalue][new]" }
rename => { "hexa" => "[cvalue][hexa]" }
}
}
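(For those renames to find anything, the SQL aliases would also have to be plain names rather than the dotted aliases that trigger the mapper_parsing_exception; a hedged sketch of the adjusted statement plus filter:)
statement => "select cid, color, new, hexa_value as hexa, created, modified from colors_hex_test order by cid"

filter {
  mutate {
    # move the flat columns under the cvalue object
    rename => { "new" => "[cvalue][new]" }
    rename => { "hexa" => "[cvalue][hexa]" }
  }
}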

33mfailed action with response of 400, dropping action - logstash

I'm trying to use Elasticsearch + Logstash (jdbc input).
My Elasticsearch seems to be OK. The problem seems to be in Logstash (the elasticsearch output plugin).
My logstash.conf:
input {
jdbc {
jdbc_driver_library => "C:\DEV\elasticsearch-1.7.1\plugins\elasticsearch-jdbc-1.7.1.0\lib\sqljdbc4.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://localhost:1433;databaseName=dbTest"
jdbc_user => "user"
jdbc_password => "pass"
schedule => "* * * * *"
statement => "SELECT ID_RECARGA as _id FROM RECARGA where DT_RECARGA >= '2015-09-04'"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
}
}
filter {
}
output {
elasticsearch {
host => "localhost"
protocol => "http"
index => "test_index"
document_id => "%{objectId}"
}
stdout { codec => rubydebug }
}
When I run Logstash:
C:\DEV\logstash-1.5.4\bin>logstash -f logstash.conf
I'm getting this result:
←[33mfailed action with response of 400, dropping action: ["index", {:_id=>"%{objectId}", :_index=>"parrudo", :_type=>"logs", :_routing=>nil}, #<LogStash::Event:0x5d4c2abf @metadata_accessors=#<LogStash::Util::Accessors:0x900a6e7 @store={"retry_count"=>0}, @lut={}>, @cancelled=false, @data={"_id"=>908026, "@version"=>"1", "@timestamp"=>"2015-09-04T21:19:00.322Z"}, @metadata={"retry_count"=>0}, @accessors=#<LogStash::Util::Accessors:0x4929c6a4 @store={"_id"=>908026, "@version"=>"1", "@timestamp"=>"2015-09-04T21:19:00.322Z"}, @lut={"type"=>[{"_id"=>908026, "@version"=>"1", "@timestamp"=>"2015-09-04T21:19:00.322Z"}, "type"], "objectId"=>[{"_id"=>908026, "@version"=>"1", "@timestamp"=>"2015-09-04T21:19:00.322Z"}, "objectId"]}>>] {:level=>:warn}←[0m
{
"_id" => 908026,
"#version" => "1",
"#timestamp" => "2015-09-04T21:19:00.322Z"
}
{
"_id" => 908027,
"#version" => "1",
"#timestamp" => "2015-09-04T21:19:00.322Z"
}
{
"_id" => 908028,
"#version" => "1",
"#timestamp" => "2015-09-04T21:19:00.323Z"
}
{
"_id" => 908029,
"#version" => "1",
"#timestamp" => "2015-09-04T21:19:00.323Z"
}
In Elasticsearch the index was created but doesn't have any docs.
I'm using Windows and MS SQL Server.
elasticsearch version: 1.7.1
logstash version: 1.5.4
any idea?
Thanks!
All right... after looking for this error in the Elasticsearch log I realized that the problem was the alias in my SQL statement.
statement => "SELECT ID_RECARGA as _id FROM RECARGA where DT_RECARGA >= '2015-09-04'"
For some reason that I don't know it wasn't being processed, so I just removed the alias from my query and everything seems to be right now.
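(The 400 appears to come from the _id alias clashing with Elasticsearch's reserved _id metadata field, and %{objectId} in the output never matched any field in the events. A hedged sketch of selecting the column under a normal, hypothetical alias and using it as the document id:)
input {
  jdbc {
    # ... same jdbc settings as above ...
    statement => "SELECT ID_RECARGA as recarga_id FROM RECARGA where DT_RECARGA >= '2015-09-04'"
  }
}
output {
  elasticsearch {
    host => "localhost"
    protocol => "http"
    index => "test_index"
    # recarga_id is a hypothetical alias; the jdbc input lowercases column aliases by default
    document_id => "%{recarga_id}"
  }
  stdout { codec => rubydebug }
}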
thanks!
