Kafka Connect (Debezium): Oracle JDBC sink connector error consuming Debezium (Oracle) CDC event

I have the below images running in Docker:
quay.io/debezium/connect:2.0
quay.io/debezium/kafka:2.0
quay.io/debezium/zookeeper:2.0
container-registry.oracle.com/database/enterprise:latest
Debezium source connector config:
"name": "customers-connector",
"config": {
"connector.class": "io.debezium.connector.oracle.OracleConnector",
"tasks.max": "1",
"topic.prefix": "server1",
"database.hostname": "dbz_oracle21",
"database.port": "1521",
"database.user": "c##dbzuser",
"database.password": "dbz",
"database.dbname": "ORCLCDB",
"database.pdb.name": "ORCLPDB1",
"database.server.name": "server1",
"table.include.list": "C##DBZUSER.CUSTOMERS",
"schema.history.internal.kafka.bootstrap.servers": "kafka:9092",
"schema.history.internal.kafka.topic": "schema-changes.customers",
"internal.key.converter": "org.apache.kafka.connect.json.JsonConverter",
"internal.value.converter":"org.apache.kafka.connect.json.JsonConverter",
"internal.key.converter.schemas.enable":false,
"internal.value.converter.schemas.enable":false,
"transforms": "route",
"transforms.route.type": "org.apache.kafka.connect.transforms.RegexRouter",
"transforms.route.regex": "([^.]+)\\.([^.]+)\\.([^.]+)",
"transforms.route.replacement": "$3"
Oracle-JDBC-Sink-Connector Config:
"name": "jdbc-sink-2",
"config": {
"connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
"tasks.max": "1",
"topics": "CUSTOMERS",
"table.name.format": "kafka_customers",
"connection.url": "jdbc:oracle:thin:#dbz_oracle21:1521/orclpdb1",
"connection.user": "c##sinkuser",
"connection.password": "sinkpw",
"auto.create":true,
"auto.evolve":true,
"transforms": "unwrap",
"transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
"pk.fields": "id",
"insert.mode":"insert",
"pk.mode": "record_key"
}
}
I can see the CDC events getting published to the Kafka topic "CUSTOMERS":
{"schema":{"type":"struct","fields":[{"type":"int32","optional":false,"field":"ID"}],"optional":false,"name":"server1.C__DBZUSER.CUSTOMERS.Key"},"payload":{"ID":1011}} {"schema":{"type":"struct","fields":[{"type":"struct","fields":[{"type":"int32","optional":false,"field":"ID"},{"type":"string","optional":true,"field":"NAME"}],"optional":true,"name":"server1.C__DBZUSER.CUSTOMERS.Value","field":"before"},{"type":"struct","fields":[{"type":"int32","optional":false,"field":"ID"},{"type":"string","optional":true,"field":"NAME"}],"optional":true,"name":"server1.C__DBZUSER.CUSTOMERS.Value","field":"after"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"version"},{"type":"string","optional":false,"field":"connector"},{"type":"string","optional":false,"field":"name"},{"type":"int64","optional":false,"field":"ts_ms"},{"type":"string","optional":true,"name":"io.debezium.data.Enum","version":1,"parameters":{"allowed":"true,last,false,incremental"},"default":"false","field":"snapshot"},{"type":"string","optional":false,"field":"db"},{"type":"string","optional":true,"field":"sequence"},{"type":"string","optional":false,"field":"schema"},{"type":"string","optional":false,"field":"table"},{"type":"string","optional":true,"field":"txId"},{"type":"string","optional":true,"field":"scn"},{"type":"string","optional":true,"field":"commit_scn"},{"type":"string","optional":true,"field":"lcr_position"},{"type":"string","optional":true,"field":"rs_id"},{"type":"int32","optional":true,"field":"ssn"},{"type":"int32","optional":true,"field":"redo_thread"},{"type":"string","optional":true,"field":"user_name"}],"optional":false,"name":"io.debezium.connector.oracle.Source","field":"source"},{"type":"string","optional":false,"field":"op"},{"type":"int64","optional":true,"field":"ts_ms"},{"type":"struct","fields":[{"type":"string","optional":false,"field":"id"},{"type":"int64","optional":false,"field":"total_order"},{"type":"int64","optional":false,"field":"data_collection_order"}],"optional":true,"name":"event.block","version":1,"field":"transaction"}],"optional":false,"name":"server1.C__DBZUSER.CUSTOMERS.Envelope","version":1},"payload":{"before":{"ID":1011,"NAME":"r 3"},"after":{"ID":1011,"NAME":"233"},"source":{"version":"2.0.1.Final","connector":"oracle","name":"server1","ts_ms":1674978001000,"snapshot":"false","db":"ORCLPDB1","sequence":null,"schema":"C##DBZUSER","table":"CUSTOMERS","txId":"0a001b007a020000","scn":"3252353","commit_scn":"3252452","lcr_position":null,"rs_id":null,"ssn":0,"redo_thread":1,"user_name":"C##DBZUSER"},"op":"u","ts_ms":1674978030086,"transaction":null}}
When I try to sink these CDC events from the topic with the sink connector config above, I see this error message in the connector log:
...partition CUSTOMERS-0 [org.apache.kafka.clients.consumer.internals.ConsumerCoordinator]
2023-01-29 14:57:13,174 INFO || [Consumer clientId=connector-consumer-jdbc-sink-2-0, groupId=connect-jdbc-sink-2] Resetting offset for partition CUSTOMERS-0 to position FetchPosition{offset=0, offsetEpoch=Optional.empty, currentLeader=LeaderAndEpoch{leader=Optional[172.17.0.3:9092 (id: 1 rack: null)], epoch=0}}. [org.apache.kafka.clients.consumer.internals.SubscriptionState]
2023-01-29 14:57:13,181 INFO || Attempting to open connection #1 to Oracle [io.confluent.connect.jdbc.util.CachedConnectionProvider]
2023-01-29 14:57:13,222 INFO || JdbcDbWriter Connected [io.confluent.connect.jdbc.sink.JdbcDbWriter]
2023-01-29 14:57:13,263 ERROR || WorkerSinkTask{id=jdbc-sink-2-0} Task threw an uncaught and unrecoverable exception. Task is being killed and will not recover until manually restarted. Error: PK mode for table 'kafka_customers' is RECORD_KEY with configured PK fields [id], but record key schema does not contain field: id [org.apache.kafka.connect.runtime.WorkerSinkTask]
org.apache.kafka.connect.errors.ConnectException: PK mode for table 'kafka_customers' is RECORD_KEY with configured PK fields [id], but record key schema does not contain field: id
at io.confluent.connect.jdbc.sink.metadata.FieldsMetadata.extractRecordKeyPk(FieldsMetadata.java:208)
at io.confluent.connect.jdbc.sink.metadata.FieldsMetadata.extract(FieldsMetadata.java:97)
at io.confluent.connect.jdbc.sink.metadata.FieldsMetadata.extract(FieldsMetadata.java:63)
at io.confluent.connect.jdbc.sink.BufferedRecords.add(BufferedRecords.java:114)
at io.confluent.connect.jdbc.sink.JdbcDbWriter.write(JdbcDbWriter.java:66)
at io.confluent.connect.jdbc.sink.JdbcSinkTask.put(JdbcSinkTask.java:74)
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:581)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:333)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:234)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:203)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:189)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:244)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
2023-01-29 14:57:13,263 ERROR || WorkerSinkTask{id=jdbc-sink-2-0} Task threw an uncaught and unrecoverable exception. Task is being killed and will not recover until manually restarted [org.apache.kafka.connect.runtime.WorkerTask]
org.apache.kafka.connect.errors.ConnectException: Exiting WorkerSinkTask due to unrecoverable exception.
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:611)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:333)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:234)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:203)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:189)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:244)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.kafka.connect.errors.ConnectException: PK mode for table 'kafka_customers' is RECORD_KEY with configured PK fields [id], but record key schema does not contain field: id
at io.confluent.connect.jdbc.sink.metadata.FieldsMetadata.extractRecordKeyPk(FieldsMetadata.java:208)
at io.confluent.connect.jdbc.sink.metadata.FieldsMetadata.extract(FieldsMetadata.java:97)
at io.confluent.connect.jdbc.sink.metadata.FieldsMetadata.extract(FieldsMetadata.java:63)
at io.confluent.connect.jdbc.sink.BufferedRecords.add(BufferedRecords.java:114)
at io.confluent.connect.jdbc.sink.JdbcDbWriter.write(JdbcDbWriter.java:66)
at io.confluent.connect.jdbc.sink.JdbcSinkTask.put(JdbcSinkTask.java:74)
at org.apache.kafka.connect.runtime.WorkerSinkTask.deliverMessages(WorkerSinkTask.java:581)
... 10 more
2023-01-29 14:57:13,263 INFO || Stopping task [io.confluent.connect.jdbc.sink.JdbcSinkTask]

Oracle is known as a case-insensitive system, so select id from tab works even if the column name is ID.
But most applications quote the column name, so the configuration
"pk.fields": "id"
will lead to something like select "id" from tab,
which triggers the error record key schema does not contain field: id.
Fix the case.
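Here the record key schema shown in the question names the field ID (upper case), so, as a minimal sketch of the fix, the sink connector config should use:
"pk.fields": "ID"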

Related

Cannot send data from confluent platform to Elasticsearch with elastic sink connector. Exception: Tolerance exceeded in error handler

I'm trying to run a simple example to send Kafka data to Elasticsearch using Confluent Platform with the Elasticsearch sink connector.
I'm using Confluent Platform version 6.0.0 and I installed the latest version of the Elasticsearch sink connector.
The configuration of my connector is the following:
{
"value.converter.schemas.enable": "false",
"name": "e",
"connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector",
"value.converter": "org.apache.kafka.connect.json.JsonConverter",
"topics": [
"ciao"
],
"connection.url": [
"http://192.168.x.x:9200"
],
"key.ignore": "true",
"schema.ignore": "true"
}
I used kafkacat to send a message to my topic "ciao", but as soon as I sent the data my connector failed.
Trying to see what the problem could be, I got this exception:
org.apache.kafka.connect.errors.ConnectException: Tolerance exceeded in error handler
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:196)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:122)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertAndTransformRecord(WorkerSinkTask.java:495)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertMessages(WorkerSinkTask.java:472)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:226)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:198)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:185)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:235)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.kafka.connect.errors.DataException: Converting byte[] to Kafka Connect data failed due to serialization error:
at org.apache.kafka.connect.json.JsonConverter.toConnectData(JsonConverter.java:366)
at org.apache.kafka.connect.storage.Converter.toConnectData(Converter.java:87)
at org.apache.kafka.connect.runtime.WorkerSinkTask.lambda$convertAndTransformRecord$1(WorkerSinkTask.java:495)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndRetry(RetryWithToleranceOperator.java:146)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:180)
... 13 more
Caused by: org.apache.kafka.common.errors.SerializationException: com.fasterxml.jackson.core.JsonParseException: Unrecognized token 'ciao': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')
 at [Source: (byte[])"ciao"; line: 1, column: 5]
Caused by: com.fasterxml.jackson.core.JsonParseException: Unrecognized token 'ciao': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')
 at [Source: (byte[])"ciao"; line: 1, column: 5]
at com.fasterxml.jackson.core.JsonParser._constructError(JsonParser.java:1840)
at com.fasterxml.jackson.core.base.ParserMinimalBase._reportError(ParserMinimalBase.java:722)
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._reportInvalidToken(UTF8StreamJsonParser.java:3560)
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._handleUnexpectedValue(UTF8StreamJsonParser.java:2655)
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser._nextTokenNotInObject(UTF8StreamJsonParser.java:857)
at com.fasterxml.jackson.core.json.UTF8StreamJsonParser.nextToken(UTF8StreamJsonParser.java:754)
at com.fasterxml.jackson.databind.ObjectMapper._readTreeAndClose(ObjectMapper.java:4247)
at com.fasterxml.jackson.databind.ObjectMapper.readTree(ObjectMapper.java:2734)
at org.apache.kafka.connect.json.JsonDeserializer.deserialize(JsonDeserializer.java:64)
at org.apache.kafka.connect.json.JsonConverter.toConnectData(JsonConverter.java:364)
at org.apache.kafka.connect.storage.Converter.toConnectData(Converter.java:87)
at org.apache.kafka.connect.runtime.WorkerSinkTask.lambda$convertAndTransformRecord$1(WorkerSinkTask.java:495)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndRetry(RetryWithToleranceOperator.java:146)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:180)
at org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execute(RetryWithToleranceOperator.java:122)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertAndTransformRecord(WorkerSinkTask.java:495)
at org.apache.kafka.connect.runtime.WorkerSinkTask.convertMessages(WorkerSinkTask.java:472)
at org.apache.kafka.connect.runtime.WorkerSinkTask.poll(WorkerSinkTask.java:322)
at org.apache.kafka.connect.runtime.WorkerSinkTask.iteration(WorkerSinkTask.java:226)
at org.apache.kafka.connect.runtime.WorkerSinkTask.execute(WorkerSinkTask.java:198)
at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:185)
at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:235)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
What is the problem?
I followed this tutorial, running Confluent Platform locally rather than with Docker: https://www.confluent.io/blog/kafka-elasticsearch-connector-tutorial/
I reformatted the stack trace for you and within that you can see
org.apache.kafka.connect.errors.DataException:
Converting byte[] to Kafka Connect data failed due to serialization error:
at org.apache.kafka.connect.json.JsonConverter.toConnectData(JsonConverter.java:366)
…
org.apache.kafka.common.errors.SerializationException:
com.fasterxml.jackson.core.JsonParseException:
Unrecognized token 'ciao': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')
 at [Source: (byte[])"ciao"; line: 1, column: 5]
It looks like the message it's reading isn't valid JSON. You could try setting "errors.tolerance": "all" if there are just a few malformed messages on the topic alongside valid JSON that you do want to process.
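As a rough sketch using the standard Kafka Connect error-handling properties, that means adding something like this to the sink connector config:
"errors.tolerance": "all",
"errors.log.enable": "true",
"errors.log.include.messages": "true"
With logging enabled, the records that get skipped are at least written to the Connect log so you can see what was dropped.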
See also Error Handling in Kafka Connect and Elasticsearch Sink connector
I saw that the configuration was wrong.
I wrote the correct configuration and now it works well.
The configuration is the following:
{
"name": "e",
"config": {
"connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector",
"key.converter": "org.apache.kafka.connect.storage.StringConverter",
"value.converter": "io.confluent.connect.json.JsonSchemaConverter",
"topics": "ciao",
"connection.url": "http://192.x.x.x:9200",
"key.ignore": "true",
"value.converter.schema.registry.url": "http://localhost:8081",
"value.converter.schemas.enable": "true",
"key.converter.schema.registry.url": "http://localhost:8081"
}
}

PostgreSQL JDBC sink raises error: null (ARRAY) type doesn't have a mapping to the SQL database column type

I have a problem when trying to replicate my database using the Kafka JDBC sink. When I run it against a table which has an array data type on it, it gives this error:
...
Caused by: org.apache.kafka.connect.errors.ConnectException: null (ARRAY) type doesn't have a mapping to the SQL database column type
...
I want to retain the same array type and don't want to convert it into a string like I do for SQL Server (since SQL Server does not allow array data types).
This is my connection config:
{"name" :"pgsink_'$topic_name'",
"config":{"connector.class":"io.confluent.connect.jdbc.JdbcSinkConnector",
"tasks.max":"1",
"topics":"'$table'",
"connection.url":"jdbc:postgresql://",
"connection.user":"",
"connection.password":"",
"transforms":"unwrap",
"transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
"transforms.unwrap.drop.tombstones": "false",
"delete.handling.mode":"drop",
"auto.create":"true",
"auto.evolve":"true",
"insert.mode":"upsert",
"pk.fields":" '$pk'",
"pk.mode":"record_key",
"delete.enabled":"true",
"destination.table.format":"public.'$table'",
"connection.attempts":"60",
"connection.backoff.ms":"100000"
}}
My Kafka source comes from Debezium; since I want to retain the same data types, I don't apply an SMT in my source. This is the source config:
{
"name":"pg_prod",
"config":{
"connector.class":"io.debezium.connector.postgresql.PostgresConnector",
"plugin.name":"wal2json_streaming",
"database.hostname":"",
"database.port":"",
"database.user":"",
"database.password":"",
"database.dbname":"",
"database.server.name":"",
"database.history.kafka.bootstrap.servers": "",
"database.history.kafka.topic": "",
"transforms":"unwrap,reroute",
"table.whitelist":"public.table",
"transforms.unwrap.type":"io.debezium.transforms.ExtractNewRecordState",
"transforms.unwrap.delete.handling.mode": "drop",
"transforms.unwrap.drop.tombstones": "false",
"decimal.handling.mode":"double",
"time.precision.mode":"connect",
"transforms.reroute.type":"org.apache.kafka.connect.transforms.RegexRouter",
"transforms.reroute.regex":"postgres.public.(.*)",
"transforms.reroute.replacement":"$1",
"errors.tolerance": "all",
"errors.log.enable":true,
"errors.log.include.messages":true,
"kafkaPartition": "0",
"snapshot.delay.ms":"1000",
"schema.refresh.mode":"columns_diff_exclude_unchanged_toast"
}
}

Kafka Connect - Caused by: org.apache.kafka.connect.errors.ConnectException: PK mode for table is RECORD_KEY, but record key schema is missing

I have a JDBC sink for transferring data from Kafka to an Oracle database.
My connector gives this error:
Caused by: org.apache.kafka.connect.errors.ConnectException: PK mode for table 'orders' is RECORD_KEY, but record key schema is missing
My sink properties:
{
"name": "jdbc-oracle",
"config": {
"connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
"tasks.max": "1",
"topics": "orders",
"connection.url": "jdbc:oracle:thin:#10.1.2.3:1071/orac",
"connection.user": "ersin",
"connection.password": "ersin!",
"auto.create": "true",
"delete.enabled": "true",
"pk.mode": "record_key",
"pk.fields": "MESSAGE_KEY",
"insert.mode": "update ",
"plugin.path": "/home/ersin/confluent-5.4.1/share/java/",
"name": "jdbc-oracle"
},
"tasks": [
{
"connector": "jdbc-oracle",
"task": 0
}
],
"type": "sink"
}
My connect-avro-distributed.properties:
bootstrap.servers=10.0.0.0:9092
group.id=connect-cluster
key.converter=io.confluent.connect.avro.AvroConverter
key.converter.schema.registry.url=http://10.0.0.0:8081
value.converter=io.confluent.connect.avro.AvroConverter
value.converter.schema.registry.url=http://10.0.0.0:8081
config.storage.topic=connect-configs
offset.storage.topic=connect-offsets
status.storage.topic=connect-statuses
config.storage.replication.factor=1
offset.storage.replication.factor=1
status.storage.replication.factor=1
internal.key.converter=org.apache.kafka.connect.json.JsonConverter
internal.value.converter=org.apache.kafka.connect.json.JsonConverter
internal.key.converter.schemas.enable=false
internal.value.converter.schemas.enable=false
I send data like this:
./bin/kafka-avro-console-producer \
--broker-list 10.0.0.0:9092 --topic orders \
--property parse.key="true" \
--property key.schema='{"type":"record","name":"key_schema","fields":[{"name":"id","type":"int"}]}' \
--property key.separator="$" \
--property value.schema='{"type":"record","name":"myrecord","fields":[{"name":"id","type":"int"},{"name":"product","type":"string"}, {"name":"quantity", "type": "int"}, {"name":"price","type": "int"}]}' \
--property schema.registry.url=http://10.0.0.0:8081
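For example (the values here are made up for illustration), each record is then typed on one line as key and value separated by the configured $ separator:
{"id": 1}${"id": 1, "product": "pencil", "quantity": 10, "price": 2}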
How can I solve this?
Thanks in advance.
The problem seems to be with your payload and the configuration "pk.mode": "record_key".
pk.mode is used to define the primary key mode and you have the following config options:
none: No keys utilized
kafka: Kafka coordinates are used as the PK
record_key: Field(s) from the record key are used, which may be a primitive or a struct.
record_value: Field(s) from the record value are used, which must be a struct.
In your configuration, you are using record_key which means that Kafka Connect will take the field from the key of the message and use it as the primary key in the target Oracle table.
Although you haven't shared your Kafka Connect worker's configuration, my guess is that you are missing some configuration parameters in there.
According to the documentation,
The sink connector requires knowledge of schemas, so you should use a
suitable converter e.g. the Avro converter that comes with the schema
registry, or the JSON converter with schemas enabled. Kafka record
keys if present can be primitive types or a Connect struct, and the
record value must be a Connect struct. Fields being selected from
Connect structs must be of primitive types. If the data in the topic
is not of a compatible format, implementing a custom Converter may
be necessary.
Now in your case the problem seems to be "pk.fields", which is currently set to "pk.fields": "MESSAGE_KEY". In your schema, the message key is defined to be id. Therefore, the following should do the trick:
"pk.fields": "id"

org.apache.kafka.connect.errors.ConnectException: Tolerance exceeded in error handler

I'm creating a new Elasticsearch connector from Kafka and I'm getting this error:
org.apache.kafka.connect.errors.ConnectException: Tolerance exceeded in error handler
It looks like it's the serialization of the topic, but I've tested both JsonConverter and AvroConverter as the value and key converters.
Any ideas what the issue is here?
{
"connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector",
"connection.url": "es-endpoint",
"tasks.max": "1",
"topics": "simple.elasticsearch.data",
"name": "simple-elasticsearch-connector",
"type.name": "_doc",
"value.converter": "org.apache.kafka.connect.json.JsonConverter",
"value.converter.schemas.enable": "false",
"schema.ignore": "true",
"key.ignore": "true"
}
I was following the examples here: https://www.confluent.io/blog/kafka-elasticsearch-connector-tutorial/
The error is not shown in full. If your error shows something like this:
org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator.execAndHandleError(RetryWithToleranceOperator.java:180)
	... 11 more
Caused by: org.apache.kafka.common.errors.SerializationException: Error serializing Avro message
Caused by: java.net.MalformedURLException: unknown protocol: confluent-schema-registry
	at
Describe your pod and check the environment variables.
kubectl.exe describe pod cp-kafka-connect-12fc858c8b-jj5sx -n namespace
to see if a protocol is specified for your schema registry URL:
CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: confluent-schema-registry:18081
CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: confluent-schema-registry:18081
If it is something like the above, edit your deployment to look like the below, or redeploy Kafka Connect with the schema registry URL including an HTTP or HTTPS protocol.
CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://confluent-schema-registry:18081
CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://confluent-schema-registry:18081

Kafka Connect stops after 300k records

I am trying to sink my MySQL table to Elasticsearch. My table has 1 million plus records. The issue is that Elasticsearch does not get any more records after some 300 thousand records are inserted. I know that the first time I ran it, it did process all the records. This happened when I tried to do it again after deleting the ES index. I have tried resetting the update_ts field to a new timestamp. I have tried the offset value in the sink. Nothing seems to be working.
Here is my source file
{
"name": "items3",
"config": {
"_comment": "The JDBC connector class. Don't change this if you want to use the JDBC Source.",
"connector.class": "io.confluent.connect.jdbc.JdbcSourceConnector",
"_comment": "How to serialise the value of keys - here use the Confluent Avro serialiser. Note that the JDBC Source Connector always returns null for the key ",
"key.converter": "io.confluent.connect.avro.AvroConverter",
"_comment": "Since we're using Avro serialisation, we need to specify the Confluent schema registry at which the created schema is to be stored. NB Schema Registry and Avro serialiser are both part of Confluent Open Source.",
"key.converter.schema.registry.url": "http://localhost:8081",
"_comment": "As above, but for the value of the message. Note that these key/value serialisation settings can be set globally for Connect and thus omitted for individual connector configs to make them shorter and clearer",
"value.converter": "io.confluent.connect.avro.AvroConverter",
"value.converter.schema.registry.url": "http://localhost:8081",
"_comment": " --- JDBC-specific configuration below here --- ",
"_comment": "JDBC connection URL. This will vary by RDBMS. Consult your manufacturer's handbook for more information",
"connection.url": "jdbc:mysql://localhost:3306/db?user=user&password=password",
"_comment": "Which table(s) to include",
"table.whitelist": "items",
"_comment": "Pull all rows based on an timestamp column. You can also do bulk or incrementing column-based extracts. For more information, see http://docs.confluent.io/current/connect/connect-jdbc/docs/source_config_options.html#mode",
"mode": "timestamp+incrementing",
"incrementing.column.name": "id",
"timestamp.column.name": "update_ts",
"_comment": "If the column is not defined as NOT NULL, tell the connector to ignore this ",
"validate.non.null": "true",
"_comment": "The Kafka topic will be made up of this prefix, plus the table name ",
"topic.prefix": "kafka-",
"auto.offset.reset" : "earliest"
}
}
And here is my sink
{
"name": "items-sink",
"config": {
"_comment": "-- standard converter stuff -- this can actually go in the worker config globally --",
"connector.class": "io.confluent.connect.elasticsearch.ElasticsearchSinkConnector",
"value.converter": "io.confluent.connect.avro.AvroConverter",
"key.converter": "io.confluent.connect.avro.AvroConverter",
"key.converter.schema.registry.url": "http://localhost:8081",
"value.converter.schema.registry.url": "http://localhost:8081",
"_comment": "--- Elasticsearch-specific config ---",
"_comment": "Elasticsearch server address",
"connection.url": "http://localhost:9200",
"_comment": "Elasticsearch mapping name. Gets created automatically if doesn't exist ",
"type.name": "items",
"_comment": "Which topic to stream data from into Elasticsearch",
"topics": "kafka-items",
"auto.offset.reset" : "earliest",
"_comment": "If the Kafka message doesn't have a key (as is the case with JDBC source) you need to specify key.ignore=true. If you don't, you'll get an error from the Connect task: 'ConnectException: Key is used as document id and can not be null.",
"key.ignore": "true"
}
}
As you can see, I am trying to set auto.offset.reset to earliest, so that if it is keeping track of my records somehow it will start over, but all in vain.
"auto.offset.reset" : "earliest" can only be used inside the connect-distributed.properties file, not the JSON connector configurations
And in that file, since it's a consumer configuration, it's named consumer.auto.offset.reset.
Also, the consumer group is mapped to the name field of the connector configuration, so unless that's changed, you'd be continuing to consume from where the previous one of the same name left off until the group offsets are reset or the name is changed. By default, the group name is connect-${connector_name}
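A rough sketch of both options (the broker address is a placeholder; the group name follows the connect-<connector_name> convention applied to the items-sink connector above, and the script is kafka-consumer-groups.sh in a plain Apache Kafka install):
# connect-distributed.properties (worker config, not the connector JSON)
consumer.auto.offset.reset=earliest

# or, with the connector stopped, reset the sink's consumer group offsets
bin/kafka-consumer-groups --bootstrap-server localhost:9092 \
  --group connect-items-sink \
  --topic kafka-items \
  --reset-offsets --to-earliest --execute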
