Spring Kafka commit/reset offset does not work when the application goes down

As described in the documentation, an offset should only be committed when I explicitly acknowledge (with AckMode.MANUAL_IMMEDIATE or AckMode.MANUAL) or at the end of listener execution (with AckMode.RECORD). However, if the application goes down in the middle of processing the method annotated with @KafkaListener, the message is not redelivered: on restart the application starts reading from the next valid message, and the message that was being processed when the application went down is lost. How do I get the application to reprocess an uncommitted message when it is restarted in the middle of processing? I have also tried configuring AUTO_OFFSET_RESET_CONFIG as earliest, latest, and none, without success in all three modes. For testing purposes I created a topic with just one partition, and I forced the listener to use the container factory that I define manually.
Spring Boot version: 2.2.6
@Configuration
class KafkaTestConfiguration {

    @Bean
    fun producerFactory(): ProducerFactory<String, String> {
        return DefaultKafkaProducerFactory(producerConfigs())
    }

    @Bean
    fun consumerFactory(): ConsumerFactory<Any, Any> {
        return DefaultKafkaConsumerFactory(consumerConfigs())
    }

    @Bean
    fun producerConfigs(): Map<String, Any> {
        val props: MutableMap<String, Any> = HashMap()
        props[ProducerConfig.BOOTSTRAP_SERVERS_CONFIG] = "localhost:9094"
        props[ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG] = StringSerializer::class.java
        props[ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG] = StringSerializer::class.java
        return props
    }

    @Bean
    fun consumerConfigs(): Map<String, Any> {
        val props: MutableMap<String, Any> = HashMap()
        props[ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG] = "localhost:9094"
        props[ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG] = StringDeserializer::class.java
        props[ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG] = StringDeserializer::class.java
        props[ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG] = 20000
        props[ConsumerConfig.GROUP_ID_CONFIG] = "kafka-retry"
        props[ConsumerConfig.AUTO_OFFSET_RESET_CONFIG] = "earliest"
        props[ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG] = "false"
        return props
    }

    @Bean
    fun kafkaTemplate(): KafkaTemplate<String, String> {
        return KafkaTemplate(producerFactory())
    }

    @Bean
    fun kafkaListenerContainerFactory(): ConcurrentKafkaListenerContainerFactory<Any, Any> {
        val factory: ConcurrentKafkaListenerContainerFactory<Any, Any> = ConcurrentKafkaListenerContainerFactory()
        factory.consumerFactory = consumerFactory()
        val containerProperties = factory.containerProperties
        containerProperties.isAckOnError = false
        containerProperties.ackMode = AckMode.MANUAL_IMMEDIATE
        containerProperties.commitLogLevel = LogIfLevelEnabled.Level.INFO
        containerProperties.isLogContainerConfig = true
        return factory
    }
}

@Component
class KafkaListenerAck {

    @KafkaListener(
        id = "listMsgAckConsumer",
        topics = ["kafkaListenerTest1"],
        groupId = "kafka-retry",
        concurrency = "1",
        containerFactory = "kafkaListenerContainerFactory"
    )
    fun onMessage(data: ConsumerRecord<String, String>, acknowledgment: Acknowledgment?) {
        println("listMsgAckConsumer1 - topic ${data.topic()} offset ${data.offset()} partition ${data.partition()} message ${data.value()}")
        println("If the container is stopped here, the next poll will not redeliver the current unacknowledged message")
        acknowledgment?.acknowledge()
    }
}

The offset will not be committed until acknowledgment.acknowledge() is called. Set the commitLogLevel container property to DEBUG to see commit activity.
auto.offset.reset only applies if the consumer group has never committed an offset (new consumer groups only).
If you can't figure it out from the log, edit the question with a log snippet.
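As a rough sketch of that suggestion (in Java; the injected consumerFactory bean and the logging property are assumptions to make the example self-contained):

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.listener.ContainerProperties;
import org.springframework.kafka.support.LogIfLevelEnabled;

@Configuration
class CommitLoggingConfig {

    @Bean
    ConcurrentKafkaListenerContainerFactory<Object, Object> kafkaListenerContainerFactory(
            ConsumerFactory<Object, Object> consumerFactory) {
        ConcurrentKafkaListenerContainerFactory<Object, Object> factory =
                new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory);
        ContainerProperties props = factory.getContainerProperties();
        props.setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
        // Emit a log line for every offset commit; the org.springframework.kafka
        // category must also be enabled at DEBUG, e.g.
        // logging.level.org.springframework.kafka=DEBUG in application.properties
        props.setCommitLogLevel(LogIfLevelEnabled.Level.DEBUG);
        return factory;
    }
}

With that in place, every commit shows up in the log, so you can verify that offsets really are only committed on acknowledge().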

Related

Meter registration fails on Spring Boot Kafka consumer with Prometheus MeterRegistry

I am investigating a bug report in our application (Spring Boot) regarding the Kafka metric kafka.consumer.fetch.manager.records.consumed.total being missing.
The application has two Kafka consumers, let's call them the query-routing and query-tracking consumers. They are configured via the @KafkaListener annotation, and each consumer has its own instance of ConcurrentKafkaListenerContainerFactory.
The query-routing consumer is configured as:
@Configuration
@EnableKafka
public class QueryRoutingConfiguration {

    @Bean(name = "queryRoutingContainerFactory")
    public ConcurrentKafkaListenerContainerFactory<String, RoutingInfo> kafkaListenerContainerFactory(MeterRegistry meterRegistry) {
        Map<String, Object> consumerConfigs = new HashMap<>();
        // For brevity I removed the configs as they are trivial configs like bootstrap servers and serializers
        DefaultKafkaConsumerFactory<String, RoutingInfo> consumerFactory =
                new DefaultKafkaConsumerFactory<>(consumerConfigs);
        consumerFactory.addListener(new MicrometerConsumerListener<>(meterRegistry));
        ConcurrentKafkaListenerContainerFactory<String, RoutingInfo> factory =
                new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory);
        factory.getContainerProperties().setIdleEventInterval(5000L);
        return factory;
    }
}
And the query-tracking consumer is configured as:
@Configuration
@EnableKafka
public class QueryTrackingConfiguration {

    private static final FixedBackOff NO_ATTEMPTS = new FixedBackOff(Duration.ofSeconds(0).toMillis(), 0L);

    @Bean(name = "queryTrackingContainerFactory")
    public ConcurrentKafkaListenerContainerFactory<String, QueryTrackingMessage> kafkaListenerContainerFactory(MeterRegistry meterRegistry) {
        Map<String, Object> consumerConfigs = new HashMap<>();
        // For brevity I removed the configs as they are trivial configs like bootstrap servers and serializers
        DefaultKafkaConsumerFactory<String, QueryTrackingMessage> consumerFactory =
                new DefaultKafkaConsumerFactory<>(consumerConfigs);
        consumerFactory.addListener(new MicrometerConsumerListener<>(meterRegistry));
        ConcurrentKafkaListenerContainerFactory<String, QueryTrackingMessage> factory =
                new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory);
        factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL);
        factory.setBatchListener(true);
        DefaultErrorHandler deusErrorHandler = new DefaultErrorHandler(NO_ATTEMPTS);
        factory.setCommonErrorHandler(deusErrorHandler);
        return factory;
    }
}
The MeterRegistryConfigurator bean configuration is set as:
@Configuration
public class MeterRegistryConfigurator {

    private static final Logger LOG = LoggerFactory.getLogger(MeterRegistryConfigurator.class);
    private static final String PREFIX = "dps";

    @Bean
    MeterRegistryCustomizer<MeterRegistry> meterRegistryCustomizer() {
        return registry -> registry.config()
            .onMeterAdded(meter -> LOG.info("onMeterAdded: {}", meter.getId().getName()))
            .onMeterRemoved(meter -> LOG.info("onMeterRemoved: {}", meter.getId().getName()))
            .onMeterRegistrationFailed(
                (id, s) -> LOG.info("onMeterRegistrationFailed - id '{}' value '{}'", id.getName(), s))
            .meterFilter(PrefixMetricFilter.withPrefix(PREFIX))
            .meterFilter(
                MeterFilter.deny(id ->
                    id.getName().startsWith(PREFIX + ".jvm")
                        || id.getName().startsWith(PREFIX + ".system")
                        || id.getName().startsWith(PREFIX + ".process")
                        || id.getName().startsWith(PREFIX + ".logback")
                        || id.getName().startsWith(PREFIX + ".tomcat"))
            )
            .meterFilter(MeterFilter.ignoreTags("host", "host.name"))
            .namingConvention(NamingConvention.snakeCase);
    }
}
The @KafkaListener for each consumer is set as
@KafkaListener(
    id = "query-routing",
    idIsGroup = true,
    topics = "${query-routing.consumer.topic}",
    groupId = "${query-routing.consumer.groupId}",
    containerFactory = "queryRoutingContainerFactory")
public void listenForMessages(ConsumerRecord<String, RoutingInfo> record) {
    // Handle each record ...
}
and
@KafkaListener(
    id = "query-tracking",
    idIsGroup = true,
    topics = "${query-tracking.consumer.topic}",
    groupId = "${query-tracking.consumer.groupId}",
    containerFactory = "queryTrackingContainerFactory"
)
public void listenForMessages(List<ConsumerRecord<String, QueryTrackingMessage>> consumerRecords, Acknowledgment ack) {
    // Handle each record ...
}
When the application starts up, going to the actuator/prometheus endpoint I can see the metric for both consumers:
# HELP dps_kafka_consumer_fetch_manager_records_consumed_total The total number of records consumed
# TYPE dps_kafka_consumer_fetch_manager_records_consumed_total counter
dps_kafka_consumer_fetch_manager_records_consumed_total{client_id="consumer-qf-query-tracking-consumer-1",kafka_version="3.1.2",spring_id="not.managed.by.Spring.consumer-qf-query-tracking-consumer-1",} 7.0
dps_kafka_consumer_fetch_manager_records_consumed_total{client_id="consumer-QF-Routing-f5d0d9f1-e261-407b-954d-5d217211dee0-2",kafka_version="3.1.2",spring_id="not.managed.by.Spring.consumer-QF-Routing-f5d0d9f1-e261-407b-954d-5d217211dee0-2",} 0.0
But a few seconds later there is a new call to io.micrometer.core.instrument.binder.kafka.KafkaMetrics#checkAndBindMetrics which will remove a set of metrics (including kafka.consumer.fetch.manager.records.consumed.total)
onMeterRegistrationFailed - dps.kafka.consumer.fetch.manager.records.consumed.total string Prometheus requires that all meters with the same name have the same set of tag keys. There is already an existing meter named 'dps.kafka.consumer.fetch.manager.records.consumed.total' containing tag keys [client_id, kafka_version, spring_id]. The meter you are attempting to register has keys [client_id, kafka_version, spring_id, topic].
Going again to actuator/prometheus will only show the metric for the query-routing consumer:
# HELP deus_dps_persistence_kafka_consumer_fetch_manager_records_consumed_total The total number of records consumed for a topic
# TYPE deus_dps_persistence_kafka_consumer_fetch_manager_records_consumed_total counter
deus_dps_persistence_kafka_consumer_fetch_manager_records_consumed_total{client_id="consumer-QF-Routing-0a739a21-4764-411a-9cc6-0e60293b40b4-2",kafka_version="3.1.2",spring_id="not.managed.by.Spring.consumer-QF-Routing-0a739a21-4764-411a-9cc6-0e60293b40b4-2",theKey="routing",topic="QF_query_routing_v1",} 0.0
As you can see above the metric for the query-tracking consumer is gone.
As the log says, "The meter you are attempting to register has keys [client_id, kafka_version, spring_id, topic]". The issue is that I cannot find where this metric with a topic key is being registered; registering it triggers io.micrometer.core.instrument.binder.kafka.KafkaMetrics#checkAndBindMetrics, which removes the metric for the query-tracking consumer.
I am using
micrometer-registry-prometheus version 1.9.5
spring boot version 2.7.5
spring kafka (org.springframework.kafka:spring-kafka)
My question is: why does registration of the metric kafka.consumer.fetch.manager.records.consumed.total fail, causing it to be removed for the query-tracking consumer, and how can I fix it?
I believe this is internal in Micrometer KafkaMetrics.
Periodically, it checks for new metrics; presumably, the topic one shows up after the consumer subscribes to the topic.
@Override
public void bindTo(MeterRegistry registry) {
    this.registry = registry;
    commonTags = getCommonTags(registry);
    prepareToBindMetrics(registry);
    checkAndBindMetrics(registry);
    // VVVVV the periodic re-check that binds newly appearing metrics VVVVV
    scheduler.scheduleAtFixedRate(() -> checkAndBindMetrics(registry), getRefreshIntervalInMillis(),
            getRefreshIntervalInMillis(), TimeUnit.MILLISECONDS);
}
You should be able to write a filter to exclude the one with fewer tags.
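A sketch of such a filter, assuming the dps prefix from the MeterRegistryConfigurator above; it denies the early, topic-less variant of the meter so only the fully-tagged variant is ever registered:

import io.micrometer.core.instrument.Meter;
import io.micrometer.core.instrument.config.MeterFilter;
import io.micrometer.core.instrument.config.MeterFilterReply;

public final class KafkaMeterFilters {

    private KafkaMeterFilters() {
    }

    // Deny the variant of the records-consumed meter that lacks the "topic"
    // tag; the fully-tagged variant can then register without a key conflict.
    public static MeterFilter denyTopicLessRecordsConsumed() {
        return new MeterFilter() {
            @Override
            public MeterFilterReply accept(Meter.Id id) {
                if ("dps.kafka.consumer.fetch.manager.records.consumed.total".equals(id.getName())
                        && id.getTag("topic") == null) {
                    return MeterFilterReply.DENY;
                }
                return MeterFilterReply.NEUTRAL;
            }
        };
    }
}

Appending .meterFilter(KafkaMeterFilters.denyTopicLessRecordsConsumed()) to the chain in MeterRegistryConfigurator should work; note that filters run in registration order, so it must come after the prefix filter to see the dps-prefixed name.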

java.lang.IllegalArgumentException: A consumerFactory is required

This configuration existed before my changes:
@Configuration
public class KafkaConfiguration {

    @Value(value = "${kafka.bootstrapServers:XXX}")
    private String bootstrapServers;

    @Value(value = "${kafka.connect.url:XXX}")
    public String kafkaConnectUrl;

    @Bean
    public AdminClient adminClient() {
        Map<String, Object> configs = new HashMap<>();
        configs.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        return AdminClient.create(configs);
    }

    @Bean
    public WebProperties.Resources resources() {
        return new WebProperties.Resources();
    }

    @Bean
    public KafkaConnectClient connectClient() {
        return new KafkaConnectClient(new org.sourcelab.kafka.connect.apiclient.Configuration(kafkaConnectUrl));
    }
}
I've set up the app so that I can use KafkaTemplate:
application.yaml
spring:
  profiles:
    active: dev
  kafka:
    bootstrap-servers: XXX
    properties:
      schema.registry.url: XXX
    producer:
      key-serializer: org.apache.kafka.common.serialization.StringSerializer
      value-serializer: io.confluent.kafka.serializers.protobuf.KafkaProtobufSerializer
    consumer:
      group-id: kafka-management-service-group-id
      auto-offset-reset: earliest
      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      value-deserializer: io.confluent.kafka.serializers.protobuf.KafkaProtobufDeserializer
I can correctly write and read messages using KafkaTemplate from a topic with 1 partition that I created manually:
@GetMapping("/loadData")
public void loadData() {
    IntStream.range(0, 1000000).forEach(key -> {
        final General general = General.newBuilder()
                .setUuid(StringValue.newBuilder().setValue(String.valueOf(key)).build())
                .build();
        template.send("test",
                String.valueOf(key),
                ScheduledJob.newBuilder().setGeneral(general).build());
    });
}

@GetMapping("/readData")
public void readData() {
    Collection<TopicPartitionOffset> topicPartitionOffsets = new ArrayList<>();
    final TopicPartitionOffset topicPartitionOffset = new TopicPartitionOffset("test", 0, SeekPosition.BEGINNING);
    topicPartitionOffsets.add(topicPartitionOffset);
    final Iterator<ConsumerRecord<String, ScheduledJob>> iterator = template.receive(topicPartitionOffsets).iterator();
    while (iterator.hasNext()) {
        final ConsumerRecord<String, ScheduledJob> next = iterator.next();
        System.out.println("Key = " + next.key());
        System.out.println("Value = " + next.value());
        System.out.println();
    }
}
Current task: I'd like to read all messages of a topic from a specific partition X (therefore I need the number of partitions), from a specific offset Y to a specific offset Z, and ignore the rest.
I've read that I can do something along these lines to get the partition number for a specific topic:
@Autowired
private final ConsumerFactory<Object, Object> consumerFactory;

public String[] partitions(String topic) {
    try (Consumer<Object, Object> consumer = consumerFactory.createConsumer()) {
        return consumer.partitionsFor(topic).stream()
                .map(pi -> "" + pi.partition())
                .toArray(String[]::new);
    }
}
Problem: Despite the fact that I can read and write messages, a ConsumerFactory isn't created automatically.
Context: Our frontend needs to support paging, multi-column ordering, and filtering. The source is Kafka messages from a generic topic with any number of partitions. Maybe this isn't the best way to do it, so I'm also open to suggestions.
I don't see how it is possible to receive records without adding a consumer factory to the template; Boot does not do that. See KafkaTemplate.oneOnly(), which is called by the receive methods...
private Properties oneOnly() {
    Assert.notNull(this.consumerFactory, "A consumerFactory is required");
    Properties props = new Properties();
    props.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "1");
    return props;
}
Boot auto-configures a consumer factory, but it does not add it to the template, because using the template to receive is an uncommon use case.
/**
 * Set a consumer factory for receive operations.
 * @param consumerFactory the consumer factory.
 * @since 2.8
 */
public void setConsumerFactory(ConsumerFactory<K, V> consumerFactory) {
    this.consumerFactory = consumerFactory;
}
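So the fix is to attach the auto-configured consumer factory when defining the template bean; a minimal sketch, assuming Boot's default ProducerFactory and ConsumerFactory beans (names and generics are illustrative):

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.kafka.core.ProducerFactory;

@Configuration
public class KafkaTemplateConfig {

    @Bean
    public KafkaTemplate<Object, Object> kafkaTemplate(
            ProducerFactory<Object, Object> producerFactory,
            ConsumerFactory<Object, Object> consumerFactory) {
        KafkaTemplate<Object, Object> template = new KafkaTemplate<>(producerFactory);
        // Without this, template.receive(...) fails with
        // "A consumerFactory is required" (see oneOnly() above).
        template.setConsumerFactory(consumerFactory);
        return template;
    }
}

With the consumer factory attached, the partitionsFor(...) snippet from the question will also work, and a range read from partition X could start at offset Y with new TopicPartitionOffset("test", X, Y) and stop once the consumed offset reaches Z.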

Spring RabbitMq Listener Configuration

We are using RabbitMQ with the default Spring Boot configuration. We have a use case in which we want no parallelism for one of the listeners: only one thread of the consumer should be running at any given point in time. We want this because the use case requires the messages to be consumed in order; if there are multiple threads per consumer, messages may be processed out of order.
Since we are using the defaults and have not explicitly tweaked the container, we are using the SimpleMessageListenerContainer. Going by the documentation, I tried fixing the number of consumers using concurrency = "1". The annotation on the target method looks like this: @RabbitListener(queues = ["queue-name"], concurrency = "1").
As per the documentation this should have ensured that there is only one consumer thread.
{@link org.springframework.amqp.rabbit.listener.SimpleMessageListenerContainer
 * SimpleMessageListenerContainer} if this value is a simple integer, it sets a fixed
 * number of consumers in the {@code concurrentConsumers} property
2021-10-29 06:11:26.361 INFO 29752 --- [ntContainer#4-1] c.t.t.i.p.s.xxx : Created xxx
2021-10-29 06:11:26.383 INFO 29752 --- [ntContainer#0-1] c.t.t.i.p.s.xxx : Created xxx
Thread IDs to be noted here are [ntContainer#4-1] and [ntContainer#0-1].
So the question is: how can we ensure that there is only one thread per consumer at any given point in time?
Edit: Adding the code of the consumer class for more context
@ConditionalOnProperty(value = ["rabbitmq.sharebooking.enabled"], havingValue = "true", matchIfMissing = false)
class ShareBookingConsumer @Autowired constructor(
    private val shareBookingRepository: ShareBookingRepository,
    private val objectMapper: ObjectMapper,
    private val shareDtoToShareBookingConverter: ShareBookingDtoToShareBookingConverter
) {
    private val logger = LoggerFactory.getLogger(javaClass)

    init {
        logger.info("start sharebooking created consumer")
    }

    @RabbitListener(queues = ["tax_engine.share_booking"], concurrency = "1-1", exclusive = true)
    @Timed
    @Transactional
    fun consumeShareBookingCreatedEvent(message: Message) {
        try {
            consumeShareBookingCreatedEvent(message.body)
        } catch (e: Exception) {
            throw AmqpRejectAndDontRequeueException(e)
        }
    }

    private fun consumeShareBookingCreatedEvent(event: ByteArray) {
        toShareBookingCreationMessageEvent(event).let { creationEvent ->
            RmqMetrics.measureEventMetrics(creationEvent)
            val shareBooking = shareDtoToShareBookingConverter.convert(creationEvent.data)
            val persisted = shareBookingRepository.save(shareBooking)
            logger.info("Created shareBooking ${creationEvent.data.id}")
        }
    }

    private fun toShareBookingCreationMessageEvent(event: ByteArray) =
        objectMapper.readValue(event, shareBookingCreateEventType)

    companion object {
        private val shareBookingCreateEventType =
            object : TypeReference<RMQMessageEnvelope<ShareBookingCreationDto>>() {}
    }
}
Edit: Adding application thread analysis using VisualVM.
5 threads get created for 5 listeners (screenshot: https://i.stack.imgur.com/gQINE.png).
Set concurrency = "1-1". Note that the concurrency of the listener depends not only on concurrentConsumers, but also on maxConcurrentConsumers:
If you are using a custom factory:
@Bean
public SimpleRabbitListenerContainerFactory rabbitListenerContainerFactory(CachingConnectionFactory cachingConnectionFactory) {
    SimpleRabbitListenerContainerFactory factory = new SimpleRabbitListenerContainerFactory();
    factory.setConnectionFactory(cachingConnectionFactory);
    factory.setConcurrentConsumers(1);
    factory.setMaxConcurrentConsumers(1);
    return factory;
}
See: https://docs.spring.io/spring-amqp/docs/current/reference/html/#simplemessagelistenercontainer for detail.
EDIT:
I did a simple test with 2 consumers & 2 threads:
@RabbitListener(queues = "myQueue111", concurrency = "1-1")
public void handleMessage(Object message) throws InterruptedException {
    LOGGER.info("Received message : {} in {}", message, Thread.currentThread().getName());
}

@RabbitListener(queues = "myQueue222", concurrency = "1-1")
public void handleMessage2(Object message) throws InterruptedException {
    LOGGER.info("Received message222 : {} in {}", message, Thread.currentThread().getName());
}
Try this:
@RabbitListener(queues = ["queue-name"], exclusive = true)
See https://docs.spring.io/spring-amqp/docs/current/reference/html/#exclusive-consumer

Problem with start of @KafkaListener (Spring)

What is needed
I'm writing an application (Spring + Kotlin) that consumes messages from Kafka. If I set autoStartup = "true" when declaring a @KafkaListener, the app works fine, but only if the broker is available. When the broker is unavailable, the application crashes on start. That is undesirable behavior: the application must keep working and perform its other functions.
What I tried to do
To avoid crashing the application on start, somebody on this site, in another topic, advised setting autoStartup = "false" when declaring the @KafkaListener. That really did prevent the crash on start, but now I cannot successfully start the KafkaListener manually. In other examples I saw autowiring of KafkaListenerEndpointRegistry, but when I try to do it:
@Service
class KafkaConsumer @Autowired constructor(
    private val kafkaListenerEndpointRegistry: KafkaListenerEndpointRegistry
) {
IntelliJ IDEA warns:
Could not autowire. No beans of 'KafkaListenerEndpointRegistry' type found.
When I try to use KafkaListenerEndpointRegistry without autowiring and run this code:
@Service
class KafkaConsumer {
    private val logger = LoggerFactory.getLogger(this::class.java)
    private val kafkaListenerEndpointRegistry = KafkaListenerEndpointRegistry()

    @Scheduled(fixedDelay = 10000)
    fun startCpguListener() {
        val container = kafkaListenerEndpointRegistry.getListenerContainer("consumer1")
        if (!container.isRunning)
            try {
                logger.info("Kafka Consumer is not running. Trying to start...")
                container.start()
            } catch (e: Exception) {
                logger.error(e.message)
            }
    }

    @KafkaListener(
        id = "consumer1",
        topics = ["cpgdb.public.user"],
        autoStartup = "false"
    )
    private fun listen(it: ConsumerRecord<JsonNode, JsonNode>, qwe: Consumer<Any, Any>) {
        val pay = it.value().get("payload")
        val after = pay.get("after")
        val id = after["id"].asInt()
        val receivedUser = CpguUser(
            id = id,
            name = after["name"].asText()
        )
        logger.info("received user with id = $id")
    }
}
kafkaListenerEndpointRegistry.getListenerContainer("consumer1") always returns null. I guess it's because I didn't autowire kafkaListenerEndpointRegistry. How can I do that? Or, if another solution to my problem exists, I'll appreciate any help! Thanks!
Here is the Kafka config:
@Configuration
@EnableConfigurationProperties(KafkaProperties::class)
class KafkaConfiguration(private val props: KafkaProperties) {

    @Bean
    fun kafkaListenerContainerFactory(): ConcurrentKafkaListenerContainerFactory<Any, Any> {
        val factory = ConcurrentKafkaListenerContainerFactory<Any, Any>()
        factory.consumerFactory = consumerFactory()
        factory.setConcurrency(1)
        factory.setMessageConverter(MessagingMessageConverter())
        factory.setStatefulRetry(true)
        val retryTemplate = RetryTemplate()
        retryTemplate.setRetryPolicy(AlwaysRetryPolicy())
        retryTemplate.setBackOffPolicy(ExponentialBackOffPolicy())
        factory.setRetryTemplate(retryTemplate)
        val handler = SeekToCurrentErrorHandler()
        handler.isAckAfterHandle = false
        factory.setErrorHandler(handler)
        factory.containerProperties.isMissingTopicsFatal = false
        return factory
    }

    @Bean
    fun consumerFactory(): ConsumerFactory<Any, Any> {
        return DefaultKafkaConsumerFactory(consumerConfigs())
    }

    @Bean
    fun consumerConfigs(): Map<String, Any> {
        return mapOf(
            ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG to props.bootstrap.address,
            ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG to JsonDeserializer::class.java,
            ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG to JsonDeserializer::class.java,
            ConsumerConfig.INTERCEPTOR_CLASSES_CONFIG to listOf(MonitoringConsumerInterceptor::class.java),
            ConsumerConfig.CLIENT_ID_CONFIG to props.receiver.clientId,
            ConsumerConfig.GROUP_ID_CONFIG to props.receiver.groupId,
            ConsumerConfig.AUTO_OFFSET_RESET_CONFIG to "earliest",
            ConsumerConfig.ISOLATION_LEVEL_CONFIG to "read_committed",
            ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG to true
        )
    }
}
spring boot version: 2.3.0
spring-kafka version: 2.5.3
kafka-clients version: 2.5.0
Just ignore IntelliJ's warning about the autowiring; the bean does exist; it's just that IntelliJ can't detect it.
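For reference, a sketch of the scheduled starter with the registry injected rather than instantiated (shown in Java; the container id "consumer1" matches the listener above):

import org.springframework.kafka.config.KafkaListenerEndpointRegistry;
import org.springframework.kafka.listener.MessageListenerContainer;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

@Service
public class KafkaStarter {

    private final KafkaListenerEndpointRegistry registry;

    // Constructor injection works despite the IDE warning; the registry bean
    // is registered by @EnableKafka / Spring Boot's Kafka auto-configuration.
    public KafkaStarter(KafkaListenerEndpointRegistry registry) {
        this.registry = registry;
    }

    @Scheduled(fixedDelay = 10_000)
    public void startConsumer() {
        MessageListenerContainer container = registry.getListenerContainer("consumer1");
        if (container != null && !container.isRunning()) {
            container.start();
        }
    }
}

The key point: getListenerContainer(...) only knows about listeners registered with the framework-managed registry, which is why new KafkaListenerEndpointRegistry() always returns null.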

Spring Kafka global transaction ID stays open after program ends

I am creating a Spring Kafka producer under Spring Boot which will send data to Kafka and then write to a database; I want all that work to be in one transaction. I am new to Kafka and no expert on Spring, and am having some difficulty. Any pointers much appreciated.
So far my code writes to Kafka successfully in a loop. I have not yet set up the DB, but have proceeded to set up global transactioning by adding a transactionIdPrefix to the producerFactory in the configuration:
producerFactory.setTransactionIdPrefix("MY_SERVER");
and added @Transactional to the method that does the Kafka send. Eventually I plan to do my DB work in that same method.
Problem: the code runs great the first time. But if I stop the program, even cleanly, I find that the code hangs the 2nd time I run it, as soon as it enters the @Transactional method. If I comment out the @Transactional, it enters the method but hangs on the Kafka template send().
The problem seems to be the transaction ID. If I change the prefix and rerun, the program runs fine again the first time but hangs when I run it again, until a new prefix is chosen. Since the transaction ID counter starts at zero after a restart, if the transaction ID prefix does not change then the same transaction ID will be reused upon restart.
It seems to me that the original transaction ID is still open on the server and was never committed. (I can read the data off the topic using the console consumer, but that reads uncommitted records.) But if that is the case, how do I get Spring to commit the transaction? I am thinking my configuration must be wrong. Or is the issue possibly that transaction IDs can never be reused? (In which case, how does one solve that?)
Here is my relevant code. Config is:
@SpringBootApplication
public class MYApplication {

    @Autowired
    private static ChangeSweeper changeSweeper;

    @Value("${kafka.bootstrap-servers}")
    private String bootstrapServers;

    @Bean
    public ProducerFactory<String, String> producerFactory() {
        Map<String, Object> configProps = new HashMap<>();
        configProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        configProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        configProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        DefaultKafkaProducerFactory<String, String> producerFactory = new DefaultKafkaProducerFactory<>(configProps);
        producerFactory.setTransactionIdPrefix("MY_SERVER");
        return producerFactory;
    }

    @Bean
    public KafkaTransactionManager<String, String> kafkaTransactionManager() {
        return new KafkaTransactionManager<String, String>(producerFactory());
    }

    @Bean(name = "kafkaProducerTemplate")
    public KafkaTemplate<String, String> kafkaProducerTemplate() {
        return new KafkaTemplate<>(producerFactory());
    }
}
And the method that does the transaction is:
@Transactional
public void send(final List<Record> records) {
    logger.debug("sending {} records; batchSize={}; topic={}", records.size(), batchSize, kafkaTopic);
    // Divide the record set into batches of size batchSize and send each batch with a kafka transaction:
    for (int batchStartIndex = 0; batchStartIndex < records.size(); batchStartIndex += batchSize) {
        int batchEndIndex = Math.min(records.size(), batchStartIndex + batchSize); // subList's toIndex is exclusive
        List<Record> nextBatch = records.subList(batchStartIndex, batchEndIndex);
        logger.debug("## batch is from " + batchStartIndex + " to " + (batchEndIndex - 1));
        for (Record record : nextBatch) {
            kafkaProducerTemplate.send(kafkaTopic, record.getKey().toString(), record.getData().toString());
            logger.debug("Sending> " + record);
        }
        // I will put the DB writes here
    }
}
This works fine for me no matter how many times I run it (but I have to run 3 broker instances on my local machine because transactions require that by default)...
@SpringBootApplication
@EnableTransactionManagement
public class So47817034Application {

    public static void main(String[] args) {
        SpringApplication.run(So47817034Application.class, args).close();
    }

    private final CountDownLatch latch = new CountDownLatch(2);

    @Bean
    public ApplicationRunner runner(Foo foo) {
        return args -> {
            foo.send("foo");
            foo.send("bar");
            this.latch.await(10, TimeUnit.SECONDS);
        };
    }

    @Bean
    public KafkaTransactionManager<Object, Object> kafkaTransactionManager(KafkaProperties properties) {
        return new KafkaTransactionManager<Object, Object>(kafkaProducerFactory(properties));
    }

    @Bean
    public ProducerFactory<Object, Object> kafkaProducerFactory(KafkaProperties properties) {
        DefaultKafkaProducerFactory<Object, Object> factory =
                new DefaultKafkaProducerFactory<Object, Object>(properties.buildProducerProperties());
        factory.setTransactionIdPrefix("foo-");
        return factory;
    }

    @KafkaListener(id = "foo", topics = "so47817034")
    public void listen(String in) {
        System.out.println(in);
        this.latch.countDown();
    }

    @Component
    public static class Foo {

        @Autowired
        private KafkaTemplate<Object, Object> template;

        @Transactional
        public void send(String go) {
            this.template.send("so47817034", go);
        }
    }
}
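A side note on the three-broker requirement mentioned above: for a single-broker development setup you can instead relax the transaction log's replication requirements on the broker (standard Kafka settings in server.properties; the defaults are 3 and 2):

# Allow Kafka transactions on a single-broker dev cluster
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1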
