Spring Cloud Stream KStream DLQ Issue - apache-kafka-streams

Any exception that occurs during processing should go to the DLQ, but currently that is not happening.
I am getting org.apache.kafka.common.errors.SerializationException: Error serializing Avro message, and the record is not going to the DLQ. I am using the Spring Cloud Stream Kafka Streams (KStream) binder. All topics are created at startup of the app.
My application.yml
spring:
  application:
    name: demo-stream
  cloud:
    stream:
      function:
        definition: rawProcessor
      bindings:
        rawProcessor-in-0:
          destination: raw
          consumer:
            enableDlq: true
            dlqName: dlq
        rawProcessor-out-0:
          destination: fx
        rawProcessor-out-1:
          destination: cp
        rawProcessor-out-2:
          destination: cl
      kafka:
        streams:
          bindings:
            rawProcessor-in-0:
              consumer:
                enableDlq: true
                dlqName: dlq
                valueSerde: org.apache.kafka.common.serialization.Serdes$StringSerde
            rawProcessor-out-0:
              producer:
                keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
                valueSerde: io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde
            rawProcessor-out-1:
              producer:
                keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
                valueSerde: io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde
            rawProcessor-out-2:
              producer:
                keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
                valueSerde: io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde
          binder:
            deserializationExceptionHandler: sendToDlq
            configuration:
              schema.registry.url: http://localhost:8081
              specific.avro.reader: true
My processing class
public class RawKafkaMessageStream {

    private final Classifier classifier;

    private final static String STREAM_SPLIT_BRANCH_PREFIX = "split-";

    @Bean
    public Function<KStream<String, String>, KStream<String, SpecificRecordBase>[]> rawProcessor() {
        return rawKStream -> {
            final Map<String, KStream<String, RecordHolder<SpecificRecordBase>>> recordKStreamMap = rawKStream
                    .map(this::convertIntoKeyValueRecord)
                    .peek((key, value) -> log.info("Key: {}, value: {}", key, value))
                    .filter((key, value) -> value != null)
                    .split(Named.as(STREAM_SPLIT_BRANCH_PREFIX))
                    .branch((key, value) -> value.getType() == RecordType.CP, Branched.as(RecordType.CP.name()))
                    .branch((key, value) -> value.getType() == RecordType.CL, Branched.as(RecordType.CL.name()))
                    .noDefaultBranch();

            KStream<String, SpecificRecordBase> validatedCPStream = getValidatedRecordStream(recordKStreamMap, RecordType.CP);
            KStream<String, SpecificRecordBase> validatedCLStream = getValidatedRecordStream(recordKStreamMap, RecordType.CL);
            return new KStream[]{validatedCPStream, validatedCLStream};
        };
    }

    private KStream<String, SpecificRecordBase> getValidatedRecordStream(
            Map<String, KStream<String, RecordHolder<SpecificRecordBase>>> recordKStreamMap,
            RecordType recordType) {
        return recordKStreamMap.get(STREAM_SPLIT_BRANCH_PREFIX + recordType.name());
    }

    private KeyValue<String, RecordHolder<SpecificRecordBase>> convertIntoKeyValueRecord(final String key,
                                                                                         final String value) {
        log.debug("Raw msg received with key: {} and payload: {}", key, value); // key will be null here
        final KeyValue<String, RecordHolder<SpecificRecordBase>> processing = classifier.classify(value);
        log.info("Processing msg with key: {} and payload: {}", processing.key, processing.value);
        return processing;
    }
}
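Note that enableDlq and deserializationExceptionHandler: sendToDlq in the Kafka Streams binder only cover exceptions thrown while deserializing inbound records. The SerializationException above is raised while writing the Avro output records, i.e. on the producer side of the topology (the inbound side here uses String serdes), so the deserialization DLQ mechanism never sees it. One possible direction, sketched below under assumptions (the handler class is hypothetical, and whether a serialization failure is routed through it depends on the Kafka Streams version), is to register a custom ProductionExceptionHandler through the binder's Kafka Streams configuration:
// Hypothetical production exception handler; decides whether to skip the record or fail the stream.
import java.util.Map;

import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.streams.errors.ProductionExceptionHandler;

public class LogAndContinueProductionExceptionHandler implements ProductionExceptionHandler {

    @Override
    public ProductionExceptionHandlerResponse handle(ProducerRecord<byte[], byte[]> record, Exception exception) {
        // Log here (or forward the raw bytes to a dead-letter topic with your own producer)
        // and keep the stream thread alive instead of letting it die.
        System.err.println("Failed to produce to topic " + record.topic() + ": " + exception.getMessage());
        return ProductionExceptionHandlerResponse.CONTINUE;
    }

    @Override
    public void configure(Map<String, ?> configs) {
        // no-op
    }
}
The handler would then be registered through the standard Kafka Streams property, for example:
spring:
  cloud:
    stream:
      kafka:
        streams:
          binder:
            configuration:
              default.production.exception.handler: com.example.LogAndContinueProductionExceptionHandler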

Related

Cloud Stream not able to track the status of downstream failures

I have written the following code to use the Spring Cloud Stream functional approach to get events from RabbitMQ and publish them to Kafka. I am able to achieve the primary goal, with a caveat: if the Kafka broker goes down for any reason while the application is running, I get logs saying the broker is down, but at the same time I want to stop consuming events from RabbitMQ, or, until the broker comes back up, those messages should be routed to an exchange or a DLQ topic. I have seen in many places the advice to use producer sync: true, but in my case that is not helping. A lot of people also mention @ServiceActivator(inputChannel = "error-topic") for handling failures on the target channel, but that method is never invoked. In short, I don't want to lose messages received from RabbitMQ while Kafka is down for any reason.
application.yml
management:
  health:
    binders:
      enabled: true
    kafka:
      enabled: true
server:
  port: 8081
spring:
  rabbitmq:
    publisher-confirms: true
  kafka:
    bootstrap-servers: localhost:9092
    producer:
      properties:
        max.block.ms: 100
    admin:
      fail-fast: true
  cloud:
    function:
      definition: handle
    stream:
      bindingRetryInterval: 30
      rabbit:
        bindings:
          handle-in-0:
            consumer:
              bindingRoutingKey: MyRoutingKey
              exchangeType: topic
              requeueRejected: true
              acknowledgeMode: AUTO
              # ackMode: MANUAL
              # acknowledge-mode: MANUAL
              # republishToDlq: false
      kafka:
        binder:
          considerDownWhenAnyPartitionHasNoLeader: true
          producer:
            properties:
              max.block.ms: 100
          brokers:
            - localhost
      bindings:
        handle-in-0:
          destination: test_queue
          binder: rabbit
          group: queue
        handle-out-0:
          destination: mytopic
          producer:
            sync: true
            errorChannelEnabled: true
          binder: kafka
      binders:
        error:
          destination: myerror
        rabbit:
          type: rabbit
          environment:
            spring:
              rabbitmq:
                host: localhost
                port: 5672
                username: guest
                password: guest
                virtual-host: rahul_host
        kafka:
          type: kafka
json:
  cuttoff:
    size:
      limit: 1000
CloudStreamConfig.java
@Configuration
public class CloudStreamConfig {

    private static final Logger log = LoggerFactory.getLogger(CloudStreamConfig.class);

    @Autowired
    ChunkService chunkService;

    @Bean
    public Function<Message<RmaValues>, Collection<Message<RmaValues>>> handle() {
        return rmaValue -> {
            log.info("processor runs : message received with request id : {}", rmaValue.getPayload().getRequestId());
            ArrayList<Message<RmaValues>> msgList = new ArrayList<Message<RmaValues>>();
            try {
                List<RmaValues> dividedJson = chunkService.getDividedJson(rmaValue.getPayload());
                for (RmaValues rmaValues : dividedJson) {
                    msgList.add(MessageBuilder.withPayload(rmaValues).build());
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            Channel channel = rmaValue.getHeaders().get(AmqpHeaders.CHANNEL, Channel.class);
            Long deliveryTag = rmaValue.getHeaders().get(AmqpHeaders.DELIVERY_TAG, Long.class);
            // try {
            //     channel.basicAck(deliveryTag, false);
            // } catch (IOException e) {
            //     e.printStackTrace();
            // }
            return msgList;
        };
    }

    @ServiceActivator(inputChannel = "error-topic")
    public void errorHandler(ErrorMessage em) {
        log.info("---------------------------------------got error message over errorChannel: {}", em);
        if (null != em.getPayload() && em.getPayload() instanceof KafkaSendFailureException) {
            KafkaSendFailureException kafkaSendFailureException = (KafkaSendFailureException) em.getPayload();
            if (kafkaSendFailureException.getRecord() != null && kafkaSendFailureException.getRecord().value() != null
                    && kafkaSendFailureException.getRecord().value() instanceof byte[]) {
                log.warn("error channel message. Payload {}", new String((byte[]) (kafkaSendFailureException.getRecord().value())));
            }
        }
    }
}
KafkaProducerConfiguration.java
@Configuration
public class KafkaProducerConfiguration {

    @Value(value = "${spring.kafka.bootstrap-servers}")
    private String bootstrapAddress;

    @Bean
    public ProducerFactory<String, Object> producerFactory() {
        Map<String, Object> configProps = new HashMap<>();
        configProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapAddress);
        configProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        configProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        return new DefaultKafkaProducerFactory<>(configProps);
    }

    @Bean
    public KafkaTemplate<String, String> kafkaTemplate() {
        return new KafkaTemplate(producerFactory());
    }
}
RmModelOutputIngestionApplication.java
@SpringBootApplication(scanBasePackages = "com.abb.rm")
public class RmModelOutputIngestionApplication {

    private static final Logger LOGGER = LogManager.getLogger(RmModelOutputIngestionApplication.class);

    public static void main(String[] args) {
        SpringApplication.run(RmModelOutputIngestionApplication.class, args);
    }

    @Bean("objectMapper")
    public ObjectMapper objectMapper() {
        ObjectMapper mapper = new ObjectMapper();
        LOGGER.info("Returning object mapper...");
        return mapper;
    }
}
First, it seems you are creating a lot of unnecessary code. Why do you have an ObjectMapper? Why do you have a KafkaTemplate? Why do you have a ProducerFactory? These are all already provided for you.
You really only need one function and possibly an error handler, depending on the error-handling strategy you select, which brings me to the error-handling topic. There are three primary ways of handling errors. Here is the link to the doc explaining them all and providing samples. Please read through that and modify your app accordingly, and if something doesn't work or is unclear, feel free to follow up.
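For reference, one of the strategies described there is subscribing to the error channel that receives asynchronous send failures when errorChannelEnabled is true. A minimal sketch is below; the channel name is assumed from the handle-out-0 destination above (the global errorChannel also receives these failures), so verify the exact name against the binder documentation for your version:
// Hypothetical error-channel subscriber; channel name assumed from the "mytopic" destination.
import org.springframework.context.annotation.Configuration;
import org.springframework.integration.annotation.ServiceActivator;
import org.springframework.integration.kafka.support.KafkaSendFailureException;
import org.springframework.messaging.support.ErrorMessage;

@Configuration
public class SendFailureHandler {

    @ServiceActivator(inputChannel = "mytopic.errors")
    public void handleSendFailure(ErrorMessage errorMessage) {
        if (errorMessage.getPayload() instanceof KafkaSendFailureException) {
            KafkaSendFailureException failure = (KafkaSendFailureException) errorMessage.getPayload();
            // The failed ProducerRecord is still available here, so it could be re-published
            // later, parked back in RabbitMQ, or written to a dead-letter destination.
            System.err.println("Send to Kafka failed for record: " + failure.getRecord());
        }
    }
}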

Spring Cloud Stream + Kafka Stream - KStream not consuming message from the topic

Trying out a sample project using Spring Cloud Stream + Kafka Streams, but the messages published to the input topic/queue are not consumed by the processor method (the one taking a KStream as argument).
Binding Definition
public interface WordCountChannelBindings {

    // Channel to PUBLISH and FETCH 'words'
    String _wordsOutput = "words_output_channel";
    String _wordsInput = "words_input_channel";

    // Channel to PUBLISH and FETCH 'words-count' details
    String _countOutput = "counts_output_channel";
    String _countInput = "counts_input_channel";

    // Source
    @Output(_wordsOutput)
    MessageChannel _wordsOutput();

    // Sink
    @Input(_wordsInput)
    KStream<String, PageViewEvent> _wordsInput();

    // Source
    @Output(_countOutput)
    KStream<String, Long> _countOutput();

    // Sink
    @Input(_countInput)
    KTable<String, Long> _countInput();
}
Producer
@Scheduled(fixedDelay = 1000)
public void wordsProducer() {
    List<String> names = Arrays.asList("mfisher", "dyser", "schacko", "abilan", "ozhurakousky", "grussell");
    List<String> pages = Arrays.asList("blog", "sitemap", "initializr", "news", "colophon", "about");
    String rPage = pages.get(new Random().nextInt(pages.size()));
    String rName = pages.get(new Random().nextInt(names.size()));
    PageViewEvent pageViewEvent = new PageViewEvent(rName, rPage, Math.random() > .5 ? 10 : 1000);
    // Publish the words into the OUTPUT Topic
    this.wordCountChannelBindings._wordsOutput().send(
            MessageBuilder.withPayload(pageViewEvent).build());
    log.info("Words published - {}", pageViewEvent);
}
Processor
@Component
public class WordsStreamProcessor {

    @StreamListener
    @SendTo(WordCountChannelBindings._countOutput)
    public KStream<String, Long> process(@Input(WordCountChannelBindings._wordsInput) KStream<String, PageViewEvent> input) {
        log.info("Process data - {}", input);
        return input.filter((key, value) -> value.getDuration() > 10)
                .map((key, value) -> new KeyValue<>(value.getPage(), "0"))
                .groupByKey()
                .count(Materialized.as("wordscount"))
                .toStream();
    }
}
Consumer
@StreamListener
public void wordsCountConsumer(@Input(WordCountChannelBindings._countInput) KTable<String, Long> wordsCountDetails) {
    log.info("Consumed Result - {}", wordsCountDetails);
}
SB Main class
@EnableScheduling
@EnableBinding(WordCountChannelBindings.class)
@SpringBootApplication
public class SpringCloudStreamKafkaApplication {

    public static void main(String[] args) {
        SpringApplication.run(SpringCloudStreamKafkaApplication.class, args);
    }
}
application.yml
spring.cloud.stream.kafka.binder:
  brokers:
    - localhost:9092
spring.cloud.stream.kafka.streams.binder:
  applicationId: word-count-sample
  configuration:
    commit.interval.ms: 100
    default.key.serde: org.apache.kafka.common.serialization.Serdes$StringSerde
    default.value.serde: org.apache.kafka.common.serialization.Serdes$StringSerde
spring.cloud.stream.bindings.words_output_channel:
  destination: words_topic
  producer:
    headerMode: none
spring.cloud.stream.bindings.words_input_channel:
  destination: words_topic
  consumer:
    headerMode: none
spring.cloud.stream.bindings.counts_output_channel:
  destination: counts_topic
  producer:
    useNativeEncoding: true
spring.cloud.stream.bindings.counts_input_channel:
  destination: counts_topic
  consumer:
    useNativeDecoding: true
    headerMode: none
  group: wordscount
  contentType: application/json
spring.cloud.stream.kafka.streams.bindings.counts_output_channel:
  producer:
    keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
    valueSerde: org.apache.kafka.common.serialization.Serdes$LongSerde
spring.cloud.stream.kafka.streams.bindings.counts_input_channel:
  consumer:
    keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
    valueSerde: org.apache.kafka.common.serialization.Serdes$LongSerde
Logs
2020-07-17 12:31:45.893 INFO 17236 --- [ask-scheduler-4] l.k.s.s.c.stream.producer.WordsProducer : Words published - PageViewEvent(userId=about, page=colophon, duration=1000)
2020-07-17 12:31:46.895 INFO 17236 --- [ask-scheduler-8] l.k.s.s.c.stream.producer.WordsProducer : Words published - PageViewEvent(userId=initializr, page=blog, duration=10)
2020-07-17 12:31:47.899 INFO 17236 --- [ask-scheduler-3] l.k.s.s.c.stream.producer.WordsProducer : Words published - PageViewEvent(userId=blog, page=news, duration=1000)
2020-07-17 12:31:48.900 INFO 17236 --- [ask-scheduler-9] l.k.s.s.c.stream.producer.WordsProducer : Words published - PageViewEvent(userId=sitemap, page=about, duration=10)
As shown in the logs above, a PageViewEvent is published to the topic every second, but the processor method that is supposed to transform the events is not consuming the messages. No error is seen in the log.
Kindly help to get this working.
Initially tried with
<spring-cloud.version>Hoxton</spring-cloud.version>
<version>2.3.1.RELEASE</version>
and also with
<spring-cloud.version>Finchley.RELEASE</spring-cloud.version>
<version>2.0.1.RELEASE</version>
but I am facing the same issue.
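As a point of comparison (an assumption, not part of the original question): on recent Spring Cloud Stream versions the same topology is typically written in the functional style used in the first question above, which removes the channel-interface and @StreamListener wiring entirely. A minimal sketch, with the bean and class names chosen for illustration:
import java.util.function.Function;

import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Materialized;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class WordCountTopology {

    // Functional equivalent of the @StreamListener processor above; the bindings
    // become wordCount-in-0 / wordCount-out-0 in application.yml.
    @Bean
    public Function<KStream<String, PageViewEvent>, KStream<String, Long>> wordCount() {
        return input -> input
                .filter((key, value) -> value.getDuration() > 10)
                .map((key, value) -> new KeyValue<>(value.getPage(), "0"))
                .groupByKey()
                .count(Materialized.as("wordscount"))
                .toStream();
    }
}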

Is it possible to have more than one stream in a single microservice?

Is it possible to have more than one stream in a single microservice?
This is my YAML:
bindings:
  input:
    destination: topic1
    content-type: application/*+avro
  output:
    destination: topic2
    content-type: application/*+avro
Can I have more inputs and outputs?
Can I have a different serializer class for each stream?
producer-properties:
  key.serializer: io.confluent.kafka.serializers.KafkaAvroSerializer
  value.serializer: io.confluent.kafka.serializers.KafkaAvroSerializer
Yes, just add new bindings:
bindings:
  input:
    destination: topic1
    content-type: application/*+avro
  inputStream1:
    destination: topic_input_1
  inputStream2:
    destination: topic_input_2
  output:
    destination: topic2
    content-type: application/*+avro
  output2:
    destination: output_topic_2
Then provide @EnableBinding and the other annotations, such as @StreamListener:
@EnableBinding(MultipleDesBinding.class)
public class YourApplication {

    @StreamListener
    @SendTo("output")
    public KStream<String, String> topology1(@Input("input") KStream<String, String> inputKStream,
                                             @Input("inputStream1") KStream<String, String> inputStream1KStream) {
        // ... build and return the topology for the "output" binding
    }

    @StreamListener
    @SendTo("output2")
    public KStream<String, Long> topology2(@Input("inputStream2") KStream<String, String> inputStream2KStream) {
        // ... build and return the topology for the "output2" binding
    }
}

interface MultipleDesBinding {

    @Input("input")
    KStream<?, ?> input();

    @Input("inputStream1")
    KStream<?, ?> inputStream1();

    @Input("inputStream2")
    KStream<?, ?> inputStream2();

    @Output("output")
    KStream<?, ?> output();

    @Output("output2")
    KStream<?, ?> output2();
}
You can take a detailed look at the documentation on the imperative programming model.
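For the second part of the question (a different serialization per stream): with the Kafka Streams binder each binding can carry its own keySerde/valueSerde, in the same way as the configuration shown in the first question above. A minimal sketch, with the binding names taken from the answer and the Serde choices assumed for illustration:
spring:
  cloud:
    stream:
      kafka:
        streams:
          bindings:
            inputStream1:
              consumer:
                keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
                valueSerde: io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde
            output2:
              producer:
                keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
                valueSerde: org.apache.kafka.common.serialization.Serdes$LongSerde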

Use a KTable or Kstream with Spring Cloud Stream and use Avro

I want to make an interactive query against my Kafka Streams topic.
At the moment I can send Avro-serialized JSON objects to my topic and read them again with the Avro deserializer.
For this scenario I use the normal MessageChannel binder, and this works as intended.
Now I want to use the Kafka Streams binder and I can't get it to work. Maybe someone can help me out here.
My Configuration:
spring:
  cloud:
    bus:
      enabled: true
    stream:
      schemaRegistryClient.endpoint: http://192.168.99.100:8081
      bindings:
        segments-in:
          destination: segments
          contentType: application/vnd.segments-value.v1+avro
        segments-all:
          destination: segments
          group: segments-all
          consumer:
            headerMode: raw
            useNativeDecoding: true
      kafka:
        binder:
          zkNodes: 192.168.99.100:2181
          brokers: 192.168.99.100:32768
        streams:
          bindings:
            segments-all:
              consumer:
                keySerde: org.apache.kafka.common.serialization.Serdes$StringSerde
                valueSerde: io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde
Kafka Config Class:
@Configuration
public class KafkaConfiguration {

    @Bean
    public MessageConverter classificationMessageConverter() {
        AvroSchemaMessageConverter converter = new AvroSchemaMessageConverter();
        converter.setSchema(Segment.SCHEMA$);
        return converter;
    }
}
Schema Config
@Configuration
public class SchemaRegistryConfiguration {

    @Bean
    public SchemaRegistryClient schemaRegistryClient(@Value("${spring.cloud.stream.schemaRegistryClient.endpoint}") final String endpoint) {
        ConfluentSchemaRegistryClient client = new ConfluentSchemaRegistryClient();
        client.setEndpoint(endpoint);
        return client;
    }
}
And now my interface:
public interface Channels {

    String EVENTS = "segments-in";
    String ALLSEGMENTS = "segments-all";

    @Input(Channels.EVENTS)
    SubscribableChannel events();

    @Input(Channels.ALLSEGMENTS)
    KTable<?, ?> segmentsIn();
}
I always get the following error (a warning message), but only when I have the second channel, segmentsIn(), open:
org.apache.kafka.clients.NetworkClient : [AdminClient clientId=adminclient-3] Connection to node -1 could not be established. Broker may not be available.
With the SubscribableChannel (segments-in) everything works fine. What am I doing wrong here? How can I get the segments-all channel to work with the Kafka Streams API?
I got the connection working with the following configuration:
spring:
  cloud:
    bus:
      enabled: true
    stream:
      schemaRegistryClient.endpoint: http://192.168.99.100:8081
      bindings:
        segments-in:
          destination: segments
          contentType: application/vnd.segments-value.v1+avro
        segments-all:
          destination: segments
          group: segments-all
          consumer:
            useNativeDecoding: false
        events-out:
          destination: incidents
          group: events-out
          producer:
            useNativeDecoding: false
      kafka:
        binder:
          zkNodes: 192.168.99.100:2181
          brokers: 192.168.99.100:32768
        streams:
          binder:
            zkNodes: 192.168.99.100:2181
            brokers: 192.168.99.100:32768
            configuration:
              schema.registry.url: http://192.168.99.100:8081
              default.key.serde: org.apache.kafka.common.serialization.Serdes$StringSerde
              default.value.serde: io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde
See the added config for Kafka Streams, but I still cannot query anything with my code.
I use the following snippet:
@StreamListener(Channels.ALLSEGMENTS)
@SendTo(Channels.EVENTS_OUT)
public KStream<Utf8, Long> process(KStream<String, Segment> input) {
    log.info("Read new information");
    return input
            .filter((key, segment) -> segment.getStart().time > 10)
            .map((key, value) -> new KeyValue<>(value.id, value))
            .groupByKey()
            .count(Materialized.as(STORE_NAME))
            .toStream();
}
And this scheduler:
@Scheduled(fixedRate = 30000, initialDelay = 5000)
public void printProductCounts() {
    if (keyValueStore == null) {
        keyValueStore = queryService.getQueryableStoreType(STORE_NAME, QueryableStoreTypes.keyValueStore());
    }
    String id = "21523XDEf";
    System.out.println(keyValueStore.approximateNumEntries());
    System.out.println("Product ID: " + id + " Count: " + keyValueStore.get(id));
}
Output is always:
0
Product ID: 21523XDEf Count: null
Can someone point me in the right direction? What am I doing wrong?
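No answer is recorded here, so the following is only a hedged suggestion: the state store can be queried only after the Kafka Streams application has reached the RUNNING state and the topology has actually processed records, and with the Kafka Streams binder the store is normally obtained through InteractiveQueryService.getQueryableStore. A minimal sketch, with the store name, key/value types, and class name assumed for illustration (the name must match what is passed to Materialized.as in the processor):
// Hypothetical query component; store name and types are assumptions for illustration.
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cloud.stream.binder.kafka.streams.InteractiveQueryService;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

@Component
public class SegmentCountQuery {

    private static final String STORE_NAME = "segment_store"; // must match Materialized.as(...)

    @Autowired
    private InteractiveQueryService interactiveQueryService;

    @Scheduled(fixedRate = 30000, initialDelay = 5000)
    public void printCounts() {
        // Retrieve the store lazily; this only succeeds once the streams app is RUNNING.
        ReadOnlyKeyValueStore<String, Long> store =
                interactiveQueryService.getQueryableStore(STORE_NAME, QueryableStoreTypes.<String, Long>keyValueStore());
        System.out.println("Approximate entries: " + store.approximateNumEntries());
    }
}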

Spring cloud stream: Headers kafka_acknowledgment value is null

I am trying to control the offset commit on a Kafka topic according to the status of message processing in the application: if the message is processed successfully, the offset can be committed. For this I am trying to get the headers in my method so that the message can be acknowledged manually.
spring:
  cloud:
    stream:
      default:
        contentType: application/json
      default-binder: binder1-kafka
      bindings:
        myChannel:
          binder: binder1-kafka
          destination: my_topic
          content-type: text/plain
          consumer:
            autoCommitOffset: false
        outChannel:
          binder: binder2-kafka
          destination: my_topic
          content-type: text/plain
          consumer:
            autoCommitOffset: false
      binders:
        # Connection config to different clusters
        binder1-kafka:
          type: kafka
          defaultCandidate: true
          environment:
            spring:
              cloud:
                stream:
                  kafka:
                    binder:
                      brokers: some-url1:9092
        binder2-kafka:
          type: kafka
          defaultCandidate: false
          environment:
            spring:
              cloud:
                stream:
                  kafka:
                    binder:
                      brokers: some-url2:9092
But when using the listener as:
@StreamListener(target = IBrokerChannel.myChannel)
public void handlePayload(@Payload MyPayload payload, @Headers Map<String, Object> headers) {
    Acknowledgment acknowledgment = (Acknowledgment) headers.get("kafka_acknowledgment"); // acknowledgment object is always null
    acknowledgment.acknowledge();
}
the acknowledgment is always null. I am using the Kafka producer CLI to send messages to the topic. The Spring Boot version is 1.5.10.RELEASE.
You are missing the kafka branch in your configuration property definition. It must look like this:
spring:
  cloud:
    stream:
      default-binder: kafka
      kafka:
        bindings:
          myChannel:
            consumer:
              autoCommitOffset: false
https://docs.spring.io/spring-cloud-stream/docs/Fishtown.BUILD-SNAPSHOT/reference/htmlsingle/#kafka-consumer-properties
Works fine for me...
@SpringBootApplication
@EnableBinding(Sink.class)
public class So51159949Application {

    public static void main(String[] args) {
        SpringApplication.run(So51159949Application.class, args);
    }

    @Bean
    public ApplicationRunner runner(KafkaTemplate<byte[], byte[]> template) {
        return args -> {
            template.send("so51159949", "foo".getBytes());
        };
    }

    @StreamListener(Sink.INPUT)
    public void in(String in, @Headers MessageHeaders headers) {
        System.out.println(in);
        System.out.println(headers);
        Acknowledgment ack = headers.get(KafkaHeaders.ACKNOWLEDGMENT, Acknowledgment.class);
        ack.acknowledge();
    }
}
and
spring:
  cloud:
    stream:
      bindings:
        input:
          group: so51159949
          destination: so51159949
      kafka:
        bindings:
          input:
            consumer:
              auto-commit-offset: false
and
foo
{kafka_offset=2, kafka_consumer=org.apache.kafka.clients.consumer.KafkaConsumer#4ad39b5f, deliveryAttempt=1, kafka_timestampType=CREATE_TIME, kafka_receivedMessageKey=null, kafka_receivedPartitionId=0, kafka_receivedTopic=so51159949, kafka_receivedTimestamp=1530643662028, kafka_acknowledgment=Acknowledgment for ConsumerRecord(topic = so51159949, partition = 0, offset = 2, CreateTime = 1530643662028, serialized key size = -1, serialized value size = 3, headers = RecordHeaders(headers = [], isReadOnly = false), key = null, value = [B#700c9aa9), contentType=application/json}
