Failed to flush state store - apache-kafka-streams

I'm trying to create a leftJoin in Kafka Streams. It works fine for about 10 records and then crashes with an exception caused by a NullPointerException. The code looks like this:
private static KafkaStreams getKafkaStreams() {
    StreamsConfig streamsConfig = new StreamsConfig(getProperties());
    KStreamBuilder builder = new KStreamBuilder();
    KTable<String, Verkaeufer> umsatzTable = builder.table(Serdes.String(), EventstreamSerde.Verkaeufer(), CommonUtilsConstants.TOPIC_VERKAEUFER_STAMMDATEN);
    KStream<String, String> verkaeuferStream = builder.stream(CommonUtilsConstants.TOPIC_ANZAHL_UMSATZ_PER_VERKAEUFER);
    KStream<String, String> tuttiStream = verkaeuferStream.leftJoin(umsatzTable,
            (tutti, verkaeufer) -> ("Vorname=" + verkaeufer.getVorname().toString() + ",Nachname=" + verkaeufer.getNachname().toString() + "," + tutti.toString()), Serdes.String(), Serdes.String());
    tuttiStream.to(Serdes.String(), Serdes.String(), CommonUtilsConstants.TOPIC_TUTTI);
    return new KafkaStreams(builder, streamsConfig);
}
StreamsConfig looks like this:
private static Properties getProperties() {
    Properties props = new Properties();
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, CommonUtilsConstants.BOOTSTRAP_SERVER_CONFIGURATION);
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, CommonUtilsConstants.GID_TUTTI);
    props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, "1000");
    return props;
}
Full Stack Trace:
22:19:36.550 [gid-tutti-8fe6be58-d5c5-41ce-982d-88081b98004e-StreamThread-1] ERROR o.a.k.s.p.internals.StreamThread - stream-thread [gid-tutti-8fe6be58-d5c5-41ce-982d-88081b98004e-StreamThread-1] Failed to commit StreamTask 0_0 state: org.apache.kafka.streams.errors.ProcessorStateException: task [0_0] Failed to flush state store KTABLE-SOURCE-STATE-STORE-0000000000
at org.apache.kafka.streams.processor.internals.ProcessorStateManager.flush(ProcessorStateManager.java:262)
at org.apache.kafka.streams.processor.internals.AbstractTask.flushState(AbstractTask.java:190)
at org.apache.kafka.streams.processor.internals.StreamTask.flushState(StreamTask.java:282)
at org.apache.kafka.streams.processor.internals.StreamTask$1.run(StreamTask.java:264)
at org.apache.kafka.streams.processor.internals.StreamsMetricsImpl.measureLatencyNs(StreamsMetricsImpl.java:187)
at org.apache.kafka.streams.processor.internals.StreamTask.commitImpl(StreamTask.java:259)
at org.apache.kafka.streams.processor.internals.StreamTask.commit(StreamTask.java:253)
at org.apache.kafka.streams.processor.internals.StreamThread.commitOne(StreamThread.java:815)
at org.apache.kafka.streams.processor.internals.StreamThread.access$2800(StreamThread.java:73)
at org.apache.kafka.streams.processor.internals.StreamThread$2.apply(StreamThread.java:797)
at org.apache.kafka.streams.processor.internals.StreamThread.performOnStreamTasks(StreamThread.java:1448)
at org.apache.kafka.streams.processor.internals.StreamThread.commitAll(StreamThread.java:789)
at org.apache.kafka.streams.processor.internals.StreamThread.maybeCommit(StreamThread.java:778)
at org.apache.kafka.streams.processor.internals.StreamThread.runLoop(StreamThread.java:567)
at org.apache.kafka.streams.processor.internals.StreamThread.run(StreamThread.java:527)
Caused by: java.lang.NullPointerException: null
at java.lang.String.<init>(String.java:143)
at ch.wesr.eventstream.commonutils.serde.GsonDeserializer.deserialize(GsonDeserializer.java:38)
at org.apache.kafka.streams.state.StateSerdes.valueFrom(StateSerdes.java:163)
at org.apache.kafka.streams.state.internals.CachingKeyValueStore.putAndMaybeForward(CachingKeyValueStore.java:90)
at org.apache.kafka.streams.state.internals.CachingKeyValueStore.access$000(CachingKeyValueStore.java:34)
at org.apache.kafka.streams.state.internals.CachingKeyValueStore$1.apply(CachingKeyValueStore.java:78)
at org.apache.kafka.streams.state.internals.NamedCache.flush(NamedCache.java:145)
at org.apache.kafka.streams.state.internals.NamedCache.flush(NamedCache.java:103)
at org.apache.kafka.streams.state.internals.ThreadCache.flush(ThreadCache.java:97)
at org.apache.kafka.streams.state.internals.CachingKeyValueStore.flush(CachingKeyValueStore.java:107)
at org.apache.kafka.streams.processor.internals.ProcessorStateManager.flush(ProcessorStateManager.java:260)
... 14 common frames omitted
Update:
This is what GsonDeserializer looks like:
public class GsonDeserializer<T> implements Deserializer<T> {
    public static final String CONFIG_VALUE_CLASS = "default.value.deserializer.class";
    public static final String CONFIG_KEY_CLASS = "default.key.deserializer.class";
    private Class<T> deserializedClass;
    private Gson gson = new GsonBuilder().create();

    public GsonDeserializer() {}

    @Override
    public void configure(Map<String, ?> config, boolean isKey) {
        String configKey = isKey ? CONFIG_KEY_CLASS : CONFIG_VALUE_CLASS;
        String clsName = String.valueOf(config.get(configKey));
        try {
            if (deserializedClass == null) {
                deserializedClass = (Class<T>) Class.forName(clsName);
            }
        } catch (ClassNotFoundException e) {
            System.err.printf("Failed to configure GsonDeserializer. " +
                            "Did you forget to specify the '%s' property ?%n",
                    configKey);
            System.out.println(e.getMessage());
        }
    }

    @Override
    public T deserialize(String s, byte[] bytes) {
        return gson.fromJson(new String(bytes), deserializedClass);
    }

    @Override
    public void close() {}
}

As long as the cache is not flushed, your deserializer is never called. That's why it doesn't fail in the beginning, and why you can increase the time until it fails via the cache size parameter and the commit interval (we flush on commit).
Looking at your code for GsonDeserializer, it seems that new String(bytes) fails with the NPE: the String constructor cannot take null as a parameter. Your deserializer code must guard against bytes == null and should return null directly in that case.
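A minimal sketch of such a guard, keeping the rest of the posted GsonDeserializer unchanged:
@Override
public T deserialize(String s, byte[] bytes) {
    // Kafka passes null bytes for absent values; returning null here avoids the NPE in new String(bytes)
    if (bytes == null) {
        return null;
    }
    return gson.fromJson(new String(bytes), deserializedClass);
}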

Related

Loading values from a JSON file on application startup

I want to load values from a JSON file when the Spring Boot application starts.
My configuration class looks like this:
@Configuration
@Getter
public class FedexAPIConfig {
private final static String JSON_FILE = "/static/config/fedex-api-credentials.json";
private final boolean IS_PRODUCTION = false;
private FedexAPICred apiCredentials;
public FedexAPIConfig() {
try (InputStream in = getClass().getResourceAsStream(JSON_FILE);
BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
JSONObject json = new JSONObject();
// this.apiCredentials = new JSONObject(new JSONTokener(reader));
if (IS_PRODUCTION) {
json = new JSONObject(new JSONTokener(reader)).getJSONObject("production");
} else {
json = new JSONObject(new JSONTokener(reader)).getJSONObject("test");
}
System.out.println(json.toString());
this.apiCredentials = FedexAPICred.builder()
.url(json.optString("url"))
.apiKey(json.optString("api_key"))
.secretKey(json.optString("secret_key"))
.build();
} catch (FileNotFoundException fnfe) {
fnfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
}
With this, the values are successfully printed to the console while the application is starting up (see the startup console log).
When I try to access this value from another ordinary class, like the one below, it returns nothing and just throws a NullPointerException. What am I doing wrong and what should I do?
public class FedexOAuthTokenManager extends OAuthToken {
private static final String VALIDATE_TOKEN_URL = "/oauth/token";
private static final String GRANT_TYPE_CLIENT = "client_credentials";
private static final String GRANT_TYPE_CSP = "csp_credentials";
@Autowired
private FedexAPIConfig fedexApiConfig;
@Autowired
private Token token;
@Override
public void validateToken() {
// This is the part where "fedexApiConfig" is null.
FedexAPICred fedexApiCred = fedexApiConfig.getApiCredentials();
Response response = null;
try {
RequestBody body = new FormBody.Builder()
.add("grant_type", GRANT_TYPE_CLIENT)
.add("client_id", fedexApiCred.getApiKey())
.add("client_secret", fedexApiCred.getSecretKey())
.build();
response = new HttpClient().post(fedexApiCred.getUrl() + VALIDATE_TOKEN_URL, body);
if (response.code() == 200) {
JSONObject json = new JSONObject(response.body().string());
token.setAccessToken(json.optString("access_token"));
token.setTokenType(json.optString("token_type"));
token.setExpiredIn(json.optInt("expires_in"));
token.setExpiredDateTime(LocalDateTime.now().plusSeconds(json.optInt("expires_in")));
token.setScope(json.optString("scope"));
}
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
}
fedexApiConfig is null even though I autowired it before the call.
And this FedexOAuthTokenManager is instantiated from another @Component class via new FedexOAuthTokenManager().
Did you try the approach below?
Step 1: Create a configuration class like the one below
public class DemoConfig implements ApplicationListener<ApplicationPreparedEvent> {
@Override
public void onApplicationEvent(ApplicationPreparedEvent event) {
//Load the values from the JSON file and populate the application
//properties dynamically
ConfigurableEnvironment environment = event.getApplicationContext().getEnvironment();
Properties props = new Properties();
props.put("spring.datasource.url", "<my value>");
//Add more properties
environment.getPropertySources().addFirst(new PropertiesPropertySource("myProps", props));
}
}
To listen to a context event, a bean should implement the ApplicationListener interface, which has just one method, onApplicationEvent(). The ApplicationPreparedEvent is invoked very early in the lifecycle of the application.
Step 2: Register the listener in src/main/resources/META-INF/spring.factories
org.springframework.context.ApplicationListener=com.example.demo.DemoConfig
Step 3: @Value in Spring Boot is commonly used to inject configuration values into the application. Access the properties as you wish.
@Value("${spring.datasource.url}")
private String valueFromJSon;
Try this sample on your local machine first and then adapt it to your needs.
Refer - https://www.baeldung.com/spring-value-annotation
Refer - https://www.knowledgefactory.net/2021/02/aws-secret-manager-service-as.html

Groovy Spock: How do you pass a new Exception into a when block without it throwing right away?

I am trying to test a Kafka error handler that takes in an Exception, but as soon as I declare it in Spock it actually throws it.
def 'test example'() {
when:
service.emitError(new Exception('test exception'))
then:
// do some tests
}
I have tried declaring it in a wrapper Java class; running that in main does NOT throw an error, but if I pull it into Spock it processes it incorrectly.
I am trying to figure out whether I am doing it wrong or whether I simply can't test this with Spock.
With help from Jeff I realized that it was an error on the mocked KafkaTemplate. When you pass an exception into a mock (not sure if it is KafkaTemplate specific) and the expected mock interaction fails, something bubbles up and my try/catch caught that instead. I recognize I should have posted the original code, and will in the future. This is testing on pre-refactored code that didn't have tests (not TDD).
I was missing .key('key'), which was causing the failure.
Emitter
public class KafkaErrorNotificationEmitter {
private final KafkaTemplate<String, TopicMessageData> kafkaTemplate;
private final ObjectMapper objectMapper;
private final TemporalConfig.TimeKeeper timeKeeper;
private final String internalErrorTopic;
public KafkaErrorNotificationEmitter(
KafkaTemplate<String, TopicMessageData> kafkaTemplate,
ObjectMapper objectMapper,
TemporalConfig.TimeKeeper timeKeeper,
@Value("${kafka.error.topic}") String internalErrorTopic
) {
this.kafkaTemplate = kafkaTemplate;
this.objectMapper = objectMapper;
this.timeKeeper = timeKeeper;
this.internalErrorTopic = internalErrorTopic;
}
public void emitError(@Nullable KesMessageProperties kesMessageProperties, Exception ex) {
assert kesMessageProperties != null;
String entityName = kesMessageProperties.getEntityName();
log.warn("Failed message ({}). Sending to KES.", entityName, ex);
String key = kesMessageProperties.getMessage().getKey();
try {
TopicMessageData errorMessage =
TopicMessageData
.builder()
.sourceTopic(kesMessageProperties.getTopic())
.exceptionMessage(ex.getMessage())
.key(key)
.listenerType(kesMessageProperties.getListenerType())
.occurrenceTime(timeKeeper.nowZdt())
.payload(objectMapper.writeValueAsString(kesMessageProperties.getMessage()))
.build();
sendEmitError(errorMessage);
} catch (Exception e) {
log.error("Failed to send error ({}) notification for {}", entityName, key, e);
}
}
private void sendEmitError(final TopicMessageData topicMessageData) {
log.debug("Sending error message for: {}", topicMessageData);
kafkaTemplate.send(internalErrorTopic, topicMessageData);
}
}
Test
class KafkaErrorNotificationEmitterSpec extends Specification {
KafkaTemplate<String, TopicMessageData> kafkaTemplate = Mock()
ObjectMapper objectMapper = new ObjectMapper()
TemporalConfig.TimeKeeper timeKeeper = Mock()
String internalErrorTopic = 'kes.error.test'
def kafkaErrorNotificationEmitter = new KafkaErrorNotificationEmitter(
kafkaTemplate,
objectMapper,
timeKeeper,
internalErrorTopic
)
@Shared
def errorMessage = 'Test exception'
@Shared
Exception exception = new Exception(errorMessage)
def 'emitError throws uncaught NPE'() {
setup:
def properties = new KesMessageProperties(message: null)
when:
kafkaErrorNotificationEmitter.emitError(properties, exception)
then:
0 * _
thrown NullPointerException
}
def 'emitError throws caught exception'() {
setup:
def properties = new KesMessageProperties(
message: new IKafkaMessage() {
@Override
String getKey() {
return null
}
}
)
when:
kafkaErrorNotificationEmitter.emitError(properties, exception)
then:
1 * timeKeeper.nowZdt() >> { throw new RuntimeException() }
0 * _
}
def 'emitError success'() {
setup:
def listenerType = 'test-error'
def properties = new KesMessageProperties(
listenerType: listenerType,
message: new IKafkaMessage() {
@Override
String getKey() {
return 'key'
}
}
)
def now = ZonedDateTime.now()
def errorData =
TopicMessageData
.builder()
.exceptionMessage(errorMessage)
.listenerType(listenerType)
//.key('key') // this is what was missing!!!
.occurrenceTime(now)
.payload('{\"key\":\"key\"}')
.build()
when:
kafkaErrorNotificationEmitter.emitError(properties, exception)
then:
1 * timeKeeper.nowZdt() >> now
1 * kafkaTemplate.send(internalErrorTopic, errorData) >> Mock(ListenableFuture)
}
}

Spring Batch - FlatFileItemWriter Error 14416: Stream is already closed

Basically I have a Spring Batch job that queries a database and implements Partitioner to get the jobs, then assigns the jobs to a ThreadPoolTaskExecutor in a slave step.
The reader reads (a job) from the database. The writer loads the data into a CSV file in Azure Blob Storage.
The job partitioner and reader work fine. The writer writes to one file, then it closes, and the other jobs cannot finish because the stream is closed. I get the following error:
Reading: market1
Reading: market2
Reading: market3
Reading: market4
Reading: market5
Writter: /upload-demo/market3_2021-06-01.csv
Writter: /upload-demo/market5_2021-06-01.csv
Writter: /upload-demo/market4_63_2021-06-01.csv
Writter: /upload-demo/market2_2021-06-01.csv
Writter: /upload-demo/market1_11_2021-06-01.csv
2021-06-02 08:24:42.304 ERROR 20356 --- [ taskExecutor-3] c.a.storage.common.StorageOutputStream : Stream is already closed.
2021-06-02 08:24:42.307 WARN 20356 --- [ taskExecutor-3] o.s.b.f.support.DisposableBeanAdapter : Destroy method 'close' on bean with name 'scopedTarget.writer2' threw an exception: java.lang.RuntimeException: Stream is already closed.
Reading: market6
Writter: /upload-demo/market6_2021-06-01.csv
Here is my Batch Configuration:
@EnableBatchProcessing
@Configuration
public class BatchConfig extends DefaultBatchConfigurer {
String connectionString = "azureConnectionString";
String containerName = "upload-demo";
String endpoint = "azureHttpsEndpoint";
String accountName ="azureAccountName";
String accountKey = "accountKey";
StorageSharedKeyCredential credential = new StorageSharedKeyCredential(accountName, accountKey);
BlobServiceClient client = new BlobServiceClientBuilder().connectionString(connectionString).endpoint(endpoint).buildClient();
@Autowired
private StepBuilderFactory steps;
@Autowired
private JobBuilderFactory jobs;
@Autowired
@Qualifier("verticaDb")
private DataSource verticaDataSource;
@Autowired
private PlatformTransactionManager transactionManager;
@Autowired
private ConsoleItemWriter consoleItemWriter;
@Autowired
private ItemWriter itemWriter;
@Bean
public Job job() throws Exception {
return jobs.get("job1")
.start(masterStep(null, null))
.incrementer(new RunIdIncrementer())
.build();
}
@Bean
public ThreadPoolTaskExecutor taskExecutor() {
ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
taskExecutor.setCorePoolSize(5);
taskExecutor.setMaxPoolSize(10);
taskExecutor.initialize();
return taskExecutor;
}
@Bean
@JobScope
public Step masterStep(@Value("#{jobParameters['startDate']}") String startDate,
@Value("#{jobParameters['endDate']}") String endDate) throws Exception {
return steps.get("masterStep")
.partitioner(slaveStep().getName(), new RangePartitioner(verticaDataSource, startDate, endDate))
.step(slaveStep())
.gridSize(5)
.taskExecutor(taskExecutor())
.build();
}
@Bean
public Step slaveStep() throws Exception {
return steps.get("slaveStep")
.<MarketData, MarketData>chunk(100)
.reader(pagingItemReader(null, null, null))
.faultTolerant()
.skip(NullPointerException.class)
.skipPolicy(new AlwaysSkipItemSkipPolicy())
.writer(writer2(null, null, null)) //consoleItemWriter
.build();
}
@Bean
@StepScope
public JdbcPagingItemReader pagingItemReader(
@Value("#{stepExecutionContext['MarketName']}") String marketName,
@Value("#{jobParameters['startDate']}") String startDate,
@Value("#{jobParameters['endDate']}") String endDate
) throws Exception {
System.out.println("Reading: " + marketName);
SqlPagingQueryProviderFactoryBean provider = new SqlPagingQueryProviderFactoryBean();
Map<String, Order> sortKey = new HashMap<>();
sortKey.put("xbin", Order.ASCENDING);
sortKey.put("ybin", Order.ASCENDING);
provider.setDataSource(this.verticaDataSource);
provider.setDatabaseType("POSTGRES");
provider.setSelectClause("SELECT MARKET AS market, EPSG AS epsg, XBIN AS xbin, YBIN AS ybin, " +
"LATITUDE AS latitude, LONGITUDE AS longitude, " +
"SUM(TOTALUPLINKVOLUME) AS totalDownlinkVol, SUM(TOTALDOWNLINKVOLUME) AS totalUplinkVol");
provider.setFromClause("FROM views.geo_analytics");
provider.setWhereClause(
"WHERE market='" + marketName + "'" +
" AND STARTTIME >= '" + startDate + "'" +
" AND STARTTIME < '" + endDate + "'" +
" AND TOTALUPLINKVOLUME IS NOT NULL" +
" AND TOTALUPLINKVOLUME > 0" +
" AND TOTALDOWNLINKVOLUME IS NOT NULL" +
" AND TOTALDOWNLINKVOLUME > 0" +
" AND EPSG IS NOT NULL" +
" AND LATITUDE IS NOT NULL" +
" AND LONGITUDE IS NOT NULL" +
" AND XBIN IS NOT NULL" +
" AND YBIN IS NOT NULL"
);
provider.setGroupClause("GROUP BY XBIN, YBIN, MARKET, EPSG, LATITUDE, LONGITUDE");
provider.setSortKeys(sortKey);
JdbcPagingItemReader reader = new JdbcPagingItemReader();
reader.setDataSource(this.verticaDataSource);
reader.setQueryProvider(provider.getObject());
reader.setFetchSize(1000);
reader.setRowMapper(new BeanPropertyRowMapper() {
{
setMappedClass((MarketData.class));
}
});
return reader;
}
@Bean
@StepScope
public FlatFileItemWriter<MarketData> writer2(@Value("#{jobParameters['yearMonth']}") String yearMonth,
@Value("#{stepExecutionContext['marketName']}") String marketName,
@Value("#{jobParameters['startDate']}") String startDate) throws URISyntaxException, InvalidKeyException, StorageException, IOException {
AZBlobWriter<MarketData> writer = new AZBlobWriter<>();
String fullPath =marketName + "_" + startDate + ".csv";
String resourceString = "azure-blob://upload-demo/" + fullPath;
CloudStorageAccount storageAccount = CloudStorageAccount.parse(connectionString);
CloudBlobClient blobClient = storageAccount.createCloudBlobClient();
CloudBlobContainer container2 = blobClient.getContainerReference(containerName);
container2.createIfNotExists();
AzureStorageResourcePatternResolver storageResourcePatternResolver = new AzureStorageResourcePatternResolver(client);
Resource resource = storageResourcePatternResolver.getResource(resourceString);
System.out.println("Writter: " + resource.getURI().getPath().toString());
writer.setResource(resource);
writer.setStorage(container2);
writer.setLineAggregator(new DelimitedLineAggregator<MarketData>() {
{
setDelimiter(",");
setFieldExtractor(new BeanWrapperFieldExtractor<MarketData>() {
{
setNames(new String[] {
"market",
"epsg",
"xbin",
"ybin",
"latitude",
"longitude",
"totalDownlinkVol",
"totalUplinkVol"
});
}
});
}
});
return writer;
}
}
Previously I ran into other issues, such as setting up the Resource for the FlatFileItemWriter to Azure Blob: Spring Batch / Azure Storage account blob resource [container"foo", blob='bar'] cannot be resolved to absolute file path.
As suggested by @Mahmoud Ben Hassine, I made an implementation of the FlatFileItemWriter for Azure Blob.
The implementation I used as a base (GCP) is from this post: how to configure FlatFileItemWriter to output the file to a ByteArrayRecource?
Here is the implementation of the Azure Blob:
public class AZBlobWriter<T> extends FlatFileItemWriter<T> {
private CloudBlobContainer storage;
private Resource resource;
private static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator");
private OutputStream os;
private String lineSeparator = DEFAULT_LINE_SEPARATOR;
@Override
public void write(List<? extends T> items) throws Exception {
StringBuilder lines = new StringBuilder();
for (T item : items) {
lines.append(item).append(lineSeparator);
}
byte[] bytes = lines.toString().getBytes();
try {
os.write(bytes);
}
catch (IOException e) {
throw new WriteFailedException("Could not write data. The file may be corrupt.", e);
}
os.flush();
}
@Override
public void open(ExecutionContext executionContext) {
try {
os = ((WritableResource)resource).getOutputStream();
String bucket = resource.getURI().getHost();
String filePath = resource.getURI().getPath().substring(1);
CloudBlockBlob blob = storage.getBlockBlobReference(filePath);
} catch (IOException e) {
e.printStackTrace();
} catch (StorageException e) {
e.printStackTrace();
} catch (URISyntaxException e) {
e.printStackTrace();
}
}
@Override
public void update(ExecutionContext executionContext) {
}
@Override
public void close() {
super.close();
try {
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public void setStorage(CloudBlobContainer storage) {
this.storage = storage;
}
@Override
public void setResource(Resource resource) {
this.resource = resource;
}
}
Any help is greatly appreciated. My apologies for the dirty code, as I am still testing/developing it.
Thanks, Markus.
You did not share the entire stack trace to see when exactly this error happens, but it seems that the close method is called more than once. I don't think this is a concurrency issue, as I see you are using one writer per thread in a partitioned step. So I would make this method "re-entrant" by checking whether the output stream is already closed before closing it (there is no isClosed method on an output stream, so you can use a custom boolean around that).
That said, I would first confirm that the close method is indeed called twice and, if so, investigate why that is and fix the root cause.
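A minimal sketch of that guard, based on the AZBlobWriter posted above (the streamClosed flag is an extra field you would add yourself):
private boolean streamClosed = false;

@Override
public void close() {
    super.close();
    // guard against a second close() call on an already-closed stream
    if (streamClosed) {
        return;
    }
    try {
        os.close();
        streamClosed = true;
    } catch (IOException e) {
        e.printStackTrace();
    }
}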

Kafka Stream: can't get data from Kafka persistent keyValue state store

I am using Kafka Streams and persistent KeyValue stores in my application. There are two KeyValue stores and two processors. I am facing an issue with the state store that is shared between the two processors: NameProcessor puts data into nameStore and EventProcessor extracts data from nameStore. From debugging it looks like NameProcessor is able to put data successfully, but when EventProcessor tries to get data from nameStore, it doesn't get any data. Below are the code snippets for the application class, topology, NameProcessor and EventProcessor. I am using Spring Boot parent version 2.4.3, kafka-streams version 2.2.0 and kafka-clients version 2.2.0.
public static void main(String[] args) {
SpringApplication.run(Application.class, args);
Properties configs = getKafkaStreamProperties();
Topology builder = new Topology();
new ApplicationTopology(builder);
KafkaStreams stream = new KafkaStreams(builder, configs);
stream.setUncaughtExceptionHandler((Thread thread, Throwable throwable) -> {
// here you should examine the throwable/exception and perform an appropriate action!
logger.error("Uncaught exception in stream, MessageDetail: "+ ExceptionUtils.getRootCauseMessage(throwable) + ", Stack Trace: " + throwable.fillInStackTrace());
Runtime.getRuntime().halt(1);
});
Runtime.getRuntime().addShutdownHook(new Thread(stream::close));
stream.start();
}
private static Properties getKafkaStreamProperties() {
Properties configs = new Properties();
configs.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, getApplicationId());
configs.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, getBootstrapServers());
configs.setProperty(StreamsConfig.RETRIES_CONFIG, getRetries());
configs.setProperty(StreamsConfig.RETRY_BACKOFF_MS_CONFIG, getRetryBackOffMs());
configs.setProperty(StreamsConfig.REPLICATION_FACTOR_CONFIG, getReplicationFactor());
configs.setProperty(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, getMaxPollIntervalMs());
return configs;
}
public class ApplicationTopology {
public ApplicationTopology (Topology builder) {
StoreBuilder<KeyValueStore<String, Sensor>> nameStoreBuilder = Stores.
keyValueStoreBuilder(Stores.persistentKeyValueStore("nameStore"), Serdes.String(), CustomSerdes.getNameSerde()).withCachingEnabled().withLoggingEnabled(new HashMap<>());
StoreBuilder<KeyValueStore<String, String>> stateStoreBuilder = Stores.
keyValueStoreBuilder(Stores.persistentKeyValueStore("stateStore"), Serdes.String(), Serdes.String()).withCachingEnabled().withLoggingEnabled(new HashMap<>());
builder.addSource(AutoOffsetReset.LATEST, "source", Serdes.String().deserializer(), CustomSerdes.getIncomingEventSerde().deserializer(), getInboundTopic())
.addProcessor(TRANSFORMER, () -> new EventProcessor(), "source")
.addStateStore(nameStoreBuilder, TRANSFORMER)
.addSink("sink", getOutboundTopic(), Serdes.String().serializer(), CustomSerdes.getIncomingEventSerde().serializer(), TRANSFORMER);
//reset to earliest for model config topic as some models could be already on the topic
builder.addSource(AutoOffsetReset.EARLIEST, "nameStoreSource", Serdes.String().deserializer(), CustomSerdes.getSensorSerde().deserializer(), getInboundSensorUpdateTopic())
.addProcessor("process", () -> new NameProcessor(), "nameStoreSource")
.addStateStore(nameStoreBuilder, TRANSFORMER, "process");
}
public ApplicationTopology() {}
}
public class NameProcessor extends AbstractProcessor<String, Name> {
private static final Logger LOGGER = LoggerFactory.getLogger(NameProcessor.class);
ProcessorContext context;
private KeyValueStore<String, Name> nameStore;
private static List<String> externalDeviceIdList = new ArrayList<>();
@Override
public void init(ProcessorContext processorContext) {
this.context = processorContext;
this.nameStore = (KeyValueStore<String, Name>) context.getStateStore("nameStore");
}
@Override
public void process(String externalDeviceId, Name name) {
if (StringUtils.isNotBlank(externalDeviceId)) {
String[] externalDeviceIds = SensorUtils.getExternalDeviceIdsWithoutSuffix(externalDeviceId);
if (Objects.isNull(name)) {
Arrays.stream(externalDeviceIds).forEach(id -> {
nameStore.delete(id);
});
} else {
addOrUpdateNameInStore(name, externalDeviceIds);
}
}
}
private void addOrUpdateNameInStore(Name name, String[] externalDeviceIds) {
Arrays.stream(externalDeviceIds).forEach(id -> {
nameStore.put(id, name);
});
// context.commit();
}
}
public class EventProcessor extends AbstractProcessor<String, IncomingEvent> {
private static final Logger LOGGER = LoggerFactory.getLogger(EventProcessor.class);
ProcessorContext context;
private KeyValueStore<String, Name> nameStore;
private KeyValueStore<String, String> stateStore;
@Override
public void init(ProcessorContext processorContext) {
this.context = processorContext;
this.nameStore = (KeyValueStore<String, Name>) context.getStateStore("nameStore");
this.stateStore = (KeyValueStore<String, String>) context.getStateStore("stateStore");
}
@Override
public void process(String key, IncomingEvent value) {
String correlationId = UUID.randomUUID().toString();
String externalDeviceId = value.getExternalDeviceId();
Name nameFromStore = nameStore.get(externalDeviceId);
}
}
The nameFromStore variable never gets a value, even after storing it in NameProcessor.

AggregatingReplyingKafkaTemplate releaseStrategy Question

There seems to be an issue when I use AggregatingReplyingKafkaTemplate with template.setReturnPartialOnTimeout(true): it returns a timeout exception even if partial results are available from the consumers.
In the example below, I have 3 consumers replying to the request topic and I've set the reply timeout to 10 seconds. I've explicitly delayed the response of consumer 3 to 11 seconds; I expect the responses back from consumers 1 and 2 so I can return partial results. However, I am getting a KafkaReplyTimeoutException. Appreciate your inputs. Thanks.
I based my code on the unit test below.
[ReplyingKafkaTemplateTests][1]
I've provided the actual code below:
@RestController
public class SumController {
@Value("${kafka.bootstrap-servers}")
private String bootstrapServers;
public static final String D_REPLY = "dReply";
public static final String D_REQUEST = "dRequest";
@ResponseBody
@PostMapping(value="/sum")
public String sum(@RequestParam("message") String message) throws InterruptedException, ExecutionException {
AggregatingReplyingKafkaTemplate<Integer, String, String> template = aggregatingTemplate(
new TopicPartitionOffset(D_REPLY, 0), 3, new AtomicInteger());
String resultValue ="";
String currentValue ="";
try {
template.setDefaultReplyTimeout(Duration.ofSeconds(10));
template.setReturnPartialOnTimeout(true);
ProducerRecord<Integer, String> record = new ProducerRecord<>(D_REQUEST, null, null, null, message);
RequestReplyFuture<Integer, String, Collection<ConsumerRecord<Integer, String>>> future =
template.sendAndReceive(record);
future.getSendFuture().get(5, TimeUnit.SECONDS); // send ok
System.out.println("Send Completed Successfully");
ConsumerRecord<Integer, Collection<ConsumerRecord<Integer, String>>> consumerRecord = future.get(10, TimeUnit.SECONDS);
System.out.println("Consumer record size "+consumerRecord.value().size());
Iterator<ConsumerRecord<Integer, String>> iterator = consumerRecord.value().iterator();
while (iterator.hasNext()) {
currentValue = iterator.next().value();
System.out.println("response " + currentValue);
System.out.println("Record header " + consumerRecord.headers().toString());
resultValue = resultValue + currentValue + "\r\n";
}
} catch (Exception e) {
System.out.println("Error Message is "+e.getMessage());
}
return resultValue;
}
public AggregatingReplyingKafkaTemplate<Integer, String, String> aggregatingTemplate(
TopicPartitionOffset topic, int releaseSize, AtomicInteger releaseCount) {
//Create Container Properties
ContainerProperties containerProperties = new ContainerProperties(topic);
containerProperties.setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
//Set the consumer Config
//Create Consumer Factory with Consumer Config
DefaultKafkaConsumerFactory<Integer, Collection<ConsumerRecord<Integer, String>>> cf =
new DefaultKafkaConsumerFactory<>(consumerConfigs());
//Create Listener Container with Consumer Factory and Container Property
KafkaMessageListenerContainer<Integer, Collection<ConsumerRecord<Integer, String>>> container =
new KafkaMessageListenerContainer<>(cf, containerProperties);
// container.setBeanName(this.testName);
AggregatingReplyingKafkaTemplate<Integer, String, String> template =
new AggregatingReplyingKafkaTemplate<>(new DefaultKafkaProducerFactory<>(producerConfigs()), container,
(list, timeout) -> {
releaseCount.incrementAndGet();
return list.size() == releaseSize;
});
template.setSharedReplyTopic(true);
template.start();
return template;
}
public Map<String, Object> consumerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,bootstrapServers);
props.put(ConsumerConfig.GROUP_ID_CONFIG, "test_id");
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
return props;
}
public Map<String, Object> producerConfigs() {
Map<String, Object> props = new HashMap<>();
// list of host:port pairs used for establishing the initial connections to the Kakfa cluster
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,
bootstrapServers);
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
org.apache.kafka.common.serialization.StringSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringSerializer.class);
return props;
}
public ProducerFactory<Integer,String> producerFactory() {
return new DefaultKafkaProducerFactory<>(producerConfigs());
}
@KafkaListener(id = "def1", topics = { D_REQUEST}, groupId = "D_REQUEST1")
@SendTo // default REPLY_TOPIC header
public String dListener1(String in) throws InterruptedException {
return "First Consumer : "+ in.toUpperCase();
}
@KafkaListener(id = "def2", topics = { D_REQUEST}, groupId = "D_REQUEST2")
@SendTo // default REPLY_TOPIC header
public String dListener2(String in) throws InterruptedException {
return "Second Consumer : "+ in.toLowerCase();
}
@KafkaListener(id = "def3", topics = { D_REQUEST}, groupId = "D_REQUEST3")
@SendTo // default REPLY_TOPIC header
public String dListener3(String in) throws InterruptedException {
Thread.sleep(11000);
return "Third Consumer : "+ in;
}
}
[1]: https://github.com/spring-projects/spring-kafka/blob/master/spring-kafka/src/test/java/org/springframework/kafka/requestreply/ReplyingKafkaTemplateTests.java
template.setReturnPartialOnTimeout(true) simply means the template will consult the release strategy on timeout (with the timeout argument = true, to tell the strategy it's a timeout rather than a delivery call).
It must return true to release the partial result.
This is to allow you to look at (and possibly modify) the list to decide whether you want to release or discard.
Your strategy ignores the timeout parameter:
(list, timeout) -> {
releaseCount.incrementAndGet();
return list.size() == releaseSize;
});
You need to take the timeout flag into account, e.g. return timeout ? true : list.size() == releaseSize;
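Applied to the aggregatingTemplate code above, the adjusted strategy would look something like this (releasing whatever partial replies arrived when the timeout fires):
AggregatingReplyingKafkaTemplate<Integer, String, String> template =
        new AggregatingReplyingKafkaTemplate<>(new DefaultKafkaProducerFactory<>(producerConfigs()), container,
                (list, timeout) -> {
                    releaseCount.incrementAndGet();
                    // release on timeout with the partial replies collected so far,
                    // otherwise wait until all releaseSize replies are in
                    return timeout ? true : list.size() == releaseSize;
                });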
