Spring Batch partitioning taking too much time to process 100,000 records

I want to read 10 billion records using a Spring Batch program. I implemented partitioning to process one big file. How can I increase the performance?
Below is my configuration.
@Bean
public FlatFileItemReader<RequestDTO> reader() throws MalformedURLException {
    FlatFileItemReader<RequestDTO> itemReader = new FlatFileItemReader<>();
    itemReader.setLineMapper(lineMapper());
    itemReader.setResource(new FileSystemResource(fileLocation));
    itemReader.setLinesToSkip(1);
    return itemReader;
}
/**
 * This is used for mapping values.
 *
 * @return LineMapper will be returned
 */
@Bean
public LineMapper<RequestDTO> lineMapper() {
    DefaultLineMapper<RequestDTO> lineMapper = new DefaultLineMapper<>();
    DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
    lineTokenizer.setNames("name", "salary", "company");
    lineTokenizer.setIncludedFields(0, 1, 2);
    lineTokenizer.setDelimiter(ProcessorConstants.FILE_SEPERATOR);
    BeanWrapperFieldSetMapper<RequestDTO> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
    fieldSetMapper.setTargetType(RequestDTO.class);
    lineMapper.setLineTokenizer(lineTokenizer);
    lineMapper.setFieldSetMapper(fieldSetMapper);
    return lineMapper;
}
@Bean
public CRSItemProcessor processor() {
    return new CRSItemProcessor();
}

@Bean
public RepositoryItemWriter<AdministrationRequest> writer(DataSource dataSource) {
    return new RepositoryItemWriterBuilder<AdministrationRequest>().methodName("saveAndFlush")
            .repository(processorBatchDAO).build();
}

@Bean
public TaskExecutor taskExecutor() {
    ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
    taskExecutor.setMaxPoolSize(20);
    taskExecutor.setCorePoolSize(10);
    taskExecutor.setQueueCapacity(5);
    taskExecutor.afterPropertiesSet();
    return taskExecutor;
}
#Bean(name = "partitionerJob")
public Job partitionerJob(JobCompletionNotificationListener listener) throws UnexpectedInputException, MalformedURLException, ParseException {
return jobBuilderFactory.get("partitionerJob").incrementer(new RunIdIncrementer()).listener(listener)
.start(partitionStep())
.build();
}
#Bean
public Step partitionStep() throws UnexpectedInputException, MalformedURLException, ParseException {
return stepBuilderFactory.get("partitionStep")
.partitioner(slaveStep(null))
.partitioner("slaveStep",new CustomMultiResourcePartitioner())
.gridSize(20)
.taskExecutor(taskExecutor())
.build();
}
#Bean
public Step slaveStep(RepositoryItemWriter<CRSAdministrationRequest> writer) throws UnexpectedInputException, MalformedURLException, ParseException {
return stepBuilderFactory.get("slaveStep")
.<RequestDTO, AdministrationRequest>chunk(1000)
.reader(reader())
.processor(processor())
.writer(writer)
.build();
}
Below is the custom partitioning class.
public class CustomMultiResourcePartitioner implements Partitioner {

    /**
     * Assign the filename of each of the injected resources to an
     * {@link ExecutionContext}.
     *
     * @see Partitioner#partition(int)
     */
    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        Map<String, ExecutionContext> result = new HashMap<String, ExecutionContext>();
        int range = 1000;
        int fromId = 1;
        int toId = range;
        for (int i = 1; i <= gridSize; i++) {
            ExecutionContext value = new ExecutionContext();
            System.out.println("\nStarting : Thread" + i);
            System.out.println("fromId : " + fromId);
            System.out.println("toId : " + toId);
            value.putInt("fromId", fromId);
            value.putInt("toId", toId);
            // give each thread a name: Thread1, Thread2, Thread3, ...
            value.putString("name", "Thread" + i);
            result.put("partition" + i, value);
            fromId = toId + 1;
            toId += range;
        }
        return result;
    }
}
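Note that the partitioner above writes fromId/toId into each partition's execution context, but the reader() bean is a singleton that never consumes those keys, so all 20 partitions share a single reader over the same whole file. A minimal sketch of a step-scoped reader that binds those values, assuming fromId/toId are meant as 1-based record positions within the file:

@Bean
@StepScope
public FlatFileItemReader<RequestDTO> reader(
        @Value("#{stepExecutionContext['fromId']}") Integer fromId,
        @Value("#{stepExecutionContext['toId']}") Integer toId) throws MalformedURLException {
    FlatFileItemReader<RequestDTO> itemReader = new FlatFileItemReader<>();
    itemReader.setLineMapper(lineMapper());
    itemReader.setResource(new FileSystemResource(fileLocation));
    itemReader.setLinesToSkip(1); // header line
    // Restrict this instance to the partition's slice of the file:
    // skip to item fromId and stop after item toId.
    itemReader.setCurrentItemCount(fromId - 1);
    itemReader.setMaxItemCount(toId);
    return itemReader;
}

With this, each partition gets its own reader instance positioned on its own id range, which also sidesteps the thread-safety problem of sharing one FlatFileItemReader across the task executor's threads.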

Related

Spring Batch Remote Partitioning - DeployerPartitionHandler not executing another job in queue

The DeployerPartitionHandler is configured to use at most 3 workers at a time.
When a job that uses all 3 workers is launched and another job is launched at the same time, the second job is not executed, even after the first job has completed and the workers are free. The status of the second job is stuck at STARTING.
Below is the batch configuration:
@Configuration
@EnableBatchProcessing
@Slf4j
public class BatchConfig {

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    @Value("${jarLocation}")
    public String jarLocation;

    @Value("${batch.job.jobname}")
    public String jobName;

    @Bean
    @Primary
    PlatformTransactionManager getTransactionManager(
            @Qualifier("transactionManager") PlatformTransactionManager platform) {
        return platform;
    }

    @Bean
    public JobRegistryBeanPostProcessor jobRegistryBeanPostProcessor(JobRegistry jobRegistry) {
        JobRegistryBeanPostProcessor postProcessor = new JobRegistryBeanPostProcessor();
        postProcessor.setJobRegistry(jobRegistry);
        return postProcessor;
    }

    @Bean
    @Profile("worker")
    public DeployerStepExecutionHandler stepExecutionHandler(JobExplorer jobExplorer, JobRepository jobRepository,
            ConfigurableApplicationContext context) {
        return new DeployerStepExecutionHandler(context, jobExplorer, jobRepository);
    }

    @Bean
    public PartitionHandler partitionHandler(TaskLauncher taskLauncher, JobExplorer jobExplorer,
            Environment environment, DelegatingResourceLoader delegatingResourceLoader, TaskRepository taskRepository) {
        Resource resource = delegatingResourceLoader.getResource(jarLocation);
        DeployerPartitionHandler partitionHandler = new DeployerPartitionHandler(taskLauncher, jobExplorer, resource,
                "workerStep", taskRepository);
        List<String> commandLineArguments = new ArrayList<>(5);
        commandLineArguments.add("--spring.profiles.active=worker");
        commandLineArguments.add("--spring.cloud.task.initialize.enable=false");
        commandLineArguments.add("--spring.batch.initializer.enabled=false");
        commandLineArguments.add("--spring.cloud.task.closecontextEnabled=true");
        commandLineArguments.add("--logging.level=DEBUG");
        partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArguments));
        partitionHandler.setEnvironmentVariablesProvider(new SimpleEnvironmentVariablesProvider(environment));
        partitionHandler.setMaxWorkers(3);
        partitionHandler.setApplicationName("BatchApplicationWorker");
        return partitionHandler;
    }

    @Bean
    @StepScope
    public Partitioner partitioner(@Value("#{jobParameters['inputFiles']}") String file,
            @Value("#{jobParameters['partitionSize']}") String partitionSize1) {
        int partitionSize = Integer.parseInt(partitionSize1);
        return new Partitioner() {
            public Map<String, ExecutionContext> partition(int gridSize) {
                Map<String, ExecutionContext> partitions = new HashMap<>();
                String[] ids = fetchAllPrimaryKeys(file);
                List<List<String>> partitionPayloads = splitPayLoad(ids, partitionSize);
                int size = partitionPayloads.size();
                for (int i = 0; i < size; i++) {
                    ExecutionContext executionContext = new ExecutionContext();
                    executionContext.put("partitionNumber", i);
                    executionContext.put("partitionPayLoad", new ArrayList<>(partitionPayloads.get(i)));
                    partitions.put("partition" + i, executionContext);
                }
                return partitions;
            }
        };
    }

    @Bean
    public Step masterStep(Step workerStep, PartitionHandler partitionHandler) {
        return this.stepBuilderFactory.get("masterStep").partitioner(workerStep.getName(), partitioner(null, null))
                .step(workerStep).partitionHandler(partitionHandler).build();
    }

    @Bean
    public Step workerStep(CustomWriter customWriter, CustomProcessor customProcessor) {
        return this.stepBuilderFactory.get("workerStep").<User, User>chunk(10000).reader(reader(null))
                .processor(customProcessor).writer(customWriter).build();
    }

    @Bean
    public Job batchJob(Step masterStep, JobExecutionListnerClass jobExecutionListnerClass,
            JobBuilderFactory jobBuilderFactory) {
        return jobBuilderFactory.get("batchJob").start(masterStep).listener(jobExecutionListnerClass).build();
    }

    @Bean
    @StepScope
    public CustomReader reader(@Value("#{stepExecutionContext['partitionPayLoad']}") List<String> payload) {
        return new CustomReader(payload);
    }

    @Bean
    public AppTaskListener appTaskListener() {
        return new AppTaskListener();
    }
}

Is there a way to make a Spring Batch processor advance to the next page?

I set both the chunk size and the page size to 15, and scheduled the batch program to run once a minute (at second 1). The result I want is, for example, when I have 30 rows, to process the first 15 rows and then the 15 rows from the immediately following page in the same run. But the way it works now is that it processes 15 rows in the first run, then another 15 in the next run, and so on. Is there a way to process to the end of the pages in a single execution?
@Bean
public Job ConfirmJob() throws Exception {
    Job exampleJob = jobBuilderFactory.get("ConfirmJob")
            .start(Step())
            .build();
    return exampleJob;
}

@Bean
@JobScope
public Step Step() throws Exception {
    return stepBuilderFactory.get("Step")
            .<UserOrder, UserOrder>chunk(15)
            .reader(reader())
            .processor(processor())
            .writer(writer())
            .build();
}

@Bean
@StepScope
public JpaPagingItemReader<UserOrder> reader() throws Exception {
    Map<String, Object> parameterValues = new HashMap<>();
    parameterValues.put("beforeDay", LocalDateTime.now().minusDays(14));
    parameterValues.put("od", OrderStatus.ship);
    return new JpaPagingItemReaderBuilder<UserOrder>()
            .pageSize(15)
            .parameterValues(parameterValues)
            .queryString("SELECT uo FROM UserOrder uo WHERE uo.standardfinishAt < :beforeDay AND uo.orderStatus = :od ORDER BY id ASC")
            .entityManagerFactory(entityManagerFactory)
            .name("JpaPagingItemReader")
            .build();
}

@Bean
@StepScope
public ItemProcessor<UserOrder, UserOrder> processor() {
    return new ItemProcessor<UserOrder, UserOrder>() {
        @Override
        public UserOrder process(UserOrder us) throws Exception {
            us.batchConfirm(LocalDateTime.now());
            return us;
        }
    };
}

@Bean
@StepScope
public JpaItemWriter<UserOrder> writer() {
    return new JpaItemWriterBuilder<UserOrder>()
            .entityManagerFactory(entityManagerFactory)
            .build();
}
This is the scheduler code:
@Autowired
private ResignConfiguration resignConfiguration;

@Scheduled(cron = "1 * * * * ?")
public void runConfirmJob() {
    Map<String, JobParameter> confMap = new HashMap<>();
    confMap.put("time", new JobParameter(System.currentTimeMillis()));
    JobParameters jobParameters = new JobParameters(confMap);
    try {
        jobLauncher.run(jobConfiguration.ConfirmJob(), jobParameters);
    } catch (JobExecutionAlreadyRunningException | JobInstanceAlreadyCompleteException
            | JobParametersInvalidException | org.springframework.batch.core.repository.JobRestartException e) {
        log.error(e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
It's a paging problem: each chunk updates the items so they no longer match the query's WHERE clause, so the remaining rows shift into earlier pages while the reader has already moved on to the next page and skips them. Fetching page 0 on every read avoids that:
JpaPagingItemReader<UserOrder> reader = new JpaPagingItemReader<UserOrder>() {
    @Override
    public int getPage() {
        return 0;
    }
};
reader.setParameterValues(parameterValues);
reader.setQueryString("SELECT uo FROM UserOrder uo WHERE uo.createdAt < :limitDay AND uo.orderStatus = :od ORDER BY id ASC");
reader.setPageSize(100);
reader.setEntityManagerFactory(entityManagerFactory);
reader.setName("JpaPagingItemReader");

How to send a reply from a listener

I am working on a homework assignment for my studies with Java and RabbitMQ. I am not very familiar with Spring and RabbitMQ, and I can't solve this problem.
I have two separate applications.
The first one produces the messages (the bolid application). I created a message producer (bolid) which sends a message to the listeners every 10 seconds:
@SpringBootApplication
public class BolidApplication implements CommandLineRunner {

    public static void main(String[] args) {
        SpringApplication.run(BolidApplication.class, args);
    }

    @Autowired
    private RabbitTemplate rabbitTemplate;

    @Override
    public void run(String... args) throws Exception {
        Bolid bolid = new Bolid();
        int i = 10;
        while (true) {
            bolid.setData(new Date());
            rabbitTemplate.setReplyAddress("bolidReply");
            rabbitTemplate.convertAndSend("RaceExchange", "raceRouting", bolid.toString());
            rabbitTemplate.convertAndSend("MonitorExchange", "raceRouting", bolid.toString());
            Thread.sleep(15000);
            i += 10;
        }
    }
}
In the second application, which hosts the listeners, I create two queues (RaceQueue and MonitorQueue), define the exchanges, and bind them. I have two listeners: RaceListener and MonitorListener. Here is the configuration:
public class RabbitConfig {

    private static final String RACE_QUEUE = "RaceQueue";
    private static final String MONITOR_QUEUE = "MonitorQueue";

    @Bean
    Queue myQueue() {
        return new Queue(RACE_QUEUE, true);
    }

    @Bean
    Queue monitorQueue() {
        return new Queue(MONITOR_QUEUE, true);
    }

    @Bean
    Exchange myExchange() {
        return ExchangeBuilder.topicExchange("RaceExchange")
                .durable(true)
                .build();
    }

    @Bean
    Exchange monitorExchange() {
        return ExchangeBuilder.topicExchange("MonitorExchange")
                .durable(true)
                .build();
    }

    @Bean
    Binding binding() {
        // return new Binding(MY_QUEUE, Binding.DestinationType.QUEUE, "MyTopicExchange", "topic", null)
        return BindingBuilder
                .bind(myQueue())
                .to(myExchange())
                .with("raceRouting")
                .noargs();
    }

    @Bean
    Binding monitorBinding() {
        return BindingBuilder
                .bind(monitorQueue())
                .to(monitorExchange())
                .with("raceRouting")
                .noargs();
    }

    @Bean
    ConnectionFactory connectionFactory() {
        CachingConnectionFactory cachingConnectionFactory = new CachingConnectionFactory("localhost");
        cachingConnectionFactory.setUsername("guest");
        cachingConnectionFactory.setPassword("guest");
        return cachingConnectionFactory;
    }

    @Bean
    MessageListenerContainer rabbitRaceListener() {
        SimpleMessageListenerContainer simpleMessageListenerContainer = new SimpleMessageListenerContainer();
        simpleMessageListenerContainer.setConnectionFactory(connectionFactory());
        simpleMessageListenerContainer.setQueues(myQueue());
        simpleMessageListenerContainer.setupMessageListener(new RabbitRaceListener());
        return simpleMessageListenerContainer;
    }

    @Bean
    MessageListenerContainer rabbitMonitorListener() {
        SimpleMessageListenerContainer simpleMessageListenerContainer = new SimpleMessageListenerContainer();
        simpleMessageListenerContainer.setConnectionFactory(connectionFactory());
        simpleMessageListenerContainer.setQueues(monitorQueue());
        simpleMessageListenerContainer.setupMessageListener(new RabbitMonitorListener());
        return simpleMessageListenerContainer;
    }
}
From MonitorListener I want to use the reply pattern to send a message back to my first application (the bolid application), so the bolid application can receive it.
My code for MonitorListener:
public class RabbitMonitorListener implements MessageListener {

    @Autowired
    private RabbitTemplate rabbitTemplate;

    @Override
    public void onMessage(Message message) {
        String[] splitted = new String(message.getBody()).split("\\|");
        int oilTemperature = Integer.parseInt(splitted[1].split(" ")[2]);
        int engineTemperature = Integer.parseInt(splitted[2].split(" ")[2]);
        int tirePressure = Integer.parseInt(splitted[3].split(" ")[2]);
        System.out.println("message2 = [" + new String(message.getBody()) + "]");
        if (oilTemperature > 120 || engineTemperature > 120 || tirePressure > 12) {
            System.out.println("SEND REPLY TO BOLID!");
        }
        if (oilTemperature > 150 || engineTemperature > 150 || tirePressure > 17) {
            System.out.println("SEND REPLY TO BOLID!");
        }
    }
}
How can I achieve that, so I can send a message back to the bolid and read it in the bolid application?
EDIT:
I did some research; I want to do it this way:
public class RabbitMonitorListener implements MessageListener {

    @Autowired
    private RabbitTemplate rabbitTemplate;

    @Override
    public void onMessage(Message message) {
        String[] splitted = new String(message.getBody()).split("\\|");
        int oilTemperature = Integer.parseInt(splitted[1].split(" ")[2]);
        int engineTemperature = Integer.parseInt(splitted[2].split(" ")[2]);
        int tirePressure = Integer.parseInt(splitted[3].split(" ")[2]);
        String response = "Hello";
        MessageProperties properties = new MessageProperties();
        Message responseMessage = new Message(response.getBytes(), properties);
        rabbitTemplate.send(message.getMessageProperties().getReplyTo(), responseMessage);
        System.out.println("message2 = [" + new String(message.getBody()) + "]");
        if (oilTemperature > 120 || engineTemperature > 120 || tirePressure > 12) {
            System.out.println("WARN MECHANICS");
        }
        if (oilTemperature > 150 || engineTemperature > 150 || tirePressure > 17) {
            System.out.println("WARN MECHANICS");
        }
    }
}
But rabbitTemplate is null here, so I can't @Autowired it. How can I get access to rabbitTemplate and its send method in the MessageListener?
new RabbitRaceListener() - that must be a @Bean too, to get autowiring.
However, you are over-complicating things; the framework can take care of all of this for you.
See Request/Reply Messaging for the client side - and use convertSendAndReceive() or convertSendAndReceiveAsType().
On the server side, see Annotation-driven Listener Endpoints.
#RabbitListener(queues = "request")
public String handle(String in) {
return in.toUpperCase();
}
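On the client side, a minimal sketch of what the bolid loop could use instead of plain convertAndSend() (a hypothetical adaptation of the question's code; the exchange and routing key are taken from the question):

// Blocks until the listener's return value comes back as the reply;
// the template manages the reply queue and correlation internally.
String reply = (String) rabbitTemplate.convertSendAndReceive(
        "MonitorExchange", "raceRouting", bolid.toString());
System.out.println("Reply from monitor: " + reply);

For this to work, the monitor side must be an annotated listener endpoint that returns a value, as in the handle() example above.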

Spring Batch MultiLineItemReader with MultiResourcePartitioner

I have a file with multi-line data like this. DataID marks the start of a new record, i.e. one record is the combination of the ID line and the concatenation of the lines below it, until the start of a new record.
>DataID1
Line1asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line3asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID2
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID3
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
I was able to implement this using SingleItemPeekableItemReader and it's working fine.
I am now trying to implement partitioning, as we need to process multiple files. I am not sure how the partitioner passes the file info to my custom reader, or how to make my SingleItemPeekableItemReader thread safe, as it is not working correctly.
I need some input, as I am stuck at this point.
Java config:
@Bean
public Partitioner partitioner() {
    MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
    partitioner.setResources(resources);
    partitioner.partition(10);
    return partitioner;
}

@Bean
public TaskExecutor taskExecutor() {
    ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
    taskExecutor.setMaxPoolSize(4);
    taskExecutor.setCorePoolSize(4);
    taskExecutor.setQueueCapacity(8);
    taskExecutor.afterPropertiesSet();
    return taskExecutor;
}

@Bean
@Qualifier("masterStep")
public Step masterStep() {
    return stepBuilderFactory.get("masterStep")
            .partitioner("step1", partitioner())
            .step(step1())
            .taskExecutor(taskExecutor())
            .build();
}

@Bean
public MultiResourceItemReader<FieldSet> multiResourceItemReader() {
    log.info("Total Number of Files to be process {}", resources.length);
    report.setFileCount(resources.length);
    MultiResourceItemReader<FieldSet> resourceItemReader = new MultiResourceItemReader<FieldSet>();
    resourceItemReader.setResources(resources);
    resourceItemReader.setDelegate(reader());
    return resourceItemReader;
}

@Bean
public FlatFileItemReader<FieldSet> reader() {
    FlatFileItemReader<FieldSet> build = new FlatFileItemReaderBuilder<FieldSet>().name("fileReader")
            .lineTokenizer(orderFileTokenizer())
            .fieldSetMapper(new FastFieldSetMapper())
            .recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
            .build();
    build.setBufferedReaderFactory(gzipBufferedReaderFactory);
    return build;
}

@Bean
public SingleItemPeekableItemReader<FieldSet> readerPeek() {
    SingleItemPeekableItemReader<FieldSet> reader = new SingleItemPeekableItemReader<>();
    reader.setDelegate(multiResourceItemReader());
    return reader;
}

@Bean
public MultiLineFastaItemReader itemReader() {
    MultiLineFastaItemReader itemReader = new MultiLineFastaItemReader(multiResourceItemReader());
    itemReader.setSingalPeekable(readerPeek());
    return itemReader;
}

@Bean
public PatternMatchingCompositeLineTokenizer orderFileTokenizer() {
    PatternMatchingCompositeLineTokenizer tokenizer = new PatternMatchingCompositeLineTokenizer();
    Map<String, LineTokenizer> tokenizers = new HashMap<>(2);
    tokenizers.put(">*", head());
    tokenizers.put("*", tail());
    tokenizer.setTokenizers(tokenizers);
    return tokenizer;
}

public DelimitedLineTokenizer head() {
    DelimitedLineTokenizer token = new DelimitedLineTokenizer();
    token.setNames("sequenceIdentifier");
    token.setDelimiter(" ");
    token.setStrict(false);
    return token;
}

public DelimitedLineTokenizer tail() {
    DelimitedLineTokenizer token = new DelimitedLineTokenizer();
    token.setNames("sequences");
    token.setDelimiter(" ");
    return token;
}

@Bean
public FastReportWriter writer() {
    return new FastReportWriter();
}

@Bean
public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
    return jobBuilderFactory.get("importUserJob")
            .incrementer(new RunIdIncrementer())
            .listener(listener)
            .flow(masterStep())
            //.flow(step1)
            .next(step2())
            .end()
            .build();
}

@Bean
public Step step1() {
    return stepBuilderFactory.get("step1")
            .<Fasta, Fasta>chunk(5000)
            .reader(itemReader())
            .processor(new FastaIteamProcessor())
            //.processor(new PassThroughItemProcessor<>())
            .writer(writer())
            .build();
}
public class MultiLineFastaItemReader implements ItemReader<Fasta>, ItemStream {

    private static final Logger log = LoggerFactory.getLogger(MultiLineFastaItemReader.class);

    private SingleItemPeekableItemReader<FieldSet> singalPeekable;
    AtomicInteger iteamCounter = new AtomicInteger(0);
    ConcurrentHashMap<String, AtomicInteger> fileNameAndCounterMap = new ConcurrentHashMap<>();

    @Autowired
    private SequenceFastaReport sequenceFastaReport;

    private MultiResourceItemReader<FieldSet> resourceItemReader;

    public MultiLineFastaItemReader(MultiResourceItemReader<FieldSet> multiResourceItemReader) {
        this.resourceItemReader = multiResourceItemReader;
    }

    public SingleItemPeekableItemReader<FieldSet> getSingalPeekable() {
        return singalPeekable;
    }

    public void setSingalPeekable(SingleItemPeekableItemReader<FieldSet> singalPeekable) {
        this.singalPeekable = singalPeekable;
    }

    @Override
    public Fasta read() throws Exception {
        FieldSet item = singalPeekable.read();
        if (item == null) {
            return null;
        }
        Fasta fastaObject = new Fasta();
        log.info("ID {} fileName {}", item.readString(0), resourceItemReader.getCurrentResource());
        fastaObject.setSequenceIdentifier(item.readString(0).toUpperCase());
        fastaObject.setFileName(resourceItemReader.getCurrentResource().getFilename());
        if (!fileNameAndCounterMap.containsKey(fastaObject.getFileName())) {
            fileNameAndCounterMap.put(fastaObject.getFileName(), new AtomicInteger(0));
        }
        while (true) {
            FieldSet possibleRelatedObject = singalPeekable.peek();
            if (possibleRelatedObject == null) {
                if (fastaObject.getSequenceIdentifier().length() < 1)
                    throw new InvalidParameterException("Something wrong in file");
                sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
                        .incrementAndGet(), fastaObject.getSequences());
                return fastaObject;
            }
            if (possibleRelatedObject.readString(0).startsWith(">")) {
                if (fastaObject.getSequenceIdentifier().length() < 1)
                    throw new InvalidParameterException("Something wrong in file");
                sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
                        .incrementAndGet(), fastaObject.getSequences());
                return fastaObject;
            }
            String data = fastaObject.getSequences().toUpperCase();
            fastaObject.setSequences(data + singalPeekable.read().readString(0).toUpperCase());
        }
    }

    @Override
    public void close() {
        this.singalPeekable.close();
    }

    @Override
    public void open(ExecutionContext executionContext) {
        this.singalPeekable.open(executionContext);
    }

    @Override
    public void update(ExecutionContext executionContext) {
        this.singalPeekable.update(executionContext);
    }
}
I am not sure how the partitioner is passing file info to my custom reader

The partitioner will create partition meta-data in the step execution contexts, and your reader should read that meta-data from there. In your example, you don't need to call partition on the partitioner; Spring Batch will do that for you. What you need to do instead is set the partition key on the partitioner, for example:
@Bean
public Partitioner partitioner() {
    MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
    partitioner.setResources(resources);
    partitioner.setKeyName("file");
    return partitioner;
}
This will create one partition per file, with the key file, which you can then get in your reader from the step execution context:
@Bean
@StepScope
public FlatFileItemReader reader(@Value("#{stepExecutionContext['file']}") String file) {
    // define your reader
}
Note that the reader should be step scoped to use this feature. More details here: https://docs.spring.io/spring-batch/4.0.x/reference/html/step.html#late-binding
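To make that concrete, here is a minimal sketch of a step-scoped reader for this question's setup (hypothetical; it reuses the question's orderFileTokenizer() and FastFieldSetMapper and omits the gzip BufferedReaderFactory). Spring converts the resource URL that MultiResourcePartitioner stores under the key into a Resource:

@Bean
@StepScope
public FlatFileItemReader<FieldSet> reader(@Value("#{stepExecutionContext['file']}") Resource file) {
    // Each partition gets its own reader bound to exactly one file,
    // so no reader state is shared between the task executor's threads.
    return new FlatFileItemReaderBuilder<FieldSet>()
            .name("fileReader")
            .resource(file)
            .lineTokenizer(orderFileTokenizer())
            .fieldSetMapper(new FastFieldSetMapper())
            .recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
            .build();
}

The SingleItemPeekableItemReader and MultiLineFastaItemReader wrappers would then also need to be step scoped so that each partition gets its own instances.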

Kafka listener container in Spring Boot with annotations does not consume messages

Not sure if I am doing something wrong, but I could not manage to make it work. Below is my code:
public class EventsApp {

    private static final Logger log = LoggerFactory.getLogger(EventsApp.class);

    @Value("${kafka.topic:test}")
    private String topic;

    @Value("${kafka.messageKey:si.key}")
    private String messageKey;

    @Value("${kafka.broker.address:localhost:9092}")
    private String brokerAddress;

    @Value("${kafka.zookeeper.connect:localhost:2181}")
    private String zookeeperConnect;

    /**
     * Main method, used to run the application.
     *
     * @param args the command line arguments
     * @throws UnknownHostException if the local host name could not be resolved into an address
     */
    public static void main(String[] args) throws UnknownHostException, Exception {
        ConfigurableApplicationContext context
                = new SpringApplicationBuilder(EventsApp.class)
                        .web(false)
                        .run(args);
        MessageChannel toKafka = context.getBean("toKafka", MessageChannel.class);
        for (int i = 0; i < 10; i++) {
            System.out.println("sending.." + toKafka.toString());
            toKafka.send(new GenericMessage<>("foo" + i));
        }
        Thread.sleep(115000);
        context.close();
        System.exit(0);
    }

    @KafkaListener(id = "baz", topics = "test",
            containerFactory = "kafkaListenerContainerFactory")
    public void listen(String data, Acknowledgment ack) {
        System.out.println("----- " + data);
        ack.acknowledge();
    }

    @Bean
    KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<Integer, String>>
            kafkaListenerContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<Integer, String> factory =
                new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory());
        factory.setConcurrency(1);
        factory.getContainerProperties().setPollTimeout(3000);
        return factory;
    }

    @ServiceActivator(inputChannel = "toKafka")
    @Bean
    public MessageHandler handler() throws Exception {
        KafkaProducerMessageHandler<String, String> handler =
                new KafkaProducerMessageHandler<>(kafkaTemplate());
        handler.setTopicExpression(new LiteralExpression(this.topic));
        handler.setMessageKeyExpression(new LiteralExpression(this.messageKey));
        return handler;
    }

    @Bean
    public KafkaTemplate<String, String> kafkaTemplate() {
        return new KafkaTemplate<>(producerFactory());
    }

    @Bean
    public ProducerFactory<String, String> producerFactory() {
        Map<String, Object> props = new HashMap<>();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, this.brokerAddress);
        props.put(ProducerConfig.RETRIES_CONFIG, 0);
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
        props.put(ProducerConfig.LINGER_MS_CONFIG, 1);
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432);
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        return new DefaultKafkaProducerFactory<>(props);
    }

    @Bean
    public ConsumerFactory<Integer, String> consumerFactory() {
        Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, this.brokerAddress);
        //props.put("zookeeper.connect", this.zookeeperConnect);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "siTestGroup");
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
        props.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 100);
        props.put(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 15000);
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        return new DefaultKafkaConsumerFactory<>(props);
    }

    @Bean
    public TopicCreator topicCreator() {
        return new TopicCreator(this.topic, this.zookeeperConnect);
    }

    public static class TopicCreator implements SmartLifecycle {

        private final String topic;
        private final String zkConnect;
        private volatile boolean running;

        public TopicCreator(String topic, String zkConnect) {
            this.topic = topic;
            this.zkConnect = zkConnect;
        }

        @Override
        public void start() {
            ZkUtils zkUtils = new ZkUtils(new ZkClient(this.zkConnect, 6000, 6000,
                    ZKStringSerializer$.MODULE$), null, false);
            try {
                AdminUtils.createTopic(zkUtils, topic, 1, 1, new Properties());
            }
            catch (TopicExistsException e) {
                // no-op
            }
            this.running = true;
        }

        @Override
        public void stop() {
        }

        @Override
        public boolean isRunning() {
            return this.running;
        }

        @Override
        public int getPhase() {
            return Integer.MIN_VALUE;
        }

        @Override
        public boolean isAutoStartup() {
            return true;
        }

        @Override
        public void stop(Runnable callback) {
            callback.run();
        }
    }
}
I am able to produce messages, though. I am using Spring Boot version 1.4.0.RELEASE and spring-integration-kafka version 2.0.1.RELEASE.
Try adding
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
to the consumer config.
By default, Kafka will start the consumer at the end of the topic.
If that doesn't work, turn on DEBUG logging to figure out what's going on.
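For reference, a sketch of where that line fits in the question's consumerFactory() bean:

@Bean
public ConsumerFactory<Integer, String> consumerFactory() {
    Map<String, Object> props = new HashMap<>();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, this.brokerAddress);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "siTestGroup");
    // Without this, a brand-new consumer group starts at the end of the
    // topic ("latest") and misses records produced before it subscribed.
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
    return new DefaultKafkaConsumerFactory<>(props);
}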
