I am currently reading CSV files into objects with Spring Batch, and I have to save the total number of lines as well as the number of rejected/skipped lines of the current file. Using a StepExecutionListener didn't work, since I need to get these counts before the step ends and not after the step. Is there a way I can save them in the ItemProcessor or ItemWriter without having to add another step?
i need to get it before the step ends and not after the step
You can't get the total number of lines without going until the end of the step (ie reading the entire file).
using StepExecutionListener didn't work
Using a step execution listener is the way to go. You did not share your code to see why this didn't work for you, but here is a quick example:
import java.util.Arrays;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
#Configuration
#EnableBatchProcessing
public class MyJobConfiguration {
#Bean
public Job job(JobBuilderFactory jobs, StepBuilderFactory steps) {
return jobs.get("myJob")
.start(steps.get("myStep")
.<Integer, Integer>chunk(2)
.reader(new ListItemReader<>(Arrays.asList(1, 2, 3, 4)))
.processor((ItemProcessor<Integer, Integer>) item -> {
if (item % 2 != 0) {
throw new Exception("No odd numbers here!");
}
return item;
})
.writer(items -> items.forEach(System.out::println))
.faultTolerant()
.skip(Exception.class)
.skipLimit(5)
.listener(new StepExecutionListener() {
#Override
public void beforeStep(StepExecution stepExecution) {
System.out.println("Starting step " + stepExecution.getStepName());
}
#Override
public ExitStatus afterStep(StepExecution stepExecution) {
System.out.println("Step "+ stepExecution.getStepName() + " is complete");
System.out.println("read.count = " + stepExecution.getReadCount());
System.out.println("write.count = " + stepExecution.getWriteCount());
System.out.println("skip.count = " + stepExecution.getSkipCount());
return stepExecution.getExitStatus();
}
})
.build())
.build();
}
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(MyJobConfiguration.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
jobLauncher.run(job, new JobParameters());
}
}
This prints:
Starting step myStep
2
4
Step myStep is complete
read.count = 4
write.count = 2
skip.count = 2
Related
I am new with Spring-Batch and I would like to understand how it should be used to process a List<String> as fast as possible in parallel using multiple threads and then just return a subset of them based on some condition.
For example, I was thinking to use it for checking which IP is up within a subnet.
import org.apache.commons.net.util.SubnetUtils;
String subnet = "192.168.8.0/24";
SubnetUtils utils = new SubnetUtils(subnet);
List<String> addresses = List.of(utils.getInfo().getAllAddresses());
// Keep the address only when it answers within 100 ms.
if (InetAddress.getByName(address).isReachable(100)) {
    // Consider this address for the final list
    return true;
}
My code is as follows:
import it.eng.cysec.discoverer.service.NetworkService;
import lombok.RequiredArgsConstructor;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.JobScope;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.net.InetAddress;
import java.util.Arrays;
import java.util.Date;
#Configuration
#EnableBatchProcessing
#RequiredArgsConstructor
public class BatchConfiguration {
private final JobBuilderFactory jobBuilderFactory;
private final StepBuilderFactory stepBuilderFactory;
private final NetworkService networkService;
#Bean
public Job checkSubnetJob(Step checkIPStep){
return this.jobBuilderFactory.get("check-subnet-job")
.incrementer(new RunIdIncrementer())
.start(checkIPStep)
.build();
}
#Bean
#JobScope
public Step checkIPStep(#Value("#{jobParameters['subnet']}") String subnet) {
System.out.println("Subnet parameter: " + subnet);
return this.stepBuilderFactory.get("check-ip-step")
.<String, String>chunk(10)
.reader(reader(null))
.processor(processor())
.writer(writer())
.allowStartIfComplete(true)
.build();
}
#Bean
#JobScope
public ItemReader<String> reader(#Value("#{jobParameters['subnet']}") String subnet) {
return new ListItemReader<>(this.networkService.getAllSubnetAddresses(subnet));
}
#Bean
public ItemProcessor<String, String> processor() {
return ip -> {
System.out.println("Processor IP: " + ip + " " + new Date());
try {
InetAddress address = InetAddress.getByName(ip);
if(address.isReachable(5000)){
return ip;
}else {
return null;
}
}catch (Exception e){
return null;
}
};
}
#Bean
public ItemWriter<String> writer() {
// TODO How to pass the list of up IPs back to the calling function?
return list -> {
System.out.println("Arrays to String" + Arrays.toString(list.toArray()));
};
}
}
import lombok.RequiredArgsConstructor;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.util.List;
#RestController
#RequestMapping("test")
#RequiredArgsConstructor
public class TestController {
private final Job job;
private final JobLauncher jobLauncher;
#GetMapping()
public List<String> test(){
JobParameters parameters = new JobParametersBuilder()
.addString("subnet", "192.168.8.0/24", false)
.toJobParameters();
try {
this.jobLauncher.run(this.job, parameters);
} catch (Exception e) {
throw new RuntimeException(e);
}
// TODO How to return the IP that are up based on the previous object?
return List.of("OK");
}
}
So my main questions are:
How to make different chunks (of 10 IP) to be processed in parallel? Right now they are not.
What is the fastest approach that Spring-Batch provides to process all the IPs of a local network? Is it enough to keep them in memory or would it be better to persist them while processing the remaining IPs? If so, how?
How to pass back to the calling method the computed IPs result?
You can create a custom partitioner that partitions the input list based on indexes. Here is a quick example:
/*
* Copyright 2022 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.batch.sample;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Stream;
import javax.sql.DataSource;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.item.Chunk;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseBuilder;
import org.springframework.jdbc.datasource.embedded.EmbeddedDatabaseType;
/**
* Example of a partitioned step where the input is a list, and partitions
* are sublists that are processed in parallel with local worker threads.
*
* #author Mahmoud Ben Hassine
*/
#Configuration
#EnableBatchProcessing
public class ListPartitioningSample {
#Bean
public Step managerStep(StepBuilderFactory stepBuilderFactory) {
List<String> items = Arrays.asList("foo1", "foo2", "foo3", "foo4", "foo5", "foo6", "foo7", "foo8"); // retrieved with this.networkService.getAllSubnetAddresses(subnet)
return stepBuilderFactory.get("managerStep")
.partitioner("workerStep", new ListPartitioner(items.size()))
.gridSize(2)
.taskExecutor(new SimpleAsyncTaskExecutor())
.step(workerStep(stepBuilderFactory))
.build();
}
#Bean
public Step workerStep(StepBuilderFactory stepBuilderFactory) {
return stepBuilderFactory.get("workerStep")
.<String, String>chunk(2)
.reader(itemReader(null))
.processor(itemProcessor())
.writer(itemWriter())
.build();
}
#Bean
#StepScope
public ListItemReader<String> itemReader(#Value("#{stepExecutionContext['range']}") Range partition) {
List<String> items = Arrays.asList("foo1", "foo2", "foo3", "foo4", "foo5", "foo6", "foo7", "foo8"); // retrieved with this.networkService.getAllSubnetAddresses(subnet)
return new ListItemReader<>(items.subList(partition.start, partition.end));
}
#Bean
public ItemProcessor<String, String> itemProcessor() {
return new ItemProcessor<String, String>() {
#Override
public String process(String item) throws Exception {
return item; // filter items as needed here
}
};
}
#Bean
public ItemWriter<String> itemWriter() {
return new ItemWriter<String>() {
#Override
public void write(List<? extends String> items) throws Exception {
items.forEach(new Consumer<String>() {
#Override
public void accept(String item) {
System.out.println(Thread.currentThread().getName() + ": " + item);
}
});
}
};
}
#Bean
public Job job(JobBuilderFactory jobBuilderFactory, StepBuilderFactory stepBuilderFactory) {
return jobBuilderFactory.get("job")
.start(managerStep(stepBuilderFactory))
.build();
}
#Bean
public DataSource dataSource() {
return new EmbeddedDatabaseBuilder()
.setType(EmbeddedDatabaseType.HSQL)
.addScript("/org/springframework/batch/core/schema-hsqldb.sql")
.build();
}
// TODO quick and dirty implementation, please add sanity checks and verify edge cases
public static class ListPartitioner implements Partitioner {
private int listSize;
public ListPartitioner(int listSize) {
this.listSize = listSize;
}
#Override
public Map<String, ExecutionContext> partition(int gridSize) {
// calculate ranges
int partitionSize = listSize / gridSize;
Range[] ranges = new Range[gridSize];
for (int i = 0, j = 0; i < gridSize; i++, j+= partitionSize) {
ranges[i] = new Range(j, j + partitionSize);
System.out.println("range = " + ranges[i]);
}
// prepare partition meta-data
Map<String, ExecutionContext> partitions = new HashMap<>(gridSize);
for (int i = 0; i < gridSize; i++) {
ExecutionContext context = new ExecutionContext();
context.put("range", ranges[i]);
partitions.put("partition" + i, context);
}
return partitions;
}
}
/**
* Represents an index range (ie a partition) in a list.
* Ex: List = ["foo1", "foo2", "bar1", "bar2"]
* Range1 = [0, 2] => sublist1 = ["foo1", "foo2"]
* Range2 = [2, 4] => sublist2 = ["bar1", "bar2"]
* #param start of sublist, inclusive
* #param end of sublist, exclusive
*/
record Range(int start, int end) {};
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(ListPartitioningSample.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
JobExecution jobExecution = jobLauncher.run(job, new JobParameters());
System.out.println("jobExecution = " + jobExecution);
}
}
The idea is to create sub lists and make each worker step work on a distinct sublist. (note the list is not duplicated, it could be shared and each worker thread will read its own distinct partition).
The sample above prints:
[main] INFO org.springframework.batch.core.launch.support.SimpleJobLauncher - Job: [SimpleJob: [name=job]] launched with the following parameters: [{}]
[main] INFO org.springframework.batch.core.job.SimpleStepHandler - Executing step: [managerStep]
range = Range[start=0, end=4]
range = Range[start=4, end=8]
SimpleAsyncTaskExecutor-1: foo1
SimpleAsyncTaskExecutor-1: foo2
SimpleAsyncTaskExecutor-2: foo5
SimpleAsyncTaskExecutor-2: foo6
SimpleAsyncTaskExecutor-1: foo3
SimpleAsyncTaskExecutor-1: foo4
SimpleAsyncTaskExecutor-2: foo7
SimpleAsyncTaskExecutor-2: foo8
[SimpleAsyncTaskExecutor-1] INFO org.springframework.batch.core.step.AbstractStep - Step: [workerStep:partition0] executed in 82ms
[SimpleAsyncTaskExecutor-2] INFO org.springframework.batch.core.step.AbstractStep - Step: [workerStep:partition1] executed in 82ms
[main] INFO org.springframework.batch.core.step.AbstractStep - Step: [managerStep] executed in 137ms
[main] INFO org.springframework.batch.core.launch.support.SimpleJobLauncher - Job: [SimpleJob: [name=job]] completed with the following parameters: [{}] and the following status: [COMPLETED] in 162ms
jobExecution = JobExecution: id=0, version=2, startTime=Wed Aug 17 12:21:00 CEST 2022, endTime=Wed Aug 17 12:21:00 CEST 2022, lastUpdated=Wed Aug 17 12:21:00 CEST 2022, status=COMPLETED, exitStatus=exitCode=COMPLETED;exitDescription=, job=[JobInstance: id=0, version=0, Job=[job]], jobParameters=[{}]
This shows that partitions (ie sublists) are processed in parallel by different threads.
Now, to answer your question about how to gather written elements (the retained IPs in your case): you can put item indexes in the execution context (not the items themselves), and grab them from the execution context with a StepExecutionAggregator. You can find an example of how to do that in the word count fork/join sample that I shared here:
EDIT: show how to access the subnet job parameter from the item reader
You are already passing the subnet as a job parameter in your controller method. So you can access it in the item reader bean definition with a SpEL expression as follows:
#Bean
#StepScope
public ListItemReader<String> itemReader(#Value("#{stepExecutionContext['range']}") Range partition, #Value("#{jobParameters['subnet']}") String subnet) {
// use subnet parameter as needed here
List<String> items = Arrays.asList("foo1", "foo2", "foo3", "foo4", "foo5", "foo6", "foo7", "foo8"); // retrieved with this.networkService.getAllSubnetAddresses(subnet)
return new ListItemReader<>(items.subList(partition.start, partition.end));
}
I'm reading a CSV file using a MultiResourceItemReader and I've set the skip limit to 10. When the limit is exceeded, I want to catch the SkipLimitExceededException and throw my own customized exception with a message like "Invalid csv". Where or how do I catch it?
try {
log.info("Running job to insert batch fcm: {} into database.", id);
jobLauncher
.run(importJob, new JobParametersBuilder()
.addString("fullPathFileName", TMP_DIR)
.addString("batch_fcm_id", String.valueOf(id))
.addLong("time",System.currentTimeMillis())
.toJobParameters());
}
catch(...){...}
I cannot catch it here, is it because I'm using MultiResourceItemReader and the asynchronous process doesn't allow me to catch it here?
my job is as follows
#Bean(name = "fcmJob")
Job importJob(#Qualifier(MR_ITEM_READER) Reader reader,
#Qualifier(JDBC_WRITER) JdbcBatchItemWriter jdbcBatchItemWriter,
#Qualifier("fcmTaskExecutor") TaskExecutor taskExecutor) {
Step writeToDatabase = stepBuilderFactory.get("file-database")//name of step
.<FcmIdResource, FcmIdResource>chunk(csvChunkSize) // <input as, output as>
.reader(reader)
.faultTolerant()
.skipLimit(10)
.skip(UncategorizedSQLException.class)
.noSkip(FileNotFoundException.class)
.writer(jdbcBatchItemWriter)
.taskExecutor(taskExecutor)
.throttleLimit(20)
.build();
return jobBuilderFactory.get("jobBuilderFactory") //Name of job builder factory
.incrementer(new RunIdIncrementer())
.start(writeToDatabase)
.on("*")
.to(deleteTemporaryFiles())
.end()
.build();
}
I have tried using ItemReaderListener, SkipPolicy, SkipListener, but they cannot throw an exception, is there any other way?
The exception you are looking for is not thrown by the job, you can get it from the job execution using JobExecution#getAllFailureExceptions.
So in your example, instead of doing:
try {
jobLauncher.run(job, new JobParameters());
} catch (Exception e) {
//...
}
You should do:
// The launcher returns the execution instead of rethrowing step failures.
JobExecution jobExecution = jobLauncher.run(job, new JobParameters());
// getAllFailureExceptions() also covers exceptions recorded on the step executions,
// which is where a skip-limit failure ends up; getFailureExceptions() is job-level only.
List<Throwable> allFailureExceptions = jobExecution.getAllFailureExceptions();
In your case, SkipLimitExceededException will be one of allFailureExceptions.
EDIT: Adding an example showing that SkipLimitExceededException is part of allFailureExceptions:
import java.util.Arrays;
import java.util.List;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
#Configuration
#EnableBatchProcessing
public class MyJob {
#Autowired
private JobBuilderFactory jobs;
#Autowired
private StepBuilderFactory steps;
#Bean
public ItemReader<Integer> itemReader() {
return new ListItemReader<>(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
}
#Bean
public ItemProcessor<Integer, Integer> itemProcessor() {
return item -> {
if (item % 3 == 0) {
throw new IllegalArgumentException("no multiples of three here! " + item);
}
return item;
};
}
#Bean
public ItemWriter<Integer> itemWriter() {
return items -> {
for (Integer item : items) {
System.out.println("item = " + item);
}
};
}
#Bean
public Step step() {
return steps.get("step")
.<Integer, Integer>chunk(2)
.reader(itemReader())
.processor(itemProcessor())
.writer(itemWriter())
.faultTolerant()
.skip(IllegalArgumentException.class)
.skipLimit(2)
.build();
}
#Bean
public Job job() {
return jobs.get("job")
.start(step())
.build();
}
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
JobExecution jobExecution = jobLauncher.run(job, new JobParameters());
List<Throwable> allFailureExceptions = jobExecution.getAllFailureExceptions();
for (Throwable failureException : allFailureExceptions) {
System.out.println("failureException = " + failureException);
}
}
}
This sample prints:
item = 1
item = 2
item = 4
item = 5
item = 7
item = 8
failureException = org.springframework.batch.core.step.skip.SkipLimitExceededException: Skip limit of '2' exceeded
I am reading a CSV file and inserting the data into a database using Spring Batch (read, process and write). I am using "jpaRepository.save" in the ItemWriter class to save the data into the database. I am trying to catch the skipped item and the skip message in the @OnSkipInWrite method, but this method is not called even when data are skipped. And in the batch_step_execution table:
read_count = 18, write_count = 10, write_skip_count = 0, roll_back_count =8.
Why the write_skip_count is 0? I just want to know which item was skipped and what was the exceptional message. My step :
#Bean
public Step step() throws IOException {
return stepBuilderFactory.get("step").<Entity, Entity>chunk(1).reader(multiResourceItemReader())
.processor(processor()).writer(writer()).faultTolerant().skip(Exception.class).skipLimit(100)
.listener(new stepExecutionListener()).build();
}
This is my Listener class.
public class StepExecutionListener{
private static final Logger LOG = Logger.getLogger(StepExecutionListener.class);
#OnSkipInRead
public void onSkipInRead(Throwable t) {
LOG.error("On Skip in Read Error : " + t.getMessage());
}
#OnSkipInWrite
public void onSkipInWrite(Entity item, Throwable t) {
LOG.error("Skipped in write due to : " + t.getMessage());
}
#OnSkipInProcess
public void onSkipInProcess(Entity item, Throwable t) {
LOG.error("Skipped in process due to: " + t.getMessage());
}
#OnWriteError
public void onWriteError(Exception exception, List<? extends Entity> items) {
LOG.error("Error on write on " + items + " : " + exception.getMessage());
}}
Why are @OnSkipInWrite and @OnWriteError not called? Any help would be much appreciated. Thanks in advance.
I can't see from what you shared why the skip listener is not called but here is a self-contained example using your listener:
import java.util.Arrays;
import java.util.List;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.OnSkipInProcess;
import org.springframework.batch.core.annotation.OnSkipInRead;
import org.springframework.batch.core.annotation.OnSkipInWrite;
import org.springframework.batch.core.annotation.OnWriteError;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
#Configuration
#EnableBatchProcessing
public class MyJob {
#Autowired
private JobBuilderFactory jobs;
#Autowired
private StepBuilderFactory steps;
#Bean
public ItemReader<Integer> itemReader() {
return new ListItemReader<>(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
}
#Bean
public ItemWriter<Integer> itemWriter() {
return items -> {
for (Integer item : items) {
if (item.equals(3)) {
throw new Exception("No 3 here!");
}
System.out.println("item = " + item);
}
};
}
#Bean
public Step step() {
return steps.get("step")
.<Integer, Integer>chunk(5)
.reader(itemReader())
.writer(itemWriter())
.faultTolerant()
.skip(Exception.class)
.skipLimit(10)
.listener(new StepExecutionListener())
.build();
}
#Bean
public Job job() {
return jobs.get("job")
.start(step())
.build();
}
public class StepExecutionListener {
#OnSkipInRead
public void onSkipInRead(Throwable t) {
System.err.println("On Skip in Read Error : " + t.getMessage());
}
#OnSkipInWrite
public void onSkipInWrite(Integer item, Throwable t) {
System.err.println("Skipped in write due to : " + t.getMessage());
}
#OnSkipInProcess
public void onSkipInProcess(Integer item, Throwable t) {
System.err.println("Skipped in process due to: " + t.getMessage());
}
#OnWriteError
public void onWriteError(Exception exception, List<? extends Integer> items) {
System.err.println("Error on write on " + items + " : " + exception.getMessage());
}}
public static void main(String[] args) throws Exception {
ApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
JobExecution jobExecution = jobLauncher.run(job, new JobParameters());
StepExecution stepExecution = jobExecution.getStepExecutions().iterator().next();
System.out.println("WriteSkipCount = " + stepExecution.getWriteSkipCount());
}
}
This example prints:
item = 1
item = 2
Error on write on [1, 2, 3, 4, 5] : No 3 here!
item = 1
item = 2
Error on write on [3] : No 3 here!
item = 4
Skipped in write due to : No 3 here!
item = 5
item = 6
item = 7
item = 8
item = 9
item = 10
WriteSkipCount = 1
Which means the skip listener is called when an item is skipped on write and the writeSkipCount is correct.
Hope this helps.
You can implement the SkipListener interface instead of using the @OnWriteError annotation.
Try that in your BatchConf:
#Bean
#StepScope
public StepExecutionListener stepExecutionListener() {
return new StepExecutionListener();
}
...
.skipLimit(1)
// Register the listener bean rather than a new instance.
.listener(stepExecutionListener())
.build();
Each parallel step will create a file, if all succeed then these files will be moved together to an output folder. If any of these steps fail then none of the files will go to the output folder and the whole job is failed. Help with / code example much appreciated for batch noob.
read from a table then split the results by type and process in parallel
You can partition data by type using a partition step. Partitions will be processed in parallel and each partition creates a file. Then you add step after the partition step to clean up the files if any of the partitions fail. Here is a quick example you can try:
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.task.SimpleAsyncTaskExecutor;
#Configuration
#EnableBatchProcessing
public class PartitionJobSample {
#Autowired
private JobBuilderFactory jobs;
#Autowired
private StepBuilderFactory steps;
#Bean
public Step step1() {
return steps.get("step1")
.partitioner(workerStep().getName(), partitioner())
.step(workerStep())
.gridSize(3)
.taskExecutor(taskExecutor())
.build();
}
#Bean
public SimpleAsyncTaskExecutor taskExecutor() {
return new SimpleAsyncTaskExecutor();
}
#Bean
public Partitioner partitioner() {
return gridSize -> {
Map<String, ExecutionContext> map = new HashMap<>(gridSize);
for (int i = 0; i < gridSize; i++) {
ExecutionContext executionContext = new ExecutionContext();
executionContext.put("data", "data" + i);
String key = "partition" + i;
map.put(key, executionContext);
}
return map;
};
}
#Bean
public Step workerStep() {
return steps.get("workerStep")
.tasklet(getTasklet(null))
.build();
}
#Bean
#StepScope
public Tasklet getTasklet(#Value("#{stepExecutionContext['data']}") String partitionData) {
return (contribution, chunkContext) -> {
if (partitionData.equals("data2")) {
throw new Exception("Boom!");
}
System.out.println(Thread.currentThread().getName() + " processing partitionData = " + partitionData);
Files.createFile(Paths.get(partitionData + ".txt"));
return RepeatStatus.FINISHED;
};
}
#Bean
public Step moveFilesStep() {
return steps.get("moveFilesStep")
.tasklet((contribution, chunkContext) -> {
System.out.println("moveFilesStep");
// add code to move files where needed
return RepeatStatus.FINISHED;
})
.build();
}
#Bean
public Step cleanupFilesStep() {
return steps.get("cleanupFilesStep")
.tasklet((contribution, chunkContext) -> {
System.out.println("cleaning up..");
deleteFiles();
return RepeatStatus.FINISHED;
})
.build();
}
#Bean
public Job job() {
return jobs.get("job")
.flow(step1()).on("FAILED").to(cleanupFilesStep())
.from(step1()).on("*").to(moveFilesStep())
.from(moveFilesStep()).on("*").end()
.from(cleanupFilesStep()).on("*").fail()
.build()
.build();
}
public static void main(String[] args) throws Exception {
deleteFiles();
ApplicationContext context = new AnnotationConfigApplicationContext(PartitionJobSample.class);
JobLauncher jobLauncher = context.getBean(JobLauncher.class);
Job job = context.getBean(Job.class);
jobLauncher.run(job, new JobParameters());
}
private static void deleteFiles() throws IOException {
for (int i = 0; i <= 2; i++) {
Files.deleteIfExists(Paths.get("data" + i + ".txt"));
}
}
}
This example creates 3 dummy partitions ("data0", "data1" and "data2"). Each partition will create a file. If all partitions finish correctly, you will have three files "data0.txt", "data1.txt" and "data2.txt" which will be moved in the moveFilesStep.
Now let's make one of the partitions fail, for example the third partition ("data2"):
/**
 * Step-scoped tasklet that handles one partition.
 *
 * <p>The tasklet throws for the partition named "data2"; for any other partition it prints the
 * current thread name together with the partition data and creates a file named after the
 * partition with a ".txt" suffix.
 *
 * @param partitionData value read from the step execution context under the key 'data'
 * @return a tasklet that finishes after a single invocation unless it throws
 */
#Bean
#StepScope
public Tasklet getTasklet(#Value("#{stepExecutionContext['data']}") String partitionData) {
    return (contribution, chunkContext) -> {
        // Simulated failure for one specific partition.
        boolean failingPartition = partitionData.equals("data2");
        if (failingPartition) {
            throw new Exception("Boom!");
        }

        String workerThread = Thread.currentThread().getName();
        System.out.println(workerThread + " processing partitionData = " + partitionData);

        String outputFile = partitionData + ".txt";
        Files.createFile(Paths.get(outputFile));
        return RepeatStatus.FINISHED;
    };
}
In this case, the cleanupFilesStep will be triggered and will delete all files.
Hope this helps.
I have a batch job where I am using Spring Batch version 3.0.x.
My use case is to retry the job in case of any intermediate failures.
I am using chunk-based processing and StepBuilderFactory for the job, but I could not see any difference after adding the retry configuration to it.
// Step definition as posted in the question (reader/processor/writer arguments are elided).
// Fault tolerance is switched on with a retry limit of 5, but every .retry(...) call is
// commented out, so no retryable exception type is declared for this step.
return stepBuilderFactory.get("ValidationStepName")
.<Long, Info> chunk(10)
.reader(.....)
.processor(.....)
// .faultTolerant()
// .retryLimit(5)
// .retryLimit(5).retry(Exception.class)
.writer(......)
.faultTolerant()
.retryLimit(5)
//.retryLimit(5).retry(Exception.class)
.transactionManager(jpaTransactionManager())
.listener(new ChunkNotificationListener())
.build();
I am not sure whether I am missing something here. I am expecting that adding retryLimit() will retry the same chunk n times on any exception.
I am expecting that adding retryLimit() will retry the same chunk n times on any exception.
If you specify a retry limit, you need to specify which exceptions to retry. Otherwise you would have an IllegalStateException with the message: If a retry limit is provided then retryable exceptions must also be specified.
EDIT:
Point 1 : The following test is passing with version 3.0.9:
import java.util.Arrays;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.repository.JobRepository;
import org.springframework.batch.core.step.tasklet.TaskletStep;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.batch.item.support.ListItemWriter;
import org.springframework.transaction.PlatformTransactionManager;
/**
 * Checks that building a fault-tolerant step with a retry limit, but without any retryable
 * exception type, is rejected with an {@link IllegalStateException}.
 */
#RunWith(MockitoJUnitRunner.class)
public class TestRetryConfig {

    #Rule
    public ExpectedException expectedException = ExpectedException.none();

    #Mock
    private JobRepository jobRepository;

    #Mock
    PlatformTransactionManager transactionManager;

    /**
     * Configures a chunk-oriented step with {@code retryLimit(3)} and no {@code retry(...)} call,
     * and expects {@code build()} to fail with the documented message.
     */
    #Test
    public void testRetryLimitWithoutException() {
        String expectedMessage = "If a retry limit is provided then retryable exceptions must also be specified";
        expectedException.expect(IllegalStateException.class);
        expectedException.expectMessage(expectedMessage);

        StepBuilderFactory factory = new StepBuilderFactory(jobRepository, transactionManager);
        ListItemReader<Integer> reader = new ListItemReader<>(Arrays.asList(1, 2, 3));
        ListItemWriter<Integer> writer = new ListItemWriter<>();

        // build() is expected to throw, so the resulting step is never used.
        TaskletStep misconfiguredStep = factory.get("step")
                .<Integer, Integer>chunk(2)
                .reader(reader)
                .writer(writer)
                .faultTolerant()
                .retryLimit(3)
                .build();
    }
}
It shows that if you specify a retry limit without the exception type(s) to retry, the step configuration should fail.
Point 2: The following sample shows that the declared exception type is retried as expected (tested with version 3.0.9 too):
import java.util.Arrays;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Minimal Spring Batch configuration showing that a declared exception type is retried when a
 * retry limit is set on a fault-tolerant, chunk-oriented step.
 *
 * <p>The reader supplies the integers 1 to 10, the step processes them in chunks of 5, and the
 * writer throws whenever it encounters the item 7.
 */
#Configuration
#EnableBatchProcessing
public class MyJob {

    #Autowired
    private JobBuilderFactory jobs;

    #Autowired
    private StepBuilderFactory steps;

    /**
     * Supplies the integers 1 through 10 from an in-memory list.
     *
     * @return the item reader
     */
    #Bean
    public ItemReader<Integer> itemReader() {
        return new ListItemReader<>(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
    }

    /**
     * Prints every item of the chunk in order and throws as soon as it reaches the item 7, so
     * that the retry configuration of the step can be observed.
     *
     * @return the item writer
     */
    #Bean
    public ItemWriter<Integer> itemWriter() {
        return items -> {
            for (Integer item : items) {
                System.out.println("item = " + item);
                if (item.equals(7)) {
                    throw new Exception("Sevens are sometime nasty, let's retry them");
                }
            }
        };
    }

    /**
     * Declares a fault-tolerant step with a chunk size of 5, a retry limit of 3 and
     * {@link Exception} declared as the retryable exception type.
     *
     * @return the step
     */
    #Bean
    public Step step() {
        return steps.get("step")
                .<Integer, Integer>chunk(5)
                .reader(itemReader())
                .writer(itemWriter())
                .faultTolerant()
                .retryLimit(3)
                .retry(Exception.class)
                .build();
    }

    /**
     * Declares the job named "job", consisting of the single step above.
     *
     * @return the job
     */
    #Bean
    public Job job() {
        return jobs.get("job")
                .start(step())
                .build();
    }

    /**
     * Boots the Spring context from this configuration class and launches the job with empty
     * parameters.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job cannot be launched
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the application context after the launch call has returned,
        // instead of leaving it open until the JVM exits.
        try (AnnotationConfigApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class)) {
            JobLauncher jobLauncher = context.getBean(JobLauncher.class);
            Job job = context.getBean(Job.class);
            jobLauncher.run(job, new JobParameters());
        }
    }
}
it prints:
item = 1
item = 2
item = 3
item = 4
item = 5
item = 6
item = 7
item = 6
item = 7
item = 6
item = 7
Item 7 is attempted 3 times (the whole chunk is written again on each attempt, which is why item 6 is printed again), and then the step fails as expected.
I hope this helps.