Spring Batch Remote Partitioning - DeployerPartitionHandler not executing another job in queue - spring-boot

The DeployerPartitionHandler is configured to use at most 3 workers at a time.
When a job that uses all 3 workers is launched and another job is launched at the same time, the second job is not executed, even after the first job has completed and the workers are free. The status of the second job stays stuck at STARTING.
Below is the Batch Configuration:
#Configuration
#EnableBatchProcessing
#Slf4j
public class BatchConfig {
#Autowired
public StepBuilderFactory stepBuilderFactory;
#Value("${jarLocation}")
public String jarLocation;
#Value("${batch.job.jobname}")
public String jobName;
#Bean
#Primary
PlatformTransactionManager getTransactionManager(
#Qualifier("transactionManager") PlatformTransactionManager platform) {
return platform;
}
#Bean
public JobRegistryBeanPostProcessor jobRegistryBeanPostProcessor(JobRegistry jobRegistry) {
JobRegistryBeanPostProcessor postProcessor = new JobRegistryBeanPostProcessor();
postProcessor.setJobRegistry(jobRegistry);
return postProcessor;
}
#Bean
#Profile("worker")
public DeployerStepExecutionHandler stepExecutionHandler(JobExplorer jobExplorer, JobRepository jobRepository,
ConfigurableApplicationContext context) {
return new DeployerStepExecutionHandler(context, jobExplorer, jobRepository);
}
#Bean
public PartitionHandler partitionHandler(TaskLauncher taskLauncher, JobExplorer jobExplorer,
Environment environment, DelegatingResourceLoader delegatingResourceLoader, TaskRepository taskRepository) {
Resource resource = delegatingResourceLoader.getResource(jarLocation);
DeployerPartitionHandler partitionHandler = new DeployerPartitionHandler(taskLauncher, jobExplorer, resource,
"workerStep", taskRepository);
List<String> commandLineArguments = new ArrayList<>(5);
commandLineArguments.add("--spring.profiles.active=worker");
commandLineArguments.add("--spring.cloud.task.initialize.enable=false");
commandLineArguments.add("--spring.batch.initializer.enabled=false");
commandLineArguments.add("--spring.cloud.task.closecontextEnabled=true");
commandLineArguments.add("--logging.level=DEBUG");
partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArguments));
partitionHandler.setEnvironmentVariablesProvider(new SimpleEnvironmentVariablesProvider(environment));
partitionHandler.setMaxWorkers(3);
partitionHandler.setApplicationName("BatchApplicationWorker");
return partitionHandler;
}
#Bean
#StepScope
public Partitioner partitioner(#Value("#{jobParameters['inputFiles']}") String file,
#Value("#{jobParameters['partitionSize']}") String partitionSize1) {
int partitionSize = Integer.parseInt(partitionSize1);
return new Partitioner() {
public Map<String, ExecutionContext> partition(int gridSize) {
Map<String, ExecutionContext> partitions = new HashMap<>();
String[] ids = fetchAllPrimaryKeys(file);
List<List<String>> partitionPayloads = splitPayLoad(ids, partitionSize);
int size = partitionPayloads.size();
for (int i = 0; i < size; i++) {
ExecutionContext executionContext = new ExecutionContext();
executionContext.put("partitionNumber", i);
executionContext.put("partitionPayLoad", new ArrayList<>(partitionPayloads.get(i)));
partitions.put("partition" + i, executionContext);
}
return partitions;
}
};
}
#Bean
public Step masterStep(Step workerStep, PartitionHandler partitionHandler) {
return this.stepBuilderFactory.get("masterStep").partitioner(workerStep.getName(), partitioner(null, null))
.step(workerStep).partitionHandler(partitionHandler).build();
}
#Bean
public Step workerStep(CustomWriter customWriter, CustomProcessor customProcessor) {
return this.stepBuilderFactory.get("workerStep").<User, User>chunk(10000).reader(reader(null))
.processor(customProcessor).writer(customWriter).build();
}
#Bean
public Job batchJob(Step masterStep, JobExecutionListnerClass jobExecutionListnerClass,
JobBuilderFactory jobBuilderFactory) {
return jobBuilderFactory.get("batchJob").start(masterStep).listener(jobExecutionListnerClass).build();
}
#Bean
#StepScope
public CustomReader reader(#Value("#{stepExecutionContext['partitionPayLoad']}") List<String> payload) {
return new CustomReader(payload);
}
#Bean
public AppTaskListener appTaskListener() {
return new AppTaskListener();
}
}

Related

Spring batch run multiple jobs in parallel

I am new to Spring Batch and couldn't figure out how to do this.
Basically, I have two Spring Batch configurations, and both have to be able to run in parallel, i.e. when I request execute_job1 the job from BatchConfig1 has to execute, and when I request execute_job2 the job from BatchConfig2 has to execute. How can I do this?
Controller
#RestController
public class JobExecutionController {
#Autowired
JobLauncher jobLauncher;
#Autowired
Job job;
/**
*
* #return
*/
#RequestMapping("/execute_job1")
#ResponseBody
public void executeBatchJob1() {
}
/**
*
* #return
*/
#RequestMapping("/execute_job2")
#ResponseBody
public void executeBatchJob2() {
}
}
BatchConfig1
#Configuration
#EnableBatchProcessing
public class BatchConfig {
#Autowired
private JobBuilderFactory jobs;
#Autowired
private StepBuilderFactory steps;
#Bean
public Step stepOne(){
return steps.get("stepOne")
.tasklet(new MyTaskOne())
.build();
}
#Bean
public Step stepTwo(){
return steps.get("stepTwo")
.tasklet(new MyTaskTwo())
.build();
}
#Bean
public Job demoJob(){
return jobs.get("exportUserJob1")
.incrementer(new RunIdIncrementer())
.start(stepOne())
.next(stepTwo())
.build();
}
}
BatchConfig2:
#Configuration
#EnableBatchProcessing
public class BatchConfig2 {
#Autowired
public JobBuilderFactory jobBuilderFactory;
#Autowired
public StepBuilderFactory stepBuilderFactory;
#Autowired
public DataSource dataSource;
#Bean
public JdbcCursorItemReader<User> reader() {
JdbcCursorItemReader<User> reader = new JdbcCursorItemReader<User>();
reader.setDataSource(dataSource);
reader.setSql("SELECT id,name FROM user");
reader.setRowMapper(new UserRowMapper());
return reader;
}
public class UserRowMapper implements RowMapper<User> {
#Override
public User mapRow(ResultSet rs, int rowNum) throws SQLException {
User user = new User();
user.setId(rs.getInt("id"));
user.setName(rs.getString("name"));
return user;
}
}
#Bean
public UserItemProcessor processor() {
return new UserItemProcessor();
}
#Bean
public FlatFileItemWriter<User> writer() {
FlatFileItemWriter<User> writer = new FlatFileItemWriter<User>();
writer.setResource(new ClassPathResource("users.csv"));
writer.setLineAggregator(new DelimitedLineAggregator<User>() {
{
setDelimiter(",");
setFieldExtractor(new BeanWrapperFieldExtractor<User>() {
{
setNames(new String[] { "id", "name" });
}
});
}
});
return writer;
}
#Bean
public Step step1() {
return stepBuilderFactory.get("step1").<User, User>chunk(10).reader(reader()).processor(processor())
.writer(writer()).build();
}
#Bean
public Job exportUserJob() {
return jobBuilderFactory.get("exportUserJob2").incrementer(new RunIdIncrementer()).flow(step1()).end().build();
}
}
You can run a job with JobLauncher. The code for the controller:
#RestController
public class JobExecutionController {
public JobExecutionController(JobLauncher jobLauncher,
#Qualifier("demoJob") Job demoJob,
#Qualifier("exportUserJob") Job exportUserJob) {
this.jobLauncher = jobLauncher;
this.demoJob = demoJob;
this.exportUserJob = exportUserJob;
}
JobLauncher jobLauncher;
Job demoJob;
Job exportUserJob;
#RequestMapping("/execute_job1")
#ResponseBody
public void executeBatchJob1() {
try {
JobExecution jobExecution = jobLauncher.run(demoJob, new JobParameters(generateJobParameter()));
log.info("Job started in thread :" + jobExecution.getJobParameters().getString("JobThread"));
} catch (JobExecutionAlreadyRunningException | JobRestartException | JobParametersInvalidException | JobInstanceAlreadyCompleteException e) {
log.error("Something sent wrong during job execution", e);
}
}
#RequestMapping("/execute_job2")
#ResponseBody
public void executeBatchJob2() {
try {
JobExecution jobExecution = jobLauncher.run(exportUserJob, new JobParameters(generateJobParameter()));
log.info("Job started in thread :" + jobExecution.getJobParameters().getString("JobThread"));
} catch (JobExecutionAlreadyRunningException | JobRestartException | JobParametersInvalidException | JobInstanceAlreadyCompleteException e) {
log.error("Something sent wrong during job execution", e);
}
}
private JobParameters generateJobParameter() {
Map<String, JobParameter> parameters = new HashMap<>();
parameters.put("Job start time", new JobParameter(Instant.now().toEpochMilli()));
parameters.put("JobThread", new JobParameter(Thread.currentThread().getId()));
return new JobParameters(parameters);
}
}
To prevent your jobs from starting at application startup, add the following Spring Batch setting to application.properties: spring.batch.job.enabled=false

Spring Batch MultiLineItemReader with MultiResourcePartitioner

I have a file which contains multi-line data like this. A line starting with >DataID marks the start of a new record, i.e. one record is the combination of the ID and the concatenation of the lines below it, up to the start of the next record.
>DataID1
Line1asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line3asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID2
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID3
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
I was able to implement this using SingleItemPeekableItemReader and it's working fine.
I am now trying to implement partitioning, as we need to process multiple files. I am not sure how the partitioner passes the file info to my custom reader, or how to make my SingleItemPeekableItemReader thread-safe, as it is not working correctly.
I need some input as I am stuck at this point.
java-config
#Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
partitioner.setResources(resources);
partitioner.partition(10);
return partitioner;
}
#Bean
public TaskExecutor taskExecutor() {
ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
taskExecutor.setMaxPoolSize(4);
taskExecutor.setCorePoolSize(4);
taskExecutor.setQueueCapacity(8);
taskExecutor.afterPropertiesSet();
return taskExecutor;
}
#Bean
#Qualifier("masterStep")
public Step masterStep() {
return stepBuilderFactory.get("masterStep")
.partitioner("step1",partitioner())
.step(step1())
.taskExecutor(taskExecutor())
.build();
}
#Bean
public MultiResourceItemReader<FieldSet> multiResourceItemReader() {
log.info("Total Number of Files to be process {}",resources.length);
report.setFileCount(resources.length);
MultiResourceItemReader<FieldSet> resourceItemReader = new MultiResourceItemReader<FieldSet>();
resourceItemReader.setResources(resources);
resourceItemReader.setDelegate(reader());
return resourceItemReader;
}
#Bean
public FlatFileItemReader<FieldSet> reader() {
FlatFileItemReader<FieldSet> build = new FlatFileItemReaderBuilder<FieldSet>().name("fileReader")
.lineTokenizer(orderFileTokenizer())
.fieldSetMapper(new FastFieldSetMapper())
.recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
.build();
build.setBufferedReaderFactory(gzipBufferedReaderFactory);
return build;
}
#Bean
public SingleItemPeekableItemReader<FieldSet> readerPeek() {
SingleItemPeekableItemReader<FieldSet> reader = new SingleItemPeekableItemReader<>();
reader.setDelegate(multiResourceItemReader());
return reader;
}
#Bean
public MultiLineFastaItemReader itemReader() {
MultiLineFastaItemReader itemReader = new MultiLineFastaItemReader(multiResourceItemReader());
itemReader.setSingalPeekable(readerPeek());
return itemReader;
}
#Bean
public PatternMatchingCompositeLineTokenizer orderFileTokenizer() {
PatternMatchingCompositeLineTokenizer tokenizer = new PatternMatchingCompositeLineTokenizer();
Map<String, LineTokenizer> tokenizers = new HashMap<>(2);
tokenizers.put(">*", head());
tokenizers.put("*", tail());
tokenizer.setTokenizers(tokenizers);
return tokenizer;
}
/**
 * Tokenizer for record header lines: space-delimited, non-strict, with the single field
 * {@code sequenceIdentifier}.
 */
public DelimitedLineTokenizer head() {
    DelimitedLineTokenizer headerTokenizer = new DelimitedLineTokenizer();
    headerTokenizer.setStrict(false);
    headerTokenizer.setDelimiter(" ");
    headerTokenizer.setNames("sequenceIdentifier");
    return headerTokenizer;
}
/**
 * Tokenizer for record body lines: space-delimited, with the single field {@code sequences}.
 */
public DelimitedLineTokenizer tail() {
    DelimitedLineTokenizer bodyTokenizer = new DelimitedLineTokenizer();
    bodyTokenizer.setDelimiter(" ");
    bodyTokenizer.setNames("sequences");
    return bodyTokenizer;
}
#Bean
public FastReportWriter writer() {
return new FastReportWriter();
}
#Bean
public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
return jobBuilderFactory.get("importUserJob")
.incrementer(new RunIdIncrementer())
.listener(listener)
.flow(masterStep())
//.flow(step1)
.next(step2())
.end()
.build();
}
#Bean
public Step step1() {
return stepBuilderFactory.get("step1")
.<Fasta, Fasta>chunk(5000)
.reader(itemReader())
.processor(new FastaIteamProcessor())
//.processor(new PassThroughItemProcessor<>())
.writer(writer())
.build();
}
public class MultiLineFastaItemReader implements ItemReader<Fasta>, ItemStream {
private static final Logger log = LoggerFactory.getLogger(MultiLineFastaItemReader.class);
private SingleItemPeekableItemReader<FieldSet> singalPeekable;
AtomicInteger iteamCounter = new AtomicInteger(0);
ConcurrentHashMap<String, AtomicInteger> fileNameAndCounterMap = new ConcurrentHashMap<>();
#Autowired
private SequenceFastaReport sequenceFastaReport;
private MultiResourceItemReader<FieldSet> resourceItemReader;
public MultiLineFastaItemReader(MultiResourceItemReader<FieldSet> multiResourceItemReader) {
this.resourceItemReader = multiResourceItemReader;
}
public SingleItemPeekableItemReader<FieldSet> getSingalPeekable() {
return singalPeekable;
}
public void setSingalPeekable(SingleItemPeekableItemReader<FieldSet> singalPeekable) {
this.singalPeekable = singalPeekable;
}
#Override
public Fasta read() throws Exception {
FieldSet item = singalPeekable.read();
if (item == null) {
return null;
}
Fasta fastaObject = new Fasta();
log.info("ID {} fileName {}", item.readString(0), resourceItemReader.getCurrentResource());
fastaObject.setSequenceIdentifier(item.readString(0)
.toUpperCase());
fastaObject.setFileName(resourceItemReader.getCurrentResource()
.getFilename());
if (!fileNameAndCounterMap.containsKey(fastaObject.getFileName())) {
fileNameAndCounterMap.put(fastaObject.getFileName(), new AtomicInteger(0));
}
while (true) {
FieldSet possibleRelatedObject = singalPeekable.peek();
if (possibleRelatedObject == null) {
if (fastaObject.getSequenceIdentifier()
.length() < 1)
throw new InvalidParameterException("Somwthing Wrong in file");
sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
.incrementAndGet(), fastaObject.getSequences());
return fastaObject;
}
if (possibleRelatedObject.readString(0)
.startsWith(">")) {
if (fastaObject.getSequenceIdentifier()
.length() < 1)
throw new InvalidParameterException("Somwthing Wrong in file");
sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
.incrementAndGet(), fastaObject.getSequences());
return fastaObject;
}
String data = fastaObject.getSequences()
.toUpperCase();
fastaObject.setSequences(data + singalPeekable.read()
.readString(0)
.toUpperCase());
}
}
#Override
public void close() {
this.singalPeekable.close();
}
#Override
public void open(ExecutionContext executionContext) {
this.singalPeekable.open(executionContext);
}
#Override
public void update(ExecutionContext executionContext) {
this.singalPeekable.update(executionContext);
}
}
I am not sure how the partitioner is passing file info to my customer reader
The partitioner will create partition meta-data in step execution contexts and your reader should read that meta-data from it. In your example, you don't need to call partition on the partitioner, Spring Batch will do it. You need instead to set the partition key on the partitioner, for example:
#Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
partitioner.setResources(resources);
partitioner.setKeyName("file");
return partitioner;
}
This will create a partition for each file with the key file that you can get in your reader from the step execution context:
#Bean
#StepScope
public FlatFileItemReader reader(#Value("#{stepExecutionContext['file']}") String file) {
// define your reader
}
Note that the reader should be step scoped to use this feature. More details here: https://docs.spring.io/spring-batch/4.0.x/reference/html/step.html#late-binding

how to configure spring dataflow for spring batch

I have a Spring Batch project that I want to run on Spring Cloud Data Flow. I am able to register it in SCDF, but when I launch the task my job does not run.
following is my configuration file
#SpringBootApplication
#EnableBatchProcessing
#EnableTask
public class BatchApplication {
/*#Autowired
BatchCommandLineRunner batchcommdrunner;
#Bean
public CommandLineRunner commandLineRunner() {
System.out.println("Executed at :" + new SimpleDateFormat().format(new Date()));
return batchcommdrunner ;
}*/
public static void main(String[] args) {
SpringApplication.run(BatchApplication.class, args);
}
}
And this is my batch configuration file
#Configuration
public class BatchConfiguaration {
#Autowired
private DataSource datasouce;
#Autowired
private JobBuilderFactory jobBuilderFactory;
#Autowired
private StepBuilderFactory stepBuilderFactory;
#Autowired
public Environment env;
#Bean(name = "reader")
#StepScope
public ItemReader<Schedules> reader(#Value("#{stepExecutionContext[scheduleRecs]}") List<Schedules> scherecs) {
ItemReader<Schedules> reader = new IteratorItemReader<Schedules>(scherecs);
return reader;
}
#Bean(name = "CWSreader")
#StepScope
public ItemReader<Contents> CWSreader(#Value("#{stepExecutionContext[scheduleRecs]}") List<Contents> scherecs) {
ItemReader<Contents> reader = new IteratorItemReader<Contents>(scherecs);
return reader;
}
#SuppressWarnings("rawtypes")
#Bean
#StepScope
public BatchProcessor processor() {
return new BatchProcessor();
}
#Bean(name = "batchSchedulePreparedStatement")
#StepScope
public BatchSchedulePreparedStatement batchSchedulePreparedStatement() {
return new BatchSchedulePreparedStatement();
}
#SuppressWarnings({ "rawtypes", "unchecked" })
#Bean(name = "batchWriter")
#StepScope
public BatchWriter batchWriter() {
BatchWriter batchWriter = new BatchWriter();
batchWriter.setDataSource(datasouce);
batchWriter.setSql(env.getProperty("batch.insert.schedule.query"));
batchWriter.setItemPreparedStatementSetter(batchSchedulePreparedStatement());
return batchWriter;
}
#Bean("acheronDbTm")
#Qualifier("acheronDbTm")
public PlatformTransactionManager platformTransactionManager() {
return new ResourcelessTransactionManager();
}
#Bean
public JobExplorer jobExplorer() throws Exception {
MapJobExplorerFactoryBean explorerFactoryBean = new MapJobExplorerFactoryBean();
explorerFactoryBean.setRepositoryFactory(mapJobRepositoryFactoryBean());
explorerFactoryBean.afterPropertiesSet();
return explorerFactoryBean.getObject();
}
#Bean
public MapJobRepositoryFactoryBean mapJobRepositoryFactoryBean() {
MapJobRepositoryFactoryBean mapJobRepositoryFactoryBean = new MapJobRepositoryFactoryBean();
mapJobRepositoryFactoryBean.setTransactionManager(platformTransactionManager());
return mapJobRepositoryFactoryBean;
}
#Bean
public JobRepository jobRepository() throws Exception {
return mapJobRepositoryFactoryBean().getObject();
}
#Bean
public SimpleJobLauncher jobLauncher() throws Exception {
SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
jobLauncher.setJobRepository(jobRepository());
return jobLauncher;
}
#Bean(name = "batchPartition")
#StepScope
public BatchPartition batchPartition() {
BatchPartition batchPartition = new BatchPartition();
return batchPartition;
}
#Bean(name="taskExecutor")
public TaskExecutor taskExecutor() {
ThreadPoolTaskExecutor poolTaskExecutor = new ThreadPoolTaskExecutor();
poolTaskExecutor.setCorePoolSize(10);
poolTaskExecutor.setMaxPoolSize(30);
poolTaskExecutor.setQueueCapacity(35);
poolTaskExecutor.setThreadNamePrefix("Acheron");
poolTaskExecutor.afterPropertiesSet();
return poolTaskExecutor;
}
#Bean(name = "masterStep")
public Step masterStep() {
return stepBuilderFactory.get("masterStep").partitioner(slave()).partitioner("slave", batchPartition())
.taskExecutor(taskExecutor()).build();
}
#Bean(name = "slave")
public Step slave() {
return stepBuilderFactory.get("slave").chunk(100).faultTolerant().retryLimit(2)
.retry(DeadlockLoserDataAccessException.class).reader(reader(null)).processor(processor())
.writer(batchWriter()).build();
}
#Bean(name = "manageStagingScheduleMaster")
public Job manageStagingScheduleMaster(final Step masterStep) throws Exception {
return jobBuilderFactory.get("manageStagingScheduleMaster").preventRestart().incrementer(new RunIdIncrementer())
.start(masterStep).build();
}
Can anyone help me configure it properly, or is there any other way I can monitor my batch jobs?
I also tried Spring Boot Admin, but it does not seem to support Java configuration. Is there any way to add jobs in SBA without defining the jobs in XML?
I am launching this job from a controller:
// Controller fragment: launches the job and returns its status as a string.
// No parameters are added to the builder, so the job runs with empty JobParameters.
JobParametersBuilder builder = new JobParametersBuilder();
System.out.println("Job Builder " + builder);
JobParameters jobParameters = builder.toJobParameters();
// jobLauncher and job are defined outside this fragment.
JobExecution execution = jobLauncher.run(job, jobParameters);
return execution.getStatus().toString();
This sample shows a basic Spring batch application that can be launched as a task in Spring Cloud Data Flow.

Spring Boot + Spring Batch Multiple Job Creation and Scheduling

I created a Spring Boot application with Spring Batch and scheduling. When I create only one job, things work fine. But when I try to create another job using the modular approach, I get a few errors, such as "reader is already closed" and some errors related to version, even though I am using different readers. The jobs and their steps run many times and get duplicated.
Can anyone please guide me on how to resolve these issues and run the jobs in parallel, independently of each other?
Below are the configuration Classes :
ModularJobConfiguration.java , DeptBatchConfiguration.java and CityBatchConfiguration.java and BatchScheduler.java
#Configuration
#EnableBatchProcessing(modular=true)
public class ModularJobConfiguration {
#Bean
public ApplicationContextFactory firstJob() {
return new GenericApplicationContextFactory(DeptBatchConfiguration.class);
}
#Bean
public ApplicationContextFactory secondJob() {
return new GenericApplicationContextFactory(CityBatchConfiguration.class);
}
}
#Configuration
#EnableBatchProcessing
#Import({BatchScheduler.class})
public class DeptBatchConfiguration {
private static final Logger LOGGER = LoggerFactory.getLogger(DeptBatchConfiguration.class);
#Autowired
private SimpleJobLauncher jobLauncher;
#Autowired
public JobBuilderFactory jobBuilderFactory;
#Autowired
public StepBuilderFactory stepBuilderFactory;
#Autowired
public JobExecutionListener listener;
public ItemReader<DepartmentModelReader> deptReaderSO;
#Autowired
#Qualifier("dataSourceReader")
private DataSource dataSourceReader;
#Autowired
#Qualifier("dataSourceWriter")
private DataSource dataSourceWriter;
#Scheduled(cron = "0 0/1 * * * ?")
public void performFirstJob() throws Exception {
long startTime = System.currentTimeMillis();
LOGGER.info("Job1 Started at :" + new Date());
JobParameters param = new JobParametersBuilder().addString("JobID1",String.valueOf(System.currentTimeMillis())).toJobParameters();
JobExecution execution = (JobExecution) jobLauncher.run(importDeptJob(jobBuilderFactory,stepdept(deptReaderSO,customWriter()),listener), param);
long endTime = System.currentTimeMillis();
LOGGER.info("Job1 finished at " + (endTime - startTime) / 1000 + " seconds with status :" + execution.getExitStatus());
}
#Bean
public ItemReader<DepartmentModelReader> deptReaderSO() {
//LOGGER.info("Inside deptReaderSO Method");
JdbcCursorItemReader<DepartmentModelReader> deptReaderSO = new JdbcCursorItemReader<>();
//deptReaderSO.setSql("select id, firstName, lastname, random_num from reader");
deptReaderSO.setSql("SELECT DEPT_CODE,DEPT_NAME,FULL_DEPT_NAME,CITY_CODE,CITY_NAME,CITY_TYPE_NAME,CREATED_USER_ID,CREATED_G_DATE,MODIFIED_USER_ID,MODIFIED_G_DATE,RECORD_ACTIVITY,DEPT_CLASS,DEPT_PARENT,DEPT_PARENT_NAME FROM TBL_SAMPLE_SAFTY_DEPTS");
deptReaderSO.setDataSource(dataSourceReader);
deptReaderSO.setRowMapper(
(ResultSet resultSet, int rowNum) -> {
if (!(resultSet.isAfterLast()) && !(resultSet.isBeforeFirst())) {
DepartmentModelReader recordSO = new DepartmentModelReader();
recordSO.setDeptCode(resultSet.getString("DEPT_CODE"));
recordSO.setDeptName(resultSet.getString("DEPT_NAME"));
recordSO.setFullDeptName(resultSet.getString("FULL_DEPT_NAME"));
recordSO.setCityCode(resultSet.getInt("CITY_CODE"));
recordSO.setCityName(resultSet.getString("CITY_NAME"));
recordSO.setCityTypeName(resultSet.getString("CITY_TYPE_NAME"));
recordSO.setCreatedUserId(resultSet.getInt("CREATED_USER_ID"));
recordSO.setCreatedGDate(resultSet.getDate("CREATED_G_DATE"));
recordSO.setModifiedUserId(resultSet.getString("MODIFIED_USER_ID"));
recordSO.setModifiedGDate(resultSet.getDate("MODIFIED_G_DATE"));
recordSO.setRecordActivity(resultSet.getInt("RECORD_ACTIVITY"));
recordSO.setDeptClass(resultSet.getInt("DEPT_CLASS"));
recordSO.setDeptParent(resultSet.getString("DEPT_PARENT"));
recordSO.setDeptParentName(resultSet.getString("DEPT_PARENT_NAME"));
// LOGGER.info("RowMapper record : {}", recordSO.getDeptCode() +" | "+recordSO.getDeptName());
return recordSO;
} else {
LOGGER.info("Returning null from rowMapper");
return null;
}
});
return deptReaderSO;
}
#Bean
public ItemProcessor<DepartmentModelReader, DepartmentModelWriter> processor() {
//LOGGER.info("Inside Processor Method");
return new RecordProcessor();
}
#Bean
public ItemWriter<DepartmentModelWriter> customWriter(){
//LOGGER.info("Inside customWriter Method");
return new CustomItemWriter();
}
#Bean
public Job importDeptJob(JobBuilderFactory jobs, Step stepdept,JobExecutionListener listener){
return jobs.get("importDeptJob")
.incrementer(new RunIdIncrementer())
.listener(listener())
.flow(stepdept).end().build();
}
#Bean
public Step stepdept(ItemReader<DepartmentModelReader> deptReaderSO,
ItemWriter<DepartmentModelWriter> writerSO) {
LOGGER.info("Inside stepdept Method");
return stepBuilderFactory.get("stepdept").<DepartmentModelReader, DepartmentModelWriter>chunk(5)
.reader(deptReaderSO).processor(processor()).writer(customWriter()).transactionManager(platformTransactionManager(dataSourceWriter)).build();
}
#Bean
public JobExecutionListener listener() {
return new JobCompletionNotificationListener();
}
#Bean
public JdbcTemplate jdbcTemplate(DataSource dataSource) {
return new JdbcTemplate(dataSource);
}
#Bean
public BatchWriteService batchWriteService() {
return new BatchWriteService();
}
#Bean
public PlatformTransactionManager platformTransactionManager(#Qualifier("dataSourceWriter") DataSource dataSourceWriter) {
JpaTransactionManager transactionManager = new JpaTransactionManager();
transactionManager.setDataSource(dataSourceWriter);
return transactionManager;
}
}
#Configuration
#EnableBatchProcessing
#Import({BatchScheduler.class})
public class CityBatchConfiguration {
private static final Logger LOGGER = LoggerFactory.getLogger(CityBatchConfiguration.class);
#Autowired
private SimpleJobLauncher jobLauncher;
#Autowired
public JobBuilderFactory jobBuilderFactory;
#Autowired
public StepBuilderFactory stepBuilderFactory;
#Autowired
public JobExecutionListener listener;
public ItemReader<CitiesModelReader> citiesReaderSO;
#Autowired
#Qualifier("dataSourceReader")
private DataSource dataSourceReader;
#Autowired
#Qualifier("dataSourceWriter")
private DataSource dataSourceWriter;
#Scheduled(cron = "0 0/1 * * * ?")
public void performSecondJob() throws Exception {
long startTime = System.currentTimeMillis();
LOGGER.info("\n Job2 Started at :" + new Date());
JobParameters param = new JobParametersBuilder().addString("JobID2",String.valueOf(System.currentTimeMillis())).toJobParameters();
JobExecution execution = (JobExecution) jobLauncher.run(importCitiesJob(jobBuilderFactory,stepcity(citiesReaderSO,customCitiesWriter()),listener), param);
long endTime = System.currentTimeMillis();
LOGGER.info("Job2 finished at " + (endTime - startTime) / 1000 + " seconds with status :" + execution.getExitStatus());
}
#Bean
public ItemReader<CitiesModelReader> citiesReaderSO() {
//LOGGER.info("Inside readerSO Method");
JdbcCursorItemReader<CitiesModelReader> readerSO = new JdbcCursorItemReader<>();
readerSO.setSql("SELECT CITY_CODE,CITY_NAME,PARENT_CITY,CITY_TYPE,CITY_TYPE_NAME,CREATED_G_DATE,CREATED_USER_ID,MODIFIED_G_DATE,MODIFIED_USER_ID,RECORD_ACTIVITY FROM TBL_SAMPLE_SAFTY_CITIES");
readerSO.setDataSource(dataSourceReader);
readerSO.setRowMapper(
(ResultSet resultSet, int rowNum) -> {
if (!(resultSet.isAfterLast()) && !(resultSet.isBeforeFirst())) {
CitiesModelReader recordSO = new CitiesModelReader();
recordSO.setCityCode(resultSet.getLong("CITY_CODE"));
recordSO.setCityName(resultSet.getString("CITY_NAME"));
recordSO.setParentCity(resultSet.getInt("PARENT_CITY"));
recordSO.setCityType(resultSet.getString("CITY_TYPE"));
recordSO.setCityTypeName(resultSet.getString("CITY_TYPE_NAME"));
recordSO.setCreatedGDate(resultSet.getDate("CREATED_G_DATE"));
recordSO.setCreatedUserId(resultSet.getString("CREATED_USER_ID"));
recordSO.setModifiedGDate(resultSet.getDate("MODIFIED_G_DATE"));
recordSO.setModifiedUserId(resultSet.getString("MODIFIED_USER_ID"));
recordSO.setRecordActivity(resultSet.getInt("RECORD_ACTIVITY"));
//LOGGER.info("RowMapper record : {}", recordSO.toString());
return recordSO;
} else {
LOGGER.info("Returning null from rowMapper");
return null;
}
});
return readerSO;
}
#Bean
public ItemProcessor<CitiesModelReader,CitiesModelWriter> citiesProcessor() {
//LOGGER.info("Inside Processor Method");
return new RecordCitiesProcessor();
}
#Bean
public ItemWriter<CitiesModelWriter> customCitiesWriter(){
LOGGER.info("Inside customCitiesWriter Method");
return new CustomCitiesWriter();
}
#Bean
public Job importCitiesJob(JobBuilderFactory jobs, Step stepcity,JobExecutionListener listener) {
LOGGER.info("Inside importCitiesJob Method");
return jobs.get("importCitiesJob")
.incrementer(new RunIdIncrementer())
.listener(listener())
.flow(stepcity).end().build();
}
#Bean
public Step stepcity(ItemReader<CitiesModelReader> readerSO,
ItemWriter<CitiesModelWriter> writerSO) {
LOGGER.info("Inside stepCity Method");
return stepBuilderFactory.get("stepcity").<CitiesModelReader, CitiesModelWriter>chunk(5)
.reader(readerSO).processor(citiesProcessor()).writer(customCitiesWriter()).transactionManager(platformTransactionManager(dataSourceWriter)).build();
}
#Bean
public JobExecutionListener listener() {
return new JobCompletionNotificationListener();
}
#Bean
public JdbcTemplate jdbcTemplate(DataSource dataSource) {
return new JdbcTemplate(dataSource);
}
#Bean
public BatchWriteService batchWriteService() {
return new BatchWriteService();
}
#Bean
public PlatformTransactionManager platformTransactionManager(#Qualifier("dataSourceWriter") DataSource dataSourceWriter) {
JpaTransactionManager transactionManager = new JpaTransactionManager();
transactionManager.setDataSource(dataSourceWriter);
return transactionManager;
}
}
/**
 * Batch infrastructure beans: a resourceless transaction manager, a map-backed job
 * repository built from it, and a job launcher that uses that repository.
 */
#Configuration
#EnableScheduling
public class BatchScheduler {
    private static final Logger LOGGER = LoggerFactory.getLogger(BatchScheduler.class);

    /**
     * @return the transaction manager used by the map-backed job repository factory
     */
    #Bean
    public ResourcelessTransactionManager resourcelessTransactionManager() {
        return new ResourcelessTransactionManager();
    }

    /**
     * Creates the job repository factory and initialises it eagerly.
     *
     * @param txManager transaction manager handed to the factory
     * @return the initialised factory
     * @throws Exception if {@code afterPropertiesSet()} fails
     */
    #Bean
    public MapJobRepositoryFactoryBean mapJobRepositoryFactory(
            ResourcelessTransactionManager txManager) throws Exception {
        LOGGER.info("Inside mapJobRepositoryFactory method");
        MapJobRepositoryFactoryBean repositoryFactory = new MapJobRepositoryFactoryBean(txManager);
        repositoryFactory.afterPropertiesSet();
        return repositoryFactory;
    }

    /**
     * @param factory factory the repository is obtained from
     * @return the repository produced by {@code factory}
     * @throws Exception if the factory cannot produce the repository
     */
    #Bean
    public JobRepository jobRepository(
            MapJobRepositoryFactoryBean factory) throws Exception {
        LOGGER.info("Inside jobRepository method");
        JobRepository repository = factory.getObject();
        return repository;
    }

    /**
     * Creates a launcher that hands job execution to a {@code SimpleAsyncTaskExecutor}.
     *
     * @param jobRepository repository the launcher records executions in
     * @return the configured launcher
     */
    #Bean
    public SimpleJobLauncher jobLauncher(JobRepository jobRepository) {
        LOGGER.info("Inside jobLauncher method");
        SimpleJobLauncher asyncLauncher = new SimpleJobLauncher();
        asyncLauncher.setTaskExecutor(new SimpleAsyncTaskExecutor());
        asyncLauncher.setJobRepository(jobRepository);
        return asyncLauncher;
    }
}
Your readers are neither thread-safe nor step-scoped, and because of that you are running into concurrency issues. Configure each of your stateful ItemReaders (the ones that implement ItemStream, such as the JdbcCursorItemReader) to be step-scoped by adding the #StepScope annotation, and things should work fine.

Why is the itemReader always sending the exact same value to CustomItemProcessor

Why is the itemReader method always sending the exact same file name to be processed in CustomItemProcessor?
As far as I understand, since I set up the reader as #Scope and set the chunk size to more than 1, I was expecting "return s" to move forward to the next value in the String array.
Let me clarify my question with a debug walk-through of the reader method:
1 - the variable stringArray is filled with 3 file names (f1.txt, f2.txt and f3.txt)
2 - "return s" is invoked with s = f1.txt
3 - "return s" is invoked again before the customItemProcessor method is invoked (perfect until here, since chunk = 2)
4 - looking at s, it contains f1.txt again (different from what I expected; I expected f2.txt)
5 and 6 - the processor runs with the same name, f1.txt (it would work correctly if the second "return s" contained f2.txt)
7 - the writer method works as expected (processedFiles contains the two names processed in customItemProcessor: f1.txt and f1.txt again, since the same name was processed twice)
CustomItemReader
/**
 * ItemReader from the question: every read() call re-lists the directory configured
 * under "my.path" and returns a file name taken from that fresh listing.
 *
 * NOTE(review): `env` and `log` are not declared in this excerpt, and the class's
 * closing brace is not shown — presumably trimmed from the post.
 */
public class CustomItemReader implements ItemReader<String> {
/**
 * Lists the directory, keeps the entries whose string form ends with "out", and
 * returns the first of them.
 *
 * @return the first matching path as a string, or null when nothing matches
 */
#Override
public String read() throws Exception, UnexpectedInputException,
ParseException, NonTransientResourceException {
String[] stringArray;
// The listing is rebuilt on each call; try-with-resources closes the directory stream.
try (Stream<Path> stream = Files.list(Paths.get(env
.getProperty("my.path")))) {
stringArray = stream.map(String::valueOf)
.filter(path -> path.endsWith("out"))
.toArray(size -> new String[size]);
}
//*** the problem is here
//every turn s variable receives the first file name from the stringArray
// The return inside the loop exits on its first iteration and no position is kept
// between calls, so each call yields stringArray[0].
if (stringArray.length > 0) {
for (String s : stringArray) {
return s;
}
} else {
log.info("read method - no file found");
return null;
}
// Never reached at run time (both branches above return); required by the compiler.
return null;
}
CustomItemProcessor
/**
 * Pass-through processor: logs the item it receives and returns it unchanged.
 */
public class CustomItemProcessor implements ItemProcessor<String , String> {

    /**
     * @param singleFileToProcess the item handed over by the reader
     * @return the same item, unmodified
     * @throws Exception declared by the interface; nothing here throws explicitly
     */
    #Override
    public String process(String singleFileToProcess) throws Exception {
        final String logLine = "process method: " + singleFileToProcess;
        log.info(logLine);
        return singleFileToProcess;
    }
}
CustomItemWriter
/**
 * Writer that logs every item of the chunk and then deletes the file each item names.
 */
public class CustomItemWriter implements ItemWriter<String> {
    private static final Logger log = LoggerFactory.getLogger(CustomItemWriter.class);

    /**
     * Logs all items first, then deletes the corresponding files (missing files are ignored).
     *
     * @param processedFiles file names produced by the processor for this chunk
     * @throws Exception if a file cannot be deleted
     */
    #Override
    public void write(List<? extends String> processedFiles) throws Exception {
        // First pass: log every item before any file is touched.
        for (String processedFile : processedFiles) {
            log.info("**** write method" + processedFile.toString());
        }
        // Second pass: remove the files via the default file system.
        FileSystem defaultFs = FileSystems.getDefault();
        for (String fileName : processedFiles) {
            Files.deleteIfExists(defaultFs.getPath(fileName));
        }
    }
    // NOTE(review): the class's closing brace is not part of this excerpt.
Configuration
// Batch configuration from the question: declares the reader, processor and writer beans,
// wires them into step "step1", and builds job "job1" from that step.
// NOTE(review): the arguments of #ComponentScan and #PropertySource are elided ("(...") and
// the class's closing brace is not shown in this excerpt.
#Configuration
#ComponentScan(...
#EnableBatchProcessing
#EnableScheduling
#PropertySource(...
public class BatchConfig {
// Builder factories used by job(...) and step1(...) below.
#Autowired
private JobBuilderFactory jobBuilderFactory;
#Autowired
private StepBuilderFactory stepBuilderFactory;
// Injected but not referenced anywhere else in this excerpt.
#Autowired
private JobRepository jobRepository;
// TaskExecutor bean whose execute() body is empty: a Runnable passed to it is never run.
// NOTE(review): nothing in this excerpt references this bean — confirm whether it is needed.
#Bean
public TaskExecutor getTaskExecutor() {
return new TaskExecutor() {
#Override
public void execute(Runnable task) {
}
};
}
//I can see the number in chunk reflects how many time customReader is triggered before triggers customProcesser
// Chunk-oriented step with chunk size 2; allowStartIfComplete(true) is set on the step.
#Bean
public Step step1(ItemReader<String> reader,
ItemProcessor<String, String> processor, ItemWriter<String> writer) {
return stepBuilderFactory.get("step1").<String, String> chunk(2)
.reader(reader).processor(processor).writer(writer)
.allowStartIfComplete(true).build();
}
// Reader bean. NOTE(review): #Scope is given no scope name here, so presumably the
// default scope applies — confirm.
#Bean
#Scope
public ItemReader<String> reader() {
return new CustomItemReader();
}
// Processor bean: a new CustomItemProcessor.
#Bean
public ItemProcessor<String, String> processor() {
return new CustomItemProcessor();
}
// Writer bean: a new CustomItemWriter.
#Bean
public ItemWriter<String> writer() {
return new CustomItemWriter();
}
// Job "job1": configured with a RunIdIncrementer and started with step1.
#Bean
public Job job(Step step1) throws Exception {
return jobBuilderFactory.get("job1").incrementer(new RunIdIncrementer()).start(step1).build();
}
Scheduler
/**
 * Launches the injected batch job at a fixed rate of 60000 ms.
 */
#Component
public class QueueScheduler {
    private static final Logger log = LoggerFactory.getLogger(QueueScheduler.class);

    // Assigned once in the constructor and never rebound.
    private final Job job;
    private final JobLauncher jobLauncher;

    /**
     * @param jobLauncher launcher used to start the job
     * @param job         the job to launch; resolved via the qualifier "job"
     */
    #Autowired
    public QueueScheduler(JobLauncher jobLauncher, #Qualifier("job") Job job) {
        this.job = job;
        this.jobLauncher = jobLauncher;
    }

    /**
     * Starts the job with empty job parameters. A failed launch is logged and otherwise
     * ignored, so the exception does not propagate out of the scheduled method.
     */
    #Scheduled(fixedRate=60000)
    public void runJob() {
        try {
            jobLauncher.run(job, new JobParameters());
        } catch (Exception ex) {
            // Log the exception itself (not just getMessage(), which may be null) at ERROR
            // level so the stack trace and cause are preserved.
            log.error("Scheduled launch of job '{}' failed", job.getName(), ex);
        }
    }
}
Your issue is that you are relying on an internal loop to iterate over the items instead of letting Spring Batch do it for you by calling ItemReader#read multiple times.
What I'd recommend is changing your reader to something like the following:
/**
 * Restartable reader that returns, one per {@code read()} call, the names of the entries
 * in the "my.path" directory whose string form ends with "out".
 *
 * The directory is listed once in {@code open}; the index of the last item handed out is
 * saved to the ExecutionContext in {@code update} and restored in {@code open}.
 *
 * NOTE(review): {@code env} is not declared in this excerpt — presumably an injected
 * Environment; confirm against the full class.
 */
public class JimsItemReader implements ItemStreamReader<String> {
    // Empty until open() runs, so read() before open() returns null instead of failing.
    private String[] items = new String[0];
    // Index of the last item returned; -1 means nothing has been returned yet.
    private int curIndex = -1;

    /**
     * Restores the saved position (or -1) and loads the file names to hand out.
     *
     * @param ec execution context holding "curIndex" from a previous run, if any
     * @throws java.io.UncheckedIOException if the directory cannot be listed
     */
    #Override
    public void open(ExecutionContext ec) {
        curIndex = ec.getInt("curIndex", -1);
        try (Stream<Path> stream = Files.list(Paths.get(env.getProperty("my.path")))) {
            // Store the listing in the field read() uses. Files.list gives no ordering
            // guarantee, so sort to keep a restored index pointing at the same entry
            // while the directory content is unchanged.
            items = stream.map(String::valueOf)
                    .filter(path -> path.endsWith("out"))
                    .sorted()
                    .toArray(String[]::new);
        } catch (java.io.IOException e) {
            throw new java.io.UncheckedIOException("Could not list the directory configured as 'my.path'", e);
        }
    }

    /**
     * Saves the current position.
     *
     * @param ec execution context that receives "curIndex"
     */
    #Override
    public void update(ExecutionContext ec) {
        ec.putInt("curIndex", curIndex);
    }

    /**
     * Nothing to release: the directory stream is already closed inside {@code open}.
     */
    #Override
    public void close() {
    }

    /**
     * @return the next file name, or null once every item has been returned
     */
    #Override
    public String read() {
        // Check the NEXT index before advancing; checking the current one would step
        // past the end of the array after the last item.
        int next = curIndex + 1;
        if (next >= items.length) {
            return null;
        }
        curIndex = next;
        return items[next];
    }
}
The above example should loop through the items of your array as they are read. It also should be restartable in that we're storing the index in the ExecutionContext so if the job is restarted after a failure, you'll restart where you left off.

Resources