Spring Batch MultiLineItemReader with MultiResourcePartitioner

I have a file containing multi-line data like the sample below. A DataID line marks the start of a new record, i.e. one record is the ID line plus all of the following lines concatenated, until the start of the next record.
>DataID1
Line1asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line3asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID2
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID3
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
I was able to implement this using SingleItemPeekableItemReader and it works fine.
I am now trying to implement partitioning, as we need to process multiple files. I am not sure how the partitioner passes the file info to my custom reader, or how to make my SingleItemPeekableItemReader thread safe, as it is not working correctly.
I need some inputs as I am stuck at this point.
Java config:
@Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
partitioner.setResources(resources);
partitioner.partition(10);
return partitioner;
}
@Bean
public TaskExecutor taskExecutor() {
ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
taskExecutor.setMaxPoolSize(4);
taskExecutor.setCorePoolSize(4);
taskExecutor.setQueueCapacity(8);
taskExecutor.afterPropertiesSet();
return taskExecutor;
}
@Bean
@Qualifier("masterStep")
public Step masterStep() {
return stepBuilderFactory.get("masterStep")
.partitioner("step1",partitioner())
.step(step1())
.taskExecutor(taskExecutor())
.build();
}
@Bean
public MultiResourceItemReader<FieldSet> multiResourceItemReader() {
log.info("Total number of files to be processed {}", resources.length);
report.setFileCount(resources.length);
MultiResourceItemReader<FieldSet> resourceItemReader = new MultiResourceItemReader<FieldSet>();
resourceItemReader.setResources(resources);
resourceItemReader.setDelegate(reader());
return resourceItemReader;
}
@Bean
public FlatFileItemReader<FieldSet> reader() {
FlatFileItemReader<FieldSet> build = new FlatFileItemReaderBuilder<FieldSet>().name("fileReader")
.lineTokenizer(orderFileTokenizer())
.fieldSetMapper(new FastFieldSetMapper())
.recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
.build();
build.setBufferedReaderFactory(gzipBufferedReaderFactory);
return build;
}
@Bean
public SingleItemPeekableItemReader<FieldSet> readerPeek() {
SingleItemPeekableItemReader<FieldSet> reader = new SingleItemPeekableItemReader<>();
reader.setDelegate(multiResourceItemReader());
return reader;
}
@Bean
public MultiLineFastaItemReader itemReader() {
MultiLineFastaItemReader itemReader = new MultiLineFastaItemReader(multiResourceItemReader());
itemReader.setSingalPeekable(readerPeek());
return itemReader;
}
@Bean
public PatternMatchingCompositeLineTokenizer orderFileTokenizer() {
PatternMatchingCompositeLineTokenizer tokenizer = new PatternMatchingCompositeLineTokenizer();
Map<String, LineTokenizer> tokenizers = new HashMap<>(2);
tokenizers.put(">*", head());
tokenizers.put("*", tail());
tokenizer.setTokenizers(tokenizers);
return tokenizer;
}
public DelimitedLineTokenizer head() {
DelimitedLineTokenizer token = new DelimitedLineTokenizer();
token.setNames("sequenceIdentifier");
token.setDelimiter(" ");
token.setStrict(false);
return token;
}
public DelimitedLineTokenizer tail() {
DelimitedLineTokenizer token = new DelimitedLineTokenizer();
token.setNames("sequences");
token.setDelimiter(" ");
return token;
}
@Bean
public FastReportWriter writer() {
return new FastReportWriter();
}
@Bean
public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
return jobBuilderFactory.get("importUserJob")
.incrementer(new RunIdIncrementer())
.listener(listener)
.flow(masterStep())
//.flow(step1)
.next(step2())
.end()
.build();
}
@Bean
public Step step1() {
return stepBuilderFactory.get("step1")
.<Fasta, Fasta>chunk(5000)
.reader(itemReader())
.processor(new FastaIteamProcessor())
//.processor(new PassThroughItemProcessor<>())
.writer(writer())
.build();
}
public class MultiLineFastaItemReader implements ItemReader<Fasta>, ItemStream {
private static final Logger log = LoggerFactory.getLogger(MultiLineFastaItemReader.class);
private SingleItemPeekableItemReader<FieldSet> singalPeekable;
AtomicInteger iteamCounter = new AtomicInteger(0);
ConcurrentHashMap<String, AtomicInteger> fileNameAndCounterMap = new ConcurrentHashMap<>();
@Autowired
private SequenceFastaReport sequenceFastaReport;
private MultiResourceItemReader<FieldSet> resourceItemReader;
public MultiLineFastaItemReader(MultiResourceItemReader<FieldSet> multiResourceItemReader) {
this.resourceItemReader = multiResourceItemReader;
}
public SingleItemPeekableItemReader<FieldSet> getSingalPeekable() {
return singalPeekable;
}
public void setSingalPeekable(SingleItemPeekableItemReader<FieldSet> singalPeekable) {
this.singalPeekable = singalPeekable;
}
@Override
public Fasta read() throws Exception {
FieldSet item = singalPeekable.read();
if (item == null) {
return null;
}
Fasta fastaObject = new Fasta();
log.info("ID {} fileName {}", item.readString(0), resourceItemReader.getCurrentResource());
fastaObject.setSequenceIdentifier(item.readString(0)
.toUpperCase());
fastaObject.setFileName(resourceItemReader.getCurrentResource()
.getFilename());
if (!fileNameAndCounterMap.containsKey(fastaObject.getFileName())) {
fileNameAndCounterMap.put(fastaObject.getFileName(), new AtomicInteger(0));
}
while (true) {
FieldSet possibleRelatedObject = singalPeekable.peek();
if (possibleRelatedObject == null) {
if (fastaObject.getSequenceIdentifier()
.length() < 1)
throw new InvalidParameterException("Something wrong in file");
sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
.incrementAndGet(), fastaObject.getSequences());
return fastaObject;
}
if (possibleRelatedObject.readString(0)
.startsWith(">")) {
if (fastaObject.getSequenceIdentifier()
.length() < 1)
throw new InvalidParameterException("Something wrong in file");
sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
.incrementAndGet(), fastaObject.getSequences());
return fastaObject;
}
String data = fastaObject.getSequences()
.toUpperCase();
fastaObject.setSequences(data + singalPeekable.read()
.readString(0)
.toUpperCase());
}
}
@Override
public void close() {
this.singalPeekable.close();
}
@Override
public void open(ExecutionContext executionContext) {
this.singalPeekable.open(executionContext);
}
@Override
public void update(ExecutionContext executionContext) {
this.singalPeekable.update(executionContext);
}
}

I am not sure how the partitioner is passing file info to my custom reader
The partitioner will create partition meta-data in the step execution contexts, and your reader should read that meta-data from there. In your example, you don't need to call partition on the partitioner; Spring Batch will do it. Instead, you need to set the partition key name on the partitioner, for example:
@Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
partitioner.setResources(resources);
partitioner.setKeyName("file");
return partitioner;
}
This will create a partition for each file, with the key file, which you can get in your reader from the step execution context:
@Bean
@StepScope
public FlatFileItemReader reader(@Value("#{stepExecutionContext['file']}") String file) {
// define your reader
}
Note that the reader should be step scoped to use this feature. More details here: https://docs.spring.io/spring-batch/4.0.x/reference/html/step.html#late-binding
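Applied to the multi-line reader above, a minimal sketch of that binding could look like the following (names are illustrative; it reuses orderFileTokenizer() and FastFieldSetMapper from the question plus the "file" key set on the partitioner, and relies on Spring converting the URL string stored by MultiResourcePartitioner into a Resource). The SingleItemPeekableItemReader and MultiLineFastaItemReader would then delegate to this step-scoped reader instead of the MultiResourceItemReader, so each partition thread gets its own reader instance:
@Bean
@StepScope
public FlatFileItemReader<FieldSet> partitionedFileReader(
        @Value("#{stepExecutionContext['file']}") Resource file) {
    // one reader per partition, bound to that partition's file
    return new FlatFileItemReaderBuilder<FieldSet>()
            .name("partitionedFileReader")
            .resource(file)
            .lineTokenizer(orderFileTokenizer())
            .fieldSetMapper(new FastFieldSetMapper())
            .recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
            .build();
}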

Related

Spring Batch Remote Partitioning - DeployerPartitionHandler not executing another job in queue

The DeployerPartitionHandler is configured to use at most 3 workers at a time.
When a job that uses all 3 workers is launched and another job is launched at the same time, the second job is not executed, even after the first job has completed and the workers are free. The status of the second job stays stuck at STARTING.
Below is the batch configuration:
@Configuration
@EnableBatchProcessing
@Slf4j
public class BatchConfig {
@Autowired
public StepBuilderFactory stepBuilderFactory;
@Value("${jarLocation}")
public String jarLocation;
@Value("${batch.job.jobname}")
public String jobName;
@Bean
@Primary
PlatformTransactionManager getTransactionManager(
@Qualifier("transactionManager") PlatformTransactionManager platform) {
return platform;
}
@Bean
public JobRegistryBeanPostProcessor jobRegistryBeanPostProcessor(JobRegistry jobRegistry) {
JobRegistryBeanPostProcessor postProcessor = new JobRegistryBeanPostProcessor();
postProcessor.setJobRegistry(jobRegistry);
return postProcessor;
}
@Bean
@Profile("worker")
public DeployerStepExecutionHandler stepExecutionHandler(JobExplorer jobExplorer, JobRepository jobRepository,
ConfigurableApplicationContext context) {
return new DeployerStepExecutionHandler(context, jobExplorer, jobRepository);
}
@Bean
public PartitionHandler partitionHandler(TaskLauncher taskLauncher, JobExplorer jobExplorer,
Environment environment, DelegatingResourceLoader delegatingResourceLoader, TaskRepository taskRepository) {
Resource resource = delegatingResourceLoader.getResource(jarLocation);
DeployerPartitionHandler partitionHandler = new DeployerPartitionHandler(taskLauncher, jobExplorer, resource,
"workerStep", taskRepository);
List<String> commandLineArguments = new ArrayList<>(5);
commandLineArguments.add("--spring.profiles.active=worker");
commandLineArguments.add("--spring.cloud.task.initialize.enable=false");
commandLineArguments.add("--spring.batch.initializer.enabled=false");
commandLineArguments.add("--spring.cloud.task.closecontextEnabled=true");
commandLineArguments.add("--logging.level=DEBUG");
partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArguments));
partitionHandler.setEnvironmentVariablesProvider(new SimpleEnvironmentVariablesProvider(environment));
partitionHandler.setMaxWorkers(3);
partitionHandler.setApplicationName("BatchApplicationWorker");
return partitionHandler;
}
@Bean
@StepScope
public Partitioner partitioner(@Value("#{jobParameters['inputFiles']}") String file,
@Value("#{jobParameters['partitionSize']}") String partitionSize1) {
int partitionSize = Integer.parseInt(partitionSize1);
return new Partitioner() {
public Map<String, ExecutionContext> partition(int gridSize) {
Map<String, ExecutionContext> partitions = new HashMap<>();
String[] ids = fetchAllPrimaryKeys(file);
List<List<String>> partitionPayloads = splitPayLoad(ids, partitionSize);
int size = partitionPayloads.size();
for (int i = 0; i < size; i++) {
ExecutionContext executionContext = new ExecutionContext();
executionContext.put("partitionNumber", i);
executionContext.put("partitionPayLoad", new ArrayList<>(partitionPayloads.get(i)));
partitions.put("partition" + i, executionContext);
}
return partitions;
}
};
}
@Bean
public Step masterStep(Step workerStep, PartitionHandler partitionHandler) {
return this.stepBuilderFactory.get("masterStep").partitioner(workerStep.getName(), partitioner(null, null))
.step(workerStep).partitionHandler(partitionHandler).build();
}
@Bean
public Step workerStep(CustomWriter customWriter, CustomProcessor customProcessor) {
return this.stepBuilderFactory.get("workerStep").<User, User>chunk(10000).reader(reader(null))
.processor(customProcessor).writer(customWriter).build();
}
@Bean
public Job batchJob(Step masterStep, JobExecutionListnerClass jobExecutionListnerClass,
JobBuilderFactory jobBuilderFactory) {
return jobBuilderFactory.get("batchJob").start(masterStep).listener(jobExecutionListnerClass).build();
}
@Bean
@StepScope
public CustomReader reader(@Value("#{stepExecutionContext['partitionPayLoad']}") List<String> payload) {
return new CustomReader(payload);
}
@Bean
public AppTaskListener appTaskListener() {
return new AppTaskListener();
}
}

Records are not written in files when invoked from BillerOrderWriter which implements ItemWriter in Spring Batch

I am trying to write successful records using one writer and failed records using another writer.
I have written a BillerOrderWriter class which implements ItemWriter. I added some log statements and I can see that it logs the successful or failed billerOrderId, but it does not seem to invoke DatabaseToCsvFileJobConfig or SuccessfulOrdersToCsvFileJobConfig.
public class BillerOrderWriter implements ItemWriter<BillerOrder>{
private static Logger log = LoggerFactory.getLogger(BillerOrderWriter.class);
@Autowired
SuccessfulOrdersToCsvFileJobConfig successfulOrdersToCsvFileJobConfig;
@Autowired
DatabaseToCsvFileJobConfig databaseToCsvFileJobConfig;
@Override
public void write(List<? extends BillerOrder> items) throws Exception {
for (BillerOrder item : items) {
log.info("item = " + item.toString());
if (item.getResult().equals("SUCCESS")) {
log.info(" Success billerOrderId = " + item.getBillerOrderId());
successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter();
} else {
log.info("Failed billerOrderId = " + item.getBillerOrderId());
databaseToCsvFileJobConfig.databaseCsvItemWriter();
}
}
}
}
Here is BatchConfig class.
@Bean
public BillerOrderWriter billerOrderWriter() {
return new BillerOrderWriter();
}
@Bean
public Job importJobOrder(JobCompletionNotificationListner listener, Step step1) {
return jobBuilderFactory.get("importJobOrder")
.incrementer(new RunIdIncrementer())
.listener(listener)
.flow(step1)
.end()
.build();
}
@Bean(name="step1")
public Step step1(BillerOrderWriter billerOrderWriter) {
return stepBuilderFactory.get("step1")
.<BillerOrder, BillerOrder> chunk(10)
.reader((ItemReader<? extends BillerOrder>) reader())
.processor(processor())
.writer(billerOrderWriter)
.build();
}
Here are my success writer and failed writer classes.
@Configuration
public class SuccessfulOrdersToCsvFileJobConfig {
private static Logger log = LoggerFactory.getLogger(SuccessfulOrdersToCsvFileJobConfig.class);
@Bean
public ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter() {
log.info("Entering SuccessfulOrdersToCsvFileJobConfig...");
FlatFileItemWriter<BillerOrder> csvFileWriter = new FlatFileItemWriter<>();
String exportFileHeader = "BillerOrderId;SuccessMessage";
OrderWriter headerWriter = new OrderWriter(exportFileHeader);
csvFileWriter.setHeaderCallback(headerWriter);
String exportFilePath = "/tmp/SuccessBillerOrderIdForRetry.csv";
csvFileWriter.setResource(new FileSystemResource(exportFilePath));
LineAggregator<BillerOrder> lineAggregator = createOrderLineAggregator();
csvFileWriter.setLineAggregator(lineAggregator);
return csvFileWriter;
}
private LineAggregator<BillerOrder> createOrderLineAggregator() {
log.info("Entering createOrderLineAggregator...");
DelimitedLineAggregator<BillerOrder> lineAggregator = new DelimitedLineAggregator<>();
lineAggregator.setDelimiter(";");
FieldExtractor<BillerOrder> fieldExtractor = createOrderFieldExtractor();
lineAggregator.setFieldExtractor(fieldExtractor);
return lineAggregator;
}
private FieldExtractor<BillerOrder> createOrderFieldExtractor() {
log.info("Entering createOrderFieldExtractor...");
BeanWrapperFieldExtractor<BillerOrder> extractor = new BeanWrapperFieldExtractor<>();
extractor.setNames(new String[] {"billerOrderId","successMessage"});
return extractor;
}
}
@Configuration
public class DatabaseToCsvFileJobConfig {
private static Logger log = LoggerFactory.getLogger(DatabaseToCsvFileJobConfig.class);
@Bean
public ItemWriter<BillerOrder> databaseCsvItemWriter() {
log.info("Entering databaseCsvItemWriter...");
FlatFileItemWriter<BillerOrder> csvFileWriter = new FlatFileItemWriter<>();
String exportFileHeader = "BillerOrderId;ErrorMessage";
OrderWriter headerWriter = new OrderWriter(exportFileHeader);
csvFileWriter.setHeaderCallback(headerWriter);
String exportFilePath = "/tmp/FailedBillerOrderIdForRetry.csv";
csvFileWriter.setResource(new FileSystemResource(exportFilePath));
LineAggregator<BillerOrder> lineAggregator = createOrderLineAggregator();
csvFileWriter.setLineAggregator(lineAggregator);
return csvFileWriter;
}
private LineAggregator<BillerOrder> createOrderLineAggregator() {
log.info("Entering createOrderLineAggregator...");
DelimitedLineAggregator<BillerOrder> lineAggregator = new DelimitedLineAggregator<>();
lineAggregator.setDelimiter(";");
FieldExtractor<BillerOrder> fieldExtractor = createOrderFieldExtractor();
lineAggregator.setFieldExtractor(fieldExtractor);
return lineAggregator;
}
private FieldExtractor<BillerOrder> createOrderFieldExtractor() {
log.info("Entering createOrderFieldExtractor...");
BeanWrapperFieldExtractor<BillerOrder> extractor = new BeanWrapperFieldExtractor<>();
extractor.setNames(new String[] {"billerOrderId","errorMessage"});
return extractor;
}
}
Here is my job completion listener class.
@Component
public class JobCompletionNotificationListner extends JobExecutionListenerSupport {
private static final org.slf4j.Logger log = LoggerFactory.getLogger(JobCompletionNotificationListner.class);
@Override
public void afterJob(JobExecution jobExecution) {
log.info("In afterJob ...");
if (jobExecution.getStatus() == BatchStatus.COMPLETED) {
DatabaseToCsvFileJobConfig databaseToCsvFileJobConfig = new DatabaseToCsvFileJobConfig();
SuccessfulOrdersToCsvFileJobConfig successfulOrdersToCsvFileJobConfig = new SuccessfulOrdersToCsvFileJobConfig();
}
}
}
Your BillerOrderWriter#write method is supposed to contain the code that actually writes items to a data sink. In your case, however, you are calling successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter() and databaseToCsvFileJobConfig.databaseCsvItemWriter(), which only create item writer beans and never write anything. You should instead inject those delegate writers and call their write method when needed, something like:
public class BillerOrderWriter implements ItemWriter<BillerOrder> {

    private ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter;
    private ItemWriter<BillerOrder> databaseCsvItemWriter;

    // the two delegate writers are injected through the constructor
    public BillerOrderWriter(ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter,
                             ItemWriter<BillerOrder> databaseCsvItemWriter) {
        this.successfulDatabaseCsvItemWriter = successfulDatabaseCsvItemWriter;
        this.databaseCsvItemWriter = databaseCsvItemWriter;
    }

    @Override
    public void write(List<? extends BillerOrder> items) throws Exception {
        for (BillerOrder item : items) {
            if (item.getResult().equals("SUCCESS")) {
                successfulDatabaseCsvItemWriter.write(Collections.singletonList(item));
            } else {
                databaseCsvItemWriter.write(Collections.singletonList(item));
            }
        }
    }
}
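One possible way to wire this up in the batch config (a sketch that reuses the two writer bean methods already defined in SuccessfulOrdersToCsvFileJobConfig and DatabaseToCsvFileJobConfig):
@Bean
public BillerOrderWriter billerOrderWriter(
        SuccessfulOrdersToCsvFileJobConfig successfulOrdersToCsvFileJobConfig,
        DatabaseToCsvFileJobConfig databaseToCsvFileJobConfig) {
    // hand the two delegate writers to the constructor shown above
    return new BillerOrderWriter(
            successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter(),
            databaseToCsvFileJobConfig.databaseCsvItemWriter());
}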
Instead of BillerOrderWriter, I wrote a BillerOrderClassifier class.
public class BillerOrderClassifier implements Classifier<BillerOrder, ItemWriter<? super BillerOrder>> {
private static final long serialVersionUID = 1L;
private ItemWriter<BillerOrder> successItemWriter;
private ItemWriter<BillerOrder> failedItemWriter;
public BillerOrderClassifier(ItemWriter<BillerOrder> successItemWriter, ItemWriter<BillerOrder> failedItemWriter) {
this.successItemWriter = successItemWriter;
this.failedItemWriter = failedItemWriter;
}
@Override
public ItemWriter<? super BillerOrder> classify(BillerOrder billerOrder) {
return billerOrder.getResult().equals("SUCCESS") ? successItemWriter : failedItemWriter;
}
}
In the BatchConfiguration, I wrote a classifierBillerOrderCompositeItemWriter method.
@Bean
public ClassifierCompositeItemWriter<BillerOrder> classifierBillerOrderCompositeItemWriter() throws Exception {
ClassifierCompositeItemWriter<BillerOrder> compositeItemWriter = new ClassifierCompositeItemWriter<>();
compositeItemWriter.setClassifier(new BillerOrderClassifier(successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter(), databaseToCsvFileJobConfig.databaseCsvItemWriter()));
return compositeItemWriter;
}
@Bean(name="step1")
public Step step1() throws Exception{
return stepBuilderFactory.get("step1")
.<BillerOrder, BillerOrder> chunk(10)
.reader((ItemReader<? extends BillerOrder>) reader())
.processor(processor())
.writer(classifierBillerOrderCompositeItemWriter())
.stream(successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter())
.stream(databaseToCsvFileJobConfig.databaseCsvItemWriter())
.build();
}

Saving file information in Spring batch MultiResourceItemReader

I have a directory containing text files. I want to process the files and write the data into a DB. I did that by using MultiResourceItemReader.
I have a scenario where, whenever a file comes in, the first step is to save the file info, such as the filename and the record count in the file, in a custom log table.
Since I used MultiResourceItemReader, it loads all files at once and the code I wrote executes only once at server startup. I tried the getCurrentResource() method but it returns null.
Please refer to the code below.
NetFileProcessController.java
@Slf4j
@RestController
@RequestMapping("/netProcess")
public class NetFileProcessController {
@Autowired
private JobLauncher jobLauncher;
@Autowired
@Qualifier("netFileParseJob")
private Job job;
@GetMapping(path = "/process")
public @ResponseBody StatusResponse process() throws ServiceException {
try {
Map<String, JobParameter> parameters = new HashMap<>();
parameters.put("date", new JobParameter(new Date()));
jobLauncher.run(job, new JobParameters(parameters));
return new StatusResponse(true);
} catch (Exception e) {
log.error("Exception", e);
Throwable rootException = ExceptionUtils.getRootCause(e);
String errMessage = rootException.getMessage();
log.info("Root cause is instance of JobInstanceAlreadyCompleteException --> "+(rootException instanceof JobInstanceAlreadyCompleteException));
if(rootException instanceof JobInstanceAlreadyCompleteException){
log.info(errMessage);
return new StatusResponse(false, "This job has been completed already!");
} else{
throw new ServiceException(errMessage);
}
}
}
}
BatchConfig.java
@Configuration
@EnableBatchProcessing
public class BatchConfig {
private JobBuilderFactory jobBuilderFactory;
@Autowired
public void setJobBuilderFactory(JobBuilderFactory jobBuilderFactory) {
this.jobBuilderFactory = jobBuilderFactory;
}
@Autowired
StepBuilderFactory stepBuilderFactory;
@Value("file:${input.files.location}${input.file.pattern}")
private Resource[] netFileInputs;
@Value("${net.file.column.names}")
private String netFilecolumnNames;
@Value("${net.file.column.lengths}")
private String netFileColumnLengths;
@Autowired
NetFileInfoTasklet netFileInfoTasklet;
@Autowired
NetFlatFileProcessor netFlatFileProcessor;
@Autowired
NetFlatFileWriter netFlatFileWriter;
NetFlatFileWriter netFlatFileWriter;
@Bean
public Job netFileParseJob() {
return jobBuilderFactory.get("netFileParseJob")
.incrementer(new RunIdIncrementer())
.start(netFileStep())
.build();
}
public Step netFileStep() {
return stepBuilderFactory.get("netFileStep")
.<NetDetailsDTO, NetDetailsDTO>chunk(1)
.reader(new NetFlatFileReader(netFileInputs, netFilecolumnNames, netFileColumnLengths))
.processor(netFlatFileProcessor)
.writer(netFlatFileWriter)
.build();
}
}
NetFlatFileReader.java
@Slf4j
public class NetFlatFileReader extends MultiResourceItemReader<NetDetailsDTO> {
public NetFlatFileReader(Resource[] netFileInputs, String netFilecolumnNames, String netFileColumnLengths) {
setResources(netFileInputs);
setDelegate(reader(netFilecolumnNames, netFileColumnLengths));
}
private FlatFileItemReader<NetDetailsDTO> reader(String netFilecolumnNames, String netFileColumnLengths) {
FlatFileItemReader<NetDetailsDTO> flatFileItemReader = new FlatFileItemReader<>();
FixedLengthTokenizer tokenizer = CommonUtil.fixedLengthTokenizer(netFilecolumnNames, netFileColumnLengths);
FieldSetMapper<NetDetailsDTO> mapper = createMapper();
DefaultLineMapper<NetDetailsDTO> lineMapper = new DefaultLineMapper<>();
lineMapper.setLineTokenizer(tokenizer);
lineMapper.setFieldSetMapper(mapper);
flatFileItemReader.setLineMapper(lineMapper);
return flatFileItemReader;
}
/*
* Mapping column data to DTO
*/
private FieldSetMapper<NetDetailsDTO> createMapper() {
BeanWrapperFieldSetMapper<NetDetailsDTO> mapper = new BeanWrapperFieldSetMapper<>();
try {
mapper.setTargetType(NetDetailsDTO.class);
} catch(Exception e) {
log.error("Exception in mapping column data to dto ", e);
}
return mapper;
}
}
I am stuck on this scenario; any help is appreciated.
I don't think MultiResourceItemReader is appropriate in your case. I would run a job per file for all the reasons of making one thing do one thing and do it well:
- Your preparatory step will work by design
- It would be easier to run multiple jobs in parallel and improve your file ingestion throughput
- In case of failure, you would only restart the job for the failed file
EDIT: add an example
Resource[] netFileInputs = ... // same code that looks for file as currently in your reader
for (Resource netFileInput : netFileInputs) {
Map<String, JobParameter> parameters = new HashMap<>();
parameters.put("netFileInput", new JobParameter(netFileInput.getFilename()));
jobLauncher.run(job, new JobParameters(parameters));
}
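Inside that per-file job, the reader can then be step-scoped and bound to the single file passed as a job parameter, roughly like this (a sketch that reuses the 'netFileInput' parameter name above plus the input.files.location property, column properties and CommonUtil helper from the question; getCurrentResource() is no longer needed because the file name is a job parameter available to every step):
@Bean
@StepScope
public FlatFileItemReader<NetDetailsDTO> netFileReader(
        @Value("#{jobParameters['netFileInput']}") String fileName,
        @Value("${input.files.location}") String inputLocation,
        @Value("${net.file.column.names}") String columnNames,
        @Value("${net.file.column.lengths}") String columnLengths) {
    // plain single-resource reader: one job run = one file
    BeanWrapperFieldSetMapper<NetDetailsDTO> mapper = new BeanWrapperFieldSetMapper<>();
    mapper.setTargetType(NetDetailsDTO.class);
    DefaultLineMapper<NetDetailsDTO> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(CommonUtil.fixedLengthTokenizer(columnNames, columnLengths));
    lineMapper.setFieldSetMapper(mapper);
    FlatFileItemReader<NetDetailsDTO> reader = new FlatFileItemReader<>();
    reader.setResource(new FileSystemResource(inputLocation + fileName));
    reader.setLineMapper(lineMapper);
    return reader;
}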

The FlatFileItemReader read only one line from the CSV file - Spring Batch

I'm creating a Spring Batch job to populate data into a database table from a given CSV file.
I created a customized FlatFileItemReader.
My problem is that the read() method is invoked only once, so only the first line of my CSV file is inserted into the database.
@Configuration
@EnableBatchProcessing
public class SpringBatchConfig {
private MultipartFile[] files;
@Bean
public Job job(JobBuilderFactory jobBuilderFactory, StepBuilderFactory stepBuilderFactory,
ItemReader<MyModelEntity> itemReader,
ItemWriter<MyModelEntity> itemWriter) {
Step step = stepBuilderFactory.get("Load-CSV-file_STP")
.<MyModelEntity, MyModelEntity > chunk(12)
.reader(itemReader)
.writer(itemWriter).build();
return jobBuilderFactory.get("Load-CSV-Files").
incrementer(new RunIdIncrementer())
.start(step)
.build();
}
@Bean
ItemReader<MyModelEntity> myModelCsvReader() throws Exception {
return new MyModelCsvReader();
}
}
The MyModelCsvReader:
@Component
@StepScope
public class MyModelCsvReader implements ItemReader<MyModelEntity>{
@Value("#{jobParameters['SDH']}")
private String sdhPath;
private boolean batchJobState= false;
@Autowired
MyModelFieldSetMapper myModelFieldSetMapper;
public LineMapper<MyModelEntity> lineMapper() throws Exception {
DefaultLineMapper<MyModelEntity> defaultLineMapper = new
DefaultLineMapper<MyModelEntity>();
DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
lineTokenizer.setDelimiter(",");
lineTokenizer.setStrict(false);
lineTokenizer.setNames(new String[]
{
"clientId","ddId","institName","progName",
"qual","startDate","endDate","eType", "country","comments"
});
defaultLineMapper.setLineTokenizer(lineTokenizer);
defaultLineMapper.setFieldSetMapper(myModelFieldSetMapper);
return defaultLineMapper;}
@Override
public MyModelEntity read()
throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
//if(!batchJobState )
{
FlatFileItemReader<MyModelEntity> flatFileItemReader = new
FlatFileItemReader<MyModelEntity>();
flatFileItemReader.setMaxItemCount(2000);
flatFileItemReader.setResource(new UrlResource("file:\\"+sdhPath));
flatFileItemReader.setName("CSV-Reader");
flatFileItemReader.setLinesToSkip(1);
flatFileItemReader.setLineMapper(lineMapper());
flatFileItemReader.open(new ExecutionContext());
batchJobState=true;
return flatFileItemReader.read();
}
// return null;
}
}
The FieldSetMapper implementation:
@Component
public class MyModelFieldSetMapper implements FieldSetMapper<MyModelEntity> {
//private SiteService siteService =BeanUtil.getBean(SiteServiceImpl.class);
@Autowired
private SiteService siteService;
@Override
public MyModelEntity mapFieldSet(FieldSet fieldSet ) throws BindException {
if(fieldSet == null){
return null;
}
MyModelEntity myModel = new MyModelEntity();
// setting MyModelAttributes Values
return myModel;
}
}
Any contribution is welcome. Thanks.
// the reader after extending FlatFileItemReader
@Component
@StepScope
public class CustomUserItemReader extends FlatFileItemReader<User> {
@Value("#{jobParameters['UserCSVPath']}")
private String UserCSVPath;
private boolean batchJobState;
public CustomUserItemReader() throws Exception {
super();
setResource(new UrlResource("file:\\"+UserCSVPath));
setLineMapper(lineMapper());
afterPropertiesSet();
setStrict(false);
}
public LineMapper<User> lineMapper() throws Exception {
DefaultLineMapper<User> defaultLineMapper =
new DefaultLineMapper<User>();
DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
lineTokenizer.setDelimiter(",");
lineTokenizer.setStrict(false);
lineTokenizer.setNames(new String[]{"name", "dept",
"salary","endDate"});
defaultLineMapper.setLineTokenizer(lineTokenizer);
defaultLineMapper.setFieldSetMapper(new CustomUserFieldSetMapper());
//defaultLineMapper.setFieldSetMapper(fieldSetMapper);
return defaultLineMapper;}
@Override
public User read()
throws Exception, UnexpectedInputException, ParseException,
NonTransientResourceException {
//if(!batchJobState )
{
// flatFileItemReader.setMaxItemCount(2000);
this.setResource(new UrlResource("file:\\"+UserCSVPath));
this.setName("CSV-Reader");
this.setLinesToSkip(1);
//flatFileItemReader.setLineMapper(lineMapper());
this.open(new ExecutionContext());
User e = this.read();
batchJobState = true;
return e ;
}
// return null;
}
public String getUserCSVPath() {
return UserCSVPath;
}
public void setUserCSVPath(String userCSVPath) {
UserCSVPath = userCSVPath;
}
}
Thanks for all your suggestions. Even though I had implemented ItemReader<>, I fixed the problem by moving the instantiation of the FlatFileItemReader out of the read() method.
Creating a new FlatFileItemReader on every call meant each new instance read only the first line of the file.
Thanks.
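For reference, a sketch of what that fix can look like: declare the reader once as a step-scoped FlatFileItemReader bean bound to the 'SDH' job parameter (this reuses the column names and field set mapper from the question; the FileSystemResource is an assumption, adjust it to how your path is passed):
@Bean
@StepScope
public FlatFileItemReader<MyModelEntity> myModelCsvReader(
        @Value("#{jobParameters['SDH']}") String sdhPath,
        MyModelFieldSetMapper myModelFieldSetMapper) {
    DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
    lineTokenizer.setDelimiter(",");
    lineTokenizer.setStrict(false);
    lineTokenizer.setNames(new String[] {
            "clientId", "ddId", "institName", "progName",
            "qual", "startDate", "endDate", "eType", "country", "comments"});
    DefaultLineMapper<MyModelEntity> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(lineTokenizer);
    lineMapper.setFieldSetMapper(myModelFieldSetMapper);
    FlatFileItemReader<MyModelEntity> reader = new FlatFileItemReader<>();
    reader.setName("CSV-Reader");
    reader.setLinesToSkip(1);
    reader.setResource(new FileSystemResource(sdhPath));
    reader.setLineMapper(lineMapper);
    // the step opens this reader, calls read() until it returns null, then closes it
    return reader;
}
The step then drives this single reader for the whole file instead of re-creating a reader on every read() call.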

Get String from Spring FTP streaming Inbound Channel Adapter

I have the following code, which works OK retrieving files from an FTP server into a stream, but I need to get the String content of each file. It seems I need to use a Transformer passing a charset, but what am I missing? How exactly do I get the content of each transferred file as a String?
Thanks a lot in advance.
@SpringBootApplication
@EnableIntegration
public class FtpinboundApp extends SpringBootServletInitializer implements WebApplicationInitializer {
final static Logger logger = Logger.getLogger(FtpinboundApp.class);
public static void main(String[] args) {
SpringApplication.run(FtpinboundApp.class, args);
}
@Bean
public SessionFactory<FTPFile> ftpSessionFactory() {
DefaultFtpSessionFactory sf = new DefaultFtpSessionFactory();
sf.setHost("X.X.X.X");
sf.setPort(21);
sf.setUsername("xxx");
sf.setPassword("XXX");
return new CachingSessionFactory<FTPFile>(sf);
}
@Bean
@ServiceActivator(inputChannel = "stream")
public MessageHandler handler() {
return new MessageHandler() {
@Override
public void handleMessage(Message<?> message) throws MessagingException {
System.out.println("trasnferred file:" + message.getPayload());
}
};
}
@Bean
@InboundChannelAdapter(value = "stream", poller = @Poller(fixedRate = "1000"))
public MessageSource<InputStream> ftpMessageSource() {
FtpStreamingMessageSource messageSource = new FtpStreamingMessageSource(template(), null);
messageSource.setRemoteDirectory("/X/X/X");
messageSource.setFilter(new FtpPersistentAcceptOnceFileListFilter(new SimpleMetadataStore(), "streaming"));
return messageSource;
}
@Bean
@Transformer(inputChannel = "stream", outputChannel = "data")
public org.springframework.integration.transformer.Transformer transformer() {
return new StreamTransformer("UTF-8");
}
@Bean
public FtpRemoteFileTemplate template() {
return new FtpRemoteFileTemplate(ftpSessionFactory());
}
@Bean(name = PollerMetadata.DEFAULT_POLLER)
public PollerMetadata defaultPoller() {
PollerMetadata pollerMetadata = new PollerMetadata();
pollerMetadata.setTrigger(new PeriodicTrigger(5000));
return pollerMetadata;
}
}
Use a StreamTransformer to get the whole file as a single string, or a FileSplitter to get a message for each line.
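With the StreamTransformer you already mapped from the "stream" channel to the "data" channel, the payload arriving on "data" is the whole file content as a String; a minimal sketch of a consumer for it (the bean name is illustrative):
@Bean
@ServiceActivator(inputChannel = "data")
public MessageHandler fileContentHandler() {
    return message -> {
        // after the StreamTransformer, the payload is the file content decoded with the configured charset
        String content = (String) message.getPayload();
        System.out.println("transferred file content:\n" + content);
    };
}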
EDIT (filter config)
@Bean
@InboundChannelAdapter(channel = "stream")
public MessageSource<InputStream> ftpMessageSource() {
FtpStreamingMessageSource messageSource = new FtpStreamingMessageSource(template(), null);
messageSource.setRemoteDirectory("ftpSource/");
messageSource.setFilter(filter());
return messageSource;
}
public FileListFilter<FTPFile> filter() {
CompositeFileListFilter<FTPFile> filter = new CompositeFileListFilter<>();
filter.addFilter(new FtpSimplePatternFileListFilter("*.txt"));
filter.addFilter(acceptOnceFilter());
return filter;
}
@Bean
public FtpPersistentAcceptOnceFileListFilter acceptOnceFilter() {
FtpPersistentAcceptOnceFileListFilter filter = new FtpPersistentAcceptOnceFileListFilter(meta(),
"streaming"); // keys will be, e.g. "streamingfoo.txt"
filter.setFlushOnUpdate(true);
return filter;
}
@Bean
public ConcurrentMetadataStore meta() {
PropertiesPersistingMetadataStore meta = new PropertiesPersistingMetadataStore();
meta.setBaseDirectory("/tmp/foo");
meta.setFileName("ftpStream.properties");
return meta;
}
EDIT2 - remove remote file with an advice
@ServiceActivator(inputChannel = "data", adviceChain = "after")
@Bean
public MessageHandler handle() {
return System.out::println;
}
@Bean
public ExpressionEvaluatingRequestHandlerAdvice after() {
ExpressionEvaluatingRequestHandlerAdvice advice = new ExpressionEvaluatingRequestHandlerAdvice();
advice.setOnSuccessExpression(
"@template.remove(headers['file_remoteDirectory'] + headers['file_remoteFile'])");
advice.setPropagateEvaluationFailures(true);
return advice;
}
