The FlatFileItemReader reads only one line from the CSV file - Spring Batch

I'm creating a Spring Batch job to populate data into a database table from a given CSV file.
I created a customized FlatFileItemReader.
My problem is that the read() method is invoked only once, so only the first line of my CSV file is inserted into the database.
@Configuration
@EnableBatchProcessing
public class SpringBatchConfig {
private MultipartFile[] files;
@Bean
public Job job(JobBuilderFactory jobBuilderFactory, StepBuilderFactory stepBuilderFactory,
ItemReader<MyModelEntity> itemReader,
ItemWriter<MyModelEntity> itemWriter) {
Step step = stepBuilderFactory.get("Load-CSV-file_STP")
.<MyModelEntity, MyModelEntity>chunk(12)
.reader(itemReader)
.writer(itemWriter).build();
return jobBuilderFactory.get("Load-CSV-Files")
.incrementer(new RunIdIncrementer())
.start(step)
.build();
}
@Bean
ItemReader<MyModelEntity> myModelCsvReader() throws Exception {
return new MyModelCsvReader();
}
}
The MyModelCsvReader:
@Component
@StepScope
public class MyModelCsvReader implements ItemReader<MyModelEntity>{
@Value("#{jobParameters['SDH']}")
private String sdhPath;
private boolean batchJobState= false;
@Autowired
MyModelFieldSetMapper myModelFieldSetMapper;
public LineMapper<MyModelEntity> lineMapper() throws Exception {
DefaultLineMapper<MyModelEntity> defaultLineMapper = new
DefaultLineMapper<MyModelEntity>();
DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
lineTokenizer.setDelimiter(",");
lineTokenizer.setStrict(false);
lineTokenizer.setNames(new String[]
{
"clientId","ddId","institName","progName",
"qual","startDate","endDate","eType", "country","comments"
});
defaultLineMapper.setLineTokenizer(lineTokenizer);
defaultLineMapper.setFieldSetMapper(myModelFieldSetMapper);
return defaultLineMapper;}
@Override
public MyModelEntity read()
throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
//if(!batchJobState )
{
FlatFileItemReader<MyModelEntity> flatFileItemReader = new
FlatFileItemReader<MyModelEntity>();
flatFileItemReader.setMaxItemCount(2000);
flatFileItemReader.setResource(new UrlResource("file:\\"+sdhPath));
flatFileItemReader.setName("CSV-Reader");
flatFileItemReader.setLinesToSkip(1);
flatFileItemReader.setLineMapper(lineMapper());
flatFileItemReader.open(new ExecutionContext());
batchJobState=true;
return flatFileItemReader.read();
}
// return null;
}
}
The FieldSetMapper implementation:
@Component
public class MyModelFieldSetMapper implements FieldSetMapper<MyModelEntity> {
//private SiteService siteService =BeanUtil.getBean(SiteServiceImpl.class);
@Autowired
private SiteService siteService;
@Override
public MyModelEntity mapFieldSet(FieldSet fieldSet ) throws BindException {
if(fieldSet == null){
return null;
}
MyModelEntity myModel = new MyModelEntity();
// setting MyModel attribute values
return myModel;
}
}
Any contribution is welcome. Thanks.
// the reader after extending FlatFileItemReader
@Component
@StepScope
public class CustomUserItemReader extends FlatFileItemReader<User> {
@Value("#{jobParameters['UserCSVPath']}")
private String UserCSVPath;
private boolean batchJobState;
public CustomUserItemReader() throws Exception {
super();
setResource(new UrlResource("file:\\"+UserCSVPath));
setLineMapper(lineMapper());
afterPropertiesSet();
setStrict(false);
}
public LineMapper<User> lineMapper() throws Exception {
DefaultLineMapper<User> defaultLineMapper =
new DefaultLineMapper<User>();
DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
lineTokenizer.setDelimiter(",");
lineTokenizer.setStrict(false);
lineTokenizer.setNames(new String[]{"name", "dept",
"salary","endDate"});
defaultLineMapper.setLineTokenizer(lineTokenizer);
defaultLineMapper.setFieldSetMapper(new CustomUserFieldSetMapper());
//defaultLineMapper.setFieldSetMapper(fieldSetMapper);
return defaultLineMapper;}
@Override
public User read()
throws Exception, UnexpectedInputException, ParseException,
NonTransientResourceException {
//if(!batchJobState )
{
// flatFileItemReader.setMaxItemCount(2000);
this.setResource(new UrlResource("file:\\"+UserCSVPath));
this.setName("CSV-Reader");
this.setLinesToSkip(1);
//flatFileItemReader.setLineMapper(lineMapper());
this.open(new ExecutionContext());
User e = this.read();
batchJobState = true;
return e ;
}
// return null;
}
public String getUserCSVPath() {
return UserCSVPath;
}
public void setUserCSVPath(String userCSVPath) {
UserCSVPath = userCSVPath;
}
}

Thanks for all your suggestions, even though I had implemented ItemReader<>. I fixed the problem by moving the instantiation of the FlatFileItemReader out of the read() method.
Creating a new FlatFileItemReader on every call meant that only the first line was read for each object created.
Thanks.
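For reference, a minimal sketch of the more conventional setup: let a step-scoped FlatFileItemReader be the reader bean itself, so the step opens it once and calls read() repeatedly until it returns null. The field names and the 'SDH' job parameter come from the question; everything else here is illustrative and not the original author's code.

@Bean
@StepScope
public FlatFileItemReader<MyModelEntity> myModelCsvReader(
        @Value("#{jobParameters['SDH']}") String sdhPath,
        MyModelFieldSetMapper myModelFieldSetMapper) {
    // tokenizer and line mapper as in the question
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
    tokenizer.setStrict(false);
    tokenizer.setNames(new String[] {"clientId", "ddId", "institName", "progName",
            "qual", "startDate", "endDate", "eType", "country", "comments"});
    DefaultLineMapper<MyModelEntity> lineMapper = new DefaultLineMapper<>();
    lineMapper.setLineTokenizer(tokenizer);
    lineMapper.setFieldSetMapper(myModelFieldSetMapper);
    // one reader instance per step execution; the step handles open/read/close
    FlatFileItemReader<MyModelEntity> reader = new FlatFileItemReader<>();
    reader.setName("CSV-Reader");
    reader.setResource(new FileSystemResource(sdhPath));
    reader.setLinesToSkip(1);
    reader.setLineMapper(lineMapper);
    return reader;
}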

Related

Records are not written to files when invoked from BillerOrderWriter, which implements ItemWriter, in Spring Batch

I am trying to write successful records using one writer and failed records using another writer.
I have written a BillerOrderWriter class which implements ItemWriter. I added some log statements and I can see it logs the successful billerOrderId or the failed billerOrderId, but it seems it never invokes DatabaseToCsvFileJobConfig or SuccessfulOrdersToCsvFileJobConfig.
public class BillerOrderWriter implements ItemWriter<BillerOrder>{
private static Logger log = LoggerFactory.getLogger("BillerOrderWriter.class");
@Autowired
SuccessfulOrdersToCsvFileJobConfig successfulOrdersToCsvFileJobConfig;
@Autowired
DatabaseToCsvFileJobConfig databaseToCsvFileJobConfig;
@Override
public void write(List<? extends BillerOrder> items) throws Exception {
for (BillerOrder item : items) {
log.info("item = " + item.toString());
if (item.getResult().equals("SUCCESS")) {
log.info(" Success billerOrderId = " + item.getBillerOrderId());
successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter();
} else {
log.info("Failed billerOrderId = " + item.getBillerOrderId());
databaseToCsvFileJobConfig.databaseCsvItemWriter();
}
}
}
}
Here is the BatchConfig class:
@Bean
public BillerOrderWriter billerOrderWriter() {
return new BillerOrderWriter();
}
@Bean
public Job importJobOrder(JobCompletionNotificationListner listener, Step step1) {
return jobBuilderFactory.get("importJobOrder")
.incrementer(new RunIdIncrementer())
.listener(listener)
.flow(step1)
.end()
.build();
}
@Bean(name="step1")
public Step step1(BillerOrderWriter billerOrderWriter) {
return stepBuilderFactory.get("step1")
.<BillerOrder, BillerOrder> chunk(10)
.reader((ItemReader<? extends BillerOrder>) reader())
.processor(processor())
.writer(billerOrderWriter)
.build();
}
Here are my success writer and failed writer classes:
@Configuration
public class SuccessfulOrdersToCsvFileJobConfig {
private static Logger log = LoggerFactory.getLogger("SuccessfulOrdersToCsvFileJobConfig.class");
@Bean
public ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter() {
log.info("Entering SuccessfulOrdersToCsvFileJobConfig...");
FlatFileItemWriter<BillerOrder> csvFileWriter = new FlatFileItemWriter<>();
String exportFileHeader = "BillerOrderId;SuccessMessage";
OrderWriter headerWriter = new OrderWriter(exportFileHeader);
csvFileWriter.setHeaderCallback(headerWriter);
String exportFilePath = "/tmp/SuccessBillerOrderIdForRetry.csv";
csvFileWriter.setResource(new FileSystemResource(exportFilePath));
LineAggregator<BillerOrder> lineAggregator = createOrderLineAggregator();
csvFileWriter.setLineAggregator(lineAggregator);
return csvFileWriter;
}
private LineAggregator<BillerOrder> createOrderLineAggregator() {
log.info("Entering createOrderLineAggregator...");
DelimitedLineAggregator<BillerOrder> lineAggregator = new DelimitedLineAggregator<>();
lineAggregator.setDelimiter(";");
FieldExtractor<BillerOrder> fieldExtractor = createOrderFieldExtractor();
lineAggregator.setFieldExtractor(fieldExtractor);
return lineAggregator;
}
private FieldExtractor<BillerOrder> createOrderFieldExtractor() {
log.info("Entering createOrderFieldExtractor...");
BeanWrapperFieldExtractor<BillerOrder> extractor = new BeanWrapperFieldExtractor<>();
extractor.setNames(new String[] {"billerOrderId","successMessage"});
return extractor;
}
}
@Configuration
public class DatabaseToCsvFileJobConfig {
private static Logger log = LoggerFactory.getLogger("DatabaseToCsvFileJobConfig.class");
@Bean
public ItemWriter<BillerOrder> databaseCsvItemWriter() {
log.info("Entering databaseCsvItemWriter...");
FlatFileItemWriter<BillerOrder> csvFileWriter = new FlatFileItemWriter<>();
String exportFileHeader = "BillerOrderId;ErrorMessage";
OrderWriter headerWriter = new OrderWriter(exportFileHeader);
csvFileWriter.setHeaderCallback(headerWriter);
String exportFilePath = "/tmp/FailedBillerOrderIdForRetry.csv";
csvFileWriter.setResource(new FileSystemResource(exportFilePath));
LineAggregator<BillerOrder> lineAggregator = createOrderLineAggregator();
csvFileWriter.setLineAggregator(lineAggregator);
return csvFileWriter;
}
private LineAggregator<BillerOrder> createOrderLineAggregator() {
log.info("Entering createOrderLineAggregator...");
DelimitedLineAggregator<BillerOrder> lineAggregator = new DelimitedLineAggregator<>();
lineAggregator.setDelimiter(";");
FieldExtractor<BillerOrder> fieldExtractor = createOrderFieldExtractor();
lineAggregator.setFieldExtractor(fieldExtractor);
return lineAggregator;
}
private FieldExtractor<BillerOrder> createOrderFieldExtractor() {
log.info("Entering createOrderFieldExtractor...");
BeanWrapperFieldExtractor<BillerOrder> extractor = new BeanWrapperFieldExtractor<>();
extractor.setNames(new String[] {"billerOrderId","errorMessage"});
return extractor;
}
}
Here is my job completion listener class.
@Component
public class JobCompletionNotificationListner extends JobExecutionListenerSupport {
private static final org.slf4j.Logger log = LoggerFactory.getLogger(JobCompletionNotificationListner.class);
@Override
public void afterJob(JobExecution jobExecution) {
log.info("In afterJob ...");
if (jobExecution.getStatus() == BatchStatus.COMPLETED) {
DatabaseToCsvFileJobConfig databaseToCsvFileJobConfig = new DatabaseToCsvFileJobConfig();
SuccessfulOrdersToCsvFileJobConfig successfulOrdersToCsvFileJobConfig = new SuccessfulOrdersToCsvFileJobConfig();
}
}
}
Your BillerOrderWriter#write method is supposed to do the actual write operation of items to a data sink. In your case, however, you are calling successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter() and databaseToCsvFileJobConfig.databaseCsvItemWriter(), which only create item writer beans. You should inject those delegate writers and call their write method when needed, something like:
public class BillerOrderWriter implements ItemWriter<BillerOrder>{
private ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter;
private ItemWriter<BillerOrder> databaseCsvItemWriter;
// constructor with successfulDatabaseCsvItemWriter + databaseCsvItemWriter
@Override
public void write(List<? extends BillerOrder> items) throws Exception {
for (BillerOrder item : items) {
if (item.getResult().equals("SUCCESS")) {
successfulDatabaseCsvItemWriter.write(Collections.singletonList(item));
} else {
databaseCsvItemWriter.write(Collections.singletonList(item));
}
}
}
}
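A possible constructor wiring for that delegating writer (a sketch only; the qualifier values assume the delegate bean names default to the @Bean method names in the configuration classes above):

public class BillerOrderWriter implements ItemWriter<BillerOrder> {

    private final ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter;
    private final ItemWriter<BillerOrder> databaseCsvItemWriter;

    // inject the two delegate writers built in SuccessfulOrdersToCsvFileJobConfig and DatabaseToCsvFileJobConfig
    public BillerOrderWriter(@Qualifier("successfulDatabaseCsvItemWriter") ItemWriter<BillerOrder> successfulDatabaseCsvItemWriter,
                             @Qualifier("databaseCsvItemWriter") ItemWriter<BillerOrder> databaseCsvItemWriter) {
        this.successfulDatabaseCsvItemWriter = successfulDatabaseCsvItemWriter;
        this.databaseCsvItemWriter = databaseCsvItemWriter;
    }

    // write(...) as in the snippet above: route each item to the matching delegate
}

Note that both delegates are FlatFileItemWriters, i.e. ItemStreams, so they also have to be opened, for example by registering them on the step with .stream(...), which is what the classifier-based solution below ends up doing.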
Instead of BillerOrderWriter, I wrote a BillerOrderClassifier class.
public class BillerOrderClassifier implements Classifier<BillerOrder, ItemWriter<? super BillerOrder>> {
private static final long serialVersionUID = 1L;
private ItemWriter<BillerOrder> successItemWriter;
private ItemWriter<BillerOrder> failedItemWriter;
public BillerOrderClassifier(ItemWriter<BillerOrder> successItemWriter, ItemWriter<BillerOrder> failedItemWriter) {
this.successItemWriter = successItemWriter;
this.failedItemWriter = failedItemWriter;
}
@Override
public ItemWriter<? super BillerOrder> classify(BillerOrder billerOrder) {
return billerOrder.getResult().equals("SUCCESS") ? successItemWriter : failedItemWriter;
}
}
In BatchConfiguration, I wrote a classifierBillerOrderCompositeItemWriter method:
@Bean
public ClassifierCompositeItemWriter<BillerOrder> classifierBillerOrderCompositeItemWriter() throws Exception {
ClassifierCompositeItemWriter<BillerOrder> compositeItemWriter = new ClassifierCompositeItemWriter<>();
compositeItemWriter.setClassifier(new BillerOrderClassifier(successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter(), databaseToCsvFileJobConfig.databaseCsvItemWriter()));
return compositeItemWriter;
}
@Bean(name="step1")
public Step step1() throws Exception{
return stepBuilderFactory.get("step1")
.<BillerOrder, BillerOrder> chunk(10)
.reader((ItemReader<? extends BillerOrder>) reader())
.processor(processor())
.writer(classifierBillerOrderCompositeItemWriter())
.stream(successfulOrdersToCsvFileJobConfig.successfulDatabaseCsvItemWriter())
.stream(databaseToCsvFileJobConfig.databaseCsvItemWriter())
.build();
}

Spring Batch Reader is reading alternate records

I have created a sample Spring Batch application which tries to read records from a DB and display them in the writer. However, I can see that only even-numbered (alternate) records are printed.
It's not a problem with the database, as the behavior is the same with both the H2 and Oracle databases.
There are 100 records in total in my DB.
With JdbcCursorItemReader, only 50 records are read, and only alternate ones, as can be seen from the log snapshot.
With JdbcPagingItemReader, only 5 records are read, and again only alternate ones, as can be seen from the log snapshot.
My code configuration is given below. Why is the reader skipping odd-numbered records?
@Bean
public ItemWriter<Safety> safetyWriter() {
return items -> {
for (Safety item : items) {
log.info(item.toString());
}
};
}
@Bean
public JdbcCursorItemReader<Safety> cursorItemReader() throws Exception {
JdbcCursorItemReader<Safety> reader = new JdbcCursorItemReader<>();
reader.setSql("select * from safety " );
reader.setDataSource(dataSource);
reader.setRowMapper(new SafetyRowMapper());
reader.setVerifyCursorPosition(false);
reader.afterPropertiesSet();
return reader;
}
@Bean
JdbcPagingItemReader<Safety> safetyPagingItemReader() throws Exception {
JdbcPagingItemReader<Safety> reader = new JdbcPagingItemReader<>();
reader.setDataSource(dataSource);
reader.setFetchSize(10);
reader.setRowMapper(new SafetyRowMapper());
H2PagingQueryProvider queryProvider = new H2PagingQueryProvider();
queryProvider.setSelectClause("*");
queryProvider.setFromClause("safety");
Map<String, Order> sortKeys = new HashMap<>(1);
sortKeys.put("id", Order.ASCENDING);
queryProvider.setSortKeys(sortKeys);
reader.setQueryProvider(queryProvider);
return reader;
}
@Bean
public Step importSafetyDetails() throws Exception {
return stepBuilderFactory.get("importSafetyDetails")
.<Safety, Safety>chunk(chunkSize)
//.reader(cursorItemReader())
.reader(safetyPagingItemReader())
.writer(safetyWriter())
.listener(new StepListener())
.listener(new ChunkListener())
.build();
}
@Bean
public Job job() throws Exception {
return jobBuilderFactory.get("job")
.start(importSafetyDetails())
.build();
}
The domain classes look like this:
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Safety {
private int id;
}
public class SafetyRowMapper implements RowMapper<Safety> {
@Override
public Safety mapRow(ResultSet resultSet, int i) throws SQLException {
if(resultSet.next()) {
Safety safety = new Safety();
safety.setId(resultSet.getInt("id"));
return safety;
}
return null;
}
}
@SpringBootApplication
@EnableBatchProcessing
public class SpringBatchSamplesApplication {
public static void main(String[] args) {
SpringApplication.run(SpringBatchSamplesApplication.class, args);
}
}
application.yml configuration is as below:
spring:
  application:
    name: spring-batch-samples
  main:
    allow-bean-definition-overriding: true
  datasource:
    url: jdbc:h2:mem:testdb;DB_CLOSE_DELAY=-1;DB_CLOSE_ON_EXIT=FALSE
    username: sa
    password:
    driver-class-name: org.h2.Driver
    hikari:
      connection-timeout: 20000
      maximum-pool-size: 10
  h2:
    console:
      enabled: true
  batch:
    initialize-schema: never
server:
  port: 9090
The SQL is as below:
CREATE TABLE safety (
id int NOT NULL,
CONSTRAINT PK_ID PRIMARY KEY (id)
);
INSERT INTO safety (id) VALUES (1);
...100 records are inserted
The listener classes are as below:
@Slf4j
public class StepListener{
@AfterStep
public ExitStatus afterStep(StepExecution stepExecution) {
log.info("In step {} ,Exit Status: {} ,Read Records: {} ,Committed Records: {} ,Skipped Read Records: {} ,Skipped Write Records: {}",
stepExecution.getStepName(),
stepExecution.getExitStatus().getExitCode(),
stepExecution.getReadCount(),
stepExecution.getCommitCount(),
stepExecution.getReadSkipCount(),
stepExecution.getWriteSkipCount());
return stepExecution.getExitStatus();
}
}
@Slf4j
public class ChunkListener {
@BeforeChunk
public void beforeChunk(ChunkContext context) {
log.info("<< Before the chunk");
}
@AfterChunk
public void afterChunk(ChunkContext context) {
log.info("<< After the chunk");
}
}
I tried to reproduce your problem, but I couldn't. Maybe it would be great if you could share more code.
Meanwhile I created a simple job to read 100 records from the "safety" table and print them to the console, and it is working fine.
@SpringBootApplication
@EnableBatchProcessing
public class ReaderWriterProblem implements CommandLineRunner {
@Autowired
DataSource dataSource;
@Autowired
StepBuilderFactory stepBuilderFactory;
@Autowired
JobBuilderFactory jobBuilderFactory;
@Autowired
private JobLauncher jobLauncher;
@Autowired
private ApplicationContext context;
public static void main(String[] args) {
String[] arguments = new String[]{LocalDateTime.now().toString()};
SpringApplication.run(ReaderWriterProblem.class, arguments);
}
@Bean
public ItemWriter<Safety> safetyWriter() {
return new ItemWriter<Safety>() {
@Override
public void write(List<? extends Safety> items) throws Exception {
for (Safety item : items) {
//log.info(item.toString());
System.out.println(item);
}
}
};
}
// @Bean
// public JdbcCursorItemReader<Safety> cursorItemReader() throws Exception {
// JdbcCursorItemReader<Safety> reader = new JdbcCursorItemReader<>();
//
// reader.setSql("select * from safety ");
// reader.setDataSource(dataSource);
// reader.setRowMapper(new SafetyRowMapper());
// reader.setVerifyCursorPosition(false);
// reader.afterPropertiesSet();
//
// return reader;
// }
@Bean
JdbcPagingItemReader<Safety> safetyPagingItemReader() throws Exception {
JdbcPagingItemReader<Safety> reader = new JdbcPagingItemReader<>();
reader.setDataSource(dataSource);
reader.setFetchSize(10);
reader.setRowMapper(new SafetyRowMapper());
PostgresPagingQueryProvider queryProvider = new PostgresPagingQueryProvider();
queryProvider.setSelectClause("*");
queryProvider.setFromClause("safety");
Map<String, Order> sortKeys = new HashMap<>(1);
sortKeys.put("id", Order.ASCENDING);
queryProvider.setSortKeys(sortKeys);
reader.setQueryProvider(queryProvider);
return reader;
}
@Bean
public Step importSafetyDetails() throws Exception {
return stepBuilderFactory.get("importSafetyDetails")
.<Safety, Safety>chunk(5)
//.reader(cursorItemReader())
.reader(safetyPagingItemReader())
.writer(safetyWriter())
.listener(new MyStepListener())
.listener(new MyChunkListener())
.build();
}
@Bean
public Job job() throws Exception {
return jobBuilderFactory.get("job")
.listener(new JobListener())
.start(importSafetyDetails())
.build();
}
@Override
public void run(String... args) throws Exception {
JobParametersBuilder jobParametersBuilder = new JobParametersBuilder();
jobParametersBuilder.addString("date", LocalDateTime.now().toString());
try {
Job job = (Job) context.getBean("job");
jobLauncher.run(job, jobParametersBuilder.toJobParameters());
} catch (JobExecutionAlreadyRunningException | JobRestartException | JobInstanceAlreadyCompleteException | JobParametersInvalidException e) {
e.printStackTrace();
}
}
public static class JobListener implements JobExecutionListener {
@Override
public void beforeJob(JobExecution jobExecution) {
System.out.println("Before job");
}
@Override
public void afterJob(JobExecution jobExecution) {
System.out.println("After job");
}
}
private static class SafetyRowMapper implements RowMapper<Safety> {
@Override
public Safety mapRow(ResultSet resultSet, int i) throws SQLException {
Safety safety = new Safety();
safety.setId(resultSet.getLong("ID"));
return safety;
}
}
public static class MyStepListener implements StepExecutionListener {
@Override
public void beforeStep(StepExecution stepExecution) {
System.out.println("Before Step");
}
@Override
public ExitStatus afterStep(StepExecution stepExecution) {
System.out.println("After Step");
return ExitStatus.COMPLETED;
}
}
private static class MyChunkListener implements ChunkListener {
@Override
public void beforeChunk(ChunkContext context) {
System.out.println("Before Chunk");
}
@Override
public void afterChunk(ChunkContext context) {
System.out.println("After Chunk");
}
@Override
public void afterChunkError(ChunkContext context) {
}
}
}
Hope this helps
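A side note on the difference between the two SafetyRowMapper implementations (this is an observation, not part of the original answer): the working example's mapRow does not call resultSet.next(). JdbcCursorItemReader and JdbcPagingItemReader advance the ResultSet themselves before calling the RowMapper, so a mapper that calls next() again consumes an extra row per item, which matches the "alternate records" symptom in the question. A minimal mapper sketch, assuming the Safety class from the question:

public class SafetyRowMapper implements RowMapper<Safety> {
    @Override
    public Safety mapRow(ResultSet resultSet, int rowNum) throws SQLException {
        // the reader has already positioned the cursor; do not call resultSet.next() here
        Safety safety = new Safety();
        safety.setId(resultSet.getInt("id"));
        return safety;
    }
}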

Spring Boot Batch: get CSV from a REST API

I have a scenario for a Spring Boot batch application where I need to read CSV-1 and write it to another file, CSV-2. The problem is that I need to get CSV-1 from a REST endpoint: when I initiate the job, it should fetch CSV-1 from the endpoint and then continue the batch processing to write CSV-2.
But it seems that to process CSV-1 we need to provide the 'Resource' in advance at application startup [I am fairly new to batch, so not sure if that's 100% correct].
Can anyone please guide me on the right approach to resolve this?
EDIT: Adding code. I was able to resolve it, but I need advice on whether it's the right way to do things (please ignore the hard-coded data).
@Configuration
@EnableBatchProcessing
public class BatchConfig {
@Autowired
private JobBuilderFactory jobbuilder;
@Autowired
private StepBuilderFactory stepbuilder;
@Autowired
private CSVProcessor processor;
@Autowired
private CustomSkipPolicy customSkipPolicy;
@Autowired
private CSVResponse response;
@Bean(name="csv")
public Job job_csv() throws Exception {
Step step = stepbuilder
.get("csv-step")
.<Person, Person>chunk(5)
.reader(new CSVReader(response.getResource()))
.processor(processor)
.writer(new CSVWriter().write(response.getFilename()))
.faultTolerant()
.skipPolicy(customSkipPolicy)
.listener(new StepListener())
.build();
return jobbuilder
.get("csv-job")
.incrementer(new RunIdIncrementer())
.listener(new JobListener())
.flow(step)
.end()
.build();
}
}
@Slf4j
public class CSVReader extends FlatFileItemReader<Person> {
public CSVReader(Resource resource) throws Exception {
super();
setResource(resource);
setStrict(false);
setLinesToSkip(1);
doOpen();
DelimitedLineTokenizer dlt = new DelimitedLineTokenizer();
dlt.setNames(new String[] {"id","first_name","last_name","email","gender","ip_address","dob"});
dlt.setDelimiter(",");
dlt.setStrict(false);
BeanWrapperFieldSetMapper<Person> fsp = new BeanWrapperFieldSetMapper<>();
fsp.setTargetType(Person.class);
DefaultLineMapper<Person> dlp = new DefaultLineMapper<>();
dlp.setLineTokenizer(dlt);
dlp.setFieldSetMapper(fsp);
setLineMapper(dlp);
}
}
public class CSVWriter {
private static final String DATA_PROCESSED = "C:/data/processed";
public FlatFileItemWriter<Person> write(String filename) throws Exception {
FlatFileItemWriter<Person> writer = new FlatFileItemWriter<>();
writer.setResource(resource(filename));
writer.setHeaderCallback(new Header());
BeanWrapperFieldExtractor<Person> fe = new BeanWrapperFieldExtractor<>();
fe.setNames(new String[] { "id", "first_name", "last_name", "age" });
DelimitedLineAggregator<Person> dla = new DelimitedLineAggregator<>();
dla.setDelimiter(",");
dla.setFieldExtractor(fe);
writer.setLineAggregator(dla);
return writer;
}
private Resource resource(String filename) throws IOException {
String processed_file = filename.replace(".csv", "").concat("_PROCESSED").concat(".csv");
if (Files.notExists(Paths.get(DATA_PROCESSED), new LinkOption[] { LinkOption.NOFOLLOW_LINKS })) {
Files.createDirectory(Paths.get(DATA_PROCESSED));
}
if (Files.notExists(Paths.get(DATA_PROCESSED + "/" + processed_file),
new LinkOption[] { LinkOption.NOFOLLOW_LINKS })) {
Files.createFile(Paths.get(DATA_PROCESSED + "/" + processed_file));
}
return new FileSystemResource(DATA_PROCESSED + "/" + processed_file);
}
}
@Component
@Slf4j
public class CSVResponse {
private static final String DATA_RECEIVE = "C:/data/receive";
private String filename;
public Resource getResource() throws IOException {
ResponseEntity<Resource> resp = new RestTemplate().getForEntity("http://localhost:8081/csv", Resource.class);
log.info("File Received : "+resp.getBody().getFilename());
setFilename(resp.getBody().getFilename());
Path path = Paths.get(DATA_RECEIVE);
if(!Files.exists(path, new LinkOption[]{ LinkOption.NOFOLLOW_LINKS})) {
Files.createDirectory(path);
}
Files.copy(resp.getBody().getInputStream(), Paths.get(DATA_RECEIVE + "/" +resp.getBody().getFilename()), StandardCopyOption.REPLACE_EXISTING);
return resp.getBody();
}
public String getFilename() {
return filename;
}
public void setFilename(String filename) {
this.filename = filename;
}
}
You can implement your item reader with the StepExecutionListener interface and fetch the CSV-1 file via RestTemplate in the reader's beforeStep() method.
You can see how to use RestTemplate at https://www.baeldung.com/rest-template
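As a rough sketch of that suggestion (not production code): extend FlatFileItemReader, implement StepExecutionListener, download the file in beforeStep(), and hand it to the reader before the step opens it. The endpoint URL and the Person type are taken from the question's code; the class name and the use of ByteArrayResource are illustrative assumptions.

public class RestCsvItemReader extends FlatFileItemReader<Person> implements StepExecutionListener {

    public RestCsvItemReader() {
        // configure the LineMapper here, as in the question's CSVReader
    }

    @Override
    public void beforeStep(StepExecution stepExecution) {
        // fetch CSV-1 from the REST endpoint and set it as the resource before the step opens the reader
        ResponseEntity<byte[]> response =
                new RestTemplate().getForEntity("http://localhost:8081/csv", byte[].class);
        setResource(new ByteArrayResource(response.getBody()));
        setLinesToSkip(1);
    }

    @Override
    public ExitStatus afterStep(StepExecution stepExecution) {
        return stepExecution.getExitStatus();
    }
}

Depending on how the step is assembled, the reader may also need to be registered explicitly as a listener on the step (for example with .listener(reader) on the step builder) so that beforeStep() is actually invoked.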

Spring Batch MultiLineItemReader with MultiResourcePartitioner

I have a file which has multi-line data like this. DataID is the start of a new record, e.g. one record is a combination of the ID and the concatenation of the lines below it until the start of the next record.
>DataID1
Line1asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
Line3asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID2
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
>DataID3
DataID2asdfsafsdgdsfghfghfghjfgjghjgxcvmcxnvm
I was able to implement this using SingleItemPeekableItemReader and it's working fine.
I am now trying to implement partitioning, as we need to process multiple files. I am not sure how the partitioner passes the file info to my custom reader, or how to make my SingleItemPeekableItemReader thread-safe, as it is not working correctly.
I need some inputs, as I am stuck at this point.
Java config:
@Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
partitioner.setResources(resources);
partitioner.partition(10);
return partitioner;
}
@Bean
public TaskExecutor taskExecutor() {
ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
taskExecutor.setMaxPoolSize(4);
taskExecutor.setCorePoolSize(4);
taskExecutor.setQueueCapacity(8);
taskExecutor.afterPropertiesSet();
return taskExecutor;
}
@Bean
@Qualifier("masterStep")
public Step masterStep() {
return stepBuilderFactory.get("masterStep")
.partitioner("step1",partitioner())
.step(step1())
.taskExecutor(taskExecutor())
.build();
}
@Bean
public MultiResourceItemReader<FieldSet> multiResourceItemReader() {
log.info("Total Number of Files to be process {}",resources.length);
report.setFileCount(resources.length);
MultiResourceItemReader<FieldSet> resourceItemReader = new MultiResourceItemReader<FieldSet>();
resourceItemReader.setResources(resources);
resourceItemReader.setDelegate(reader());
return resourceItemReader;
}
@Bean
public FlatFileItemReader<FieldSet> reader() {
FlatFileItemReader<FieldSet> build = new FlatFileItemReaderBuilder<FieldSet>().name("fileReader")
.lineTokenizer(orderFileTokenizer())
.fieldSetMapper(new FastFieldSetMapper())
.recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
.build();
build.setBufferedReaderFactory(gzipBufferedReaderFactory);
return build;
}
@Bean
public SingleItemPeekableItemReader<FieldSet> readerPeek() {
SingleItemPeekableItemReader<FieldSet> reader = new SingleItemPeekableItemReader<>();
reader.setDelegate(multiResourceItemReader());
return reader;
}
@Bean
public MultiLineFastaItemReader itemReader() {
MultiLineFastaItemReader itemReader = new MultiLineFastaItemReader(multiResourceItemReader());
itemReader.setSingalPeekable(readerPeek());
return itemReader;
}
@Bean
public PatternMatchingCompositeLineTokenizer orderFileTokenizer() {
PatternMatchingCompositeLineTokenizer tokenizer = new PatternMatchingCompositeLineTokenizer();
Map<String, LineTokenizer> tokenizers = new HashMap<>(2);
tokenizers.put(">*", head());
tokenizers.put("*", tail());
tokenizer.setTokenizers(tokenizers);
return tokenizer;
}
public DelimitedLineTokenizer head() {
DelimitedLineTokenizer token = new DelimitedLineTokenizer();
token.setNames("sequenceIdentifier");
token.setDelimiter(" ");
token.setStrict(false);
return token;
}
public DelimitedLineTokenizer tail() {
DelimitedLineTokenizer token = new DelimitedLineTokenizer();
token.setNames("sequences");
token.setDelimiter(" ");
return token;
}
@Bean
public FastReportWriter writer() {
return new FastReportWriter();
}
@Bean
public Job importUserJob(JobCompletionNotificationListener listener, Step step1) {
return jobBuilderFactory.get("importUserJob")
.incrementer(new RunIdIncrementer())
.listener(listener)
.flow(masterStep())
//.flow(step1)
.next(step2())
.end()
.build();
}
@Bean
public Step step1() {
return stepBuilderFactory.get("step1")
.<Fasta, Fasta>chunk(5000)
.reader(itemReader())
.processor(new FastaIteamProcessor())
//.processor(new PassThroughItemProcessor<>())
.writer(writer())
.build();
}
public class MultiLineFastaItemReader implements ItemReader<Fasta>, ItemStream {
private static final Logger log = LoggerFactory.getLogger(MultiLineFastaItemReader.class);
private SingleItemPeekableItemReader<FieldSet> singalPeekable;
AtomicInteger iteamCounter = new AtomicInteger(0);
ConcurrentHashMap<String, AtomicInteger> fileNameAndCounterMap = new ConcurrentHashMap<>();
@Autowired
private SequenceFastaReport sequenceFastaReport;
private MultiResourceItemReader<FieldSet> resourceItemReader;
public MultiLineFastaItemReader(MultiResourceItemReader<FieldSet> multiResourceItemReader) {
this.resourceItemReader = multiResourceItemReader;
}
public SingleItemPeekableItemReader<FieldSet> getSingalPeekable() {
return singalPeekable;
}
public void setSingalPeekable(SingleItemPeekableItemReader<FieldSet> singalPeekable) {
this.singalPeekable = singalPeekable;
}
@Override
public Fasta read() throws Exception {
FieldSet item = singalPeekable.read();
if (item == null) {
return null;
}
Fasta fastaObject = new Fasta();
log.info("ID {} fileName {}", item.readString(0), resourceItemReader.getCurrentResource());
fastaObject.setSequenceIdentifier(item.readString(0)
.toUpperCase());
fastaObject.setFileName(resourceItemReader.getCurrentResource()
.getFilename());
if (!fileNameAndCounterMap.containsKey(fastaObject.getFileName())) {
fileNameAndCounterMap.put(fastaObject.getFileName(), new AtomicInteger(0));
}
while (true) {
FieldSet possibleRelatedObject = singalPeekable.peek();
if (possibleRelatedObject == null) {
if (fastaObject.getSequenceIdentifier()
.length() < 1)
throw new InvalidParameterException("Somwthing Wrong in file");
sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
.incrementAndGet(), fastaObject.getSequences());
return fastaObject;
}
if (possibleRelatedObject.readString(0)
.startsWith(">")) {
if (fastaObject.getSequenceIdentifier()
.length() < 1)
throw new InvalidParameterException("Somwthing Wrong in file");
sequenceFastaReport.addToReport(fileNameAndCounterMap.get(fastaObject.getFileName())
.incrementAndGet(), fastaObject.getSequences());
return fastaObject;
}
String data = fastaObject.getSequences()
.toUpperCase();
fastaObject.setSequences(data + singalPeekable.read()
.readString(0)
.toUpperCase());
}
}
@Override
public void close() {
this.singalPeekable.close();
}
@Override
public void open(ExecutionContext executionContext) {
this.singalPeekable.open(executionContext);
}
@Override
public void update(ExecutionContext executionContext) {
this.singalPeekable.update(executionContext);
}
}
"I am not sure how the partitioner is passing file info to my custom reader"
The partitioner creates partition meta-data in the step execution contexts, and your reader should read that meta-data from there. In your example, you don't need to call partition on the partitioner; Spring Batch will do that for you. Instead, you need to set the partition key name on the partitioner, for example:
@Bean
public Partitioner partitioner() {
MultiResourcePartitioner partitioner = new MultiResourcePartitioner();
partitioner.setResources(resources);
partitioner.setKeyName("file");
return partitioner;
}
This will create a partition for each file, with the key file, which you can get in your reader from the step execution context:
@Bean
@StepScope
public FlatFileItemReader reader(@Value("#{stepExecutionContext['file']}") String file) {
// define your reader
}
Note that the reader should be step scoped to use this feature. More details here: https://docs.spring.io/spring-batch/4.0.x/reference/html/step.html#late-binding
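For illustration, a minimal step-scoped delegate reader sketch using that late-bound value (the tokenizer, field-set mapper, and record separator policy are the ones from the question; the bean name and the Resource parameter type are assumptions, relying on Spring converting the URL string stored by MultiResourcePartitioner into a Resource):

@Bean
@StepScope
public FlatFileItemReader<FieldSet> partitionedFileReader(
        @Value("#{stepExecutionContext['file']}") Resource file) {
    return new FlatFileItemReaderBuilder<FieldSet>()
            .name("partitionedFileReader")
            .resource(file)                                  // one file per partition
            .lineTokenizer(orderFileTokenizer())             // from the question's config
            .fieldSetMapper(new FastFieldSetMapper())        // from the question's config
            .recordSeparatorPolicy(new BlankLineRecordSeparatorPolicy())
            .build();
}

The SingleItemPeekableItemReader and MultiLineFastaItemReader that wrap it would need to be step-scoped as well, so that each partition (and each worker thread) gets its own instances.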

Why the itemReader is always sending the exact same value to CustomItemProcessor

Why is the itemReader method always sending the exact same file name to be processed in CustomItemProcessor?
As far as I understand, since I set up the reader as @Scope and set more than 1 in the chunk, I was expecting the "return s" to move forward to the next value of the String array.
Let me clarify my question with a debug example of the reader method:
1 - the variable stringArray is filled with 3 file names (f1.txt, f2.txt and f3.txt)
2 - "return s" is evoked with s = f1.txt
3 - "return s" is evoked again before the customItemProcessor method is evoked (perfect until here, since chunk = 2)
4 - looking at s, it contains f1.txt again (different from what I expected; I expected f2.txt)
5 and 6 - the processor runs with the same name f1.txt (it would work correctly if the second round of "return s" contained f2.txt)
7 - the writer method works as expected (processedFiles contains twice the name processed in customItemProcessor: f1.txt and f1.txt again, since the same name was processed twice)
CustomItemReader
public class CustomItemReader implements ItemReader<String> {
@Override
public String read() throws Exception, UnexpectedInputException,
ParseException, NonTransientResourceException {
String[] stringArray;
try (Stream<Path> stream = Files.list(Paths.get(env
.getProperty("my.path")))) {
stringArray = stream.map(String::valueOf)
.filter(path -> path.endsWith("out"))
.toArray(size -> new String[size]);
}
//*** the problem is here
//every turn s variable receives the first file name from the stringArray
if (stringArray.length > 0) {
for (String s : stringArray) {
return s;
}
} else {
log.info("read method - no file found");
return null;
}
return null;
}
}
CustomItemProcessor
public class CustomItemProcessor implements ItemProcessor<String , String> {
@Override
public String process(String singleFileToProcess) throws Exception {
log.info("process method: " + singleFileToProcess);
return singleFileToProcess;
}
}
CustomItemWriter
public class CustomItemWriter implements ItemWriter<String> {
private static final Logger log = LoggerFactory
.getLogger(CustomItemWriter.class);
@Override
public void write(List<? extends String> processedFiles) throws Exception {
processedFiles.stream().forEach(
processedFile -> log.info("**** write method"
+ processedFile.toString()));
FileSystem fs = FileSystems.getDefault();
for (String s : processedFiles) {
Files.deleteIfExists(fs.getPath(s));
}
}
}
Configuration
@Configuration
@ComponentScan(...
@EnableBatchProcessing
@EnableScheduling
@PropertySource(...
public class BatchConfig {
@Autowired
private JobBuilderFactory jobBuilderFactory;
@Autowired
private StepBuilderFactory stepBuilderFactory;
@Autowired
private JobRepository jobRepository;
@Bean
public TaskExecutor getTaskExecutor() {
return new TaskExecutor() {
@Override
public void execute(Runnable task) {
}
};
}
// I can see that the number in chunk reflects how many times customReader is triggered before customProcessor is triggered
@Bean
public Step step1(ItemReader<String> reader,
ItemProcessor<String, String> processor, ItemWriter<String> writer) {
return stepBuilderFactory.get("step1").<String, String> chunk(2)
.reader(reader).processor(processor).writer(writer)
.allowStartIfComplete(true).build();
}
@Bean
@Scope
public ItemReader<String> reader() {
return new CustomItemReader();
}
@Bean
public ItemProcessor<String, String> processor() {
return new CustomItemProcessor();
}
@Bean
public ItemWriter<String> writer() {
return new CustomItemWriter();
}
@Bean
public Job job(Step step1) throws Exception {
return jobBuilderFactory.get("job1").incrementer(new RunIdIncrementer()).start(step1).build();
}
}
Scheduler
@Component
public class QueueScheduler {
private static final Logger log = LoggerFactory
.getLogger(QueueScheduler.class);
private Job job;
private JobLauncher jobLauncher;
@Autowired
public QueueScheduler(JobLauncher jobLauncher, #Qualifier("job") Job job){
this.job = job;
this.jobLauncher = jobLauncher;
}
@Scheduled(fixedRate=60000)
public void runJob(){
try{
jobLauncher.run(job, new JobParameters());
}catch(Exception ex){
log.info(ex.getMessage());
}
}
}
Your issue is that you are relying on an internal loop to iterate over the items instead of letting Spring Batch do it for you by calling ItemReader#read multiple times.
What I'd recommend is changing your reader to something like the following:
public class JimsItemReader implements ItemStreamReader<String> {
private String[] items;
private int curIndex = -1;
@Override
public void open(ExecutionContext ec) {
curIndex = ec.getInt("curIndex", -1);
try (Stream<Path> stream = Files.list(Paths.get(env.getProperty("my.path")))) {
items = stream.map(String::valueOf)
.filter(path -> path.endsWith("out"))
.toArray(size -> new String[size]);
} catch (IOException e) {
throw new ItemStreamException("Unable to list files", e);
}
}
@Override
public void update(ExecutionContext ec) {
ec.putInt("curIndex", curIndex);
}
@Override
public String read() {
curIndex++;
if (curIndex < items.length) {
return items[curIndex];
} else {
return null;
}
}
}
The above example should loop through the items of your array as they are read. It should also be restartable: since we store the index in the ExecutionContext, if the job is restarted after a failure you'll pick up where you left off.
