Get file name from readMultiFileJob in Spring Batch

The following is my Spring Batch processing config file. I am reading multiple files (XML, CSV, etc.) that are generated dynamically with a timestamp as a suffix. I can read the files' data and process it. Now the question is:
I would like to know the file name.
How can I get the file name while the job is processing?
<import resource="../config/context.xml" />
<bean id="domain" class="com.di.pos.Domain" />
<job id="readMultiFileJob" xmlns="http://www.springframework.org/schema/batch">
<step id="step1">
<tasklet>
<chunk reader="multiResourceReader" writer="flatFileItemWriter"
commit-interval="1" />
</tasklet>
</step>
</job>
<bean id="multiResourceReader"
class=" org.springframework.batch.item.file.MultiResourceItemReader">
<property name="resources" value="file:csv/inputs/dipos-*.csv" />
<property name="delegate" ref="flatFileItemReader" />
</bean>
<bean id="flatFileItemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
<property name="lineMapper">
<bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
<property name="lineTokenizer">
<bean
class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
<property name="names" value="id, name" />
</bean>
</property>
<property name="fieldSetMapper">
<bean
class="org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper">
<property name="prototypeBeanName" value="domain" />
</bean>
</property>
</bean>
</property>
</bean>
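For context, the domain prototype bean that BeanWrapperFieldSetMapper populates needs properties matching the tokenizer names (id, name). The class itself is not shown in the question, so this is only a sketch:
package com.di.pos;

public class Domain {
    private String id;
    private String name;

    // BeanWrapperFieldSetMapper binds the tokenized "id" and "name" fields
    // to these properties by name
    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
}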

Create a custom mapper that implements LineMapper and override the mapLine() method. Inject the MultiResourceItemReader so the mapper can ask it which resource is currently being read:
public class FileNameMapper implements LineMapper<FileData> {
    // The multiResourceReader bean; it exposes the resource currently being read
    private MultiResourceItemReader<FileData> delegator;

    @Override
    public FileData mapLine(String line, int lineNumber) throws Exception {
        FileData fileData = new FileData();
        // getFilename() returns the name only, without the directory part
        String fileName = delegator.getCurrentResource().getFilename();
        // Use this to access the full file path
        URI fileUri = delegator.getCurrentResource().getURI();
        return fileData;
    }

    public void setDelegator(MultiResourceItemReader<FileData> delegator) { this.delegator = delegator; }
}
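To wire it in, replace the DefaultLineMapper in the configuration above with the custom mapper and point it back at the reader. The fileNameMapper bean id and FileNameMapper class are illustrative names; Spring resolves the setter-injection cycle between the two singleton beans:
<bean id="fileNameMapper" class="com.di.pos.FileNameMapper">
<property name="delegator" ref="multiResourceReader" />
</bean>
<bean id="flatFileItemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
<property name="lineMapper" ref="fileNameMapper" />
</bean>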

Related

Spring Batch partition doesn't work with composite ItemProcessor

I have a Spring Batch partition job using a CompositeProcessor. I read data from the DB and save the items into a CopyOnWriteArrayList, and the output is written into one file per thread. The environment is concurrent, but my CopyOnWriteArrayList is also used by the other threads and the information gets mixed; I don't know why or what I am doing wrong.
public class CustomerItemProcessor implements ItemProcessor<beangenerico, CopyOnWriteArrayList<beanCustomer>> {
    private CustomerDAO customerDAO;
    private CopyOnWriteArrayList<beanCustomer> listbean;

    public CopyOnWriteArrayList<beanCustomer> process(beangenerico rangos) throws Exception {
        listbean = customerDAO.getAccAgentes(rangos);
        if (listbean != null) {
            //customer.setId(currentCustomer.getId());
            return listbean;
        } else {
            return null;
        }
    }
}
The configuration of my batch in XML:
<batch:job id="partitionJob" xmlns="http://www.springframework.org/schema/batch">
<batch:step id="masterStep">
<batch:partition step="slave" partitioner="rangePartitioner">
<batch:handler grid-size="10" task-executor="taskExecutor"/>
</batch:partition>
</batch:step>
</batch:job>
<!-- each thread will run this job, with different stepExecutionContext values. -->
<batch:step id="slave" xmlns="http://www.springframework.org/schema/batch">
<batch:tasklet task-executor="taskExecutor" throttle-limit="1">
<batch:chunk reader="beaniniendreader" writer="tempRecordsWriter" processor="completeItemProcessor" commit-interval="1" />
</batch:tasklet>
</batch:step>
<bean id="taskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor" />
<bean id="rangePartitioner" class="my.package.springbatch.RangePartitioner" />
<bean id="beaniniendreader" class="my.package.springbatch.FormiikReader" scope="step"></bean>
<bean id="beanprocessor" class="my.package.springbatch.FormiikProcessor" scope="step">
<property name="accountExecutiveDao" ref="accountExecutiveDao"/>
</bean>
<bean id="beanprocessor2" class="my.package.springbatch.CustomerItemProcessor" scope="step">
<property name="customerDAO" ref="customerAccDao"/>
</bean>
<bean id="completeItemProcessor" class="org.springframework.batch.item.support.CompositeItemProcessor">
<property name="delegates">
<list>
<ref bean="beanprocessor2"/>
<ref bean="accItemprocessor"/>
<ref bean="beanaccDataItem"/>
</list>
</property>
</bean>
<bean id="tempRecordsWriter" class="my.package.springbatch.ListDelegateWriter" scope="step">
<property name="delegate" ref="flatFileItemWriterPartition"/>
</bean>
<!-- csv file writer -->
<bean id="flatFileItemWriterPartition" class="org.springframework.batch.item.file.FlatFileItemWriter"
scope="step" >
<property name="resource"
value="file:csv/outputs/users.processed#{stepExecutionContext[fromId]}-#{stepExecutionContext[toId]}.csv" />
<property name="appendAllowed" value="false" />
<property name="lineAggregator">
<bean class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
<property name="delimiter" value="," />
<property name="fieldExtractor">
<bean class="org.springframework.batch.item.file.transform.BeanWrapperFieldExtractor">
<property name="names" value="cuenta, name, purchasedPackage" />
</bean>
</property>
</bean>
</property>
</bean>
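The my.package.springbatch.RangePartitioner referenced above is not shown in the question; a minimal sketch of a partitioner that supplies the fromId/toId values used in the writer's resource expression might look like this (the fixed range of 1000 ids per partition is an assumption):
import java.util.HashMap;
import java.util.Map;
import org.springframework.batch.core.partition.support.Partitioner;
import org.springframework.batch.item.ExecutionContext;

public class RangePartitioner implements Partitioner {
    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        Map<String, ExecutionContext> result = new HashMap<String, ExecutionContext>();
        int range = 1000; // assumed number of ids per partition
        for (int i = 0; i < gridSize; i++) {
            ExecutionContext ctx = new ExecutionContext();
            // These keys are what #{stepExecutionContext[fromId]} and [toId] resolve to
            ctx.putInt("fromId", i * range + 1);
            ctx.putInt("toId", (i + 1) * range);
            result.put("partition" + i, ctx);
        }
        return result;
    }
}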
Coming back to this subject: I was advised to use a ThreadLocal to store the thread-specific data, and that works. Here I put my code again. Thanks for your replies.
public class CustomerItemProcessor implements ItemProcessor<beangenerico, ThreadLocal<CopyOnWriteArrayList<beanCustomer>>> {
    private CustomerDAO customerDAO;
    private ThreadLocal<CopyOnWriteArrayList<beanCustomer>> listbean =
            new ThreadLocal<CopyOnWriteArrayList<beanCustomer>>();

    public ThreadLocal<CopyOnWriteArrayList<beanCustomer>> process(beangenerico rangos) throws Exception {
        // Store the DAO result in the ThreadLocal so each worker thread only sees its own list
        listbean.set(customerDAO.getAccAgentes(rangos));
        if (listbean.get() != null) {
            return listbean;
        } else {
            return null;
        }
    }

    public void setCustomerDAO(CustomerDAO customerDAO) {
        this.customerDAO = customerDAO;
    }
}
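The custom ListDelegateWriter behind tempRecordsWriter is also not shown in the question. A minimal sketch of what such a delegating writer could look like, assuming it unwraps each ThreadLocal and hands the inner list to the configured FlatFileItemWriter:
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import org.springframework.batch.item.ItemWriter;

public class ListDelegateWriter implements ItemWriter<ThreadLocal<CopyOnWriteArrayList<beanCustomer>>> {
    private ItemWriter<beanCustomer> delegate;

    @Override
    public void write(List<? extends ThreadLocal<CopyOnWriteArrayList<beanCustomer>>> items) throws Exception {
        // Unwrap the per-thread lists and flatten them for the delegate writer
        List<beanCustomer> flattened = new ArrayList<beanCustomer>();
        for (ThreadLocal<CopyOnWriteArrayList<beanCustomer>> holder : items) {
            CopyOnWriteArrayList<beanCustomer> list = holder.get();
            if (list != null) {
                flattened.addAll(list);
            }
        }
        delegate.write(flattened);
    }

    public void setDelegate(ItemWriter<beanCustomer> delegate) {
        this.delegate = delegate;
    }
}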

JDBCBatchItemWriter not receiving the List for batch update

I'm a newbie to Spring Batch. My requirement is to fetch records from a DB table, process them (each record can be processed independently, so I'm partitioning and using a task executor), and then update the status column in the same table based on the processing status.
A simplified version of my code is below.
Item Reader (my custom column partitioner decides the min and max values below):
<bean name="databaseReader" class="org.springframework.batch.item.database.JdbcCursorItemReader" scope="step">
<property name="dataSource" ref="dataSource"/>
<property name="sql">
<value>
<![CDATA[
select id,user_login,user_pass,age from users where id >= #{stepExecutionContext['minValue']} and id <= #{stepExecutionContext['maxValue']}
]]>
</value>
</property>
<property name="rowMapper">
<bean class="com.springapp.batch.UserRowMapper" />
</property>
<property name="verifyCursorPosition" value="false"/>
</bean>
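The com.springapp.batch.UserRowMapper used above is not shown; a minimal sketch mapping the selected columns onto the Users item:
import java.sql.ResultSet;
import java.sql.SQLException;
import org.springframework.jdbc.core.RowMapper;

public class UserRowMapper implements RowMapper<Users> {
    @Override
    public Users mapRow(ResultSet rs, int rowNum) throws SQLException {
        Users user = new Users();
        user.setId(rs.getLong("id"));
        // user_login, user_pass and age would be mapped the same way
        return user;
    }
}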
Item Processor:
<bean id="itemProcessor" class="com.springapp.batch.UserItemProcessor" scope="step"/>
....
public class UserItemProcessor implements ItemProcessor<Users, Users> {
    @Override
    public Users process(Users users) throws Exception {
        // do some processing here..
        // update user status
        //users.setStatus(users.getId() + ": Processed by :" + Thread.currentThread().getName() + ": Processed at :" + new GregorianCalendar().getTime().toString());
        //System.out.println("Processing user :" + users + " :" + Thread.currentThread().getName());
        return users;
    }
}
Item Writer:
<bean id="databaseWriter" class="org.springframework.batch.item.database.JdbcBatchItemWriter">
<property name="dataSource" ref="dataSource" />
<property name="sql">
<value>
<![CDATA[
update users set status = :status where id= :id
]]>
</value>
</property>
<property name="itemSqlParameterSourceProvider">
<bean class="org.springframework.batch.item.database.BeanPropertyItemSqlParameterSourceProvider" />
</property>
</bean>
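For the :status and :id placeholders to bind, BeanPropertyItemSqlParameterSourceProvider resolves them against same-named properties of the Users item. The class is not shown in the question; a minimal sketch of the relevant parts:
public class Users {
    private long id;
    private String status;
    // user_login, user_pass and age fields omitted for brevity

    // :id and :status in the writer SQL bind to these getters by name
    public long getId() { return id; }
    public void setId(long id) { this.id = id; }
    public String getStatus() { return status; }
    public void setStatus(String status) { this.status = status; }
}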
Step configuration:
<batch:job id="usersJob">
<batch:step id="stepOne">
<batch:partition step="worker" partitioner="myColumnRangepartitioner" handler="partitionHandler" />
</batch:step>
</batch:job>
<batch:step id="worker" >
<batch:tasklet transaction-manager="transactionManager">
<batch:chunk reader="databaseReader" writer="databaseWriter" commit-interval="5" processor="itemProcessor" />
</batch:tasklet>
</batch:step>
<bean id="asyncTaskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor" />
<bean id="partitionHandler" class="org.springframework.batch.core.partition.support.TaskExecutorPartitionHandler" scope="step">
<property name="taskExecutor" ref="asyncTaskExecutor"/>
<property name="step" ref="worker" />
<property name="gridSize" value="3" />
</bean>
Since I have specified the commit interval as 5, my understanding is that when 5 items have been processed by a partition, the JdbcBatchItemWriter will be called with a list of 5 Users objects to perform a batched JDBC update. However, with the current setup I'm receiving one User object at a time during the batch update.
Is my understanding correct, or am I missing a step or some configuration?
Note: I'm using an HSQL file-based database for testing.
<bean id="dataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close">
<property name="driverClassName" value="org.hsqldb.jdbc.JDBCDriver"/>
<property name="url" value="jdbc:hsqldb:file:C://users.txt"/>
<property name="username" value="sa"/>
<property name="password" value=""/>
</bean>
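One way to see how many items actually reach the writer per chunk is to register an ItemWriteListener on the worker step (via a <batch:listeners> element inside the tasklet) and log the list size. The listener class name here is hypothetical:
import java.util.List;
import org.springframework.batch.core.ItemWriteListener;

public class ChunkSizeLoggingListener implements ItemWriteListener<Users> {
    @Override
    public void beforeWrite(List<? extends Users> items) {
        // Shows how many items the chunk hands to the writer in a single call
        System.out.println("Writing " + items.size() + " items on " + Thread.currentThread().getName());
    }

    @Override
    public void afterWrite(List<? extends Users> items) { }

    @Override
    public void onWriteError(Exception exception, List<? extends Users> items) { }
}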

Multiple input file Spring Batch

I'm trying to develop a batch job with Spring Batch that can process a directory containing files.
I looked at the MultiResourcePartitioner and tried something like:
<job parent="loggerParent" id="importContractESTD" xmlns="http://www.springframework.org/schema/batch">
<step id="multiImportContractESTD">
<batch:partition step="partitionImportContractESTD" partitioner="partitioner">
<batch:handler grid-size="5" task-executor="taskExecutor" />
</batch:partition>
</step>
</job>
<bean id="partitioner" class="org.springframework.batch.core.partition.support.MultiResourcePartitioner">
<property name="keyName" value="inputfile" />
<property name="resources" value="file:${import.contract.filePattern}" />
</bean>
<step id="partitionImportContractESTD" xmlns="http://www.springframework.org/schema/batch">
<batch:job ref="importOneContractESTD" job-parameters-extractor="defaultJobParametersExtractor" />
</step>
<bean id="defaultJobParametersExtractor" class="org.springframework.batch.core.step.job.DefaultJobParametersExtractor"
scope="step" />
<!-- Job importContractESTD definition -->
<job parent="loggerParent" id="importOneContractESTD" xmlns="http://www.springframework.org/schema/batch">
<step parent="baseStep" id="initStep" next="calculateMD5">
<tasklet ref="initTasklet" />
</step>
<step id="calculateMD5" next="importContract">
<tasklet ref="md5Tasklet">
<batch:listeners>
<batch:listener ref="md5Tasklet" />
</batch:listeners>
</tasklet>
</step>
<step id="importContract">
<tasklet>
<chunk reader="contractReader" processor="contractProcessor" writer="contractWriter" commit-interval="${commit.interval}" />
<batch:listeners>
<batch:listener ref="contractProcessor" />
</batch:listeners>
</tasklet>
</step>
</job>
<!-- Chunk definition : Contract ItemReader -->
<bean id="contractReader" class="com.sopra.banking.cirbe.acquisition.batch.AcquisitionFileReader" scope="step">
<property name="resource" value="#{stepExecutionContext[inputfile]}" />
<property name="lineMapper">
<bean id="contractLineMappe" class="org.springframework.batch.item.file.mapping.PatternMatchingCompositeLineMapper">
<property name="tokenizers">
<map>
<entry key="1*" value-ref="headerTokenizer" />
<entry key="2*" value-ref="contractTokenizer" />
</map>
</property>
<property name="fieldSetMappers">
<map>
<entry key="1*" value-ref="headerMapper" />
<entry key="2*" value-ref="contractMapper" />
</map>
</property>
</bean>
</property>
</bean>
<!-- MD5 Tasklet -->
<bean id="md5Tasklet" class="com.sopra.banking.cirbe.acquisition.batch.AcquisitionMD5Tasklet">
<property name="file" value="#{stepExecutionContext[inputfile]}" />
</bean>
But what I get is:
Caused by: org.springframework.expression.spel.SpelEvaluationException: EL1008E:(pos 0): Field or property 'stepExecutionContext' cannot be found on object of type 'org.springframework.beans.factory.config.BeanExpressionContext'
What I'm looking for is a way to launch my job importOneContractESTD for each file contained in file:${import.contract.filePattern}. Each file is shared between the step calculateMD5 (which puts the processed file's MD5 into my job context) and the step importContract (which reads that MD5 back from the job context to add it as data to each line processed by the contractProcessor).
If I only call importOneContractESTD with one file given as a parameter (e.g., replacing #{stepExecutionContext[inputfile]} with ${my.file}), it works. But I want to use Spring Batch to manage my directory rather than my calling shell script.
Thanks for your ideas!
Add scope="step" when you need to access stepExecutionContext
like here:
<bean id="md5Tasklet" class="com.sopra.banking.cirbe.acquisition.batch.AcquisitionMD5Tasklet" scope="step">
<property name="file" value="#{stepExecutionContext[inputfile]}" />
</bean>
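For reference, a tasklet like md5Tasklet might compute the hash and promote it to the job execution context along these lines. The actual AcquisitionMD5Tasklet is not shown in the question, so this is only a sketch, and the "md5" context key is an assumption:
import java.io.InputStream;
import org.apache.commons.codec.digest.DigestUtils;
import org.springframework.batch.core.StepContribution;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.core.step.tasklet.Tasklet;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.core.io.Resource;

public class AcquisitionMD5Tasklet implements Tasklet {
    private Resource file;

    @Override
    public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
        // Hash the injected file (DigestUtils comes from commons-codec)
        String md5;
        try (InputStream in = file.getInputStream()) {
            md5 = DigestUtils.md5Hex(in);
        }
        // Promote the hash to the job execution context so a later step can read it
        chunkContext.getStepContext().getStepExecution()
                .getJobExecution().getExecutionContext().putString("md5", md5);
        return RepeatStatus.FINISHED;
    }

    public void setFile(Resource file) {
        this.file = file;
    }
}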

Regarding Spring Batch

I am very new to Spring Batch. I just tried, in vain, to run a simple Spring Batch example that I got from the net. The example just tries to read from a flat file and write the contents into a separate flat file.
Below is my XML, which is loaded into the context (through a ContextLoaderListener).
test-batch.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://172.18.7.57/spring-beans.dtd">
<beans>
<bean id="simpleJob" class="org.springframework.batch.core.job.SimpleJob">
<property name="name" value="simpleJob" />
<property name="steps">
<list>
<ref local="step" />
</list>
</property>
<property name="jobRepository" ref="jobRepository" />
</bean>
<bean id="jobLauncher" class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository" />
</bean>
<bean id="jobRepository" class="org.springframework.batch.core.repository.support.SimpleJobRepository">
<constructor-arg>
<bean class="org.springframework.batch.core.repository.dao.MapJobInstanceDao" />
</constructor-arg>
<constructor-arg>
<bean class="org.springframework.batch.core.repository.dao.MapJobExecutionDao" />
</constructor-arg>
<constructor-arg>
<bean class="org.springframework.batch.core.repository.dao.MapStepExecutionDao" />
</constructor-arg>
</bean>
<bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />
<bean id="step" class="org.springframework.batch.core.step.item.SimpleStepFactoryBean">
<property name="transactionManager" ref="transactionManager" />
<property name="jobRepository" ref="jobRepository" />
<property name="itemReader" ref="itemReader" />
<property name="itemWriter" ref="itemWriter" />
</bean>
<bean id="lineAggregator" class="org.springframework.batch.item.file.transform.DelimitedLineAggregator">
<property name="delimiter" value="|" />
</bean>
<bean id="itemWriter" class="org.springframework.batch.item.file.FlatFileItemWriter">
<property name="fieldSetCreator" ref="fieldSetMapper" />
<property name="lineAggregator" ref="lineAggregator" />
<property name="resource" value="file:/hello2.txt" />
</bean>
<bean id="lineTokenizer" class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer" />
<bean id="fieldSetMapper" class="org.springframework.batch.item.file.mapping.PassThroughFieldSetMapper" />
<bean id="itemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
<property name="resource" value="file:/hello1.txt" />
<property name="lineTokenizer" ref="lineTokenizer" />
<property name="fieldSetMapper" ref="fieldSetMapper" />
</bean>
</beans>
As I don't have Maven, I tried launching this job from my Java code like this:
ApplicationContext context = new FileSystemXmlApplicationContext(
        "D:\\jboss-4.0.5.GA\\server\\default\\deploy\\test.war\\WEB-INF\\xml\\test-batch.xml");
Job job = (Job) context.getBean("simpleJob");
// launcher and jobParametersConverter were undefined in the original snippet;
// the launcher comes from the "jobLauncher" bean defined in the XML above
JobLauncher launcher = (JobLauncher) context.getBean("jobLauncher");
JobParametersConverter jobParametersConverter = new DefaultJobParametersConverter();
JobParameters jobParameters = jobParametersConverter.getJobParameters(StringUtils
        .splitArrayElementsIntoProperties(new String[] {
                "D:\\jboss-4.0.5.GA\\server\\default\\deploy\\venki.war\\WEB-INF\\xml\\venki-batch.xml",
                "simpleJob" }, "="));
JobExecution jobExecution = launcher.run(job, jobParameters);
But an error is thrown in Eclipse saying no ClassLoaders were found. Below is the error I got:
org.apache.commons.lang.SerializationException: java.lang.ClassNotFoundException: No ClassLoaders found for: org.springframework.batch.core.JobExecution
at org.apache.commons.lang.SerializationUtils.deserialize(SerializationUtils.java:164)
at org.apache.commons.lang.SerializationUtils.deserialize(SerializationUtils.java:191)
at org.springframework.batch.core.repository.dao.MapJobExecutionDao.copy(MapJobExecutionDao.java:33)
at org.springframework.batch.core.repository.dao.MapJobExecutionDao.saveJobExecution(MapJobExecutionDao.java:56)
at org.springframework.batch.core.repository.support.SimpleJobRepository.saveOrUpdate(SimpleJobRepository.java:216)
at org.springframework.batch.core.repository.support.SimpleJobRepository.createJobExecution(SimpleJobRepository.java:192)
at org.springframework.batch.core.launch.support.SimpleJobLauncher.run(SimpleJobLauncher.java:79)
at com.venki.handler.TestHandler.testSpringBatch(TestHandler.java:111)
Please help me in this regard. I need to find code that reads from a flat file and inserts into a DB. I need to integrate this into a stand-alone module, and I should not use Maven.
Why don't you run it using org.springframework.batch.core.launch.support.CommandLineJobRunner?
You can define it as your main class in the Run Configuration of Eclipse.
In the program arguments, give:
<your batch-job XML file> <your job id (in your case, simpleJob)>
You don't need Maven here either. Just set the classpath of the required libraries.
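For example, an equivalent command-line invocation could look like this (the classpath and file locations are illustrative; CommandLineJobRunner takes the job XML path followed by the job name):
java -cp "lib/*;classes" org.springframework.batch.core.launch.support.CommandLineJobRunner test-batch.xml simpleJob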
The sample Spring Batch football project reads from a CSV file and writes into a database using JDBC.
You can find the details here, and the download links are at the top of that page.
As far as Maven goes, you don't need it, but you'll have to manually include all the required libraries on your classpath. If you're running it through Eclipse, trigger the CommandLineJobRunner.

Spring Batch: Reading a File : if field is empty setting the default value

I am very new to Spring Batch. I have a requirement in which I have to read a file that has a header (field names) record and data records.
I have to validate the 1st record (check the field names against a set of predefined names); note that this record needs to be skipped, i.e., it should not be part of the items in the processor.
Then read and store the rest of the field values into a POJO.
If the field 'date' is empty, I need to set the default value 'xxxx-yy-zz'.
I am unable to achieve the 1st and 3rd requirements with batch.
Here is the sample reader XML. Please help.
<bean id="reader" class="org.springframework.batch.item.file.FlatFileItemReader">
<property name="resource" value="classpath:input/import" />
<property name="encoding" value="UTF-8" />
<property name="linesToSkip" value="1" />
<property name="lineMapper" ref="line.mapper"/>
</bean>
<bean id="line.mapper" class="org.springframework.batch.item.file.mapping .DefaultLineMapper">
<property name="lineTokenizer" ref="line.tokenizer"/>
<property name="fieldSetMapper" ref="fieldSet.enity.mapper"/>
</bean>
<bean id="line.tokenizer" class="org.springframework.batch.item.file.transfo rm.DelimitedLineTokenizer">
<property name="delimiter">
<util:constant static-field="org.springframework.batch.item.file.transfo rm.DelimitedLineTokenizer.DELIMITER_TAB"/>
</property>
<property name="names" value="id,date,age " />
<property name="strict" value="false"/>
</bean>
<bean id="fieldSet.enity.mapper" class="org.springframework.batch.item.file.mapping .BeanWrapperFieldSetMapper">
<property name="targetType" value="a.b.myPOJO"/>
<property name="customEditors">
<map>
<entry key="java.util.Date">
<bean class="org.springframework.beans.propertyeditors.C ustomDateEditor">
<constructor-arg>
<bean class="java.text.SimpleDateFormat">
<constructor-arg value="yyyy-mm-dd" />
</bean>
</constructor-arg>
<constructor-arg value="true" />
</bean>
</entry>
</map>
</property>
Create your own custom FieldSetMapper, like below:
public class CustomFieldSetMapper implements FieldSetMapper<a.b.myPOJO> {
    @Override
    public a.b.myPOJO mapFieldSet(FieldSet fs) {
        a.b.myPOJO myPOJO = new a.b.myPOJO();
        // map the other fields (id, age) here as needed
        if (fs.readString("date").isEmpty()) {
            myPOJO.setDate("xxxx-yy-zz");
        }
        return myPOJO;
    }
}
I think you should do the date-setting in an ItemProcessor instead; see the sketch below.
Also, if <property name="linesToSkip" value="1" /> does not fulfill your requirements, extend FlatFileItemReader and validate the first line manually in it.
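A minimal sketch of that ItemProcessor approach. It assumes the date property on a.b.myPOJO is a String with getDate/setDate accessors, as the answer above does; the processor class name is hypothetical:
import org.springframework.batch.item.ItemProcessor;

public class DefaultDateItemProcessor implements ItemProcessor<a.b.myPOJO, a.b.myPOJO> {
    @Override
    public a.b.myPOJO process(a.b.myPOJO item) throws Exception {
        // Apply the default when the date field arrived empty
        if (item.getDate() == null || item.getDate().isEmpty()) {
            item.setDate("xxxx-yy-zz");
        }
        return item;
    }
}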
