Spring Batch - how many records are read & processed - spring

Is it possible to tell how many records are read and/or processed once the job is executed completely? I've a job that reads data from the database and in the processor, I filter few records based on certain criteria and send them to the writer. I would like to know how many total records are read from the DB and how many are sent to the writer step.
Here is my batch config file.
<bean id="dbItemReader" class="....JdbcCursorItemReader">
<property name="dataSource" ref="datasource"/>
<property name="sql" value="select * from ..."/>
<property name="rowMapper">
<bean class="com.my.MyRowMapper"/>
</property>
</bean>
<bean id="itemProcessor" class="com.my.MyItemProcessor"/>
<bean id="itemWriter" class="com.my.MyItemWriter"/>
<batch:job id="myJob">
<batch:step id="step1">
<batch:tasklet transaction-manager="jobTransactionManager">
<batch:chunk reader="dbItemReader" processor="itemProcessor" writer="itemWriter" commit-interval="100"/>
</batch:tasklet>

Spring Batch stores the number of items read, processed, skipped, written, etc in the job repository. Assuming you're using a database job repository, you can view them there in the BATCH_STEP_EXECUTION table.
You can read more about the information stored in the job repository in the documentation here: https://docs.spring.io/spring-batch/docs/current/reference/html/schema-appendix.html

import org.apache.log4j.Logger;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
/**
* Vaquar khan
*/
public class StepExecuListner implements StepExecutionListener {
static Logger log = Logger.getLogger("badRecordLogger");
#Override
public void beforeStep(StepExecution stepExecution) {
System.out.println("StepExecutionListener - beforeStep");
log.error("StepExecutionListener - beforeStep " );
}
#Override
public ExitStatus afterStep(StepExecution stepExecution) {
System.out.println("StepExecutionListener - afterStep");
log.error("------------------------------------------------------------------------------------");
log.error("StepExecutionListener - afterStep:getCommitCount=" + stepExecution.getCommitCount());
log.error("StepExecutionListener - afterStep:getFilterCount=" + stepExecution.getFilterCount());
log.error("StepExecutionListener - afterStep:getProcessSkipCount=" + stepExecution.getProcessSkipCount());
log.error("StepExecutionListener - afterStep:getReadCount=" + stepExecution.getReadCount());
log.error("StepExecutionListener - afterStep:getReadSkipCount=" + stepExecution.getReadSkipCount());
log.error("StepExecutionListener - afterStep:getRollbackCount=" + stepExecution.getRollbackCount());
log.error("StepExecutionListener - afterStep:getWriteCount=" + stepExecution.getWriteCount());
log.error("StepExecutionListener - afterStep:getWriteSkipCount=" + stepExecution.getWriteSkipCount());
log.error("StepExecutionListener - afterStep:getStepName=" + stepExecution.getStepName());
log.error("StepExecutionListener - afterStep:getSummary=" + stepExecution.getSummary());
log.error("StepExecutionListener - afterStep:getStartTime=" + stepExecution.getStartTime());
log.error("StepExecutionListener - afterStep:getStartTime=" + stepExecution.getEndTime());
log.error("StepExecutionListener - afterStep:getLastUpdated=" + stepExecution.getLastUpdated());
log.error("StepExecutionListener - afterStep:getExitStatus=" + stepExecution.getExitStatus());
log.error("StepExecutionListener - afterStep:getFailureExceptions=" + stepExecution.getFailureExceptions());
log.error("------------------------------------------------------------------------------------");
return null;
}
}
inside of batch xml
<bean id="stepListener" class="com.test.listener.StepListner" />
<batch:job id="TestDataLoader">
<batch:split id="split1" task-executor="taskExecutor">
<batch:flow>
<batch:step id="step1">
<batch:tasklet task-executor="taskExecutor" throttle-limit="5">
<batch:chunk reader="itemReader" writer="itemWriter" commit-interval="${commitInterval}" skip-limit="3">
<batch:skippable-exception-classes>
<batch:include class="java.lang.NumberFormatException" />
</batch:skippable-exception-classes>
<batch:listeners>
<batch:listener ref="skipListener" />
<batch:listener ref="stepListener" />
</batch:listeners>
</batch:chunk>
</batch:tasklet>
</batch:step>
</batch:flow>
<batch:flow>
<batch:step id="step2">
<batch:tasklet task-executor="taskExecutor" throttle-limit="15">
<batch:chunk reader="itemReaderToDelete"
writer="itemWriterToDelete" commit-interval="${commitInterval}" skip-limit="3">
<batch:skippable-exception-classes>
<batch:include class="org.springframework.dao.DataAccessException" />
<batch:include class="java.lang.NumberFormatException" />
</batch:skippable-exception-classes>
<batch:listeners>
<batch:listener ref="skipListener" />
<batch:listener ref="stepListener" />
</batch:listeners>
</batch:chunk>
</batch:tasklet>
</batch:step>
</batch:flow>
</batch:split>
</batch:job>

The simplest approach is to register a listener on your step; it gives you access to all the counts.
<batch:job id="myJob">
<batch:step id="step1">
<batch:tasklet transaction-manager="jobTransactionManager">
<batch:chunk reader="dbItemReader"
processor="itemProcessor"
writer="itemWriter"
commit-interval="100">
<batch:listeners>
<batch:listener ref="customStepListner" />
</batch:listeners>
</batch:chunk>
</batch:tasklet>
public class StepListner implements StepExecutionListener {
#Override
public ExitStatus afterStep(StepExecution arg0) {
int readCount=arg0.getReadCount();
int writeCount=arg0.getWriteCount();
int skipCount=arg0.getSkipCount();
int commitCount=arg0.getCommitCount();
arg0.getStartTime();
arg0.getEndTime();
}
#Override
public void beforeStep(StepExecution arg0) {
}
}

Related

Get jobExecutionContext in xml config spring batch from before step

I am defining my MultiResourceItemReader on this way:
<bean id="multiDataItemReader" class="org.springframework.batch.item.file.MultiResourceItemReader" scope="step">
<property name="resources" value="#{jobExecutionContext['filesResource']}"/>
<property name="delegate" ref="dataItemReader"/>
</bean>
How you can see I want read from the jobExecutionContext the "filesResource" value.
Note: I changed some names to keep the "code privacy". This is executing, Is somebody wants more info please tell me.
I am saving this value in my first step and I am using the reader in the second step, Should I have access to it?
I am saving it in the final lines from my step1 tasklet:
ExecutionContext jobContext = context.getStepContext().getStepExecution().getJobExecution().getExecutionContext();
jobContext.put("filesResource", resourceString);
<batch:job id="myJob">
<batch:step id="step1" next="step2">
<batch:tasklet ref="moveFilesFromTasklet" />
</batch:step>
<batch:step id="step2">
<tasklet>
<chunk commit-interval="500"
reader="multiDataItemReader"
processor="dataItemProcessor"
writer="dataItemWriter" />
</tasklet>
</batch:step>
</batch:job>
I am not really sure what I am forgetting to get the value. The error that I am getting is:
20190714 19:49:08.120 WARN org.springframework.batch.item.file.MultiResourceItemReader [[ # ]] - No resources to read. Set strict=true if this should be an error condition.
I see nothing wrong with your config. The value of resourceString should be an array of org.springframework.core.io.Resource as this is the parameter type of the resources attribute of MultiResourceItemReader.
You can pass an array or a list of String with the absolute path to each resource and it should work. Here is a quick example:
class MyTasklet implements Tasklet {
#Override
public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) {
List<String> resources = Arrays.asList(
"/full/path/to/resource1",
"/full/path/to/resource2");
chunkContext.getStepContext().getStepExecution().getJobExecution().getExecutionContext()
.put("filesResource", resources);
return RepeatStatus.FINISHED;
}
}

Spring Batch: Footer callback of multiple file writer not giving exact count

If item writers are writing 2 records to file A and 1 record in file B then Trailer count of both the files(A & B) are 3.
I have a reader, processor and ClassifierCompositeItemWriter. In classifier i have two item writers those are giving valid outputs but the footer callback is not proper. in both the file trailer count is same though the record counts are different.
<batch:job id="abc-job" parent="xyzJob">
<batch:step id="inputfile">
<batch:tasklet>
<batch:chunk reader="itemReader" processor="itemProcessor" writer="itemWriter" commit-interval="1000" >
<batch:streams>
<batch:stream ref="AFileWriter"/>
<batch:stream ref="BFileWriter"/>
</batch:streams>
</batch:chunk>
</batch:tasklet>
</batch:step>
</batch:job>
<beans:bean id="itemWriter" class="org.springframework.batch.item.support.ClassifierCompositeItemWriter">
<beans:property name="classifier" ref="classifier" />
</beans:bean>
<beans:bean id="classifier" class="org.springframework.batch.classify.BackToBackPatternClassifier">
<beans:property name="routerDelegate">
<beans:bean class="com.abc.classifier.MyClassifier" />
</beans:property>
<beans:property name="matcherMap">
<beans:map>
<beans:entry key="A" value-ref="AFileWriter" />
<beans:entry key="B" value-ref="BFileWriter" />
</beans:map>
</beans:property>
</beans:bean>
<beans:bean id="AFileWriter" parent="parentItemWriter1">
<beans:property name="name" value="AFileWriter"/>
<beans:property name="resource" ref="AFile"/>
</beans:bean>
<beans:bean id="BFileWriter" parent="parentItemWriter2">
<beans:property name="name" value="BFileWriter"/>
<beans:property name="resource" ref="BFile"/>
</beans:bean>
Footer callback-
public class ItemCountFooterCallback implements FlatFileFooterCallback
{
private AtomicInteger count;
public ItemCountFooterCallback(final AtomicInteger count)
{
this.count = count;
}
public void writeFooter(final Writer writer) throws IOException
{
writer.append("Trailer " + this.count.toString());
}
}
I expect the trailer record of files A and B to contain the exact number of rows written to that particular file.
footerCallback is registered at the step level, hence it will use the write.count of the step, which is the total count of written items (3 in your case).
What you can do is to have a write count for each writer (writer1.count and writer2.count for example) and set a footer callback on each writer (not at the step level). Each footer callback should write the item count of the writer it is attached to.

How to skip reader, writer in spring batch

I have a requirement where I need to upload some files to a server. I am using spring batch to accomplish the same. Here the "initializeFile" will basically interact with the server to check if the files already exists in server. if not then it should call the step "uploadIndexFileStep" to upload the files. If files already present in server then the step "uploadIndexFileStep" SHOULDN'T be called.
How to implement this case wherein if the "initializeFile" has no files to upload then spring should not call the next step "uploadIndexFileStep".
Is there a way, or do I need to follow some design or its a spring config change? Any pointers would be helpful.
following is the batch configuration.
<batch:step id="initFileStep" next="uploadIndexFileStep">
<batch:tasklet ref="initializeFile"></batch:tasklet>
</batch:step>
<batch:step id="uploadIndexFileStep">
<batch:tasklet>
<batch:chunk reader="indexFileReader" processor="indexFileProcessor" writer="indexFileWriter" commit-interval="${app.chunk.commit.interval}"/>
</batch:tasklet>
</batch:step>
<batch:listeners>
<batch:listener ref="uploadIndexJobListener"/>
</batch:listeners>
</batch:job>
Spring Batch provides a nice way to handle conditional flow. You can implement this by routing on the step's exit status with the ON attribute.
You can have something like below
#Bean
public Job job() {
return jobBuilderFactory().get("job").
flow(initializeFile()).on("FILELOADED").to(anyStep()).
from(initializeFile()).on("FILENOTLOADED").to(uploadIndexFileStep()).next(step3()).next(step4()).end().build();
}
5.3.2 Conditional Flow
I resolved this using JobExecutionDecider. I am maintaining the queue size in ExecutionContext and then reading this execution context in decider to manage the flow.
public class UploadIndexFlowDecider implements JobExecutionDecider {
#Override
public FlowExecutionStatus decide(JobExecution jobExecution, StepExecution stepExecution) {
int queueSize = jobExecution.getExecutionContext().getInt("INDEX_UPLOAD_QUEUE_SIZE");
if(queueSize > 0)
return FlowExecutionStatus.COMPLETED;
else
return FlowExecutionStatus.STOPPED;
}
}
#Component
public class InitializeFileStep implements Tasklet {
#Override
public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception {
chunkContext.getStepContext().getStepExecution().getJobExecution().getExecutionContext().putInt("INDEX_UPLOAD_QUEUE_SIZE", 1);
return RepeatStatus.FINISHED;
}
<batch:job id="uploadIndexFileJob">
<batch:step id="initFileStep" next="uploadDecision">
<batch:tasklet ref="initializeFile"></batch:tasklet>
</batch:step>
<batch:decision id="uploadDecision" decider="uploadIndexDecision">
<batch:next on="COMPLETED" to="uploadIndexFileStep"/>
<batch:end on="STOPPED"/>
</batch:decision>
<batch:step id="uploadIndexFileStep">
<batch:tasklet>
<batch:chunk reader="indexFileReader" processor="indexFileProcessor" writer="indexFileWriter" commit-interval="${app.chunk.commit.interval}"/>
</batch:tasklet>
</batch:step>
<batch:listeners>
<batch:listener ref="uploadIndexJobListener"/>
</batch:listeners>
</batch:job>

I want my Processor to get access to JobExecutionId

Spring 4.3 with Spring Batch 3.0.8.
I want to have a reference to the job execution id in the processor, so I can put it inside the output object and write it out along with the data to db. Here is my setup below.
I have added the blueReportJobExecutionListener, which gives me the JobExecution ID that I need.... but how do I send that over to my blueReportItemProcessor ?! That's the object that needs that value.
<bean id="blueReportJobExecutionListener" class="com.cloud.cost.listener.BlueReportJobExecutionListener" scope="prototype" />
<bean id="blueReportJobListener" class="com.cloud.cost.listener.BlueReportJobListener" scope="prototype" />
<bean id="blueReportStepListener" class="com.cloud.cost.listener.BlueReportStepListener" scope="prototype" />
<batch:job id="blueReportJob">
<batch:step id="blueReportStep">
<batch:tasklet>
<batch:chunk reader="blueReportCSVFileItemReader" processor="blueReportItemProcessor" writer="mysqlItemWriter"
commit-interval="2">
</batch:chunk>
</batch:tasklet>
<batch:listeners>
<batch:listener ref="blueReportStepListener"/>
</batch:listeners>
</batch:step>
<batch:listeners>
<batch:listener ref="blueReportJobListener"/>
<batch:listener ref="**blueReportJobExecutionListener**"/>
</batch:listeners>
</batch:job>
You can then get the value from the JobExecution by simply using the #Value annotation.
#Value("#{jobExecutionContext['JOB_ID']}")
Where JOB_ID is the key you have used in the listener to add the job id.
Make sure your processor scope is defined as step otherwise this value will not be autowired.

Spring JdbcCursorItemReader in spring batch application

My use case is as follows:
There is an Employee table and columns are as follows:
employee_id;
empoyee_dob;
employee_lastName;
employee_firstName;
employee_zipCode
Now there is an use-case to build a list of Employees present in Dept 'A' and zipcode 11223 and also employees present in Dept B and zipcode 33445.
I have configured a spring job as follows:
<batch:job id="EmployeeDetailsJob" job-repository="EmpDaoRepository">
<batch:step id="loadEmployeeDetails" >
<batch:tasklet transaction-manager="EmployeeDAOTranManager">
<batch:chunk reader="EmpDaoJdbcCursorItemReader" writer="EmpDaoWriter" commit-interval="200" skip-limit="100">
<batch:skippable-exception-classes>
</batch:skippable-exception-classes>
</batch:chunk>
<batch:listeners>
<batch:listener ref="EmpDaoStepListener" />
</batch:listeners>
<batch:transaction-attributes isolation="DEFAULT" propagation="REQUIRED" timeout="300" />
</batch:tasklet>
</batch:step>
</batch:job>
The configuration of reader is as follows:
<bean id="EmpDaoJdbcCursorItemReader" class="EmpDaoJdbcCursorItemReader">
<property name="dataSource" ref="EmpDataSource" />
<property name="sql">
<value><![CDATA[select * from Employee where employee_id=? and employee_zipCode=? ]]>
</value>
</property>
<property name="fetchSize" value="100"></property>
<property name="rowMapper" ref="EmployeeMapper" />
</bean>
There is class EmployeeQueryCriteria which has two fields employee_id and employee_zipCode.
In on of the steps i will create an ArrayList of EmployeeQueryCriteria objects for which the data has to be fetched.
So my question is:
1.Is there a way i can pass this List to the EmpDaoJdbcCursorItemReader and it will iterate through the object and set the parameter values from the EmployeeQueryCriteria object
2.Can i loop through the step to read data for every item in the ArrayList created containing EmployeeQueryCriteria and fetch the data.
The class EmpDaoJdbcCursorItemReader:
public class EmpDaoJdbcCursorIte‌​mReader extends JdbcCursorItemReader{
#BeforeStep
public void beforeStep(StepExecution stepExecution)
{
StringBuffer sqlQuerySB= new StringBuffer(super.getSql());
sqlQuerySB.append((").append(/*I am adding a comma seperated list of employee ids*/).append(")");
super.setSql(sqlQuerySB.toString());
}
}
My Spring configurations are as follows:
Spring-batch-core 2.2.2
Spring-beans 3.2.3
Spring-context 3.2.3
Can someone please provide suggestions on how to solve this problem.
You can iterate through the steps using the following flow model:
<decision id="testLoop" decider="iterationDecider">
<next on="CONTINUABLE" to="pqrStep" />
<end on="FINISHED" />
</decision>
<step id="pqrStep" next="xyzStep">
<tasklet ref="someTasklet" />
</step>
<step id="xyzStep" next="testLoop">
<tasklet ref="someOtherTasklet" />
</step>
and Configuration is
<bean id="iterationDecider" class="com.xyz.StepFlowController" />
Following class will handle the flow based on the condition
public class StepFlowController implements JobExecutionDecider{
#Override
public FlowExecutionStatus decide(JobExecution jobExecution, StepExecution stepExecution) {
FlowExecutionStatus status = null;
try {
if (conditionIsTrue) {
status = new FlowExecutionStatus("CONTINUABLE");
}else {
status = new FlowExecutionStatus("FINISHED");
}
} catch (Exception e) {
e.printStackTrace();
}
return status;
}

Resources