MapReduce - custom data type - Hadoop

While writing a MapReduce program, I find that the key needs to be a tuple (A, B), where A and B are both sets of integers. How can I implement this custom data type?
public static class MapClass extends Mapper<Object,Text,Tuple,Tuple>....

public class Tuple implements WritableComparable<Tuple> {
    @Override
    public void readFields(DataInput arg0) throws IOException {
        // TODO Auto-generated method stub
    }

    @Override
    public void write(DataOutput arg0) throws IOException {
        // TODO Auto-generated method stub
    }

    @Override
    public int compareTo(Tuple o) {
        // TODO Auto-generated method stub
        return 0;
    }
}

You're almost there, just add variables for A and B, and then complete the serialization methods and compareTo:
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import org.apache.hadoop.io.WritableComparable;

public class Tuple implements WritableComparable<Tuple> {
    public Set<Integer> a = new TreeSet<Integer>();
    public Set<Integer> b = new TreeSet<Integer>();

    @Override
    public void readFields(DataInput arg0) throws IOException {
        a.clear();
        b.clear();
        int count = arg0.readInt();
        while (count-- > 0) {
            a.add(arg0.readInt());
        }
        count = arg0.readInt();
        while (count-- > 0) {
            b.add(arg0.readInt());
        }
    }

    @Override
    public void write(DataOutput arg0) throws IOException {
        arg0.writeInt(a.size());
        for (int v : a) {
            arg0.writeInt(v);
        }
        arg0.writeInt(b.size());
        for (int v : b) {
            arg0.writeInt(v);
        }
    }

    @Override
    public int compareTo(Tuple o) {
        // You'll need to decide how you want to compare the two sets between
        // objects. One possible ordering: compare a first, then b, element by
        // element (TreeSet iterates in sorted order).
        int cmp = compareSets(a, o.a);
        return cmp != 0 ? cmp : compareSets(b, o.b);
    }

    private static int compareSets(Set<Integer> s1, Set<Integer> s2) {
        Iterator<Integer> i1 = s1.iterator();
        Iterator<Integer> i2 = s2.iterator();
        while (i1.hasNext() && i2.hasNext()) {
            int c = i1.next().compareTo(i2.next());
            if (c != 0) return c;
        }
        return Integer.compare(s1.size(), s2.size());
    }
}

To implement a custom data type in Hadoop, you must implement the WritableComparable interface and provide custom implementations of the readFields() and write() methods.
Apart from implementing readFields() and write(), you must also override the equals() and hashCode() methods of java.lang.Object, since the default HashPartitioner uses hashCode() to assign keys to reducers.
When the custom data type is used as a key, it must also be comparable, which is why keys implement WritableComparable rather than plain Writable.
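For example, here is a minimal sketch of equals() and hashCode() for the Tuple class above, assuming the two sets a and b are its only state:
@Override
public boolean equals(Object other) {
    if (this == other) return true;
    if (!(other instanceof Tuple)) return false;
    Tuple t = (Tuple) other;
    return a.equals(t.a) && b.equals(t.b);
}

@Override
public int hashCode() {
    // The default HashPartitioner routes keys to reducers via hashCode(),
    // so equal tuples must produce equal hashes.
    return 31 * a.hashCode() + b.hashCode();
}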

Related

Handling Null values in postgres for text array type column in org.hibernate.PropertyAccessException

We hit a strange scenario in our Hibernate-based web application.
In the PostgreSQL database there is a column of type text[], and some of its values are null. In my entity class I have mapped it to String[], and when I run a CriteriaBuilder select query I get an exception saying
Request processing failed; nested exception is javax.persistence.PersistenceException: org.hibernate.PropertyAccessException: could not set a field value by reflection setter
I am using the @Type(type = "GenericArrayUserType") annotation on the String[] field in the entity class.
Please use the custom GenericStringArrayUserType mapping below for your String[] attribute.
@Type(type = "ai.test.GenericStringArrayUserType")
private String[] fields;
import java.io.Serializable;
import java.sql.Array;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import org.hibernate.HibernateException;
import org.hibernate.engine.spi.SessionImplementor;
import org.hibernate.usertype.UserType;

public class GenericStringArrayUserType<T extends Serializable> implements UserType {
    protected static final int[] SQL_TYPES = {Types.ARRAY};
    private Class<T> typeParameterClass;

    @Override
    public int[] sqlTypes() {
        return new int[]{Types.ARRAY};
    }

    @Override
    public Class<T> returnedClass() {
        return typeParameterClass;
    }

    @Override
    public boolean equals(Object x, Object y) throws HibernateException {
        if (x == null) {
            return y == null;
        }
        return x.equals(y);
    }

    @Override
    public int hashCode(Object x) throws HibernateException {
        return x.hashCode();
    }

    @Override
    public Object nullSafeGet(ResultSet resultSet, String[] names, SessionImplementor session, Object owner)
            throws HibernateException, SQLException {
        /*
        if (resultSet.wasNull()) {
            return null;
        }
        */
        // Return an empty array instead of null for null-valued text[] columns.
        if (resultSet.getArray(names[0]) == null) {
            return new String[0];
        }
        Array array = resultSet.getArray(names[0]);
        @SuppressWarnings("unchecked")
        T javaArray = (T) array.getArray();
        return javaArray;
    }

    @Override
    public void nullSafeSet(PreparedStatement statement, Object value, int index, SessionImplementor session)
            throws HibernateException, SQLException {
        Connection connection = statement.getConnection();
        if (value == null) {
            statement.setNull(index, SQL_TYPES[0]);
        } else {
            @SuppressWarnings("unchecked")
            T castObject = (T) value;
            Array array = connection.createArrayOf("text", (Object[]) castObject);
            statement.setArray(index, array);
        }
    }

    @Override
    public Object deepCopy(Object value) throws HibernateException {
        return value;
    }

    @Override
    public boolean isMutable() {
        return true;
    }

    @SuppressWarnings("unchecked")
    @Override
    public Serializable disassemble(Object value) throws HibernateException {
        return (T) this.deepCopy(value);
    }

    @Override
    public Object assemble(Serializable cached, Object owner) throws HibernateException {
        return this.deepCopy(cached);
    }

    @Override
    public Object replace(Object original, Object target, Object owner) throws HibernateException {
        return original;
    }
}

Spring Batch MultiResourceItemReader re-reading first resource

I am writing a Spring Batch application using Spring Boot 1.5. Here are my classes:
CustomMultiResourceItemReader.java
@StepScope
@Component
public class CustomMultiResourceItemReader
        extends MultiResourceItemReader<MyDTO> {

    public CustomMultiResourceItemReader(
            @NonNull final MyResourceAwareItemReader itemReader,
            @NonNull final ApplicationContext ctx)
            throws IOException {
        setResources(
            ctx.getResources(
                String.format(
                    "file:%s/*.xml", "~/data"))); // gives me a Resource[] array fine
        setDelegate(itemReader);
    }

    @PreDestroy
    void destroy() {
        close();
    }
}
MyResourceAwareItemReader.java
@RequiredArgsConstructor
@StepScope
@Component
@Slf4j
public class MyResourceAwareItemReader
        implements ResourceAwareItemReaderItemStream<MyDTO> {

    private static final String RESOURCE_NAME_KEY = "RESOURCE_NAME_KEY";

    @NonNull private final Unmarshaller unmarshaller; // JAXB Unmarshaller
    private Resource resource;

    @Override
    public void setResource(Resource resource) {
        this.resource = resource; // **gets called only once**
    }

    @Override
    public MyDTO read() throws Exception {
        final MyDTO dto = (MyDTO) unmarshaller.unmarshal(resource.getFile()); // standard JAXB unmarshalling
        return dto;
    }

    @Override
    public void open(ExecutionContext executionContext) throws ItemStreamException {
        if (executionContext.containsKey(RESOURCE_NAME_KEY)) {
        } else if (resource != null) {
            executionContext.put(RESOURCE_NAME_KEY, resource.getFilename());
        }
    }

    @Override
    public void update(ExecutionContext executionContext) throws ItemStreamException {
        if (resource != null) executionContext.put(RESOURCE_NAME_KEY, resource.getFilename());
    }

    @Override
    public void close() throws ItemStreamException {}
}
The problem is that the setResource method in the delegate reader (MyResourceAwareItemReader.java) gets called only once at the beginning, while the read method gets called multiple times; as a result I read the same item multiple times instead of reading the next item as expected.
I have also browsed the source code of MultiResourceItemReader in Spring Batch. It seems the read method of the delegate is supposed to return null after each item has been read, and I can clearly see my code doesn't do that.
I am a bit lost as to how to make this work. Any help is much appreciated.
Looking further into it, the ItemReader documentation clearly states that a reader must return null at the end of the input data set. So I implemented my ItemReader with a boolean flag, as follows:
@RequiredArgsConstructor
@StepScope
@Component
@Slf4j
public class MyResourceAwareItemReader
        implements ResourceAwareItemReaderItemStream<MyDTO> {

    private static final String RESOURCE_NAME_KEY = "RESOURCE_NAME_KEY";

    @NonNull private final Unmarshaller unmarshaller; // JAXB Unmarshaller
    private Resource resource;
    private boolean isResourceRead;

    @Override
    public void setResource(Resource resource) {
        this.resource = resource;
        isResourceRead = false;
    }

    @Override
    public MyDTO read() throws Exception {
        // Return null once the current resource has been read so the
        // surrounding MultiResourceItemReader advances to the next resource.
        if (isResourceRead) return null;
        final MyDTO dto = (MyDTO) unmarshaller.unmarshal(resource.getFile());
        isResourceRead = true;
        return dto;
    }

    @Override
    public void open(ExecutionContext executionContext) throws ItemStreamException {
        if (executionContext.containsKey(RESOURCE_NAME_KEY)) {
        } else if (resource != null) {
            executionContext.put(RESOURCE_NAME_KEY, resource.getFilename());
        }
    }

    @Override
    public void update(ExecutionContext executionContext) throws ItemStreamException {
        if (resource != null) executionContext.put(RESOURCE_NAME_KEY, resource.getFilename());
    }

    @Override
    public void close() throws ItemStreamException {}
}
MultiResourceItemReader does not return null each time. If there are no more resources to read, it returns null; otherwise it sets the next resource on the delegate, which means your actual reader.
I can see the problem in your read() method: you are not moving to the next file. As you are implementing your own MultiResourceItemReader, it is your responsibility to move on to the next resource.
This is how it is implemented in MultiResourceItemReader; you will need a similar implementation of your own.
private T readNextItem() throws Exception {
    T item = delegate.read();
    while (item == null) {
        // The delegate has exhausted the current resource; advance to the next one.
        currentResource++;
        if (currentResource >= resources.length) {
            return null;
        }
        delegate.close();
        delegate.setResource(resources[currentResource]);
        delegate.open(new ExecutionContext());
        item = delegate.read();
    }
    return item;
}
You need to maintain an index into the resources array. Please check the implementation of MultiResourceItemReader; you need to do it in exactly the same way.

JMeter Plugin - How to Listen to TestState

I am working on developing a JMeter plugin. I'm trying to create an AbstractVisualizer that is capable of monitoring the current test state. However, implementing the TestStateListener doesn't seem to be working.
I'm testing this by creating a basic listener that logs arbitrary info to JMeter's logging console. When a sample is sent through the add function, a line is written to the console, but nothing is ever triggered on the various test-state functions. Is there something more structural I'm missing?
public class TestListener extends AbstractVisualizer
        implements TestStateListener {

    private static final Logger log = LoggingManager.getLoggerForClass();

    @Override
    public void add(SampleResult arg0) {
        log.info("add");
    }

    @Override
    public void clearData() {
        // TODO Auto-generated method stub
    }

    @Override
    public String getStaticLabel() {
        return "Test Listener";
    }

    @Override
    public String getLabelResource() {
        return null;
    }

    @Override
    public void testEnded() {
        log.info("Test Ended");
    }

    @Override
    public void testEnded(String arg0) {
        log.info("Test Ended");
    }

    @Override
    public void testStarted() {
        log.info("Test started");
    }

    @Override
    public void testStarted(String arg0) {
        log.info("Test started");
    }
}
I'm not sure how to do it in one class; I have two classes.
The UI:
public class MonitorGui extends AbstractListenerGui {
    // ...

    @Override
    public TestElement createTestElement() {
        TestElement element = new Monitor(); // <-- this is the backend
        modifyTestElement(element);
        return element;
    }

    // ...
}
And then the backend goes like this:
public class Monitor extends AbstractListenerElement
        implements SampleListener,
            Clearable, Serializable,
            TestStateListener, Remoteable,
            NoThreadClone {

    private static final String TEST_IS_LOCAL = "*local*";

    // ...

    @Override
    public void testStarted() {
        testStarted(TEST_IS_LOCAL);
    }

    @Override
    public void testEnded() {
        testEnded(TEST_IS_LOCAL);
    }

    @Override
    public void testStarted(String host) {
        // ...
    }

    // ...
}
You may not need to implement SampleListener like I do, but the other parts will probably be quite similar.
I based that implementation on the built-in pair ResultSaverGui and ResultCollector, which are the components that save results into file(s) for Simple Data Writer, Summary Report, and so on.

Spring Batch: pass data between reader and writer

I would like to get data in the Writer that I've set in the Reader of my step. I know about ExecutionContexts (step and job) and about the ExecutionContextPromotionListener, via http://docs.spring.io/spring-batch/trunk/reference/html/patterns.html#passingDataToFutureSteps
The problem is that in the Writer I'm retrieving a null value for 'npag'.
Line in the ItemWriter:
LOG.info("INSIDE WRITE, NPAG: " + nPag);
I've been trying some workarounds without luck, and looking at answers to other similar questions... Any help? Thanks!
Here's my code:
READER
@Component
public class LCItemReader implements ItemReader<String> {

    private StepExecution stepExecution;
    private int nPag = 1;

    @Override
    public String read() throws CustomItemReaderException {
        ExecutionContext stepContext = this.stepExecution.getExecutionContext();
        stepContext.put("npag", nPag);
        nPag++;
        return "content";
    }

    @BeforeStep
    public void saveStepExecution(StepExecution stepExecution) {
        this.stepExecution = stepExecution;
    }
}
WRITER
@Component
@StepScope
public class LCItemWriter implements ItemWriter<String> {

    private String nPag;

    @Override
    public void write(List<? extends String> continguts) throws Exception {
        try {
            LOG.info("INSIDE WRITE, NPAG: " + nPag);
        } catch (Throwable ex) {
            LOG.error("Error: " + ex.getMessage());
        }
    }

    @BeforeStep
    public void retrieveInterstepData(StepExecution stepExecution) {
        JobExecution jobExecution = stepExecution.getJobExecution();
        ExecutionContext jobContext = jobExecution.getExecutionContext();
        this.nPag = jobContext.get("npag").toString();
    }
}
JOB/STEP BATCH CONFIG
@Bean
public Job lCJob() {
    return jobs.get("lCJob")
        .listener(jobListener)
        .start(lCStep())
        .build();
}

@Bean
public Step lCStep() {
    return steps.get("lCStep")
        .<String, String>chunk(1)
        .reader(lCItemReader)
        .processor(lCProcessor)
        .writer(lCItemWriter)
        .listener(promotionListener())
        .build();
}
LISTENER
@Bean
public ExecutionContextPromotionListener promotionListener() {
    ExecutionContextPromotionListener executionContextPromotionListener = new ExecutionContextPromotionListener();
    executionContextPromotionListener.setKeys(new String[]{"npag"});
    return executionContextPromotionListener;
}
The ExecutionContextPromotionListener specifically states that it works at the end of a step, which is after the writer executes. So the promotion you are counting on does not occur when you think it does.
If I were you, I would set the value in the step context and get it from the step context if you need it within a single step; otherwise I would set it in the job context.
The other aspect is @BeforeStep. That marks a method to execute before the step context exists, whereas the way you are setting the nPag value in the reader happens after the step has started executing.
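For illustration, here is a minimal sketch of that suggestion, reusing the names from the question: the reader keeps putting "npag" into the step execution context, and the writer reads it from the step context at write time instead of caching it in @BeforeStep.
@Component
@StepScope
public class LCItemWriter implements ItemWriter<String> {

    private StepExecution stepExecution;

    @Override
    public void write(List<? extends String> continguts) throws Exception {
        // Look up the value at write time, after the reader has already run.
        Object nPag = stepExecution.getExecutionContext().get("npag");
        LOG.info("INSIDE WRITE, NPAG: " + nPag);
    }

    @BeforeStep
    public void saveStepExecution(StepExecution stepExecution) {
        // Only keep the reference here; don't read values from it yet.
        this.stepExecution = stepExecution;
    }
}
This sidesteps both timing issues, because the context lookup happens only after the reader has put the value in.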
You are trying to read the value of nPag before it is set in the reader, ending up with the default value, which is null. You need to read nPag from the execution context directly at the time of logging. You can keep a reference to the jobContext. Try this:
@Component
@StepScope
public class LCItemWriter implements ItemWriter<String> {

    private String nPag;
    private ExecutionContext jobContext;

    @Override
    public void write(List<? extends String> continguts) throws Exception {
        try {
            this.nPag = jobContext.get("npag").toString();
            LOG.info("INSIDE WRITE, NPAG: " + nPag);
        } catch (Throwable ex) {
            LOG.error("Error: " + ex.getMessage());
        }
    }

    @BeforeStep
    public void retrieveInterstepData(StepExecution stepExecution) {
        JobExecution jobExecution = stepExecution.getJobExecution();
        jobContext = jobExecution.getExecutionContext();
    }
}
In your Reader and Writer you need to implement the ItemStream interface and use the ExecutionContext as a member variable. Here I have given an example with a Processor instead of a Writer, but the same applies to a Writer as well. It's working fine for me and I am able to pass values from the reader to the processor.
I set the value in the context in the reader and get the value in the processor.
public class EmployeeItemReader implements ItemReader<Employee>, ItemStream {

    ExecutionContext context;

    @Override
    public Employee read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
        context.put("ajay", "i am going well");
        Employee emp = new Employee();
        emp.setEmpId(1);
        emp.setFirstName("ajay");
        emp.setLastName("goswami");
        return emp;
    }

    @Override
    public void close() throws ItemStreamException {
        // TODO Auto-generated method stub
    }

    @Override
    public void open(ExecutionContext arg0) throws ItemStreamException {
        context = arg0;
    }

    @Override
    public void update(ExecutionContext arg0) throws ItemStreamException {
        context = arg0;
    }
}
My processor
public class CustomItemProcessor implements ItemProcessor<Employee, ActiveEmployee>, ItemStream {

    ExecutionContext context;

    @Override
    public ActiveEmployee process(Employee emp) throws Exception {
        // See this line: the value set by the reader is available here.
        System.out.println(context.get("ajay"));
        ActiveEmployee actEmp = new ActiveEmployee();
        actEmp.setEmpId(emp.getEmpId());
        actEmp.setFirstName(emp.getFirstName());
        actEmp.setLastName(emp.getLastName());
        actEmp.setAdditionalInfo("Employee is processed");
        return actEmp;
    }

    @Override
    public void close() throws ItemStreamException {
        // TODO Auto-generated method stub
    }

    @Override
    public void open(ExecutionContext arg0) throws ItemStreamException {
        // TODO Auto-generated method stub
    }

    @Override
    public void update(ExecutionContext arg0) throws ItemStreamException {
        context = arg0;
    }
}
Hope this helps.

Spring Batch: how to regroup/aggregate user data into a single object

I am trying to transform user operations (like purchases) into a user summary class (expenses by user). A user can have multiple operations but only one summary. I cannot sum purchases in the reader because I need a processor to reject some operations depending on another service.
So, some code:
class UserOperation {
    String userId;
    Integer price;
}

class UserSummary {
    String userId;
    Long sum;
}
@Bean
public Step retrieveOobClientStep1(StepBuilderFactory stepBuilderFactory, ItemReader<UserOperation> userInformationJdbcCursorItemReader, ItemProcessor<UserOperation, UserSummary> userInformationsProcessor, ItemWriter<UserSummary> flatFileWriter) {
    return stepBuilderFactory.get("Step1").<UserOperation, UserSummary>chunk(100) // chunked results that need to be aggregated... not good
        .reader(userInformationJdbcCursorItemReader) // read all user operations from DB
        .processor(userInformationsProcessor) // I need to reject some operations - but here 1 operation = 1 summary, which is not good
        .writer(flatFileWriter) // write result into flat file
        .build();
}
I think ItemReader/ItemProcessor/ItemWriter is meant for single-item processing. But how do you regroup multiple records into a single object using Spring Batch? Only with a Tasklet?
Here is one possibility, but it causes problems with a small commit interval:
public class UserSummaryAggregatorItemStreamWriter implements ItemStreamWriter<UserSummary>, InitializingBean {

    private ItemStreamWriter<UserSummary> delegate;

    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(delegate, "'delegate' may not be null.");
    }

    public void setDelegate(ItemStreamWriter<UserSummary> delegate) {
        this.delegate = delegate;
    }

    @Override
    public void write(List<? extends UserSummary> items) throws Exception {
        Map<String, UserSummary> userSummaryMap = new HashMap<String, UserSummary>();
        // Aggregate by user id within the current chunk.
        for (UserSummary item : items) {
            UserSummary savedUserSummary = userSummaryMap.get(item.getUserId());
            if (savedUserSummary != null) {
                savedUserSummary.incrementSum(item.getSum()); // sum
            } else {
                savedUserSummary = item;
            }
            userSummaryMap.put(item.getUserId(), savedUserSummary); // was getSubscriptionCode(), which UserSummary does not define
        }
        Collection<UserSummary> values = userSummaryMap.values();
        if (values != null) {
            delegate.write(new ArrayList<UserSummary>(values));
        }
    }

    @Override
    public void open(ExecutionContext executionContext) throws ItemStreamException {
        delegate.open(executionContext);
    }

    @Override
    public void update(ExecutionContext executionContext) throws ItemStreamException {
        delegate.update(executionContext);
    }

    @Override
    public void close() throws ItemStreamException {
        delegate.close();
    }
}
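As a usage sketch, the aggregator would be wired around the real writer roughly like this (the flatFileWriter bean name is assumed from the step definition above):
@Bean
public UserSummaryAggregatorItemStreamWriter userSummaryAggregatorWriter(ItemStreamWriter<UserSummary> flatFileWriter) {
    UserSummaryAggregatorItemStreamWriter writer = new UserSummaryAggregatorItemStreamWriter();
    writer.setDelegate(flatFileWriter); // items are aggregated per chunk before being delegated
    return writer;
}
Note that the aggregation is only per chunk: with chunk(100), items are grouped 100 at a time, so the same userId can still be written once per chunk. That is the small-commit-interval problem mentioned above.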
