Hadoop secondary sort - to use or not to use

I have accident input data from a traffic data analysis dataset. Some of the columns are:
Accident Id, Accident Date, Day of week
1, 1/1/1979, 5 (Thursday)
2, 1/2/1979, 6 (Friday)
.......
3, 1/1/1980, 0 (Sunday)
I am trying to solve the following: find the number of accidents per year per day of the week.
The output should look like the sample below, where the key is (year, day of week) and the value is the number of accidents on that day. For example, the first line represents year = 1979, day = 1 (Sunday) and 500 accidents, and so on.
1979,1 500
1979,2 1500
1979,3 2500
1979,4 3500
1979,5 4500
1979,6 5500
1979,7 6500
1980,1 500
1980,2 1500
1980,3 2500
1980,4 3500
1980,5 4500
In this scenario I am trying to solve it using the secondary sort method. Is that the correct way to solve this problem?
If secondary sort is the correct way, it's not working for me. Here are the key class, mapper and reducer, but my output doesn't come out as expected. Please help.
public class DOW implements WritableComparable<DOW> {
private Text year;
private Text day;
// private final Text count;
// private int count;
public DOW() {
this.year = new Text();
this.day = new Text();
// this.count = count;
}
public DOW(Text year, Text day) {
this.year = year;
this.day = day;
// this.count = count;
}
public Text getYear() {
return this.year;
}
public void setYear(Text year) {
this.year = year;
}
public Text getDay() {
return this.day;
}
public void setDay(Text day) {
this.day = day;
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
year.readFields(in);
day.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
year.write(out);
day.write(out);
}
@Override
public int compareTo(DOW o) {
// TODO Auto-generated method stub
int cmp = year.compareTo(o.year);
if (cmp != 0) {
return cmp;
}
return o.day.compareTo(this.day);
}
@Override
public String toString() {
// TODO Auto-generated method stub
return year + "," + day;
}
@Override
public boolean equals(Object o) {
// TODO Auto-generated method stub
if (o instanceof DOW) {
DOW tp = (DOW) o;
return year.equals(tp.year) && day.equals(tp.day);
}
return false;
}
@Override
public int hashCode() {
// TODO Auto-generated method stub
return year.hashCode() * 163 + day.hashCode();
}
}
public class AccidentDowDemo extends Configured implements Tool {
public static class DOWMapper extends Mapper<LongWritable, Text, DOW, IntWritable> {
private static final Logger sLogger = Logger.getLogger(DOWMapper.class);
@Override
protected void map(LongWritable key, Text value, Context context)
throws java.io.IOException, InterruptedException {
if (value.toString().contains(",")) {
String[] array = value.toString().split(",");
if (!array[9].equals("Date")) {
Date dt = null;
try {
dt = new SimpleDateFormat("dd/mm/yyyy").parse(array[9]);
} catch (ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
int year = dt.getYear();
int day = Integer.parseInt(array[10].toString());
context.write(new DOW(new Text(Integer.toString(year)),
new Text(Integer.toString(day))),
new IntWritable(1));
}
}
};
}
public static class DOWReducer extends Reducer<DOW, IntWritable, DOW, IntWritable> {
private static final Logger sLogger = Logger
.getLogger(DOWReducer.class);
@Override
protected void reduce(DOW key, Iterable<IntWritable> values,
Context context) throws java.io.IOException,
InterruptedException {
int count = 0;
sLogger.info("key =" + key);
for (IntWritable x : values) {
int val = Integer.parseInt(x.toString());
count = count + val;
}
context.write(key, new IntWritable(count));
};
}
public static class FirstPartitioner extends Partitioner<DOW, IntWritable> {
@Override
public int getPartition(DOW key, IntWritable value, int numPartitions) {
// TODO Auto-generated method stub
return Math.abs(Integer.parseInt(key.getYear().toString()) * 127)
% numPartitions;
}
}
public static class KeyComparator extends WritableComparator {
protected KeyComparator() {
super(DOW.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2) {
// TODO Auto-generated method stub
DOW ip1 = (DOW) w1;
DOW ip2 = (DOW) w2;
int cmp = ip1.getYear().compareTo(ip2.getYear());
if (cmp == 0) {
cmp = -1 * ip1.getDay().compareTo(ip2.getDay());
}
return cmp;
}
}
public static class GroupComparator extends WritableComparator {
protected GroupComparator() {
super(DOW.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2) {
// TODO Auto-generated method stub
DOW ip1 = (DOW) w1;
DOW ip2 = (DOW) w2;
return ip1.getYear().compareTo(ip2.getYear());
}
}
}

If you basically need to simulate
select year, day, count(*) as totalPerDay from DATA group by year, day
then you do not need secondary sort.
But if you need to produce something like a CUBE, where you calculate the total per year and the total per week in one MR job, then secondary sort is the way to go.
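For the plain group-by case, a minimal sketch (new mapreduce API; the column indices and the d/M/yyyy date layout are assumptions carried over from the question's mapper) can get by with a simple composite Text key and no custom comparators at all:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class YearDayCount {
    public static class YearDayMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            // Column indices follow the question's mapper: date in column 9, day-of-week code in column 10.
            if (fields.length > 10 && !fields[9].trim().equals("Date")) {
                String[] dmy = fields[9].trim().split("/");  // d/M/yyyy
                context.write(new Text(dmy[2] + "," + fields[10].trim()), ONE);
            }
        }
    }

    public static class YearDayReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }
}
With a plain Text key like this, the default hash partitioner and the natural Text ordering already bring identical (year, day) keys together, so no custom partitioner, sort comparator or grouping comparator is needed.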

It is more or less a kind of secondary sorting, but not quite. The problem is with the GroupComparator: the comparison has to be done on both year and day. The idea of the group comparator is to make sure that the same year goes into the same reduce call, but here we don't need that; instead, the records have to be grouped together if they have the same year and the same day (e.g. 1979 and Sunday). It should look something like this.
package accidentexercise;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class ClassGroupComparator extends WritableComparator
{
protected ClassGroupComparator()
{
super(TextpairWritable.class,true);
}
@SuppressWarnings("rawtypes")
public int compare(WritableComparable w,WritableComparable w1)
{
TextpairWritable s=(TextpairWritable)w;
TextpairWritable s1=(TextpairWritable)w1;
int cmp= s.year.compareTo(s1.year);
if(cmp==0)
{
cmp= -1*s.day.compareTo(s1.day);
}
return cmp;
}
}
I am pasting my whole code as well.
TextpairWritable:
package accidentexercise;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
public class TextpairWritable implements WritableComparable<TextpairWritable>
{
Text year=new Text();
Text day=new Text();
public TextpairWritable()
{
this.year=new Text();
this.day=new Text();
}
public TextpairWritable(Text year,Text day)
{
this.year=year;
this.day=day;
}
public TextpairWritable(String year,String day)
{
this.year=new Text(year);
this.day=new Text(day);
}
public TextpairWritable(TextpairWritable o)
{
this.year=o.year;
this.day=o.day;
}
public void set(Text year,Text day)
{
this.year=year;
this.day=day;
}
public Text getyear()
{
return this.year;
}
public Text getday()
{
return this.day;
}
@Override
public void readFields(DataInput in) throws IOException {
year.readFields(in);
day.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
year.write(out);
day.write(out);
}
public String toString()
{
return year+" "+day;
}
public int compareTo(TextpairWritable o)
{
int cmp=year.compareTo(o.year);
if(cmp==0)
{
cmp=day.compareTo(o.day);
}
return cmp;
}
}
GroupComparator:
package accidentexercise;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class ClassGroupComparator extends WritableComparator
{
protected ClassGroupComparator()
{
super(TextpairWritable.class,true);
}
@SuppressWarnings("rawtypes")
public int compare(WritableComparable w,WritableComparable w1)
{
TextpairWritable s=(TextpairWritable)w;
TextpairWritable s1=(TextpairWritable)w1;
int cmp= s.year.compareTo(s1.year);
if(cmp==0)
{
cmp= -1*s.day.compareTo(s1.day);
}
return cmp;
}
}
SortComparator:
package accidentexercise;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class ClassSortComparator extends WritableComparator
{
protected ClassSortComparator()
{
super(TextpairWritable.class,true);
}
@SuppressWarnings("rawtypes")
public int compare(WritableComparable w,WritableComparable w1)
{
TextpairWritable s=(TextpairWritable)w;
TextpairWritable s1=(TextpairWritable)w1;
int cmp=s.year.compareTo(s1.year);
if(cmp==0)
{
cmp= -1*s.day.compareTo(s1.day);
}
return cmp;
}
}
Mapper:
package accidentexercise;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ClassMapper extends Mapper<LongWritable,Text,TextpairWritable,IntWritable>
{
public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException
{
Logger log=LoggerFactory.getLogger(ClassMapper.class) ;
String s=value.toString();
String[] orig_data=s.split(",");
SimpleDateFormat df=new SimpleDateFormat("dd/MM/yyyy");
df.setLenient(false);
try
{
@SuppressWarnings("unused")
Date date=df.parse(orig_data[0]);
String myyear=orig_data[0].substring(6, 10);
context.write(new TextpairWritable(new Text(myyear),new Text(orig_data[2])),new IntWritable(Integer.parseInt(orig_data[1])));
}
catch(ParseException e)
{
log.info("Date is not correct"+e);
}
}
}
Reducer:
package accidentexercise;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class ClassReducer extends Reducer<TextpairWritable,IntWritable,TextpairWritable,IntWritable>
{
public void reduce(TextpairWritable key,Iterable<IntWritable> value,Context context) throws IOException,InterruptedException
{
int count=0;
for(IntWritable it:value)
{
count+=it.get();
}
context.write(key,new IntWritable(count));
}
}
Driver:
package accidentexercise;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ClassDriver {
public static void main(String args[]) throws Exception
{
if(args.length!=2)
{
System.err.println("Usage: Worddrivernewapi <input path> <output path>");
System.exit(-1);
}
Job job=new Job();
job.setJarByClass(ClassDriver.class);
job.setJobName("MyDriver");
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
job.setMapperClass(ClassMapper.class);
job.setPartitionerClass(ClassPartitioner.class);
job.setSortComparatorClass(ClassSortComparator.class);
job.setGroupingComparatorClass(ClassGroupComparator.class);
job.setReducerClass(ClassReducer.class);
//job.setNumReduceTasks(0);
job.setOutputKeyClass(TextpairWritable.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Partitioner:
package accidentexercise;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;
public class ClassPartitioner extends Partitioner<TextpairWritable,IntWritable>
{
@Override
public int getPartition(TextpairWritable tp, IntWritable value, int numPartitions) {
return Math.abs(Integer.parseInt(tp.getyear().toString()) * 127) % numPartitions;
}
}
Sample Input:
Date,Number_of_accidents,day
01/03/2014,18,2
02/03/2014,19,3
03/03/2014,20,4
01/03/2014,1,2
02/03/2014,2,3
03/03/2014,4,4
01/03/2014,8,2
02/03/2014,9,3
03/03/2014,2,4
Output:
01/03/2014,2,27
02/03/2014,3,30
03/03/2014,4,26

Related

NiFI "unable to find flowfile content"

I am using nifi 1.6 and get the following errors when trying to modify a clone of an incoming flowFile:
[1]"unable to find content for FlowFile: ... MissingFlowFileException
...
Caused by ContentNotFoundException: Could not find content for StandardClaim
...
Caused by java.io.EOFException: null"
[2]"FlowFileHandlingException: StandardFlowFileRecord... is not known in this session"
The first error occurs when trying to access the contents of the flow file, the second when removing the flow file from the session (within a catch of the first). This process is known to have worked under nifi 0.7.
The basic process is:
Clone the incoming flow file
Write to the clone
Write to the clone again (some additional formatting)
Repeat 1-3
The error occurs on the second iteration step 3.
An interesting point is that if, immediately after the clone is performed, a session.read of the clone is done, everything works fine. The read seems to reset some pointer.
I have created unit tests for this processor, but they do not fail in either case.
Below is code simplified from the actual version in use that demonstrates the issue. (The development system is not connected, so I had to copy the code by hand. Please forgive any typos - it should be close. This is also why a full stack trace is not provided.) The processor doing the work has a property that determines whether an immediate read is done or not, so both scenarios can be exercised easily. To set it up, all that is needed is a GetFile processor to supply the input and terminators for the output of the SampleCloningProcessor. A sample input file is included as well. The meat of the code is in the onTrigger and manipulate methods. The manipulation in this simplified version doesn't really do anything but copy the input to the output.
Any insights into why this is happening and suggestions for corrections will be appreciated - thanks.
SampleCloningProcessor.java
package sample.processors.cloning;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.io.StreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import com.google.gson.Gson;
@Tags({"example", "clone"})
@CapabilityDescription("Demonstrates cloning of flowfile failure.")
public class SampleCloningProcessor extends AbstractProcessor {
/* Determines if an immediate read is performed after cloning of incoming flowfile. */
public static final PropertyDescriptor IMMEDIATE_READ = new PropertyDescriptor.Builder()
.name("immediateRead")
.description("Determines if processor runs successfully. If a read is done immediatly "
+ "after the clone of the incoming flowFile, then the processor should run successfully.")
.required(true)
.allowableValues("true", "false")
.defaultValue("true")
.addValidator(StandardValidators.BOOLEAN_VALIDATOR)
.build();
public static final Relationship SUCCESS = new Relationship.Builder().name("success").
description("No unexpected errors.").build();
public static final Relationship FAILURE = new Relationship.Builder().name("failure").
description("Errors were thrown.").build();
private Set<Relationship> relationships;
private List<PropertyDescriptor> properties;
@Override
public void init(final ProcessorInitializationContext context) {
relationships = new HashSet<>(Arrays.asList(SUCCESS, FAILURE));
properties = Arrays.asList(IMMEDIATE_READ);
}
@Override
public Set<Relationship> getRelationships() {
return this.relationships;
}
@Override
public List<PropertyDescriptor> getSupportedPropertyDescriptors() {
return this.properties;
}
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile incomingFlowFile = session.get();
if (incomingFlowFile == null) {
return;
}
try {
final InfileReader inFileReader = new InfileReader();
session.read(incomingFlowFile, inFileReader);
Product product = inFileReader.getProduct();
boolean transfer = false;
getLogger().info("\tSession :\n" + session);
getLogger().info("\toriginal :\n" + incomingFlowFile);
for(int i = 0; i < 2; i++) {
transfer = manipulate(context, session, incomingFlowFile, product);
}
} catch (Exception e) {
getLogger().error(e.getMessage(), e);
session.rollback(true);
}
}
private boolean manipulate(final ProcessContext context, final ProcessSession session,
final FlowFile incomingFlowFile, final Product product) {
boolean transfer = false;
FlowFile outgoingFlowFile = null;
boolean immediateRead = context.getProperty(IMMEDIATE_READ).asBoolean();
try {
//Clone incoming flowFile
outgoingFlowFile = session.clone(incomingFlowFile);
getLogger().info("\tclone outgoing :\n" + outgoingFlowFile);
if(immediateRead) {
readFlowFile(session, outgoingFlowFile);
}
//First write into clone
StageOneWriter stage1Write = new StageOneWriter(product);
outgoingFlowFile = session.write(outgoingFlowFile, stage1Write);
getLogger().info("\twrite outgoing :\n" + outgoingFlowFile);
// Format the cloned file with another write
outgoingFlowFile = formatFlowFile(session, outgoingFlowFile);
getLogger().info("\tformat outgoing :\n" + outgoingFlowFile);
session.transfer(outgoingFlowFile, SUCCESS);
transfer = true;
} catch (Exception e) {
getLogger().error(e.getMessage(), e);
if (outgoingFlowFile != null) {
session.remove(outgoingFlowFile);
}
}
return transfer;
}
private void readFlowFile(final ProcessSession session, final FlowFile flowFile) {
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
try (Scanner scanner = new Scanner(in)) {
scanner.useDelimiter("\\A").next();
}
}
});
}
private FlowFile formatFlowFile(final ProcessSession session, FlowFile flowFile) {
OutputFormatWriter formatWriter = new OutputFormatWriter();
flowFile = session.write(flowFile, formatWriter);
return flowFile;
}
private static class OutputFormatWriter implements StreamCallback {
@Override
public void process(final InputStream in, final OutputStream out) throws IOException {
try {
IOUtils.copy(in, out);
out.flush();
} finally {
IOUtils.closeQuietly(in);
IOUtils.closeQuietly(out);
}
}
}
private static class StageOneWriter implements OutputStreamCallback {
private Product product = null;
public StageOneWriter(Product product) {
this.product = product;
}
@Override
public void process(final OutputStream out) throws IOException {
final Gson gson = new Gson();
final String json = gson.toJson(product);
out.write(json.getBytes());
}
}
private static class InfileReader implements InputStreamCallback {
private Product product = null;
public InfileReader() {
}
@Override
public void process(final InputStream in) throws IOException {
product = null;
final Gson gson = new Gson();
Reader inReader = new InputStreamReader(in, "UTF-8");
product = gson.fromJson(inReader, Product.class);
}
public Product getProduct() {
return product;
}
}
}
SampleCloningProcessorTest.java
package sample.processors.cloning;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Before;
import org.junit.Test;
public class SampleCloningProcessorTest {
final static String flowFileContent = "{"
+ "\"cost\": \"cost 1\","
+ "\"description\": \"description 1\","
+ "\"markup\": 1.2,"
+ "\"name\":\"name 1\","
+ "\"supplier\":\"supplier 1\""
+ "}";
private TestRunner testRunner;
@Before
public void init() {
testRunner = TestRunners.newTestRunner(SampleCloningProcessor.class);
testRunner.enqueue(flowFileContent);
}
@Test
public void testProcessorImmediateRead() {
testRunner.setProperty(SampleCloningProcessor.IMMEDIATE_READ, "true");
testRunner.run();
testRunner.assertTransferCount("success", 2);
}
@Test
public void testProcessorImmediateRead_false() {
testRunner.setProperty(SampleCloningProcessor.IMMEDIATE_READ, "false");
testRunner.run();
testRunner.assertTransferCount("success", 2);
}
}
Product.java
package sample.processors.cloning;
public class Product {
private String name;
private String description;
private String supplier;
private String cost;
private float markup;
public String getName() {
return name;
}
public void setName(final String name) {
this.name = name;
}
public String getDescription() {
return description;
}
public void setDescription(final String description) {
this.description = description;
}
public String getSupplier() {
return supplier;
}
public void setSupplier(final String supplier) {
this.supplier = supplier;
}
public String getCost() {
return cost;
}
public void setCost(final String cost) {
this.cost = cost;
}
public float getMarkup() {
return markup;
}
public void setMarkup(final float markup) {
this.markup = markup;
}
}
product.json A sample input file.
{
"const" : "cost 1",
"description" : "description 1",
"markup" : 1.2,
"name" : "name 1",
"supplier" : "supplier 1"
}
Reported as a bug in Nifi. Being addressed by https://issues.apache.org/jira/browse/NIFI-5879

I want to show max, min and avg temperature using Hadoop

My project is to show the max, min and avg temperature. I have already done that, but now I have to show these functions using a group-by key. There are 4 radio buttons for year, month, date and city in my application. If I select one, it asks me to input the aggregate function (max, min, avg). For this I need to change my CompositeGroupKey class, but I don't have any idea how. So please help me and suggest the changes that need to be made to the code.
The driver :
import org.apache.hadoop.io.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxTemperature
{
public static void main(String[] args) throws Exception
{
if (args.length != 2)
{
System.err.println("Please Enter the input and output parameters");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(MaxTemperature.class);
job.setJobName("Max temperature");
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path (args[1]));
job.setMapperClass(MaxTemperatureMapper.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setMapOutputKeyClass(CompositeGroupKey.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(CompositeGroupKey.class);
job.setOutputValueClass(DoubleWritable.class);
System.exit(job.waitForCompletion(true)?0:1);
}
}
The mapper :
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import java.io.IOException;
public class MaxTemperatureMapper extends Mapper <LongWritable, Text, CompositeGroupKey, IntWritable>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String line = value.toString();
int year = Integer.parseInt(line.substring(0,4));
String mnth = line.substring(7,10);
int date = Integer.parseInt(line.substring(10,12));
int temp= Integer.parseInt(line.substring(12,14));
CompositeGroupKey cntry = new CompositeGroupKey(year,mnth, date);
context.write(cntry, new IntWritable(temp));
}
}
The reducer :
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.*;
import java.io.IOException;
public class MaxTemperatureReducer extends Reducer <CompositeGroupKey, IntWritable, CompositeGroupKey, CompositeGroupkeyall >{
public void reduce(CompositeGroupKey key, Iterable<IntWritable> values , Context context) throws IOException,InterruptedException
{
Double max = Double.MIN_VALUE;
Double min =Double.MAX_VALUE;
for (IntWritable value : values )
{
min = Math.min(min, value.get());
max = Math.max(max, value.get());
}
CompositeGroupkeyall val =new CompositeGroupkeyall(max,min);
context.write(key, val);
}
}
And the composite key :
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
class CompositeGroupKey implements WritableComparable<CompositeGroupKey> {
int year;
String mnth;
int date;
CompositeGroupKey(int y, String c, int d){
year = y;
mnth = c;
date = d;
}
CompositeGroupKey(){}
public void write(DataOutput out) throws IOException {
out.writeInt(year);
WritableUtils.writeString(out, mnth);
out.writeInt(date);
}
public void readFields(DataInput in) throws IOException {
this.year = in.readInt();
this.mnth = WritableUtils.readString(in);
this.date = in.readInt();
}
public int compareTo(CompositeGroupKey pop) {
if (pop == null)
return 0;
int intcnt;
intcnt = Integer.valueOf(year).toString().compareTo(Integer.valueOf(pop.year).toString());
if(intcnt != 0){
return intcnt;
}else if(mnth.compareTo(pop.mnth) != 0){
return mnth.compareTo(pop.mnth);
}else{
return Integer.valueOf(date).toString().compareTo(Integer.valueOf(pop.date).toString());
}
}
public String toString() {
return year + " :" + mnth.toString() + " :" + date;
}
}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
class CompositeGroupkeyall implements WritableComparable<CompositeGroupkeyall> {
Double max;
Double min;
CompositeGroupkeyall(double x, double y){
max = x ;
min = y ;
}
CompositeGroupkeyall(){}
public void readFields(DataInput in) throws IOException {
this.max = in.readDouble();
this.min = in.readDouble();
}
public void write(DataOutput out) throws IOException {
out.writeDouble(max);
out.writeDouble(min);
}
public int compareTo(CompositeGroupkeyall arg0) {
return -1;
}
public String toString() {
return max + " " + min +" " ;
}
}
You can emit more key-value pairs, as below, and let the same reducer process the data; the per-date, per-month and per-year aggregates will then all be processed by the same reducer.
CompositeGroupKey cntry = new CompositeGroupKey(year, mnth, date);
CompositeGroupKey cntry_date = new CompositeGroupKey((int)0, "ALL", date);
CompositeGroupKey cntry_mnth = new CompositeGroupKey((int)0, mnth, (int) 1);
CompositeGroupKey cntry_year = new CompositeGroupKey(year, "ALL", (int) 1);
context.write(cntry, new IntWritable(temp));
context.write(cntry_date, new IntWritable(temp));
context.write(cntry_mnth, new IntWritable(temp));
context.write(cntry_year, new IntWritable(temp));
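If you also need max, min and avg per group, a reducer along the following lines computes all three in one pass. This is only a sketch: it reuses the question's CompositeGroupKey but emits a plain Text value instead of CompositeGroupkeyall, so the driver's setOutputValueClass would have to be changed to Text accordingly.
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class TemperatureStatsReducer extends Reducer<CompositeGroupKey, IntWritable, CompositeGroupKey, Text> {
    @Override
    public void reduce(CompositeGroupKey key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        double max = Double.NEGATIVE_INFINITY;
        double min = Double.POSITIVE_INFINITY;
        long sum = 0;
        long count = 0;
        for (IntWritable value : values) {
            int temp = value.get();
            max = Math.max(max, temp);
            min = Math.min(min, temp);
            sum += temp;
            count++;
        }
        double avg = (double) sum / count;
        context.write(key, new Text("max=" + max + " min=" + min + " avg=" + avg));
    }
}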

Can't stop JavaFX tables from ignoring my setter function validation

I'm using JavaFX to do some table work. I want to validate my text fields in the myTextRow class. In the setText2 method I check that the input is not longer than 6 characters, but it has no effect at all.
import java.util.ArrayList;
import javafx.beans.property.SimpleStringProperty;
import javafx.beans.property.StringProperty;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.scene.control.ContentDisplay;
import javafx.scene.control.TableCell;
import javafx.scene.control.TableColumn;
import javafx.scene.control.TextArea;
import javafx.util.Callback;
import javafx.application.Application;
import static javafx.application.Application.launch;
import javafx.beans.property.IntegerProperty;
import javafx.beans.property.SimpleIntegerProperty;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import javafx.event.ActionEvent;
import javafx.event.EventHandler;
import javafx.geometry.Insets;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.TableView;
import javafx.scene.control.cell.PropertyValueFactory;
import javafx.scene.layout.BorderPane;
import javafx.scene.layout.HBox;
import javafx.stage.Stage;
public class Supermain extends Application {
@Override
public void start(Stage primaryStage) {
ArrayList myindizes=new ArrayList();
final TableView<myTextRow> table = new TableView<>();
table.setEditable(true);
table.setStyle("-fx-text-wrap: true;");
//Table columns
TableColumn<myTextRow, String> clmID = new TableColumn<>("ID");
clmID.setMinWidth(160);
clmID.setCellValueFactory(new PropertyValueFactory<>("ID"));
TableColumn<myTextRow, String> clmtext = new TableColumn<>("Text");
clmtext.setMinWidth(160);
clmtext.setCellValueFactory(new PropertyValueFactory<>("text"));
clmtext.setCellFactory(new TextFieldCellFactory());
TableColumn<myTextRow, String> clmtext2 = new TableColumn<>("Text2");
clmtext2.setMinWidth(160);
clmtext2.setCellValueFactory(new PropertyValueFactory<>("text2"));
clmtext2.setCellFactory(new TextFieldCellFactory());
//Add data
final ObservableList<myTextRow> data = FXCollections.observableArrayList(
new myTextRow(5, "Lorem","bla"),
new myTextRow(2, "Ipsum","bla")
);
table.getColumns().addAll(clmID, clmtext,clmtext2);
table.setItems(data);
HBox hBox = new HBox();
hBox.setSpacing(5.0);
hBox.setPadding(new Insets(5, 5, 5, 5));
Button btn = new Button();
btn.setText("Get Data");
btn.setOnAction(new EventHandler<ActionEvent>() {
@Override
public void handle(ActionEvent event) {
for (myTextRow data1 : data) {
System.out.println("data:" + data1.getText2());
}
}
});
hBox.getChildren().add(btn);
BorderPane pane = new BorderPane();
pane.setTop(hBox);
pane.setCenter(table);
primaryStage.setScene(new Scene(pane, 640, 480));
primaryStage.show();
}
/**
* #param args the command line arguments
*/
public static void main(String[] args) {
launch(args);
}
public static class TextFieldCellFactory
implements Callback<TableColumn<myTextRow, String>, TableCell<myTextRow, String>> {
@Override
public TableCell<myTextRow, String> call(TableColumn<myTextRow, String> param) {
TextFieldCell textFieldCell = new TextFieldCell();
return textFieldCell;
}
public static class TextFieldCell extends TableCell<myTextRow, String> {
private TextArea textField;
private StringProperty boundToCurrently = null;
public TextFieldCell() {
textField = new TextArea();
textField.setWrapText(true);
textField.setMinWidth(this.getWidth() - this.getGraphicTextGap() * 2);
this.setGraphic(textField);
}
@Override
protected void updateItem(String item, boolean empty) {
super.updateItem(item, empty);
if (!empty) {
// Show the Text Field
this.setContentDisplay(ContentDisplay.GRAPHIC_ONLY);
// myindizes.add(getIndex());
// Retrieve the actual String Property that should be bound to the TextField
// If the TextField is currently bound to a different StringProperty
// Unbind the old property and rebind to the new one
ObservableValue<String> ov = getTableColumn().getCellObservableValue(getIndex());
SimpleStringProperty sp = (SimpleStringProperty) ov;
if (this.boundToCurrently == null) {
this.boundToCurrently = sp;
this.textField.textProperty().bindBidirectional(sp);
} else if (this.boundToCurrently != sp) {
this.textField.textProperty().unbindBidirectional(this.boundToCurrently);
this.boundToCurrently = sp;
this.textField.textProperty().bindBidirectional(this.boundToCurrently);
}
double height = real_lines_height(textField.getText(), this.getWidth(), 30, 22);
textField.setPrefHeight(height);
textField.setMaxHeight(height);
textField.setMaxHeight(Double.MAX_VALUE);
// if height bigger than the biggest height in the row
//-> change all heights of the row(textfields ()typeof textarea) to this height
// else leave the height as it is
//System.out.println("item=" + item + " ObservableValue<String>=" + ov.getValue());
//this.textField.setText(item); // No longer need this!!!
} else {
this.setContentDisplay(ContentDisplay.TEXT_ONLY);
}
}
}
}
public class myTextRow {
private final SimpleIntegerProperty ID;
private final SimpleStringProperty text;
private final SimpleStringProperty text2;
public myTextRow(int ID, String text,String text2) {
this.ID = new SimpleIntegerProperty(ID);
this.text = new SimpleStringProperty(text);
this.text2 = new SimpleStringProperty(text2);
}
public void setID(int id) {
this.ID.set(id);
}
public void setText(String text) {
this.text.set(text);
}
public void setText2(String text) {
if(text2check(text)){
this.text2.set(text);}
else
{System.out.println("wrong value!!!");}
}
public int getID() {
return ID.get();
}
public String getText() {
return text.get();
}
public StringProperty textProperty() {
return text;
}
public String getText2() {
return text2.get();
}
public StringProperty text2Property() {
return text2;
}
public IntegerProperty IDProperty() {
return ID;
}
public boolean text2check(String t)
{
if(t.length()>6)return false;
return true;
}
}
private static double real_lines_height(String s, double width, double heightCorrector, double widthCorrector) {
HBox h = new HBox();
Label l = new Label("Text");
h.getChildren().add(l);
Scene sc = new Scene(h);
l.applyCss();
double line_height = l.prefHeight(-1);
int new_lines = s.replaceAll("[^\r\n|\r|\n]", "").length();
// System.out.println("new lines= "+new_lines);
String[] lines = s.split("\r\n|\r|\n");
// System.out.println("line count func= "+ lines.length);
int count = 0;
//double rest=0;
for (int i = 0; i < lines.length; i++) {
double text_width = get_text_width(lines[i]);
double plus_lines = Math.ceil(text_width / (width - widthCorrector));
if (plus_lines > 1) {
count += plus_lines;
//rest+= (text_width / (width-widthCorrector)) - plus_lines;
} else {
count += 1;
}
}
//count+=(int) Math.ceil(rest);
count += new_lines - lines.length;
return count * line_height + heightCorrector;
}
private static double get_text_width(String s) {
HBox h = new HBox();
Label l = new Label(s);
l.setWrapText(false);
h.getChildren().add(l);
Scene sc = new Scene(h);
l.applyCss();
// System.out.println("dubbyloop.FXMLDocumentController.get_text_width(): "+l.prefWidth(-1));
return l.prefWidth(-1);
}
}
A rule of the JavaFX Properties pattern is that for a property x, invoking xProperty().setValue(value) should always be identical to invoking setX(value). Your validation makes this not true. The binding your cell implementation uses invokes the setValue method on the property, which is why it bypasses your validation check.
(Side note: in all the code I am going to change the names so that they adhere to proper naming conventions.)
The default way to implement a property in this pattern is:
public class MyTextRow {
private final StringProperty text = new SimpleStringProperty();
public StringProperty textProperty() {
return text ;
}
public final void setText(String text) {
textProperty().set(text);
}
public final String getText() {
return textProperty().get();
}
}
By having the set/get methods delegate to the appropriate property methods, you are guaranteed these rules are enforced, even if the textProperty() method is overridden in a subclass. Making the set and get methods final ensures that the rule is not broken by a subclass overriding those methods.
One approach might be to override the set and setValue methods in the property, as follows:
public class MyTextRow {
private final StringProperty text2 = new StringPropertyBase() {
@Override
public String getName() {
return "text2";
}
@Override
public Object getBean() {
return MyTextRow.this ;
}
@Override
public void setValue(String value) {
if (text2Check(value)) {
super.setValue(value);
}
}
@Override
public void set(String value) {
if (text2Check(value)) {
super.set(value);
}
}
};
public StringProperty text2Property() {
return text2 ;
}
public final void setText2(String text2) {
text2Property().set(text2);
}
public final String getText2() {
return text2Property().get();
}
// ...
}
However, I think this will break the bidirectional binding that you have with the text property in the TextArea (basically, there is no way to communicate back to the text area when a change is vetoed, so the text area will not know to revert to the previous value). One fix would be to implement your cell using listeners on the properties instead of bindings. You could use a TextFormatter on the text area that simply updates the property and vetoes the text change if the change doesn't occur.
Here is a complete SSCCE using this approach:
import java.util.function.Function;
import java.util.function.UnaryOperator;
import javafx.application.Application;
import javafx.beans.property.Property;
import javafx.beans.property.SimpleStringProperty;
import javafx.beans.property.StringProperty;
import javafx.beans.property.StringPropertyBase;
import javafx.scene.Scene;
import javafx.scene.control.ContentDisplay;
import javafx.scene.control.TableCell;
import javafx.scene.control.TableColumn;
import javafx.scene.control.TableView;
import javafx.scene.control.TextArea;
import javafx.scene.control.TextFormatter;
import javafx.scene.control.TextFormatter.Change;
import javafx.stage.Stage;
public class VetoStringChange extends Application {
@Override
public void start(Stage primaryStage) {
TableView<Item> table = new TableView<>();
table.setEditable(true);
table.getColumns().add(column("Item", Item::nameProperty));
table.getColumns().add(column("Description", Item::descriptionProperty));
for (int i = 1; i <= 20 ; i++) {
table.getItems().add(new Item("Item "+i, ""));
}
primaryStage.setScene(new Scene(table, 600, 600));
primaryStage.show();
}
public static <S> TableColumn<S,String> column(String title, Function<S,Property<String>> property) {
TableColumn<S,String> col = new TableColumn<>(title);
col.setCellValueFactory(cellData -> property.apply(cellData.getValue()));
col.setCellFactory(tc -> new TextAreaCell<S>(property));
col.setPrefWidth(200);
return col ;
}
public static class TextAreaCell<S> extends TableCell<S, String> {
private TextArea textArea ;
public TextAreaCell(Function<S, Property<String>> propertyAccessor) {
textArea = new TextArea();
textArea.setWrapText(true);
textArea.setMinWidth(this.getWidth() - this.getGraphicTextGap() * 2);
textArea.setMaxHeight(Double.MAX_VALUE);
UnaryOperator<Change> filter = c -> {
String proposedText = c.getControlNewText() ;
Property<String> prop = propertyAccessor.apply(getTableView().getItems().get(getIndex()));
prop.setValue(proposedText);
if (prop.getValue().equals(proposedText)) {
return c ;
} else {
return null ;
}
};
textArea.setTextFormatter(new TextFormatter<String>(filter));
this.setGraphic(textArea);
}
@Override
protected void updateItem(String item, boolean empty) {
super.updateItem(item, empty);
if (!empty) {
if (! textArea.getText().equals(item)) {
textArea.setText(item);
}
// Show the Text Field
this.setContentDisplay(ContentDisplay.GRAPHIC_ONLY);
} else {
this.setContentDisplay(ContentDisplay.TEXT_ONLY);
}
}
}
public static class Item {
private final StringProperty name = new StringPropertyBase() {
@Override
public Object getBean() {
return Item.this;
}
@Override
public String getName() {
return "name" ;
}
@Override
public void set(String value) {
if (checkValue(value)) {
super.set(value);
}
}
@Override
public void setValue(String value) {
if (checkValue(value)) {
super.setValue(value);
}
}
};
private final StringProperty description = new SimpleStringProperty();
public Item(String name, String description) {
setName(name);
setDescription(description);
}
private boolean checkValue(String value) {
return value.length() <= 6 ;
}
public final StringProperty nameProperty() {
return this.name;
}
public final String getName() {
return this.nameProperty().get();
}
public final void setName(final String name) {
this.nameProperty().set(name);
}
public final StringProperty descriptionProperty() {
return this.description;
}
public final String getDescription() {
return this.descriptionProperty().get();
}
public final void setDescription(final String description) {
this.descriptionProperty().set(description);
}
}
public static void main(String[] args) {
launch(args);
}
}
Another approach is to allow a "commit and revert" type strategy on your property:
public class MyTextRow {
private final StringProperty text2 = new SimpleStringProperty();
public MyTextRow() {
text2.addListener((obs, oldText, newText) -> {
if (! checkText2(newText)) {
// sanity check:
if (checkText2(oldText)) {
text2.set(oldText);
}
}
});
}
public StringProperty text2Property() {
return text2 ;
}
public final void setText2(String text2) {
text2Property().set(text2);
}
public final String getText2() {
return text2Property().get();
}
}
In general I dislike validation by listening for an invalid value and reverting like this, because other listeners to the property will see all the changes, including changes to and from invalid values. However, this might be the best option in this case.
Finally, you could consider vetoing invalid changes as in the first option, and also setting a TextFormatter on the control in the cell that simply doesn't allow text entry that results in an invalid string. This isn't always possible from a usability perspective (e.g. if empty strings are invalid, you almost always want to allow the user to temporarily delete all the text), and it means keeping two validation checks in sync in your code, which is a pain.
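As a rough illustration of that last idea, the filter below (just a sketch, reusing the 6-character limit from the question) rejects any edit whose resulting text would be too long, so invalid values never reach the property in the first place:
import javafx.scene.control.TextArea;
import javafx.scene.control.TextFormatter;

// e.g. in the cell constructor, in addition to the veto in the model class
TextArea textArea = new TextArea();
textArea.setTextFormatter(new TextFormatter<String>(change ->
        change.getControlNewText().length() <= 6 ? change : null));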

Why is the Hadoop shuffle not working as expected?

I have this Hadoop MapReduce code that works on graph data (in adjacency-list form) and is similar in spirit to in-adjacency-list to out-adjacency-list transformation algorithms. The main MapReduce task code is the following:
public class TestTask extends Configured
implements Tool {
public static class TTMapper extends MapReduceBase
implements Mapper<Text, TextArrayWritable, Text, NeighborWritable> {
@Override
public void map(Text key,
TextArrayWritable value,
OutputCollector<Text, NeighborWritable> output,
Reporter reporter) throws IOException {
int numNeighbors = value.get().length;
double weight = (double)1 / numNeighbors;
Text[] neighbors = (Text[]) value.toArray();
NeighborWritable me = new NeighborWritable(key, new DoubleWritable(weight));
for (int i = 0; i < neighbors.length; i++) {
output.collect(neighbors[i], me);
}
}
}
public static class TTReducer extends MapReduceBase
implements Reducer<Text, NeighborWritable, Text, Text> {
@Override
public void reduce(Text key,
Iterator<NeighborWritable> values,
OutputCollector<Text, Text> output,
Reporter arg3)
throws IOException {
ArrayList<NeighborWritable> neighborList = new ArrayList<NeighborWritable>();
while(values.hasNext()) {
neighborList.add(values.next());
}
NeighborArrayWritable neighbors = new NeighborArrayWritable
(neighborList.toArray(new NeighborWritable[0]));
Text out = new Text(neighbors.toString());
output.collect(key, out);
}
}
@Override
public int run(String[] arg0) throws Exception {
JobConf conf = Util.getMapRedJobConf("testJob",
SequenceFileInputFormat.class,
TTMapper.class,
Text.class,
NeighborWritable.class,
1,
TTReducer.class,
Text.class,
Text.class,
TextOutputFormat.class,
"test/in",
"test/out");
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new TestTask(), args);
System.exit(res);
}
}
The auxiliary code is following:
TextArrayWritable:
public class TextArrayWritable extends ArrayWritable {
public TextArrayWritable() {
super(Text.class);
}
public TextArrayWritable(Text[] values) {
super(Text.class, values);
}
}
NeighborWritable:
public class NeighborWritable implements Writable {
private Text nodeId;
private DoubleWritable weight;
public NeighborWritable(Text nodeId, DoubleWritable weight) {
this.nodeId = nodeId;
this.weight = weight;
}
public NeighborWritable () { }
public Text getNodeId() {
return nodeId;
}
public DoubleWritable getWeight() {
return weight;
}
public void setNodeId(Text nodeId) {
this.nodeId = nodeId;
}
public void setWeight(DoubleWritable weight) {
this.weight = weight;
}
@Override
public void readFields(DataInput in) throws IOException {
nodeId = new Text();
nodeId.readFields(in);
weight = new DoubleWritable();
weight.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
nodeId.write(out);
weight.write(out);
}
public String toString() {
return "NW[nodeId=" + (nodeId != null ? nodeId.toString() : "(null)") +
",weight=" + (weight != null ? weight.toString() : "(null)") + "]";
}
public boolean equals(Object o) {
if (!(o instanceof NeighborWritable)) {
return false;
}
NeighborWritable that = (NeighborWritable)o;
return (nodeId.equals(that.getNodeId()) && (weight.equals(that.getWeight())));
}
}
and the Util class:
public class Util {
public static JobConf getMapRedJobConf(String jobName,
Class<? extends InputFormat> inputFormatClass,
Class<? extends Mapper> mapperClass,
Class<?> mapOutputKeyClass,
Class<?> mapOutputValueClass,
int numReducer,
Class<? extends Reducer> reducerClass,
Class<?> outputKeyClass,
Class<?> outputValueClass,
Class<? extends OutputFormat> outputFormatClass,
String inputDir,
String outputDir) throws IOException {
JobConf conf = new JobConf();
if (jobName != null)
conf.setJobName(jobName);
conf.setInputFormat(inputFormatClass);
conf.setMapperClass(mapperClass);
if (numReducer == 0) {
conf.setNumReduceTasks(0);
conf.setOutputKeyClass(outputKeyClass);
conf.setOutputValueClass(outputValueClass);
conf.setOutputFormat(outputFormatClass);
} else {
// may set actual number of reducers
// conf.setNumReduceTasks(numReducer);
conf.setMapOutputKeyClass(mapOutputKeyClass);
conf.setMapOutputValueClass(mapOutputValueClass);
conf.setReducerClass(reducerClass);
conf.setOutputKeyClass(outputKeyClass);
conf.setOutputValueClass(outputValueClass);
conf.setOutputFormat(outputFormatClass);
}
// delete the existing target output folder
FileSystem fs = FileSystem.get(conf);
fs.delete(new Path(outputDir), true);
// specify input and output DIRECTORIES (not files)
FileInputFormat.addInputPath(conf, new Path(inputDir));
FileOutputFormat.setOutputPath(conf, new Path(outputDir));
return conf;
}
}
My input is following graph: (in binary format, here I am giving the text format)
1 2
2 1,3,5
3 2,4
4 3,5
5 2,4
According to the logic of the code the output should be:
1 NWArray[size=1,{NW[nodeId=2,weight=0.3333333333333333],}]
2 NWArray[size=3,{NW[nodeId=5,weight=0.5],NW[nodeId=3,weight=0.5],NW[nodeId=1,weight=1.0],}]
3 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=4,weight=0.5],}]
4 NWArray[size=2,{NW[nodeId=5,weight=0.5],NW[nodeId=3,weight=0.5],}]
5 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=4,weight=0.5],}]
But the output is coming as:
1 NWArray[size=1,{NW[nodeId=2,weight=0.3333333333333333],}]
2 NWArray[size=3,{NW[nodeId=5,weight=0.5],NW[nodeId=5,weight=0.5],NW[nodeId=5,weight=0.5],}]
3 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=2,weight=0.3333333333333333],}]
4 NWArray[size=2,{NW[nodeId=5,weight=0.5],NW[nodeId=5,weight=0.5],}]
5 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=2,weight=0.3333333333333333],}]
I cannot understand the reason why the expected output is not coming out. Any help will be appreciated.
Thanks.
You're falling foul of object re-use
while(values.hasNext()) {
neighborList.add(values.next());
}
values.next() will return the same object reference, but the underlying contents of that object will change for each iteration (the readFields method is called to re-populate the contents)
Suggest you amend to the following (you'll need to obtain the Configuration conf variable from a setup method, unless you can obtain it from the Reporter or OutputCollector - sorry, I don't use the old API):
while(values.hasNext()) {
neighborList.add(
ReflectionUtils.copy(conf, values.next(), new NeighborWritable()));
}
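Equivalently, and without having to pre-create the destination object, Hadoop's WritableUtils.clone can be used; a minimal sketch, again assuming a Configuration named conf is available:
import org.apache.hadoop.io.WritableUtils;

while (values.hasNext()) {
    // clone() serializes and deserializes the value, so each list entry is an
    // independent copy rather than another reference to the re-used iterator object.
    neighborList.add(WritableUtils.clone(values.next(), conf));
}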
But I still can't understand why my unit test passed then. Here is the code -
public class UWLTInitReducerTest {
private Text key;
private Iterator<NeighborWritable> values;
private NeighborArrayWritable nodeData;
private TTReducer reducer;
/**
* Set up the states for calling the map function
*/
@Before
public void setUp() throws Exception {
key = new Text("1001");
NeighborWritable[] neighbors = new NeighborWritable[4];
for (int i = 0; i < 4; i++) {
neighbors[i] = new NeighborWritable(new Text("300" + i), new DoubleWritable((double) 1 / (1 + i)));
}
values = Arrays.asList(neighbors).iterator();
nodeData = new NeighborArrayWritable(neighbors);
reducer = new TTReducer();
}
/**
* Test method for InitModelMapper#map - valid input
*/
@Test
public void testMapValid() {
// mock the output object
OutputCollector<Text, UWLTNodeData> output = mock(OutputCollector.class);
try {
// call the API
reducer.reduce(key, values, output, null);
// in order (sequential) verification of the calls to output.collect()
verify(output).collect(key, nodeData);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Why didn't this code catch the bug?
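A likely explanation, offered as an assumption rather than something stated in the thread: the test drives the reducer with Arrays.asList(neighbors).iterator(), which hands back four distinct NeighborWritable objects, whereas Hadoop's real value iterator returns a single re-used instance that is re-populated via readFields before each next(). A test would only reproduce the bug with an iterator that mimics that re-use, for example:
// Hypothetical iterator that mimics Hadoop's object re-use: one instance,
// re-populated before every next() call.
Iterator<NeighborWritable> reusingValues = new Iterator<NeighborWritable>() {
    private final NeighborWritable reused = new NeighborWritable();
    private int i = 0;
    public boolean hasNext() { return i < 4; }
    public NeighborWritable next() {
        reused.setNodeId(new Text("300" + i));
        reused.setWeight(new DoubleWritable((double) 1 / (1 + i)));
        i++;
        return reused;
    }
    public void remove() { throw new UnsupportedOperationException(); }
};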

Using a custom Object as key emitted by mapper

I have a situation in which the mapper emits as key an object of a custom type.
It has two fields: an IntWritable ID and a data array IntArrayWritable.
The implementation is as follows.
import java.io.*;
import org.apache.hadoop.io.*;
public class PairDocIdPerm implements WritableComparable<PairDocIdPerm> {
public PairDocIdPerm(){
this.permId = new IntWritable(-1);
this.SignaturePerm = new IntArrayWritable();
}
public IntWritable getPermId() {
return permId;
}
public void setPermId(IntWritable permId) {
this.permId = permId;
}
public IntArrayWritable getSignaturePerm() {
return SignaturePerm;
}
public void setSignaturePerm(IntArrayWritable signaturePerm) {
SignaturePerm = signaturePerm;
}
private IntWritable permId;
private IntArrayWritable SignaturePerm;
public PairDocIdPerm(IntWritable permId,IntArrayWritable SignaturePerm) {
this.permId = permId;
this.SignaturePerm = SignaturePerm;
}
@Override
public void write(DataOutput out) throws IOException {
permId.write(out);
SignaturePerm.write(out);
}
@Override
public void readFields(DataInput in) throws IOException {
permId.readFields(in);
SignaturePerm.readFields(in);
}
@Override
public int hashCode() { // same permId must go to same reducer. there fore just permId
return permId.get();//.hashCode();
}
@Override
public boolean equals(Object o) {
if (o instanceof PairDocIdPerm) {
PairDocIdPerm tp = (PairDocIdPerm) o;
return permId.equals(tp.permId) && SignaturePerm.equals(tp.SignaturePerm);
}
return false;
}
@Override
public String toString() {
return permId + "\t" +SignaturePerm.toString();
}
@Override
public int compareTo(PairDocIdPerm tp) {
int cmp = permId.compareTo(tp.permId);
Writable[] ar, other;
ar = this.SignaturePerm.get();
other = tp.SignaturePerm.get();
if (cmp == 0) {
for(int i=0;i<ar.length;i++){
if(((IntWritable)ar[i]).get() == ((IntWritable)other[i]).get()){cmp= 0;continue;}
else if(((IntWritable)ar[i]).get() < ((IntWritable)other[i]).get()){ return -1;}
else if(((IntWritable)ar[i]).get() > ((IntWritable)other[i]).get()){return 1;}
}
}
return cmp;
//return 1;
}
}
I require keys with the same ID to go to the same reducer, with their sort order as coded in the compareTo method.
However, when I use this, my job execution status is always map 100% reduce 0%.
The reduce never runs to completion. Is there anything wrong with this implementation?
In general, what is the likely problem if the reducer status is always 0%?
I think this might be a possible null pointer exception in the read method:
@Override
public void readFields(DataInput in) throws IOException {
permId.readFields(in);
SignaturePerm.readFields(in);
}
permId is null in this case.
So what you have to do is this:
IntWritable permId = new IntWritable();
Either in the field initializer or before the read.
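A minimal sketch of that fix (assuming IntArrayWritable has a no-argument constructor, as the rest of the question's code suggests):
private IntWritable permId = new IntWritable();
private IntArrayWritable SignaturePerm = new IntArrayWritable();

// readFields can now safely re-populate the existing instances
@Override
public void readFields(DataInput in) throws IOException {
    permId.readFields(in);
    SignaturePerm.readFields(in);
}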
However, your code is horrible to read.
