Map-Reduce not reducing as much as expected with complex keys and values - hadoop

No matter how simple I make the compareTo of my complex key, I don't get the expected results. The one exception: if I use a single key that is the same for every record, everything reduces correctly to one record. I've also noticed this only happens when I process the full load; if I break off a few of the records that didn't reduce and run them at a much smaller scale, those records do get combined.
The totals in the output are correct, but there is duplication at the record level among items I would have expected to group together. So where I would expect, say, 500 records summing to 5,000, I end up with 1,232 records summing to 5,000, with obvious cases of records that should have been reduced into one.
I've read about the problems with object reuse and complex keys and values, but I don't see anywhere that I still have that exposure. To that end you will find places where I'm creating new objects that I probably don't need to; I'm trying everything at this point and will dial it back once it's working.
I'm out of ideas on what to try or where and how to poke to figure this out. Please help!
public static class Map extends
Mapper<LongWritable, Text, IMSTranOut, IMSTranSums> {
//private SimpleDateFormat dtFormat = new SimpleDateFormat("yyyyddd");
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
SimpleDateFormat dtFormat = new SimpleDateFormat("yyyyddd");
IMSTranOut dbKey = new IMSTranOut();
IMSTranSums sumVals = new IMSTranSums();
String[] tokens = line.split(",", -1);
dbKey.setLoadKey(-99);
dbKey.setTranClassKey(-99);
dbKey.setTransactionCode(tokens[0]);
dbKey.setTransactionType(tokens[1]);
dbKey.setNpaNxx(getNPA(dbKey.getTransactionCode()));
try {
dbKey.setTranDate(new Date(dtFormat.parse(tokens[2]).getTime()));
} catch (ParseException e) {
}// 2
dbKey.setTranHour(getTranHour(tokens[3]));
try {
dbKey.setStartDate(new Date(dtFormat.parse(tokens[4]).getTime()));
} catch (ParseException e) {
}// 4
dbKey.setStartHour(getTranHour(tokens[5]));
try {
dbKey.setStopDate(new Date(dtFormat.parse(tokens[6]).getTime()));
} catch (ParseException e) {
}// 6
dbKey.setStopHour(getTranHour(tokens[7]));
sumVals.setTranCount(1);
sumVals.setInputQTime(Double.parseDouble(tokens[8]));
sumVals.setElapsedTime(Double.parseDouble(tokens[9]));
sumVals.setCpuTime(Double.parseDouble(tokens[10]));
context.write(dbKey, sumVals);
}
}
public static class Reduce extends
Reducer<IMSTranOut, IMSTranSums, IMSTranOut, IMSTranSums> {
@Override
public void reduce(IMSTranOut key, Iterable<IMSTranSums> values,
Context context) throws IOException, InterruptedException {
int tranCount = 0;
double inputQ = 0;
double elapsed = 0;
double cpu = 0;
for (IMSTranSums val : values) {
tranCount += val.getTranCount();
inputQ += val.getInputQTime();
elapsed += val.getElapsedTime();
cpu += val.getCpuTime();
}
IMSTranSums sumVals=new IMSTranSums();
IMSTranOut dbKey=new IMSTranOut();
sumVals.setInputQTime(inputQ);
sumVals.setElapsedTime(elapsed);
sumVals.setCpuTime(cpu);
sumVals.setTranCount(tranCount);
dbKey.setLoadKey(key.getLoadKey());
dbKey.setTranClassKey(key.getTranClassKey());
dbKey.setNpaNxx(key.getNpaNxx());
dbKey.setTransactionCode(key.getTransactionCode());
dbKey.setTransactionType(key.getTransactionType());
dbKey.setTranDate(key.getTranDate());
dbKey.setTranHour(key.getTranHour());
dbKey.setStartDate(key.getStartDate());
dbKey.setStartHour(key.getStartHour());
dbKey.setStopDate(key.getStopDate());
dbKey.setStopHour(key.getStopHour());
dbKey.setInputQTime(inputQ);
dbKey.setElapsedTime(elapsed);
dbKey.setCpuTime(cpu);
dbKey.setTranCount(tranCount);
context.write(dbKey, sumVals);
}
}
Here is the implementation of the DBWritable class:
public class IMSTranOut implements DBWritable,
WritableComparable<IMSTranOut> {
private int loadKey;
private int tranClassKey;
private String npaNxx;
private String transactionCode;
private String transactionType;
private Date tranDate;
private double tranHour;
private Date startDate;
private double startHour;
private Date stopDate;
private double stopHour;
private double inputQTime;
private double elapsedTime;
private double cpuTime;
private int tranCount;
public void readFields(ResultSet rs) throws SQLException {
setLoadKey(rs.getInt("LOAD_KEY"));
setTranClassKey(rs.getInt("TRAN_CLASS_KEY"));
setNpaNxx(rs.getString("NPA_NXX"));
setTransactionCode(rs.getString("TRANSACTION_CODE"));
setTransactionType(rs.getString("TRANSACTION_TYPE"));
setTranDate(rs.getDate("TRAN_DATE"));
setTranHour(rs.getInt("TRAN_HOUR"));
setStartDate(rs.getDate("START_DATE"));
setStartHour(rs.getInt("START_HOUR"));
setStopDate(rs.getDate("STOP_DATE"));
setStopHour(rs.getInt("STOP_HOUR"));
setInputQTime(rs.getInt("INPUT_Q_TIME"));
setElapsedTime(rs.getInt("ELAPSED_TIME"));
setCpuTime(rs.getInt("CPU_TIME"));
setTranCount(rs.getInt("TRAN_COUNT"));
}
public void write(PreparedStatement ps) throws SQLException {
ps.setInt(1, loadKey);
ps.setInt(2, tranClassKey);
ps.setString(3, npaNxx);
ps.setString(4, transactionCode);
ps.setString(5, transactionType);
ps.setDate(6, tranDate);
ps.setDouble(7, tranHour);
ps.setDate(8, startDate);
ps.setDouble(9, startHour);
ps.setDate(10, stopDate);
ps.setDouble(11, stopHour);
ps.setDouble(12, inputQTime);
ps.setDouble(13, elapsedTime);
ps.setDouble(14, cpuTime);
ps.setInt(15, tranCount);
}
public int getLoadKey() {
return loadKey;
}
public void setLoadKey(int loadKey) {
this.loadKey = loadKey;
}
public int getTranClassKey() {
return tranClassKey;
}
public void setTranClassKey(int tranClassKey) {
this.tranClassKey = tranClassKey;
}
public String getNpaNxx() {
return npaNxx;
}
public void setNpaNxx(String npaNxx) {
this.npaNxx = new String(npaNxx);
}
public String getTransactionCode() {
return transactionCode;
}
public void setTransactionCode(String transactionCode) {
this.transactionCode = new String(transactionCode);
}
public String getTransactionType() {
return transactionType;
}
public void setTransactionType(String transactionType) {
this.transactionType = new String(transactionType);
}
public Date getTranDate() {
return tranDate;
}
public void setTranDate(Date tranDate) {
this.tranDate = new Date(tranDate.getTime());
}
public double getTranHour() {
return tranHour;
}
public void setTranHour(double tranHour) {
this.tranHour = tranHour;
}
public Date getStartDate() {
return startDate;
}
public void setStartDate(Date startDate) {
this.startDate = new Date(startDate.getTime());
}
public double getStartHour() {
return startHour;
}
public void setStartHour(double startHour) {
this.startHour = startHour;
}
public Date getStopDate() {
return stopDate;
}
public void setStopDate(Date stopDate) {
this.stopDate = new Date(stopDate.getTime());
}
public double getStopHour() {
return stopHour;
}
public void setStopHour(double stopHour) {
this.stopHour = stopHour;
}
public double getInputQTime() {
return inputQTime;
}
public void setInputQTime(double inputQTime) {
this.inputQTime = inputQTime;
}
public double getElapsedTime() {
return elapsedTime;
}
public void setElapsedTime(double elapsedTime) {
this.elapsedTime = elapsedTime;
}
public double getCpuTime() {
return cpuTime;
}
public void setCpuTime(double cpuTime) {
this.cpuTime = cpuTime;
}
public int getTranCount() {
return tranCount;
}
public void setTranCount(int tranCount) {
this.tranCount = tranCount;
}
public void readFields(DataInput input) throws IOException {
setNpaNxx(input.readUTF());
setTransactionCode(input.readUTF());
setTransactionType(input.readUTF());
setTranDate(new Date(input.readLong()));
setStartDate(new Date(input.readLong()));
setStopDate(new Date(input.readLong()));
setLoadKey(input.readInt());
setTranClassKey(input.readInt());
setTranHour(input.readDouble());
setStartHour(input.readDouble());
setStopHour(input.readDouble());
setInputQTime(input.readDouble());
setElapsedTime(input.readDouble());
setCpuTime(input.readDouble());
setTranCount(input.readInt());
}
public void write(DataOutput output) throws IOException {
output.writeUTF(npaNxx);
output.writeUTF(transactionCode);
output.writeUTF(transactionType);
output.writeLong(tranDate.getTime());
output.writeLong(startDate.getTime());
output.writeLong(stopDate.getTime());
output.writeInt(loadKey);
output.writeInt(tranClassKey);
output.writeDouble(tranHour);
output.writeDouble(startHour);
output.writeDouble(stopHour);
output.writeDouble(inputQTime);
output.writeDouble(elapsedTime);
output.writeDouble(cpuTime);
output.writeInt(tranCount);
}
public int compareTo(IMSTranOut o) {
return (Integer.compare(loadKey, o.getLoadKey()) == 0
&& Integer.compare(tranClassKey, o.getTranClassKey()) == 0
&& npaNxx.compareTo(o.getNpaNxx()) == 0
&& transactionCode.compareTo(o.getTransactionCode()) == 0
&& (transactionType.compareTo(o.getTransactionType()) == 0)
&& tranDate.compareTo(o.getTranDate()) == 0
&& Double.compare(tranHour, o.getTranHour()) == 0
&& startDate.compareTo(o.getStartDate()) == 0
&& Double.compare(startHour, o.getStartHour()) == 0
&& stopDate.compareTo(o.getStopDate()) == 0
&& Double.compare(stopHour, o.getStopHour()) == 0) ? 0 : 1;
}
}
Implementation of the Writable class for the complex values:
public class IMSTranSums
implements Writable{
private double inputQTime;
private double elapsedTime;
private double cpuTime;
private int tranCount;
public double getInputQTime() {
return inputQTime;
}
public void setInputQTime(double inputQTime) {
this.inputQTime = inputQTime;
}
public double getElapsedTime() {
return elapsedTime;
}
public void setElapsedTime(double elapsedTime) {
this.elapsedTime = elapsedTime;
}
public double getCpuTime() {
return cpuTime;
}
public void setCpuTime(double cpuTime) {
this.cpuTime = cpuTime;
}
public int getTranCount() {
return tranCount;
}
public void setTranCount(int tranCount) {
this.tranCount = tranCount;
}
public void write(DataOutput output) throws IOException {
output.writeDouble(inputQTime);
output.writeDouble(elapsedTime);
output.writeDouble(cpuTime);
output.writeInt(tranCount);
}
public void readFields(DataInput input) throws IOException {
inputQTime=input.readDouble();
elapsedTime=input.readDouble();
cpuTime=input.readDouble();
tranCount=input.readInt();
}
}

Your compareTo is flawed: it will completely break the sort, because it violates the transitivity and antisymmetry of the ordering. Whenever two keys differ in any field you return 1, regardless of direction, so compareTo(a, b) and compareTo(b, a) can both be positive; the framework's sort then cannot bring equal keys together, and records that should group into one reduce call don't.
I would recommend using a CompareToBuilder from Apache Commons or a ComparisonChain from Guava to make your comparison much more readable (and correct!).
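For illustration, a minimal sketch of a contract-respecting compareTo using Guava's ComparisonChain (field list taken from the key class above; assumes Guava is on the classpath):
import com.google.common.collect.ComparisonChain;

@Override
public int compareTo(IMSTranOut o) {
    // Each compare() is evaluated only if all previous fields were equal,
    // and the result carries the correct sign, so the ordering is
    // antisymmetric and transitive -- unlike returning only 0 or 1.
    return ComparisonChain.start()
            .compare(loadKey, o.getLoadKey())
            .compare(tranClassKey, o.getTranClassKey())
            .compare(npaNxx, o.getNpaNxx())
            .compare(transactionCode, o.getTransactionCode())
            .compare(transactionType, o.getTransactionType())
            .compare(tranDate, o.getTranDate())
            .compare(tranHour, o.getTranHour())
            .compare(startDate, o.getStartDate())
            .compare(startHour, o.getStartHour())
            .compare(stopDate, o.getStopDate())
            .compare(stopHour, o.getStopHour())
            .result();
}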

Related

Trino UDF: how to create an aggregate function for the window function

I tried to write a UDF to calculate my data. From the Trino docs I learned that I should write a function plugin, and I succeeded in executing my UDF aggregate function in SQL.
But when I write SQL with both the aggregate function and a window function, the SQL fails to execute.
The error log is com.google.common.util.concurrent.ExecutionError: java.lang.NoClassDefFoundError: com/example/ListState.
I think I may need to implement the interface for the window function.
The ListState.java file code
@AccumulatorStateMetadata(stateSerializerClass = ListStateSerializer.class, stateFactoryClass = ListStateFactory.class)
public interface ListState extends AccumulatorState {
List<String> getList();
void setList(List<String> value);
}
The ListStateSerializer file code
public class ListStateSerializer implements AccumulatorStateSerializer<ListState>
{
@Override
public Type getSerializedType() {
return VARCHAR;
}
@Override
public void serialize(ListState state, BlockBuilder out) {
if (state.getList() == null) {
out.appendNull();
return;
}
String value = String.join(",", state.getList());
VARCHAR.writeSlice(out, Slices.utf8Slice(value));
}
@Override
public void deserialize(Block block, int index, ListState state) {
String value = VARCHAR.getSlice(block, index).toStringUtf8();
List<String> list = Arrays.asList(value.split(","));
state.setList(list);
}
}
The ListStateFactory file code
public class ListStateFactory implements AccumulatorStateFactory<ListState> {
public static final class SingleListState implements ListState {
private List<String> list = new ArrayList<>();
@Override
public List<String> getList() {
return list;
}
@Override
public void setList(List<String> value) {
list = value;
}
@Override
public long getEstimatedSize() {
if (list == null) {
return 0;
}
return list.size();
}
}
public static class GroupedListState implements GroupedAccumulatorState, ListState {
private final ObjectBigArray<List<String>> container = new ObjectBigArray<>();
private long groupId;
@Override
public List<String> getList() {
return container.get(groupId);
}
@Override
public void setList(List<String> value) {
container.set(groupId, value);
}
@Override
public void setGroupId(long groupId) {
this.groupId = groupId;
if (this.getList() == null) {
this.setList(new ArrayList<String>());
}
}
@Override
public void ensureCapacity(long size) {
container.ensureCapacity(size);
}
@Override
public long getEstimatedSize() {
return container.sizeOf();
}
}
@Override
public ListState createSingleState() {
return new SingleListState();
}
@Override
public ListState createGroupedState() {
return new GroupedListState();
}
}
Thanks for any help!
I also found the WindowAccumulator class in the Trino source code, but I don't know how to use it.
How do I create an aggregate function for a window function?

Spring Batch FlatFileItemWriter

Hello, I'm trying to read one object and write another object, but I receive an error:
Invalid property 'KDE22' of bean class [com.Bnl.Wl.Batch2.Model.EmployeeOut]: Bean property 'KDE22' is not readable or has an invalid getter method: Does the return type of the getter match the parameter type of the setter?
@Bean
public FlatFileItemWriter<EmployeeOut> writer() throws IOException {
FlatFileItemWriter<EmployeeOut> writer = new FlatFileItemWriter<EmployeeOut>();
writer.setAppendAllowed(true);
System.out.println("conteggio " + conteggio);
writer.setResource(new FileSystemResource(dirOutputPath + dharControlEnvironment + nameAppOrigin
+ sdf1.format(timestamp) + String.format("%03d", conteggio + 1).toString() + ".txt"));
DelimitedLineAggregator<EmployeeOut> aggregator = new DelimitedLineAggregator<>();
BeanWrapperFieldExtractor<EmployeeOut> fieldExtractor = new BeanWrapperFieldExtractor<>();
fieldExtractor.setNames(EmployeeOut.fields());
aggregator.setFieldExtractor(fieldExtractor);
aggregator.setDelimiter("|");
System.out.println(writer.getExecutionContextKey(dharControlEnvironment).toString());
writer.setLineAggregator(aggregator);
writer.setHeaderCallback(
(org.springframework.batch.item.file.FlatFileHeaderCallback) new FlatFileHeaderCallback() {
public void writeHeader(Writer writer) throws IOException {
Timestamp timestamp = new Timestamp(System.currentTimeMillis());
writer.write("00" + "|" + sdf1.format(timestamp) + "|" + dharControlEnvironment + nameAppOrigin
+ "ANAGPILOTA");
}
});
writer.setFooterCallback(new FlatFileFooterCallback() {
@Override
public void writeFooter(Writer writer) throws IOException {
int cont = 0;
FileReader reader = new FileReader(dirOutputPath + dharControlEnvironment + nameAppOrigin
+ sdf1.format(timestamp) + String.format("%03d", conteggio + 1).toString() + extention);
BufferedReader in = new BufferedReader(reader);
while (in.readLine() != null)
cont++;
Timestamp timestamp = new Timestamp(System.currentTimeMillis());
writer.write("99" + "|" + sdf1.format(timestamp) + "|" + dharControlEnvironment + nameAppOrigin + "|"
+ cont);
writer.write(System.lineSeparator());
}
});
return writer;
}
public class EmployeeItemProcessor implements ItemProcessor<Employee, EmployeeOut> {
private static final Logger log = LoggerFactory.getLogger(JobExecutionListener.class);
//private EmployeeOut itemOut = new EmployeeOut();
@Override
public EmployeeOut process(Employee item) throws Exception {
EmployeeOut itemOut = new EmployeeOut();
itemOut.setCOMPETENCE_DATE("afafafasdfadsfasdf");
itemOut.setDIVISA(item.getDIVISA());
itemOut.setCONTO_CONTABILE(item.getCONTO_CONTABILE());
itemOut.setTIPO_SCRITTURA(item.getID_SCRITTURA());
itemOut.setIMPORTO(item.getIMPORTO());
itemOut.setDATA_VALUTA(item.getDATA_VALUTA());
itemOut.setOM_BN(item.getOM_BN());
itemOut.setDOSSIER(item.getDOSSIER());
itemOut.setKDE1(item.getKDE1());
itemOut.setNDG(item.getNDG());
itemOut.setIBAN(item.getIBAN());
itemOut.setKDE2(item.getKDE2());
itemOut.setKDE3(item.getKDE3());
itemOut.setTIPO_EVENTO(item.getTIPO_EVENTO());
itemOut.setID_SCRITTURA(item.getID_SCRITTURA());
itemOut.setSIGLA_CIRCUITO(item.getSIGLA_CIRCUITO());
itemOut.setDATA_TESORERIA(item.getDATA_TESORERIA());
itemOut.setCOMPETENZA(item.getCOMPETENZA());
itemOut.setFLAG_SUBGROUP(item.getFLAG_SUBGROUP());
itemOut.setOPERATION(item.getOPERATION());
itemOut.setNew_field1("prova222");
itemOut.setNew_field1("prova");
itemOut.setNew_field2("sdfsdff");
log.info("filed 1 "+ itemOut.getNew_field1());
return itemOut;
}
}
package com.Bnl.Wl.Batch2.Model;
public class Employee {
protected String COMPETENCE_DATE;
protected String DIVISA;
protected String CONTO_CONTABILE;
protected String TIPO_SCRITTURA;
protected String IMPORTO;
protected String DATA_VALUTA;
protected String OM_BN;
protected String DOSSIER;
protected String KDE1;
protected String NDG;
protected String IBAN;
protected String KDE2;
protected String KDE3;
protected String TIPO_EVENTO;
protected String ID_SCRITTURA;
protected String SIGLA_CIRCUITO;
protected String DATA_TESORERIA;
protected String COMPETENZA;
protected String FLAG_SUBGROUP;
protected String OPERATION;
public Employee() {
}
public static String[] fields() {
return new String[] {"COMPETENCE_DATE",
"DIVISA",
"CONTO_CONTABILE",
"TIPO_SCRITTURA",
"IMPORTO",
"DATA_VALUTA",
"OM_BN",
"DOSSIER",
"KDE1",
"NDG",
"IBAN",
"KDE2",
"KDE3",
"TIPO_EVENTO",
"ID_SCRITTURA",
"SIGLA_CIRCUITO",
"DATA_TESORERIA",
"COMPETENZA",
"FLAG_SUBGROUP",
"OPERATION",
};
}
public String getCOMPETENCE_DATE() {
return COMPETENCE_DATE;
}
public void setCOMPETENCE_DATE(String cOMPETENCE_DATE) {
COMPETENCE_DATE = cOMPETENCE_DATE;
}
public String getDIVISA() {
return DIVISA;
}
public void setDIVISA(String dIVISA) {
DIVISA = dIVISA;
}
public String getCONTO_CONTABILE() {
return CONTO_CONTABILE;
}
public void setCONTO_CONTABILE(String cONTO_CONTABILE) {
CONTO_CONTABILE = cONTO_CONTABILE;
}
public String getTIPO_SCRITTURA() {
return TIPO_SCRITTURA;
}
public void setTIPO_SCRITTURA(String tIPO_SCRITTURA) {
TIPO_SCRITTURA = tIPO_SCRITTURA;
}
public String getIMPORTO() {
return IMPORTO;
}
public void setIMPORTO(String iMPORTO) {
IMPORTO = iMPORTO;
}
public String getDATA_VALUTA() {
return DATA_VALUTA;
}
public void setDATA_VALUTA(String dATA_VALUTA) {
DATA_VALUTA = dATA_VALUTA;
}
public String getOM_BN() {
return OM_BN;
}
public void setOM_BN(String oM_BN) {
OM_BN = oM_BN;
}
public String getDOSSIER() {
return DOSSIER;
}
public void setDOSSIER(String dOSSIER) {
DOSSIER = dOSSIER;
}
public String getKDE1() {
return KDE1;
}
public void setKDE1(String kDE1) {
KDE1 = kDE1;
}
public String getNDG() {
return NDG;
}
public void setNDG(String nDG) {
NDG = nDG;
}
public String getIBAN() {
return IBAN;
}
public void setIBAN(String iBAN) {
IBAN = iBAN;
}
public String getKDE2() {
return KDE2;
}
public void setKDE2(String kDE2) {
KDE2 = kDE2;
}
public String getKDE3() {
return KDE3;
}
public void setKDE3(String kDE3) {
KDE3 = kDE3;
}
public String getTIPO_EVENTO() {
return TIPO_EVENTO;
}
public void setTIPO_EVENTO(String tIPO_EVENTO) {
TIPO_EVENTO = tIPO_EVENTO;
}
public String getID_SCRITTURA() {
return ID_SCRITTURA;
}
public void setID_SCRITTURA(String iD_SCRITTURA) {
ID_SCRITTURA = iD_SCRITTURA;
}
public String getSIGLA_CIRCUITO() {
return SIGLA_CIRCUITO;
}
public void setSIGLA_CIRCUITO(String sIGLA_CIRCUITO) {
SIGLA_CIRCUITO = sIGLA_CIRCUITO;
}
public String getDATA_TESORERIA() {
return DATA_TESORERIA;
}
public void setDATA_TESORERIA(String dATA_TESORERIA) {
DATA_TESORERIA = dATA_TESORERIA;
}
public String getCOMPETENZA() {
return COMPETENZA;
}
public void setCOMPETENZA(String cOMPETENZA) {
COMPETENZA = cOMPETENZA;
}
public String getFLAG_SUBGROUP() {
return FLAG_SUBGROUP;
}
public void setFLAG_SUBGROUP(String fLAG_SUBGROUP) {
FLAG_SUBGROUP = fLAG_SUBGROUP;
}
public String getOPERATION() {
return OPERATION;
}
public void setOPERATION(String oPERATION) {
OPERATION = oPERATION;
}
}
package com.Bnl.Wl.Batch2.Model;
public class EmployeeOut extends Employee {
private String New_field1;
public String getNew_field1() {
return New_field1;
}
public void setNew_field1(String new_field1) {
New_field1 = new_field1;
}
public String getNew_field2() {
return New_field2;
}
public void setNew_field2(String new_field2) {
New_field2 = new_field2;
}
private String New_field2;
public EmployeeOut() {
super();
// TODO Auto-generated constructor stub
}
public static String[] fields() {
return new String[] { "COMPETENCE_DATE", "DIVISA", "CONTO_CONTABILE", "TIPO_SCRITTURA", "IMPORTO",
"DATA_VALUTA", "OM_BN", "DOSSIER", "KDE1", "NDG", "IBAN", "KDE22", "KDE3", "TIPO_EVENTO", "ID_SCRITTURA",
"SIGLA_CIRCUITO", "DATA_TESORERIA", "COMPETENZA", "FLAG_SUBGROUP", "OPERATION", "Field1", "Field2" };
}
}
I'm going from an object with n fields to an object with n + m fields, but something goes wrong.
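The error message points at the fields() array: BeanWrapperFieldExtractor resolves each name as a JavaBean property of EmployeeOut, and there is no readable KDE22, Field1, or Field2 property (the getters are getKDE2, getNew_field1, and getNew_field2). A hedged sketch of a corrected array, with property names derived from those getters:
// Hypothetical fix: every name must match a readable JavaBean property
// (getKDE2 -> "KDE2", getNew_field1 -> "new_field1", getNew_field2 -> "new_field2").
public static String[] fields() {
    return new String[] { "COMPETENCE_DATE", "DIVISA", "CONTO_CONTABILE", "TIPO_SCRITTURA", "IMPORTO",
            "DATA_VALUTA", "OM_BN", "DOSSIER", "KDE1", "NDG", "IBAN", "KDE2", "KDE3", "TIPO_EVENTO",
            "ID_SCRITTURA", "SIGLA_CIRCUITO", "DATA_TESORERIA", "COMPETENZA", "FLAG_SUBGROUP", "OPERATION",
            "new_field1", "new_field2" };
}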

TableColumn: how to connect it with a property but not fill the cells

I have a TableView and a data class with integer properties for the columns. However, I would like the columns to show empty cells at first, so the user can enter whatever values they want.
Right now that's impossible, because when a data object is created its integer values have to be given an initial value, so the table shows up already filled with numbers.
private ObservableList<MyData> dataList = FXCollections.observableArrayList();
.....edited....
private void buttAddColumnAction(ActionEvent event){
int i = numberOfColumns; // effectively final copy for the lambda: a unique number for this column, used to access its variable
if(dataList.size() > 0)//resizing each data object with new variable
for(MyData x: dataList)
x.addNew();
TableColumn<MyData, Integer> newColumn = new TableColumn<>("#" + String.valueOf(++numberOfColumns));
newColumn.setCellValueFactory(cellData -> cellData.getValue().getCellValue(i));
// newColumn.setCellFactory(TextFieldTableCell.<MyData, Integer>forTableColumn(new IntegerStringConverter()));
Callback<TableColumn<MyData, Integer>, TableCell<MyData, Integer>> cellFactoryInt = (TableColumn<MyData, Integer> p) -> new EditingCellNumbers(tableView);
newColumn.setCellFactory(cellFactoryInt);
tableView.getColumns().add(newColumn);
}
public class MyData{ // don't forget about public, or you won't get access to the properties
private ObservableList<ObjectProperty<Integer>> cellValue = FXCollections.observableArrayList();
public MyData(int howManyColumns) {
for(int i=0; i<howManyColumns; ++i)
this.cellValue.add(new SimpleObjectProperty<Integer>(null));
}
public ObjectProperty<Integer> getCellValue(int whichOne) {
return cellValue.get(whichOne);
}
public void setCellValue(int cellValue, int whichOne) {
this.cellValue.set(whichOne, new SimpleObjectProperty<Integer>(cellValue));
}
public void addNew(){ // adds another variable for another column
cellValue.add(new SimpleObjectProperty<Integer>(null));
}
public void deleteLast(){ //deletes last variable when column is deleted
cellValue.remove(cellValue.size()-1);
}
}
CellFactory
// This class allows defining the behavior of the cells that the user edits
public class EditingCellNumbers extends TableCell<MyData, Integer>{
private TextField textField;
private TableView<MyData> parentTableView;
public static int numberOfColumns;
public EditingCellNumbers(TableView<MyData> parent) {
this.parentTableView = parent;
numberOfColumns = parent.getColumns().size();
}
@Override
public void startEdit(){
if (!isEmpty()) {
super.startEdit();
createTextField();
setText(null);
setGraphic(textField);
textField.selectAll();
textField.requestFocus();
}
}
@Override
public void cancelEdit() {
super.cancelEdit();
setText(String.valueOf(getItem()));
setGraphic(null);
}
@Override
public void updateItem(Integer item, boolean empty) {
super.updateItem(item, empty);
if (empty) {
setText(null);
setGraphic(null);
} else {
if (isEditing()) {
if (textField != null) {
textField.setText(getString());
}
setText(null);
setGraphic(textField);
} else {
setText(getString());
setGraphic(null);
}
}
}
private void createTextField() {
textField = new TextField(getString());
textField.setMinWidth(this.getWidth() - this.getGraphicTextGap()* 2);
textField.focusedProperty().addListener(
(ObservableValue<? extends Boolean> arg0,
Boolean arg1, Boolean arg2) -> {
if (!arg2) {
commitEdit(Integer.valueOf(textField.getText()));
}
});
textField.setOnKeyReleased(new EventHandler<Event>() {
@Override
public void handle(Event event) {
try{
int i = Integer.valueOf(textField.getText());
//digit given...
if( (i>=0) && (i<10) ){//making sure cell is filled with just one digit
commitEdit(Integer.valueOf(textField.getText()));
int selectedColumn = parentTableView.getSelectionModel().getSelectedCells().get(0).getColumn(); // gets the number of selected column
int selectedRow = parentTableView.getSelectionModel().getSelectedCells().get(0).getRow();
if(selectedColumn < numberOfColumns-1){
parentTableView.getSelectionModel().selectNext();
parentTableView.edit(selectedRow, parentTableView.getColumns().get(selectedColumn+1));
}else{
parentTableView.getSelectionModel().select(selectedRow+1, parentTableView.getColumns().get(0));
parentTableView.edit(selectedRow+1, parentTableView.getColumns().get(0));
}
}else
textField.clear();
}catch(NumberFormatException e){
textField.clear();
}
}
});
}
private String getString() {
return getItem() == null ? "" : getItem().toString();
}
}
Allow null values in your column by using an ObjectProperty<Integer> instead of an IntegerProperty. This gives a more natural way to define "not initialized" than representing it with 0 (or some other proxy value).
Then you can use the TextFieldTableCell, but just supply a custom StringConverter<Integer>:
public class MyData{ // don't forget about public, or you won't get access to the properties
private ObservableList<ObjectProperty<Integer>> cellValue = FXCollections.observableArrayList();
public MyData(int howManyColumns) {
for(int i=0; i<howManyColumns; ++i)
this.cellValue.add(new SimpleObjectProperty<>(new Random().nextInt(10)));
}
// ...
}
and
newColumn.setCellValueFactory(cellData -> cellData.getValue().getCellValue(i));
newColumn.setCellFactory(TextFieldTableCell.<MyData, Integer>forTableColumn(new StringConverter<Integer>() {
@Override
public String toString(Integer i) {
if (i == null) {
return "" ;
} else {
return i.toString();
}
}
@Override
public Integer fromString(String string) {
if (string.trim().length() == 0) {
return null ;
} else {
try {
return Integer.valueOf(string);
} catch (NumberFormatException nfe) {
return null ;
}
}
}
}));
Complete example:
import java.util.Random;
import java.util.function.Function;
import javafx.application.Application;
import javafx.beans.property.ObjectProperty;
import javafx.beans.property.SimpleObjectProperty;
import javafx.beans.property.SimpleStringProperty;
import javafx.beans.property.StringProperty;
import javafx.beans.value.ObservableValue;
import javafx.scene.Scene;
import javafx.scene.control.TableColumn;
import javafx.scene.control.TableView;
import javafx.scene.control.cell.TextFieldTableCell;
import javafx.scene.layout.BorderPane;
import javafx.stage.Stage;
import javafx.util.StringConverter;
public class TableViewWithEmptyIntegerColumn extends Application {
@Override
public void start(Stage primaryStage) {
TableView<Item> table = new TableView<>();
table.setEditable(true);
TableColumn<Item, String> nameCol = createCol("Name", Item::nameProperty);
TableColumn<Item, Integer> valueCol = createCol("Value", Item::valueProperty);
valueCol.setEditable(true);
valueCol.setCellFactory(TextFieldTableCell.forTableColumn(new StringConverter<Integer>() {
@Override
public String toString(Integer i) {
if (i == null) {
return "" ;
} else {
return i.toString() ;
}
}
@Override
public Integer fromString(String string) {
if (string.trim().length() == 0) {
return null ;
} else {
// better to check for a valid int format instead of using try-catch...
try {
return Integer.valueOf(string);
} catch (NumberFormatException nfe) {
return null ;
}
}
}
}));
Random rng = new Random();
for (int i=1; i<=20; i++) {
if (rng.nextDouble() < 0.5) {
table.getItems().add(new Item("Item "+i));
} else {
table.getItems().add(new Item("Item "+i, rng.nextInt(10)+1));
}
}
table.getColumns().addAll(nameCol, valueCol);
primaryStage.setScene(new Scene(new BorderPane(table), 400, 600));
primaryStage.show();
}
private <S,T> TableColumn<S,T> createCol(String title, Function<S, ObservableValue<T>> property) {
TableColumn<S,T> col = new TableColumn<>(title);
col.setCellValueFactory(cellData -> property.apply(cellData.getValue()));
return col ;
}
public static class Item {
private final StringProperty name = new SimpleStringProperty();
private final ObjectProperty<Integer> value = new SimpleObjectProperty<>();
public Item(String name, Integer value) {
setName(name);
setValue(value);
}
public Item(String name) {
this(name, null);
}
public final StringProperty nameProperty() {
return this.name;
}
public final String getName() {
return this.nameProperty().get();
}
public final void setName(final String name) {
this.nameProperty().set(name);
}
public final ObjectProperty<Integer> valueProperty() {
return this.value;
}
public final Integer getValue() {
return this.valueProperty().get();
}
public final void setValue(final Integer value) {
this.valueProperty().set(value);
}
}
public static void main(String[] args) {
launch(args);
}
}

Why is the Hadoop shuffle not working as expected

I have this Hadoop MapReduce code that works on graph data (in adjacency-list form) and is similar in spirit to an in-adjacency-list to out-adjacency-list transformation. The main MapReduce task code is the following:
public class TestTask extends Configured
implements Tool {
public static class TTMapper extends MapReduceBase
implements Mapper<Text, TextArrayWritable, Text, NeighborWritable> {
@Override
public void map(Text key,
TextArrayWritable value,
OutputCollector<Text, NeighborWritable> output,
Reporter reporter) throws IOException {
int numNeighbors = value.get().length;
double weight = (double)1 / numNeighbors;
Text[] neighbors = (Text[]) value.toArray();
NeighborWritable me = new NeighborWritable(key, new DoubleWritable(weight));
for (int i = 0; i < neighbors.length; i++) {
output.collect(neighbors[i], me);
}
}
}
public static class TTReducer extends MapReduceBase
implements Reducer<Text, NeighborWritable, Text, Text> {
@Override
public void reduce(Text key,
Iterator<NeighborWritable> values,
OutputCollector<Text, Text> output,
Reporter arg3)
throws IOException {
ArrayList<NeighborWritable> neighborList = new ArrayList<NeighborWritable>();
while(values.hasNext()) {
neighborList.add(values.next());
}
NeighborArrayWritable neighbors = new NeighborArrayWritable
(neighborList.toArray(new NeighborWritable[0]));
Text out = new Text(neighbors.toString());
output.collect(key, out);
}
}
@Override
public int run(String[] arg0) throws Exception {
JobConf conf = Util.getMapRedJobConf("testJob",
SequenceFileInputFormat.class,
TTMapper.class,
Text.class,
NeighborWritable.class,
1,
TTReducer.class,
Text.class,
Text.class,
TextOutputFormat.class,
"test/in",
"test/out");
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new TestTask(), args);
System.exit(res);
}
}
The auxiliary code is following:
TextArrayWritable:
public class TextArrayWritable extends ArrayWritable {
public TextArrayWritable() {
super(Text.class);
}
public TextArrayWritable(Text[] values) {
super(Text.class, values);
}
}
NeighborWritable:
public class NeighborWritable implements Writable {
private Text nodeId;
private DoubleWritable weight;
public NeighborWritable(Text nodeId, DoubleWritable weight) {
this.nodeId = nodeId;
this.weight = weight;
}
public NeighborWritable () { }
public Text getNodeId() {
return nodeId;
}
public DoubleWritable getWeight() {
return weight;
}
public void setNodeId(Text nodeId) {
this.nodeId = nodeId;
}
public void setWeight(DoubleWritable weight) {
this.weight = weight;
}
@Override
public void readFields(DataInput in) throws IOException {
nodeId = new Text();
nodeId.readFields(in);
weight = new DoubleWritable();
weight.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
nodeId.write(out);
weight.write(out);
}
public String toString() {
return "NW[nodeId=" + (nodeId != null ? nodeId.toString() : "(null)") +
",weight=" + (weight != null ? weight.toString() : "(null)") + "]";
}
public boolean equals(Object o) {
if (!(o instanceof NeighborWritable)) {
return false;
}
NeighborWritable that = (NeighborWritable)o;
return (nodeId.equals(that.getNodeId()) && (weight.equals(that.getWeight())));
}
}
and the Util class:
public class Util {
public static JobConf getMapRedJobConf(String jobName,
Class<? extends InputFormat> inputFormatClass,
Class<? extends Mapper> mapperClass,
Class<?> mapOutputKeyClass,
Class<?> mapOutputValueClass,
int numReducer,
Class<? extends Reducer> reducerClass,
Class<?> outputKeyClass,
Class<?> outputValueClass,
Class<? extends OutputFormat> outputFormatClass,
String inputDir,
String outputDir) throws IOException {
JobConf conf = new JobConf();
if (jobName != null)
conf.setJobName(jobName);
conf.setInputFormat(inputFormatClass);
conf.setMapperClass(mapperClass);
if (numReducer == 0) {
conf.setNumReduceTasks(0);
conf.setOutputKeyClass(outputKeyClass);
conf.setOutputValueClass(outputValueClass);
conf.setOutputFormat(outputFormatClass);
} else {
// may set actual number of reducers
// conf.setNumReduceTasks(numReducer);
conf.setMapOutputKeyClass(mapOutputKeyClass);
conf.setMapOutputValueClass(mapOutputValueClass);
conf.setReducerClass(reducerClass);
conf.setOutputKeyClass(outputKeyClass);
conf.setOutputValueClass(outputValueClass);
conf.setOutputFormat(outputFormatClass);
}
// delete the existing target output folder
FileSystem fs = FileSystem.get(conf);
fs.delete(new Path(outputDir), true);
// specify input and output DIRECTORIES (not files)
FileInputFormat.addInputPath(conf, new Path(inputDir));
FileOutputFormat.setOutputPath(conf, new Path(outputDir));
return conf;
}
}
My input is the following graph (stored in binary format; shown here as text):
1 2
2 1,3,5
3 2,4
4 3,5
5 2,4
According to the logic of the code (each node emits itself, with weight 1/numNeighbors, to every one of its neighbors), the output should be:
1 NWArray[size=1,{NW[nodeId=2,weight=0.3333333333333333],}]
2 NWArray[size=3,{NW[nodeId=5,weight=0.5],NW[nodeId=3,weight=0.5],NW[nodeId=1,weight=1.0],}]
3 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=4,weight=0.5],}]
4 NWArray[size=2,{NW[nodeId=5,weight=0.5],NW[nodeId=3,weight=0.5],}]
5 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=4,weight=0.5],}]
But the output comes out as:
1 NWArray[size=1,{NW[nodeId=2,weight=0.3333333333333333],}]
2 NWArray[size=3,{NW[nodeId=5,weight=0.5],NW[nodeId=5,weight=0.5],NW[nodeId=5,weight=0.5],}]
3 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=2,weight=0.3333333333333333],}]
4 NWArray[size=2,{NW[nodeId=5,weight=0.5],NW[nodeId=5,weight=0.5],}]
5 NWArray[size=2,{NW[nodeId=2,weight=0.3333333333333333],NW[nodeId=2,weight=0.3333333333333333],}]
I can't understand why the expected output isn't being produced. Any help will be appreciated.
Thanks.
You're falling foul of object re-use
while(values.hasNext()) {
neighborList.add(values.next());
}
values.next() will return the same object reference, but the underlying contents of that object will change for each iteration (the readFields method is called to re-populate the contents)
I suggest you amend it as follows (you'll need to obtain the Configuration conf variable from a setup method, unless you can get it from the Reporter or OutputCollector; sorry, I don't use the old API):
while (values.hasNext()) {
neighborList.add(
ReflectionUtils.copy(conf, values.next(), new NeighborWritable()));
}
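For reference, a sketch of how that could be wired up in the old API, caching the JobConf passed to MapReduceBase#configure (otherwise the same reducer as above):
public static class TTReducer extends MapReduceBase
        implements Reducer<Text, NeighborWritable, Text, Text> {
    private JobConf conf;

    @Override
    public void configure(JobConf job) {
        this.conf = job; // called once per task, before any reduce() call
    }

    @Override
    public void reduce(Text key, Iterator<NeighborWritable> values,
            OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        ArrayList<NeighborWritable> neighborList = new ArrayList<NeighborWritable>();
        while (values.hasNext()) {
            // deep-copy the reused instance before keeping a reference to it
            neighborList.add(ReflectionUtils.copy(conf, values.next(), new NeighborWritable()));
        }
        // ... build the NeighborArrayWritable and collect, as before
    }
}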
But then I still can't understand why my unit test passed. Here is the code:
public class UWLTInitReducerTest {
private Text key;
private Iterator<NeighborWritable> values;
private NeighborArrayWritable nodeData;
private TTReducer reducer;
/**
* Set up the states for calling the map function
*/
@Before
public void setUp() throws Exception {
key = new Text("1001");
NeighborWritable[] neighbors = new NeighborWritable[4];
for (int i = 0; i < 4; i++) {
neighbors[i] = new NeighborWritable(new Text("300" + i), new DoubleWritable((double) 1 / (1 + i)));
}
values = Arrays.asList(neighbors).iterator();
nodeData = new NeighborArrayWritable(neighbors);
reducer = new TTReducer();
}
/**
* Test method for InitModelMapper#map - valid input
*/
@Test
public void testMapValid() {
// mock the output object
OutputCollector<Text, UWLTNodeData> output = mock(OutputCollector.class);
try {
// call the API
reducer.reduce(key, values, output, null);
// in order (sequential) verification of the calls to output.collect()
verify(output).collect(key, nodeData);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Why didn't this code catch the bug?
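A plausible explanation, consistent with the answer above: Arrays.asList(neighbors).iterator() hands the reducer four distinct NeighborWritable objects, so storing the references works, whereas Hadoop's values iterator reuses a single instance and re-populates it via readFields. A hypothetical test helper (not a Hadoop API) that mimics the framework's reuse would reproduce the bug:
// One shared instance, overwritten on each next(), the way Hadoop's
// values iterator re-populates a single object between calls.
Iterator<NeighborWritable> reusingValues = new Iterator<NeighborWritable>() {
    private final NeighborWritable shared = new NeighborWritable();
    private int i = 0;
    @Override public boolean hasNext() { return i < 4; }
    @Override public NeighborWritable next() {
        shared.setNodeId(new Text("300" + i));
        shared.setWeight(new DoubleWritable((double) 1 / (1 + i)));
        i++;
        return shared;
    }
    @Override public void remove() { throw new UnsupportedOperationException(); }
};
Driving reduce with this iterator instead would make the test fail the same way the job does.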

Using a custom Object as key emitted by mapper

I have a situation in which the mapper emits, as its key, an object of a custom type.
It has two fields: an IntWritable ID and a data array, an IntArrayWritable.
The implementation is as follows:
import java.io.*;
import org.apache.hadoop.io.*;
public class PairDocIdPerm implements WritableComparable<PairDocIdPerm> {
public PairDocIdPerm(){
this.permId = new IntWritable(-1);
this.SignaturePerm = new IntArrayWritable();
}
public IntWritable getPermId() {
return permId;
}
public void setPermId(IntWritable permId) {
this.permId = permId;
}
public IntArrayWritable getSignaturePerm() {
return SignaturePerm;
}
public void setSignaturePerm(IntArrayWritable signaturePerm) {
SignaturePerm = signaturePerm;
}
private IntWritable permId;
private IntArrayWritable SignaturePerm;
public PairDocIdPerm(IntWritable permId,IntArrayWritable SignaturePerm) {
this.permId = permId;
this.SignaturePerm = SignaturePerm;
}
@Override
public void write(DataOutput out) throws IOException {
permId.write(out);
SignaturePerm.write(out);
}
@Override
public void readFields(DataInput in) throws IOException {
permId.readFields(in);
SignaturePerm.readFields(in);
}
@Override
public int hashCode() { // same permId must go to the same reducer, therefore hash on permId only
return permId.get();//.hashCode();
}
@Override
public boolean equals(Object o) {
if (o instanceof PairDocIdPerm) {
PairDocIdPerm tp = (PairDocIdPerm) o;
return permId.equals(tp.permId) && SignaturePerm.equals(tp.SignaturePerm);
}
return false;
}
@Override
public String toString() {
return permId + "\t" +SignaturePerm.toString();
}
@Override
public int compareTo(PairDocIdPerm tp) {
int cmp = permId.compareTo(tp.permId);
Writable[] ar, other;
ar = this.SignaturePerm.get();
other = tp.SignaturePerm.get();
if (cmp == 0) {
for(int i=0;i<ar.length;i++){
if(((IntWritable)ar[i]).get() == ((IntWritable)other[i]).get()){cmp= 0;continue;}
else if(((IntWritable)ar[i]).get() < ((IntWritable)other[i]).get()){ return -1;}
else if(((IntWritable)ar[i]).get() > ((IntWritable)other[i]).get()){return 1;}
}
}
return cmp;
//return 1;
}
}
I require keys with the same ID to go to the same reducer, with their sort order as coded in the compareTo method.
However, when I use this, my job execution status is always map 100% reduce 0%.
The reduce never runs to completion. Is there anything wrong in this implementation?
In general, what is the likely problem if the reducer status is always 0%?
I think this might be a null pointer exception in the read method:
@Override
public void readFields(DataInput in) throws IOException {
permId.readFields(in);
SignaturePerm.readFields(in);
}
permId is null in this case.
So what you have to do is this:
IntWritable permId = new IntWritable();
Either in the field initializer or before the read.
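A minimal sketch of that fix, using field initializers (one of the two placements suggested above):
// Initialize both Writable fields at declaration so readFields never
// dereferences null on an instance the framework creates by reflection.
private IntWritable permId = new IntWritable();
private IntArrayWritable SignaturePerm = new IntArrayWritable();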
However, your code is horrible to read.
