Use an alias as a Pig UDF parameter - hadoop

I need your help with using an alias (a stored tuple) as a parameter to my Pig UDF; let me explain:
my_file.csv
101,message here
102,message here
103,message here
...
My script PIG:
X = load 'mydata.csv' using PigStorage(',') as (myVar:chararray);
A = load 'my_file.csv' using PigStorage(',') as (key:chararray, value:chararray);
B = GROUP A ALL;
C = foreach B {
D = ORDER A BY key;
GENERATE BagToTuple(D);
};
The result of C is something like (101,message here,102,message here,103,message here...).
Now what I need is to pass this result to my UDF like this:
Z = foreach X generate MYUDF(myVar, C);
the alias "C" is the tuple key,value,key,value...
MYUDF :
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.PigWarning;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.util.WrappedIOException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
public class ReDecode extends EvalFunc<String> {
int numParams = -1;
Pattern mPattern = null;
@Override
public Schema outputSchema(Schema input) {
try {
return new Schema(new Schema.FieldSchema(getSchemaName(this
.getClass().getName().toLowerCase(), input),
DataType.CHARARRAY));
} catch (Exception e) {
return null;
}
}
@Override
public String exec(Tuple tuple) throws IOException {
if (numParams==-1) // Not initialized
{
numParams = tuple.size();
if (numParams <= 2) {
String msg = "Decode: Atleast an expression and default string is required.";
throw new IOException(msg);
}
if (tuple.size()%2!=0) {
String msg = "ItssPigUDFs.ReDecode : Some parameters are unmatched.";
throw new IOException(msg);
}
}
if (tuple.get(0)==null)
return null;
try {
for (int count = 1; count < numParams - 1; count += 2)
{
mPattern=Pattern.compile((String)tuple.get(count));
if (mPattern.matcher((String)tuple.get(0)).matches())
{
return (String)tuple.get(count+1);
}
}
} catch (ClassCastException e) {
warn("ItssPigUDFs.ReDecode : Data type error", PigWarning.UDF_WARNING_1);
return null;
} catch (NullPointerException e) {
String msg = "ItssPigUDFs.ReDecode : Encounter null in the input";
throw new IOException(msg);
}
return (String)tuple.get(tuple.size()-1);
}
}
Thank you for your help.

I don't think numParams is needed; the number of parameters you get in the UDF will be input.size().
Therefore, if you call MYUDF(myVar, C), you should be able to get those values in Java with String myVar = (String) input.get(0) and Tuple param2 = (Tuple) input.get(1).
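A minimal sketch of how exec could unpack those two arguments, reusing the imports already present in ReDecode; the positional layout, casts, and the regex matching are assumptions based on the call MYUDF(myVar, C) above, not taken from the posted code:
@Override
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() < 2) {
        return null;
    }
    // first argument: the chararray field myVar from relation X
    String myVar = (String) input.get(0);
    // second argument: the flattened (key, value, key, value, ...) tuple built in C
    Tuple params = (Tuple) input.get(1);
    if (myVar == null || params == null) {
        return null;
    }
    // walk the key/value pairs and return the value whose key pattern matches myVar
    for (int i = 0; i + 1 < params.size(); i += 2) {
        String key = (String) params.get(i);
        String value = (String) params.get(i + 1);
        if (key != null && myVar.matches(key)) {
            return value;
        }
    }
    return null;
}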

Related

Can't get my String switch statement to hit anything but default

I am trying to use a switch statement to pass a LinkedHashMap to the correct class constructor for a school project (I just added the rest of the code). The code reads in a txt file and, based on the first word in the text, sends the hash map along. I can't seem to get a hit on the case I am testing. I have even tried turning everything into an if-else-if structure, and that still didn't work out; I've also tried using a private enum to no avail. I am at a loss here. I am running Java 8. I am open to any suggestions on optimizing the code as well. Thanks.
package linkedlist;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JLabel;
/**
*
* @author admin
*/
public class TextReaderGUI extends javax.swing.JFrame {
JFileChooser fileChooser = new JFileChooser();
String rawText;
String[] text;
public String listType;
private JButton fileChooserButton;
private JLabel statusLabel;
/**
* Creates new form TextReaderGUI
*/
public TextReaderGUI() {
initComponents();
}
/**
* This method is called from within the constructor to initialize the form.
* WARNING: Do NOT modify this code. The content of this method is always
* regenerated by the Form Editor.
*/
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">
private void initComponents() {
fileChooserButton = new javax.swing.JButton();
statusLabel = new javax.swing.JLabel();
setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
fileChooserButton.setText("File Chooser");
fileChooserButton.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
fileChooserButtonActionPerformed(evt);
}
});
statusLabel.setText("Status: ");
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
getContentPane().setLayout(layout);
layout.setHorizontalGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING).addGroup(layout
.createSequentialGroup()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup().addGap(14, 14, 14).addComponent(fileChooserButton))
.addGroup(layout.createSequentialGroup().addGap(36, 36, 36).addComponent(statusLabel)))
.addContainerGap(264, Short.MAX_VALUE)));
layout.setVerticalGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup().addGap(16, 16, 16).addComponent(fileChooserButton)
.addGap(18, 18, 18).addComponent(statusLabel).addContainerGap(221, Short.MAX_VALUE)));
pack();
}// </editor-fold>
private void fileChooserButtonActionPerformed(java.awt.event.ActionEvent evt) {
// TODO add your handling code here:
try {
int returnVal = fileChooser.showOpenDialog(this);
if (returnVal == JFileChooser.APPROVE_OPTION) {
File file = fileChooser.getSelectedFile();
rawText = "";
BufferedReader reader = new BufferedReader(new FileReader(file));
StringBuilder stringb = new StringBuilder();
String s;
while ((s = reader.readLine()) != null) {
stringb.append(s);
stringb.append("\n"); // this makes sure that java sees when a new line has started
}
rawText = stringb.toString();
statusLabel.setText("Status: " + file.getName());
}
} catch (IOException e) {
statusLabel.setText("Status" + e);
}
text = rawText.split("\n"); // creating a string array split at each line break
Map<String, String> lines = new LinkedHashMap<>();
for (int i = 0; i < text.length; i++) { // this sets the first word of the line = key
String[] currentLine = text[i].split("\\s+"); // splits the words in the current line to an array
if (i == 0) {
listType = currentLine[0].replaceAll("\n", "").replaceAll("\\s+", ""); // determines listType to pass
}
if (currentLine.length > 1 && i > 0) {
lines.put(currentLine[0] + " " + i, currentLine[1]); // if two words exist on a line
// the first is the key second is the value
} else if (currentLine.length == 1 && i > 0) { // keeps list type out of key values
lines.put(currentLine[0] + " " + i, ""); // " " + i is used to ensure that each command is unique key
}
}
lines.keySet().forEach((name) -> {// Testing to see if document was correctly placed into the HashMap
String key = name;
String value = lines.get(name);
System.out.println(key + " " + value + "\n");
});
System.out.println(listType); // testing to see if list type was correctly stored
switch (listType) {
case "stack":
Stack stack = new Stack((LinkedHashMap) lines);
break;
case "queue":
Queue queue = new Queue((LinkedHashMap) lines);
break;
case "dll":
Dll dll = new Dll((LinkedHashMap) lines);
break;
case "sll":
System.out.println("almost there");
Sll sll = new Sll((LinkedHashMap) lines);
break;
case "cll":
Cll cll = new Cll((LinkedHashMap) lines);
break;
default:
System.out.println("something went wrong here");
break;
}
}
}

Spring Boot send only changed data

I am building a game with Spring Boot on the server and plain JavaScript on the frontend.
Right now I have this:
...
@Autowired
private SimpMessagingTemplate template;
...
@Scheduled(fixedRate = 1000 / Constants.FPS)
public void renderClients() {
for(Game g : games) {
template.convertAndSend("/game/render/" + g.getId(), g);
}
}
...
Basically I have multiple Games running and I send each one, with its id, to the client.
However, the data I am sending (or most of it) is static (not changing)...
What if I want to send not the whole object but only the parts that have changed?
Btw the response JSON looks like this:
{"id":"862b1dd8-48d5-4562-802a-7d669a5a5ed5","players":[{"id":"da8dcbec-7028-4a39-9547-a4e2dc321c3c","name":"John Doe","position":{"x":100.0,"y":100.0},"rotation":0.0,"hero":{"maxHealth":1300.0,"movementSpeed":4.5,"attackDamage":32.75,"width":68,"height":71,"heroName":"drowRanger","radius":34.0},"stats":{"kills":0,"lastHits":0},"lastClick":null}],"duration":380107.12}
and the only thing that is changing is duration and sometimes the x and y when the player moves...
Is it even possible?
Could I write some middleware that will do that at the time the objects are converted to JSON?
Maintain a data structure that stores your changed values, and attach it to your Game object.
When it is time to send, convert the map to JSON and clear it.
This approach may use more memory than before, but it won't cost much time.
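A minimal sketch of that idea, assuming you can add a field to Game; the DirtyState class and its method names are hypothetical, not from the original code:
import java.util.LinkedHashMap;
import java.util.Map;

public class DirtyState {
    // collects only the fields that changed since the last send
    private final Map<String, Object> changes = new LinkedHashMap<>();

    public void mark(String key, Object value) {
        changes.put(key, value);
    }

    // returns the pending changes and clears them for the next frame
    public Map<String, Object> drain() {
        Map<String, Object> snapshot = new LinkedHashMap<>(changes);
        changes.clear();
        return snapshot;
    }
}
The render loop could then send g.getDirtyState().drain() instead of the whole g (getDirtyState and the places that call mark(...) are hypothetical and would have to be added to Game's setters).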
I DID IT!!
In my GameController I do:
@Scheduled(fixedRate = 1000 / Constants.FPS)
public void renderClients() throws Exception {
for(Game g : games) {
template.convertAndSend("/game/render/" + g.getId(), g.formatToSend());
}
}
Notice the g.formatToSend() method.
Here is what the Game class looks like:
public class Game {
private BandWidthOptimizer optimizer = new BandWidthOptimizer();
...
...
public String formatToSend() throws Exception {
return optimizer.optimize(this);
}
}
And Here Comes THE BandWidthOptimizer:
package com.iddqd.doto.optimization;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import java.util.Iterator;
import java.util.Set;
import java.util.function.BiConsumer;
public class BandWidthOptimizer {
private final ObjectMapper mapper = new ObjectMapper(); // serializes the Game object to JSON
private final JSONParser parser = new JSONParser();
private String lastJSON = "{}"; // previous frame's JSON; starts as an empty object so the first diff sends everything
private String[] preserveKeys;
public BandWidthOptimizer() {
this.preserveKeys = new String[0];
}
public BandWidthOptimizer(String[] preserveKeys) {
this.preserveKeys = preserveKeys;
}
public String optimize(Object obj) throws Exception {
String json = mapper.writeValueAsString(obj);
Object nobj = parser.parse(json);
Object oobj = parser.parse(lastJSON);
JSONObject newJsonObj = (JSONObject)nobj;
JSONObject oldJsonObj = (JSONObject)oobj;
JSONObject res = getJSONObjectDiff(newJsonObj, oldJsonObj);
lastJSON = json;
return res.toJSONString();
}
private JSONObject getJSONObjectDiff(JSONObject obj1, JSONObject obj2) {
JSONObject res = new JSONObject();
Set set = obj1.keySet();
for (Object key : set) {
// If doesn't exist put it in the diff
if (!obj2.containsKey(key)) {
res.put(key, obj1.get(key));
} else {
// Get the values from both objects
Object val1 = obj1.get(key);
Object val2 = obj2.get(key);
// If their instances are of the same type
if(val1 == null) {
continue;
}
if(val2 == null) {
res.put(key, val1);
continue;
}
if (val1.getClass().equals(val2.getClass())) {
// If they are JSONObject
if (val1 instanceof JSONObject) {
// Recursively parse JSONObject with all of it's properties
JSONObject nested = getJSONObjectDiff((JSONObject) obj1.get(key), (JSONObject) obj2.get(key));
// If it contains any keys
if(nested.keySet().size() > 0) {
// Store the diff into final diff
res.put(key, nested);
}
// If they are JSONArrays
} else if (val1 instanceof JSONArray) {
// If val1 contains some values (is not empty)
if(((JSONArray) val1).size() > 0) {
// Get their diff
JSONArray arr = getJSONArrayDiff((JSONArray) val1, (JSONArray) val2);
// If array is not empty
if (arr.size() > 0) {
// put it into the diff
res.put(key, arr);
}
}
// If they are just a pure values
} else {
// Compare them - If they're not equal
if(!val1.equals(val2)) {
// put the val1 into diff
res.put(key, val1);
}
}
} else {
res.put(key, val1);
}
}
}
return res;
}
private JSONArray getJSONArrayDiff(JSONArray arr1, JSONArray arr2) {
JSONArray res = new JSONArray();
// For every element
for(int i = 0; i < arr1.size(); i++) {
Object val1 = arr1.get(i);
// If i is out of arr2 bounds
if (i >= arr2.size()) {
// put the arr1 item into the diff and skip the comparison
res.add(val1);
continue;
}
Object val2 = arr2.get(i);
if(val1 == null) {
continue;
}
if(val2 == null) {
res.add(val1);
continue;
}
// If their types are equal
if(val1.getClass().equals(val2.getClass())) {
// If they are JSONObjects
if(val1 instanceof JSONObject) {
// Get their diff
JSONObject obj = getJSONObjectDiff((JSONObject) val1, (JSONObject) val2);
// If it contains any keys
if(obj.keySet().size() > 0) {
// Store the diff into final diff
res.add(obj);
}
// If they are JSONArrays
} else if (val1 instanceof JSONArray) {
// Get their diff
JSONArray arr = getJSONArrayDiff((JSONArray) val1, (JSONArray) val2);
// If array is not empty
if(arr.size() > 0) {
// put it into the diff
res.add(arr);
}
// If they are just a pure values
} else {
// Compare them - If they're not equal
if (!val1.equals(val2)) {
// add the val1 into diff
res.add(val1);
}
}
} else {
res.add(val1);
}
}
return res;
}
}
This is it. Now, if nothing moves on the map, the resulting JSON looks like this:
{"duration":282964.56}
because only the duration changes
But when my Player moves on the map see what happens:
{"duration":386676.06,"players":[{"position":{"x":556.5914801003707,"y":153.55964799554002}}]}
TODO
I have to implement the preserveKeys functionality because I always want to send some keys, like id and so on...
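A minimal sketch of how preserveKeys could be applied inside optimize(), assuming the preserved keys live at the top level of the serialized Game; the copy loop is my assumption, not part of the original code:
public String optimize(Object obj) throws Exception {
    String json = mapper.writeValueAsString(obj);
    JSONObject newJsonObj = (JSONObject) parser.parse(json);
    JSONObject oldJsonObj = (JSONObject) parser.parse(lastJSON);
    JSONObject res = getJSONObjectDiff(newJsonObj, oldJsonObj);
    // always copy the preserved keys from the full object into the diff
    for (String key : preserveKeys) {
        if (newJsonObj.containsKey(key)) {
            res.put(key, newJsonObj.get(key));
        }
    }
    lastJSON = json;
    return res.toJSONString();
}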

ERROR 2078: Caught error from UDF

I am getting the error "ERROR 2078: Caught error from UDF: com.Hadoop.pig.SplitRec [Caught exception processing input row [1]]". I am sure that the input string is going out of bounds, but I am not sure which record (record number) is causing the problem.
I am trying to create a log that displays the record causing the problem, but I am not sure how to print/log the offending record.
The input looks like:
PXW01YIN 12000099PGEN PXW01YINFFFFFFFF PXW01YINIMFGUIPY04301Y301 JFK 00888JFK 008880001 PIMF 0000N/ACTRC5/TXN08/SCR301\/SEQ/TEX021\#
PXW01PIN 12000099PGEN PXW01PINFFFFFFFF PXW01PINIMFGUIAV04301P301 PER 03615PER 036150001 PIMF 0000N/ACTRCK/TXN08/SCR301\/SEQ/TEX021\#
The above lines are two records and I have tested them (using LIMIT); they are not causing the problem. I have more than 150 KB of input data.
The script that I am using:
SPLT_REC1 = load '/user/hduser/output/realdata/pig_out6/part-m-00000' as (tran_array:chararray);
register /home/cloudera/workspace/SplitRec.jar;
define SplitRec com.Hadoop.pig.SplitRec();
SPLT_REC2 = foreach SPLT_REC1 generate SplitRec(tran_array);
store SPLT_REC2 into '/user/hduser/output/realdata/pig_out7';
package com.Hadoop.pig;
import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.WrappedIOException;
@SuppressWarnings("deprecation")
public class SplitRec extends EvalFunc<String> {
public String exec(Tuple input) throws IOException {
if (input == null || input.size() == 0)
return null;
try {
String Str1 = (String)input.get(0);
String delim1 = "PIMF+";
String[] tokens1 = Str1.split(delim1);
String part3 = tokens1[0];
String part4 = tokens1[1];
int len1 = part4.length();
String part5 = part4.substring(8,len1);
String conCat1 = part3+":"+part5;
return conCat1;
}
catch(Exception e) {
throw WrappedIOException.wrap("Caught exception processing input row ", e);
}
}
}
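One way to find the offending record is to put the row itself into the wrapped exception, so it shows up in the task log alongside the ERROR 2078 message. A minimal sketch of the same exec with that change, reusing the imports already present in SplitRec (the message format is just an illustration):
public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0)
        return null;
    String str1 = null;
    try {
        str1 = (String) input.get(0);
        String[] tokens1 = str1.split("PIMF+");
        String part5 = tokens1[1].substring(8);
        return tokens1[0] + ":" + part5;
    } catch (Exception e) {
        // the offending record is now part of the error message in the Pig/MapReduce task log
        throw WrappedIOException.wrap("Caught exception processing input row: " + str1, e);
    }
}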

p-value calculation using Java

I want to write a Java program to calculate the p-value of the data I want to upload, but I am having a lot of trouble doing so. The data is being read by the Java program, but the next step is to process the data to get the p-value.
The input file should be:
Name A B C
a 1.7085586 0.73179674 3.3962722
b 0.092749596 -0.10030079 -0.47453594
c 1.1727467 0.15784931 0.0572958
d -0.91714764 -0.62808895 -0.6190882
e 0.34570503 0.10605621 0.30304766
f 2.333506 -0.2063818 0.4022169
g 0.7893815 1.449388 1.5907407
And the output should be like this:
Name pValue
a 0.129618298
b 0.4363544
c 0.323631285
d 0.017916658
e 0.076331828
f 0.385619995
g 0.035449488
I have run this data through an R program, but I want to write some Java to solve this.
My Java code till now is:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class Abc {
public static void main(String[] args) {
// TODO Auto-generated method stub
BufferedReader br = null;
try {
String sCurrentLine;
br = new BufferedReader(new FileReader("C:/Documents and Settings/Admin/Desktop/test.txt"));
while ((sCurrentLine = br.readLine()) != null) {
System.out.println(sCurrentLine);
//output = BufferedReader.getParser().getAsDoubleArray("br");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null)br.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
}
This only reads my data and prints it to the console. How do I proceed?
I think you are facing a problem with retrieving and printing values from the file. The program below gives output in the required format:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class Abc {
public static void main(String[] args) {
BufferedReader br = null;
String sCurrentLine;
try {
br = new BufferedReader(new FileReader("C:/Documents and Settings/Admin/Desktop/test.txt"));
// Skip the header row
br.readLine();
System.out.println("Name" + "\t" + "pValue");
while ((sCurrentLine = br.readLine()) != null) {
int i = 0;
String str[] = sCurrentLine.split("\t");
System.out.print(str[i] + "\t");
Double dValue1 = Double.parseDouble(str[++i]);
Double dValue2 = Double.parseDouble(str[++i]);
Double dValue3 = Double.parseDouble(str[++i]);
// Do pValue Calc here
Double pValue = 1.2334;
System.out.println(pValue);
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (br != null)
br.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
}
}
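For the actual calculation: the expected output looks consistent with a two-sided one-sample t-test of each row against a mean of 0, but that is an assumption on my part (only the original R script can confirm it). If that is what you need, Apache Commons Math can compute it, so the pValue placeholder above could be replaced with something like this (assuming commons-math3 is on the classpath):
import org.apache.commons.math3.stat.inference.TTest;

public class PValueCalc {
    // two-sided one-sample t-test of the row values against a hypothesized mean of 0
    static double pValue(double... values) {
        return new TTest().tTest(0.0, values);
    }

    public static void main(String[] args) {
        // row "a" from the example input; should come out near 0.1296 if the assumption holds
        System.out.println(pValue(1.7085586, 0.73179674, 3.3962722));
    }
}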

Reading a file with newlines as a tuple in pig

Is it possible to change the record delimiter from newline to some other string so as to read a file with newlines into a single tuple in Pig?
Yes.
A = LOAD '...' USING PigStorage(',') AS (...); -- comma is the delimiter for fields
SET textinputformat.record.delimiter '<delimiter>'; -- record delimiter; by default it is `\n`. You can change it to any delimiter.
As mentioned here
You can use PigStorage
A = LOAD '/some/path/COMMA-DELIM-PREFIX*' USING PigStorage(',') AS (f1:chararray, ...);
B = LOAD '/some/path/SEMICOLON-DELIM-PREFIX*' USING PigStorage('\t') AS (f1:chararray, ...);
You can even try writing a load/store UDF.
There are Java code examples below for both load and store.
Load Functions : LoadFunc abstract class has the main methods for loading data and for most use cases it would suffice to extend it. You can read more here
Example
The loader implementation in the example is a loader for text data
with line delimiter as '\n' and '\t' as default field delimiter (which
can be overridden by passing a different field delimiter in the
constructor) - this is similar to current PigStorage loader in Pig.
The implementation uses an existing Hadoop supported Inputformat -
TextInputFormat - as the underlying InputFormat.
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
public class SimpleTextLoader extends LoadFunc {
protected RecordReader in = null;
private byte fieldDel = '\t';
private ArrayList<Object> mProtoTuple = null;
private TupleFactory mTupleFactory = TupleFactory.getInstance();
private static final int BUFFER_SIZE = 1024;
public SimpleTextLoader() {
}
/**
* Constructs a Pig loader that uses specified character as a field delimiter.
*
* @param delimiter
* the single byte character that is used to separate fields.
* ("\t" is the default.)
*/
public SimpleTextLoader(String delimiter) {
this();
if (delimiter.length() == 1) {
this.fieldDel = (byte)delimiter.charAt(0);
} else if (delimiter.length() > 1 && delimiter.charAt(0) == '\\') {
switch (delimiter.charAt(1)) {
case 't':
this.fieldDel = (byte)'\t';
break;
case 'x':
fieldDel =
Integer.valueOf(delimiter.substring(2), 16).byteValue();
break;
case 'u':
this.fieldDel =
Integer.valueOf(delimiter.substring(2)).byteValue();
break;
default:
throw new RuntimeException("Unknown delimiter " + delimiter);
}
} else {
throw new RuntimeException("PigStorage delimeter must be a single character");
}
}
@Override
public Tuple getNext() throws IOException {
try {
boolean notDone = in.nextKeyValue();
if (!notDone) {
return null;
}
Text value = (Text) in.getCurrentValue();
byte[] buf = value.getBytes();
int len = value.getLength();
int start = 0;
for (int i = 0; i < len; i++) {
if (buf[i] == fieldDel) {
readField(buf, start, i);
start = i + 1;
}
}
// pick up the last field
readField(buf, start, len);
Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
mProtoTuple = null;
return t;
} catch (InterruptedException e) {
int errCode = 6018;
String errMsg = "Error while reading input";
throw new ExecException(errMsg, errCode,
PigException.REMOTE_ENVIRONMENT, e);
}
}
private void readField(byte[] buf, int start, int end) {
if (mProtoTuple == null) {
mProtoTuple = new ArrayList<Object>();
}
if (start == end) {
// NULL value
mProtoTuple.add(null);
} else {
mProtoTuple.add(new DataByteArray(buf, start, end));
}
}
@Override
public InputFormat getInputFormat() {
return new TextInputFormat();
}
@Override
public void prepareToRead(RecordReader reader, PigSplit split) {
in = reader;
}
@Override
public void setLocation(String location, Job job)
throws IOException {
FileInputFormat.setInputPaths(job, location);
}
}
Store Functions : StoreFunc abstract class has the main methods for storing data and for most use cases it should suffice to extend it
Example
The storer implementation in the example is a storer for text data
with line delimiter as '\n' and '\t' as default field delimiter (which
can be overridden by passing a different field delimiter in the
constructor) - this is similar to current PigStorage storer in Pig.
The implementation uses an existing Hadoop supported OutputFormat -
TextOutputFormat as the underlying OutputFormat.
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pig.PigException;
import org.apache.pig.StoreFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
public class SimpleTextStorer extends StoreFunc {
protected RecordWriter writer = null;
private byte fieldDel = '\t';
private static final int BUFFER_SIZE = 1024;
private static final String UTF8 = "UTF-8";
public SimpleTextStorer() {
}
public SimpleTextStorer(String delimiter) {
this();
if (delimiter.length() == 1) {
this.fieldDel = (byte)delimiter.charAt(0);
} else if (delimiter.length() > 1 && delimiter.charAt(0) == '\\') {
switch (delimiter.charAt(1)) {
case 't':
this.fieldDel = (byte)'\t';
break;
case 'x':
fieldDel =
Integer.valueOf(delimiter.substring(2), 16).byteValue();
break;
case 'u':
this.fieldDel =
Integer.valueOf(delimiter.substring(2)).byteValue();
break;
default:
throw new RuntimeException("Unknown delimiter " + delimiter);
}
} else {
throw new RuntimeException("PigStorage delimeter must be a single character");
}
}
ByteArrayOutputStream mOut = new ByteArrayOutputStream(BUFFER_SIZE);
@Override
public void putNext(Tuple f) throws IOException {
int sz = f.size();
for (int i = 0; i < sz; i++) {
Object field;
try {
field = f.get(i);
} catch (ExecException ee) {
throw ee;
}
putField(field);
if (i != sz - 1) {
mOut.write(fieldDel);
}
}
Text text = new Text(mOut.toByteArray());
try {
writer.write(null, text);
mOut.reset();
} catch (InterruptedException e) {
throw new IOException(e);
}
}
@SuppressWarnings("unchecked")
private void putField(Object field) throws IOException {
//string constants for each delimiter
String tupleBeginDelim = "(";
String tupleEndDelim = ")";
String bagBeginDelim = "{";
String bagEndDelim = "}";
String mapBeginDelim = "[";
String mapEndDelim = "]";
String fieldDelim = ",";
String mapKeyValueDelim = "#";
switch (DataType.findType(field)) {
case DataType.NULL:
break; // just leave it empty
case DataType.BOOLEAN:
mOut.write(((Boolean)field).toString().getBytes());
break;
case DataType.INTEGER:
mOut.write(((Integer)field).toString().getBytes());
break;
case DataType.LONG:
mOut.write(((Long)field).toString().getBytes());
break;
case DataType.FLOAT:
mOut.write(((Float)field).toString().getBytes());
break;
case DataType.DOUBLE:
mOut.write(((Double)field).toString().getBytes());
break;
case DataType.BYTEARRAY: {
byte[] b = ((DataByteArray)field).get();
mOut.write(b, 0, b.length);
break;
}
case DataType.CHARARRAY:
// oddly enough, writeBytes writes a string
mOut.write(((String)field).getBytes(UTF8));
break;
case DataType.MAP:
boolean mapHasNext = false;
Map<String, Object> m = (Map<String, Object>)field;
mOut.write(mapBeginDelim.getBytes(UTF8));
for(Map.Entry<String, Object> e: m.entrySet()) {
if(mapHasNext) {
mOut.write(fieldDelim.getBytes(UTF8));
} else {
mapHasNext = true;
}
putField(e.getKey());
mOut.write(mapKeyValueDelim.getBytes(UTF8));
putField(e.getValue());
}
mOut.write(mapEndDelim.getBytes(UTF8));
break;
case DataType.TUPLE:
boolean tupleHasNext = false;
Tuple t = (Tuple)field;
mOut.write(tupleBeginDelim.getBytes(UTF8));
for(int i = 0; i < t.size(); ++i) {
if(tupleHasNext) {
mOut.write(fieldDelim.getBytes(UTF8));
} else {
tupleHasNext = true;
}
try {
putField(t.get(i));
} catch (ExecException ee) {
throw ee;
}
}
mOut.write(tupleEndDelim.getBytes(UTF8));
break;
case DataType.BAG:
boolean bagHasNext = false;
mOut.write(bagBeginDelim.getBytes(UTF8));
Iterator<Tuple> tupleIter = ((DataBag)field).iterator();
while(tupleIter.hasNext()) {
if(bagHasNext) {
mOut.write(fieldDelim.getBytes(UTF8));
} else {
bagHasNext = true;
}
putField((Object)tupleIter.next());
}
mOut.write(bagEndDelim.getBytes(UTF8));
break;
default: {
int errCode = 2108;
String msg = "Could not determine data type of field: " + field;
throw new ExecException(msg, errCode, PigException.BUG);
}
}
}
@Override
public OutputFormat getOutputFormat() {
return new TextOutputFormat<WritableComparable, Text>();
}
@Override
public void prepareToWrite(RecordWriter writer) {
this.writer = writer;
}
@Override
public void setStoreLocation(String location, Job job) throws IOException {
job.getConfiguration().set("mapred.textoutputformat.separator", "");
FileOutputFormat.setOutputPath(job, new Path(location));
if (location.endsWith(".bz2")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
} else if (location.endsWith(".gz")) {
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
}
}
}
