Hadoop: reducer not getting invoked - hadoop

I know this is a very basic question, but I am not able to find where I am making a mistake. My Reducer is not getting invoked from the driver code. I would greatly appreciate it if anyone could help me out.
My Driver Code
package com.mycompany.myorg;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Driver that configures and submits the "Cars Avg Fuel Economy" MapReduce job.
 *
 * <p>Expects exactly two arguments after Hadoop's generic options have been
 * removed: the input path and the output path.
 */
public class carsDriver {
    /**
     * Parses the command line, wires up the mapper and reducer, and runs the job.
     *
     * @param args command-line arguments; may contain Hadoop generic options
     *             followed by the input path and the output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // GenericOptionsParser consumes the generic options; only the remainder
        // holds the job's own arguments.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("specified input and output path is not correct");
            System.exit(-1);
        }

        // set up the job details
        Job job = new Job(conf, "Cars Avg Fuel Economy");
        job.setJarByClass(carsDriver.class);

        // Use the parsed remainder, not the raw args: with generic options present,
        // args[0]/args[1] would be the options rather than the paths.
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // setup of the Mapper and Reducer classes (no combiner is configured)
        job.setMapperClass(carsMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(carsReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Mapper Code
package com.mycompany.myorg;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class carsMapper extends Mapper<Object, Text, Text, IntWritable> {
private Text mapkey = new Text();
private final static IntWritable mapval = new IntWritable(1);
public void map(Object key, Text Value,Mapper<Object, Text, Text, IntWritable>.Context context ) throws IOException, InterruptedException{
System.out.println("Running the Mapper");
String items[] = Value.toString().split(",");
System.out.println(items[2]+" "+Integer.parseInt(items[23].toString()));
mapkey.set(items[2]);
mapval.set(Integer.parseInt(items[23].toString()));
context.write(mapkey, mapval);
}
}
Reducer Code
package com.mycompany.myorg;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class carsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reducer(Text key, Iterable<IntWritable> value,Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
System.out.println("Reducer Code");
Text redKey = new Text();
IntWritable redVal = new IntWritable();
redKey.set(key);
int sum=0;
int count=0;
for(IntWritable val: value){
sum= sum +val.get();
count= count + 1;
}
redVal.set((sum/count));
context.write(redKey, redVal);
}
}

After a long time debugging the problem, I found that the issue is with the name of the overriding reduce method.
I used
public void reducer
instead of
public void reduce
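Note that the method must be named reduce, not reducer. With the wrong name it does not override Reducer.reduce, so the framework runs the default reduce implementation and the custom code is never invoked.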

Related

mapreduce WordCount example using outputcollector

I'm trying to run the basic wordcount MapReduce example using OutputCollector, but I'm getting exceptions.
INFO mapreduce.Job: Job job_local1048833344_0001 failed with state FAILED due to: NA
java.lang.Exception: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable ...
Here is the code I'm trying to run:
import java.io.*;
import java.util.StringTokenizer;
import java.util.Iterator;
import org.apache.hadoop.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCountOutputCollector {
public static class WordCountOutputCollectorMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
output.collect(word, one);
}
}
}
public static class WordCountOutputCollectorReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
int sum = 0;
while (values.hasNext()) {
sum += values.next().get();
}
output.collect(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: wordcount <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "word count outputcollector");
job.setJarByClass(WordCountOutputCollector.class);
job.setMapperClass(WordCountOutputCollectorMapper.class);
job.setCombinerClass(WordCountOutputCollectorReducer.class);
job.setReducerClass(WordCountOutputCollectorReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
//conf.setInputFormat(TextInputFormat.class);
//conf.setOutputFormat(TextOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
//JobClient.runJob(conf);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Try This : hadoop-wordcount
import java.io.IOException;
import java.io.PrintStream;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Word-count job: counts the occurrences of each whitespace-separated token
 * in the input and writes (token, count) pairs.
 */
public class WordCount {

    /** Emits (token, 1) for each token of every input line. */
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable offset, Text line, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Tokenize on default whitespace delimiters and emit one pair per token.
            for (StringTokenizer tokens = new StringTokenizer(line.toString()); tokens.hasMoreTokens(); ) {
                word.set(tokens.nextToken());
                context.write(word, one);
            }
        }
    }

    /** Adds up the counts received for a token; registered as combiner and reducer. */
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text token, Iterable<IntWritable> counts, Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable count : counts) {
                total = total + count.get();
            }
            result.set(total);
            context.write(token, result);
        }
    }

    /**
     * Parses the command line, configures the job, and exits with 0 on success
     * or 1 on failure.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (remaining.length != 2) {
            System.err.println("Usage: WordCount <in> <out>");
            System.exit(2);
        }

        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);

        job.setMapperClass(WordCount.Map.class);
        job.setCombinerClass(WordCount.Reduce.class);
        job.setReducerClass(WordCount.Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(remaining[0]));
        FileOutputFormat.setOutputPath(job, new Path(remaining[1]));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
I think it's mainly because the map output has not been converted into Text.
Try to uncomment the code below:
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
JobClient.runJob(conf);

The mapreduce code here produces an empty output file. The code and the input is given below

The mapreduce code here produces an empty output file. The code and the input is given below.
package temperature;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TemperatureMapper extends Mapper<Text, Text, Text, IntWritable> {
#Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
if (isValueValid(value.toString())) {
Text key2 = new Text(getStateFromValue(value.toString()));
IntWritable value2 = new IntWritable(getTemperatureFrom(value.toString()));
context.write(key2, value2);
}
}
private boolean isValueValid(final String value) {
// We expect that the value is a String in the form of : State, Temperature. E.g. MP,77
Pattern p = Pattern.compile("\\S\\S\\,\\d+");
Matcher m = p.matcher(value);
return m.matches();
}
private String getStateFromValue(final String value) {
final String[] subvalues = value.split("\\,");
return subvalues[0];
}
private int getTemperatureFrom(final String value) {
final String[] subvalues = value.split("\\,");
return Integer.parseInt(subvalues[1]);
}
}
public class TemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
#Override
protected void reduce(final Text key, final Iterable<IntWritable> values, final Context context) throws IOException, InterruptedException {
int sumOfTemperatures = 0;
int nbValues = 0;
int average=0;
for (IntWritable temperature : values) {
sumOfTemperatures += temperature.get();
nbValues++;
}
average = sumOfTemperatures / nbValues;
context.write(key, new IntWritable(average));
}
}
/**
 * Driver for the "Calculate average Temperature" job.
 */
public class average {
    /**
     * Configures the job with {@code KeyValueTextInputFormat} input and runs it,
     * exiting with 0 on success and -1 on failure.
     *
     * <p>The argument-count check is disabled in this version.
     * NOTE(review): the paths are read from positions 1 and 2 of the remaining
     * arguments, not 0 and 1. Confirm this matches how the job is launched.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        Job job = new Job(conf, "Calculate average Temperature");
        job.setJarByClass(average.class);

        // Input side.
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));

        // Processing classes.
        job.setMapperClass(TemperatureMapper.class);
        job.setReducerClass(TemperatureReducer.class);

        // Output side.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : -1);
    }
}
The code works fine for the input:
Ujjain MP,77
Bhopal MP,76
Indore MP,72
Raipur CG,72
Durg CG,75
Raigarth CG,70
Kendujhar OR,69
Bhubaneswar OR,71
Puri OR,76
But not for some random input like:
hello VI,6
bye RE,2
It rather produces an empty output file.
Modify your regular expression as follows to support that kind of input:
Pattern p = Pattern.compile("[a-zA-Z]*\\s*[a-zA-Z]{2},\\d+$");
Also, you will need split again to get the state
String[] subvalues = value.split("\\,")[0].split(" ");
return subvalues[subvalues.length - 1];
I hope it helps. On my side, I had to change the key type to LongWritable. I am not sure why it is not complaining on your side; it is probably a different API version.
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

Code not skipping two words in wordcount program

This code counts words and is supposed to skip two given words (in & of) from a file.
Please help me understand why it is not skipping these words.
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * Mapper that emits (token, 1) for every whitespace-separated token of a line.
 */
class skipwc_mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * Tokenizes the line and writes one pair per token, allocating fresh
     * writables for each pair.
     */
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (StringTokenizer tokens = new StringTokenizer(value.toString()); tokens.hasMoreTokens(); ) {
            Text token = new Text(tokens.nextToken());
            context.write(token, new IntWritable(1));
        }
    }
}
class skipwc_reducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
protected void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int tot = 0;
if (key.toString() != "in" && key.toString() != "of") {
while (values.iterator().hasNext()) {
tot += values.iterator().next().get();
}
context.write(key, new IntWritable(tot));
}
}
}
/**
 * Driver for the word-count job that uses skipwc_mapper and skipwc_reducer
 * with text input and text output.
 */
public static class skipwc_runner {
    /**
     * Builds the job from the two path arguments and exits with 0 on success,
     * 1 on failure.
     */
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Job job = new Job(new Configuration());
        job.setJarByClass(skipwc_runner.class);

        // Processing classes.
        job.setMapperClass(skipwc_mapper.class);
        job.setReducerClass(skipwc_reducer.class);

        // Key/value types of the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Formats and locations.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
}
Use equals method to compare String like:
if (!"in".equals(key.toString()) && !"of".equals(key.toString()))
Also, it would be beneficial to skip of/in in the mapper rather than in the reducer: removing the data before the sort and shuffle phase is more efficient, because it avoids additional I/O.

hadoop program only logic to be written.write the program only for reducer

Write a MapReduce program to print the most frequently occurring words in a text document.
The threshold value can be fixed, and any word whose frequency exceeds the threshold needs to be output.
E.g.: if threshold=100 and “is” occurs 150 times in the document, it has to be printed in the output.
program :
package org.myorg;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class WordCount {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
context.write(word, one);
}
}
}
public static class Reduce extends Reducer<Text, IntWritable, Text, Inritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context coext)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
Here's the complete code,
Driver Class
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class FrequentWordClassDriver extends Configured implements Tool{
#Override
public int run(String[] args) throws Exception {
if(args.length != 2){
return -1;
}
JobConf conf = new JobConf(getConf(), FrequentWordClassDriver.class);
conf.setJobName(this.getClass().getName());
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(FrequentWordClassMapper.class);
conf.setReducerClass(FrequentWordClassReducer.class);
conf.setMapOutputKeyClass(Text.class);
conf.setMapOutputValueClass(IntWritable.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception{
int exitCode = ToolRunner.run(new FrequentWordClassDriver(), args);
System.exit(exitCode);
}
}
Mapper Class
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class FrequentWordClassMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable>{
#Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
String line = value.toString();
for(String phrase : line.split(" ")){
output.collect(new Text(phrase.toUpperCase()), new IntWritable(1));
}
}
}
Reducer Class
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class FrequentWordClassReducer extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable>{
#Override
public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException{
int wordcount = 0, threshold = 100;
while(values.hasNext()){
IntWritable value = values.next();
wordcount +=value.get();
}
if(wordcount >= threshold){
output.collect(key, new IntWritable(wordcount));
}
}
}
The Driver, Mapper and Reducer classes are fairly simple and self-explanatory. The mapper class splits each sentence into words and sends them to the reducer class in the format <word, 1>. The reducer class receives the data in the format <word, [1, 1, 1, 1]>, aggregates it to count the occurrences of each word, and, if the count for a word is greater than or equal to the threshold value, writes that word to the output.
Hope this will help you.
It's very simple.
Have a look at traditional word count example. You can use same code.
After setting Reducer class, add below line (If you want your output in single reduce file)
job.setNumReduceTasks(1);
Add your condition in reduce method.
Before writing to context.write(key, result);, add your condition
if ( sum > threshold) {
context.write(key, result);
}
You can better achieve this by using counters.
You can set the number of counter
/**
 * Adds the total count of {@code word} to a job counter named after the word,
 * in the counter group "WordFrequency". Nothing is written to the job output.
 *
 * NOTE(review): Hadoop limits how many counters a job may define, so one
 * counter per distinct word only suits small vocabularies; confirm against
 * the cluster's configured limit.
 */
public void reduce(Text word, Iterable<IntWritable> count,
        Context context
        ) throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : count) {
        sum += val.get();
    }
    // The context looks counters up by (group name, counter name) or by enum;
    // there is no single-String overload.
    context.getCounter("WordFrequency", word.toString()).increment(sum);
}
And then, in your driver program, you can get the counters using
Counters counters=job.getCounters();
You can use this and run multiple mappers and reducer, thus not compromising the performance.

MapReduce Old API - Passing Command Line Argument to Map

I am coding a MapReduce job for finding the occurrence of a search string (passed through Command Line argument) in an input file stored in HDFS using old API.
Below is my Driver class -
/**
 * Old-API driver for the string-search job. The search word is passed to the
 * tasks through the job configuration under the key "SearchWord".
 */
public class StringSearchDriver
{
    /**
     * Configures and runs the job.
     *
     * @param args input path, output path, and the word to search for
     * @throws IOException if the job fails
     */
    public static void main(String[] args) throws IOException
    {
        if (args.length < 3)
        {
            // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: StringSearchDriver <input path> <output path> <search word>");
            System.exit(-1);
        }
        JobConf jc = new JobConf(StringSearchDriver.class);
        jc.set("SearchWord", args[2]);
        jc.setJobName("String Search");
        FileInputFormat.addInputPath(jc, new Path(args[0]));
        FileOutputFormat.setOutputPath(jc, new Path(args[1]));
        jc.setMapperClass(StringSearchMap.class);
        jc.setReducerClass(StringSearchReduce.class);
        // Text here must be org.apache.hadoop.io.Text; a same-named class from
        // another package is not a valid Hadoop key type.
        jc.setOutputKeyClass(Text.class);
        jc.setOutputValueClass(IntWritable.class);
        JobClient.runJob(jc);
    }
}
Below is my Mapper Class -
public class StringSearchMap extends MapReduceBase implements
Mapper<LongWritable, Text, Text, IntWritable>
{
String searchWord;
public void configure(JobConf jc)
{
searchWord = jc.get("SearchWord");
}
#Override
public void map(LongWritable key, Text value,
OutputCollector<Text, IntWritable> out, Reporter reporter)
throws IOException
{
String[] input = value.toString().split("");
for(String word:input)
{
if (word.equalsIgnoreCase(searchWord))
out.collect(new Text(word), new IntWritable(1));
}
}
}
On running the job (command line string passed is "hi"), I am getting the below error -
14/09/21 22:35:41 INFO mapred.JobClient: Task Id : attempt_201409212134_0005_m_000001_2, Status : FAILED
java.lang.ClassCastException: interface javax.xml.soap.Text
at java.lang.Class.asSubclass(Class.java:3129)
at org.apache.hadoop.mapred.JobConf.getOutputKeyComparator(JobConf.java:795)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.<init>(MapTask.java:964)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:422)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:416)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Please suggest.
Your IDE auto-imported the wrong class.
Instead of import org.apache.hadoop.io.Text, you imported javax.xml.soap.Text.
You can find a sample wrong import in this blog.
One point , It is better to adopt New API
EDIT
I used New Api
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* #author Unmesha sreeveni
* #Date 23 sep 2014
*/
public class StringSearchDriver extends Configured implements Tool {
public static class Map extends
Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
String line = value.toString();
String searchString = conf.get("word");
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
String token = tokenizer.nextToken();
if(token.equals(searchString)){
word.set(token);
context.write(word, one);
}
}
}
}
public static class Reduce extends
Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
int res = ToolRunner.run(conf, new StringSearchDriver(), args);
System.exit(res);
}
#Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
if (args.length != 3) {
System.out
.printf("Usage: Search String <input dir> <output dir> <search word> \n");
System.exit(-1);
}
String source = args[0];
String dest = args[1];
String searchword = args[2];
Configuration conf = new Configuration();
conf.set("word", searchword);
Job job = new Job(conf, "Search String");
job.setJarByClass(StringSearchDriver.class);
FileSystem fs = FileSystem.get(conf);
Path in =new Path(source);
Path out =new Path(dest);
if (fs.exists(out)) {
fs.delete(out, true);
}
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);
boolean sucess = job.waitForCompletion(true);
return (sucess ? 0 : 1);
}
}
This works.
For Text, the required Hadoop package is org.apache.hadoop.io.
Check your packages
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

Resources