Hadoop - Mappers not emitting anything - hadoop

I'm running the code below and no output is generated (well, the output folder and the reducer output file are created, but there is nothing within the part-r-00000 file). From the logs, I suspect the mappers are not emitting anything.
The code:
package com.telefonica.iot.tidoop.mrlib;
import com.telefonica.iot.tidoop.mrlib.utils.Constants;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
public class Count extends Configured implements Tool {
private static final Logger LOGGER = Logger.getLogger(Count.class);
public static class UnitEmitter extends Mapper<Object, Text, Text, LongWritable> {
private final Text commonKey = new Text("common-key");
#Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
context.write(commonKey, new LongWritable(1));
} // map
} // UnitEmitter
public static class Adder extends Reducer<Text, LongWritable, Text, LongWritable> {
#Override
public void reduce(Text key, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
} // for
context.write(key, new LongWritable(sum));
} // reduce
} // Adder
public static class AdderWithTag extends Reducer<Text, LongWritable, Text, LongWritable> {
private String tag;
#Override
public void setup(Context context) throws IOException, InterruptedException {
tag = context.getConfiguration().get(Constants.PARAM_TAG, "");
} // setup
#Override
public void reduce(Text key, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
long sum = 0;
for (LongWritable value : values) {
sum += value.get();
} // for
context.write(new Text(tag), new LongWritable(sum));
} // reduce
} // AdderWithTag
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new Filter(), args);
System.exit(res);
} // main
#Override
public int run(String[] args) throws Exception {
// check the number of arguments, show the usage if it is wrong
if (args.length != 3) {
showUsage();
return -1;
} // if
// get the arguments
String input = args[0];
String output = args[1];
String tag = args[2];
// create and configure a MapReduce job
Configuration conf = this.getConf();
conf.set(Constants.PARAM_TAG, tag);
Job job = Job.getInstance(conf, "tidoop-mr-lib-count");
job.setNumReduceTasks(1);
job.setJarByClass(Count.class);
job.setMapperClass(UnitEmitter.class);
job.setCombinerClass(Adder.class);
job.setReducerClass(AdderWithTag.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileInputFormat.addInputPath(job, new Path(input));
FileOutputFormat.setOutputPath(job, new Path(output));
// run the MapReduce job
return job.waitForCompletion(true) ? 0 : 1;
} // main
private void showUsage() {
System.out.println("...");
} // showUsage
} // Count
The command executed, and the output logs:
$ hadoop jar target/tidoop-mr-lib-0.0.0-SNAPSHOT-jar-with-dependencies.jar com.telefonica.iot.tidoop.mrlib.Count -libjars target/tidoop-mr-lib-0.0.0-SNAPSHOT-jar-with-dependencies.jar tidoop/numbers tidoop/numbers_count onetag
15/11/05 17:24:52 INFO input.FileInputFormat: Total input paths to process : 1
15/11/05 17:24:52 WARN snappy.LoadSnappy: Snappy native library is available
15/11/05 17:24:53 INFO util.NativeCodeLoader: Loaded the native-hadoop library
15/11/05 17:24:53 INFO snappy.LoadSnappy: Snappy native library loaded
15/11/05 17:24:53 INFO mapred.JobClient: Running job: job_201507101501_23002
15/11/05 17:24:54 INFO mapred.JobClient: map 0% reduce 0%
15/11/05 17:25:00 INFO mapred.JobClient: map 100% reduce 0%
15/11/05 17:25:07 INFO mapred.JobClient: map 100% reduce 33%
15/11/05 17:25:08 INFO mapred.JobClient: map 100% reduce 100%
15/11/05 17:25:09 INFO mapred.JobClient: Job complete: job_201507101501_23002
15/11/05 17:25:09 INFO mapred.JobClient: Counters: 25
15/11/05 17:25:09 INFO mapred.JobClient: Job Counters
15/11/05 17:25:09 INFO mapred.JobClient: Launched reduce tasks=1
15/11/05 17:25:09 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=5350
15/11/05 17:25:09 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
15/11/05 17:25:09 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
15/11/05 17:25:09 INFO mapred.JobClient: Rack-local map tasks=1
15/11/05 17:25:09 INFO mapred.JobClient: Launched map tasks=1
15/11/05 17:25:09 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=8702
15/11/05 17:25:09 INFO mapred.JobClient: FileSystemCounters
15/11/05 17:25:09 INFO mapred.JobClient: FILE_BYTES_READ=6
15/11/05 17:25:09 INFO mapred.JobClient: HDFS_BYTES_READ=1968928
15/11/05 17:25:09 INFO mapred.JobClient: FILE_BYTES_WRITTEN=108226
15/11/05 17:25:09 INFO mapred.JobClient: Map-Reduce Framework
15/11/05 17:25:09 INFO mapred.JobClient: Map input records=598001
15/11/05 17:25:09 INFO mapred.JobClient: Reduce shuffle bytes=6
15/11/05 17:25:09 INFO mapred.JobClient: Spilled Records=0
15/11/05 17:25:09 INFO mapred.JobClient: Map output bytes=0
15/11/05 17:25:09 INFO mapred.JobClient: CPU time spent (ms)=2920
15/11/05 17:25:09 INFO mapred.JobClient: Total committed heap usage (bytes)=355663872
15/11/05 17:25:09 INFO mapred.JobClient: Combine input records=0
15/11/05 17:25:09 INFO mapred.JobClient: SPLIT_RAW_BYTES=124
15/11/05 17:25:09 INFO mapred.JobClient: Reduce input records=0
15/11/05 17:25:09 INFO mapred.JobClient: Reduce input groups=0
15/11/05 17:25:09 INFO mapred.JobClient: Combine output records=0
15/11/05 17:25:09 INFO mapred.JobClient: Physical memory (bytes) snapshot=328683520
15/11/05 17:25:09 INFO mapred.JobClient: Reduce output records=0
15/11/05 17:25:09 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1466642432
15/11/05 17:25:09 INFO mapred.JobClient: Map output records=0
The content of the output file:
$ hadoop fs -cat /user/frb/tidoop/numbers_count/part-r-00000
[frb@cosmosmaster-gi tidoop-mr-lib]$ hadoop fs -ls /user/frb/tidoop/numbers_count/
Found 3 items
-rw-r--r-- 3 frb frb 0 2015-11-05 17:25 /user/frb/tidoop/numbers_count/_SUCCESS
drwxr----- - frb frb 0 2015-11-05 17:24 /user/frb/tidoop/numbers_count/_logs
-rw-r--r-- 3 frb frb 0 2015-11-05 17:25 /user/frb/tidoop/numbers_count/part-r-00000
Any hints about what is happening?

Weird. I'd try using Mapper (identity mapper) with your job.
If the Mapper does not output anything there must be something weird with your hadoop installation, or job configuration.

Related

Unable to Configure Number of Reducers In WordCount Job in hadoop

I'm using a single-node cluster (Hadoop 2.7.0) on my Linux machine.
My code for the WordCount job runs fine with 1 reducer.
But it does not work if I increase the number of reducers.
It is showing the following error:
15/05/25 21:15:10 INFO util.NativeCodeLoader: Loaded the native-hadoop library
15/05/25 21:15:10 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/05/25 21:15:10 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/05/25 21:15:10 WARN snappy.LoadSnappy: Snappy native library is available
15/05/25 21:15:10 INFO snappy.LoadSnappy: Snappy native library loaded
15/05/25 21:15:10 INFO mapred.FileInputFormat: Total input paths to process : 1
15/05/25 21:15:10 INFO mapred.JobClient: Running job: job_local_0001
15/05/25 21:15:11 INFO util.ProcessTree: setsid exited with exit code 0
15/05/25 21:15:11 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@5f1fd699
15/05/25 21:15:11 INFO mapred.MapTask: numReduceTasks: 1
15/05/25 21:15:11 INFO mapred.MapTask: io.sort.mb = 100
15/05/25 21:15:11 INFO mapred.MapTask: data buffer = 79691776/99614720
15/05/25 21:15:11 INFO mapred.MapTask: record buffer = 262144/327680
15/05/25 21:15:11 WARN mapred.LocalJobRunner: job_local_0001
java.io.IOException: Illegal partition for am (1)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1073)
at org.apache.hadoop.mapred.MapTask$OldOutputCollector.collect(MapTask.java:592)
at WordMapper.map(WordMapper.java:24)
at WordMapper.map(WordMapper.java:1)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212)
My getPartition Method Looks like this:
/**
 * Chooses the reduce partition of a word from the word's length: length 1 goes to
 * partition 0, length 2 to partition 1, length 3 to partition 2 and anything else to
 * partition 3.
 *
 * <p>The result is clamped to {@code numRedTasks - 1}. The framework rejects any partition
 * number outside {@code [0, numRedTasks)} with "Illegal partition", which is what happened
 * when the job actually ran with fewer than four reducers.
 *
 * @param key the word emitted by the mapper
 * @param value the count emitted with the word (not used for partitioning)
 * @param numRedTasks the number of reduce tasks the job is really running with
 * @return a partition number in {@code [0, numRedTasks)}
 */
public int getPartition(Text key, IntWritable value, int numRedTasks) {
    // With zero or one reducer there is nothing to choose from.
    if (numRedTasks <= 1) {
        return 0;
    }

    int length = key.toString().length();
    int bucket = (length >= 1 && length <= 3) ? length - 1 : 3;

    // Fold the buckets that have no reducer of their own into the last available one.
    return Math.min(bucket, numRedTasks - 1);
}
Run Method in WordCount.class File:
// Both the input and the output path are required.
if (input.length < 2) {
    System.out.println("Please provide valid input");
    return -1;
}

// Pass the driver class so Hadoop can locate the jar that contains the user classes;
// a bare "new JobConf()" triggers the "No job jar file set" warning and the mapper,
// reducer and partitioner classes may not be found on a real cluster.
JobConf config = new JobConf(WordCount.class);
FileInputFormat.setInputPaths(config, new Path(input[0]));
FileOutputFormat.setOutputPath(config, new Path(input[1]));
config.setMapperClass(WordMapper.class);
config.setReducerClass(WordReducer.class);
// MyPartitioner spreads the keys over four partitions, hence four reduce tasks.
// NOTE(review): the posted log shows "numReduceTasks: 1" under the local job runner,
// so this setting may not be honoured in local mode - confirm on the target cluster.
config.setNumReduceTasks(4);
config.setPartitionerClass(MyPartitioner.class);
config.setMapOutputKeyClass(Text.class);
config.setMapOutputValueClass(IntWritable.class);
config.setOutputKeyClass(Text.class);
config.setOutputValueClass(IntWritable.class);
// Blocks until the job finishes; throws an IOException if the job fails.
JobClient.runJob(config);
return 0;
}
My Mapper and Reducer Code is fine because Wordcount Job with 1 reducer is running fine.
Any One able to figure it out?
This may be due to pig fails in the operation due to high default_parallel could be set in it.
Thanks,
Shailesh.
You need to use ToolRunner in your driver class and invoke it from your main method. You can do this by using a combiner as part of the workflow. Below is the driver class code: as you can see, along with the mapper and reducer calls, there is a combiner call as well. The exit code in main comes from "int exitCode = ToolRunner.run(new Configuration(), new WordCountWithCombiner(), args);", which invokes ToolRunner at run time, so you can specify the number of reducers or mappers you would like to use with the "-D" option when running the wordcount program. A sample command line would look like "-D mapred.reduce.tasks=2 input output"
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
public class WordCountWithCombiner extends Configured
implements Tool{
#Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = new Job(conf, "MyJob");
job.setJarByClass(WordCount.class);
job.setJobName("Word Count With Combiners");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(WordCountMapper.class);
job.setCombinerClass(WordCountReducer.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Configuration(), new WordCountWithCombiner(), args);
System.exit(exitCode);
}
}

Custom Partitioning gives ArrayIndexOutOfBounds Error

When I run my code, I get the following exception:
hadoop@hadoop:~/testPrograms$ hadoop jar cp.jar CustomPartition /test/test.txt /test/output33
15/03/03 16:33:33 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
15/03/03 16:33:33 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
15/03/03 16:33:33 WARN mapreduce.JobSubmitter: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
15/03/03 16:33:33 INFO input.FileInputFormat: Total input paths to process : 1
15/03/03 16:33:34 INFO mapreduce.JobSubmitter: number of splits:1
15/03/03 16:33:34 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local1055584612_0001
15/03/03 16:33:35 INFO mapreduce.Job: The url to track the job: http://localhost:8080/
15/03/03 16:33:35 INFO mapreduce.Job: Running job: job_local1055584612_0001
15/03/03 16:33:35 INFO mapred.LocalJobRunner: OutputCommitter set in config null
15/03/03 16:33:35 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
15/03/03 16:33:35 INFO mapred.LocalJobRunner: Waiting for map tasks
15/03/03 16:33:35 INFO mapred.LocalJobRunner: Starting task: attempt_local1055584612_0001_m_000000_0
15/03/03 16:33:35 INFO mapred.Task: Using ResourceCalculatorProcessTree : [ ]
15/03/03 16:33:35 INFO mapred.MapTask: Processing split: hdfs://node1/test/test.txt:0+107
15/03/03 16:33:35 INFO mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
15/03/03 16:33:35 INFO mapred.MapTask: mapreduce.task.io.sort.mb: 100
15/03/03 16:33:35 INFO mapred.MapTask: soft limit at 83886080
15/03/03 16:33:35 INFO mapred.MapTask: bufstart = 0; bufvoid = 104857600
15/03/03 16:33:35 INFO mapred.MapTask: kvstart = 26214396; length = 6553600
15/03/03 16:33:35 INFO mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
15/03/03 16:33:35 INFO mapred.MapTask: Starting flush of map output
15/03/03 16:33:35 INFO mapred.LocalJobRunner: map task executor complete.
15/03/03 16:33:35 WARN mapred.LocalJobRunner: job_local1055584612_0001
java.lang.Exception: java.lang.ArrayIndexOutOfBoundsException: 2
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522)
Caused by: java.lang.ArrayIndexOutOfBoundsException: 2
at CustomPartition$MapperClass.map(CustomPartition.java:27)
at CustomPartition$MapperClass.map(CustomPartition.java:17)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
15/03/03 16:33:36 INFO mapreduce.Job: Job job_local1055584612_0001 running in uber mode : false
15/03/03 16:33:36 INFO mapreduce.Job: map 0% reduce 0%
15/03/03 16:33:36 INFO mapreduce.Job: Job job_local1055584612_0001 failed with state FAILED due to: NA
15/03/03 16:33:36 INFO mapreduce.Job: Counters: 0
I am trying to partition based on the game the persons play. Each word is separated by a tab. And after the three fields, I got the next line by pressing the return key.
My code:
public class CustomPartition {
public static class MapperClass extends Mapper<Object, Text, Text, Text>{
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String itr[] = value.toString().split("\t");
String game=itr[2].toString();
String nameGoals=itr[0]+"\t"+itr[1];
context.write(new Text(game), new Text(nameGoals));
}
}
public static class GoalPartition extends Partitioner<Text, Text> {
#Override
public int getPartition(Text key,Text value, int numReduceTasks){
if(key.toString()=="football")
{return 0;}
else if(key.toString()=="basketball")
{return 1;}
else// (key.toString()=="icehockey")
{return 2;}
}
}
public static class ReducerClass extends Reducer<Text,Text,Text,Text> {
#Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String name="";
String game="";
int maxGoals=0;
for (Text val : values)
{
String valTokens[]= val.toString().split("\t");
int goals = Integer.parseInt(valTokens[1]);
if(goals > maxGoals)
{
name = valTokens[0];
game = key.toString();
maxGoals = goals;
context.write(new Text(name), new Text ("game"+game+"score"+maxGoals));
}
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "custom partition");
job.setJarByClass(CustomPartition.class);
job.setMapperClass(MapperClass.class);
job.setCombinerClass(ReducerClass.class);
job.setPartitionerClass(GoalPartition.class);
job.setReducerClass(ReducerClass.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

Error on map reduce example of Hadoop 2.2.0

I am new to hadoop and after installing Hadoop 2.2.0 I tried to follow example http://www.srccodes.com/p/article/45/run-hadoop-wordcount-mapreduce-example-windows to try a simple map reduce job.
However whenever I try to do the map reduce job over the txt file I created, I keep getting failures with this message
c:\hadoop>bin\yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.ja
r wordcount /input output
14/03/26 14:20:48 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0
:8032
14/03/26 14:20:50 INFO input.FileInputFormat: Total input paths to process : 1
14/03/26 14:20:51 INFO mapreduce.JobSubmitter: number of splits:1
14/03/26 14:20:51 INFO Configuration.deprecation: user.name is deprecated. Inste
ad, use mapreduce.job.user.name
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.jar is deprecated. Inst
ead, use mapreduce.job.jar
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.output.value.class is d
eprecated. Instead, use mapreduce.job.output.value.class
14/03/26 14:20:51 INFO Configuration.deprecation: mapreduce.combine.class is dep
recated. Instead, use mapreduce.job.combine.class
14/03/26 14:20:51 INFO Configuration.deprecation: mapreduce.map.class is depreca
ted. Instead, use mapreduce.job.map.class
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.job.name is deprecated.
Instead, use mapreduce.job.name
14/03/26 14:20:51 INFO Configuration.deprecation: mapreduce.reduce.class is depr
ecated. Instead, use mapreduce.job.reduce.class
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.input.dir is deprecated
. Instead, use mapreduce.input.fileinputformat.inputdir
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.output.dir is deprecate
d. Instead, use mapreduce.output.fileoutputformat.outputdir
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.map.tasks is deprecated
. Instead, use mapreduce.job.maps
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.output.key.class is dep
recated. Instead, use mapreduce.job.output.key.class
14/03/26 14:20:51 INFO Configuration.deprecation: mapred.working.dir is deprecat
ed. Instead, use mapreduce.job.working.dir
14/03/26 14:20:51 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_13
95833928952_0004
14/03/26 14:20:52 INFO impl.YarnClientImpl: Submitted application application_13
95833928952_0004 to ResourceManager at /0.0.0.0:8032
14/03/26 14:20:52 INFO mapreduce.Job: The url to track the job: http://GoncaloPe
reira:8088/proxy/application_1395833928952_0004/
14/03/26 14:20:52 INFO mapreduce.Job: Running job: job_1395833928952_0004
14/03/26 14:21:08 INFO mapreduce.Job: Job job_1395833928952_0004 running in uber
mode : false
14/03/26 14:21:08 INFO mapreduce.Job: map 0% reduce 0%
14/03/26 14:21:20 INFO mapreduce.Job: Task Id : attempt_1395833928952_0004_m_000
000_0, Status : FAILED
Error: java.lang.ClassCastException: org.apache.hadoop.mapreduce.lib.input.FileS
plit cannot be cast to org.apache.hadoop.mapred.InputSplit
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:402)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInforma
tion.java:1491)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
14/03/26 14:21:33 INFO mapreduce.Job: Task Id : attempt_1395833928952_0004_m_000
000_1, Status : FAILED
Error: java.lang.ClassCastException: org.apache.hadoop.mapreduce.lib.input.FileS
plit cannot be cast to org.apache.hadoop.mapred.InputSplit
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:402)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInforma
tion.java:1491)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
14/03/26 14:21:48 INFO mapreduce.Job: Task Id : attempt_1395833928952_0004_m_000
000_2, Status : FAILED
Error: java.lang.ClassCastException: org.apache.hadoop.mapreduce.lib.input.FileS
plit cannot be cast to org.apache.hadoop.mapred.InputSplit
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:402)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInforma
tion.java:1491)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157)
14/03/26 14:22:04 INFO mapreduce.Job: map 100% reduce 100%
14/03/26 14:22:10 INFO mapreduce.Job: Job job_1395833928952_0004 failed with sta
te FAILED due to: Task failed task_1395833928952_0004_m_000000
Job failed as tasks failed. failedMaps:1 failedReduces:0
14/03/26 14:22:10 INFO mapreduce.Job: Counters: 6
Job Counters
Failed map tasks=4
Launched map tasks=4
Other local map tasks=3
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=48786
Total time spent by all reduces in occupied slots (ms)=0
Since I followed everything with no issues step by step I have no idea why this might be, does anyone know?
Edit: I tried adopting 2.3.0 and the same issue happens with the example jar given, and with the code below that I tried to compile; I have no idea what the issue is.
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class teste {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
context.write(word, one);
}
}
}
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
job.setJarByClass(teste.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
I had the same issue (java.lang.ClassCastException) and was able to solve it by running Hadoop with admin privileges. The problem seems to be the creation of symbolic links which by default is not possible for non-admin Windows users. Open a console as administrator and then proceed as described in the example from your link.
The link you provided has the input parameter as input, NOT /input... try with this syntax:
C:\hadoop>bin\yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar wordcount input output
If this doesn't work, then see this - Link and modify the mapper class.

Hadoop Word Count working but not summing words up

I'm using Hadoop 1.2.1 and for some reason my Word Count output looks strange:
input file:
this is sparta this was sparta hello world goodbye world
output in hdfs:
goodbye 1
hello 1
is 1
sparta 1
sparta 1
this 1
this 1
was 1
world 1
world 1
code:
public class WordCount {
public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
StringTokenizer tokenizer = new StringTokenizer(line);
while (tokenizer.hasMoreTokens()) {
word.set(tokenizer.nextToken());
context.write(word, one);
}
}
}
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
while (values.hasNext()) {
sum += values.next().get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf, "wordcount");
job.setJarByClass(WordCount.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
And here's some relevant console output:
14/01/04 16:17:37 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/01/04 16:17:37 INFO input.FileInputFormat: Total input paths to process : 1
14/01/04 16:17:37 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/01/04 16:17:37 WARN snappy.LoadSnappy: Snappy native library not loaded
14/01/04 16:17:38 INFO mapred.JobClient: Running job: job_201401041506_0013
14/01/04 16:17:39 INFO mapred.JobClient: map 0% reduce 0%
14/01/04 16:17:45 INFO mapred.JobClient: map 100% reduce 0%
14/01/04 16:17:52 INFO mapred.JobClient: map 100% reduce 33%
14/01/04 16:17:54 INFO mapred.JobClient: map 100% reduce 100%
14/01/04 16:17:55 INFO mapred.JobClient: Job complete: job_201401041506_0013
14/01/04 16:17:55 INFO mapred.JobClient: Counters: 26
14/01/04 16:17:55 INFO mapred.JobClient: Job Counters
14/01/04 16:17:55 INFO mapred.JobClient: Launched reduce tasks=1
14/01/04 16:17:55 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=6007
14/01/04 16:17:55 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
14/01/04 16:17:55 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
14/01/04 16:17:55 INFO mapred.JobClient: Launched map tasks=1
14/01/04 16:17:55 INFO mapred.JobClient: Data-local map tasks=1
14/01/04 16:17:55 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=9167
14/01/04 16:17:55 INFO mapred.JobClient: File Output Format Counters
14/01/04 16:17:55 INFO mapred.JobClient: Bytes Written=77
14/01/04 16:17:55 INFO mapred.JobClient: FileSystemCounters
14/01/04 16:17:55 INFO mapred.JobClient: FILE_BYTES_READ=123
14/01/04 16:17:55 INFO mapred.JobClient: HDFS_BYTES_READ=169
14/01/04 16:17:55 INFO mapred.JobClient: FILE_BYTES_WRITTEN=122037
14/01/04 16:17:55 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=77
14/01/04 16:17:55 INFO mapred.JobClient: File Input Format Counters
14/01/04 16:17:55 INFO mapred.JobClient: Bytes Read=57
14/01/04 16:17:55 INFO mapred.JobClient: Map-Reduce Framework
14/01/04 16:17:55 INFO mapred.JobClient: Map output materialized bytes=123
14/01/04 16:17:55 INFO mapred.JobClient: Map input records=10
14/01/04 16:17:55 INFO mapred.JobClient: Reduce shuffle bytes=123
14/01/04 16:17:55 INFO mapred.JobClient: Spilled Records=20
14/01/04 16:17:55 INFO mapred.JobClient: Map output bytes=97
14/01/04 16:17:55 INFO mapred.JobClient: Total committed heap usage (bytes)=269619200
14/01/04 16:17:55 INFO mapred.JobClient: Combine input records=0
14/01/04 16:17:55 INFO mapred.JobClient: SPLIT_RAW_BYTES=112
14/01/04 16:17:55 INFO mapred.JobClient: Reduce input records=10
14/01/04 16:17:55 INFO mapred.JobClient: Reduce input groups=7
14/01/04 16:17:55 INFO mapred.JobClient: Combine output records=0
14/01/04 16:17:55 INFO mapred.JobClient: Reduce output records=10
14/01/04 16:17:55 INFO mapred.JobClient: Map output records=10
What can cause this? I'm very new to Hadoop, so i'm not sure where to look.
Thanks!
You're using an old API signature. In 1.x+ the reduce method changed to use iterables instead of iterator (which was what the old 0.x API used, so you will see iterator in many examples in books and on the web).
http://hadoop.apache.org/docs/r1.2.1/api/org/apache/hadoop/mapreduce/Reducer.html#reduce%28KEYIN,%20java.lang.Iterable,%20org.apache.hadoop.mapreduce.Reducer.Context%29
Try
#Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
The @Override annotation tells your compiler to check that your reduce method is overriding the correct method signature in the parent class.

Avro Map Reduce - AvroInputFormat not found error

This is what i have understood so far reading from varied sources on the internet.
Avro mapred and Avro are not part of CDH4 (Cloudera Distribution) and i have to set it up manually using HADOOP_CLASSPATH=avro.jar:avro-mapred.jar
I have done that and when i run my job on my pseudo cluster it throws the following exception:
13/12/27 00:47:40 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/12/27 00:47:40 INFO mapred.FileInputFormat: Total input paths to process : 1
13/12/27 00:47:41 INFO mapred.JobClient: Running job: job_201312221245_0017
13/12/27 00:47:42 INFO mapred.JobClient: map 0% reduce 0%
13/12/27 00:47:57 INFO mapred.JobClient: Task Id : attempt_201312221245_0017_m_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.avro.mapred.AvroInputFormat not found
I'm running the job as follows:
hadoop jar build/libs/hadoop-boilerplate-1.0.jar CustomerMapReduce transactions/input transactions/output1 -libjars /path/to/libs/avro-1.7.4.jar,/path/to/libs/avro-mapred-1.7.4.jar
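You should implement Tool, launch the job through ToolRunner, and use getConf() for the job configuration, so that generic options such as -libjars are actually parsed and applied to the job. Note that generic options must come before the application's own arguments (here, before transactions/input transactions/output1).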
public class SomeClass extends Configured implements Tool {
#Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
...
}
}

Resources