I am facing an error while setting up my job for a custom InputFormat.
Below is my code:
package com.nline_delimiter;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class NL_driver {
public static void main(String [] args) throws IOException, InterruptedException, ClassNotFoundException
{
Configuration conf=new Configuration(true);
Job job_run =new Job(conf);
job_run.setJobName("nline input format each line seperate wth delimiter");
job_run.setJarByClass(NL_driver.class);
job_run.setMapperClass(NL_mapper.class);
job_run.setReducerClass(NL_reducer.class);
job_run.setInputFormatClass(NL_inputformatter.class);
job_run.setMapOutputKeyClass(Text.class);
job_run.setMapOutputValueClass(IntWritable.class);
job_run.setOutputKeyClass(Text.class);
job_run.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job_run,new Path("/home/hduser/input_formatter_usage.txt"));
FileOutputFormat.setOutputPath(job_run, new Path("/home/hduser/input_formatter_usage"));
job_run.waitForCompletion(true);
}
}
The line
job_run.setInputFormatClass(NL_inputformatter.class)
shows an error.
NL_inputformatter is a custom input format class that extends FileInputFormat.
Is there something I need to import for setInputFormatClass? The default error check in Eclipse asks me to change setInputFormatClass to setOutputFormatClass but doesn't suggest any import.
The source code for NL_inputformatter is below.
package com.nline_delimiter;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
public class NL_inputformatter extends FileInputFormat<Text, IntWritable>{
@Override
public RecordReader<Text, IntWritable> getRecordReader(InputSplit input,
JobConf job_run, Reporter reporter) throws IOException {
// TODO Auto-generated method stub
System.out.println("I am Inside the NL_inputformatter class");
reporter.setStatus(input.toString());
return new NL_record_reader(job_run, (FileSplit)input);
}
}
Your help would be appreciated.
It's because you are using the FileInputFormat from the old Hadoop API with the new one. You have to change your import and your implementation:
import org.apache.hadoop.mapred.FileInputFormat;
to
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
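With the new API the method to override also changes from getRecordReader to createRecordReader. A minimal sketch of NL_inputformatter against the mapreduce API could look like the following; it assumes NL_record_reader is likewise rewritten to extend org.apache.hadoop.mapreduce.RecordReader<Text, IntWritable> and receives its split in initialize():
package com.nline_delimiter;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class NL_inputformatter extends FileInputFormat<Text, IntWritable> {
    @Override
    public RecordReader<Text, IntWritable> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        // The new API passes the split and context to the reader's initialize() method,
        // so the reader no longer needs a JobConf or Reporter in its constructor.
        return new NL_record_reader();
    }
}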
I cannot understand what the bug is. When I removed the line
job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
I got the output, but when I use it I get the exception below.
I am trying to get the output from the reducer in decreasing order of value, which is why I used setSortComparatorClass. Please help me out.
package topten.mostviewed.movies;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MostViewdReducer extends Reducer<Text,IntWritable,Text,LongWritable>
{
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException
{
int sum = 0;
for(IntWritable value:values)
{
sum = sum+1;
}
context.write(key, new LongWritable(sum));
}
}
package topten.mostviewed.movies;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class MostViewdDriver
{
// @SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception
{
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2)
{
System.err.println("Usage: movie <input> <out>");
System.exit(2);
}
Job job = new Job(conf, "Movie ");
job.setJarByClass(MostViewdDriver.class);
job.setMapperClass(MostviewdMapper.class);
job.setReducerClass(MostViewdReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setSortComparatorClass(LongWritable.DecreasingComparator.class);
// job.setSortComparatorClass((Class<? extends RawComparator>) LongWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
The exception I'm getting is as below:
18/10/11 11:35:05 INFO mapreduce.Job: Task Id : attempt_1539236679371_0004_r_000000_2, Status : FAILED
Error: java.lang.ArrayIndexOutOfBoundsException: 7
at org.apache.hadoop.io.WritableComparator.readInt(WritableComparator.java:212)
at org.apache.hadoop.io.WritableComparator.readLong(WritableComparator.java:226)
at org.apache.hadoop.io.LongWritable$Comparator.compare(LongWritable.java:91)
at org.apache.hadoop.io.LongWritable$DecreasingComparator.compare(LongWritable.java:106)
at org.apache.hadoop.mapreduce.task.ReduceContextImpl.nextKeyValue(ReduceContextImpl.java:158)
at org.apache.hadoop.mapreduce.task.ReduceContextImpl.nextKey(ReduceContextImpl.java:121)
at org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer$Context.nextKey(WrappedReducer.java:307)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:170)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:627)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1642)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:163)
Your map output keys are ints, but you tried to use a comparator intended for longs. Replace LongWritable.DecreasingComparator.class with IntWritable.DecreasingComparator.class.
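Note that the comparator passed to setSortComparatorClass is applied to the map output keys, so it only helps if the key class matches. If your Hadoop version has no ready-made decreasing comparator for IntWritable, a minimal hand-rolled sketch (assuming the map output key class really were IntWritable) could look like this:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
// hypothetical descending comparator, registered with
// job.setSortComparatorClass(DescendingIntComparator.class)
public class DescendingIntComparator extends WritableComparator {
    public DescendingIntComparator() {
        super(IntWritable.class, true); // true = instantiate keys for compare()
    }
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return -super.compare(a, b); // invert the natural (ascending) order
    }
}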
I am using JUnit and Mockito to write test cases for a Spring Boot application. I have multiple controllers (for example, ContractController and CountryController). When I write the test cases for both of them in a single file, the tests pass, but if I write the ContractController test cases in one file and the other controller's test cases in a second file, the test cases fail.
Can you please let me know how to write them in different files?
Contract Controller Test
package com.example.demo;
import static org.junit.Assert.*;
import java.util.Collections;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mockito;
import org.skyscreamer.jsonassert.JSONAssert;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.http.MediaType;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.RequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import com.example.demo.controllers.ContractController;
import com.example.demo.controllers.CountryController;
import com.example.demo.entities.Contract;
import com.example.demo.entities.Country;
import com.example.demo.repositories.ContractRepository;
import com.example.demo.repositories.CountryRepository;
@RunWith(SpringRunner.class)
public class ContractControllerTest {
@Autowired
private MockMvc mockMvc;
@MockBean
private ContractRepository contractRepository;
@SuppressWarnings("unchecked")
@Test
public void testGetContract() throws Exception {
System.out.println("contract testing");
Contract contract = new Contract();
contract.setContractTypeId(1);
contract.setContractType("Calibration");
Mockito.when(this.contractRepository.findAll()).thenReturn((Collections.singletonList(contract)));
RequestBuilder requestBuilder = MockMvcRequestBuilders.get("/api/contractType").accept(MediaType.APPLICATION_JSON_UTF8);
MvcResult result = mockMvc.perform(requestBuilder).andReturn();
System.out.println("result is"+result.getResponse().getContentAsString());
String expected = "[{id:1,contractType:Calibration}]";
JSONAssert.assertEquals(expected, result.getResponse().getContentAsString(), false);
}
}
Country Controller Test
package com.example.demo;
import static org.junit.Assert.*;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.json.JSONObject;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mockito;
import org.skyscreamer.jsonassert.JSONAssert;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.http.MediaType;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.RequestBuilder;
import org.springframework.test.web.servlet.request.MockMvcRequestBuilders;
import com.example.demo.controllers.CountryController;
import com.example.demo.entities.Contract;
import com.example.demo.entities.Country;
import com.example.demo.repositories.ContractRepository;
import com.example.demo.repositories.CountryRepository;
@RunWith(SpringRunner.class)
@WebMvcTest(value = CountryController.class)
public class CountryControllerTest {
@Autowired
private MockMvc mockMvc;
@MockBean
private CountryRepository countryRepository;
@MockBean
private ContractRepository contractRepository;
@SuppressWarnings("unchecked")
@Test
public void testGetCountries() throws Exception {
System.out.println("mockito testing");
Country country = new Country();
country.setId(1);
country.setCountryName("Afghanistan");
country.setShortName("AF");
Mockito.when(this.countryRepository.findAll()).thenReturn((Collections.singletonList(country)));
RequestBuilder requestBuilder = MockMvcRequestBuilders.get("/api/countries").accept(MediaType.APPLICATION_JSON_UTF8);
MvcResult result = mockMvc.perform(requestBuilder).andReturn();
System.out.println("result is"+result.getResponse().getContentAsString());
String expected = "[{id:1,shortName:AF,countryName:Afghanistan}]";
JSONAssert.assertEquals(expected, result.getResponse().getContentAsString(), false);
}
}
I generated my Spring application using JHipster. Now I want to add a controller for file upload, and a StorageService for it. But when I run my application I get this message:
Description: Parameter 0 of constructor in com.kongresspring.myapp.web.rest.FileUploadResource required a bean of type 'com.kongresspring.myapp.service.StorageService' that could not be found.
Action: Consider defining a bean of type 'com.kongresspring.myapp.service.StorageService' in your configuration.
I can't find a beans.xml to add the new bean to. I'm new to Spring, so maybe there's some other way to configure the bean that I'm not familiar with. Here's my code for the file upload controller:
package com.kongresspring.myapp.web.rest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.mvc.support.RedirectAttributes;
import com.kongresspring.myapp.service.StorageService;
@RestController
@RequestMapping("/api")
public class FileUploadResource {
private final Logger log = LoggerFactory.getLogger(FileUploadResource.class);
private final StorageService storageService;
@Autowired
public FileUploadResource(StorageService storageService) {
this.storageService = storageService;
}
/**
* POST uploadFile
*/
#PostMapping("/upload-file")
public String uploadFile(#RequestParam("file") MultipartFile file,
RedirectAttributes redirectAttributes) {
storageService.store(file);
redirectAttributes.addFlashAttribute("message",
"You successfully uploaded " + file.getOriginalFilename() + "!");
return "success";
}
/**
* GET preview
*/
#GetMapping("/preview")
public String preview() {
return "preview";
}
}
And here's my StorageService code:
package com.kongresspring.myapp.service;
import org.springframework.core.io.Resource;
import org.springframework.web.multipart.MultipartFile;
import java.nio.file.Path;
import java.util.stream.Stream;
public interface StorageService {
void init();
void store(MultipartFile file);
Stream<Path> loadAll();
Path load(String filename);
Resource loadAsResource(String filename);
}
You can create an implementation of StorageService and annotate it with @Service (or @Component); Spring will automatically discover the bean:
@Service
public class StorageServiceImpl implements StorageService {
    @Override
    public void init() { /* your code goes here */ }
    @Override
    public void store(MultipartFile file) { /* your code goes here */ }
    @Override
    public Stream<Path> loadAll() { /* your code goes here */ return Stream.empty(); }
    @Override
    public Path load(String filename) { /* your code goes here */ return null; }
    @Override
    public Resource loadAsResource(String filename) { /* your code goes here */ return null; }
}
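For completeness, the implementation above needs the usual imports and should live in a package covered by JHipster's component scan, i.e. anywhere under the application's base package; the exact sub-package below is only an assumption:
package com.kongresspring.myapp.service.impl; // hypothetical sub-package; any scanned package works
import java.nio.file.Path;
import java.util.stream.Stream;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import com.kongresspring.myapp.service.StorageService;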
My output is coming out wrong. The input file is:
1 2 3 4
5 4 3 2
The output should be key: sum value: 24
The output produced by MapReduce is key: sum value: 34
I am using OpenJDK 7 on Ubuntu 14.04 to run the jar file, whereas the jar file was created in Eclipse Juno and compiled with Oracle JDK 7.
NumberDriver.java
package numbersum;
import java.io.*;
//import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
//import org.apache.hadoop.mapreduce.Mapper;
//import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class NumberDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// TODO Auto-generated method stub
Configuration conf=new Configuration();
String[] otherArgs=new GenericOptionsParser(conf,args).getRemainingArgs();
if(otherArgs.length!=2)
{
System.err.println("Error");
System.exit(2);
}
Job job=new Job(conf, "number sum");
job.setJarByClass(NumberDriver.class);
job.setMapperClass(NumberMapper.class);
job.setReducerClass(NumberReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true)?0:1);
}
}
NumberMapper.java
package numbersum;
import java.io.*;
import java.util.StringTokenizer;
//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
//import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
//import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;
//import org.hsqldb.Tokenizer;
public class NumberMapper extends Mapper <LongWritable, Text, Text, IntWritable>
{
int sum;
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
StringTokenizer itr=new StringTokenizer(value.toString());
while(itr.hasMoreTokens())
{
sum+=Integer.parseInt(itr.nextToken());
}
context.write(new Text("sum"),new IntWritable(sum));
}
}
NumberReducer.java
package numbersum;
import java.io.*;
//import java.util.StringTokenizer;
//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapreduce.Job;
//import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
//import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
//import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//import org.apache.hadoop.util.GenericOptionsParser;
public class NumberReducer extends Reducer <Text, IntWritable, Text, IntWritable>
{
public void reduce(Text key,Iterable<IntWritable> values, Context context)throws IOException, InterruptedException
{
int sum=0;
for(IntWritable value:values)
{
sum+=value.get();
}
context.write(key,new IntWritable(sum));
}
}
My best guess:
int sum; // <-- Why a class member?
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
int sum = 0; //Why not here?
StringTokenizer itr=new StringTokenizer(value.toString());
Reasoning for the guess:
1st map call: 1 + 2 + 3 + 4 = 10, so ("sum", 10) is emitted.
2nd map call: (10 +) 5 + 4 + 3 + 2 = 24, so ("sum", 24) is emitted.
The reducer then adds the two emitted values: 10 + 24 = 34.
..meaning, the previous value is being retained across map() calls.
I think you forgot to set sum to 0 at the beginning of the map function:
public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException
{
sum = 0;
...
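Either fix works; a minimal corrected version of the map method (using a local variable, as suggested above) would look like:
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
    int sum = 0; // local to this call, so each input line starts from zero
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens())
    {
        sum += Integer.parseInt(itr.nextToken());
    }
    context.write(new Text("sum"), new IntWritable(sum));
}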
I wanted to create a new datatype in Hadoop, but I get the following error from my custom InputFormat class. Here is my code:
error - WholeFileRecordReader cannot be cast to org.apache.hadoop.mapred.RecordReader
code -
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TaskAttemptContext;
public class wholeFileInputFormat extends FileInputFormat<Text, apriori>{
public RecordReader<Text, apriori> getRecordReader(
InputSplit input, JobConf job, Reporter reporter)
throws IOException {
reporter.setStatus(input.toString());
return (RecordReader<Text, apriori>) new WholeFileRecordReader(job, (FileSplit) input);
}
}
My custom RecordReader is as follows:
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
class WholeFileRecordReader extends RecordReader<Text, apriori> {
private FileSplit fileSplit;
private Configuration conf;
private InputStream in;
private Text key = new Text("");
private apriori value = new apriori();
private boolean processed = false;
public void initialize( JobConf job, FileSplit split)
throws IOException {
this.fileSplit = split;
this.conf = job;
final Path file = fileSplit.getPath();
String StringPath = new String(fileSplit.getPath().toString());
String StringPath2 = new String();
StringPath2 = StringPath.substring(5);
System.out.println(StringPath2);
in = new FileInputStream(StringPath2);
FileSystem fs = file.getFileSystem(conf);
in = fs.open(file);
}
public boolean nextKeyValue() throws IOException, InterruptedException {
if (!processed) {
byte[] contents = new byte[(int) fileSplit.getLength()];
Path file = fileSplit.getPath();
key.set(file.getName());
try {
IOUtils.readFully(in, contents, 0, contents.length);
value.set(contents, 0, contents.length);
} finally {
IOUtils.closeStream(in);
}
processed = true;
return true;
}
return false;
}
@Override
public Text getCurrentKey() throws IOException, InterruptedException {
return key;
}
@Override
public apriori getCurrentValue() throws IOException, InterruptedException {
return value;
}
@Override
public float getProgress() throws IOException {
return processed ? 1.0f : 0.0f;
}
@Override
public void close() throws IOException {
// Do nothing
}
@Override
public void initialize(InputSplit arg0, TaskAttemptContext arg1)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
}
}
The WholeFileRecordReader class is a subclass of org.apache.hadoop.mapreduce.RecordReader. That class cannot be cast to org.apache.hadoop.mapred.RecordReader. Can you try using the same API in both classes?
As per the rules of the Java programming language, only classes or interfaces (collectively known as types) from the same type hierarchy can be cast or converted into each other. If you try to cast two objects that don't share the same type hierarchy, i.e. there is no parent-child relationship between them, you will get a compile-time error.
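As a tiny illustration of that rule (the class names here are made up):
class Animal {}
class Dog extends Animal {}
class Car {}
Animal a = new Dog();   // fine: Dog and Animal share a hierarchy
Dog d = (Dog) a;        // fine: downcast within the same hierarchy
// Car c = (Car) a;     // compile-time error: Animal and Car are unrelated types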
There was a package mismatch, which is why this error was coming up.
The package org.apache.hadoop.mapred is MRv1 (MapReduce version 1).
The package org.apache.hadoop.mapreduce is MRv2 (MapReduce version 2).
In your code you combined both MRv1 and MRv2:
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
Either use imports from org.apache.hadoop.mapred (MRv1) everywhere or from org.apache.hadoop.mapreduce (MRv2) everywhere.
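For example, sticking with the MRv2 API throughout, the input format could be sketched roughly like this; it assumes WholeFileRecordReader gets a no-arg constructor and does its setup in the initialize(InputSplit, TaskAttemptContext) method it already overrides:
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class WholeFileInputFormat extends FileInputFormat<Text, apriori> {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false; // each file is read as one record, so never split it
    }
    @Override
    public RecordReader<Text, apriori> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // no cast needed: WholeFileRecordReader already extends the mapreduce RecordReader
        return new WholeFileRecordReader();
    }
}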
Hope this helps.