Running Dataproc BigQuery example on local machine

Running Dataproc BigQuery example on local machine - hadoop

I am trying to run the connector example on local machine but keep getting UnknownHostException. How do I configure the access to BigQuery using the Hadoop Connector?
package com.mycompany.dataproc;
import com.google.cloud.hadoop.io.bigquery.BigQueryConfiguration;
import com.google.cloud.hadoop.io.bigquery.GsonBigQueryInputFormat;
import com.google.gson.JsonObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;
public class BigQueryAccessExample {
JavaSparkContext jsc ;
public BigQueryAccessExample(JavaSparkContext jsc){
}
public static void main(String[] args) throws Exception{
SparkConf conf = new SparkConf()
.setAppName("BigQuery Reader").setMaster("local[5]");
conf.set("spark.serializer", org.apache.spark.serializer.KryoSerializer.class.getName());
JavaSparkContext jsc = new JavaSparkContext(conf);
String projectId = "mycompany-data";
String fullyQualifiedInputTableId = "mylogs.display20151030";
Configuration hadoopConfiguration = jsc.hadoopConfiguration();
//BigQueryConfiguration.
// Set the job-level projectId.
hadoopConfiguration.set(BigQueryConfiguration.PROJECT_ID_KEY, projectId);
// Use the systemBucket for temporary BigQuery export data used by the InputFormat.
String bucket = "my-spark-test";
hadoopConfiguration.set(BigQueryConfiguration.GCS_BUCKET_KEY, bucket);
com.google.cloud.hadoop.io.bigquery.
// Configure input and output for BigQuery access.
BigQueryConfiguration.configureBigQueryInput(hadoopConfiguration, fullyQualifiedInputTableId);
//BigQueryConfiguration.configureBigQueryOutput(conf, fullyQualifiedOutputTableId, outputTableSchema);
JavaPairRDD<LongWritable, JsonObject> tableData = jsc.newAPIHadoopRDD(hadoopConfiguration, GsonBigQueryInputFormat.class, LongWritable.class, JsonObject.class);
//tableData.count();
JavaRDD<JsonObject> myRdd = tableData.map(new Function<Tuple2<LongWritable, JsonObject>, JsonObject>() {
public JsonObject call(Tuple2<LongWritable, JsonObject> v1) throws Exception {
System.out.println(String.format("idx: %s val: %s", v1._1(), v1._2().toString()));
return v1._2();
}
});
myRdd.take(10);
}
}
but I get UnknownHostException
java.net.UnknownHostException: metadata
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:184)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
at sun.net.NetworkClient.doConnect(NetworkClient.java:175)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:432)
at sun.net.www.http.HttpClient.openServer(HttpClient.java:527)
at sun.net.www.http.HttpClient.<init>(HttpClient.java:211)
at sun.net.www.http.HttpClient.New(HttpClient.java:308)
at sun.net.www.http.HttpClient.New(HttpClient.java:326)
at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:1168)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1104)
at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:998)
at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:932)
at com.google.api.client.http.javanet.NetHttpRequest.execute(NetHttpRequest.java:93)
at com.google.api.client.http.HttpRequest.execute(HttpRequest.java:972)
at com.google.cloud.hadoop.util.CredentialFactory$ComputeCredentialWithRetry.executeRefreshToken(CredentialFactory.java:142)
at com.google.api.client.auth.oauth2.Credential.refreshToken(Credential.java:489)
at com.google.cloud.hadoop.util.CredentialFactory.getCredentialFromMetadataServiceAccount(CredentialFactory.java:189)
at com.google.cloud.hadoop.util.CredentialConfiguration.getCredential(CredentialConfiguration.java:71)
at com.google.cloud.hadoop.io.bigquery.BigQueryFactory.createBigQueryCredential(BigQueryFactory.java:81)
at com.google.cloud.hadoop.io.bigquery.BigQueryFactory.getBigQuery(BigQueryFactory.java:101)
at com.google.cloud.hadoop.io.bigquery.BigQueryFactory.getBigQueryHelper(BigQueryFactory.java:89)
at com.google.cloud.hadoop.io.bigquery.AbstractBigQueryInputFormat.getBigQueryHelper(AbstractBigQueryInputFormat.java:363)
at com.google.cloud.hadoop.io.bigquery.AbstractBigQueryInputFormat.getSplits(AbstractBigQueryInputFormat.java:102)
at org.apache.spark.rdd.NewHadoopRDD.getPartitions(NewHadoopRDD.scala:115)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1277)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:306)
at org.apache.spark.rdd.RDD.take(RDD.scala:1272)
at org.apache.spark.api.java.JavaRDDLike$class.take(JavaRDDLike.scala:494)
at org.apache.spark.api.java.AbstractJavaRDDLike.take(JavaRDDLike.scala:47)
at com.mycompany.dataproc.BigQueryAccessExample.main(BigQueryAccessExample.java:57)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
I appears that I need to set up access credentials or permissions .. but I don't see any docs regarding that.
I downloaded credentials from https://console.developers.google.com/project//apiui/credential
and set up GOOGLE_APPLICATION_CREDENTIALS but that didn't seem to work.
Any help?

The simplest way is to create a new service account and download the .p12 file (the Hadoop connectors do not currently support Application Default Credentials or JSON keyfiles):
String serviceAccount = "foo#bar.gserviceaccount.com";
String localKeyfile = "/path/to/local/keyfile.p12";
hadoopConfiguration.set("google.cloud.auth.service.account.enable", true);
hadoopConfiguration.set("google.cloud.auth.service.account.email", serviceAccount);
hadoopConfiguration.set("google.cloud.auth.service.account.keyfile", localKeyfile);

Related

Dropwizard testing - ResourceTestRule throwing NoClassDefFoundError: ch/qos/logback/core/filter/Filter

I am using dropwizard 1.2.4 with log4j 1.2.17. I have followed the instructions as mentioned below
https://github.com/arteam/dropwizard-nologback/
It is throwing the exception like below during unit testing.
java.lang.NoClassDefFoundError: ch/qos/logback/core/filter/Filter
at io.dropwizard.testing.junit.ResourceTestRule.<clinit>(ResourceTestRule.java:34)
at com.vnera.restapilayer.api.resources.ApiInfoControllerTest.<clinit>(ApiInfoControllerTest.java:25)
at sun.misc.Unsafe.ensureClassInitialized(Native Method)
at sun.reflect.UnsafeFieldAccessorFactory.newFieldAccessor(UnsafeFieldAccessorFactory.java:43)
at sun.reflect.ReflectionFactory.newFieldAccessor(ReflectionFactory.java:156)
at java.lang.reflect.Field.acquireFieldAccessor(Field.java:1088)
at java.lang.reflect.Field.getFieldAccessor(Field.java:1069)
at java.lang.reflect.Field.get(Field.java:393)
at org.junit.runners.model.FrameworkField.get(FrameworkField.java:73)
at org.junit.runners.model.TestClass.getAnnotatedFieldValues(TestClass.java:230)
at org.junit.runners.ParentRunner.classRules(ParentRunner.java:255)
at org.junit.runners.ParentRunner.withClassRules(ParentRunner.java:244)
at org.junit.runners.ParentRunner.classBlock(ParentRunner.java:194)
at org.junit.runners.ParentRunner.run(ParentRunner.java:362)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.execution.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:47)
at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:242)
at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:70)
Caused by: java.lang.ClassNotFoundException: ch.qos.logback.core.filter.Filter
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 19 more
My test code looks like below
import io.dropwizard.testing.junit.ResourceTestRule;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import javax.ws.rs.core.Response;
import static org.mockito.Mockito.mock;
#Category(value = UnitTest.class)
public class ApiInfoControllerTest {
private static ApiNonFunctionalHandler nonFunctionalHandler = mock(ApiNonFunctionalHandler.class);
private static ApiFilter apiFilter = new ApiFilter(nonFunctionalHandler);
private static final String authToken = "NetworkInsight xTyAGJmZ8nU8yJDP7LnA8Q==";
#ClassRule
public static final ResourceTestRule resources = ResourceTestRule.builder()
.addResource(new ApiInfoController())
.addProvider(apiFilter).build();
#Test
public void testApiVersion() throws Exception {
Response response = resources.client()
.target(ApiConstants.INFO_BASE_URL + "/version")
.request()
.header("Authorization", authToken)
.buildGet().invoke();
Assert.assertNotNull(response);
Assert.assertEquals(response.toString(), Response.Status.OK.getStatusCode(), response.getStatus());
final VersionResponse actualError = response.readEntity(VersionResponse.class);
Assert.assertEquals(actualError.getApiVersion(), ApiConstants.API_VERSION);
}
}
My main application is working fine. The configuration.yaml for main application looks like below
# Change default server ports
server:
applicationConnectors:
- type: http
port: 8123
adminConnectors:
- type: http
port: 8124
requestLog:
type: external
logging:
type: external
Can someone let me know what could be going wrong and how can I get around this?
EDIT
Output of mvn dependency:tree is placed here as I am hitting the character limit here.

This is a bug in dropwizard 1.2.4 as discussed below
https://github.com/dropwizard/dropwizard/pull/2338

gatling jms-Nosuchmethoderror

I installed ActiveMQ in my local machine. I want to test it using Gatling. I created one simulation class to test my JMS. But I am getting error shown below.
Here is My Code:
import io.gatling.core.Predef._
import io.gatling.jms.Predef._
import scala.concurrent.duration._
import javax.jms._
import org.apache.activemq.ActiveMQConnectionFactory
class JmsTest extends Simulation{
val jmsConfig = jms
.connectionFactoryName("connectionFactory")
.url("tcp://localhost:61616")
.credentials("admin", "admin")
.contextFactory(classOf[org.apache.activemq.jndi.ActiveMQInitialContextFactory].getName)
.listenerCount(1)
val scn = scenario("JMS DSL test").repeat(1) {
exec(jms("req reply testing")
.reqreply
.queue("MyQueue")
.replyQueue("MyTopic")
.textMessage("Hello this is Naveen")
)
}
setUp(scn.inject(atOnceUsers(1)))
.protocols(jmsConfig)
}
I created jndi.properties file with following code.
java.naming.factory.initial = org.apache.activemq.jndi.ActiveMQInitialContextFactory
java.naming.provider.url = vm://localhost
connectionFactoryNames = connectionFactory
queue.MyQueue = TestJms1
topic.MyTopic = TestJms1
The error which I am getting is:
Exception in thread "main" java.lang.NoSuchMethodError: io.gatling.jms.Predef$.jms()Lio/gatling/jms/protocol/JmsProtocolBuilderBase$;
at computerdatabase.JmsTest.<init>(JmsTest.scala:14)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at java.lang.Class.newInstance(Class.java:442)
at io.gatling.app.Gatling$.io$gatling$app$Gatling$$$anonfun$1(Gatling.scala:41)
at io.gatling.app.Gatling.run(Gatling.scala:92)
at io.gatling.app.Gatling.runIfNecessary(Gatling.scala:75)
at io.gatling.app.Gatling.start(Gatling.scala:65)
at io.gatling.app.Gatling$.start(Gatling.scala:57)
at io.gatling.app.Gatling$.fromArgs(Gatling.scala:49)
at io.gatling.app.Gatling$.main(Gatling.scala:43)
at io.gatling.app.Gatling.main(Gatling.scala)

How to import table data stored in Hive in my MapReduce job?

I am using a single node cluster setup of Apache Hadoop 2.5.0 on Ubuntu 14.04
I stored tweets in my HDFS using Flume.
Then, I used the following Hive commands to create a table in Hive which stores all the tweets in tabular format:
CREATE EXTERNAL TABLE tweets (
id BIGINT,
created_at STRING,
source STRING,
favorited BOOLEAN,
retweet_count INT,
retweeted_status STRUCT<
text:STRING,
user:STRUCT<screen_name:STRING,name:STRING>>,
entities STRUCT<
urls:ARRAY<STRUCT<expanded_url:STRING>>,
user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,
hashtags:ARRAY<STRUCT<text:STRING>>>,
text STRING,
user STRUCT<
screen_name:STRING,
name:STRING,
friends_count:INT,
followers_count:INT,
statuses_count:INT,
verified:BOOLEAN,
utc_offset:INT,
time_zone:STRING>,
in_reply_to_screen_name STRING
)
ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe'
LOCATION '/user/flume/tweets';
I have verified that the data exists in the table 'tweets' by querying the database using HiveQL (from the Hive Command Line Interface). I also created an output table using the following command:
CREATE TABLE outputtable (
a STRING,
b INT );
I am using Apache Hive 0.13.1 which already has HCatalog in it. After all this, I am trying to write a MapReduce Job using Java language in Eclipse. I have added the following libraries to my project as external jars:
All the libraries present in path-of-installation-of-hadoop/share/hadoop/common
All the libraries present in path-of-installation-of-hadoop/share/hadoop/mapreduce
All the libraries present in the lib folder of Hive
All the libraries present in path-of-installation-of-Hive/hcatalog/share/hcatalog
My MapReduce code is trying to import the text of the tweets from the table 'tweets' and then process it. My MapReduce code is:
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.util.*;
import org.apache.hcatalog.common.*;
import org.apache.hcatalog.mapreduce.*;
import org.apache.hcatalog.data.*;
import org.apache.hcatalog.data.schema.*;
public class UseHCat extends Configured implements Tool {
public static class Map extends Mapper<WritableComparable, HCatRecord, Text, IntWritable> {
String tweetText;
#Override
protected void map( WritableComparable key,
HCatRecord value,
org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord,
Text, IntWritable>.Context context)
throws IOException, InterruptedException {
tweetText = (String) value.get(7);
int i = 1;
context.write(new Text(tweetText), new IntWritable(i));
}
}
public static class Reduce extends Reducer<Text, IntWritable,
WritableComparable, HCatRecord> {
protected void reduce( Text key,
java.lang.Iterable<IntWritable> values,
org.apache.hadoop.mapreduce.Reducer<Text, IntWritable,
WritableComparable, HCatRecord>.Context context)
throws IOException, InterruptedException {
Iterator<IntWritable> iter = values.iterator();
IntWritable iw = iter.next();
int id = iw.get();
HCatRecord record = new DefaultHCatRecord(2);
record.set(0, key.toString());
record.set(1, id);
context.write(null, record);
}
}
public int run(String[] args) throws Exception {
Configuration conf = getConf();
String inputTableName = "tweets";
String outputTableName = "outputtable";
String dbName = null;
Job job = new Job(conf, "UseHCat");
HCatInputFormat.setInput(job, InputJobInfo.create(dbName, inputTableName, null));
job.setJarByClass(UseHCat.class);
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
// An HCatalog record as input
job.setInputFormatClass(HCatInputFormat.class);
// Mapper emits a string as key and an integer as value
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// Ignore the key for the reducer output; emitting an HCatalog record as value
job.setOutputKeyClass(WritableComparable.class);
job.setOutputValueClass(DefaultHCatRecord.class);
job.setOutputFormatClass(HCatOutputFormat.class);
HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
HCatSchema s = HCatOutputFormat.getTableSchema(job);
System.err.println("INFO: output schema explicitly set for writing:" + s);
HCatOutputFormat.setSchema(job, s);
return (job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new UseHCat(), args);
System.exit(exitCode);
}
}
The first problem that we are facing is that we are getting many warnings specifying that some of the types and constructors are deprecated. We ignored the warnings and created a jar file of our Project whose main class is 'UseHCat'. Then we browsed to the location where the jar file was created using the terminal provided in Ubuntu and ran the following command:
hadoop jar MyProject.jar
We got the following error:
14/11/16 17:17:29 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hive/ql/metadata/HiveStorageHandler
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:800)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at org.apache.hcatalog.mapreduce.InitializeInput.getInputJobInfo(InitializeInput.java:146)
at org.apache.hcatalog.mapreduce.InitializeInput.setInput(InitializeInput.java:86)
at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:86)
at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:55)
at org.apache.hcatalog.mapreduce.HCatInputFormat.setInput(HCatInputFormat.java:47)
at UseHCat.run(UseHCat.java:64)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at UseHCat.main(UseHCat.java:91)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.metadata.HiveStorageHandler
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 26 more

Hive is developed to minimize the writing mapreduce program.You can perform the process using Hive queries, internally it will convert into mapreduce job.
However, if you want to access the Hivedb data,you can access. Hive is not a database. All the data stored under warehouse dir in readable format. You can give full path as a input to your mapreduce program.
Have you tried a sample mapreduce program in eclipse. Because you have build the Hadoop plugin or you can use the existing plugin in your eclipse to run mapreduce.

Getting ClassNotFound Exception: While dumping Configuration in Hadoop

I'm trying a simple program from the "hadoop in Action" book to merge a series of files from the local file system into one file in the hdfs. The code snippet is the same as the one provided in the book.
import java.lang.*;
import java.util.*;
import java.io.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
public class PutMerge {
public static void main(String[] args) throws IOException{
Configuration conf = new Configuration();
FileSystem hdfs = FileSystem.get(conf);
FileSystem local = FileSystem.getLocal(conf);
Path inputDir = new Path(args[0]); // First argument has the input directory
Path hdfsFile = new Path(args[1]); // Concatenated hdfs file name
try {
FileStatus[] inputFiles = local.listStatus(inputDir); // list of Local Files
FSDataOutputStream out = hdfs.create(hdfsFile); // target file creation
for (int i = 0; i<inputFiles.size; i++ {
FSDataInputStream in = local.open(inputFiles[i].getPath());
int bytesRead = 0;
byte[] buff = new byte[256];
while (bytesRead = (in.read(buff))>0) {
out.write(buff,0,bytesRead);
}
in.close();
}
out.close();
}
catch(Exception e) {
e.printStackTrace();
}
}
}
The program successfully compiled and while trying to run I'm getting the following exception
Exception in thread "main" java.lang.NoClassDefFoundError:
org/apache/commons/configuration/Configuration
at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.(DefaultMetricsSystem.java:37)
at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.(DefaultMetricsSystem.java:34)
at org.apache.hadoop.security.UgiInstrumentation.create(UgiInstrumentation.java:51)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:217)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:185)
at org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled(UserGroupInformation.java:237)
at org.apache.hadoop.security.KerberosName.(KerberosName.java:79)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:210)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:185)
at org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled(UserGroupInformation.java:237)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:482)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:468)
at org.apache.hadoop.fs.FileSystem$Cache$Key.(FileSystem.java:1519)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:1420)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:254)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:123)
at PutMerge.main(PutMerge.java:16) Caused by: java.lang.ClassNotFoundException:
org.apache.commons.configuration.Configuration
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
... 17 more
Based on inputs from some of the posts, I added the commons package. My classpath definition is
/usr/java/jdk1.7.0_21:/data/commons-logging-1.1.2/commons-logging-1.1.2.jar:/data/hadoop-1.1.2/hadoop-core-1.1.2.jar:/data/commons-logging-1.1.2/commons-logging-adapters-1.1.2.jar:/data/commons-logging-1.1.2/commons-logging-api-1.1.2.jar:.
Any clue on why this is not working?

You didnt include apache configuration in your classpath.
Really though you shouldn't need to include much besides hadoop itself. Make sure you are running your jar with hadoop itself.
> hadoop -jar myJar.jar

Issue while using hbase java client while puting data in database

I am testing the hbase . i am using a standalone one without hadoop. i used the version hbase 0.90.6 the code worked fine and i upgraded to latest version 0.94.0 it fails and gives this exception while i try to put datas in table.
Exception
Exception in thread "main" org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException: Failed 1 action: DoNotRetryIOException: 1 time, servers with issues: xxxx:36601,
at org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.processBatchCallback(HConnectionManager.java:1591)
at org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.processBatch(HConnectionManager.java:1367)
at org.apache.hadoop.hbase.client.HTable.flushCommits(HTable.java:945)
at org.apache.hadoop.hbase.client.HTable.doPut(HTable.java:801)
at org.apache.hadoop.hbase.client.HTable.put(HTable.java:776)
at com.hhase.Hbase.main(Hbase.java:22)
I am using the below code.
package com.hhase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
public class Hbase {
public static void main(String args[]) throws IOException {
Configuration hConf = HBaseConfiguration.create();
HTable table = new HTable(hConf, "myLittleHBaseTable");
Put p = new Put(Bytes.toBytes("myLittleRow"));
Put put = new Put(Bytes.toBytes("myLittleRow"));
put.add(Bytes.toBytes("myLittleFamily"),
Bytes.toBytes("someQualifier"), Bytes.toBytes("Some"));
table.put(put);
}
}
Library used
commons-cli-1.2.jar hadoop-core-1.0.2.jar
commons-codec-1.4.jar hbase-0.94.0.jar
commons-collections-3.2.1.jar httpclient-4.1.2.jar
commons-configuration-1.6.jar httpcore-4.1.4.jar
commons-httpclient-3.1.jar log4j-1.2.16.jar
commons-io-2.1.jar protobuf-java-2.4.0a.jar
commons-lang-2.5.jar slf4j-api-1.5.8.jar
commons-logging-1.1.1.jar slf4j-log4j12-1.5.8.jar
commons-net-1.4.1.jar stax-api-1.0.1.jar
guava-r09.jar zookeeper-3.4.3.jar

I encountered same error while inserting data into the HBase.
In my case, it was due to incorrect column family name.
Please refer to this conversation.

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Running Dataproc BigQuery example on local machine - hadoop

Related

Dropwizard testing - ResourceTestRule throwing NoClassDefFoundError: ch/qos/logback/core/filter/Filter

gatling jms-Nosuchmethoderror

How to import table data stored in Hive in my MapReduce job?

Getting ClassNotFound Exception: While dumping Configuration in Hadoop

Issue while using hbase java client while puting data in database

Categories

Resources