ClassCastException on DROP TABLE query in Apache Spark Hive - hadoop

I'm running the following Hive query:
this.queryExecutor.executeQuery("Drop table user")
and getting the following exception:
java.lang.LinkageError: ClassCastException: attempting to cast jar:file:/usr/hdp/2.4.2.0-258/spark/lib/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar!/javax/ws/rs/ext/RuntimeDelegate.class to jar:file:/usr/hdp/2.4.2.0-258/spark/lib/spark-assembly-1.6.1.2.4.2.0-258-hadoop2.7.1.2.4.2.0-258.jar!/javax/ws/rs/ext/RuntimeDelegate.class
at javax.ws.rs.ext.RuntimeDelegate.findDelegate(RuntimeDelegate.java:116)
at javax.ws.rs.ext.RuntimeDelegate.getInstance(RuntimeDelegate.java:91)
at javax.ws.rs.core.MediaType.<clinit>(MediaType.java:44)
at com.sun.jersey.core.header.MediaTypes.<clinit>(MediaTypes.java:64)
at com.sun.jersey.core.spi.factory.MessageBodyFactory.initReaders(MessageBodyFactory.java:182)
at com.sun.jersey.core.spi.factory.MessageBodyFactory.initReaders(MessageBodyFactory.java:175)
at com.sun.jersey.core.spi.factory.MessageBodyFactory.init(MessageBodyFactory.java:162)
at com.sun.jersey.api.client.Client.init(Client.java:342)
at com.sun.jersey.api.client.Client.access$000(Client.java:118)
at com.sun.jersey.api.client.Client$1.f(Client.java:191)
at com.sun.jersey.api.client.Client$1.f(Client.java:187)
at com.sun.jersey.spi.inject.Errors.processWithErrors(Errors.java:193)
at com.sun.jersey.api.client.Client.<init>(Client.java:187)
at com.sun.jersey.api.client.Client.<init>(Client.java:170)
at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.serviceInit(TimelineClientImpl.java:340)
at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163)
at org.apache.hadoop.hive.ql.hooks.ATSHook.<init>(ATSHook.java:67)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at java.lang.Class.newInstance(Class.java:442)
at org.apache.hadoop.hive.ql.hooks.HookUtils.getHooks(HookUtils.java:60)
at org.apache.hadoop.hive.ql.Driver.getHooks(Driver.java:1309)
at org.apache.hadoop.hive.ql.Driver.getHooks(Driver.java:1293)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1347)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1195)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1059)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1049)
at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:495)
at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$runHive$1.apply(ClientWrapper.scala:484)
at org.apache.spark.sql.hive.client.ClientWrapper$$anonfun$withHiveState$1.apply(ClientWrapper.scala:290)
at org.apache.spark.sql.hive.client.ClientWrapper.liftedTree1$1(ClientWrapper.scala:237)
at org.apache.spark.sql.hive.client.ClientWrapper.retryLocked(ClientWrapper.scala:236)
at org.apache.spark.sql.hive.client.ClientWrapper.withHiveState(ClientWrapper.scala:279)
at org.apache.spark.sql.hive.client.ClientWrapper.runHive(ClientWrapper.scala:484)
at org.apache.spark.sql.hive.client.ClientWrapper.runSqlHive(ClientWrapper.scala:474)
at org.apache.spark.sql.hive.HiveContext.runSqlHive(HiveContext.scala:613)
at org.apache.spark.sql.hive.execution.DropTable.run(commands.scala:89)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:817)
at com.accenture.aa.dmah.spark.core.QueryExecutor.executeQuery(QueryExecutor.scala:35)
at com.accenture.aa.dmah.attribution.transformer.MulltipleUserJourneyTransformer.transform(MulltipleUserJourneyTransformer.scala:32)
at com.accenture.aa.dmah.attribution.userjourney.UserJourneyBuilder$$anonfun$buildUserJourney$1.apply$mcVI$sp(UserJourneyBuilder.scala:31)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
at com.accenture.aa.dmah.attribution.userjourney.UserJourneyBuilder.buildUserJourney(UserJourneyBuilder.scala:29)
at com.accenture.aa.dmah.attribution.core.AttributionHub.executeAttribution(AttributionHub.scala:47)
at com.accenture.aa.dmah.attribution.jobs.AttributionJob.process(AttributionJob.scala:33)
at com.accenture.aa.dmah.core.DMAHJob.processJob(DMAHJob.scala:73)
at com.accenture.aa.dmah.core.DMAHJob.execute(DMAHJob.scala:27)
at com.accenture.aa.dmah.core.JobRunner.<init>(JobRunner.scala:17)
at com.accenture.aa.dmah.core.ApplicationInstance.initilize(ApplicationInstance.scala:48)
at com.accenture.aa.dmah.core.Bootstrap.boot(Bootstrap.scala:112)
at com.accenture.aa.dmah.core.BootstrapObj$.main(Bootstrap.scala:134)
at com.accenture.aa.dmah.core.BootstrapObj.main(Bootstrap.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.util.ScalaClassLoader$$anonfun$run$1.apply(ScalaClassLoader.scala:71)
at scala.tools.nsc.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.tools.nsc.util.ScalaClassLoader$URLClassLoader.asContext(ScalaClassLoader.scala:139)
at scala.tools.nsc.util.ScalaClassLoader$class.run(ScalaClassLoader.scala:71)
at scala.tools.nsc.util.ScalaClassLoader$URLClassLoader.run(ScalaClassLoader.scala:139)
at scala.tools.nsc.CommonRunner$class.run(ObjectRunner.scala:28)
at scala.tools.nsc.ObjectRunner$.run(ObjectRunner.scala:45)
at scala.tools.nsc.CommonRunner$class.runAndCatch(ObjectRunner.scala:35)
at scala.tools.nsc.ObjectRunner$.runAndCatch(ObjectRunner.scala:45)
at scala.tools.nsc.MainGenericRunner.runTarget$1(MainGenericRunner.scala:74)
at scala.tools.nsc.MainGenericRunner.process(MainGenericRunner.scala:96)
at scala.tools.nsc.MainGenericRunner$.main(MainGenericRunner.scala:105)
at scala.tools.nsc.MainGenericRunner.main(MainGenericRunner.scala)
I saw there have been similar posts here and here, but they haven't received any response so far.
I have also looked here, but I don't think that's a valid course of action in my case.
What's intriguing is that this happens specifically when we run a DROP TABLE (or DROP TABLE IF EXISTS) query.
Hoping to find a resolution for this.

To my knowledge, the above error occurs when the same class with the same package structure, i.e. 'javax.ws.rs.ext.RuntimeDelegate', is found in different JARs. Class objects are created and cast at runtime, so there is every possibility that the code path triggered by the DROP statement uses this class and breaks because it is found more than once on the classpath.
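As a quick check of that theory (a minimal sketch in Scala, not from the original answer), you could ask the classloader for every location from which it can resolve this class; seeing more than one hit would support the duplicate-class explanation. Run it in the same JVM (e.g. spark-shell) that throws the LinkageError:
import scala.collection.JavaConverters._
// List every classpath location that provides javax.ws.rs.ext.RuntimeDelegate.
val resources = getClass.getClassLoader
  .getResources("javax/ws/rs/ext/RuntimeDelegate.class")
  .asScala
  .toList
resources.foreach(url => println(url))
println(s"Found ${resources.size} occurrence(s) of RuntimeDelegate on the classpath")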
I have tried DROP and DROP IF EXISTS on CDH5 and they worked without issue. Below are the details of my runs:
First run - Hadoop 2.6, Hive 1.1.0 and Spark 1.3.1 (Hive libraries included in the Spark lib directory)
Second run - Hadoop 2.6, Hive 1.1.0 and Spark 1.6.1
Mode of run - CLI
scala> sqlContext.sql("DROP TABLE SAMPLE");
16/08/04 11:31:39 INFO parse.ParseDriver: Parsing command: DROP TABLE SAMPLE
16/08/04 11:31:39 INFO parse.ParseDriver: Parse Completed
......
scala> sqlContext.sql("DROP TABLE IF EXISTS SAMPLE");
16/08/04 11:40:34 INFO parse.ParseDriver: Parsing command: DROP TABLE IF EXISTS SAMPLE
16/08/04 11:40:35 INFO parse.ParseDriver: Parse Completed
.....
If possible, please validate the DROP commands using a different version of the Spark libraries to narrow down the problem scope.
Meanwhile, I am analyzing the JARs to find where two occurrences of the same 'RuntimeDelegate' class exist, and will report back whether removing one of the JARs fixes the issue (and whether adding it back reproduces it).
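A small sketch of how such a JAR scan could be done in Scala (the directory is taken from the stack trace above; adjust the path for your own install):
import java.io.File
import java.util.jar.JarFile
import scala.collection.JavaConverters._
// Scan every JAR in the Spark lib directory for the conflicting class entry.
val libDir = new File("/usr/hdp/2.4.2.0-258/spark/lib")
val target = "javax/ws/rs/ext/RuntimeDelegate.class"
for (jar <- libDir.listFiles().filter(_.getName.endsWith(".jar"))) {
  val jf = new JarFile(jar)
  if (jf.entries().asScala.exists(_.getName == target))
    println(s"$target found in ${jar.getName}")
  jf.close()
}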

Related

Hive does not start: Error creating path /hive/cluster/delegation/METASTORE/keys

I ran into a problem on a Kerberized cluster where Hive would not start.
Symptoms:
Services started successfully (and did not stop)
In Ambari an alert appeared saying that the Hive metastore had failed
Starting Hive on the command line did not succeed (it just kept hanging)
Via Beeline I was able to see metadata, but not retrieve actual data
I found the following error in /var/log/hive/hivemetastore.log:
2016-08-29 10:12:49,047 ERROR [main]: metastore.HiveMetaStore (HiveMetaStore.java:main(5934)) - Metastore Thrift Server threw an exception...
org.apache.hadoop.hive.thrift.DelegationTokenStore$TokenStoreException: Error creating path /hive/cluster/delegation/METASTORE/keys
at org.apache.hadoop.hive.thrift.ZooKeeperTokenStore.ensurePath(ZooKeeperTokenStore.java:166)
at org.apache.hadoop.hive.thrift.ZooKeeperTokenStore.initClientAndPaths(ZooKeeperTokenStore.java:236)
at org.apache.hadoop.hive.thrift.ZooKeeperTokenStore.init(ZooKeeperTokenStore.java:469)
at org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge$Server.startDelegationTokenSecretManager(HadoopThriftAuthBridge.java:444)
at org.apache.hadoop.hive.metastore.HiveMetaStore.startMetaStore(HiveMetaStore.java:6015)
at org.apache.hadoop.hive.metastore.HiveMetaStore.main(HiveMetaStore.java:5930)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: org.apache.zookeeper.KeeperException$AuthFailedException: KeeperErrorCode = AuthFailed for /hive/cluster/delegation/METASTORE/keys
at org.apache.zookeeper.KeeperException.create(KeeperException.java:123)
at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
at org.apache.zookeeper.ZooKeeper.create(ZooKeeper.java:783)
at org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:691)
at org.apache.curator.framework.imps.CreateBuilderImpl$11.call(CreateBuilderImpl.java:675)
at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
at org.apache.curator.framework.imps.CreateBuilderImpl.pathInForeground(CreateBuilderImpl.java:672)
at org.apache.curator.framework.imps.CreateBuilderImpl.protectedPathInForeground(CreateBuilderImpl.java:453)
at org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:443)
at org.apache.curator.framework.imps.CreateBuilderImpl.forPath(CreateBuilderImpl.java:423)
at org.apache.curator.framework.imps.CreateBuilderImpl$3.forPath(CreateBuilderImpl.java:257)
at org.apache.curator.framework.imps.CreateBuilderImpl$3.forPath(CreateBuilderImpl.java:205)
at org.apache.hadoop.hive.thrift.ZooKeeperTokenStore.ensurePath(ZooKeeperTokenStore.java:160)
... 11 more
Note that I actually tried several things, so I am not sure whether this is the full solution, but here is the final step, which I believe to be the critical one:
After a long search I indirectly found this site: https://community.hortonworks.com/articles/49040/hive-metastore-crashes-on-nullpointerexception-wit.html
Here is the relevant fragment that helped me resolve the issue:
This is a known issue being tracked in the following Hortonworks bug:
https://hortonworks.jira.com/browse/BUG-42602
WORKAROUND:
Set the hive.cluster.delegation.token.store.class to the following:
hive.cluster.delegation.token.store.class=org.apache.hadoop.hive.thrift.DBTokenStore
If using Ambari, this setting can be changed by clicking on the Hive service on the Ambari Dashboard, navigating to the "Configs" tab, and modifying the parameter in the "Advanced Hive-site" section of the Hive configs. Save the changes and restart Hive from the Ambari User Interface when prompted.
If not using Ambari, this setting can be located in the /etc/hive/conf/hive-site.xml file. Make sure this change is made on all applicable nodes in the cluster. Once the changes are made, the Hive services must be restarted.
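For reference, if you edit hive-site.xml directly, the property from the workaround would look like this (fragment only; the rest of the file stays unchanged):
<property>
  <name>hive.cluster.delegation.token.store.class</name>
  <value>org.apache.hadoop.hive.thrift.DBTokenStore</value>
</property>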

PIG Script Error: java.lang.NoSuchMethodError: org.apache.thrift.protocol.TProtocol.getScheme

I am running a PIG script in mapreduce mode. The script reads RCFile (containing Thrift serialized data stored in GZIP compressed format), deserializes it using a UDF, extracts certain fields from the Thrift struct, and stores them.
Some of the mappers fail with the following error:
2015-12-23 03:07:45,638 FATAL [Thread-5] org.apache.hadoop.mapred.YarnChild: Error running child : java.lang.NoSuchMethodError: org.apache.thrift.protocol.TProtocol.getScheme()Ljava/lang/Class;
at com.xxx.yyy.thrift.dto.LatLong.read(LatLong.java:553)
at com.twitter.elephantbird.util.ThriftUtils.readSingleFieldNoTag(ThriftUtils.java:318)
at com.twitter.elephantbird.util.ThriftUtils.readFieldNoTag(ThriftUtils.java:352)
at com.twitter.elephantbird.mapreduce.input.RCFileThriftTupleInputFormat$TupleReader.getCurrentTupleValue(RCFileThriftTupleInputFormat.java:74)
at com.twitter.elephantbird.pig.load.RCFileThriftPigLoader.getNext(RCFileThriftPigLoader.java:46)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigRecordReader.nextKeyValue(PigRecordReader.java:204)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:553)
at org.apache.hadoop.mapreduce.task.MapContextImpl.nextKeyValue(MapContextImpl.java:80)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.nextKeyValue(WrappedMapper.java:91)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Here's my script:
REGISTER '/user/ameya/libs/geo-analysis-1.0.0-SNAPSHOT.jar';
REGISTER '/user/ameya/libs/libthrift-0.8.0.jar';
REGISTER '/user/ameya/libs/thrift-0.8-types-1.1.29-SNAPSHOT.jar';
REGISTER '/user/ameya/libs/libs/elephant-bird-pig-4.7.jar';
REGISTER '/user/ameya/libs/libs/elephant-bird-rcfile-4.7.jar';
REGISTER '/user/ameya/libs/libs/elephant-bird-core-4.7.jar';
REGISTER '/user/ameya/libs/libs/elephant-bird-hadoop-compat-4.7.jar';
REGISTER '/user/ameya/libs/libs/hive-0.4.1.jar';
REGISTER '/user/ameya/libs/libs/libs/hive-serde-0.13.3.jar';
SET output.compression.enabled true;
SET output.compression.codec org.apache.hadoop.io.compress.GzipCodec;
thrift = LOAD '$input' USING com.twitter.elephantbird.pig.load.RCFileThriftPigLoader('com.xxx.yyy.thrift.dto.LatLong');
final = FOREACH thrift GENERATE (requestLatLong is not null ? requestLatLong.latitude : null) AS req_ll_lat,
(requestLatLong is not null ? requestLatLong.longitude : null) AS req_ll_lng;
STORE final INTO '$output';
I am using libthrift-0.8.0.jar, in which the TProtocol class does define the getScheme() method (with public access). Interestingly, not all the mappers fail, just a few of them; but that is enough to fail my job. Could this be a CLASSPATH issue?
I tried searching for this issue but could not find relevant answers. Can someone please help me get some leads to fix this?
Found the reason. The class org.apache.thrift.protocol.TProtocol was defined in two JARs, i.e. libthrift-0.8.0.jar and hive-0.4.1.jar. The version in hive-0.4.1.jar did not define getScheme(), so whenever hive-0.4.1.jar was picked up first on the classpath, the mappers could not find the getScheme() method.
I am not sure why the behavior was not consistent across all mappers. Any comments explaining that would be helpful.
I replaced hive-0.4.1.jar with hive-exec-0.13.3.jar and the issue was resolved.
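One way to see which JAR actually wins in a given JVM (a sketch in Scala, not part of the original script or answer) is to ask the class for its code source; if different task JVMs print different JARs, that would explain why only some mappers fail:
// Prints the JAR that the running JVM resolved TProtocol from.
val tprotocol = Class.forName("org.apache.thrift.protocol.TProtocol")
val source = Option(tprotocol.getProtectionDomain.getCodeSource)
  .map(_.getLocation.toString)
  .getOrElse("unknown (no code source)")
println(s"TProtocol loaded from: $source")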

org.apache.hadoop.hbase.TableNotFoundException: SYSTEM.CATALOG exception with phoenix 4.5.2

I've been trying to integrate Phoenix 4.5.2 into my existing Hadoop cluster.
Hadoop Version : 2.7.1
HBase Version : 1.1.2
When I try to create a table from my Phoenix client I get the following exception. However, I am able to create tables successfully from the HBase console.
org.apache.phoenix.exception.PhoenixIOException: SYSTEM.CATALOG
at org.apache.phoenix.util.ServerUtil.parseServerException(ServerUtil.java:108)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.metaDataCoprocessorExec(ConnectionQueryServicesImpl.java:1051)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.metaDataCoprocessorExec(ConnectionQueryServicesImpl.java:1014)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.createTable(ConnectionQueryServicesImpl.java:1259)
at org.apache.phoenix.query.DelegateConnectionQueryServices.createTable(DelegateConnectionQueryServices.java:113)
at org.apache.phoenix.schema.MetaDataClient.createTableInternal(MetaDataClient.java:1937)
at org.apache.phoenix.schema.MetaDataClient.createTable(MetaDataClient.java:751)
at org.apache.phoenix.compile.CreateTableCompiler$2.execute(CreateTableCompiler.java:186)
at org.apache.phoenix.jdbc.PhoenixStatement$2.call(PhoenixStatement.java:320)
at org.apache.phoenix.jdbc.PhoenixStatement$2.call(PhoenixStatement.java:312)
at org.apache.phoenix.call.CallRunner.run(CallRunner.java:53)
at org.apache.phoenix.jdbc.PhoenixStatement.executeMutation(PhoenixStatement.java:310)
at org.apache.phoenix.jdbc.PhoenixStatement.executeUpdate(PhoenixStatement.java:1422)
at org.apache.phoenix.query.ConnectionQueryServicesImpl$12.call(ConnectionQueryServicesImpl.java:1927)
at org.apache.phoenix.query.ConnectionQueryServicesImpl$12.call(ConnectionQueryServicesImpl.java:1896)
at org.apache.phoenix.util.PhoenixContextExecutor.call(PhoenixContextExecutor.java:77)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.init(ConnectionQueryServicesImpl.java:1896)
at org.apache.phoenix.jdbc.PhoenixDriver.getConnectionQueryServices(PhoenixDriver.java:180)
at org.apache.phoenix.jdbc.PhoenixEmbeddedDriver.connect(PhoenixEmbeddedDriver.java:132)
at org.apache.phoenix.jdbc.PhoenixDriver.connect(PhoenixDriver.java:151)
at org.apache.tools.ant.taskdefs.JDBCTask.getConnection(JDBCTask.java:370)
at org.apache.tools.ant.taskdefs.SQLExec.getConnection(SQLExec.java:940)
at org.apache.tools.ant.taskdefs.SQLExec.execute(SQLExec.java:612)
at org.apache.tools.ant.UnknownElement.execute(UnknownElement.java:292)
at sun.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.tools.ant.dispatch.DispatchUtils.execute(DispatchUtils.java:106)
at org.apache.tools.ant.Task.perform(Task.java:348)
at org.apache.tools.ant.Target.execute(Target.java:435)
at org.apache.tools.ant.Target.performTasks(Target.java:456)
at org.apache.tools.ant.Project.executeSortedTargets(Project.java:1393)
at org.apache.tools.ant.helper.SingleCheckExecutor.executeTargets(SingleCheckExecutor.java:38)
at org.apache.tools.ant.Project.executeTargets(Project.java:1248)
at org.apache.tools.ant.taskdefs.Ant.execute(Ant.java:440)
at org.apache.tools.ant.UnknownElement.execute(UnknownElement.java:292)
at sun.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.tools.ant.dispatch.DispatchUtils.execute(DispatchUtils.java:106)
at org.apache.tools.ant.Task.perform(Task.java:348)
at org.apache.tools.ant.Target.execute(Target.java:435)
at org.apache.tools.ant.Target.performTasks(Target.java:456)
at org.apache.tools.ant.Project.executeSortedTargets(Project.java:1393)
at org.apache.tools.ant.Project.executeTarget(Project.java:1364)
at org.apache.tools.ant.helper.DefaultExecutor.executeTargets(DefaultExecutor.java:41)
at org.apache.tools.ant.Project.executeTargets(Project.java:1248)
at org.apache.tools.ant.Main.runBuild(Main.java:851)
at org.apache.tools.ant.Main.startAnt(Main.java:235)
at org.apache.tools.ant.launch.Launcher.run(Launcher.java:280)
at org.apache.tools.ant.launch.Launcher.main(Launcher.java:109)
Caused by: org.apache.hadoop.hbase.TableNotFoundException: SYSTEM.CATALOG
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegionInMeta(ConnectionManager.java:1257)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1155)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1139)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.locateRegion(ConnectionManager.java:1096)
at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.getRegionLocation(ConnectionManager.java:931)
at org.apache.hadoop.hbase.client.HRegionLocator.getRegionLocation(HRegionLocator.java:83)
at org.apache.hadoop.hbase.client.HTable.getRegionLocation(HTable.java:496)
at org.apache.hadoop.hbase.client.HTable.getKeysAndRegionsInRange(HTable.java:736)
at org.apache.hadoop.hbase.client.HTable.getKeysAndRegionsInRange(HTable.java:706)
at org.apache.hadoop.hbase.client.HTable.getStartKeysInRange(HTable.java:1760)
at org.apache.hadoop.hbase.client.HTable.coprocessorService(HTable.java:1715)
at org.apache.hadoop.hbase.client.HTable.coprocessorService(HTable.java:1695)
at org.apache.phoenix.query.ConnectionQueryServicesImpl.metaDataCoprocessorExec(ConnectionQueryServicesImpl.java:1034)
... 49 more
Please suggest what's going wrong here, and whether Phoenix 4.5.2 is compatible with HBase 1.1.2 or not.
Clearing ZooKeeper will solve the problem:
bin/hbase clean --cleanZk
Note: you need to shut down your HBase master and region servers before using the above command.
Here's what you need to do:
Log in to the HBase ZooKeeper CLI: hbase zkcli
Check whether SYSTEM.CATALOG, SYSTEM.SEQUENCE, SYSTEM.STATS, and SYSTEM.FUNCTION exist by issuing: ls /hbase/table
If so, then this is the solution :)
Remove all the aforementioned znodes, i.e. from the same CLI issue rmr /hbase/table/SYSTEM.CATALOG. Do this for all four of the aforementioned entries.
Retry sqlline... and smile
I have an embedded HBase. I hit this error after manually deleting HBase data, so I had to clean ZooKeeper and the rest of HBase with:
bin/hbase clean --cleanAll
That solved the problem for me.
According to your logs, Phoenix was trying to create a table called SYSTEM.CATALOG in HBase, but due to some problem in HBase it wasn't able to create it.
The SYSTEM.CATALOG table is created the first time a connection is made from Phoenix to HBase.
Therefore, I would recommend checking your Phoenix configuration for connectivity issues such as a wrong IP address mapping, a missing passwordless SSH setup, etc.
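A minimal connectivity check in Scala (a sketch; the ZooKeeper hosts, port, and znode parent below are placeholders for your own values, and the znode is often /hbase or /hbase-unsecure). Opening the first Phoenix connection is exactly what triggers the SYSTEM.CATALOG bootstrap, so this reproduces the failing step in isolation:
import java.sql.DriverManager
// Load the Phoenix JDBC driver and open a bare connection.
Class.forName("org.apache.phoenix.jdbc.PhoenixDriver")
val url = "jdbc:phoenix:zk-host1,zk-host2,zk-host3:2181:/hbase"
val conn = DriverManager.getConnection(url)
println("Connected to Phoenix: " + !conn.isClosed)
conn.close()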
The above suggestions of cleaning HBase data with only ZooKeeper running did not help me. What finally helped me overcome this error and allowed creation of the SYSTEM.CATALOG table and the other associated tables was copying hbase-site.xml from the HBase installation into the bin folder of the Apache Phoenix installation.
In my case the HBase conf was in /usr/local/hbase/conf, which I copied into /usr/local/apache-phoenix/bin. I just renamed the hbase-site.xml that was already present in the bin folder to something else, as a backup.
This solved the connectivity problem from sqlline.py as well as from the SQuirreL client.
This was apart from ensuring the Phoenix client libraries were in the /squirrel/lib folder for SQuirreL to work.
Check whether SYSTEM.CATALOG exists under hdfs://.../hbase/data/default/.
If there is nothing there, try bin/hbase clean --cleanZk. Before you use that command, you must stop the HBase master and region servers, but keep ZooKeeper alive.

Run installed Pig from Oozie (Hadoop)

I have installed Oozie on my system and I have also installed Pig. Now I want Oozie to run the workflow using the Pig that is installed on my system, not the one from the Oozie sharelib. Please help, as I get the following error:
2015-08-19 17:15:25,724 WARN PigActionExecutor:523 - SERVER[edb-node1] USER[hduser] GROUP[-] TOKEN[] APP[pig-wf] JOB[0000002-150819170943510-oozie-hdus-W] ACTION[0000002-150819170943510-oozie-hdus-W#pig-node] Launcher ERROR, reason: Main class [org.apache.oozie.action.hadoop.PigMain], exception invoking main(), java.lang.ClassNotFoundException: Class org.apache.oozie.action.hadoop.PigMain not found
2015-08-19 17:15:25,728 WARN PigActionExecutor:523 - SERVER[edb-node1] USER[hduser] GROUP[-] TOKEN[] APP[pig-wf] JOB[0000002-150819170943510-oozie-hdus-W] ACTION[0000002-150819170943510-oozie-hdus-W#pig-node] Launcher exception: java.lang.ClassNotFoundException: Class org.apache.oozie.action.hadoop.PigMain not found
java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.oozie.action.hadoop.PigMain not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2074)
at org.apache.oozie.action.hadoop.LauncherMapper.map(LauncherMapper.java:234)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler.runSubtask(LocalContainerLauncher.java:370)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler.runTask(LocalContainerLauncher.java:295)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler.access$200(LocalContainerLauncher.java:181)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler$1.run(LocalContainerLauncher.java:224)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask.run(FutureTask.java:262)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException: Class org.apache.oozie.action.hadoop.PigMain not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1980)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2072)
... 13 more
Your error messages show clearly that you have an incomplete CLASSPATH.
That's because the pig command line does a lot of things, among them silently setting up the appropriate Pig JARs on the CLASSPATH before invoking the PigMain Java class. But Oozie calls the Java class directly; the CLASSPATH issues are supposed to be handled either...
by the Pig ShareLib, when active,
or by you, as a knowledgeable Java developer -- after all, it's your choice not to use the default way to run Pig, so you know what you are doing, right?
Before asking your question, did you try a search on Stack Overflow (and/or Google) with the following keywords? The results could prove useful:
oozie pig custom classpath
In the above case, have you checked whether you can access the Pig CLI (grunt shell), or run a Pig script manually, without using Oozie?
What happened here is that the Pig JAR was not accessible when you ran through Oozie. The simplest way to avoid this issue is to use the Oozie ShareLib, but since you mention that you don't want to use the sharelib, try one of these alternatives (see the sketch after this list):
Update the Pig home path in the Hadoop classpath. This helps MapReduce fetch the list of JARs from the HDFS temp location every time Oozie submits a request to MapReduce.
Update the Pig home in your bash profile (if Pig shows a "command not found" error when you try to open the grunt shell).
Hope this helps.
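One common way to point an Oozie action at your own JARs instead of the sharelib is the oozie.libpath job property (a sketch only; the HDFS path and NameNode address are placeholders, and it assumes you have uploaded the Pig JARs from your local installation to that directory):
# job.properties (fragment)
# Use a custom HDFS directory containing the locally installed Pig JARs
# instead of the Oozie sharelib.
oozie.use.system.libpath=false
oozie.libpath=hdfs://namenode:8020/user/hduser/oozie/pig-libs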

Load CSV data into HBase using Pig or Hive

Hi, I have created a Pig script which loads data into HBase. My CSV file is stored in HDFS at /hbase_tables/zip.csv.
Pig Script
register /home/hduser/pig-0.12.0/lib/pig-0.8.0-core.jar;
A = LOAD '/hbase_tables/zip.csv' USING PigStorage(',') as (id:chararray, zip:chararray, desc1:chararray, desc2:chararray, income:chararray);
STORE A INTO 'hbase://mydata' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('zip:zip,desc:desc1,desc:desc2,income:income');
When I execute it, I get the error below:
Pig Stack Trace
ERROR 2017: Internal error creating job configuration.
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobCreationException: ERROR 2017: Internal error creating job configuration.
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.getJob(JobControlCompiler.java:667)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.compile(JobControlCompiler.java:256)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.launchPig(MapReduceLauncher.java:147)
at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.execute(HExecutionEngine.java:378)
at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1198)
at org.apache.pig.PigServer.execute(PigServer.java:1190)
at org.apache.pig.PigServer.access$100(PigServer.java:128)
at org.apache.pig.PigServer$Graph.execute(PigServer.java:1517)
at org.apache.pig.PigServer.executeBatchEx(PigServer.java:362)
at org.apache.pig.PigServer.executeBatch(PigServer.java:329)
at org.apache.pig.tools.grunt.GruntParser.executeBatch(GruntParser.java:112)
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:169)
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:141)
at org.apache.pig.tools.grunt.Grunt.exec(Grunt.java:90)
at org.apache.pig.Main.run(Main.java:510)
at org.apache.pig.Main.main(Main.java:107)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path in absolute URI: hbase://mydata_logs
at org.apache.hadoop.fs.Path.initialize(Path.java:148)
at org.apache.hadoop.fs.Path.<init>(Path.java:71)
at org.apache.hadoop.fs.Path.<init>(Path.java:45)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.getJob(JobControlCompiler.java:470)
... 20 more
Caused by: java.net.URISyntaxException: Relative path in absolute URI: hbase://mydata_logs
at java.net.URI.checkPath(URI.java:1804)
at java.net.URI.<init>(URI.java:752)
at org.apache.hadoop.fs.Path.initialize(Path.java:145)
... 23 more
Please let me know how I can import the CSV data file into HBase, or if you have any alternate solution.
It seems like your problem is with the "Relative path in absolute URI: hbase://mydata_logs" error.
Are you sure the path is correct?
Probably the table mydata_logs does not exist. Start hbase shell and type list. Is your table on the list?
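If the table is missing, HBaseStorage expects it to exist already with the column families referenced in the STORE statement, so it has to be created in HBase first (a sketch; the table name and family names are taken from the script above):
hbase shell
create 'mydata', 'zip', 'desc', 'income'
list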
I had the same task once and have a fully working solution (actually, I'm not sure about the commas in the third line of your code):
%default hbase_home `echo \$HBASE_HOME`;
%default tmp '/user/alexander/tmp/users_dump/k14'
set zookeeper.znode.parent '/hbase-unsecure';
set hbase.zookeeper.quorum 'dmp-hbase.local';
register $hbase_home/lib/zookeeper-3.4.5.jar;
register $hbase_home/hbase-0.94.20.jar;
UsersHdfs = LOAD '$tmp' using PigStorage('\t', '-schema');
store UsersHdfs into 'hbase://user_test' using
org.apache.pig.backend.hadoop.hbase.HBaseStorage(
'id:DEFAULT id:last_modified birth:year gender:female gender:male','-caster HBaseBinaryConverter'
);
That code works for me; maybe the issue is in your HBase configs.
You could provide your .csv file and we could discuss it in more detail.
