Spark startup on Windows

I am trying to learn Apache Spark. I am following this book. I downloaded Apache Spark 1.5.1 and extracted it to F:\spark-1.5.1. I started it with spark-shell, and I get the following exception every time:
15/11/06 15:29:06 WARN Connection: BoneCP specified but not present in CLASSPATH
(or one of dependencies)
15/11/06 15:29:21 WARN ObjectStore: Version information not found in metastore.
hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
15/11/06 15:29:21 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
15/11/06 15:29:23 WARN : Your hostname, inc resolves to a loopback/non-reachable address: fxx0:0:0:0:0:5xxe:cxx8:1%12, but we couldn't find any external IP address!
java.lang.RuntimeException: java.lang.NullPointerException
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:171)
at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:162)
at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:160)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:167)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.apache.spark.repl.SparkILoop.createSQLContext(SparkILoop.scala:1028)
at $iwC$$iwC.<init>(<console>:9)
at $iwC.<init>(<console>:18)
at <init>(<console>:20)
at .<init>(<console>:24)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1340)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:132)
at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NullPointerException
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1010)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:482)
at org.apache.hadoop.util.Shell.run(Shell.java:455)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:715)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:808)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:582)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:557)
at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
... 56 more
<console>:10: error: not found: value sqlContext
import sqlContext.implicits._
^
<console>:10: error: not found: value sqlContext
import sqlContext.sql
^
scala>
SparkContext is working fine; SQLContext is not. I would like to know the correct way to set up/configure a stand-alone Spark instance.
Please read my comment below to understand why I created this.

I don't want to try 2-3 random things without knowing what I am doing.
I guess the random thing you are talking about is copying winutils.exe. That is because Spark sometimes looks for the HADOOP_HOME variable in its configuration, so copying that file (and pointing HADOOP_HOME at it) is how this error is worked around.
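A minimal sketch of that workaround, assuming a winutils.exe matching your Hadoop build is placed under F:\hadoop\bin (the path and the chmod step are assumptions drawn from common reports, not from this thread):
set HADOOP_HOME=F:\hadoop
set PATH=%HADOOP_HOME%\bin;%PATH%
rem the NullPointerException above originates in a permission check on the Hive scratch dir
%HADOOP_HOME%\bin\winutils.exe chmod 777 \tmp\hive
F:\spark-1.5.1\bin\spark-shell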
This blog can help you set up Spark.
HTH!
Thanks.

Related

TDCH with oozie is failing while connecting to Hive

I am trying to execute a data import from Teradata to Hive using TDCH 0.15. The import works fine when issuing a CLI command on the namenode, but fails with the error below when integrated with Oozie (using a Java action node). Is it because of a missing library?
2016-10-10 16:36:02,743 WARN [main] org.apache.hadoop.hive.metastore.ObjectStore: Failed to get database d0_edw1_ato, returning NoSuchObjectException
2016-10-10 16:36:02,750 ERROR [main] org.apache.hadoop.hive.metastore.RetryingHMSHandler: InvalidObjectException(message:d0_edw1_ato)
at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_table_with_environment_context(HiveMetaStore.java:1451)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
at com.sun.proxy.$Proxy22.create_table_with_environment_context(Unknown Source)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.create_table_with_environment_context(HiveMetaStoreClient.java:2056)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:675)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:663)
at com.teradata.connector.hive.utils.HiveUtils.createHiveTable(HiveUtils.java:979)
at com.teradata.connector.hive.processor.HiveOutputProcessor.outputPostProcessor(HiveOutputProcessor.java:258)
at com.teradata.connector.common.tool.ConnectorJobRunner.runJob(ConnectorJobRunner.java:162)
at com.teradata.connector.common.tool.ConnectorImportTool.run(ConnectorImportTool.java:74)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90)
at com.teradata.connector.common.tool.ConnectorImportTool.main(ConnectorImportTool.java:813)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.oozie.action.hadoop.JavaMain.run(JavaMain.java:56)
at org.apache.oozie.action.hadoop.LauncherMain.run(LauncherMain.java:47)
at org.apache.oozie.action.hadoop.JavaMain.main(JavaMain.java:35)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.oozie.action.hadoop.LauncherMapper.map(LauncherMapper.java:241)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:168)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:162)

Hadoop on Windows: Not a valid DFS filename

I have configured Hadoop 2.7.2 on Windows. I can see that the name node, data node, resource manager, and node manager are running properly; the problem occurs when I try to run one of the MapReduce example programs.
Please find below the command I am running:
c:\hdp\bin\yarn jar c:\hdp\share\hadoop\mapreduce\hadoop-mapreduce-examples-2.7.2.jar wordcount c:\hdp\LICENSE.txt /out
I can see all the files are present at the desired locations.
Please find below the stack trace:
C:\WINDOWS\system32>c:\hdp\bin\yarn jar c:\hdp\share\hadoop\mapreduce\hadoop-mapreduce-examples-2.7.2.jar wordcount c:\hdp\LICENSE.txt /out
16/02/03 16:50:55 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
16/02/03 16:50:56 INFO mapreduce.JobSubmitter: Cleaning up the staging area /tmp/hadoop-yarn/staging/tbhakre/.staging/job_1454492091405_0006
java.lang.IllegalArgumentException: Pathname /c:/hdp/LICENSE.txt from hdfs://0.0.0.0:19000/c:/hdp/LICENSE.txt is not a valid DFS filename.
at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:197)
at org.apache.hadoop.hdfs.DistributedFileSystem.access$000(DistributedFileSystem.java:106)
at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1305)
at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1301)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1301)
at org.apache.hadoop.fs.Globber.getFileStatus(Globber.java:57)
at org.apache.hadoop.fs.Globber.glob(Globber.java:252)
at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1674)
at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:294)
at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:265)
at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getSplits(FileInputFormat.java:387)
at org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:301)
at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:318)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:196)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1308)
at org.apache.hadoop.examples.WordCount.main(WordCount.java:87)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
at org.apache.hadoop.examples.ExampleDriver.main(ExampleDriver.java:74)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
If you are running everything on the same machine, you can just refer to it with a file: URL, like file://c:/hdp/LICENSE.txt (maybe it's file:/c:/hdp/LICENSE.txt; I forget the naming logic).
LICENSE.txt (the first argument to wordcount) should be located in HDFS, not on your local machine.
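A hedged sketch of that second suggestion, assuming the daemons from the question are up and reusing tbhakre (the user visible in the staging path above) as the HDFS user:
c:\hdp\bin\hdfs dfs -mkdir -p /user/tbhakre
c:\hdp\bin\hdfs dfs -put c:\hdp\LICENSE.txt /user/tbhakre/LICENSE.txt
c:\hdp\bin\yarn jar c:\hdp\share\hadoop\mapreduce\hadoop-mapreduce-examples-2.7.2.jar wordcount /user/tbhakre/LICENSE.txt /out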

Pig script fails on HUE Browser

I have the following pig script:
data = LOAD '/user/test/text.txt' as (text:CHARARRAY) ;
DUMP data;
When I run this in the shell it works, but it fails when run in the HUE browser.
This is the output log:
2015-08-19 15:42:21,033 [JobControl] ERROR com.hadoop.compression.lzo.GPLNativeCodeLoader - Could not load native gpl library
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1886)
at java.lang.Runtime.loadLibrary0(Runtime.java:849)
at java.lang.System.loadLibrary(System.java:1088)
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32)
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:270)
at org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:2051)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2016)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:128)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:175)
at org.apache.hadoop.mapreduce.lib.input.TextInputFormat.isSplitable(TextInputFormat.java:58)
at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getSplits(FileInputFormat.java:397)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigInputFormat.getSplits(PigInputFormat.java:274)
at org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:597)
at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:614)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:492)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1306)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1303)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1303)
at org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.submit(ControlledJob.java:335)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.pig.backend.hadoop23.PigJobControl.submit(PigJobControl.java:128)
at org.apache.pig.backend.hadoop23.PigJobControl.run(PigJobControl.java:191)
at java.lang.Thread.run(Thread.java:745)
Has anyone met this error before? Please help me.
Thanks
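The UnsatisfiedLinkError means the JVM that HUE launches cannot find the native libgplcompression library on its java.library.path. A hedged sketch of one workaround, if LZO compression is not actually needed: drop com.hadoop.compression.lzo.LzoCodec from io.compression.codecs in core-site.xml, so that CompressionCodecFactory (visible in the trace above) never tries to load it. The codec list below is only an example; keep whatever other codecs your cluster already uses:
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
</property>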

Hive MapReduce Job Submission fails "Target is a directory"

I've been playing around with Hadoop and its sister projects, and I've had a few problems along the way, but I've finally hit one that I cannot find an answer to:
I have a Hive table stored on HDFS as a tab-delimited text file. I can do a basic select on the table, but as soon as I make the query a little more complicated, Hive turns it into a MapReduce job, which fails with the following stack trace:
13/11/29 08:31:00 ERROR security.UserGroupInformation: PriviledgedActionException as:hduser (auth:SIMPLE) cause:java.io.IOException: Target /tmp/hadoop-yarn/staging/hduser/.staging/job_1385633903169_0013/libjars/lib/lib is a directory
java.io.IOException: Target /tmp/hadoop-yarn/staging/hduser/.staging/job_1385633903169_0013/libjars/lib/lib is a directory
at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:500)
at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:502)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:348)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:338)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:289)
at org.apache.hadoop.mapreduce.JobSubmitter.copyRemoteFiles(JobSubmitter.java:139)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:212)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:300)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:387)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:562)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:557)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:557)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:548)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:425)
at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:144)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:259)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:216)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:781)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:675)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:614)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
Job Submission failed with exception 'java.io.IOException(Target /tmp/hadoop-yarn/staging/hduser/.staging/job_1385633903169_0013/libjars/lib/lib is a directory)'
13/11/29 08:31:00 ERROR exec.Task: Job Submission failed with exception 'java.io.IOException(Target /tmp/hadoop-yarn/staging/hduser/.staging/job_1385633903169_0013/libjars/lib/lib is a directory)'
java.io.IOException: Target /tmp/hadoop-yarn/staging/hduser/.staging/job_1385633903169_0013/libjars/lib/lib is a directory
at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:500)
at org.apache.hadoop.fs.FileUtil.checkDest(FileUtil.java:502)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:348)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:338)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:289)
at org.apache.hadoop.mapreduce.JobSubmitter.copyRemoteFiles(JobSubmitter.java:139)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:212)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:300)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:387)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:562)
at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:557)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:557)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:548)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:425)
at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:144)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:151)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:65)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1414)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1192)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1020)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:888)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:259)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:216)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:781)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:675)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:614)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
The folder in question does exist on the DFS, at least the "/tmp/hadoop-yarn/staging" part, and no matter what I set its permissions to, Hive or Hadoop resets them on job submission. The really concerning part is that the full path appears to be a generated folder name, so why does the software have a problem with something it generated on its own? Why is it a problem that the path is a directory? And what should it be instead?
Edit:
Here is the table I'm working with and the query I'm trying to run:
Query:
select * from hive_flow_details where node_id = 100 limit 10;
Table:
col_name          data_type  comment
id                bigint     None
flow_versions_id  int        None
node_id           int        None
node_name         string     None
Bear in mind, this happens with any query I attempt that has any kind of WHERE clause, as Hive translates that into an MR job.
I eventually got to the bottom of this issue: I found conflicting jars in my classpath, which I cleaned out, and since then I have had no problems.
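For anyone hitting the same thing, a hedged sketch of one way to spot duplicate jars; the lib directories below are assumptions, so point them at your actual Hive and Hadoop installs:
for f in $HIVE_HOME/lib/*.jar $HADOOP_HOME/share/hadoop/common/lib/*.jar; do
basename "$f"
done | sort | uniq -d
Any name this prints appears in both locations and is a candidate for the kind of classpath conflict described above.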

How to submit hadoop jobs as other Linux users

user "hadoop" is runnnig jobtracker and tasktracker on cluster, but what if other Linux users want to submit their jobs?
I've followed this documentation, http://hadoop.apache.org/docs/r1.0.4/service_level_auth.html#Hadoop+Services+and+Configuration+Properties
and added the following to "core-site.xml":
<property>
<name>hadoop.security.authorization</name>
<value>true</value>
</property>
For the conf file "hadoop-policy.xml", I added "hadoopGroup" to "security.job.submission.protocol.acl"; my account "qingya.shu" belongs to this group.
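For reference, the ACL entry described above would look roughly like this in hadoop-policy.xml. In the ACL format, comma-separated users come first and comma-separated groups follow a space, so a leading space means "no individual users, only the listed groups":
<property>
<name>security.job.submission.protocol.acl</name>
<value> hadoopGroup</value>
</property>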
But when I run Hadoop as qingya.shu, I encounter an error like:
12/11/09 12:57:29 INFO util.NativeCodeLoader: Loaded the native-hadoop library
12/11/09 12:57:29 INFO input.FileInputFormat: Total input paths to process : 1
12/11/09 12:57:29 WARN snappy.LoadSnappy: Snappy native library not loaded
12/11/09 12:57:29 INFO mapred.JobClient: Cleaning up the staging area file:/lustre/hadoop/user/qingya.shu/.staging/job_201211082024_0008
12/11/09 12:57:29 ERROR security.UserGroupInformation: PriviledgedActionException as:qingya.shu cause:org.apache.hadoop.ipc.RemoteException: java.io.IOException: java.io.FileNotFoundException: File file:/lustre/hadoop/user/qingya.shu/.staging/job_201211082024_0008/job.xml does not exist.
at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3943)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:563)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1388)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1384)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1382)
Caused by: java.io.FileNotFoundException: File file:/lustre/hadoop/user/qingya.shu/.staging/job_201211082024_0008/job.xml does not exist.
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:397)
at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:251)
at org.apache.hadoop.mapred.JobInProgress.<init>(JobInProgress.java:406)
at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3941)
... 11 more
org.apache.hadoop.ipc.RemoteException: java.io.IOException: java.io.FileNotFoundException: File file:/lustre/hadoop/user/qingya.shu/.staging/job_201211082024_0008/job.xml does not exist.
at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3943)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:563)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1388)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1384)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1382)
Caused by: java.io.FileNotFoundException: File file:/lustre/hadoop/user/qingya.shu/.staging/job_201211082024_0008/job.xml does not exist.
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:397)
at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:251)
at org.apache.hadoop.mapred.JobInProgress.<init>(JobInProgress.java:406)
at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3941)
... 11 more
at org.apache.hadoop.ipc.Client.call(Client.java:1070)
at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:225)
at org.apache.hadoop.mapred.$Proxy1.submitJob(Unknown Source)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:921)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:850)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:850)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:500)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:530)
at org.apache.hadoop.examples.WordCount.main(WordCount.java:67)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:68)
at org.apache.hadoop.util.ProgramDriver.driver(ProgramDriver.java:139)
at org.apache.hadoop.examples.ExampleDriver.main(ExampleDriver.java:64)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Note that Lustre is used as the distributed file system instead of HDFS.
Can anyone help me?
Try this (note: the property that takes the value "simple" is hadoop.security.authentication; hadoop.security.authorization only accepts true/false):
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
