Execute permissions do not stick on HDFS files - hadoop

Calling chmod is not sticking for the execute permissions.
-bash-3.2$ hadoop dfs -ls /user/myuser/myapp/
Found 1 items
-rw-rw-rw- 3 myuser myuser 2056 2013-09-13 23:14 /user/myuser/myapp/paf-rules.properties
-bash-3.2$ hadoop dfs -chmod ugo+x /user/myuser/myapp/*
-bash-3.2$ hadoop dfs -ls /user/myuser/myapp/
Found 1 items
-rw-rw-rw- 3 myuser myuser 2056 2013-09-13 23:14 /user/myuser/myapp/paf-rules.properties
We get a permissions error in which HDFS complains that the Execute permission is missing:
Unable to open readFileFromHdfs based rules file /user/myuser/dqm/paf-rules.properties : org.apache.hadoop.security.AccessControlException: org.apache.hadoop.security.AccessControlException: Permission denied: user=myuser, access=EXECUTE, inode="paf-rules.properties":myuser:myuser:rw-rw-rw-
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:95)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:57)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:960)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:524)
at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:768)
at com.apple.ist.gbi.edw.etl.fwrk.udf.dqm.UDFPafDqm.readFileFromHdfs(UDFPafDqm.java:81)
at com.apple.ist.gbi.edw.etl.fwrk.udf.dqm.UDFPafDqm.<init>(UDFPafDqm.java:110)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:113)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge.initialize(GenericUDFBridge.java:145)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDF.initializeAndFoldConstants(GenericUDF.java:98)
at org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc.newInstance(ExprNodeGenericFuncDesc.java:214)
at org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory$DefaultExprProcessor.getXpathOrFuncExprNodeDesc(TypeCheckProcFactory.java:767)
at org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory$DefaultExprProcessor.process(TypeCheckProcFactory.java:888)
at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:89)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:88)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:125)
at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:102)
at org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.genExprNode(TypeCheckProcFactory.java:165)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:7755)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:2310)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:2112)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:6165)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:6136)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:6762)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:6702)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:6723)
at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:7531)
at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:244)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:431)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:336)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:909)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:258)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:215)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:406)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:689)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:557)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
Caused by: org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.security.AccessControlException: Permission denied: user=myuser, access=EXECUTE, inode="paf-rules.properties":myuser:myuser:rw-rw-rw-
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:199)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkTraverse(FSPermissionChecker.java:155)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:125)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkPermission(FSNamesystem.java:5222)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkTraverse(FSNamesystem.java:5201)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getFileInfo(FSNamesystem.java:2027)
at org.apache.hadoop.hdfs.server.namenode.NameNode.getFileInfo(NameNode.java:825)
at sun.reflect.GeneratedMethodAccessor28.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:578)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1388)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1384)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1122)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1382)
at org.apache.hadoop.ipc.Client.call(Client.java:1092)
at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)
at $Proxy6.getFileInfo(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:85)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:62)
at $Proxy6.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:958)
... 42 more

Related

Nifi 1.10 STATELESS -Caused by: java.nio.file.NoSuchFileException

RUN THIS COMMAND
/bin/nifi.sh stateless RunFromRegistry Once --file ./test/stateless_test1.json
LOG
Note: Use of this command is considered experimental. The commands and approach used may change from time to time.
Java home (JAVA_HOME): /home/deltaman/software/jdk1.8.0_211
Java options (STATELESS_JAVA_OPTS): -Xms1024m -Xmx1024m
13:48:39.835 [main] INFO org.apache.nifi.StatelessNiFi - Unpacking 100 NARs
13:50:51.513 [main] INFO org.apache.nifi.StatelessNiFi - Finished unpacking 100 NARs in 131671 millis
Exception in thread "main" java.lang.reflect.InvocationTargetException
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.nifi.StatelessNiFi.main(StatelessNiFi.java:103)
... 5 more
Caused by: java.nio.file.NoSuchFileException: ./test/stateless_test1.json
at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102)
at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107)
at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214)
at java.nio.file.Files.newByteChannel(Files.java:361)
at java.nio.file.Files.newByteChannel(Files.java:407)
at java.nio.file.Files.readAllBytes(Files.java:3152)
at org.apache.nifi.stateless.runtimes.Program.runLocal(Program.java:119)
at org.apache.nifi.stateless.runtimes.Program.launch(Program.java:67)
... 10 more
it seems no exist file,but i can find the file as follows:
$ cat ./test/stateless_test1.json
{
"registryUrl": "http://10.148.123.12:9991",
"bucketId": "ec1b291e-c3f1-437c-a4e4-c069bd2f6ed1",
"flowId": "b1f73fe8-2874-47a5-970c-6b25eea19497",
"parameters": {
"text" : "xixixixi"
}
}
CONFIGURATION
IDK WHAT IS THE PROBLEM?
ANY SUGGESTION IS APPRECIATION!
/bin/nifi.sh stateless RunFromRegistry Once --file ./test/stateless_test1.json
it is relative path,must use full path,such as
/home/NiFi/nifi-1.10.0/bin/nifi.sh stateless RunFromRegistry Once --file /home/NiFi/nifi-1.10.0/test/stateless_test1.json

apache storm throws Exception "java.net.BindException: Address already in use"

Apache storm throws exception when I submit a new topology in storm cluster which has 100+ free slots. The storm cluster has 35 nodes.
org.apache.storm.shade.org.jboss.netty.channel.ChannelException: Failed to bind to: 0.0.0.0/0.0.0.0:6734
at org.apache.storm.shade.org.jboss.netty.bootstrap.ServerBootstrap.bind(ServerBootstrap.java:272) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.messaging.netty.Server.<init>(Server.java:101) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.messaging.netty.Context.bind(Context.java:67) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.daemon.worker$worker_data$fn__5173.invoke(worker.clj:272) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.util$assoc_apply_self.invoke(util.clj:931) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.daemon.worker$worker_data.invoke(worker.clj:269) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.daemon.worker$fn__5471$exec_fn__1371__auto__$reify__5473.run(worker.clj:613) ~[storm-core-1.0.3.jar:1.0.3]
at java.security.AccessController.doPrivileged(Native Method) ~[?:1.8.0_121]
at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_121]
at org.apache.storm.daemon.worker$fn__5471$exec_fn__1371__auto____5472.invoke(worker.clj:611) ~[storm-core-1.0.3.jar:1.0.3]
at clojure.lang.AFn.applyToHelper(AFn.java:178) ~[clojure-1.7.0.jar:?]
at clojure.lang.AFn.applyTo(AFn.java:144) ~[clojure-1.7.0.jar:?]
at clojure.core$apply.invoke(core.clj:630) ~[clojure-1.7.0.jar:?]
at org.apache.storm.daemon.worker$fn__5471$mk_worker__5562.doInvoke(worker.clj:585) [storm-core-1.0.3.jar:1.0.3]
at clojure.lang.RestFn.invoke(RestFn.java:512) [clojure-1.7.0.jar:?]
at org.apache.storm.daemon.worker$_main.invoke(worker.clj:769) [storm-core-1.0.3.jar:1.0.3]
at clojure.lang.AFn.applyToHelper(AFn.java:165) [clojure-1.7.0.jar:?]
at clojure.lang.AFn.applyTo(AFn.java:144) [clojure-1.7.0.jar:?]
at org.apache.storm.daemon.worker.main(Unknown Source) [storm-core-1.0.3.jar:1.0.3]
Caused by: java.net.BindException: Address already in use
at sun.nio.ch.Net.bind0(Native Method) ~[?:1.8.0_121]
at sun.nio.ch.Net.bind(Net.java:433) ~[?:1.8.0_121]
at sun.nio.ch.Net.bind(Net.java:425) ~[?:1.8.0_121]
at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:223) ~[?:1.8.0_121]
2019-11-02 18:39
at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74) ~[?:1.8.0_121]
at org.apache.storm.shade.org.jboss.netty.channel.socket.nio.NioServerBoss$RegisterTask.run(NioServerBoss.java:193) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.shade.org.jboss.netty.channel.socket.nio.AbstractNioSelector.processTaskQueue(AbstractNioSelector.java:372) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.shade.org.jboss.netty.channel.socket.nio.AbstractNioSelector.run(AbstractNioSelector.java:296) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.shade.org.jboss.netty.channel.socket.nio.NioServerBoss.run(NioServerBoss.java:42) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.shade.org.jboss.netty.util.ThreadRenamingRunnable.run(ThreadRenamingRunnable.java:108) ~[storm-core-1.0.3.jar:1.0.3]
at org.apache.storm.shade.org.jboss.netty.util.internal.DeadLockProofWorker$1.run(DeadLockProofWorker.java:42) ~[storm-core-1.0.3.jar:1.0.3]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_121]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_121]
at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_121]
2019-11-01 17:57:42.487 o.a.s.util main [ERROR] Halting process: ("Error on initialization")java.lang.RuntimeException: ("Error on initialization")
at org.apache.storm.util$exit_process_BANG_.doInvoke(util.clj:341) [storm-core-1.0.3.jar:1.0.3]
at clojure.lang.RestFn.invoke(RestFn.java:423) [clojure-1.7.0.jar:?]
at org.apache.storm.daemon.worker$fn__5471$mk_worker__5562.doInvoke(worker.clj:585) [storm-core-1.0.3.jar:1.0.3]
at clojure.lang.RestFn.invoke(RestFn.java:512) [clojure-1.7.0.jar:?]
at org.apache.storm.daemon.worker$_main.invoke(worker.clj:769) [storm-core-1.0.3.jar:1.0.3]
at clojure.lang.AFn.applyToHelper(AFn.java:165) [clojure-1.7.0.jar:?]
at clojure.lang.AFn.applyTo(AFn.java:144) [clojure-1.7.0.jar:?]
at org.apache.storm.daemon.worker.main(Unknown Source) [storm-core-1.0.3.jar:1.0.3]
When I use lsof command to find out which process uses the port, it shows
$ lsof -i:6734
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
java 21812 orc 292u IPv6 2397383060 0t0 TCP bj4191:6734->10.15.70.5:boe-resssvr2 (ESTABLISHED)
java 30636 orc 58u IPv6 2394755694 0t0 TCP bj4191:28367->10.12.178.13:6734 (ESTABLISHED)
java 30636 orc 90u IPv6 2394760492 0t0 TCP bj4191:23217->10.12.174.3:6734 (ESTABLISHED)
java 30636 orc 1191u IPv6 2397398591 0t0 TCP bj4191:6734->10.12.172.46:6307 (ESTABLISHED)
java 30645 orc 88u IPv6 2394657692 0t0 TCP bj4191:22997->10.12.174.3:6734 (ESTABLISHED)
java 30645 orc 99u IPv6 2394657702 0t0 TCP bj4191:28257->10.12.178.13:6734 (ESTABLISHED)
java 30645 orc 3016u IPv6 2394770505 0t0 TCP bj4191:6734->10.32.210.18:boks_clntd (ESTABLISHED)
I can't understand why the second process use an address which has been used. How can I solve it? Thx.

Spark On windows gives error in saveAsTextFile

I have installed Spark on my laptop and I am trying to execute some very basic commands. Most of them work except .saveAsTextFile. In pyshell I wrote
nums=sc.parallellize([1,2,3])
nums.saveAsTextFile("file:///C:/Java/ouput")
The last statement of saveAsTextFile gives me the following error
[Stage 0:> (0 + 8)
/ 8]2018-03-24 11:48:14 ERROR Executor:91 - Exception in task 6.0 in stage
0.0 (TID 6)
ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2018-03-24 11:48:14 WARN TaskSetManager:66 - Lost task 6.0 in stage 0.0 (TID 6, localhost, executor driver): ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2018-03-24 11:48:14 ERROR TaskSetManager:70 - Task 6 in stage 0.0 failed 1 times; aborting job
2018-03-24 11:48:14 ERROR SparkHadoopWriter:91 - Aborting job job_20180324114813_0003.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 6, localhost, executor driver): ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2048)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2080)
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1493)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1472)
at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:550)
at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\spark-2.3.0-bin-hadoop2.7\python\pyspark\rdd.py", line 1568, in saveAsTextFile
keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path)
File "C:\spark-2.3.0-bin-hadoop2.7\python\lib\py4j-0.10.6-src.zip\py4j\java_gateway.py", line 1160, in __call__
File "C:\spark-2.3.0-bin-hadoop2.7\python\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "C:\spark-2.3.0-bin-hadoop2.7\python\lib\py4j-0.10.6-src.zip\py4j\protocol.py", line 320, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o39.saveAsTextFile.
: org.apache.spark.SparkException: Job aborted.
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:96)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1493)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1472)
at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:550)
at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 6, localhost, executor driver): ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2048)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2080)
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
... 41 more
The error message was too long and mostly repetitive so I posted most of the it but couldn't include the last part due to size constraint.
This seems very basic and still somehow doesn't work. I am not sure what I might be doing wrong. Most of the suggestions that I have found until now have not worked for me. Any help will be greatly appreciated.
Have you set winutils.exe correctly?
Set the HADOOP_HOME environment variable correctly and make sure that winutils.exe can execute. If it can't, you may need to download MSVC Runtime Library.
Actually you should write to HDFS (Hadoop File System) as default.
I have simulated a session on a single cluster node. Here is the full list of commands creating the list, writing it to HDFS and finally printing out the results on the console using hdfs:
spark-shell
After the shell has started you type:
val nums = sc.parallelize(List(1,2,3,4,5))
nums.saveAsTextFile("/tmp/simple_list")
:quit
Now we read the data from HDFS (Hadoop File System):
hdfs dfs -ls /tmp/simple_list
This prints something like:
Found 3 items
-rw-r--r-- 1 gil_fernandes hadoop 0 2018-03-24 16:39 /tmp/simple_list/_SUCCESS
-rw-r--r-- 1 gil_fernandes hadoop 4 2018-03-24 16:39 /tmp/simple_list/part-00000
-rw-r--r-- 1 gil_fernandes hadoop 6 2018-03-24 16:39 /tmp/simple_list/part-00001
Finally you print out the content of the files using hdfs again:
hdfs dfs -cat /tmp/simple_list/part-*
This prints out:
1
2
3
4
5
You can also use the hdfs dfs cat command to get the file from HDFS:
hdfs dfs -cat /tmp/simple_list/part-* > simple_list.txt
Update
If you want to run saveAsTextFile using the file:// protocol you should typically run spark-shell under the user which runs you Spark cluster.
These are the steps I have used to save as a text file on the local file system:
Bash:
sudo -i
su - yarn
spark-shell
Spark-shell:
val nums = sc.parallelize(List(1,2,3,4,5))
nums.saveAsTextFile("file:///tmp/simple_list_12")
:quit
Bash:
ls /tmp/simple_list_12
Output of the last command:
part-00000 part-00001 _SUCCESS

InvokeHTTP NIFI TrustAnchors parameter must be non-empty

I'm Trying to send a GET Request to an external API which I need to put an apikey as a header to authenticate .
My problem is when I use the InvokeHTTP Processor with a GET Method and I add an attribute apikey with the key value, I get the following error :
The trustanchors parameter must be non-empty
2016-12-06 20:49:36,007 ERROR [Timer-Driven Process Thread-5] o.a.nifi.processors.standard.InvokeHTTP InvokeHTTP[id=d5437c56-0158-1000-f994-28154b2af3ae] Routing to Failure due to exception: javax.net.ssl.SSLException: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty: javax.net.ssl.SSLException: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
2016-12-06 20:49:36,009 ERROR [Timer-Driven Process Thread-5] o.a.nifi.processors.standard.InvokeHTTP
javax.net.ssl.SSLException: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
at sun.security.ssl.Alerts.getSSLException(Alerts.java:208) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1949) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1906) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.handleException(SSLSocketImpl.java:1889) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1410) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1387) ~[na:1.8.0_91]
at com.squareup.okhttp.internal.io.RealConnection.connectTls(RealConnection.java:188) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.io.RealConnection.connectSocket(RealConnection.java:145) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.io.RealConnection.connect(RealConnection.java:108) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.StreamAllocation.findConnection(StreamAllocation.java:184) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.StreamAllocation.findHealthyConnection(StreamAllocation.java:126) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.StreamAllocation.newStream(StreamAllocation.java:95) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.HttpEngine.connect(HttpEngine.java:283) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.HttpEngine.sendRequest(HttpEngine.java:224) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call.getResponse(Call.java:286) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call$ApplicationInterceptorChain.proceed(Call.java:243) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call.getResponseWithInterceptorChain(Call.java:205) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call.execute(Call.java:80) ~[okhttp-2.7.1.jar:na]
at org.apache.nifi.processors.standard.InvokeHTTP.onTrigger(InvokeHTTP.java:624) ~[nifi-standard-processors-1.0.0.jar:1.0.0]
at org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) [nifi-api-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1064) [nifi-framework-core-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.tasks.ContinuallyRunProcessorTask.call(ContinuallyRunProcessorTask.java:136) [nifi-framework-core-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.tasks.ContinuallyRunProcessorTask.call(ContinuallyRunProcessorTask.java:47) [nifi-framework-core-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:132) [nifi-framework-core-1.0.0.jar:1.0.0]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_91]
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [na:1.8.0_91]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_91]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [na:1.8.0_91]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_91]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_91]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_91]
Caused by: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
at sun.security.validator.PKIXValidator.<init>(PKIXValidator.java:90) ~[na:1.8.0_91]
at sun.security.validator.Validator.getInstance(Validator.java:179) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.getValidator(X509TrustManagerImpl.java:312) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.checkTrustedInit(X509TrustManagerImpl.java:171) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.checkTrusted(X509TrustManagerImpl.java:184) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.checkServerTrusted(X509TrustManagerImpl.java:124) ~[na:1.8.0_91]
at sun.security.ssl.ClientHandshaker.serverCertificate(ClientHandshaker.java:1491) ~[na:1.8.0_91]
at sun.security.ssl.ClientHandshaker.processMessage(ClientHandshaker.java:216) ~[na:1.8.0_91]
at sun.security.ssl.Handshaker.processLoop(Handshaker.java:979) ~[na:1.8.0_91]
at sun.security.ssl.Handshaker.process_record(Handshaker.java:914) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:1062) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.performInitialHandshake(SSLSocketImpl.java:1375) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1403) ~[na:1.8.0_91]
... 26 common frames omitted
Caused by: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
at java.security.cert.PKIXParameters.setTrustAnchors(PKIXParameters.java:200) ~[na:1.8.0_91]
at java.security.cert.PKIXParameters.<init>(PKIXParameters.java:120) ~[na:1.8.0_91]
at java.security.cert.PKIXBuilderParameters.<init>(PKIXBuilderParameters.java:104) ~[na:1.8.0_91]
at sun.security.validator.PKIXValidator.<init>(PKIXValidator.java:88) ~[na:1.8.0_91]
... 38 common frames omitted
2016-12-06 20:49:36,359 ERROR [Timer-Driven Process Thread-3] o.a.nifi.processors.standard.InvokeHTTP InvokeHTTP[id=d5437c56-0158-1000-f994-28154b2af3ae] Routing to Failure due to exception: javax.net.ssl.SSLException: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty: javax.net.ssl.SSLException: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
2016-12-06 20:49:36,360 ERROR [Timer-Driven Process Thread-3] o.a.nifi.processors.standard.InvokeHTTP
javax.net.ssl.SSLException: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
at sun.security.ssl.Alerts.getSSLException(Alerts.java:208) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1949) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1906) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.handleException(SSLSocketImpl.java:1889) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1410) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1387) ~[na:1.8.0_91]
at com.squareup.okhttp.internal.io.RealConnection.connectTls(RealConnection.java:188) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.io.RealConnection.connectSocket(RealConnection.java:145) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.io.RealConnection.connect(RealConnection.java:108) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.StreamAllocation.findConnection(StreamAllocation.java:184) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.StreamAllocation.findHealthyConnection(StreamAllocation.java:126) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.StreamAllocation.newStream(StreamAllocation.java:95) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.HttpEngine.connect(HttpEngine.java:283) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.internal.http.HttpEngine.sendRequest(HttpEngine.java:224) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call.getResponse(Call.java:286) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call$ApplicationInterceptorChain.proceed(Call.java:243) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call.getResponseWithInterceptorChain(Call.java:205) ~[okhttp-2.7.1.jar:na]
at com.squareup.okhttp.Call.execute(Call.java:80) ~[okhttp-2.7.1.jar:na]
at org.apache.nifi.processors.standard.InvokeHTTP.onTrigger(InvokeHTTP.java:624) ~[nifi-standard-processors-1.0.0.jar:1.0.0]
at org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27) [nifi-api-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1064) [nifi-framework-core-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.tasks.ContinuallyRunProcessorTask.call(ContinuallyRunProcessorTask.java:136) [nifi-framework-core-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.tasks.ContinuallyRunProcessorTask.call(ContinuallyRunProcessorTask.java:47) [nifi-framework-core-1.0.0.jar:1.0.0]
at org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:132) [nifi-framework-core-1.0.0.jar:1.0.0]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_91]
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [na:1.8.0_91]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_91]
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [na:1.8.0_91]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_91]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_91]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_91]
Caused by: java.lang.RuntimeException: Unexpected error: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
at sun.security.validator.PKIXValidator.<init>(PKIXValidator.java:90) ~[na:1.8.0_91]
at sun.security.validator.Validator.getInstance(Validator.java:179) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.getValidator(X509TrustManagerImpl.java:312) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.checkTrustedInit(X509TrustManagerImpl.java:171) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.checkTrusted(X509TrustManagerImpl.java:184) ~[na:1.8.0_91]
at sun.security.ssl.X509TrustManagerImpl.checkServerTrusted(X509TrustManagerImpl.java:124) ~[na:1.8.0_91]
at sun.security.ssl.ClientHandshaker.serverCertificate(ClientHandshaker.java:1491) ~[na:1.8.0_91]
at sun.security.ssl.ClientHandshaker.processMessage(ClientHandshaker.java:216) ~[na:1.8.0_91]
at sun.security.ssl.Handshaker.processLoop(Handshaker.java:979) ~[na:1.8.0_91]
at sun.security.ssl.Handshaker.process_record(Handshaker.java:914) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.readRecord(SSLSocketImpl.java:1062) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.performInitialHandshake(SSLSocketImpl.java:1375) ~[na:1.8.0_91]
at sun.security.ssl.SSLSocketImpl.startHandshake(SSLSocketImpl.java:1403) ~[na:1.8.0_91]
... 26 common frames omitted
Caused by: java.security.InvalidAlgorithmParameterException: the trustAnchors parameter must be non-empty
at java.security.cert.PKIXParameters.setTrustAnchors(PKIXParameters.java:200) ~[na:1.8.0_91]
at java.security.cert.PKIXParameters.<init>(PKIXParameters.java:120) ~[na:1.8.0_91]
at java.security.cert.PKIXBuilderParameters.<init>(PKIXBuilderParameters.java:104) ~[na:1.8.0_91]
at sun.security.validator.PKIXValidator.<init>(PKIXValidator.java:88) ~[na:1.8.0_91]
Any idea how to fix this ?
Copied suggestion from comments to serve as definitive answer:
Are you connecting to an HTTPS endpoint? If so, you'll need to load a
truststore in order to verify the server certificate presented by the
endpoint. You can use $JAVA_HOME/jre/lib/security/cacerts with the
default password of changeit for this -- it contains the CA
certificates trusted by your installation of Java.

Copying data between 2 different hadoop clusters

I am trying to copy data from one HDFS directory to another using distcp:
Source hadoop version:
hadoop version
Hadoop 2.0.0-cdh4.3.1
Destination hadoop version:
hadoop version
Hadoop 2.0.0-cdh4.4.0
Command I am using is:
hadoop distcp hftp://foo.test.net:50070/new_data/raw/new_logs/utc_date=2014-09-01/utc_hour=22 hdfs://localhost.localdomain/user/cloudera/new_logs
Error message I am getting is:
java.io.IOException: Copied: 0 Skipped: 0 Failed: 13
at org.apache.hadoop.tools.DistCp$CopyFilesMapper.close(DistCp.java:582)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:57)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:417)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:332)
at org.apache.hadoop.mapred.Child$4.run(Child.java:268)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1408)
at org.apache.hadoop.mapred.Child.main(Child.java:262)
==
Logs from "Task Logs":
2014-09-02 13:50:30,947 INFO org.apache.hadoop.tools.DistCp: FAIL utc_hour=22/000012_0 : java.io.IOException: HTTP_OK expected, received 500
at org.apache.hadoop.hdfs.HftpFileSystem$RangeHeaderUrlOpener.connect(HftpFileSystem.java:376)
at org.apache.hadoop.hdfs.ByteRangeInputStream.openInputStream(ByteRangeInputStream.java:119)
at org.apache.hadoop.hdfs.ByteRangeInputStream.getInputStream(ByteRangeInputStream.java:103)
at org.apache.hadoop.hdfs.ByteRangeInputStream.read(ByteRangeInputStream.java:187)
at java.io.DataInputStream.read(DataInputStream.java:83)
at org.apache.hadoop.tools.DistCp$CopyFilesMapper.copy(DistCp.java:424)
at org.apache.hadoop.tools.DistCp$CopyFilesMapper.map(DistCp.java:547)
at org.apache.hadoop.tools.DistCp$CopyFilesMapper.map(DistCp.java:314)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:50)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:417)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:332)
at org.apache.hadoop.mapred.Child$4.run(Child.java:268)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1408)
at org.apache.hadoop.mapred.Child.main(Child.java:262)
2014-09-02 13:50:31,118 INFO org.apache.hadoop.mapred.TaskLogsTruncater: Initializing logs' truncater with mapRetainSize=-1 and reduceRetainSize=-1
2014-09-02 13:50:31,131 ERROR org.apache.hadoop.security.UserGroupInformation: PriviledgedActionException as:cloudera (auth:SIMPLE) cause:java.io.IOException: Copied: 0 Skipped: 0 Failed: 13
2014-09-02 13:50:31,131 WARN org.apache.hadoop.mapred.Child: Error running child
java.io.IOException: Copied: 0 Skipped: 0 Failed: 13
at org.apache.hadoop.tools.DistCp$CopyFilesMapper.close(DistCp.java:582)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:57)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:417)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:332)
at org.apache.hadoop.mapred.Child$4.run(Child.java:268)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1408)
at org.apache.hadoop.mapred.Child.main(Child.java:262)
2014-09-02 13:50:31,145 INFO org.apache.hadoop.mapred.Task: Runnning cleanup for the task
=
Any help?
Thanks,
Rio

Resources