Please help me to resolve this issue.
I am getting issue while executing below commands under steps in jenkinsfile.
steps{
sh "curl -s -L https://github.com/stedolan/jq/releases/download/jq-1.5/jq-linux64 > ./jq"
sh "chmod +x ./jq"
sh "curl -X POST --header 'Content-Type: application/json' --header 'Accept: application/json' --header 'X-API-Token: xxxxxxxx' 'https://api.appcenter.ms/v0.1/apps/raghu/${app_name}/release_uploads' >> output_file.json"
sh "cat output_file.json"
script{
upload_id=$(cat output_file.json | jq -r '.upload_id')
echo "${upload_id}"
}
}
[Pipeline] End of Pipeline
groovy.lang.MissingPropertyException: No such property: output for class: groovy.lang.Binding
at groovy.lang.Binding.getVariable(Binding.java:63)
at org.jenkinsci.plugins.scriptsecurity.sandbox.groovy.SandboxInterceptor.onGetProperty(SandboxInterceptor.java:264)
at org.kohsuke.groovy.sandbox.impl.Checker$6.call(Checker.java:289)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:293)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:269)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:269)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:269)
at com.cloudbees.groovy.cps.sandbox.SandboxInvoker.getProperty(SandboxInvoker.java:29)
at com.cloudbees.groovy.cps.impl.PropertyAccessBlock.rawGet(PropertyAccessBlock.java:20)
at WorkflowScript.run(WorkflowScript:25)
at cps.transform(Native Method)
at com.cloudbees.groovy.cps.impl.PropertyishBlock$ContinuationImpl.get(PropertyishBlock.java:74)
at com.cloudbees.groovy.cps.LValueBlock$GetAdapter.receive(LValueBlock.java:30)
at com.cloudbees.groovy.cps.impl.PropertyishBlock$ContinuationImpl.fixName(PropertyishBlock.java:66)
at sun.reflect.GeneratedMethodAccessor8670.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.cloudbees.groovy.cps.impl.ContinuationPtr$ContinuationImpl.receive(ContinuationPtr.java:72)
at com.cloudbees.groovy.cps.impl.ConstantBlock.eval(ConstantBlock.java:21)
at com.cloudbees.groovy.cps.Next.step(Next.java:83)
at com.cloudbees.groovy.cps.Continuable$1.call(Continuable.java:174)
at com.cloudbees.groovy.cps.Continuable$1.call(Continuable.java:163)
at org.codehaus.groovy.runtime.GroovyCategorySupport$ThreadCategoryInfo.use(GroovyCategorySupport.java:129)
at org.codehaus.groovy.runtime.GroovyCategorySupport.use(GroovyCategorySupport.java:268)
at com.cloudbees.groovy.cps.Continuable.run0(Continuable.java:163)
at org.jenkinsci.plugins.workflow.cps.SandboxContinuable.access$101(SandboxContinuable.java:34)
at org.jenkinsci.plugins.workflow.cps.SandboxContinuable.lambda$run0$0(SandboxContinuable.java:59)
at org.jenkinsci.plugins.scriptsecurity.sandbox.groovy.GroovySandbox.runInSandbox(GroovySandbox.java:136)
at org.jenkinsci.plugins.workflow.cps.SandboxContinuable.run0(SandboxContinuable.java:58)
at org.jenkinsci.plugins.workflow.cps.CpsThread.runNextChunk(CpsThread.java:174)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.run(CpsThreadGroup.java:347)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.access$200(CpsThreadGroup.java:93)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup$2.call(CpsThreadGroup.java:259)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup$2.call(CpsThreadGroup.java:247)
at org.jenkinsci.plugins.workflow.cps.CpsVmExecutorService$2.call(CpsVmExecutorService.java:64)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at hudson.remoting.SingleLaneExecutorService$1.run(SingleLaneExecutorService.java:131)
at jenkins.util.ContextResettingExecutorService$1.run(ContextResettingExecutorService.java:28)
at jenkins.security.ImpersonatingExecutorService$1.run(ImpersonatingExecutorService.java:59)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Finished: FAILURE
script{
sh """
upload_id=\$(cat output_file.json | jq -r '.upload_id')
echo "\${upload_id}"
"""
}
You need to modify the script block as above and also need to escape $-sign to avoid this error. But dont know where you are using upload_id and where you are getting release_url
Related
We are trying to get the value from rest API and assign that value to variable to access on other stages, this stage running and we are getting desired value but stage is showing as failed and other stages are not executing, previosuly it was working but suddenly it is started failing, please find the stage defination below.
stage('Get build number') {
steps {
script {
try {
NirvanaCC = "${params.NirvanaClearCaseViewProfile}"
echo "CC is: ${NirvanaCC}"
def sout = new StringBuffer(), serr = new StringBuffer()
def val1 = NirvanaCC.tokenize( '\\' )
echo "val1 is: ${val1}"
def val2 = val1[3..-1]
echo "val2 is: ${val2}"
def val3 = val2.last()
echo "val3 is: ${val3}"
def val4 = val3.substring(0, val3.length() - 1)
echo "val4 is: ${val4}"
def val5 = val2.remove(val2.size() -1)
echo "val5 is: ${val5}"
def val6 = val2 << val4
echo "val6 is: ${val6}"
def branch = val6.join('\\')
echo "branch is: ${val6}"
//def proc = bat(script: '#curl -s -G --data ccbranch=Staging\\Enhancements -k https://apex.oraclecorp.com/pls/apex/jdedevops/nirvanabuild/next', returnStdout: true).trim()
def proc = "curl -v -G --data ccbranch=${branch} -k https://apex.oraclecorp.com/pls/apex/jdedevops/nirvanabuild/next".execute()
echo "Responce is: ${proc}"
proc.consumeProcessOutput(sout, serr)
proc.waitForOrKill(6000)
def x = sout.tokenize()
echo "sout is: ${x}"
JsonSlurper slurper = new JsonSlurper()
parsedJson = slurper.parseText(x)
def nextbuild = [parsedJson.next_buildid]
NirvanaClearCaseLabelName = nextbuild[0]
echo "NirvanaClearCaseLabelName is: $NirvanaClearCaseLabelName"
NirvanaProductVersion = nextbuild[0]
echo "NirvanaProductVersion is: $NirvanaProductVersion"
} catch (Exception e) {
echo 'Exception occurred: ' + e.toString()
}
}
}
}
Below is the output of the job
[Pipeline] { (Get build number)
[Pipeline] script
[Pipeline] {
[Pipeline] echo
CC is: "\Clearcase View Profiles\E1T_OWA\Staging\Enhancements"
[Pipeline] echo
val1 is: [", Clearcase View Profiles, E1T_OWA, Staging, Enhancements"]
[Pipeline] echo
val2 is: [Staging, Enhancements"]
[Pipeline] echo
val3 is: Enhancements"
[Pipeline] echo
val4 is: Enhancements
[Pipeline] echo
val5 is: Enhancements"
[Pipeline] echo
val6 is: [Staging, Enhancements]
[Pipeline] echo
branch is: [Staging, Enhancements]
[Pipeline] echo
Responce is: java.lang.ProcessImpl#5a3fbe3b
[Pipeline] echo
sout is: [{"next_buildid":"ENH_9.2.6.04.08"}]
[Pipeline] echo
NirvanaClearCaseLabelName is: ENH_9.2.6.04.08
[Pipeline] echo
NirvanaProductVersion is: ENH_9.2.6.04.08
[Pipeline] }
[Pipeline] // script
[Pipeline] }
[Pipeline] // stage
[Pipeline] stage
[Pipeline] { (Settings List)
Stage "Settings List" skipped due to earlier failure(s)
This is the error message I am getting at the end of the pipeline
[Pipeline] End of Pipelinean exception which occurred:
in field org.jenkinsci.plugins.pipeline.modeldefinition.withscript.WithScriptScript.script
in object org.jenkinsci.plugins.pipeline.modeldefinition.agent.impl.LabelScript#63b53ff
in field groovy.lang.Closure.delegate
in object org.jenkinsci.plugins.workflow.cps.CpsClosure2#397359e1
in field groovy.lang.Closure.delegate
in object org.jenkinsci.plugins.workflow.cps.CpsClosure2#28960361
in field org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.closures
in object org.jenkinsci.plugins.workflow.cps.CpsThreadGroup#649a893f
in object org.jenkinsci.plugins.workflow.cps.CpsThreadGroup#649a893f
Caused: java.io.NotSerializableException: groovy.json.internal.LazyMap
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:926)
at org.jboss.marshalling.river.BlockMarshaller.doWriteObject(BlockMarshaller.java:65)
at org.jboss.marshalling.river.BlockMarshaller.writeObject(BlockMarshaller.java:56)
at org.jboss.marshalling.MarshallerObjectOutputStream.writeObjectOverride(MarshallerObjectOutputStream.java:50)
at org.jboss.marshalling.river.RiverObjectOutputStream.writeObjectOverride(RiverObjectOutputStream.java:179)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:344)
at java.util.LinkedHashMap.internalWriteEntries(LinkedHashMap.java:333)
at java.util.HashMap.writeObject(HashMap.java:1363)
at sun.reflect.GeneratedMethodAccessor100.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.jboss.marshalling.reflect.JDKSpecific$SerMethods.callWriteObject(JDKSpecific.java:156)
at org.jboss.marshalling.reflect.SerializableClass.callWriteObject(SerializableClass.java:191)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1028)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.river.BlockMarshaller.doWriteObject(BlockMarshaller.java:65)
at org.jboss.marshalling.river.BlockMarshaller.writeObject(BlockMarshaller.java:56)
at org.jboss.marshalling.MarshallerObjectOutputStream.writeObjectOverride(MarshallerObjectOutputStream.java:50)
at org.jboss.marshalling.river.RiverObjectOutputStream.writeObjectOverride(RiverObjectOutputStream.java:179)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:344)
at com.cloudbees.groovy.cps.SerializableScript.writeObject(SerializableScript.java:26)
at sun.reflect.GeneratedMethodAccessor382.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.jboss.marshalling.reflect.JDKSpecific$SerMethods.callWriteObject(JDKSpecific.java:156)
at org.jboss.marshalling.reflect.SerializableClass.callWriteObject(SerializableClass.java:191)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1028)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.river.RiverMarshaller.doWriteFields(RiverMarshaller.java:1082)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1040)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.river.RiverMarshaller.doWriteFields(RiverMarshaller.java:1082)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1040)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.river.RiverMarshaller.doWriteFields(RiverMarshaller.java:1082)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1040)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1019)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.river.BlockMarshaller.doWriteObject(BlockMarshaller.java:65)
at org.jboss.marshalling.river.BlockMarshaller.writeObject(BlockMarshaller.java:56)
at org.jboss.marshalling.MarshallerObjectOutputStream.writeObjectOverride(MarshallerObjectOutputStream.java:50)
at org.jboss.marshalling.river.RiverObjectOutputStream.writeObjectOverride(RiverObjectOutputStream.java:179)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:344)
at java.util.HashMap.internalWriteEntries(HashMap.java:1793)
at java.util.HashMap.writeObject(HashMap.java:1363)
at sun.reflect.GeneratedMethodAccessor100.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.jboss.marshalling.reflect.JDKSpecific$SerMethods.callWriteObject(JDKSpecific.java:156)
at org.jboss.marshalling.reflect.SerializableClass.callWriteObject(SerializableClass.java:191)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1028)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.river.RiverMarshaller.doWriteFields(RiverMarshaller.java:1082)
at org.jboss.marshalling.river.RiverMarshaller.doWriteSerializableObject(RiverMarshaller.java:1040)
at org.jboss.marshalling.river.RiverMarshaller.doWriteObject(RiverMarshaller.java:920)
at org.jboss.marshalling.AbstractObjectOutput.writeObject(AbstractObjectOutput.java:58)
at org.jboss.marshalling.AbstractMarshaller.writeObject(AbstractMarshaller.java:111)
at org.jenkinsci.plugins.workflow.support.pickles.serialization.RiverWriter.lambda$writeObject$0(RiverWriter.java:144)
at org.jenkinsci.plugins.scriptsecurity.sandbox.groovy.GroovySandbox.runInSandbox(GroovySandbox.java:237)
at org.jenkinsci.plugins.workflow.support.pickles.serialization.RiverWriter.writeObject(RiverWriter.java:143)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.saveProgram(CpsThreadGroup.java:557)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.saveProgram(CpsThreadGroup.java:534)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.saveProgramIfPossible(CpsThreadGroup.java:517)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.run(CpsThreadGroup.java:441)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.access$400(CpsThreadGroup.java:96)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup$2.call(CpsThreadGroup.java:312)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup$2.call(CpsThreadGroup.java:276)
at org.jenkinsci.plugins.workflow.cps.CpsVmExecutorService$2.call(CpsVmExecutorService.java:67)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at hudson.remoting.SingleLaneExecutorService$1.run(SingleLaneExecutorService.java:139)
at jenkins.util.ContextResettingExecutorService$1.run(ContextResettingExecutorService.java:28)
at jenkins.security.ImpersonatingExecutorService$1.run(ImpersonatingExecutorService.java:68)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Finished: FAILURE
I have this sshagent code block:
sshagent(['ssh_key.hashed']) {
sh """
ssh -o StrictHostKeyChecking=no -l user example.com <<EOF
today=`date +%Y-%m-%d`
drush -r /var/www/html/example.com sql-dump --gzip > /var/www/html/example.com/backups/example_prodDB-jenkins-${today}.sql.gz
EOF
""".stripIndent()
}
Where as you can see, the real intention is to get the db dump of a drupal database.
Now, it will work if used on a regular shell script.
I need to write it that way as I will reuse the $today shell variable in another line of code.
Within the jenkinsfile, it seems like it is interpreted as a groovy variable based on the error:
groovy.lang.MissingPropertyException: No such property: today for class: groovy.lang.Binding
at groovy.lang.Binding.getVariable(Binding.java:63)
at org.jenkinsci.plugins.scriptsecurity.sandbox.groovy.SandboxInterceptor.onGetProperty(SandboxInterceptor.java:270)
at org.kohsuke.groovy.sandbox.impl.Checker$7.call(Checker.java:353)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:357)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:333)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:333)
at org.kohsuke.groovy.sandbox.impl.Checker.checkedGetProperty(Checker.java:333)
at com.cloudbees.groovy.cps.sandbox.SandboxInvoker.getProperty(SandboxInvoker.java:29)
at com.cloudbees.groovy.cps.impl.PropertyAccessBlock.rawGet(PropertyAccessBlock.java:20)
at WorkflowScript.run(WorkflowScript:11)
at ___cps.transform___(Native Method)
at com.cloudbees.groovy.cps.impl.PropertyishBlock$ContinuationImpl.get(PropertyishBlock.java:74)
at com.cloudbees.groovy.cps.LValueBlock$GetAdapter.receive(LValueBlock.java:30)
at com.cloudbees.groovy.cps.impl.PropertyishBlock$ContinuationImpl.fixName(PropertyishBlock.java:66)
at jdk.internal.reflect.GeneratedMethodAccessor305.invoke(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at com.cloudbees.groovy.cps.impl.ContinuationPtr$ContinuationImpl.receive(ContinuationPtr.java:72)
at com.cloudbees.groovy.cps.impl.ConstantBlock.eval(ConstantBlock.java:21)
at com.cloudbees.groovy.cps.Next.step(Next.java:83)
at com.cloudbees.groovy.cps.Continuable$1.call(Continuable.java:174)
at com.cloudbees.groovy.cps.Continuable$1.call(Continuable.java:163)
at org.codehaus.groovy.runtime.GroovyCategorySupport$ThreadCategoryInfo.use(GroovyCategorySupport.java:129)
at org.codehaus.groovy.runtime.GroovyCategorySupport.use(GroovyCategorySupport.java:268)
at com.cloudbees.groovy.cps.Continuable.run0(Continuable.java:163)
at org.jenkinsci.plugins.workflow.cps.SandboxContinuable.access$001(SandboxContinuable.java:18)
at org.jenkinsci.plugins.workflow.cps.SandboxContinuable.run0(SandboxContinuable.java:51)
at org.jenkinsci.plugins.workflow.cps.CpsThread.runNextChunk(CpsThread.java:185)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.run(CpsThreadGroup.java:400)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.access$400(CpsThreadGroup.java:96)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup$2.call(CpsThreadGroup.java:312)
at org.jenkinsci.plugins.workflow.cps.CpsThreadGroup$2.call(CpsThreadGroup.java:276)
at org.jenkinsci.plugins.workflow.cps.CpsVmExecutorService$2.call(CpsVmExecutorService.java:67)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at hudson.remoting.SingleLaneExecutorService$1.run(SingleLaneExecutorService.java:131)
at jenkins.util.ContextResettingExecutorService$1.run(ContextResettingExecutorService.java:28)
at jenkins.security.ImpersonatingExecutorService$1.run(ImpersonatingExecutorService.java:59)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:834)
Finished: FAILURE
Any hints is appreciated.
Thanks.
After a lot of searching online and trial and error found this helpful reference https://code-maven.com/jenkins-pipeline-environment-variables
I don't know if it's the right way to do it but it works.
So in summary, I had to add an environment variable and add use it within my "sshagent"
Posting sample Jenkinsfile that works.
pipeline {
agent any
stages {
stage('Dump Prod DB') {
environment {
today = sh(script: 'date +%Y-%m-%d', , returnStdout: true).trim()
}
steps {
sshagent(['promet_key.hashed']) {
sh """
ssh -o StrictHostKeyChecking=no -l user example.com <<EOF
drush -r /var/www/html/example.com sql-dump --gzip > /var/www/html/example.com/backups/example_prodDB-jenkins-${today}.sql.gz
EOF
""".stripIndent()
}
}
}
}
}
I can not solve GCS bucket permission issue when submitting job to Dataproc.
Here is what I'm doing:
Created a project
Created a bucket xmitya-test
Created a cluster:
gcloud dataproc clusters create cascade --bucket=xmitya-test \
--master-boot-disk-size=80G --master-boot-disk-type=pd-standard \
--num-master-local-ssds=0 --num-masters=1 \
--num-workers=2 --num-worker-local-ssds=0 \
--worker-boot-disk-size=80G --worker-boot-disk-type=pd-standard \
--master-machine-type=n1-standard-2 \
--worker-machine-type=n1-standard-2 \
--zone=us-west1-a --image-version=1.3 \
--properties 'hadoop-env:HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:/etc/tez/conf:/usr/lib/tez/*:/usr/lib/tez/lib/*'
Uploaded job jar: /apps/wordcount.jar and library /apps/lib/commons-collections-3.2.2.jar
Then submit a job with jar in classpath:
gcloud dataproc jobs submit hadoop --cluster=cascade \
--jar=gs:/apps/wordcount.jar \
--jars=gs://apps/lib/commons-collections-3.2.2.jar --bucket=xmitya-test \
-- gs:/input/url+page.200.txt gs:/output/wc.out local
Then I'm getting forbidden error accessing the library file:
java.io.IOException: Error accessing: bucket: apps, object: lib/commons-collections-3.2.2.jar
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.wrapException(GoogleCloudStorageImpl.java:1957)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getObject(GoogleCloudStorageImpl.java:1983)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getItemInfo(GoogleCloudStorageImpl.java:1870)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.getFileInfo(GoogleCloudStorageFileSystem.java:1156)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.getFileStatus(GoogleHadoopFileSystemBase.java:1058)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:363)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:314)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2375)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2344)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.copyToLocalFile(GoogleHadoopFileSystemBase.java:1793)
at org.apache.hadoop.fs.FileSystem.copyToLocalFile(FileSystem.java:2320)
at com.google.cloud.hadoop.services.agent.util.HadoopUtil.download(HadoopUtil.java:70)
at com.google.cloud.hadoop.services.agent.job.AbstractJobHandler.downloadResources(AbstractJobHandler.java:448)
at com.google.cloud.hadoop.services.agent.job.AbstractJobHandler$StartDriver.call(AbstractJobHandler.java:579)
at com.google.cloud.hadoop.services.agent.job.AbstractJobHandler$StartDriver.call(AbstractJobHandler.java:568)
at com.google.cloud.hadoop.services.repackaged.com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:125)
at com.google.cloud.hadoop.services.repackaged.com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:57)
at com.google.cloud.hadoop.services.repackaged.com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:78)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.json.GoogleJsonResponseException: 403 Forbidden
{
"code" : 403,
"errors" : [ {
"domain" : "global",
"message" : "714526773712-compute#developer.gserviceaccount.com does not have storage.objects.get access to apps/lib/commons-collections-3.2.2.jar.",
"reason" : "forbidden"
} ],
"message" : "714526773712-compute#developer.gserviceaccount.com does not have storage.objects.get access to apps/lib/commons-collections-3.2.2.jar."
}
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.json.GoogleJsonResponseException.from(GoogleJsonResponseException.java:150)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:113)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:40)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest$1.interceptResponse(AbstractGoogleClientRequest.java:401)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1097)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:499)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:432)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:549)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getObject(GoogleCloudStorageImpl.java:1978)
... 23 more
Tried set read permission from browser to 714526773712-compute#developer.gserviceaccount.com user and set public permissions to all files: gsutil defacl ch -u AllUsers:R gs://xmitya-test and gsutil acl ch -d allUsers:R gs://xmitya-test/** - no effect.
What could be the reason?
Thanks!
It's complaining about access to apps, input and output buckets, that you specified in parameters of job submission command:
gcloud dataproc jobs submit hadoop --cluster=cascade --jar=gs:/apps/wordcount.jar --jars=gs://apps/lib/commons-collections-3.2.2.jar --bucket=xmitya-test gs:/input/url+page.200.txt gs:/output/wc.out local
To fix this issue you need to grant access to these buckets or if these are folders inside xmitya-test bucket then you need to specify it explicitly in the path: gs://xmitya-test/apps/wordcount.jar.
I have installed Spark on my laptop and I am trying to execute some very basic commands. Most of them work except .saveAsTextFile. In pyshell I wrote
nums=sc.parallellize([1,2,3])
nums.saveAsTextFile("file:///C:/Java/ouput")
The last statement of saveAsTextFile gives me the following error
[Stage 0:> (0 + 8)
/ 8]2018-03-24 11:48:14 ERROR Executor:91 - Exception in task 6.0 in stage
0.0 (TID 6)
ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2018-03-24 11:48:14 WARN TaskSetManager:66 - Lost task 6.0 in stage 0.0 (TID 6, localhost, executor driver): ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
2018-03-24 11:48:14 ERROR TaskSetManager:70 - Task 6 in stage 0.0 failed 1 times; aborting job
2018-03-24 11:48:14 ERROR SparkHadoopWriter:91 - Aborting job job_20180324114813_0003.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 6, localhost, executor driver): ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2048)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2080)
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1493)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1472)
at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:550)
at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\spark-2.3.0-bin-hadoop2.7\python\pyspark\rdd.py", line 1568, in saveAsTextFile
keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path)
File "C:\spark-2.3.0-bin-hadoop2.7\python\lib\py4j-0.10.6-src.zip\py4j\java_gateway.py", line 1160, in __call__
File "C:\spark-2.3.0-bin-hadoop2.7\python\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "C:\spark-2.3.0-bin-hadoop2.7\python\lib\py4j-0.10.6-src.zip\py4j\protocol.py", line 320, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o39.saveAsTextFile.
: org.apache.spark.SparkException: Job aborted.
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:96)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1096)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1094)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1067)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1032)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:958)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:957)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1493)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1472)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1472)
at org.apache.spark.api.java.JavaRDDLike$class.saveAsTextFile(JavaRDDLike.scala:550)
at org.apache.spark.api.java.AbstractJavaRDDLike.saveAsTextFile(JavaRDDLike.scala:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 6, localhost, executor driver): ExitCodeException exitCode=-1073741701:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:804)
at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
at org.apache.spark.internal.io.HadoopMapRedWriteConfigUtil.initWriter(SparkHadoopWriter.scala:224)
at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:118)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:79)
at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2048)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2080)
at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
... 41 more
The error message was too long and mostly repetitive so I posted most of the it but couldn't include the last part due to size constraint.
This seems very basic and still somehow doesn't work. I am not sure what I might be doing wrong. Most of the suggestions that I have found until now have not worked for me. Any help will be greatly appreciated.
Have you set winutils.exe correctly?
Set the HADOOP_HOME environment variable correctly and make sure that winutils.exe can execute. If it can't, you may need to download MSVC Runtime Library.
Actually you should write to HDFS (Hadoop File System) as default.
I have simulated a session on a single cluster node. Here is the full list of commands creating the list, writing it to HDFS and finally printing out the results on the console using hdfs:
spark-shell
After the shell has started you type:
val nums = sc.parallelize(List(1,2,3,4,5))
nums.saveAsTextFile("/tmp/simple_list")
:quit
Now we read the data from HDFS (Hadoop File System):
hdfs dfs -ls /tmp/simple_list
This prints something like:
Found 3 items
-rw-r--r-- 1 gil_fernandes hadoop 0 2018-03-24 16:39 /tmp/simple_list/_SUCCESS
-rw-r--r-- 1 gil_fernandes hadoop 4 2018-03-24 16:39 /tmp/simple_list/part-00000
-rw-r--r-- 1 gil_fernandes hadoop 6 2018-03-24 16:39 /tmp/simple_list/part-00001
Finally you print out the content of the files using hdfs again:
hdfs dfs -cat /tmp/simple_list/part-*
This prints out:
1
2
3
4
5
You can also use the hdfs dfs cat command to get the file from HDFS:
hdfs dfs -cat /tmp/simple_list/part-* > simple_list.txt
Update
If you want to run saveAsTextFile using the file:// protocol you should typically run spark-shell under the user which runs you Spark cluster.
These are the steps I have used to save as a text file on the local file system:
Bash:
sudo -i
su - yarn
spark-shell
Spark-shell:
val nums = sc.parallelize(List(1,2,3,4,5))
nums.saveAsTextFile("file:///tmp/simple_list_12")
:quit
Bash:
ls /tmp/simple_list_12
Output of the last command:
part-00000 part-00001 _SUCCESS
Hadoop streaming makes the filename available to every map task through the environment variable.
Python:
os.environ["map.input.file"]
Java:
System.getenv(“map.input.file”).
How about Ruby?
mapper.rb
#!/usr/bin/env ruby
STDIN.each_line do |line|
line.split.each do |word|
word = word[/([a-zA-Z0-9]+)/]
word = word.gsub(/ /,"")
puts [word, 1].join("\t")
end
end
puts ENV['map.input.file']
How about:
ENV['map.input.file']
Ruby lets you assign to the ENV hash just as easily:
ENV['map.input.file'] = '/path/to/file'
All JobConf variables are put into environment variables by hadoop-streaming. The variable names are made "safe" by converting any character not in 0-9 A-Z a-z to _.
So map.input.file => map_input_file
Try: puts ENV['map_input_file']
Using the input from op, I tried mapper:
#!/usr/bin/python
import os
file_name = os.getenv('map_input_file')
print file_name
and a standard wordcount reducer using command:
hadoop fs -rmr /user/itsjeevs/wc &&
hadoop jar $STRMJAR -files /home/jejoseph/wc_mapper.py,/home/jejoseph/wc_reducer.py \
-mapper wc_mapper.py \
-reducer wc_reducer.py \
-numReduceTasks 10 \
-input "/data/*" \
-output wc
to fail with error:
16/03/10 15:21:32 INFO mapreduce.Job: Task Id : attempt_1455931799889_822384_m_000043_0, Status : FAILED
Error: java.io.IOException: Stream closed
at java.lang.ProcessBuilder$NullOutputStream.write(ProcessBuilder.java:434)
at java.io.OutputStream.write(OutputStream.java:116)
at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82)
at java.io.BufferedOutputStream.write(BufferedOutputStream.java:126)
at java.io.DataOutputStream.write(DataOutputStream.java:107)
at org.apache.hadoop.streaming.io.TextInputWriter.writeUTF8(TextInputWriter.java:72)
at org.apache.hadoop.streaming.io.TextInputWriter.writeValue(TextInputWriter.java:51)
at org.apache.hadoop.streaming.PipeMapper.map(PipeMapper.java:106)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
16/03/10 15:21:32 INFO mapreduce.Job: Task Id : attempt_1455931799889_822384_m_000077_0, Status : FAILED
Error: java.io.IOException: Broken pipe
at java.io.FileOutputStream.writeBytes(Native Method)
at java.io.FileOutputStream.write(FileOutputStream.java:345)
at java.io.BufferedOutputStream.write(BufferedOutputStream.java:122)
at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82)
at java.io.BufferedOutputStream.write(BufferedOutputStream.java:126)
at java.io.DataOutputStream.write(DataOutputStream.java:107)
at org.apache.hadoop.streaming.io.TextInputWriter.writeUTF8(TextInputWriter.java:72)
at org.apache.hadoop.streaming.io.TextInputWriter.writeValue(TextInputWriter.java:51)
at org.apache.hadoop.streaming.PipeMapper.map(PipeMapper.java:106)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.streaming.PipeMapRunner.run(PipeMapRunner.java:34)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:450)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Not sure what is happening.