Recursively upload a directory contents to GCS using GO SDK - go

I am trying to upload and download a directory to GCS which container large amount of data. Can someone point me how to achieve this using Golang SDK. I would like to do multipart upload as well (-m)
Sample Directory structure:
$ tree dir1/
dir1/
└── dir2
└── dir3
├── 1.csv
└── 2.csv
Equivalent GSUtil command: gustil -m cp -r dir1 gs://exmaple/

By my understanding, I believe the GCP storage API is atomic and only allows for a single object upload at a time.
The gsutil command uses recursion to iterate through the provided path but ultimately does singular API calls for uploading.
The snippet below will give you some hints, please not I haven't tested the actual copying to GCP. I have only verified that the code compiles.
It's a modified version of what Google have in their docs Uploading Objects
package main
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
"cloud.google.com/go/storage"
)
// BulkUpload uploads files in bulk
func BulkUpload(bucket, rootPath string) error {
ctx := context.Background()
client, err := storage.NewClient(ctx)
if err != nil {
return fmt.Errorf("storage.NewClient: %v", err)
}
defer client.Close()
fileList, objPath, err := pathWalk(rootPath)
if err != nil {
return err
}
_ = objPath
for i:=0; i <len(fileList);i++ {
// Open and read local file
f, err := os.Open(fileList[i])
if err != nil {
return fmt.Errorf("os.Open: %v", err)
}
f.Close()
ctx, cancel := context.WithTimeout(ctx, time.Second*50)
defer cancel()
// Upload an object with storage.Writer.
wc := client.Bucket(bucket).Object(objPath[i]).NewWriter(ctx)
if _, err = io.Copy(wc, f); err != nil {
return fmt.Errorf("io.Copy: %v", err)
}
if err := wc.Close(); err != nil {
return fmt.Errorf("Writer.Close: %v", err)
}
fmt.Printf("Blob %v uploaded.\n", objPath[i])
}
return nil
}
// pathWalk create a list of files from a root path
// returns a list of all files and a list of files excluding the root path
func pathWalk(root string) ([]string, []string, error){
var files []string
var objectPath []string
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
files = append(files, path)
return nil
})
if err != nil {
return nil, nil, fmt.Errorf("error walking path: %v", err)
}
for _, f := range files {
objectPath = append(objectPath, strings.Trim(f, root))
}
return files, objectPath, nil
}
func main() {
if err := BulkUpload("mybucket", "somedirectory/" ); err != nil {
panic(err)
}
}

Related

ClamAv not detecting eicar signature in a zip file

I have a zip file (considerably large for ClamAV) that has EICAR file in it and for whatever reason, clam av is unable to detect it. When I unzip the file and pass the folder path, it is able to detect the EICAR signature. It is also able to detect eicar signatures on small zip files consistently but not so consistent with large files. I have also observed that ClamAV is not able to detect EICAR signatures on some golang and java lib compressed files but is able to detect them when compressed using the zip command line util.
Max file size and scan size are set to 0 to disable any limit.
Steps to reproduce: Please clone the repo here and compress using golang's archive/zip. Pass this on to ClamAV to find that the EICAR signature is not detected.
Here is what I have used to compress the file in golang.
package main
import (
"archive/zip"
"io"
"log"
"os"
"path/filepath"
)
func zipSource(source, target string) error {
// 1. Create a ZIP file and zip.Writer
f, err := os.Create(target)
if err != nil {
return err
}
defer f.Close()
writer := zip.NewWriter(f)
defer writer.Close()
// 2. Go through all the files of the source
return filepath.Walk(source, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// 3. Create a local file header
header, err := zip.FileInfoHeader(info)
if err != nil {
return err
}
// set compression
header.Method = zip.Deflate
// 4. Set relative path of a file as the header name
header.Name, err = filepath.Rel(filepath.Dir(source), path)
if err != nil {
return err
}
if info.IsDir() {
header.Name += "/"
}
// 5. Create writer for the file header and save content of the file
headerWriter, err := writer.CreateHeader(header)
if err != nil {
return err
}
if info.IsDir() {
return nil
}
f, err := os.Open(path)
if err != nil {
return err
}
defer f.Close()
_, err = io.Copy(headerWriter, f)
return err
})
}
func main() {
if err := zipSource({sourcefolderLocation}, {targetZipFileName}); err != nil {
log.Fatal(err)
}
}
Any help in understanding this unpredictable behavior is highly appreciated.

Golang: Facing error while creating .tar.gz file having large name

I am trying to create a .tar.gz file from folder that contains multiple files / folders. Once the .tar.gz file gets created, while extracting, the files are not not properly extracted. Mostly I think its because of large names or path exceeding some n characters, because same thing works when the filename/path is small. I referred this https://github.com/golang/go/issues/17630 and tried to add below code but it did not help.
header.Uid = 0
header.Gid = 0
I am using simple code seen below to create .tar.gz. The approach is, I create a temp folder, do some processing on the files and from that temp path, I create the .tar.gz file hence in the path below I am using pre-defined temp folder path.
package main
import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"log"
"os"
fp "path/filepath"
)
func main() {
// Create output file
out, err := os.Create("output.tar.gz")
if err != nil {
log.Fatalln("Error writing archive:", err)
}
defer out.Close()
// Create the archive and write the output to the "out" Writer
tmpDir := "C:/Users/USERNAME~1/AppData/Local/Temp/temp-241232063"
err = createArchive1(tmpDir, out)
if err != nil {
log.Fatalln("Error creating archive:", err)
}
fmt.Println("Archive created successfully")
}
func createArchive1(path string, targetFile *os.File) error {
gw := gzip.NewWriter(targetFile)
defer gw.Close()
tw := tar.NewWriter(gw)
defer tw.Close()
// walk through every file in the folder
err := fp.Walk(path, func(filePath string, info os.FileInfo, err error) error {
// ensure the src actually exists before trying to tar it
if _, err := os.Stat(filePath); err != nil {
return err
}
if err != nil {
return err
}
if info.IsDir() {
return nil
}
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
// generate tar header
header, err := tar.FileInfoHeader(info, info.Name())
header.Uid = 0
header.Gid = 0
if err != nil {
return err
}
header.Name = filePath //strings.TrimPrefix(filePath, fmt.Sprintf("%s/", fp.Dir(path))) //info.Name()
// write header
if err := tw.WriteHeader(header); err != nil {
return err
}
if _, err := io.Copy(tw, file); err != nil {
return err
}
return nil
})
return err
}
Please let me know what wrong I am doing.

Golang - Zip a directory which includes empty subdirectory and files

I am trying to zip an existing directory that has some empty subdirectories as well.
Here is the folder structure.
parent/
├── child
│   └── child.txt
├── empty-folder
└── parent.txt
2 directories, 2 files
Here is the source code. It writes all the subdirectories which have files on that. But it skipped an empty subdirectory. Is there any way to add an empty subdirectory as well in the zip file?. Thanks in advance.
package main
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
)
// check for error and stop the execution
func checkForError(err error) {
if err != nil {
fmt.Println("Error - ", err)
os.Exit(1)
}
}
const (
ZIP_FILE_NAME = "example.zip"
MAIN_FOLDER_NAME = "parent"
)
// Main function
func main() {
var targetFilePaths []string
// get filepaths in all folders
err := filepath.Walk(MAIN_FOLDER_NAME, func(path string, info os.FileInfo, err error) error {
if info.IsDir() {
return nil
}
// add all the file paths to slice
targetFilePaths = append(targetFilePaths, path)
return nil
})
checkForError(err)
// zip file logic starts here
ZipFile, err := os.Create(ZIP_FILE_NAME)
checkForError(err)
defer ZipFile.Close()
zipWriter := zip.NewWriter(ZipFile)
defer zipWriter.Close()
for _, targetFilePath := range targetFilePaths {
file, err := os.Open(targetFilePath)
checkForError(err)
defer file.Close()
// create path in zip
w, err := zipWriter.Create(targetFilePath)
checkForError(err)
// write file to zip
_, err = io.Copy(w, file)
checkForError(err)
}
}
To write an empty directory you just need to call Create with the directory path with a trailing path separator.
package main
import (
"archive/zip"
"fmt"
"io"
"log"
"os"
"path/filepath"
)
const (
ZIP_FILE_NAME = "example.zip"
MAIN_FOLDER_NAME = "parent"
)
type fileMeta struct {
Path string
IsDir bool
}
func main() {
var files []fileMeta
err := filepath.Walk(MAIN_FOLDER_NAME, func(path string, info os.FileInfo, err error) error {
files = append(files, fileMeta{Path: path, IsDir: info.IsDir()})
return nil
})
if err != nil {
log.Fatalln(err)
}
z, err := os.Create(ZIP_FILE_NAME)
if err != nil {
log.Fatalln(err)
}
defer z.Close()
zw := zip.NewWriter(z)
defer zw.Close()
for _, f := range files {
path := f.Path
if f.IsDir {
path = fmt.Sprintf("%s%c", path, os.PathSeparator)
}
w, err := zw.Create(path)
if err != nil {
log.Fatalln(err)
}
if !f.IsDir {
file, err := os.Open(f.Path)
if err != nil {
log.Fatalln(err)
}
defer file.Close()
if _, err = io.Copy(w, file); err != nil {
log.Fatalln(err)
}
}
}
}

How to compress a file to .zip without directory folder in Go

There're examples about compressing a file to .zip in Go. However, the file they generate include the directory folder. When I decompress the .zip file, there will be a new folder.
So, how can I compress a file to .zip without getting the directory folder included?
An example:
https://golangcode.com/create-zip-files-in-go/
package main
import (
"archive/zip"
"fmt"
"io"
"os"
)
func main() {
// List of Files to Zip
files := []string{"example.csv", "data.csv"}
output := "done.zip"
if err := ZipFiles(output, files); err != nil {
panic(err)
}
fmt.Println("Zipped File:", output)
}
// ZipFiles compresses one or many files into a single zip archive file.
// Param 1: filename is the output zip file's name.
// Param 2: files is a list of files to add to the zip.
func ZipFiles(filename string, files []string) error {
newZipFile, err := os.Create(filename)
if err != nil {
return err
}
defer newZipFile.Close()
zipWriter := zip.NewWriter(newZipFile)
defer zipWriter.Close()
// Add files to zip
for _, file := range files {
if err = AddFileToZip(zipWriter, file); err != nil {
return err
}
}
return nil
}
func AddFileToZip(zipWriter *zip.Writer, filename string) error {
fileToZip, err := os.Open(filename)
if err != nil {
return err
}
defer fileToZip.Close()
// Get the file information
info, err := fileToZip.Stat()
if err != nil {
return err
}
header, err := zip.FileInfoHeader(info)
if err != nil {
return err
}
// Using FileInfoHeader() above only uses the basename of the file. If we want
// to preserve the folder structure we can overwrite this with the full path.
header.Name = filename
// Change to deflate to gain better compression
// see http://golang.org/pkg/archive/zip/#pkg-constants
header.Method = zip.Deflate
writer, err := zipWriter.CreateHeader(header)
if err != nil {
return err
}
_, err = io.Copy(writer, fileToZip)
return err
}
Just use a base name of the file in the zip header.
header.Name = filepath.Base(filename)
^^^^^^^^^^^^^^
Here is a version that does the same thing
package main
import (
"archive/zip"
"io"
"log"
"os"
"path/filepath"
)
func createFlatZip(w io.Writer, files ...string) error {
z := zip.NewWriter(w)
for _, file := range files {
src, err := os.Open(file)
if err != nil {
return err
}
info, err := src.Stat()
if err != nil {
return err
}
hdr, err := zip.FileInfoHeader(info)
if err != nil {
return err
}
hdr.Name = filepath.Base(file) // Write only the base name in the header
dst, err := z.CreateHeader(hdr)
if err != nil {
return err
}
_, err = io.Copy(dst, src)
if err != nil {
return err
}
src.Close()
}
return z.Close()
}
func main() {
if len(os.Args) < 3 {
log.Fatalf("archive name and at least one file are required")
}
a, err := os.Create(os.Args[1])
if err != nil {
log.Fatal(err)
}
defer a.Close()
err = createFlatZip(a, os.Args[2:]...)
if err != nil {
log.Fatal(err)
}
}
Results:
~/src/gozip
➜ go build
~/src/gozip
➜ mkdir test && echo 1 > test/1.txt # create a test file in a subfolder
~/src/gozip
➜ ./gozip 1.zip test/1.txt
~/src/gozip
➜ unzip -l 1.zip
Archive: 1.zip
Length Date Time Name
--------- ---------- ----- ----
2 08-15-2019 01:29 1.txt
--------- -------
2 1 file

zip a folder of files with golang creates a broken zip

how do I zip files in a folder with sub-directories correctly.
I have a local folder with following structure:
folder/hello/
folder/hello/world/
folder/hello/world/helloword.txt
folder/index.txt
this is my code:
package main
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
)
func main() {
files, err := listFiles("./folder")
if err != nil {
panic(err)
}
zipMe(files, "test.zip")
for _, f := range files {
fmt.Println(f)
}
fmt.Println("Done!")
}
func listFiles(root string) ([]string, error) {
var files []string
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
files = append(files, path)
return nil
})
if err != nil {
return nil, err
}
return files, nil
}
func zipMe(filepaths []string, target string) error {
flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
file, err := os.OpenFile(target, flags, 0644)
if err != nil {
return fmt.Errorf("Failed to open zip for writing: %s", err)
}
defer file.Close()
zipw := zip.NewWriter(file)
defer zipw.Close()
for _, filename := range filepaths {
if err := addFileToZip(filename, zipw); err != nil {
return fmt.Errorf("Failed to add file %s to zip: %s", filename, err)
}
}
return nil
}
func addFileToZip(filename string, zipw *zip.Writer) error {
file, err := os.Open(filename)
if err != nil {
return fmt.Errorf("Error opening file %s: %s", filename, err)
}
defer file.Close()
wr, err := zipw.Create(filename)
if err != nil {
return fmt.Errorf("Error adding file; '%s' to zip : %s", filename, err)
}
if _, err := io.Copy(wr, file); err != nil {
return fmt.Errorf("Error writing %s to zip: %s", filename, err)
}
return nil
}
This creates a broken zip which can not be extracted (I am running on mac os but this should not make a difference).
I also tried several other examples from stackoverflow and links found through google, but I do always get a broken zip. I get a zip with 135 bytes when I extract it I get 1 binary file with 0 bytes).
It would be great if someone could help me to find out what I am missing here.
Thx
You need to list and zip the files, not the directories. Simply make this adjustment to ignore directories in your listFiles function.
if !info.IsDir() {
files = append(files, path)
}

Resources