How do I optimize code that computes a hash and then seeks back to the beginning of the file to re-read it? - go

How do I make this not require a file seek? Basically, I am computing a hash and then re-reading the file, which is not optimal. How can I optimize this, either with TeeReader or another method of reading in chunks, so hashing and writing the content can happen without reading the file twice?
Also, do I need to specify content length myself?
// PUT method
func (c *Client) PutFileOld(filename string, noLen bool) error {
	file, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer file.Close()
	hasher := md5.New()
	if _, err := io.Copy(hasher, file); err != nil {
		log.Fatal("Could not compute MD5")
	}
	// Lazy way to go back to the beginning since the reader has consumed our bytes
	// and we have to compute the hash
	file.Seek(0, 0)
	c.MD5 = hex.EncodeToString(hasher.Sum(nil)[:16])
	log.Printf("Uploading to: %s", fmt.Sprintf("%s/%s", c.baseURL, filename))
	baseURL, err := url.Parse(fmt.Sprintf("%s/%s", c.baseURL, filename))
	if err != nil {
		return err
	}
	log.Printf("MD5: %s - file: %s\n", c.MD5, filename)
	req, err := http.NewRequest(http.MethodPut, baseURL.String(), bufio.NewReader(file))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/octet-stream")
	req.Header.Set("Content-Md5", c.MD5)
	fi, _ := file.Stat()
	// Not sure if this is needed, or if Go sets it automatically
	req.ContentLength = fi.Size()
	res, err := c.httpClient.Do(req)
	if err != nil {
		return err
	}
	dump, err := httputil.DumpResponse(res, true)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%q\n", dump)
	c.StatusCode = res.StatusCode
	defer res.Body.Close()
	return nil
}
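One single-pass option is io.TeeReader: every byte read from the source is also written to the hasher, so the hash is computed in the same pass that feeds the destination. Below is a minimal, self-contained sketch of that pattern (the file name and the io.Discard destination are placeholders). Note that Content-Md5 is a request header, so for the PUT above this only removes the second read if the digest does not have to be known before the body is sent (for example, if the server accepts it as a trailer).
package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
	"io"
	"log"
	"os"
)

// hashWhileCopying streams src to dst once; the TeeReader feeds the same
// bytes to the MD5 hasher as they pass through.
func hashWhileCopying(src io.Reader, dst io.Writer) (string, error) {
	hasher := md5.New()
	tee := io.TeeReader(src, hasher)
	if _, err := io.Copy(dst, tee); err != nil {
		return "", err
	}
	return hex.EncodeToString(hasher.Sum(nil)), nil
}

func main() {
	f, err := os.Open("example.bin") // placeholder file name
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	sum, err := hashWhileCopying(f, io.Discard) // io.Discard stands in for the real destination
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("md5:", sum)
}
For the upload case, the destination writer would be whatever consumes the request body, and the computed sum is only available once the copy has finished.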

Related

Post multi-part form data for audio file

I wrote a function that posts multipart form data to an endpoint, but it doesn't seem to be working properly. Here's the code:
func Upload(filePath string) error {
	client := &http.Client{
		Timeout: time.Second * 10,
	}
	// New multipart writer.
	body := &bytes.Buffer{}
	file, err := os.Open(filePath)
	if err != nil {
		return err
	}
	writer := multipart.NewWriter(body)
	fw, err := writer.CreateFormFile("file", filepath.Base(filePath))
	if err != nil {
		return err
	}
	_, err = io.Copy(fw, bufio.NewReader(file))
	if err != nil {
		return err
	}
	// Close multipart writer.
	writer.Close()
	req, err := http.NewRequest("POST", "http://localhost:5050/upload", bytes.NewReader(body.Bytes()))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", writer.FormDataContentType())
	rsp, _ := client.Do(req)
	if rsp == nil {
		return fmt.Errorf("failed to upload")
	}
	if rsp.StatusCode != http.StatusOK {
		fmt.Printf("Request failed with response code: %d", rsp.StatusCode)
	}
	fmt.Println("rsp: ", rsp.StatusCode)
	return nil
}
The handler on the API receiving the POST is this:
func (m *MetadataService) uploadHandler(res http.ResponseWriter, req *http.Request) {
	file, handler, err := req.FormFile("file")
	if err != nil {
		panic(err) // dont do this
	}
	defer file.Close()
	fmt.Println("req: ", req)
	// Create a buffer to store the header of the file in
	fileHeader := make([]byte, 512)
	// Copy the headers into the FileHeader buffer
	if _, err := file.Read(fileHeader); err != nil {
		panic(err) // dont do this
	}
	// set position back to start.
	if _, err := file.Seek(0, 0); err != nil {
		panic(err) // dont do this
	}
	// copy example
	f, err := os.OpenFile(handler.Filename, os.O_WRONLY|os.O_CREATE, 0666)
	if err != nil {
		panic(err) // please dont
	}
	defer f.Close()
	buf := bytes.NewBuffer(nil)
	if _, err := io.Copy(buf, file); err != nil {
		panic(err)
	}
	fmt.Println("filename:", handler.Filename)
	fmt.Println("file.(Sizer).Size():", file.(Sizer).Size())
	fmt.Println("contentType:", http.DetectContentType(fileHeader))
	io.WriteString(res, id)
}
The code panics when reading the fileHeader, though. Also, the content type when sent is "application/octet-stream", when I'd expect it to be "audio/mpeg" since I'm uploading a song. Not sure what's wrong with the aforementioned Upload function. Need help! Thank you!
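For what it's worth, multipart.Writer.CreateFormFile always labels the part as application/octet-stream. If the part should carry an explicit type, one option is CreatePart with your own MIME header. A rough sketch under that assumption (the field name, file name, and the audio/mpeg label are placeholders; it needs the bytes, fmt, io, mime/multipart, net/textproto, os, and path/filepath imports):
// Sketch: build a multipart body whose file part carries an explicit
// Content-Type instead of the application/octet-stream default.
func buildAudioForm(filePath string) (*bytes.Buffer, string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, "", err
	}
	defer file.Close()

	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)

	h := make(textproto.MIMEHeader)
	h.Set("Content-Disposition",
		fmt.Sprintf(`form-data; name="file"; filename="%s"`, filepath.Base(filePath)))
	h.Set("Content-Type", "audio/mpeg") // label the part explicitly
	part, err := writer.CreatePart(h)
	if err != nil {
		return nil, "", err
	}
	if _, err := io.Copy(part, file); err != nil {
		return nil, "", err
	}
	if err := writer.Close(); err != nil {
		return nil, "", err
	}
	return body, writer.FormDataContentType(), nil
}
The returned content type string would then go into the request's Content-Type header, as in the original Upload function.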

Zip a Directory and not Have the Result Saved in File System

I am able to zip a file using logic similar to the zip writer seen here.
This results in an array of bytes ([]byte) being created within the bytes.Buffer object that is returned. I would just like to know if there is any way I can upload this 'zipped' array of bytes to an API endpoint that expects a 'multipart/form-data' request body (without having to save it locally).
Supplementary information:
I have code that utilizes this when compressing a folder. I am able to successfully execute an HTTP POST request with the zip file to the endpoint with this logic.
However, this unfortunately saves zipped files in a user's local file system. I would like to try to avoid this :)
You can create a multipart writer and write the []byte of zipped data into a form file field, with whatever field name and file name you like, as below.
func addZipFileToReq(zipped []byte) (*http.Request, error) {
	body := bytes.NewBuffer(nil)
	writer := multipart.NewWriter(body)
	part, err := writer.CreateFormFile(`fileField`, `filename`)
	if err != nil {
		return nil, err
	}
	_, err = part.Write(zipped)
	if err != nil {
		return nil, err
	}
	err = writer.Close()
	if err != nil {
		return nil, err
	}
	r, err := http.NewRequest(http.MethodPost, "https://example.com", body)
	if err != nil {
		return nil, err
	}
	r.Header.Set("Content-Type", writer.FormDataContentType())
	return r, nil
}
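A usage sketch, assuming zipBuf is the bytes.Buffer already produced by your existing zip logic:
req, err := addZipFileToReq(zipBuf.Bytes()) // zipBuf is a placeholder name for your in-memory zip
if err != nil {
	return err
}
res, err := http.DefaultClient.Do(req)
if err != nil {
	return err
}
defer res.Body.Close()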
If you want to stream-upload the zip, you should be able to do so with io.Pipe. The following is an incomplete and untested example to demonstrate the general idea. To make it work you'll need to modify it and potentially fix whatever bugs you encounter.
func UploadReader(r io.Reader) error {
	req, err := http.NewRequest("POST", "<UPLOAD_URL>", r)
	if err != nil {
		return err
	}
	// TODO set necessary headers (content type, auth, etc)
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	} else if res.StatusCode != 200 {
		return errors.New("not ok")
	}
	return nil
}

func ZipDir(dir string, w io.Writer) error {
	zw := zip.NewWriter(w)
	defer zw.Close()
	return filepath.Walk(dir, func(path string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if !fi.Mode().IsRegular() {
			return nil
		}
		header, err := zip.FileInfoHeader(fi)
		if err != nil {
			return err
		}
		header.Name = path
		header.Method = zip.Deflate
		w, err := zw.CreateHeader(header)
		if err != nil {
			return err
		}
		f, err := os.Open(path)
		if err != nil {
			return err
		}
		defer f.Close()
		if _, err := io.Copy(w, f); err != nil {
			return err
		}
		return nil
	})
}

func UploadDir(dir string) error {
	r, w := io.Pipe()
	// Buffer the channel so both goroutines can report an error without
	// blocking forever when only one value is ever received.
	ch := make(chan error, 2)
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		defer wg.Done()
		defer w.Close()
		if err := ZipDir(dir, w); err != nil {
			ch <- err
		}
	}()
	wg.Add(1)
	go func() {
		defer wg.Done()
		defer r.Close()
		if err := UploadReader(r); err != nil {
			ch <- err
		}
	}()
	go func() {
		wg.Wait()
		close(ch)
	}()
	return <-ch
}
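If the endpoint strictly requires multipart/form-data, the same io.Pipe idea can feed a multipart.Writer so the zip is streamed into a form part instead of being buffered. Here is a rough, untested sketch along the same lines, reusing the ZipDir above (the field name, file name, and URL are placeholders):
func UploadDirMultipart(dir, url string) error {
	pr, pw := io.Pipe()
	mw := multipart.NewWriter(pw)

	// Produce the multipart body (containing the zip) in the background.
	go func() {
		part, err := mw.CreateFormFile("file", "archive.zip")
		if err != nil {
			pw.CloseWithError(err)
			return
		}
		if err := ZipDir(dir, part); err != nil {
			pw.CloseWithError(err)
			return
		}
		// Closing the multipart writer emits the final boundary; then close the pipe.
		pw.CloseWithError(mw.Close())
	}()

	req, err := http.NewRequest(http.MethodPost, url, pr)
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", mw.FormDataContentType())
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("upload failed: %s", res.Status)
	}
	return nil
}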

Trouble getting content type of file in Go

I have a function in which I take in a base64 string and get the content of it (PDF or JPEG).
I read in the base64 content, convert it to bytes and decode it into the file that it is.
I then create a file where I will output the decoded file (JPEG or PDF).
Then I write the bytes to it.
Then I call my GetFileContentType on it and it returns to me an empty string.
If I run the functions separately, as in I first run the first function to create the decoded file and let it return, and then call the second function to get the content type, it works and returns JPEG or PDF.
What am I doing wrong here?
And is there a better way to do this?
func ConvertToJPEGBase64(
	src string,
	dst string,
) error {
	b, err := ioutil.ReadFile(src)
	if err != nil {
		return err
	}
	str := string(b)
	byteArray, err := base64.StdEncoding.DecodeString(str)
	if err != nil {
		return err
	}
	f, err := os.Create(dst)
	if err != nil {
		return err
	}
	if _, err := f.Write(byteArray); err != nil {
		return err
	}
	f.Sync()
	filetype, err := client.GetFileContentType(f)
	if err != nil {
		return err
	}
	if strings.Contains(filetype, "jpeg") {
		// do something
	} else {
		// do something else
	}
	return nil
}
// GetFileContentType tells us the type of file
func GetFileContentType(out *os.File) (string, error) {
	// Only the first 512 bytes are used to sniff the content type.
	buffer := make([]byte, 512)
	_, err := out.Read(buffer)
	if err != nil {
		return "", err
	}
	contentType := http.DetectContentType(buffer)
	return contentType, nil
}
The problem is that GetFileContentType reads from the end of the file. Fix this by seeking back to the beginning of the file before calling GetFileContentType:
if _, err := f.Seek(0, io.SeekStart); err != nil {
	return err
}
A better fix is to use the file data that's already in memory. This simplifies the code to the point where there's no need for the GetFileContentType function.
func ConvertToJPEGBase64(
	src string,
	dst string,
) error {
	b, err := ioutil.ReadFile(src)
	if err != nil {
		return err
	}
	str := string(b)
	byteArray, err := base64.StdEncoding.DecodeString(str)
	if err != nil {
		return err
	}
	f, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer f.Close() // <-- Close the file on return.
	if _, err := f.Write(byteArray); err != nil {
		return err
	}
	fileType := http.DetectContentType(byteArray) // <-- use data in memory
	if strings.Contains(fileType, "jpeg") {
		// do something
	} else {
		// do something else
	}
	return nil
}
More code can be eliminated by using ioutil.WriteFile:
func ConvertToJPEGBase64(src, dst string) error {
	b, err := ioutil.ReadFile(src)
	if err != nil {
		return err
	}
	byteArray, err := base64.StdEncoding.DecodeString(string(b))
	if err != nil {
		return err
	}
	if err := ioutil.WriteFile(dst, byteArray, 0666); err != nil {
		return err
	}
	fileType := http.DetectContentType(byteArray)
	if strings.Contains(fileType, "jpeg") {
		// do something
	} else {
		// do something else
	}
	return nil
}
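A small usage sketch (the file names here are placeholders, and it assumes a log import):
if err := ConvertToJPEGBase64("scan.b64", "scan.jpg"); err != nil {
	log.Fatal(err)
}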

golang scp file using crypto/ssh

I'm trying to download a remote file over ssh
The following approach works fine on shell
ssh hostname "tar cz /opt/local/folder" > folder.tar.gz
However, the same approach in Go gives a difference in output artifact size. For example, the same folder produces a 179 B gz artifact with pure shell but 178 B with the Go script.
I assume that something is being missed from the io.Reader, or the session is getting closed early. I kindly ask you guys for help.
Here is the example of my script:
func executeCmd(cmd, hostname string, config *ssh.ClientConfig, path string) error {
	conn, _ := ssh.Dial("tcp", hostname+":22", config)
	session, err := conn.NewSession()
	if err != nil {
		panic("Failed to create session: " + err.Error())
	}
	r, _ := session.StdoutPipe()
	scanner := bufio.NewScanner(r)
	go func() {
		defer session.Close()
		name := fmt.Sprintf("%s/backup_folder_%v.tar.gz", path, time.Now().Unix())
		file, err := os.OpenFile(name, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
		if err != nil {
			panic(err)
		}
		defer file.Close()
		for scanner.Scan() {
			fmt.Println(scanner.Bytes())
			if err := scanner.Err(); err != nil {
				fmt.Println(err)
			}
			if _, err = file.Write(scanner.Bytes()); err != nil {
				log.Fatal(err)
			}
		}
	}()
	if err := session.Run(cmd); err != nil {
		fmt.Println(err.Error())
		panic("Failed to run: " + err.Error())
	}
	return nil
}
Thanks!
bufio.Scanner is for newline-delimited text. According to the documentation, the scanner removes the newline characters, stripping any newline bytes (value 10) out of your binary file.
You don't need a goroutine to do the copy, because you can use session.Start to start the process asynchronously.
You probably don't need to use bufio either. You should be using io.Copy to copy the file, which has an internal buffer already on top of any buffering already done in the ssh client itself. If an additional buffer is needed for performance, wrap the session output in a bufio.Reader.
Finally, you return an error value, so use it rather than panic'ing on regular error conditions.
conn, err := ssh.Dial("tcp", hostname+":22", config)
if err != nil {
	return err
}
session, err := conn.NewSession()
if err != nil {
	return err
}
defer session.Close()
r, err := session.StdoutPipe()
if err != nil {
	return err
}
name := fmt.Sprintf("%s/backup_folder_%v.tar.gz", path, time.Now().Unix())
file, err := os.OpenFile(name, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
	return err
}
defer file.Close()
if err := session.Start(cmd); err != nil {
	return err
}
if _, err := io.Copy(file, r); err != nil {
	return err
}
if err := session.Wait(); err != nil {
	return err
}
return nil
You can try doing something like this:
r, _ := session.StdoutPipe()
reader := bufio.NewReader(r)
go func() {
	defer session.Close()
	// open file etc
	// 10 is the number of bytes you'd like to copy in one write operation
	p := make([]byte, 10)
	for {
		n, err := reader.Read(p)
		// Write whatever was read before checking for EOF, so the final
		// (possibly partial) chunk is not dropped.
		if n > 0 {
			if _, werr := file.Write(p[:n]); werr != nil {
				log.Fatal(werr)
			}
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal("err", err)
		}
	}
}()
Make sure your goroutines are synchronized properly so the output is completely written to the file.
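One minimal way to get that synchronization is a sync.WaitGroup, so the function does not return before the copying goroutine has finished. This is only a sketch; it assumes the session, reader, and file from the snippet above:
var wg sync.WaitGroup
wg.Add(1)
go func() {
	defer wg.Done()
	defer session.Close()
	// ... the read/write loop from above ...
}()
if err := session.Run(cmd); err != nil {
	return err
}
wg.Wait() // don't return until the goroutine has finished writing the file
return nil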

How can I efficiently download a large file using Go?

Is there a way to download a large file using Go that will store the content directly into a file instead of storing it all in memory before writing it to a file? Because the file is so big, storing it all in memory before writing it to a file is going to use up all the memory.
I'll assume you mean download via http (error checks omitted for brevity):
import ("net/http"; "io"; "os")
...
out, err := os.Create("output.txt")
defer out.Close()
...
resp, err := http.Get("http://example.com/")
defer resp.Body.Close()
...
n, err := io.Copy(out, resp.Body)
The http.Response's Body is a Reader, so you can use any function that takes a Reader to, for example, read a chunk at a time rather than all at once. In this specific case, io.Copy() does the grunt work for you.
A more descriptive version of Steve M's answer.
import (
	"fmt"
	"io"
	"net/http"
	"os"
)

func downloadFile(filepath string, url string) (err error) {
	// Create the file
	out, err := os.Create(filepath)
	if err != nil {
		return err
	}
	defer out.Close()
	// Get the data
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	// Check server response
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("bad status: %s", resp.Status)
	}
	// Write the body to file
	_, err = io.Copy(out, resp.Body)
	if err != nil {
		return err
	}
	return nil
}
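Usage would then look something like this (the URL and output file name are placeholders, and it assumes a log import):
if err := downloadFile("index.html", "http://example.com/"); err != nil {
	log.Fatal(err)
}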
The answer selected above using io.Copy is exactly what you need, but if you are interested in additional features like resuming broken downloads, auto-naming files, checksum validation or monitoring progress of multiple downloads, checkout the grab package.
Here is a sample. https://github.com/thbar/golang-playground/blob/master/download-files.go
Here is some more code that might help:
func HTTPDownload(uri string) ([]byte, error) {
	fmt.Printf("HTTPDownload From: %s.\n", uri)
	res, err := http.Get(uri)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()
	d, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("ReadFile: Size of download: %d\n", len(d))
	return d, err
}

func WriteFile(dst string, d []byte) error {
	fmt.Printf("WriteFile: Size of download: %d\n", len(d))
	err := ioutil.WriteFile(dst, d, 0444)
	if err != nil {
		log.Fatal(err)
	}
	return err
}

func DownloadToFile(uri string, dst string) {
	fmt.Printf("DownloadToFile From: %s.\n", uri)
	if d, err := HTTPDownload(uri); err == nil {
		fmt.Printf("downloaded %s.\n", uri)
		if WriteFile(dst, d) == nil {
			fmt.Printf("saved %s as %s\n", uri, dst)
		}
	}
}
