Golang AWS S3manager multipartreader w/ Goroutines

I'm creating an endpoint that allows a user to upload several files at the same time and store them in S3. Currently I'm able to achieve this using MultipartReader and s3manager, but only sequentially.
I'm trying to use goroutines to speed this up and upload multiple files to S3 concurrently, but a data race error is causing trouble. I suspect *s3manager might not be goroutine safe, even though the docs say it is.
(The code works synchronously if the go statement is replaced with a plain function call.)
Could implementing mutex locks possibly fix my error?
func uploadHandler(w http.ResponseWriter, r *http.Request) {
    counter := 0
    switch r.Method {
    // GET to display the upload form.
    case "GET":
        err := templates.Execute(w, nil)
        if err != nil {
            log.Print(err)
        }
    // POST uploads each file and sends them to S3
    case "POST":
        c := make(chan string)
        // grab the request.MultipartReader
        reader, err := r.MultipartReader()
        if err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        // copy each part to destination.
        for {
            part, err := reader.NextPart()
            if err == io.EOF {
                break
            }
            // if part.FileName() is empty, skip this iteration.
            if part.FileName() == "" {
                continue
            }
            counter++
            go S3Upload(c, part)
        }
        for i := 0; i < counter; i++ {
            fmt.Println(<-c)
        }
        // displaying a success message.
        err = templates.Execute(w, "Upload successful.")
        if err != nil {
            log.Print(err)
        }
    default:
        w.WriteHeader(http.StatusMethodNotAllowed)
    }
}
func S3Upload(c chan string, part *multipart.Part) {
    bucket := os.Getenv("BUCKET")
    sess, err := session.NewSession(&aws.Config{
        Region: aws.String(os.Getenv("REGION")),
    })
    if err != nil {
        c <- "error occurred creating session"
        return
    }
    uploader := s3manager.NewUploader(sess)
    _, err = uploader.Upload(&s3manager.UploadInput{
        Bucket: aws.String(bucket),
        Key:    aws.String(part.FileName()),
        Body:   part,
    })
    if err != nil {
        c <- "Error occurred attempting to upload to S3"
        return
    }
    // successful upload
    c <- "successful upload"
}

^ see all the comments above. Here is a modified code example; channels are not useful here. The underlying data race, by the way, is not in s3manager: a *multipart.Part is only valid until the next call to reader.NextPart(), so handing it to a goroutine while the loop keeps reading races on the shared reader state. Buffer each file before going concurrent.
package main

import (
    "bytes"
    "io"
    "log"
    "net/http"
    "os"
    "strings"
    "sync"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

var (
    setupUploaderOnce sync.Once
    uploader          *s3manager.Uploader
    bucket            string
    region            string
)

// ensure the session and uploader are set up only once, using a Singleton pattern
func setupUploader() {
    setupUploaderOnce.Do(func() {
        bucket = os.Getenv("BUCKET")
        region = os.Getenv("REGION")
        sess, err := session.NewSession(&aws.Config{Region: aws.String(region)})
        if err != nil {
            log.Fatal(err)
        }
        // note: plain assignment, not :=, so the package-level uploader is set
        uploader = s3manager.NewUploader(sess)
    })
}

// normally singleton setup is packaged out and called before starting the server,
// but to keep the example a single file, load it here
func init() {
    setupUploader()
}
func uploadHandler(w http.ResponseWriter, r *http.Request) {
    switch r.Method {
    // GET to display the upload form.
    case "GET":
        err := templates.Execute(w, nil)
        if err != nil {
            log.Print(err)
        }
    // POST uploads each file and sends them to S3
    case "POST":
        var buf bytes.Buffer
        // "file" is defined by the form field; change it to whatever your form sets it to
        file, header, err := r.FormFile("file")
        if err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        // close the file
        defer file.Close()
        fileName := strings.Split(header.Filename, ".")
        // load the entire file data into the buffer
        _, err = io.Copy(&buf, file)
        if err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        // hand the buffered file off to be uploaded
        go S3Upload(buf, fileName[0])
        // displaying a success message.
        err = templates.Execute(w, "Upload successful.")
        if err != nil {
            log.Print(err)
        }
    default:
        w.WriteHeader(http.StatusMethodNotAllowed)
    }
}
// keeping this simple; do something with the err, like log it.
// if the uploader fails in the goroutine, there is potential
// for false-positive uploads... channels are not really good here
// either; for that, bubble the error up
// and don't spin up a goroutine, which is the same thing as waiting on the channel.
func S3Upload(body bytes.Buffer, fileName string) {
    _, err := uploader.Upload(&s3manager.UploadInput{
        Bucket: aws.String(bucket),
        Key:    aws.String(fileName),
        Body:   bytes.NewReader(body.Bytes()),
    })
    if err != nil {
        log.Printf("upload of %s failed: %v", fileName, err)
    }
}
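
If you do want the original multi-file form handled concurrently, here is a minimal sketch (untested; it assumes the package-level uploader and bucket initialized above): each part is buffered before the next NextPart call, and a sync.WaitGroup replaces the counting channel.
func uploadAllHandler(w http.ResponseWriter, r *http.Request) {
    reader, err := r.MultipartReader()
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
    var wg sync.WaitGroup
    for {
        part, err := reader.NextPart()
        if err == io.EOF {
            break
        }
        if err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        if part.FileName() == "" {
            continue
        }
        // buffer the part NOW: a *multipart.Part is invalidated by the next
        // NextPart call, so it must not escape into a goroutine
        var buf bytes.Buffer
        if _, err := io.Copy(&buf, part); err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        wg.Add(1)
        go func(name string, body []byte) {
            defer wg.Done()
            // the s3manager Uploader is documented to be safe for concurrent use
            if _, err := uploader.Upload(&s3manager.UploadInput{
                Bucket: aws.String(bucket),
                Key:    aws.String(name),
                Body:   bytes.NewReader(body),
            }); err != nil {
                log.Printf("upload of %s failed: %v", name, err)
            }
        }(part.FileName(), buf.Bytes())
    }
    wg.Wait()
}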

Related

Uploading a file to an internet site

With the code below I can download a file from the internet while monitoring the download percentage.
How can I upload a file to the internet while monitoring the upload progress in the same way? I want to upload an executable file to GitHub assets.
package main

import (
    "fmt"
    "io"
    "net/http"
    "os"
    "strings"

    "github.com/dustin/go-humanize"
)

// WriteCounter counts the number of bytes written to it. It implements the io.Writer
// interface and we can pass it into io.TeeReader(), which will report progress on each
// write cycle.
type WriteCounter struct {
    Total uint64
}

func (wc *WriteCounter) Write(p []byte) (int, error) {
    n := len(p)
    wc.Total += uint64(n)
    wc.PrintProgress()
    return n, nil
}

func (wc WriteCounter) PrintProgress() {
    // Clear the line by using a carriage return to go back to the start and remove
    // the remaining characters by filling the line with spaces
    fmt.Printf("\r%s", strings.Repeat(" ", 35))
    // Return again and print the current status of the download.
    // We use the humanize package to print the bytes in a meaningful way (e.g. 10 MB)
    fmt.Printf("\rDownloading... %s complete", humanize.Bytes(wc.Total))
}

func main() {
    fmt.Println("Download Started")
    fileUrl := "https://upload.wikimedia.org/wikipedia/commons/d/d6/Wp-w4-big.jpg"
    err := DownloadFile("avatar.jpg", fileUrl)
    if err != nil {
        panic(err)
    }
    fmt.Println("Download Finished")
}

// DownloadFile will download a url to a local file. It's efficient because it will
// write as it downloads and not load the whole file into memory. We pass an io.TeeReader
// into Copy() to report progress on the download.
func DownloadFile(filepath string, url string) error {
    // Create the file with a .tmp extension, so we won't overwrite a file
    // until it's fully downloaded; the .tmp extension is removed afterwards.
    out, err := os.Create(filepath + ".tmp")
    if err != nil {
        return err
    }
    // Get the data
    resp, err := http.Get(url)
    if err != nil {
        out.Close()
        return err
    }
    defer resp.Body.Close()
    // Create our progress reporter and pass it to be used alongside our writer
    counter := &WriteCounter{}
    if _, err = io.Copy(out, io.TeeReader(resp.Body, counter)); err != nil {
        out.Close()
        return err
    }
    // The progress reporter uses the same line, so print a new line once the download is finished
    fmt.Print("\n")
    // Close the file without defer so it can happen before Rename()
    out.Close()
    if err = os.Rename(filepath+".tmp", filepath); err != nil {
        return err
    }
    return nil
}
I just modified your code. It works for my file server.
func UploadFile(filepath string, url string) error {
    // Open the file to upload
    out, err := os.Open(filepath)
    if err != nil {
        return err
    }
    // Create our progress reporter and pass it to be used alongside our reader
    counter := &WriteCounter{}
    // Post the data, counting bytes as they are read from the file
    resp, err := http.Post(url, "multipart/form-data", io.TeeReader(out, counter))
    if err != nil {
        out.Close()
        log.Println(err.Error())
        return err
    }
    defer resp.Body.Close()
    // The progress reporter uses the same line, so print a new line once the upload is finished
    fmt.Print("\n")
    // Close the file without defer
    out.Close()
    return nil
}
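One caveat: this sends the raw file bytes with a multipart/form-data Content-Type but no actual multipart encoding, which a stricter server will reject. Below is a minimal sketch (untested; the field name "file" is an assumption, and it reuses the WriteCounter above) of a real multipart upload that still reports progress through io.TeeReader. It needs "mime/multipart" and "path/filepath" in addition to the imports above.
func UploadMultipart(path, url string) error {
    file, err := os.Open(path)
    if err != nil {
        return err
    }
    defer file.Close()

    pr, pw := io.Pipe()
    mw := multipart.NewWriter(pw)
    // write the multipart body on a goroutine so the HTTP client can
    // stream it without buffering the whole file in memory
    go func() {
        part, err := mw.CreateFormFile("file", filepath.Base(path))
        if err != nil {
            pw.CloseWithError(err)
            return
        }
        counter := &WriteCounter{}
        if _, err := io.Copy(part, io.TeeReader(file, counter)); err != nil {
            pw.CloseWithError(err)
            return
        }
        pw.CloseWithError(mw.Close())
    }()

    resp, err := http.Post(url, mw.FormDataContentType(), pr)
    if err != nil {
        return err
    }
    defer resp.Body.Close()
    // progress shares a line; move to a new line when done
    fmt.Print("\n")
    return nil
}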

Why does this Golang app use more memory the longer it runs?

I made this to monitor a few websites and notify me if one of them goes down. I'm testing it on just two urls. When it starts it uses about 5mb of memory (I checked with systemctl status monitor). After 40 minutes, it's using 7.4mb. After 8 hours, it uses over 50mb of memory. Why is it doing this? Is this called a memory leak?
package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "sync"
    "time"

    "monitor/utils/slack"

    "gopkg.in/yaml.v2"
)

var config struct {
    Frequency int
    Urls      []string
}

type statusType struct {
    values map[string]int
    mux    sync.Mutex
}

var status = statusType{values: make(map[string]int)}

func (s *statusType) set(url string, value int) {
    s.mux.Lock()
    s.values[url] = value
    s.mux.Unlock()
}

func init() {
    data, err := ioutil.ReadFile("config.yaml")
    if err != nil {
        fmt.Printf("Invalid config: %s\n", err)
        os.Exit(0)
    }
    err = yaml.Unmarshal(data, &config)
    if err != nil {
        fmt.Printf("Invalid config: %s\n", err)
        os.Exit(0)
    }
    for _, url := range config.Urls {
        status.set(url, 200)
    }
}

func main() {
    ticker := time.NewTicker(time.Duration(config.Frequency) * time.Second)
    for range ticker.C {
        for _, url := range config.Urls {
            go check(url)
        }
    }
}

func check(url string) {
    res, err := http.Get(url)
    if err != nil {
        res = &http.Response{StatusCode: 500}
    }
    // the memory problem occurs when this condition is never satisfied, so I didn't post the slack package.
    if res.StatusCode != status.values[url] {
        status.set(url, res.StatusCode)
        err := slack.Alert(url, res.StatusCode)
        if err != nil {
            fmt.Println(err)
        }
    }
}
If this belongs in Code Review then I will put it there.
Yes, this is a memory leak. One obvious source I can spot is that you're not closing the response bodies from your requests:
func check(url string) {
    res, err := http.Get(url)
    if err != nil {
        res = &http.Response{StatusCode: 500}
    } else {
        defer res.Body.Close() // You need to close the response body!
    }
    if res.StatusCode != status.values[url] {
        status.set(url, res.StatusCode)
        err := slack.Alert(url, res.StatusCode)
        if err != nil {
            fmt.Println(err)
        }
    }
}
Better still, so that Go can use keepalive, you want to read the full body and close it:
defer func() {
    io.Copy(ioutil.Discard, res.Body)
    res.Body.Close()
}()
You can further analyse where memory usage is coming from by profiling your application with pprof. There's a good rundown on the Go blog and a web search will turn up many more articles on the topic.
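A minimal sketch of wiring that up (assuming it's acceptable to expose a debug port from this monitoring binary): the net/http/pprof package registers its handlers via a blank import, so the only additions to the program are the import and a listener.
import (
    "log"
    "net/http"
    _ "net/http/pprof" // registers the /debug/pprof/* handlers on http.DefaultServeMux
)

func init() {
    go func() {
        // then inspect live heap usage with:
        //   go tool pprof http://localhost:6060/debug/pprof/heap
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()
}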

rpc.ServerCodec Still Serving?

I was performing some RPC tests and stumbled across a problem I can't seem to solve. In my testing I create three separate RPC servers, all of which I try to close and shut down. However, upon running my last test (TestRpcCodecServerClientComm), it seems my client connection is connecting to the first RPC server I started (I know this because at some point I attached IDs to the RPCHandlers), even though I attempted everything I could to make sure it was shut down. Though the code doesn't show it, I have inspected every error I could, but that didn't turn up anything.
rpc.go
package rbot

import (
    "io"
    "net"
    "net/rpc"
    "net/rpc/jsonrpc"
)

func RpcCodecClientWithPort(port string) (rpc.ClientCodec, error) {
    conn, err := net.Dial("tcp", "localhost:"+port)
    if err != nil {
        return nil, err
    }
    return jsonrpc.NewClientCodec(conn), nil
}

func RpcCodecServer(conn io.ReadWriteCloser) rpc.ServerCodec {
    return jsonrpc.NewServerCodec(conn)
}
rpc_test.go
package rbot

import (
    "errors"
    "fmt"
    "net"
    "net/rpc"
    "testing"
)

type RPCHandler struct {
    RPCServer net.Listener
    conn      rpc.ServerCodec
    done      chan bool
    TestPort  string
    stop      bool
    GotRPC    bool
}

func (r *RPCHandler) SetupTest() {
    r.stop = false
    r.GotRPC = false
    r.done = make(chan bool)
    r.TestPort = "5556"
}

// TODO: Create separate function to handle erroring
func (r *RPCHandler) CreateRPCServer() error {
    rpc.RegisterName("TestMaster", TestAPI{r})
    var err error
    r.RPCServer, err = net.Listen("tcp", ":"+r.TestPort)
    if err != nil {
        return err
    }
    go func() {
        for {
            conn, err := r.RPCServer.Accept()
            if err != nil || r.stop {
                r.done <- true
                return
            }
            r.conn = RpcCodecServer(conn)
            rpc.ServeCodec(r.conn)
        }
    }()
    return nil
}

func (r *RPCHandler) CloseRPCServer() error {
    r.stop = true
    if r.conn != nil {
        err := r.conn.Close()
        if err != nil {
            fmt.Println(err)
        }
    }
    err := r.RPCServer.Close()
    <-r.done
    return err
}

type TestAPI struct {
    t *RPCHandler
}

func (tapi TestAPI) Send(msg string, result *string) error {
    if msg == "Got RPC?" {
        tapi.t.GotRPC = true
        return nil
    }
    return errors.New("Didn't receive right message")
}

// Check if we can create and close an RPC server successfully using the RPC server codec.
func TestRpcCodecServer(t *testing.T) {
    r := RPCHandler{}
    r.SetupTest()
    err := r.CreateRPCServer()
    if err != nil {
        t.Fatalf("Could not create rpc server! %s:", err.Error())
    }
    err = r.CloseRPCServer()
    if err != nil {
        t.Fatalf("Could not close RPC server! %s:", err.Error())
    }
}

// Check if we can create a client without erroring.
func TestRpcCodecClientWithPort(t *testing.T) {
    r := RPCHandler{}
    r.SetupTest()
    r.CreateRPCServer()
    defer r.CloseRPCServer()
    RPCClient, err := RpcCodecClientWithPort(r.TestPort)
    defer RPCClient.Close()
    if err != nil {
        t.Fatalf("Could not create an RPC client! %s:", err.Error())
    }
}

// Let's double check and make sure our server and client can speak to each other
func TestRpcCodecServerClientComm(t *testing.T) {
    r := RPCHandler{}
    r.SetupTest()
    r.CreateRPCServer()
    defer r.CloseRPCServer()
    RPCCodec, _ := RpcCodecClientWithPort(r.TestPort)
    RPCClient := rpc.NewClientWithCodec(RPCCodec)
    defer RPCClient.Close()
    var result string
    err := RPCClient.Call("TestMaster.Send", "Got RPC?", &result)
    if err != nil {
        t.Fatalf("Error while trying to send RPC message: %s", err.Error())
    }
    if !r.GotRPC {
        t.Fatalf("Could not send correct message over RPC")
    }
}
Not sure if I'm just mishandling the connection or something of the like; any help would be much appreciated.
For the record: the RPC API does receive the correct string message.
While not the source of your problems, your test configuration has a few race conditions which you should take care of before they cause problems. Always check for issues with the -race option. You should also let the OS allocate the port so you don't run into conflicts. See for example how httptest.Server works.
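For the port, a minimal sketch (a hypothetical adaptation, not from the original tests): listen on port 0 and read back what the OS chose.
l, err := net.Listen("tcp", "127.0.0.1:0") // port 0: let the OS allocate a free port
if err != nil {
    t.Fatal(err)
}
defer l.Close()
port := l.Addr().(*net.TCPAddr).Port // the port that was actually chosen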
Your failure here is that you're not creating a new rpc.Server for each test, you're reusing the rpc.DefaultServer. The first call to CreateRPCServer registers a TestAPI under the name TestMaster. Each subsequent call uses the already registered instance.
If you create a new rpc.Server each time you setup the test and register a new TestAPI, the final test will pass.
srv := rpc.NewServer()
srv.RegisterName("TestMaster", testAPI)
...
// and then use srv to handle the new connection
srv.ServeCodec(RpcCodecServer(conn))
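Put together, here is a sketch of how the questioner's CreateRPCServer could look with a per-handler server; the srv field is an addition, everything else matches the test code above.
type RPCHandler struct {
    RPCServer net.Listener
    srv       *rpc.Server // new field: one rpc.Server per handler
    conn      rpc.ServerCodec
    done      chan bool
    TestPort  string
    stop      bool
    GotRPC    bool
}

func (r *RPCHandler) CreateRPCServer() error {
    r.srv = rpc.NewServer()
    // register a fresh TestAPI on this server only, not on rpc.DefaultServer
    if err := r.srv.RegisterName("TestMaster", TestAPI{r}); err != nil {
        return err
    }
    var err error
    r.RPCServer, err = net.Listen("tcp", ":"+r.TestPort)
    if err != nil {
        return err
    }
    go func() {
        for {
            conn, err := r.RPCServer.Accept()
            if err != nil || r.stop {
                r.done <- true
                return
            }
            r.conn = RpcCodecServer(conn)
            r.srv.ServeCodec(r.conn)
        }
    }()
    return nil
}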

golang http handler context

I'm trying to understand variable scope in Go with the following code.
In this example, requesting a page over HTTP should echo the URI query combined with a value stored in BoltDB.
The problem is that the database handle doesn't seem to work correctly in the HTTP handler context: it prints nothing to stdout or to the HTTP response.
I was expecting it to print:
He's loving <'uri query content'> but prefers pizza (data from the bolt.db driver)
How do I fix this code?
package main

import (
    "fmt"
    "log"
    "net/http"

    "github.com/boltdb/bolt"
)

var db bolt.DB

func handler(w http.ResponseWriter, r *http.Request) {
    dberr := db.Update(func(tx *bolt.Tx) error {
        log.Println("here")
        b := tx.Bucket([]byte("MyBucket"))
        loving := b.Get([]byte("loving"))
        log.Printf("He's loving %s but prefers %s", r.URL.Path[1:], string(loving))
        fmt.Fprintf(w, "He's loving %s but prefers %s", r.URL.Path[1:], string(loving))
        return nil
    })
    if dberr != nil {
        fmt.Errorf("db update: %s", dberr)
    }
    log.Printf("Finished handling")
}

func main() {
    db, err := bolt.Open("my.db", 0600, nil)
    if err != nil {
        log.Fatal(err)
    } else {
        log.Println("database opened")
    }
    dberr := db.Update(func(tx *bolt.Tx) error {
        b, err := tx.CreateBucketIfNotExists([]byte("MyBucket"))
        if err != nil {
            return fmt.Errorf("create bucket: %s", err)
        }
        err2 := b.Put([]byte("loving"), []byte("pizza"))
        if err2 != nil {
            return fmt.Errorf("put loving: %s", err2)
        }
        loving := b.Get([]byte("loving"))
        log.Printf("He's loving %s", string(loving))
        return nil
    })
    if dberr != nil {
        fmt.Errorf("db update: %s", err)
    }
    defer db.Close()
    http.HandleFunc("/", handler)
    http.ListenAndServe(":8080", nil)
}
I think I see your bug. This kind of bug is usually a little difficult to track down because it's just the : in front of the equals sign. It's basically a scoping issue: you declared db as a global while at the same time creating a db variable scoped to your main function.
You used db, err := ... to assign the values instead of just =. := both declares and infers the type. Since it also does declaration, the db you're using in the main function is not the db you declared in the global scope, while the handler is still using the global one, which was never opened. The code below is the same as yours with a few comments outlining the working changes. Hope this helps!
package main

import (
    "fmt"
    "log"
    "net/http"

    "github.com/boltdb/bolt"
)

var db *bolt.DB // this is going to be a pointer and is going to be nil until it's set by the main function

func handler(w http.ResponseWriter, r *http.Request) {
    dberr := db.Update(func(tx *bolt.Tx) error {
        log.Println("here")
        b := tx.Bucket([]byte("MyBucket"))
        loving := b.Get([]byte("loving"))
        log.Printf("He's loving %s but prefers %s", r.URL.Path[1:], string(loving))
        fmt.Fprintf(w, "He's loving %s but prefers %s", r.URL.Path[1:], string(loving))
        return nil
    })
    if dberr != nil {
        fmt.Errorf("db update: %s", dberr)
    }
    log.Printf("Finished handling")
}

func main() {
    var err error // this has to be declared because the next line assigns db the first value returned from bolt.Open
    db, err = bolt.Open("my.db", 0600, nil) // notice that this has changed and is no longer `db, err := ...` but rather `db, err = ...`
    if err != nil {
        log.Fatal(err)
    } else {
        log.Println("database opened")
    }
    dberr := db.Update(func(tx *bolt.Tx) error {
        b, err := tx.CreateBucketIfNotExists([]byte("MyBucket"))
        if err != nil {
            return fmt.Errorf("create bucket: %s", err)
        }
        err2 := b.Put([]byte("loving"), []byte("pizza"))
        if err2 != nil {
            return fmt.Errorf("put loving: %s", err2)
        }
        loving := b.Get([]byte("loving"))
        log.Printf("He's loving %s", string(loving))
        return nil
    })
    if dberr != nil {
        fmt.Errorf("db update: %s", err)
    }
    defer db.Close()
    http.HandleFunc("/", handler)
    http.ListenAndServe(":3000", nil)
}

Go file downloader

I have the following code which is supposed to download a file by splitting it into multiple parts. But right now it only works on images; when I try downloading other files, like tar files, the output is an invalid file.
UPDATED:
Used (*os.File).WriteAt instead of os.Write and removed the os.O_APPEND file mode.
package main

import (
    "errors"
    "flag"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "strconv"
)

var file_url string
var workers int
var filename string

func init() {
    flag.StringVar(&file_url, "url", "", "URL of the file to download")
    flag.StringVar(&filename, "filename", "", "Name of downloaded file")
    flag.IntVar(&workers, "workers", 2, "Number of download workers")
}

func get_headers(url string) (map[string]string, error) {
    headers := make(map[string]string)
    resp, err := http.Head(url)
    if err != nil {
        return headers, err
    }
    if resp.StatusCode != 200 {
        return headers, errors.New(resp.Status)
    }
    for key, val := range resp.Header {
        headers[key] = val[0]
    }
    return headers, err
}

func download_chunk(url string, out string, start int, stop int) {
    client := new(http.Client)
    req, _ := http.NewRequest("GET", url, nil)
    req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", start, stop))
    resp, _ := client.Do(req)
    defer resp.Body.Close()
    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Fatalln(err)
        return
    }
    file, err := os.OpenFile(out, os.O_WRONLY, 0600)
    if err != nil {
        if file, err = os.Create(out); err != nil {
            log.Fatalln(err)
            return
        }
    }
    defer file.Close()
    if _, err := file.WriteAt(body, int64(start)); err != nil {
        log.Fatalln(err)
        return
    }
    fmt.Printf("Range %d-%d: %d\n", start, stop, resp.ContentLength)
}

func main() {
    flag.Parse()
    headers, err := get_headers(file_url)
    if err != nil {
        fmt.Println(err)
    } else {
        length, _ := strconv.Atoi(headers["Content-Length"])
        bytes_chunk := length / workers
        fmt.Println("file length: ", length)
        for i := 0; i < workers; i++ {
            start := i * bytes_chunk
            stop := start + (bytes_chunk - 1)
            go download_chunk(file_url, filename, start, stop)
        }
        var input string
        fmt.Scanln(&input)
    }
}
Basically, it just reads the length of the file, divides it by the number of workers, then each worker downloads its chunk using HTTP's Range header and writes it at that chunk's offset in the output file.
If you really ignore that many errors, as seen above, then your code cannot be expected to work reliably for any file type.
However, I think I can see one problem in your code. Mixing O_APPEND and Seek is probably a mistake (Seek is ignored in that mode). I suggest using (*os.File).WriteAt instead.
IIRC, O_APPEND forces any write to happen at the [current] end of the file. However, your download_chunk function instances for the file parts can execute in unpredictable order, thus "reordering" the parts. The result is then a corrupted file.
1. The order in which the goroutines execute is not deterministic. For example, the result might look like this:
...
file length: 20902
Range 10451-20901: 10451
Range 0-10450: 10451
...
so the chunks can't simply be appended.
2. Writes of the chunk data to a shared file need a sync.Mutex, unless each worker writes at its own fixed offset, as WriteAt does.
(my English is poor, please forgive it)
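Building on both answers, here is a minimal sketch (untested, reusing download_chunk from the question and adding a "sync" import) of a main loop that fixes two remaining issues: the integer division length / workers silently drops the trailing bytes when the length isn't evenly divisible, and fmt.Scanln is a fragile way to wait for the workers; a sync.WaitGroup is the usual replacement. No mutex is needed as long as each worker calls WriteAt on its own byte range.
func main() {
    flag.Parse()
    headers, err := get_headers(file_url)
    if err != nil {
        fmt.Println(err)
        return
    }
    length, _ := strconv.Atoi(headers["Content-Length"])
    bytes_chunk := length / workers
    fmt.Println("file length: ", length)

    var wg sync.WaitGroup
    for i := 0; i < workers; i++ {
        start := i * bytes_chunk
        stop := start + bytes_chunk - 1
        if i == workers-1 {
            stop = length - 1 // last worker picks up the remainder
        }
        wg.Add(1)
        go func(start, stop int) {
            defer wg.Done()
            download_chunk(file_url, filename, start, stop)
        }(start, stop)
    }
    wg.Wait() // block until every chunk has been written
}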
