I'm wondering if it's possible to count and print the number of bytes downloaded while the file is being downloaded.
out, err := os.Create("file.txt")
if err != nil {
    panic(err)
}
defer out.Close()

resp, err := http.Get("http://example.com/zip")
if err != nil {
    panic(err)
}
defer resp.Body.Close()

n, err := io.Copy(out, resp.Body)
if err != nil {
    fmt.Println(err)
}
fmt.Println(n, "bytes")
If I understand you correctly, you wish to display the number of bytes read while the data is transferring, presumably to drive a progress bar or something similar. In that case, you can use Go's compositional data structures to wrap the reader or writer in a custom io.Reader or io.Writer implementation.
It simply forwards the respective Read or Write call to the underlying stream, while doing some additional work with the (int, error) values returned by them. Here is an example you can run on the Go playground.
package main

import (
    "bytes"
    "fmt"
    "io"
    "os"
    "strings"
)

// PassThru wraps an existing io.Reader.
//
// It simply forwards the Read() call, while displaying
// the results from individual calls to it.
type PassThru struct {
    io.Reader
    total int64 // Total # of bytes transferred
}

// Read 'overrides' the underlying io.Reader's Read method.
// This is the one that will be called by io.Copy(). We simply
// use it to keep track of byte counts and then forward the call.
func (pt *PassThru) Read(p []byte) (int, error) {
    n, err := pt.Reader.Read(p)
    pt.total += int64(n)

    if err == nil {
        fmt.Println("Read", n, "bytes for a total of", pt.total)
    }

    return n, err
}

func main() {
    var src io.Reader    // Source file/url/etc
    var dst bytes.Buffer // Destination file/buffer/etc

    // Create some random input data.
    src = bytes.NewBufferString(strings.Repeat("Some random input data", 1000))

    // Wrap it with our custom io.Reader.
    src = &PassThru{Reader: src}

    count, err := io.Copy(&dst, src)
    if err != nil {
        fmt.Println(err)
        os.Exit(1)
    }

    fmt.Println("Transferred", count, "bytes")
}
The output it generates is this:
Read 512 bytes for a total of 512
Read 1024 bytes for a total of 1536
Read 2048 bytes for a total of 3584
Read 4096 bytes for a total of 7680
Read 8192 bytes for a total of 15872
Read 6128 bytes for a total of 22000
Transferred 22000 bytes
The stdlib now provides something like jimt's PassThru: io.TeeReader. It helps simplify things a bit:
// WriteCounter counts the number of bytes written to it.
type WriteCounter struct {
    Total int64 // Total # of bytes transferred
}

// Write implements the io.Writer interface.
//
// Always completes and never returns an error.
func (wc *WriteCounter) Write(p []byte) (int, error) {
    n := len(p)
    wc.Total += int64(n)
    fmt.Printf("Read %d bytes for a total of %d\n", n, wc.Total)
    return n, nil
}
func main() {
    // ...

    // Wrap the source with TeeReader, so every read is also
    // written to our byte-counting io.Writer.
    src = io.TeeReader(src, &WriteCounter{})

    // ...
}
playground
The grab Go package implements progress updates (and many other features) for file downloads.
An example of printing progress updates while a download is in progress is included in the following walkthrough: http://cavaliercoder.com/blog/downloading-large-files-in-go.html
You basically call grab.GetAsync, which downloads in a new goroutine, and then monitor the BytesTransferred or Progress of the returned grab.Response from the calling goroutine.
Other answers have already explained PassThru. Here is a full example with a callback function, based on Dave Jack's answer.
package main

import (
    "fmt"
    "io"
    "net/http"
    "os"
    "strconv"
)

// writeCounter counts the number of bytes written to it.
type writeCounter struct {
    total      int64 // expected total size
    downloaded int64 // number of bytes transferred so far
    onProgress func(downloaded int64, total int64)
}

// Write implements the io.Writer interface.
//
// Always completes and never returns an error.
func (wc *writeCounter) Write(p []byte) (n int, e error) {
    n = len(p)
    wc.downloaded += int64(n)
    wc.onProgress(wc.downloaded, wc.total)
    return
}

func newWriter(size int64, onProgress func(downloaded, total int64)) io.Writer {
    return &writeCounter{total: size, onProgress: onProgress}
}

func main() {
    client := http.DefaultClient
    url := "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
    saveTo := "/Users/tin/Desktop/ForBiggerFun.mp4"
    err := download(client, url, saveTo, func(downloaded, total int64) {
        fmt.Printf("Downloaded %d bytes for a total of %d\n", downloaded, total)
    })
    if err != nil {
        fmt.Println(err)
    }
}

func download(client *http.Client, url, filePath string, onProgress func(downloaded, total int64)) (err error) {
    // Create the file writer.
    file, err := os.Create(filePath)
    if err != nil {
        return
    }
    defer file.Close()

    // Determine the file size with a HEAD request.
    resp, err := client.Head(url)
    if err != nil {
        return
    }
    resp.Body.Close()
    contentLength := resp.Header.Get("Content-Length")
    length, err := strconv.Atoi(contentLength)
    if err != nil {
        return
    }

    // Make the actual request.
    resp, err = client.Get(url)
    if err != nil {
        return
    }
    defer resp.Body.Close()

    // Pipe the body through the progress-reporting writer.
    body := io.TeeReader(resp.Body, newWriter(int64(length), onProgress))
    _, err = io.Copy(file, body)
    return err
}
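As an aside (my suggestion, not part of the original answer): the separate HEAD request can be avoided, since the GET response itself reports the length. A minimal sketch of the tail of download with that change:

// resp.ContentLength is -1 when the server sends no
// Content-Length header, so check it before trusting it.
resp, err := client.Get(url)
if err != nil {
    return
}
defer resp.Body.Close()

body := io.TeeReader(resp.Body, newWriter(resp.ContentLength, onProgress))
_, err = io.Copy(file, body)
return err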
Based on Dave Jack's answer,
I added a transfer-rate readout and receive the file data over a raw TCP connection (for example, from nc).
// WriteCounter counts the number of bytes written to it.
type WriteCounter struct {
    Total      int64 // Total # of bytes transferred
    Last       int64
    LastUpdate time.Time
}

// Write implements the io.Writer interface.
//
// Always completes and never returns an error.
func (wc *WriteCounter) Write(p []byte) (int, error) {
    n := len(p)
    wc.Total += int64(n)
    now := time.Now()
    duration := now.Sub(wc.LastUpdate).Seconds()
    if duration > 1 {
        wc.LastUpdate = now
        rate := float64(wc.Total-wc.Last) / duration / 1024.0
        wc.Last = wc.Total
        fmt.Printf("Read %d bytes for a total of %d, rate %.1f KB/s\n", n, wc.Total, rate)
    }
    return n, nil
}

func Server(dest string) {
    outputFile, err := os.Create(dest)
    if err != nil {
        fmt.Println(err)
        return
    }
    defer outputFile.Close()
    fileWriter := bufio.NewWriter(outputFile)

    serverListener, err := net.Listen("tcp", "0.0.0.0:"+PORT)
    if err != nil {
        fmt.Println(err)
        return
    }
    defer serverListener.Close()

    serverConn, err := serverListener.Accept()
    if err != nil {
        fmt.Println(err)
        return
    }
    defer serverConn.Close()

    wc := &WriteCounter{}
    reader := io.TeeReader(serverConn, wc)
    serverConnReader := bufio.NewReaderSize(reader, 32*1024*1024)
    if _, err := io.Copy(fileWriter, serverConnReader); err != nil {
        fmt.Println(err)
        return
    }
    fileWriter.Flush()
    outputFile.Sync()
    fmt.Println("Done: Writer")
}
The first TCP connection to my server on localhost (macOS) always parses the binary sent to it correctly. Subsequent requests lose the binary data, seeing only the first byte [8]. How have I failed to set up my Reader?
package main

import (
    "fmt"
    "log"
    "net"
    "os"

    "app/src/internal/handler"

    "github.com/golang-collections/collections/stack"
)

func main() {
    port := os.Getenv("SERVER_PORT")
    s := stack.New()

    ln, err := net.Listen("tcp", ":8080")
    if err != nil {
        log.Fatalf("net.Listen: %v", err)
    }
    fmt.Println("Serving on " + port)

    for {
        conn, err := ln.Accept()
        // defer conn.Close()
        if err != nil {
            log.Fatal("ln.Accept")
        }
        go handler.Handle(conn, s)
    }
}
package handler

import (
    "fmt"
    "io"
    "log"
    "net"

    "github.com/golang-collections/collections/stack"
)

func Handle(c net.Conn, s *stack.Stack) {
    fmt.Printf("Serving %s\n", c.RemoteAddr().String())

    buf := make([]byte, 0, 256)
    tmp := make([]byte, 128)

    n, err := c.Read(tmp)
    if err != nil {
        if err != io.EOF {
            log.Fatalf("connection Read() %v", err)
        }
        return
    }
    buf = append(buf, tmp[:n]...)
}
log:
Serving [::1]:51699
------------- value ---------------:QCXhoy5t
Buffer Length: 9. First Value: 8
Serving [::1]:51700
------------- value ---------------:
Buffer Length: 1. First Value: 8
Serving [::1]:51701
test sent over:
push random string:
QCXhoy5t
push random string:
GPh0EnbS
push random string:
4kJ0wN0R
The docs for Reader say:
Read reads up to len(p) bytes into p. It returns the number of bytes read (0 <= n <= len(p)) and any error encountered. Even if Read returns n < len(p), it may use all of p as scratch space during the call. If some data is available but not len(p) bytes, Read conventionally returns what is available instead of waiting for more.
So the most likely cause of your issue is that Read is returning only the data currently available (in this case a single character). You can fix this by using ioutil.ReadAll or by performing the read in a loop (the fact that the data is being appended to a buffer suggests that was the original intention), with something like:
for {
    n, err := c.Read(tmp)
    // Note that data might have been received even when an error is
    // returned, so append before checking the error.
    buf = append(buf, tmp[:n]...)
    if err != nil {
        if err != io.EOF {
            log.Fatalf("connection Read() %v", err)
        }
        break // All data received, so process it
    }
}
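For the ioutil.ReadAll route mentioned above, a minimal sketch (note that on a raw TCP connection, ReadAll blocks until the sender closes its side, so it only suits protocols where the client closes after writing):

buf, err := ioutil.ReadAll(c)
if err != nil {
    log.Fatalf("connection ReadAll() %v", err)
}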
Note: there is no guarantee that any data was received; you should check the length before trying to access the buffer (e.g. buf[0] panics if buf is empty), for instance:
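// process is a hypothetical stand-in for your own handling.
if len(buf) == 0 {
    return // nothing received; buf[0] would panic here
}
process(buf)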
I'm trying to improve the performance of an app.
One part of its code uploads a file to a server in chunks.
The original version simply does this in a sequential loop. However, it's slow, and during the sequence it also needs to talk to another server before uploading each chunk.
The upload of each chunk could simply be placed in a goroutine. That works, but it is not a good solution, because if the source file is extremely large it ends up using a large amount of memory.
So, I try to limit the number of active goroutines by using a buffered channel. Here is some code that shows my attempt. I've stripped it down to show the concept and you can run it to test for yourself.
package main

import (
    "fmt"
    "io"
    "os"
)

const defaultChunkSize = 1 * 1024 * 1024

// Lets have 4 workers
var c = make(chan int, 4)

func UploadFile(f *os.File) error {
    fi, err := f.Stat()
    if err != nil {
        return fmt.Errorf("err: %s", err)
    }
    size := fi.Size()
    total := (int)(size/defaultChunkSize + 1)

    // Upload parts
    buf := make([]byte, defaultChunkSize)
    for partno := 1; partno <= total; partno++ {
        readChunk := func(offset int, buf []byte) (int, error) {
            fmt.Println("readChunk", partno, offset)
            n, err := f.ReadAt(buf, int64(offset))
            if err != nil {
                return n, err
            }
            return n, nil
        }

        // This will block if there are not enough worker slots available
        c <- partno

        // The actual worker.
        go func() {
            offset := (partno - 1) * defaultChunkSize
            n, err := readChunk(offset, buf)
            if err != nil && err != io.EOF {
                return
            }
            err = uploadPart(partno, buf[:n])
            if err != nil {
                fmt.Println("Uploadpart failed:", err)
            }
            <-c
        }()
    }
    return nil
}

func uploadPart(partno int, buf []byte) error {
    fmt.Printf("Uploading partno: %d, buflen=%d\n", partno, len(buf))

    // Actually upload the part. Lets test it by instead writing each
    // buffer to another file. We can then use diff to compare the
    // source and dest files.

    // Open file. Seek to (partno - 1) * defaultChunkSize, write buffer
    f, err := os.OpenFile("/home/matthewh/Downloads/out.tar.gz", os.O_CREATE|os.O_WRONLY, 0755)
    if err != nil {
        fmt.Printf("err: %s\n", err)
    }
    n, err := f.WriteAt(buf, int64((partno-1)*defaultChunkSize))
    if err != nil {
        fmt.Printf("err=%s\n", err)
    }
    fmt.Printf("%d bytes written\n", n)
    defer f.Close()
    return nil
}

func main() {
    filename := "/home/matthewh/Downloads/largefile.tar.gz"
    fmt.Printf("Opening file: %s\n", filename)
    f, err := os.Open(filename)
    if err != nil {
        panic(err)
    }
    UploadFile(f)
}
It almost works, but there are several problems.
First, the final partno, 22, occurs three times. The correct length is actually 612545, as the file length isn't a multiple of 1MB.
// Sample output
...
readChunk 21 20971520
readChunk 22 22020096
Uploading partno: 22, buflen=1048576
Uploading partno: 22, buflen=612545
Uploading partno: 22, buflen=1048576
Another problem: the upload could fail, and I am not familiar enough with Go to know how best to handle failure of a goroutine.
Finally, I ordinarily want to return some data from uploadPart when it succeeds: specifically, a string (an HTTP ETag header value). These ETag values need to be collected by the main function.
What is a better way to structure this code? I've not yet found a Go design pattern that correctly fulfills my needs here.
Skipping for the moment the question of how better to structure this code, I see a bug in your code which may be causing the problem you're seeing. Since the function you're running in the goroutine uses the variable partno, which changes with each iteration of the loop, your goroutine isn't necessarily seeing the value of partno at the time you invoked the goroutine. A common way of fixing this is to create a local copy of that variable inside the loop:
for partno := 1; partno <= total; partno++ {
    partno := partno
    // ...
}
Data race #1
Multiple goroutines are using the same buffer concurrently. Note that one goroutine may be filling it with a new chunk while another is still reading an old chunk from it. Instead, each goroutine should have its own buffer.
Data race #2
As Andy Schweig has pointed out, the value in partno is updated by the loop before the goroutine created in that iteration has a chance to read it. This is why the final partno, 22, occurs multiple times. To fix it, you can pass partno as an argument to the anonymous function. That will ensure each goroutine has its own part number.
Also, you can use a channel to pass the results from the workers, perhaps a struct type with the part number, the ETag, and an error; see the sketch below. That way, you will be able to observe the progress and retry failed uploads.
For an example of a good pattern check out this example from the GOPL book.
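A sketch of that results channel (the result type, the etags map, and the ETag collection are illustrative names of mine, not from the question's code):

// result carries the outcome of one part upload.
type result struct {
    partno int
    etag   string // e.g. the HTTP ETag header value
    err    error
}

results := make(chan result, total)

// Each worker sends exactly one value when it finishes:
//   results <- result{partno: partno, etag: etag, err: err}

// The caller collects all of them and can retry the failures.
etags := make(map[int]string, total)
for i := 0; i < total; i++ {
    r := <-results
    if r.err != nil {
        fmt.Println("part", r.partno, "failed:", r.err)
        continue // or re-queue r.partno for a retry
    }
    etags[r.partno] = r.etag
}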
Suggested changes
As noted by dev.bmax, buf is moved into the goroutine; as noted by Andy Schweig, partno is passed as a parameter to the anonymous function. I also added a WaitGroup, since UploadFile was exiting before the uploads were complete, and deferred f.Close(), which is a good habit.
package main

import (
    "fmt"
    "io"
    "os"
    "sync"
    "time"
)

const defaultChunkSize = 1 * 1024 * 1024

// wg waits for uploads to complete
var wg sync.WaitGroup

// Lets have 4 workers
var c = make(chan int, 4)

func UploadFile(f *os.File) error {
    // wait for all the uploads to complete before function exit
    defer wg.Wait()

    fi, err := f.Stat()
    if err != nil {
        return fmt.Errorf("err: %s", err)
    }
    size := fi.Size()
    fmt.Printf("file size: %v\n", size)
    total := int(size/defaultChunkSize + 1)

    // Upload parts
    for partno := 1; partno <= total; partno++ {
        readChunk := func(offset int, buf []byte, partno int) (int, error) {
            fmt.Println("readChunk", partno, offset)
            n, err := f.ReadAt(buf, int64(offset))
            if err != nil {
                return n, err
            }
            return n, nil
        }

        // This will block if there are not enough worker slots available
        c <- partno

        // Register the worker before launching it, so wg.Wait cannot
        // return before the goroutine has called wg.Done.
        wg.Add(1)

        // The actual worker.
        go func(partno int) {
            defer wg.Done()
            buf := make([]byte, defaultChunkSize) // per-goroutine buffer
            offset := (partno - 1) * defaultChunkSize
            n, err := readChunk(offset, buf, partno)
            if err != nil && err != io.EOF {
                return
            }
            err = uploadPart(partno, buf[:n])
            if err != nil {
                fmt.Println("Uploadpart failed:", err)
            }
            <-c
        }(partno)
    }
    return nil
}

func uploadPart(partno int, buf []byte) error {
    fmt.Printf("Uploading partno: %d, buflen=%d\n", partno, len(buf))
    // Actually do the upload. Simulate a long-running task with a sleep.
    time.Sleep(time.Second)
    return nil
}

func main() {
    filename := "/home/matthewh/Downloads/largefile.tar.gz"
    fmt.Printf("Opening file: %s\n", filename)
    f, err := os.Open(filename)
    if err != nil {
        panic(err)
    }
    defer f.Close()
    UploadFile(f)
}
I'm sure you can deal with the buf situation a little more cleverly; here I'm just letting the garbage collector handle it. Since you limit your workers to a specific number (4), you really need only 4 x defaultChunkSize buffers. Please do share if you come up with something simple and share-worthy; one option is sketched below.
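For instance (a sketch of one option, my suggestion rather than tested code): recycle buffers through a small buffered channel, so only four are ever allocated:

// Allocate exactly 4 reusable buffers up front.
bufPool := make(chan []byte, 4)
for i := 0; i < 4; i++ {
    bufPool <- make([]byte, defaultChunkSize)
}

// Inside each worker goroutine:
buf := <-bufPool                  // take a free buffer
defer func() { bufPool <- buf }() // return it when the upload is done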
Have fun!
Say I would like to generate a secure random int between 0 and 27 using:
func Int(rand io.Reader, max *big.Int) (n *big.Int, err error)
in the "crypto/rand" package.
How would I do that?
I do not really understand how this works. Why does it not return one of the built-in Go integer types instead of a pointer to a big.Int?
EDIT:
Would this be considered secure enough for tokens?
func getToken(length int) string {
    token := ""
    codeAlphabet := "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    codeAlphabet += "abcdefghijklmnopqrstuvwxyz"
    codeAlphabet += "0123456789"
    for i := 0; i < length; i++ {
        token += string(codeAlphabet[cryptoRandSecure(int64(len(codeAlphabet)))])
    }
    return token
}

func cryptoRandSecure(max int64) int64 {
    nBig, err := rand.Int(rand.Reader, big.NewInt(max))
    if err != nil {
        log.Println(err)
    }
    return nBig.Int64()
}

func main() {
    fmt.Println(getToken(32))
}
This would output something like this:
qZDbuPwNQGrgVmZCU9A7FUWbp8eIfn0Z
EwZVoQ5D5SEfdhiRsDfH6dU6tAovILCZ
cOqzODVP0GwbiNBwtmqLA78rFgV9d3VT
Here is some working code:

package main

import (
    "crypto/rand"
    "fmt"
    "math/big"
)

func main() {
    nBig, err := rand.Int(rand.Reader, big.NewInt(27))
    if err != nil {
        panic(err)
    }
    n := nBig.Int64()
    fmt.Printf("Here is a random %T in [0,27) : %d\n", n, n)
}
But to generate a random token, I'd do something like this:

package main

import (
    "crypto/rand"
    "encoding/base32"
    "fmt"
)

func main() {
    token := getToken(10)
    fmt.Println("Here is a random token : ", token)
}

func getToken(length int) string {
    randomBytes := make([]byte, 32)
    _, err := rand.Read(randomBytes)
    if err != nil {
        panic(err)
    }
    return base32.StdEncoding.EncodeToString(randomBytes)[:length]
}
If you're generating secure tokens for session IDs, OAuth Bearer tokens, CSRF, or similar, you want a token of (ideally) 256 bits (32 bytes), and no less than 192 bits (24 bytes).
A token drawn from values between 0 and 27 can be brute-forced in less than a second and cannot be considered secure. (For comparison, each character of the question's getToken output is drawn from a 62-character alphabet and so carries about 5.95 bits, giving a 32-character token roughly 190 bits.)
e.g.

package main

import (
    "crypto/rand"
    "encoding/base64"
    "fmt"
)

// GenerateRandomBytes returns securely generated random bytes.
// It will return an error if the system's secure random
// number generator fails to function correctly, in which
// case the caller should not continue.
func GenerateRandomBytes(n int) ([]byte, error) {
    b := make([]byte, n)
    _, err := rand.Read(b)
    // Note that err == nil only if we read len(b) bytes.
    if err != nil {
        return nil, err
    }
    return b, nil
}

// GenerateRandomString returns a URL-safe, base64 encoded
// securely generated random string.
func GenerateRandomString(s int) (string, error) {
    b, err := GenerateRandomBytes(s)
    return base64.URLEncoding.EncodeToString(b), err
}

func main() {
    // Example: this will give us a 44 byte, base64 encoded output
    token, err := GenerateRandomString(32)
    if err != nil {
        // Serve an appropriately vague error to the
        // user, but log the details internally.
    }
    fmt.Println(token)
}
The base64 output is safe for headers, HTTP forms, JSON bodies, etc.
If you need an integer it may help to explain your use-case, as it would be odd for a system to require tokens as ints.
If you only need a small number (i.e. [0, 255]), you could just read a byte out of the package's Reader:
b := []byte{0}
if _, err := rand.Reader.Read(b); err != nil {
    panic(err)
}
n := b[0]
fmt.Println(n)
Playground: http://play.golang.org/p/4VO52LiEVh (the example won't run there; I don't know whether that's intended behavior or a playground bug).
I am trying to read a buffered stream of signed 16-bit integers (WAV format), but the bufio Read method only accepts a slice of bytes. My question has two parts:
Can I pre-format the byte stream into a buffered int16 slice?
If I can't, what's the best way of post-processing the byte slice into an int16 slice? My initial thought is to use temporary slices and keep pushing/processing them, but I was curious whether there is a more idiomatic way of doing this.
package main

import (
    "bufio"
    "io"
    "log"
    "os/exec"
)

func main() {
    app := "someapp"
    cmd := exec.Command(app)
    stdout, err := cmd.StdoutPipe()
    if err != nil {
        log.Fatal(err)
    }
    r := bufio.NewReader(stdout)

    if err := cmd.Start(); err != nil {
        log.Fatal(err)
    }

    // "someapp" outputs signed 16-bit integers (little endian)
    buf := make([]byte, 0, 4*1024)
    for {
        n, err := r.Read(buf[:cap(buf)]) // r.Read only accepts type []byte
        buf = buf[:n]
        if n == 0 {
            if err == nil {
                continue
            }
            if err == io.EOF {
                break
            }
            log.Fatal(err)
        }
        log.Printf("%x\n", buf)
        // process buf here
        if err != nil && err != io.EOF {
            log.Fatal(err)
        }
    }
}
When working with I/O, you always work with []byte; there's no way to substitute []int16 for it, or to pre-format the stream as int16s. It's always a stream of bytes.
You can look at the encoding/binary package to decode this stream.
// to get the first uint16 as i
i := binary.LittleEndian.Uint16(buf[:2])
You can then iterate through the buf as needed.
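For example (a sketch, assuming buf holds an even number of bytes from the little-endian stream; samples is a name of mine):

// Convert the whole buffer into int16 samples.
samples := make([]int16, len(buf)/2)
for i := range samples {
    samples[i] = int16(binary.LittleEndian.Uint16(buf[2*i:]))
}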
You can also use binary.Read to read directly from the io.Reader.
var i uint16
for {
    err := binary.Read(r, binary.LittleEndian, &i)
    if err != nil {
        log.Println(err)
        break
    }
    fmt.Println(i)
}
It may be worth noting the simplicity of what needs to be done. Each uint16 is created via:

func (littleEndian) Uint16(b []byte) uint16 {
    return uint16(b[0]) | uint16(b[1])<<8
}
You can use encoding/binary.Read to fill an []int16 directly from your reader, although technically the answer to your first question is still no (check the source of binary.Read, it reads the data to a []byte first).
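A minimal sketch of that (binary.Read fills the slice completely or returns an error; the samples name and size are mine):

samples := make([]int16, 512)
if err := binary.Read(r, binary.LittleEndian, samples); err != nil {
    // io.ErrUnexpectedEOF means the stream ended mid-slice.
    log.Fatal(err)
}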
I am trying to parse a file that annoyingly consists of many separately zipped segments. I have parsed these segments one at a time into a slice of bytes, and I want to uncompress them as I go.
Here is my current code that does the decompressing, which doesn't work. from and to are just set at the top as an example; in reality they are set by the code. data is the byte slice containing the entire file. I don't want to seek through it while it's on disk, because it's located on another server, so it's only realistic for me to load the entire file into a []byte first and then parse it.
from, to := 0, 1000
b := bytes.NewReader(data[from : from+to])
z, err := zlib.NewReader(b)
CheckErr(err)
defer z.Close()
p := make([]byte, 0, 1024)
z.Read(p)
fmt.Println(string(p))
So how is it so massively difficult just to unzip a slice of bytes? Anyway...
The problem appears to be with how I am reading it out. Where it says z.Read, that doesn't seem to do anything.
How can I read the entire thing in one go into a slice of bytes?
Here's an outline for you. Note: In Go, CHECK FOR ERRORS!
package main

import (
    "bytes"
    "compress/zlib"
    "fmt"
    "io/ioutil"
)

func readSegment(data []byte, from, to int) ([]byte, error) {
    b := bytes.NewReader(data[from : from+to])
    z, err := zlib.NewReader(b)
    if err != nil {
        return nil, err
    }
    defer z.Close()
    p, err := ioutil.ReadAll(z)
    if err != nil {
        return nil, err
    }
    return p, nil
}

func main() {
    from, to := 0, 1000
    data := make([]byte, from+to)
    // ** parse input segments into data **
    p, err := readSegment(data, from, to)
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println(string(p))
}
Use ReadAll(r io.Reader) ([]byte, error) from the io/ioutil package, passing it the zlib reader (not the underlying bytes reader):

p, err := ioutil.ReadAll(z)
fmt.Println(string(p))
Read only reads up to the length of the given slice (1024 bytes in your case).
To read in chunks of 1024 bytes:

p := make([]byte, 1024)
for {
    numBytes, err := z.Read(p)
    if numBytes > 0 {
        // do what you want with p[:numBytes]
    }
    if err == io.EOF {
        break // you are done; the final read may return fewer than len(p) bytes
    }
    if err != nil {
        // handle the error
        break
    }
}
If you are getting the data from a webserver, you might even do:

import (
    "compress/zlib"
    "io/ioutil"
    "net/http"
)

...

resp, errGet := http.Get("http://example.com/somefile")
// do error handling
defer resp.Body.Close()

z, errZ := zlib.NewReader(resp.Body)
// do error handling
defer z.Close()

p, err := ioutil.ReadAll(z)
// do error handling

since resp.Body happens to be an io.Reader, as are most io-related types.