Here is a basic TCP server that accepts a connection, reads the incoming data, and writes it back.
package main

import (
    "bufio"
    "io"
    "log"
    "net"
)

func main() {
    li, err := net.Listen("tcp", ":8080")
    if err != nil {
        log.Fatalln(err)
    }
    defer li.Close()

    for {
        conn, err := li.Accept()
        if err != nil {
            log.Fatalln(err)
        }
        scanner := bufio.NewScanner(conn)
        for scanner.Scan() {
            ln := scanner.Text()
            io.WriteString(conn, ln+"\n")
        }
        conn.Close()
    }
}
However, there is a nested loop for the scanner, and a new scanner is declared on each iteration of the outer loop. I have heard that nested loops add extra complexity, and that maybe declaring a new scanner on each iteration of an infinite loop leads to memory leaks. Actually, I don't know how to do it another way, so I just want to ask two things:
Is it possible to do the same in another way?
Do we actually need more optimization on such a low-level server abstraction?
The outer loop is waiting for new connections and the inner loop is parsing the input data, so from that point of view it's fine; not all nested loops are evil. However, while you're handling that single connection, the server is not accepting any new ones (you can test that by trying to connect to the server from multiple clients). To fix that, handle each connection in a goroutine:
for {
    conn, err := li.Accept()
    if err != nil {
        log.Fatalln(err)
    }
    go func() {
        defer conn.Close()
        scanner := bufio.NewScanner(conn)
        for scanner.Scan() {
            ln := scanner.Text()
            io.WriteString(conn, ln+"\n")
        }
    }()
}
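To see the difference, connect with several clients at once. Below is a minimal test client sketch (assuming the server is running on localhost:8080); run a few copies in parallel and each should get its line echoed back:

package main

import (
    "bufio"
    "fmt"
    "log"
    "net"
)

func main() {
    conn, err := net.Dial("tcp", "localhost:8080")
    if err != nil {
        log.Fatalln(err)
    }
    defer conn.Close()

    fmt.Fprintln(conn, "hello") // send one line to be echoed
    reply, err := bufio.NewReader(conn).ReadString('\n')
    if err != nil {
        log.Fatalln(err)
    }
    fmt.Print("echoed: ", reply)
}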
Related
Problem
I have written a TCP echo server in Go and I am trying to write/read as often as I can within 10 seconds to measure how much data was transferred in that time. Weirdly, the value is way too high and does not depend on the length of the byte array I am transferring (but it should!). It is always around 600k round trips in those 10 seconds (the length of the "result" array indicates how many write/read round trips were made). As soon as I add, say, a print statement to the server and the values get processed, I get more realistic values that depend on the length of the byte array.
Why doesn't the length of the byte array matter in the first case?
Code
Server
package main

import (
    "fmt"
    "log"
    "net"
)

func main() {
    tcpAddr, err := net.ResolveTCPAddr("tcp", fmt.Sprintf("127.0.0.1:8888"))
    checkError(err)
    ln, err := net.ListenTCP("tcp", tcpAddr)
    checkError(err)
    for {
        conn, err := ln.Accept()
        checkError(err)
        go handleConnection(conn)
    }
}

func checkError(err error) {
    if err != nil {
        log.Fatal(err)
    }
}

func handleConnection(conn net.Conn) {
    var input [1000000]byte
    for {
        n, err := conn.Read(input[0:])
        checkError(err)
        //fmt.Println(input[0:n])
        _, err = conn.Write(input[0:n])
        checkError(err)
    }
}
Client
package main

import (
    "fmt"
    "log"
    "net"
    "time"
)

var (
    result  []int
    elapsed time.Duration
)

func main() {
    input := make([]byte, 1000)
    tcpAddr, err := net.ResolveTCPAddr("tcp", "127.0.0.1:8888")
    checkError(err)
    conn, err := net.DialTCP("tcp", nil, tcpAddr)
    checkError(err)
    for start := time.Now(); time.Since(start) < time.Second*time.Duration(10); {
        startTimer := time.Now()
        _, err = conn.Write(input)
        checkError(err)
        _, err := conn.Read(input[0:])
        checkError(err)
        elapsed = time.Since(startTimer)
        result = append(result, int(elapsed))
    }
    fmt.Println(fmt.Sprintf("result: %v", len(result)))
}

func checkError(err error) {
    if err != nil {
        log.Fatal(err)
    }
}
Read in the client loop is not guaranteed to read all of the data sent in the previous call to Write.
When input is small enough to be transmitted in a single packet on the network, Read in the client returns all of the data in the previous call to Write in the client. In this mode, the application measures the time to execute request/response pairs.
For larger sizes of input, Read on the client can fall behind what the client is writing. When this happens, the calls to Read complete faster because they return data from an earlier call to Write. The application is pipelining in this mode, and the throughput for pipelining is higher than for request/response pairs. The client will not read all of the data in this mode, but the timing impact of that is not significant.
Use the following code to time request/response pairs for arbitrary sizes of input.
for start := time.Now(); time.Since(start) < time.Second*time.Duration(10); {
    startTimer := time.Now()
    _, err = conn.Write(input)
    checkError(err)
    _, err := io.ReadFull(conn, input) // <-- read all of the data (add "io" to the client's imports)
    checkError(err)
    elapsed = time.Since(startTimer)
    result = append(result, int(elapsed))
}
To measure full-on pipelining, modify the client to read and write from different goroutines. An example follows.
go func() {
    for start := time.Now(); time.Since(start) < time.Second*time.Duration(10); {
        _, err = conn.Write(input)
        checkError(err)
    }
    conn.CloseWrite() // tell server that we are done sending data
}()

start := time.Now()
output := make([]byte, 4096)
for {
    _, err := conn.Read(output)
    if err != nil {
        if err == io.EOF {
            break
        }
        checkError(err)
    }
}
fmt.Println(time.Since(start))
Several hundred MB of memory are allocated for 50 requests of 5 MB each, and the memory is never released afterwards.
How can I free this memory? Why does this happen?
I've tried this on Ubuntu, both on my home PC and on a VPS.
package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "time"
)

func main() {
    fmt.Println("start")
    for i := 0; i < 50; i++ {
        go func() {
            DoRequest()
        }()
        time.Sleep(10 * time.Millisecond)
    }
    time.Sleep(10 * time.Minute)
}

func DoRequest() error {
    requestUrl := "https://blockchain.info/rawblock/0000000000000000000eebedea046425bd54626e6c56eb032e66e714d0141ea6"
    req, err := http.NewRequest("GET", requestUrl, nil)
    if err != nil {
        return err
    }
    req.Header.Set("user-agent", "free")
    httpClient := &http.Client{
        Timeout: time.Second * 10,
    }
    resp, err := httpClient.Do(req)
    if resp != nil {
        defer resp.Body.Close()
    }
    if err != nil {
        return err
    }
    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return err
    }
    fmt.Println("bodylen", len(body))
    return nil
}
Around 400 MB ends up allocated.
You are creating an HTTP client for each goroutine.
An http.Client is designed to be created once and used many times. It is goroutine-safe, and it allows for connection reuse and other efficiency savings.
Create the http.Client once in main (instead of in your goroutines) and then pass this single reference to all 50 of your goroutines.
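A minimal sketch of that change, reusing the question's DoRequest with the client hoisted into main:

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "time"
)

func main() {
    // Created once in main; an *http.Client is goroutine-safe and
    // reuses connections across requests.
    httpClient := &http.Client{Timeout: time.Second * 10}
    for i := 0; i < 50; i++ {
        go func() {
            DoRequest(httpClient)
        }()
        time.Sleep(10 * time.Millisecond)
    }
    time.Sleep(10 * time.Minute)
}

// DoRequest is the question's function, changed only to accept the
// shared client instead of creating its own.
func DoRequest(httpClient *http.Client) error {
    requestUrl := "https://blockchain.info/rawblock/0000000000000000000eebedea046425bd54626e6c56eb032e66e714d0141ea6"
    req, err := http.NewRequest("GET", requestUrl, nil)
    if err != nil {
        return err
    }
    req.Header.Set("user-agent", "free")
    resp, err := httpClient.Do(req)
    if err != nil {
        return err
    }
    defer resp.Body.Close()
    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return err
    }
    fmt.Println("bodylen", len(body))
    return nil
}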
Edit: Also, while it may not make a practical difference in your case, the order for a request is usually like so:
resp, err := httpClient.Do(req)
if err != nil {
    return err // check the error first
}
defer resp.Body.Close() // no error, so resp will *not* be nil, and this is safe
Edit 2: As @Adrian mentioned, Go's garbage collection is not instantaneous, nor should it be, since it is an expensive operation. If you no longer need a block of memory, simply stop referencing it. Let the GC do its job so you can focus on yours!
If you're curious about the evolution of Go's GC:
https://blog.golang.org/ismmkeynote (heavy on the technical side)
What kind of Garbage Collection does Go use?
for i := 0; i < 50; i++ {
    go func() {
        DoRequest()
    }()
    time.Sleep(10 * time.Millisecond)
}
Never create goroutines like this. Always spawn goroutines in a way that cannot exhaust memory in any case, including the worst case.
A simple solution is to limit how many goroutines can be spawned (or running) at a time. You can then pre-calculate the worst-case memory usage by multiplying the maximum number of goroutines allowed at a time by the maximum memory one goroutine can use.
You can control the number of goroutine instances by using channels; see the semaphore sketch below.
Refer to the first answer to this Stack Overflow question:
Always have x number of goroutines running at any time
Always strike a balance between performance and required resources.
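A minimal sketch of that channel-based throttle (the names sem and maxWorkers are illustrative): a buffered channel works as a counting semaphore, so at most maxWorkers goroutines run at once.

package main

import (
    "fmt"
    "sync"
)

func main() {
    const maxWorkers = 5
    sem := make(chan struct{}, maxWorkers) // counting semaphore
    var wg sync.WaitGroup

    for i := 0; i < 50; i++ {
        wg.Add(1)
        sem <- struct{}{} // blocks once maxWorkers goroutines are in flight
        go func(i int) {
            defer wg.Done()
            defer func() { <-sem }() // release the slot
            fmt.Println("request", i) // stand-in for DoRequest()
        }(i)
    }
    wg.Wait()
}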
Update June 11, 2019: Here is an example Go program:
https://play.golang.org/p/HovNRgp6FxH
I want to write a mime/multipart message in Python to standard output and read that message in Golang using the mime/multipart package. This is just a learning exercise.
I tried simulating this example.
output.py
#!/usr/bin/env python2.7
import sys
s = "--foo\r\nFoo: one\r\n\r\nA section\r\n" +"--foo\r\nFoo: two\r\n\r\nAnd another\r\n" +"--foo--\r\n"
print s
main.go
package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "log"
    "mime/multipart"
    "os/exec"
    "sync"
)

var wg sync.WaitGroup

func main() {
    pr, pw := io.Pipe()
    defer pw.Close()
    cmd := exec.Command("python", "output.py")
    cmd.Stdout = pw
    mr := multipart.NewReader(pr, "foo")
    wg.Add(1)
    go func() {
        defer wg.Done()
        for {
            p, err := mr.NextPart()
            if err == io.EOF {
                fmt.Println("EOF")
                return
            }
            if err != nil {
                log.Fatal(err)
            }
            slurp, err := ioutil.ReadAll(p)
            if err != nil {
                log.Fatal(err)
            }
            fmt.Printf("Part : %q\n", slurp)
            return
        }
    }()
    if err := cmd.Start(); err != nil {
        log.Fatal(err)
    }
    cmd.Wait()
    wg.Wait()
}
Output of go run main.go:
fatal error: all goroutines are asleep - deadlock!
Other answers regarding this topic on Stack Overflow are about channels not being closed, but I am not even using a channel. I understand that somewhere there is an infinite loop or something similar, but I don't see it.
Try something like this (explanation below):
package main

import (
    "fmt"
    "io"
    "io/ioutil"
    "log"
    "mime/multipart"
    "os"
    "os/exec"
    "sync"

    "github.com/pkg/errors"
)

func readCommand(cmdStdout io.ReadCloser, wg *sync.WaitGroup, resc chan<- []byte, errc chan<- error) {
    defer wg.Done()
    defer close(errc)
    defer close(resc)
    mr := multipart.NewReader(cmdStdout, "foo")
    for {
        part, err := mr.NextPart()
        if err != nil {
            if err == io.EOF {
                fmt.Println("EOF")
            } else {
                errc <- errors.Wrap(err, "failed to get next part")
            }
            return
        }
        slurp, err := ioutil.ReadAll(part)
        if err != nil {
            errc <- errors.Wrap(err, "failed to read part")
            return
        }
        resc <- slurp
    }
}

func main() {
    cmd := exec.Command("python", "output.py")
    cmd.Stderr = os.Stderr
    pr, err := cmd.StdoutPipe()
    if err != nil {
        log.Fatal(err)
    }
    var wg sync.WaitGroup
    wg.Add(1)
    resc := make(chan []byte)
    errc := make(chan error)
    go readCommand(pr, &wg, resc, errc)
    if err := cmd.Start(); err != nil {
        log.Fatal(err)
    }
    for {
        select {
        case err, ok := <-errc:
            if !ok {
                errc = nil
                break
            }
            if err != nil {
                log.Fatal(errors.Wrap(err, "error from goroutine"))
            }
        case res, ok := <-resc:
            if !ok {
                resc = nil
                break
            }
            fmt.Printf("Part from goroutine: %q\n", res)
        }
        if errc == nil && resc == nil {
            break
        }
    }
    cmd.Wait()
    wg.Wait()
}
In no particular order:
Rather than using an io.Pipe() as the command's Stdout, just ask the command for its StdoutPipe(). cmd.Wait() will ensure it's closed for you.
Set cmd.Stderr to os.Stderr so that you can see errors generated by your Python program.
I noticed this program was hanging anytime the Python program wrote to standard error. Now it doesn't :)
Don't make the WaitGroup a global variable; pass a reference to it to the goroutine.
Rather than log.Fatal()ing inside the goroutine, create an error channel to communicate errors back to main().
Rather than printing results inside the goroutine, create a result channel to communicate results back to main().
Ensure channels are closed to prevent blocking/goroutine leaks.
Separate out the goroutine into a proper function to make the code easier to read and follow.
In this example, we can create the multipart.Reader() inside our goroutine, since this is the only part of our code that uses it.
Note that I am using Wrap() from the errors package to add context to the error messages. This is, of course, not relevant to your question, but is a good habit.
The for { select { ... } } part may be confusing. This is one article I found introducing the concept. Basically, select is letting us read from whichever of these two channels (resc and errc) are currently readable, and then setting each to nil when the channel is closed. When both channels are nil, the loop exits. This lets us handle "either a result or an error" as they come in.
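Distilled to its core, the nil-channel pattern looks like this; a self-contained sketch with two toy channels standing in for resc and errc:

package main

import "fmt"

func main() {
    resc := make(chan string)
    errc := make(chan error)
    go func() { resc <- "one"; close(resc) }()
    go func() { close(errc) }()

    // Drain both channels until both are closed; a closed channel is
    // set to nil so its case can never fire again.
    for resc != nil || errc != nil {
        select {
        case res, ok := <-resc:
            if !ok {
                resc = nil // stop selecting on resc
                continue
            }
            fmt.Println("result:", res)
        case err, ok := <-errc:
            if !ok {
                errc = nil // stop selecting on errc
                continue
            }
            fmt.Println("error:", err)
        }
    }
}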
Edit: As johandalabacka said on the Golang Forum, it looks like the main issue here was that Python on Windows was adding an extra \r to the output. The fix is for your Python program to omit the \r in the output string, or to use sys.stdout.write() instead of print. The output could also be cleaned up on the Golang side, but, aside from the parsing issue, this answer will still improve the concurrency mechanics of your program.
I need my program to sit in the middle of a connection and transfer data correctly in both directions. I wrote this code, but it does not work properly:
package main

import (
    "fmt"
    "net"
)

func main() {
    listener, err := net.Listen("tcp", ":8120")
    if err != nil {
        fmt.Println(err)
        return
    }
    defer listener.Close()
    fmt.Println("Server is listening...")
    for {
        var conn1, conn2 net.Conn
        var err error
        conn1, err = listener.Accept()
        if err != nil {
            fmt.Println(err)
            conn1.Close()
            continue
        }
        conn2, err = net.Dial("tcp", "185.151.245.51:80")
        if err != nil {
            fmt.Println(err)
            conn2.Close()
            continue
        }
        go handleConnection(conn1, conn2)
        go handleConnection(conn2, conn1)
    }
}

func handleConnection(conn1, conn2 net.Conn) {
    defer conn1.Close()
    for {
        input := make([]byte, 1024)
        n, err := conn1.Read(input)
        if n == 0 || err != nil {
            break
        }
        conn2.Write([]byte(input))
    }
}
The problem is that the data gets corrupted. For example: the left one is the original, the right one is what I got. The end of the received file is unreadable, but at the beginning everything is OK.
I tried changing the input slice size. If the size is > 0 and < 8, everything is fine, but slow. If I set the input size very large, the data corruption gets worse.
What am I doing wrong?
In handleConnection, you always write 1024 bytes, no matter what conn1.Read returns.
You want to write the data like this:
conn2.Write(input[:n])
You should also check your top-level for loop. Are you sure you're not accepting multiple connections and smushing them all together? I'd sprinkle in some log statements so you can see when connections are made and closed.
Another (probably inconsequential) mistake is that you treat n == 0 as a termination condition. The documentation of io.Reader recommends ignoring n == 0, err == nil. Without checking the code I can't be sure, but I expect that conn.Read never returns n == 0, err == nil, so it's unlikely that this is causing you trouble.
Although it doesn't affect correctness, you could also lift the definition of input out of the loop so that it's reused on each iteration; it's likely to reduce the amount of work the garbage collector has to do.
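Putting those fixes together, here is a sketch of a corrected handleConnection, a drop-in replacement for the question's version (buffer hoisted out of the loop, and only the n bytes actually read are written):

func handleConnection(conn1, conn2 net.Conn) {
    defer conn1.Close()
    input := make([]byte, 1024) // allocated once, reused on every iteration
    for {
        n, err := conn1.Read(input)
        if n > 0 {
            // Forward only the bytes that were actually read.
            if _, werr := conn2.Write(input[:n]); werr != nil {
                return
            }
        }
        if err != nil { // io.EOF or a real error: stop forwarding
            return
        }
    }
}

In practice, io.Copy(conn2, conn1) performs exactly this loop for you, including the input[:n] handling.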
I am trying to build a zip archive from a large number of small-to-medium sized files. I want to be able to do this concurrently, since compression is CPU intensive and I'm running on a multi-core server. Also, I don't want to hold the whole archive in memory, since it might turn out to be large.
My question is: do I have to compress every file separately and then manually combine everything together with the zip header, checksum, etc.?
Any help would be greatly appreciated.
I don't think you can combine the zip headers.
What you could do is, run the zip.Writer sequentially, in a separate goroutine, and then spawn a new goroutine for each file that you want to read, and pipe those to the goroutine that is zipping them.
This should reduce the IO overhead that you get by reading the files sequentially, although it probably won't leverage multiple cores for the archiving itself.
Here's a working example. Note that, to keep things simple,
it does not handle errors nicely, just panics if something goes wrong,
and it does not use the defer statement too much, to demonstrate the order in which things should happen.
Since defer is LIFO, it can sometimes be confusing when you stack a lot of them together.
package main

import (
    "archive/zip"
    "io"
    "os"
    "sync"
)

func ZipWriter(files chan *os.File) *sync.WaitGroup {
    f, err := os.Create("out.zip")
    if err != nil {
        panic(err)
    }
    var wg sync.WaitGroup
    wg.Add(1)
    zw := zip.NewWriter(f)
    go func() {
        // Note the order (LIFO):
        defer wg.Done() // 2. signal that we're done
        defer f.Close() // 1. close the file
        var err error
        var fw io.Writer
        for f := range files {
            // Loop until channel is closed.
            if fw, err = zw.Create(f.Name()); err != nil {
                panic(err)
            }
            io.Copy(fw, f)
            if err = f.Close(); err != nil {
                panic(err)
            }
        }
        // The zip writer must be closed *before* f.Close() is called!
        if err = zw.Close(); err != nil {
            panic(err)
        }
    }()
    return &wg
}

func main() {
    files := make(chan *os.File)
    wait := ZipWriter(files)
    // Send all files to the zip writer.
    var wg sync.WaitGroup
    wg.Add(len(os.Args) - 1)
    for i, name := range os.Args {
        if i == 0 {
            continue
        }
        // Read each file in parallel:
        go func(name string) {
            defer wg.Done()
            f, err := os.Open(name)
            if err != nil {
                panic(err)
            }
            files <- f
        }(name)
    }
    wg.Wait()
    // Once we're done sending the files, we can close the channel.
    close(files)
    // This will cause ZipWriter to break out of the loop, close the file,
    // and unblock the next mutex:
    wait.Wait()
}
Usage: go run example.go /path/to/*.log.
This is the order in which things should be happening:
Open output file for writing.
Create a zip.Writer with that file.
Kick off a goroutine listening for files on a channel.
Go through each file, this can be done in one goroutine per file.
Send each file to the goroutine created in step 3.
After processing each file in said goroutine, close the file to free up resources.
Once each file has been sent to said goroutine, close the channel.
Wait until the zipping has been done (which is done sequentially).
Once zipping is done (channel exhausted), the zip writer should be closed.
Only when the zip writer is closed, should the output file be closed.
Finally everything is closed, so signal completion via the sync.WaitGroup to tell the calling function that we're good to go. (A channel could also be used here, but sync.WaitGroup seems more elegant.)
When you get the signal from the zip writer that everything is properly closed, you can exit from main and terminate nicely.
This might not answer your question, but I've been using similar code to generate zip archives on-the-fly for a web service some time ago. It performed quite well, even though the actual zipping was done in a single goroutine. Overcoming the IO bottleneck can already be an improvement.
From the look of it, you won't be able to parallelise the compression using the standard library archive/zip package because:
Compression is performed by the io.Writer returned by zip.Writer.Create or CreateHeader.
Calling Create/CreateHeader implicitly closes the writer returned by the previous call.
So passing the writers returned by Create to multiple goroutines and writing to them in parallel will not work.
If you wanted to write your own parallel zip writer, you'd probably want to structure it something like this:
Have multiple goroutines compress files using the compress/flate package, and keep track of the CRC32 value and length of the uncompressed data. The output should be directed to temporary files. Note the compressed size of the data. (A sketch of this step appears below.)
Once everything has been compressed, start writing the Zip file starting with the header.
Write out the file header followed by the contents of the corresponding temporary file for each compressed file.
Write out the central directory record and end record at the end of the file. All the required information should be available at this point.
For added parallelism, step 1 could be performed in parallel with the remaining steps by using a channel to indicate when compression of each file completes.
Due to the file format, you won't be able to perform parallel compression without either storing compressed data in memory or in temporary files.
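To make step 1 concrete, here is a minimal sketch under the assumptions above: compress one file to a temporary file with compress/flate while recording the CRC32 and the sizes needed later for the zip headers. The compressedPart type and compressToTemp function are illustrative names, not a real API, and steps 2-4 (writing the local file headers, the compressed data, and the central directory) are left out.

package main

import (
    "compress/flate"
    "fmt"
    "hash/crc32"
    "io"
    "log"
    "os"
)

type compressedPart struct {
    name             string
    tmpPath          string // where the compressed bytes were written
    crc              uint32
    uncompressedSize uint64
    compressedSize   uint64
}

func compressToTemp(path string) (*compressedPart, error) {
    in, err := os.Open(path)
    if err != nil {
        return nil, err
    }
    defer in.Close()

    tmp, err := os.CreateTemp("", "zippart-*")
    if err != nil {
        return nil, err
    }
    defer tmp.Close()

    crc := crc32.NewIEEE()
    fw, err := flate.NewWriter(tmp, flate.DefaultCompression)
    if err != nil {
        return nil, err
    }
    // One read pass feeds both the CRC32 hash and the compressor.
    n, err := io.Copy(io.MultiWriter(crc, fw), in)
    if err != nil {
        return nil, err
    }
    if err := fw.Close(); err != nil {
        return nil, err
    }
    compressed, err := tmp.Seek(0, io.SeekCurrent) // bytes written so far
    if err != nil {
        return nil, err
    }
    return &compressedPart{
        name:             path,
        tmpPath:          tmp.Name(),
        crc:              crc.Sum32(),
        uncompressedSize: uint64(n),
        compressedSize:   uint64(compressed),
    }, nil
}

func main() {
    for _, name := range os.Args[1:] {
        part, err := compressToTemp(name)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("%s: crc=%08x in=%d out=%d (tmp %s)\n",
            part.name, part.crc, part.uncompressedSize, part.compressedSize, part.tmpPath)
    }
}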
With Go 1.17, parallel compression and merging of zip files is possible using the archive/zip package.
An example is below. In it, I create zip workers that each build an individual zip file, plus an entry provider worker that feeds the entries to be added via a channel to the zip workers. Actual files could be provided to the zip workers, but I skipped that part.
package main

import (
    "archive/zip"
    "context"
    "fmt"
    "io"
    "log"
    "os"
    "strings"

    "golang.org/x/sync/errgroup"
)

const numOfZipWorkers = 10

type entry struct {
    name string
    rc   io.ReadCloser
}

func main() {
    log.SetFlags(log.LstdFlags | log.Lshortfile)
    entCh := make(chan entry, numOfZipWorkers)
    zpathCh := make(chan string, numOfZipWorkers)
    group, ctx := errgroup.WithContext(context.Background())
    for i := 0; i < numOfZipWorkers; i++ {
        group.Go(func() error {
            return zipWorker(ctx, entCh, zpathCh)
        })
    }
    group.Go(func() error {
        defer close(entCh) // Signal workers to stop.
        return entryProvider(ctx, entCh)
    })
    err := group.Wait()
    if err != nil {
        log.Fatal(err)
    }
    f, err := os.OpenFile("output.zip", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
    if err != nil {
        log.Fatal(err)
    }
    zw := zip.NewWriter(f)
    close(zpathCh)
    for path := range zpathCh {
        zrd, err := zip.OpenReader(path)
        if err != nil {
            log.Fatal(err)
        }
        for _, zf := range zrd.File {
            err := zw.Copy(zf)
            if err != nil {
                log.Fatal(err)
            }
        }
        _ = zrd.Close()
        _ = os.Remove(path)
    }
    err = zw.Close()
    if err != nil {
        log.Fatal(err)
    }
    err = f.Close()
    if err != nil {
        log.Fatal(err)
    }
}

func entryProvider(ctx context.Context, entCh chan<- entry) error {
    for i := 0; i < 2*numOfZipWorkers; i++ {
        select {
        case <-ctx.Done():
            return ctx.Err()
        case entCh <- entry{
            name: fmt.Sprintf("file_%d", i+1),
            rc:   io.NopCloser(strings.NewReader(fmt.Sprintf("content %d", i+1))),
        }:
        }
    }
    return nil
}

func zipWorker(ctx context.Context, entCh <-chan entry, zpathch chan<- string) error {
    f, err := os.CreateTemp(".", "tmp-part-*")
    if err != nil {
        return err
    }
    zw := zip.NewWriter(f)
Loop:
    for {
        var (
            ent entry
            ok  bool
        )
        select {
        case <-ctx.Done():
            err = ctx.Err()
            break Loop
        case ent, ok = <-entCh:
            if !ok {
                break Loop
            }
        }
        hdr := &zip.FileHeader{
            Name:   ent.name,
            Method: zip.Deflate, // zip.Store can also be used.
        }
        hdr.SetMode(0644)
        w, e := zw.CreateHeader(hdr)
        if e != nil {
            _ = ent.rc.Close()
            err = e
            break
        }
        _, e = io.Copy(w, ent.rc)
        _ = ent.rc.Close()
        if e != nil {
            err = e
            break
        }
    }
    if e := zw.Close(); e != nil && err == nil {
        err = e
    }
    if e := f.Close(); e != nil && err == nil {
        err = e
    }
    if err == nil {
        select {
        case <-ctx.Done():
            err = ctx.Err()
        case zpathch <- f.Name():
        }
    }
    return err
}