I'm using goroutine to traverse all files in the specified directory. I want to know how to limit the number of goroutines under recursive traversal
I wrote this code, but it prompted panic chan send
`
package main
import (
"fmt"
"os"
"runtime"
"sync"
"time"
)
var (
wg sync.WaitGroup
// limit = make(chan bool, 20)
// totalchan = make(chan bool)
)
func Work(path string, size int, finished bool) {
fmt.Printf("gn1111: %d\n", runtime.NumGoroutine())
defer func() { wg.Done() }()
fl, err := os.ReadDir(path)
if err == nil {
for _, file := range fl {
if file.IsDir() {
// limit <- true
wg.Add(1)
go Work(path+file.Name()+"/", size, false)
} else {
fmt.Printf("gn2222: %d\n", runtime.NumGoroutine())
}
}
}
// if !finished {
// <-limit
// }
}
func main() {
path := "C:/Windows/"
size := 0
start := time.Now()
wg.Add(1)
go Work(path, size, true)
wg.Wait()
fmt.Printf("total= %d, cost: %v\n", 0, time.Since(start))
}
`
the errgroup has a Limiting functionality
SetLimit limits the number of active goroutines in this group to at most n. A negative value indicates no limit.
Any subsequent call to the Go method will block until it can add an active goroutine without exceeding the configured limit.
The limit must not be modified while any goroutines in the group are active.
Related
I want to process from channel if channel is full or certain time is elapsed. My use case is similar to existing question and I have tried to modify the existing ans
My code is at https://go.dev/play/p/HaGZ9HHqj0i,
package main
import (
"fmt"
"sync"
"time"
)
type Audit struct {
ID int
}
const batchSize = 5
var batch = make([]Audit, 0, batchSize)
func upsertBigQueryAudits(audits []Audit) {
fmt.Printf("Processing batch of %d\n", len(audits))
for _, a := range audits {
fmt.Printf("%d ", a.ID)
}
fmt.Println()
batch = []Audit{}
}
func process(full <-chan struct{}) {
ticker := time.NewTicker(1 * time.Nanosecond)
for {
select {
case <-full:
fmt.Println("From full")
upsertBigQueryAudits(batch)
case <-ticker.C:
fmt.Println("From ticker")
if len(batch) > 0 {
fmt.Println("From ticker sending batch")
upsertBigQueryAudits(batch)
}
}
}
}
func processAudits(audits <-chan Audit, full chan<- struct{}, batchSize int) {
for audit := range audits {
batch = append(batch, audit)
if len(batch) == cap(batch) {
// upsertBigQueryAudits(batch)
fmt.Println("Sending full")
full <- struct{}{}
}
}
}
func produceAudits(x int, to chan Audit) {
for i := 0; i < x; i++ {
to <- Audit{
ID: i,
}
}
}
func main() {
var wg sync.WaitGroup
audits := make(chan Audit)
full := make(chan struct{})
wg.Add(1)
go func() {
defer wg.Done()
process(full)
}()
wg.Add(1)
go func() {
defer wg.Done()
processAudits(audits, full, batchSize)
}()
wg.Add(1)
go func() {
defer wg.Done()
produceAudits(25, audits)
close(audits)
}()
wg.Wait()
fmt.Println("Complete")
}
Here I want to process the batch when it is filled with batchSize elements Or when certain time (for eg 1 nano second) is elapsed.
Edit: Added my concern inline.
This is sample output.
timeout running program
Sending full <--- this line comes twice
Sending full
From full
Processing batch of 10 <--- instead of 5 it is processing 10 items
0 1 2 3 4 5 6 7 8 9
From full
Processing batch of 1
10
Sending full
Sending full
From full
Processing batch of 1 <-- instead of 5 it is processing 1 items. It does not process more than 1 item.
11
From full
Processing batch of 0
...
...
Sending full
Sending full
From full
Processing batch of 1 <-- instead of 5 it is processing 1 items
24
From full
Processing batch of 0
From ticker <-- this line comes only after consuming 25 items.
From ticker
From ticker
From ticker
I found answer at https://elliotchance.medium.com/batch-a-channel-by-size-or-time-in-go-92fa3098f65
package main
import (
"context"
"fmt"
"time"
)
func BatchStringsCtx[T any](ctx context.Context, values <-chan T, maxItems int, maxTimeout time.Duration) chan []T {
batches := make(chan []T)
go func() {
defer close(batches)
for keepGoing := true; keepGoing; {
var batch []T
expire := time.After(maxTimeout)
for {
select {
case <-ctx.Done():
keepGoing = false
goto done
case value, ok := <-values:
if !ok {
keepGoing = false
goto done
}
batch = append(batch, value)
if len(batch) == maxItems {
goto done
}
case <-expire:
goto done
}
}
done:
if len(batch) > 0 {
batches <- batch
}
}
}()
return batches
}
func main() {
strings := make(chan string)
go func() {
strings <- "hello"
strings <- "there" // hit limit of 2
strings <- "how"
time.Sleep(15 * time.Millisecond) // hit timeout
strings <- "are"
strings <- "you" // hit limit of 2
// A really long time without any new values.
// The context was cancelled around 300ms,
// before this sleep finished.
time.Sleep(500 * time.Millisecond)
strings <- "doing?" // never read
close(strings)
}()
//ctx, cancel := context.WithTimeout(context.Background(), 300 * time.Millisecond)
ctx := context.Background()
start := time.Now()
batches := BatchStringsCtx[string](ctx, strings, 2, 10*time.Millisecond)
for batch := range batches {
fmt.Println(time.Now().Sub(start), batch)
}
}
I'm trying to run a function concurrently. It makes a call to my DB that may take 2-10 seconds. I would like it to continue on to the next routine once it has finished, even if the other one is still processing, but only ever want it be processing a max of 2 at a time. I want this to happen indefinitely. I feel like I'm almost there, but waitGroup forces both routines to wait until completion prior to continuing to another iteration.
const ROUTINES = 2;
for {
var wg sync.WaitGroup
_, err:= db.Exec(`Random DB Call`)
if err != nil {
panic(err)
}
ch := createRoutines(db, &wg)
wg.Add(ROUTINES)
for i := 1; i <= ROUTINES; i++ {
ch <- i
time.Sleep(2 * time.Second)
}
close(ch)
wg.Wait()
}
func createRoutines(db *sqlx.DB, wg *sync.WaitGroup) chan int {
var ch = make(chan int, 5)
for i := 0; i < ROUTINES ; i++ {
go func(db *sqlx.DB) {
defer wg.Done()
for {
_, ok := <-ch
if !ok {
return
}
doStuff(db)
}
}(db)
}
return ch
}
If you need to only have n number of goroutines running at the same time, you can have a buffered channel of size n and use that to block creating new goroutines when there is no space left, something like this
package main
import (
"fmt"
"math/rand"
"time"
)
func main() {
const ROUTINES = 2
rand.Seed(time.Now().UnixNano())
stopper := make(chan struct{}, ROUTINES)
var counter int
for {
counter++
stopper <- struct{}{}
go func(c int) {
fmt.Println("+ Starting goroutine", c)
time.Sleep(time.Duration(rand.Intn(3)) * time.Second)
fmt.Println("- Stopping goroutine", c)
<-stopper
}(counter)
}
}
In this example you see how you can only have ROUTINES number of goroutines that live 0, 1 or 2 seconds. In the output you can also see how every time one goroutine ends another one starts.
This adds an external dependency, but consider this implementation:
package main
import (
"context"
"database/sql"
"log"
"github.com/MicahParks/ctxerrpool"
)
func main() {
// Create a pool of 2 workers for database queries. Log any errors.
databasePool := ctxerrpool.New(2, func(_ ctxerrpool.Pool, err error) {
log.Printf("Failed to execute database query.\nError: %s", err.Error())
})
// Get a list of queries to execute.
queries := []string{
"SELECT first_name, last_name FROM customers",
"SELECT price FROM inventory WHERE sku='1234'",
"other queries...",
}
// TODO Make a database connection.
var db *sql.DB
for _, query := range queries {
// Intentionally shadow the looped variable for scope.
query := query
// Perform the query on a worker. If no worker is ready, it will block until one is.
databasePool.AddWorkItem(context.TODO(), func(workCtx context.Context) (err error) {
_, err = db.ExecContext(workCtx, query)
return err
})
}
// Wait for all workers to finish.
databasePool.Wait()
}
I have a simple concurrency use case in go, and I cannot figure out an elegant solution to my problem.
I want to write a method fetchAll that queries an unspecified number of resources from remote servers in parallel. If any of the fetches fails, I want to return that first error immediately.
My initial implementation leaks goroutines:
package main
import (
"fmt"
"math/rand"
"sync"
"time"
)
func fetchAll() error {
wg := sync.WaitGroup{}
errs := make(chan error)
leaks := make(map[int]struct{})
defer fmt.Println("these goroutines leaked:", leaks)
// run all the http requests in parallel
for i := 0; i < 4; i++ {
leaks[i] = struct{}{}
wg.Add(1)
go func(i int) {
defer wg.Done()
defer delete(leaks, i)
// pretend this does an http request and returns an error
time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
errs <- fmt.Errorf("goroutine %d's error returned", i)
}(i)
}
// wait until all the fetches are done and close the error
// channel so the loop below terminates
go func() {
wg.Wait()
close(errs)
}()
// return the first error
for err := range errs {
if err != nil {
return err
}
}
return nil
}
func main() {
fmt.Println(fetchAll())
}
Playground: https://play.golang.org/p/Be93J514R5
I know from reading https://blog.golang.org/pipelines that I can create a signal channel to cleanup the other threads. Alternatively, I could probably use context to accomplish it. But it seems like such a simple use case should have a simpler solution that I'm missing.
Using Error Group makes this even simpler. This automatically waits for all the supplied Go Routines to complete successfully, or cancels all those remaining in the case of any one routine returning an error (in which case that error is the one bubble back up to the caller).
package main
import (
"context"
"fmt"
"math/rand"
"time"
"golang.org/x/sync/errgroup"
)
func fetchAll(ctx context.Context) error {
errs, ctx := errgroup.WithContext(ctx)
// run all the http requests in parallel
for i := 0; i < 4; i++ {
errs.Go(func() error {
// pretend this does an http request and returns an error
time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
return fmt.Errorf("error in go routine, bailing")
})
}
// Wait for completion and return the first error (if any)
return errs.Wait()
}
func main() {
fmt.Println(fetchAll(context.Background()))
}
All but one of your goroutines are leaked, because they're still waiting to send to the errs channel - you never finish the for-range that empties it. You're also leaking the goroutine who's job is to close the errs channel, because the waitgroup is never finished.
(Also, as Andy pointed out, deleting from map is not thread-safe, so that'd need protection from a mutex.)
However, I don't think maps, mutexes, waitgroups, contexts etc. are even necessary here. I'd rewrite the whole thing to just use basic channel operations, something like the following:
package main
import (
"fmt"
"math/rand"
"time"
)
func fetchAll() error {
var N = 4
quit := make(chan bool)
errc := make(chan error)
done := make(chan error)
for i := 0; i < N; i++ {
go func(i int) {
// dummy fetch
time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
err := error(nil)
if rand.Intn(2) == 0 {
err = fmt.Errorf("goroutine %d's error returned", i)
}
ch := done // we'll send to done if nil error and to errc otherwise
if err != nil {
ch = errc
}
select {
case ch <- err:
return
case <-quit:
return
}
}(i)
}
count := 0
for {
select {
case err := <-errc:
close(quit)
return err
case <-done:
count++
if count == N {
return nil // got all N signals, so there was no error
}
}
}
}
func main() {
rand.Seed(time.Now().UnixNano())
fmt.Println(fetchAll())
}
Playground link: https://play.golang.org/p/mxGhSYYkOb
EDIT: There indeed was a silly mistake, thanks for pointing it out. I fixed the code above (I think...). I also added some randomness for added Realism™.
Also, I'd like to stress that there really are multiple ways to approach this problem, and my solution is but one way. Ultimately it comes down to personal taste, but in general, you want to strive towards "idiomatic" code - and towards a style that feels natural and easy to understand for you.
Here's a more complete example using errgroup suggested by joth. It shows processing successful data, and will exit on the first error.
https://play.golang.org/p/rU1v-Mp2ijo
package main
import (
"context"
"fmt"
"golang.org/x/sync/errgroup"
"math/rand"
"time"
)
func fetchAll() error {
g, ctx := errgroup.WithContext(context.Background())
results := make(chan int)
for i := 0; i < 4; i++ {
current := i
g.Go(func() error {
// Simulate delay with random errors.
time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond)
if rand.Intn(2) == 0 {
return fmt.Errorf("goroutine %d's error returned", current)
}
// Pass processed data to channel, or receive a context completion.
select {
case results <- current:
return nil
// Close out if another error occurs.
case <-ctx.Done():
return ctx.Err()
}
})
}
// Elegant way to close out the channel when the first error occurs or
// when processing is successful.
go func() {
g.Wait()
close(results)
}()
for result := range results {
fmt.Println("processed", result)
}
// Wait for all fetches to complete.
return g.Wait()
}
func main() {
fmt.Println(fetchAll())
}
As long as each goroutine completes, you won't leak anything. You should create the error channel as buffered with the buffer size equal to the number of goroutines so that the send operations on the channel won't block. Each goroutine should always send something on the channel when it finishes, whether it succeeds or fails. The loop at the bottom can then just iterate for the number of goroutines and return if it gets a non-nil error. You don't need the WaitGroup or the other goroutine that closes the channel.
I think the reason it appears that goroutines are leaking is that you return when you get the first error, so some of them are still running.
By the way, maps are not goroutine safe. If you share a map among goroutines and some of them are making changes to the map, you need to protect it with a mutex.
This answer includes the ability to get the responses back into doneData -
package main
import (
"fmt"
"math/rand"
"os"
"strconv"
)
var doneData []string // responses
func fetchAll(n int, doneCh chan bool, errCh chan error) {
partialDoneCh := make(chan string)
for i := 0; i < n; i++ {
go func(i int) {
if r := rand.Intn(100); r != 0 && r%10 == 0 {
// simulate an error
errCh <- fmt.Errorf("e33or for reqno=" + strconv.Itoa(r))
} else {
partialDoneCh <- strconv.Itoa(i)
}
}(i)
}
// mutation of doneData
for d := range partialDoneCh {
doneData = append(doneData, d)
if len(doneData) == n {
close(partialDoneCh)
doneCh <- true
}
}
}
func main() {
// rand.Seed(1)
var n int
var e error
if len(os.Args) > 1 {
if n, e = strconv.Atoi(os.Args[1]); e != nil {
panic(e)
}
} else {
n = 5
}
doneCh := make(chan bool)
errCh := make(chan error)
go fetchAll(n, doneCh, errCh)
fmt.Println("main: end")
select {
case <-doneCh:
fmt.Println("success:", doneData)
case e := <-errCh:
fmt.Println("failure:", e, doneData)
}
}
Execute using go run filename.go 50 where N=50 i.e amount of parallelism
I'm here to find out the most idiomatic way to do the follow task.
Task:
Write data from a channel to a file.
Problem:
I have a channel ch := make(chan int, 100)
I need to read from the channel and write the values I read from the channel to a file. My question is basically how do I do so given that
If channel ch is full, write the values immediately
If channel ch is not full, write every 5s.
So essentially, data needs to be written to the file at least every 5s (assuming that data will be filled into the channel at least every 5s)
Whats the best way to use select, for and range to do my above task?
Thanks!
There is no such "event" as "buffer of channel is full", so you can't detect that [*]. This means you can't idiomatically solve your problem with language primitives using only 1 channel.
[*] Not entirely true: you could detect if the buffer of a channel is full by using select with default case when sending on the channel, but that requires logic from the senders, and repetitive attempts to send.
I would use another channel from which I would receive as values are sent on it, and "redirect", store the values in another channel which has a buffer of 100 as you mentioned. At each redirection you may check if the internal channel's buffer is full, and if so, do an immediate write. If not, continue to monitor the "incoming" channel and a timer channel with a select statement, and if the timer fires, do a "regular" write.
You may use len(chInternal) to check how many elements are in the chInternal channel, and cap(chInternal) to check its capacity. Note that this is "safe" as we are the only goroutine handling the chInternal channel. If there would be multiple goroutines, value returned by len(chInternal) could be outdated by the time we use it to something (e.g. comparing it).
In this solution chInternal (as its name says) is for internal use only. Others should only send values on ch. Note that ch may or may not be a buffered channel, solution works in both cases. However, you may improve efficiency if you also give some buffer to ch (so chances that senders get blocked will be lower).
var (
chInternal = make(chan int, 100)
ch = make(chan int) // You may (should) make this a buffered channel too
)
func main() {
delay := time.Second * 5
timer := time.NewTimer(delay)
for {
select {
case v := <-ch:
chInternal <- v
if len(chInternal) == cap(chInternal) {
doWrite() // Buffer is full, we need to write immediately
timer.Reset(delay)
}
case <-timer.C:
doWrite() // "Regular" write: 5 seconds have passed since last write
timer.Reset(delay)
}
}
}
If an immediate write happens (due to a "buffer full" situation), this solution will time the next "regular" write 5 seconds after this. If you don't want this and you want the 5-second regular writes be independent from the immediate writes, simply do not reset the timer following the immediate write.
An implementation of doWrite() may be as follows:
var f *os.File // Make sure to open file for writing
func doWrite() {
for {
select {
case v := <-chInternal:
fmt.Fprintf(f, "%d ", v) // Write v to the file
default: // Stop when no more values in chInternal
return
}
}
}
We can't use for ... range as that only returns when the channel is closed, but our chInternal channel is not closed. So we use a select with a default case so when no more values are in the buffer of chInternal, we return.
Improvements
Using a slice instead of 2nd channel
Since the chInternal channel is only used by us, and only on a single goroutine, we may also choose to use a single []int slice instead of a channel (reading/writing a slice is much faster than a channel).
Showing only the different / changed parts, it could look something like this:
var (
buf = make([]int, 0, 100)
)
func main() {
// ...
for {
select {
case v := <-ch:
buf = append(buf, v)
if len(buf) == cap(buf) {
// ...
}
}
func doWrite() {
for _, v := range buf {
fmt.Fprintf(f, "%d ", v) // Write v to the file
}
buf = buf[:0] // "Clear" the buffer
}
With multiple goroutines
If we stick to leave chInternal a channel, the doWrite() function may be called on another goroutine to not block the other one, e.g. go doWrite(). Since data to write is read from a channel (chInternal), this requires no further synchronization.
if you just use 5 seconds write, to increase the file write performance,
you may fill the channel any time you need,
then writer goroutine writes that data to the buffered file,
see this very simple and idiomatic sample without using timer
with just using for...range:
package main
import (
"bufio"
"fmt"
"os"
"sync"
)
var wg sync.WaitGroup
func WriteToFile(filename string, ch chan int) {
f, e := os.Create(filename)
if e != nil {
panic(e)
}
w := bufio.NewWriterSize(f, 4*1024*1024)
defer wg.Done()
defer f.Close()
defer w.Flush()
for v := range ch {
fmt.Fprintf(w, "%d ", v)
}
}
func main() {
ch := make(chan int, 100)
wg.Add(1)
go WriteToFile("file.txt", ch)
for i := 0; i < 500000; i++ {
ch <- i // do the job
}
close(ch) // Finish the job and close output file
wg.Wait()
}
and notice the defers order.
and in case of 5 seconds write, you may add one interval timer just to flush the buffer of this file to the disk, like this:
package main
import (
"bufio"
"fmt"
"os"
"sync"
"time"
)
var wg sync.WaitGroup
func WriteToFile(filename string, ch chan int) {
f, e := os.Create(filename)
if e != nil {
panic(e)
}
w := bufio.NewWriterSize(f, 4*1024*1024)
ticker := time.NewTicker(5 * time.Second)
quit := make(chan struct{})
go func() {
for {
select {
case <-ticker.C:
if w.Buffered() > 0 {
fmt.Println(w.Buffered())
w.Flush()
}
case <-quit:
ticker.Stop()
return
}
}
}()
defer wg.Done()
defer f.Close()
defer w.Flush()
defer close(quit)
for v := range ch {
fmt.Fprintf(w, "%d ", v)
}
}
func main() {
ch := make(chan int, 100)
wg.Add(1)
go WriteToFile("file.txt", ch)
for i := 0; i < 25; i++ {
ch <- i // do the job
time.Sleep(500 * time.Millisecond)
}
close(ch) // Finish the job and close output file
wg.Wait()
}
here I used time.NewTicker(5 * time.Second) for interval timer with quit channel, you may use time.AfterFunc() or time.Tick() or time.Sleep().
with some optimizations ( removing quit channel):
package main
import (
"bufio"
"fmt"
"os"
"sync"
"time"
)
var wg sync.WaitGroup
func WriteToFile(filename string, ch chan int) {
f, e := os.Create(filename)
if e != nil {
panic(e)
}
w := bufio.NewWriterSize(f, 4*1024*1024)
ticker := time.NewTicker(5 * time.Second)
defer wg.Done()
defer f.Close()
defer w.Flush()
for {
select {
case v, ok := <-ch:
if ok {
fmt.Fprintf(w, "%d ", v)
} else {
fmt.Println("done.")
ticker.Stop()
return
}
case <-ticker.C:
if w.Buffered() > 0 {
fmt.Println(w.Buffered())
w.Flush()
}
}
}
}
func main() {
ch := make(chan int, 100)
wg.Add(1)
go WriteToFile("file.txt", ch)
for i := 0; i < 25; i++ {
ch <- i // do the job
time.Sleep(500 * time.Millisecond)
}
close(ch) // Finish the job and close output file
wg.Wait()
}
I hope this helps.
Code below works fine with hard coded JSON data however doesn't work when I read JSON data from a file. I'm getting fatal error: all goroutines are asleep - deadlock error when using sync.WaitGroup.
WORKING EXAMPLE WITH HARD-CODED JSON DATA:
package main
import (
"bytes"
"fmt"
"os/exec"
"time"
)
func connect(host string) {
cmd := exec.Command("ssh", host, "uptime")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
fmt.Println(err)
}
fmt.Printf("%s: %q\n", host, out.String())
time.Sleep(time.Second * 2)
fmt.Printf("%s: DONE\n", host)
}
func listener(c chan string) {
for {
host := <-c
go connect(host)
}
}
func main() {
hosts := [2]string{"user1#111.79.154.111", "user2#111.79.190.222"}
var c chan string = make(chan string)
go listener(c)
for i := 0; i < len(hosts); i++ {
c <- hosts[i]
}
var input string
fmt.Scanln(&input)
}
OUTPUT:
user#user-VirtualBox:~/go$ go run channel.go
user1#111.79.154.111: " 09:46:40 up 86 days, 18:16, 0 users, load average: 5"
user2#111.79.190.222: " 09:46:40 up 86 days, 17:27, 1 user, load average: 9"
user1#111.79.154.111: DONE
user2#111.79.190.222: DONE
NOT WORKING - EXAMPLE WITH READING JSON DATA FILE:
package main
import (
"bytes"
"fmt"
"os/exec"
"time"
"encoding/json"
"os"
"sync"
)
func connect(host string) {
cmd := exec.Command("ssh", host, "uptime")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
fmt.Println(err)
}
fmt.Printf("%s: %q\n", host, out.String())
time.Sleep(time.Second * 2)
fmt.Printf("%s: DONE\n", host)
}
func listener(c chan string) {
for {
host := <-c
go connect(host)
}
}
type Content struct {
Username string `json:"username"`
Ip string `json:"ip"`
}
func main() {
var wg sync.WaitGroup
var source []Content
var hosts []string
data := json.NewDecoder(os.Stdin)
data.Decode(&source)
for _, value := range source {
hosts = append(hosts, value.Username + "#" + value.Ip)
}
var c chan string = make(chan string)
go listener(c)
for i := 0; i < len(hosts); i++ {
wg.Add(1)
c <- hosts[i]
defer wg.Done()
}
var input string
fmt.Scanln(&input)
wg.Wait()
}
OUTPUT
user#user-VirtualBox:~/go$ go run deploy.go < hosts.txt
user1#111.79.154.111: " 09:46:40 up 86 days, 18:16, 0 users, load average: 5"
user2#111.79.190.222: " 09:46:40 up 86 days, 17:27, 1 user, load average: 9"
user1#111.79.154.111 : DONE
user2#111.79.190.222: DONE
fatal error: all goroutines are asleep - deadlock!
goroutine 1 [semacquire]:
sync.runtime_Semacquire(0xc210000068)
/usr/lib/go/src/pkg/runtime/sema.goc:199 +0x30
sync.(*WaitGroup).Wait(0xc210047020)
/usr/lib/go/src/pkg/sync/waitgroup.go:127 +0x14b
main.main()
/home/user/go/deploy.go:64 +0x45a
goroutine 3 [chan receive]:
main.listener(0xc210038060)
/home/user/go/deploy.go:28 +0x30
created by main.main
/home/user/go/deploy.go:53 +0x30b
exit status 2
user#user-VirtualBox:~/go$
HOSTS.TXT
[
{
"username":"user1",
"ip":"111.79.154.111"
},
{
"username":"user2",
"ip":"111.79.190.222"
}
]
Go program ends when the main function ends.
From the language specification
Program execution begins by initializing the main package and then invoking the function main. When that function invocation returns, the program exits. It does not wait for other (non-main) goroutines to complete.
Therefore, you need to wait for your goroutines to finish. The common solution for this is to use sync.WaitGroup object.
The simplest possible code to synchronize goroutine:
package main
import "fmt"
import "sync"
var wg sync.WaitGroup // 1
func routine() {
defer wg.Done() // 3
fmt.Println("routine finished")
}
func main() {
wg.Add(1) // 2
go routine() // *
wg.Wait() // 4
fmt.Println("main finished")
}
And for synchronizing multiple goroutines
package main
import "fmt"
import "sync"
var wg sync.WaitGroup // 1
func routine(i int) {
defer wg.Done() // 3
fmt.Printf("routine %v finished\n", i)
}
func main() {
for i := 0; i < 10; i++ {
wg.Add(1) // 2
go routine(i) // *
}
wg.Wait() // 4
fmt.Println("main finished")
}
WaitGroup usage in order of execution.
Declaration of global variable. Making it global is the easiest way to make it visible to all functions and methods.
Increasing the counter. This must be done in main goroutine because there is no guarantee that newly started goroutine will execute before 4 due to memory model guarantees.
Decreasing the counter. This must be done at the exit of goroutine. Using deferred call, we make sure that it will be called whenever function ends no matter but no matter how it ends.
Waiting for the counter to reach 0. This must be done in main goroutine to prevent program exit.
* The actual parameters are evaluated before starting new gouroutine. Thus it is needed to evaluate them explicitly before wg.Add(1) so the possibly panicking code would not leave increased counter.
Use
param := f(x)
wg.Add(1)
go g(param)
instead of
wg.Add(1)
go g(f(x))
Thanks for the very nice and detailed explanation Grzegorz Żur.
One thing that I want to point it out that typically the func that needs to be threaded wont be in main(), so we would have something like this:
package main
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"math/rand"
"os"
"reflect"
"regexp"
"strings"
"sync"
"time"
)
var wg sync.WaitGroup // VERY IMP to declare this globally, other wise one //would hit "fatal error: all goroutines are asleep - deadlock!"
func doSomething(arg1 arg1Type) {
// cured cancer
}
func main() {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
randTime := r.Intn(10)
wg.Add(1)
go doSomething(randTime)
wg.Wait()
fmt.Println("Waiting for all threads to finish")
}
The thing that I want to point it out is that global declaration of wg is very crucial for all threads to finish before main()
try this code snippest
package main
import (
"bytes"
"fmt"
"os/exec"
"time"
"sync"
)
func connect(host string, wg *sync.WaitGroup) {
defer wg.Done()
cmd := exec.Command("ssh", host, "uptime")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
fmt.Println(err)
}
fmt.Printf("%s: %q\n", host, out.String())
time.Sleep(time.Second * 2)
fmt.Printf("%s: DONE\n", host)
}
func listener(c chan string,wg *sync.WaitGroup) {
for {
host,ok := <-c
// check channel is closed or not
if !ok{
break
}
go connect(host)
}
}
func main() {
var wg sync.WaitGroup
hosts := [2]string{"user1#111.79.154.111", "user2#111.79.190.222"}
var c chan string = make(chan string)
go listener(c)
for i := 0; i < len(hosts); i++ {
wg.Add(1)
c <- hosts[i]
}
close(c)
var input string
fmt.Scanln(&input)
wg.Wait()
}