Intent:
I am looking for a means to run os-level shell commands in parallel, but want to be careful to not clobber CPU and am wondering if a buffered channel would fit this use case.
Implemented:
Create a series of Jobs with a simulated runtime duration. Send these jobs to a queue which will dispatch them to run over a buffered channel as throttled by EXEC_THROTTLE.
Observations:
This 'works' (to the extent that it compiles and runs), but I am wondering if the buffer is working as specified (see: 'Intent') to throttle the number of processes running in parallel.
Disclaimer:
Now, I am aware that newbies tend to over-use channels, but I feel this request for insight is honest, as I've at least exercised the restraint to use a sync.WaitGroup. Forgive the somewhat toy example, but all insight would be appreciated.
Playground
package main
import (
// "os/exec"
"log"
"math/rand"
"strconv"
"sync"
"time"
)
const (
EXEC_THROTTLE = 2
)
type JobsManifest []Job
type Job struct {
cmd string
result string
runtime int // Simulate long-running task
}
func (j JobsManifest) queueJobs(logChan chan<- string, runChan chan Job, wg *sync.WaitGroup) {
go dispatch(logChan, runChan)
for _, job := range j {
wg.Add(1)
runChan <- job
}
}
func dispatch(logChan chan<- string, runChan chan Job) {
for j := range runChan {
go run(j, logChan)
}
}
func run(j Job, logChan chan<- string) {
time.Sleep(time.Second * time.Duration(j.runtime))
j.result = strconv.Itoa(rand.Intn(10)) // j.result = os.Exec("/bin/bash", "-c", j.cmd).Output()
logChan <- j.result
log.Printf(" ran: %s\n", j.cmd)
}
func logger(logChan <-chan string, wg *sync.WaitGroup) {
for {
res := <-logChan
log.Printf("logged: %s\n", res)
wg.Done()
}
}
func main() {
jobs := []Job{
Job{
cmd: "ps -p $(pgrep vim) | tail -n 1 | awk '{print $3}'",
runtime: 1,
},
Job{
cmd: "wc -l /var/log/foo.log | awk '{print $1}'",
runtime: 2,
},
Job{
cmd: "ls -l ~/go/src/github.com/ | wc -l | awk '{print $1}'",
runtime: 3,
},
Job{
cmd: "find /var/log/ -regextype posix-extended -regex '.*[0-9]{10}'",
runtime: 4,
},
}
var wg sync.WaitGroup
logChan := make(chan string)
runChan := make(chan Job, EXEC_THROTTLE)
go logger(logChan, &wg)
start := time.Now()
JobsManifest(jobs).queueJobs(logChan, runChan, &wg)
wg.Wait()
log.Printf("finish: %s\n", time.Since(start))
}
You can also cap concurrency with buffered channel:
concurrencyLimit := 2 // Number of simultaneous jobs.
semaphore := make(chan struct{}, concurrencyLimit)
for job := range jobs {
job := job // Pin loop variable.
semaphore <- struct{}{} // Acquire semaphore slot.
go func() {
defer func() {
<-semaphore // Release semaphore slot.
}()
do(job) // Do the job.
}()
}
// Wait for goroutines to finish by acquiring all slots.
for i := 0; i < cap(semaphore); i++ {
semaphore <- struct{}{}
}
Replace processItem function with required execution of your job.
Below will execute jobs in proper order. Atmost EXEC_CONCURRENT items will be executed concurrently.
package main
import (
"fmt"
"sync"
"time"
)
func processItem(i int, done chan int, wg *sync.WaitGroup) {
fmt.Printf("Async Start: %d\n", i)
time.Sleep(100 * time.Millisecond * time.Duration(i))
fmt.Printf("Async Complete: %d\n", i)
done <- 1
wg.Done()
}
func popItemFromBufferChannelWhenItemDoneExecuting(items chan int, done chan int) {
_ = <- done
_ = <-items
}
func main() {
EXEC_CONCURRENT := 3
items := make(chan int, EXEC_CONCURRENT)
done := make(chan int)
var wg sync.WaitGroup
for i:= 1; i < 11; i++ {
items <- i
wg.Add(1)
go processItem(i, done, &wg)
go popItemFromBufferChannelWhenItemDoneExecuting(items, done)
}
wg.Wait()
}
Below will execute jobs in Random order. Atmost EXEC_CONCURRENT items will be executed concurrently.
package main
import (
"fmt"
"sync"
"time"
)
func processItem(i int, items chan int, wg *sync.WaitGroup) {
items <- i
fmt.Printf("Async Start: %d\n", i)
time.Sleep(100 * time.Millisecond * time.Duration(i))
fmt.Printf("Async Complete: %d\n", i)
_ = <- items
wg.Done()
}
func main() {
EXEC_CONCURRENT := 3
items := make(chan int, EXEC_CONCURRENT)
var wg sync.WaitGroup
for i:= 1; i < 11; i++ {
wg.Add(1)
go processItem(i, items, &wg)
}
wg.Wait()
}
You can choose according to your requirement.
If I understand you right, you mean to establish a mechanism to ensure that at any time at most a number of EXEC_THROTTLE jobs are running. And if that is your intention, the code does not work.
It is because when you start a job, you have already consumed the channel - allowing another job to be started, yet no jobs have been finished. You can debug this by add an counter (you'll need atomic add or mutex).
You may do the work by simply start a group of goroutine with an unbuffered channel and block when executating jobs:
func Run(j Job) r Result {
//Run your job here
}
func Dispatch(ch chan Job) {
for j:=range ch {
wg.Add(1)
Run(j)
wg.Done()
}
}
func main() {
ch := make(chan Job)
for i:=0; i<EXEC_THROTTLE; i++ {
go Dispatch(ch)
}
//call dispatch according to the queue here.
}
It works because as along as one goroutine is consuming the channel, it means at least one goroutine is not running and there is at most EXEC_THROTTLE-1 jobs running so it is good to execuate one more and it does so.
I use this a lot. https://github.com/dustinevan/go-utils
package async
import (
"context"
"github.com/pkg/errors"
)
type Semaphore struct {
buf chan struct{}
ctx context.Context
cancel context.CancelFunc
}
func NewSemaphore(max int, parentCtx context.Context) *Semaphore {
s := &Semaphore{
buf: make(chan struct{}, max),
ctx: parentCtx,
}
go func() {
<-s.ctx.Done()
close(s.buf)
drainStruct(s.buf)
}()
return s
}
var CLOSED = errors.New("the semaphore has been closed")
func (s *Semaphore) Acquire() error {
select {
case <-s.ctx.Done():
return CLOSED
case s.buf <- struct{}{}:
return nil
}
}
func (s *Semaphore) Release() {
<-s.buf
}
you'd use it like this:
func main() {
sem := async.NewSemaphore(10, context.Background())
...
var wg sync.Waitgroup
for _, job := range jobs {
go func() {
wg.Add(1)
err := sem.Acquire()
if err != nil {
// handle err,
}
defer sem.Release()
defer wg.Done()
job()
}
wg.Wait()
}
Related
Unable to find a reason as to why this code deadlocks. The aim here is make the worker go routines do some work only after they are signaled.
If the signalStream channel is removed from the code, it works fine. But when this is introduced, it deadlocks. Not sure the reason for this. Also if there are any tools to explain the occurrence of deadlock, it would help.
package main
import (
"log"
"sync"
)
const jobs = 10
const workers = 5
var wg sync.WaitGroup
func main() {
// create channel
dataStream := make(chan interface{})
signalStream := make(chan interface{})
// Generate workers
for i := 1; i <= workers; i++ {
wg.Add(1)
go worker(dataStream, signalStream, i*100)
}
// Generate jobs
for i := 1; i <= jobs; i++ {
dataStream <- i
}
close(dataStream)
// start consuming data
close(signalStream)
wg.Wait()
}
func worker(c <-chan interface{}, s <-chan interface{}, id int) {
defer wg.Done()
<-s
for i := range c {
log.Printf("routine - %d - %d \n", id, i)
}
}
Generate the jobs in a separate gorouine, i.e. put the whole jobs loop into a goroutine. If you don't then dataStream <- i will block and your program will never "start consuming data"
// Generate jobs
go func() {
for i := 1; i <= jobs; i++ {
dataStream <- i
}
close(dataStream)
}()
https://go.dev/play/p/ChlbsJlgwdE
In the code hereunder, I don't understand why the "Worker" methods seem to exit instead of pulling values from the input channel "in" and processing them.
I had assumed they would only return after having consumed all input from the input channel "in" and processing them
package main
import (
"fmt"
"sync"
)
type ParallelCallback func(chan int, chan Result, int, *sync.WaitGroup)
type Result struct {
i int
val int
}
func Worker(in chan int, out chan Result, id int, wg *sync.WaitGroup) {
for item := range in {
item *= item // returns the square of the input value
fmt.Printf("=> %d: %d\n", id, item)
out <- Result{item, id}
}
wg.Done()
fmt.Printf("%d exiting ", id)
}
func Run_parallel(n_workers int, in chan int, out chan Result, Worker ParallelCallback) {
wg := sync.WaitGroup{}
for id := 0; id < n_workers; id++ {
fmt.Printf("Starting : %d\n", id)
wg.Add(1)
go Worker(in, out, id, &wg)
}
wg.Wait() // wait for all workers to complete their tasks
close(out) // close the output channel when all tasks are completed
}
const (
NW = 4
)
func main() {
in := make(chan int)
out := make(chan Result)
go func() {
for i := 0; i < 100; i++ {
in <- i
}
close(in)
}()
Run_parallel(NW, in, out, Worker)
for item := range out {
fmt.Printf("From out : %d: %d", item.i, item.val)
}
}
The output is
Starting : 0
Starting : 1
Starting : 2
Starting : 3
=> 3: 0
=> 0: 1
=> 1: 4
=> 2: 9
fatal error: all goroutines are asleep - deadlock!
fatal error: all goroutines are asleep - deadlock!
The full error shows where each goroutine is "stuck". If you run this in the playground, it will even show you the line number. That made it easy for me to diagnose.
Your Run_parallel runs in the main groutine, so before main can read from out, Run_parallel must return. Before Run_parallel can return, it must wg.Wait(). But before the workers call wg.Done(), they must write to out. That's what causes a deadlock.
One solution is simple: just run Run_parallel concurrently in its own Goroutine.
go Run_parallel(NW, in, out, Worker)
Now, main ranges over out, waiting on outs closure to signal completion. Run_parallel waits for the workers with wg.Wait(), and the workers will range over in. All the work will get done, and the program won't end until it's all done. (https://go.dev/play/p/oMrgH2U09tQ)
Solution :
Run_parallel has to run in it’s own goroutine:
package main
import (
"fmt"
"sync"
)
type ParallelCallback func(chan int, chan Result, int, *sync.WaitGroup)
type Result struct {
id int
val int
}
func Worker(in chan int, out chan Result, id int, wg *sync.WaitGroup) {
defer wg.Done()
for item := range in {
item *= 2 // returns the double of the input value (Bogus handling of data)
out <- Result{id, item}
}
}
func Run_parallel(n_workers int, in chan int, out chan Result, Worker ParallelCallback) {
wg := sync.WaitGroup{}
for id := 0; id < n_workers; id++ {
wg.Add(1)
go Worker(in, out, id, &wg)
}
wg.Wait() // wait for all workers to complete their tasks
close(out) // close the output channel when all tasks are completed
}
const (
NW = 8
)
func main() {
in := make(chan int)
out := make(chan Result)
go func() {
for i := 0; i < 10; i++ {
in <- i
}
close(in)
}()
go Run_parallel(NW, in, out, Worker)
for item := range out {
fmt.Printf("From out [%d]: %d\n", item.id, item.val)
}
println("- - - All done - - -")
}
Alternative formulation of the solution:
In that alternative formulation , it is not necessary to start Run_parallel as a goroutine (it triggers its own goroutine).
I prefer that second solution, because it automates the fact that Run_parallel() has to run parallel to the main function. Also, for the same reason it's safer, less error-prone (no need to remember to run Run_parallel with the go keyword).
package main
import (
"fmt"
"sync"
)
type ParallelCallback func(chan int, chan Result, int, *sync.WaitGroup)
type Result struct {
id int
val int
}
func Worker(in chan int, out chan Result, id int, wg *sync.WaitGroup) {
defer wg.Done()
for item := range in {
item *= 2 // returns the double of the input value (Bogus handling of data)
out <- Result{id, item}
}
}
func Run_parallel(n_workers int, in chan int, out chan Result, Worker ParallelCallback) {
go func() {
wg := sync.WaitGroup{}
defer close(out) // close the output channel when all tasks are completed
for id := 0; id < n_workers; id++ {
wg.Add(1)
go Worker(in, out, id, &wg)
}
wg.Wait() // wait for all workers to complete their tasks *and* trigger the -differed- close(out)
}()
}
const (
NW = 8
)
func main() {
in := make(chan int)
out := make(chan Result)
go func() {
defer close(in)
for i := 0; i < 10; i++ {
in <- i
}
}()
Run_parallel(NW, in, out, Worker)
for item := range out {
fmt.Printf("From out [%d]: %d\n", item.id, item.val)
}
println("- - - All done - - -")
}
I am getting fatal error: all goroutines are asleep - deadlock!
on the line wg.Wait()
It happens for about ~30% of the runs, the rest are finished with no error. I guess I am using WaitGroup the wrong way, but not sure what am I doing wrong.
Maybe someone can help me identify my bug? Thanks!
package main
import (
"fmt"
"math/rand"
"sync"
"time"
)
const (
numOfPhilosophers = 5
numOfMeals = 3
maxEaters = 2
)
var doOnce sync.Once
func main() {
chopsticks := make([]sync.Mutex, 5)
permissionChannel := make(chan bool)
finishEating := make(chan bool)
go permissionFromHost(permissionChannel,finishEating)
var wg sync.WaitGroup
wg.Add(numOfPhilosophers)
for i:=1 ; i<=numOfPhilosophers ; i++ {
go eat(i, chopsticks[i-1], chopsticks[i%numOfPhilosophers], &wg, permissionChannel, finishEating)
}
wg.Wait()
}
func eat(philosopherId int, left sync.Mutex, right sync.Mutex, wg *sync.WaitGroup, permissionChannel <-chan bool, finishEatingChannel chan<- bool) {
defer wg.Done()
for i:=1 ; i<=numOfMeals ; i++ {
//lock chopsticks in random order
if RandBool() {
left.Lock()
right.Lock()
} else {
right.Lock()
left.Lock()
}
fmt.Printf("waiting for permission from host %d\n",philosopherId)
<-permissionChannel
fmt.Printf("starting to eat %d (time %d)\n", philosopherId, i)
fmt.Printf("finish to eat %d (time %d)\n", philosopherId, i)
//release chopsticks
left.Unlock()
right.Unlock()
//let host know I am done eating
finishEatingChannel<-true
}
}
func permissionFromHost(permissionChannel chan<-bool, finishEating <-chan bool) {
ctr := 0
for {
select {
case <-finishEating:
ctr--
default:
if ctr<maxEaters {
ctr++
permissionChannel<-true
}
}
}
}
func RandBool() bool {
rand.Seed(time.Now().UnixNano())
return rand.Intn(2) == 1
}
Edit 1: I fixed the mutex to be passed by reference. It didn't solve the problem.
Edit 2: I tried to use buffered channel permissionChannel:=make(chan bool, numOfPhilosophers) which makes it work
Edit 3: also #Jaroslaw example makes it work
The last goroutine will not exit, it will get blocked in its last iteration when it is writing to the finishEatingChannel channel as there are no consumers for it.
The reason there are no consumers for the finishEatingChannel is that the select case in the function permissionFromHost is writing to permissionChannel<-true but there are no consumers for permissionChannel as it is waiting for it to be read so we have a deadlock.
You can make the permissionFromHost channel buffered, it will resolve the issue.
There is also a bug in your code, you are passing mutex by value which is not allowed
The go vet command says
./main.go:26:13: call of eat copies lock value: sync.Mutex
./main.go:26:30: call of eat copies lock value: sync.Mutex
./main.go:31:34: eat passes lock by value: sync.Mutex
./main.go:31:52: eat passes lock by value: sync.Mutex
Another problem is that there are times when goroutines (philosophers) get blocked when trying to send an acknowledgement on finishEatingChannel, because the goroutine (host) responsible for reading data from this unbuffered channel is busy trying to send a permission. Here is the exact part of code:
if ctr<maxEaters {
ctr++
// This goroutine stucks since the last philosopher is not reading from permissionChannel.
// Philosopher is not reading from this channel at is busy trying to write finishEating channel which is not read by this goroutine.
// Thus the deadlock happens.
permissionChannel<-true
}
Deadlock is 100% reproducible when there is only one philosopher left who needs to eat twice.
Fixed version of code:
package main
import (
"fmt"
"math/rand"
"sync"
"time"
)
const (
numOfPhilosophers = 5
numOfMeals = 3
maxEaters = 2
)
func main() {
chopsticks := make([]sync.Mutex, 5)
permissionChannel := make(chan bool)
finishEating := make(chan bool)
go permissionFromHost(permissionChannel, finishEating)
var wg sync.WaitGroup
wg.Add(numOfPhilosophers)
for i := 1; i <= numOfPhilosophers; i++ {
go eat(i, &chopsticks[i-1], &chopsticks[i%numOfPhilosophers], &wg, permissionChannel, finishEating)
}
wg.Wait()
}
func eat(philosopherId int, left *sync.Mutex, right *sync.Mutex, wg *sync.WaitGroup, permissionChannel <-chan bool, finishEatingChannel chan<- bool) {
defer wg.Done()
for i := 1; i <= numOfMeals; i++ {
//lock chopsticks in random order
if RandBool() {
left.Lock()
right.Lock()
} else {
right.Lock()
left.Lock()
}
fmt.Printf("waiting for permission from host %d\n", philosopherId)
<-permissionChannel
fmt.Printf("starting to eat %d (time %d)\n", philosopherId, i)
fmt.Printf("finish to eat %d (time %d)\n", philosopherId, i)
//release chopsticks
left.Unlock()
right.Unlock()
//let host know I am done eating
finishEatingChannel <- true
}
}
func permissionFromHost(permissionChannel chan<- bool, finishEating <-chan bool) {
ctr := 0
for {
if ctr < maxEaters {
select {
case <-finishEating:
ctr--
case permissionChannel <- true:
ctr++
}
} else {
<-finishEating
ctr--
}
}
}
func RandBool() bool {
rand.Seed(time.Now().UnixNano())
return rand.Intn(2) == 1
}
FATAL Error All go routines are asleep. Deadlock.
This is what I tried. I am calling wg.Done(). What is missing?
package main
import (
"fmt"
"strconv"
"sync"
)
func sender(wg *sync.WaitGroup, cs chan int) {
defer wg.Done()
for i := 0; i < 2; i++ {
fmt.Println(i)
cs <- i
}
}
func reciever(wg *sync.WaitGroup, cs chan int) {
x, ok := <-cs
for ok {
fmt.Println("Retrieved" + strconv.Itoa(x))
x, ok = <-cs
if !ok {
wg.Done()
break
}
}
}
func main() {
wg := &sync.WaitGroup{}
cs := make(chan int, 1000)
wg.Add(1)
go sender(wg, cs)
for i := 1; i < 30; i++ {
wg.Add(1)
go reciever(wg, cs)
}
wg.Wait()
close(cs)
}
You should to close channel before wg.Wait.
All your receivers are waiting for data from channel. That's why you have deadlock.
You can close channel at defer statement of sender function.
Also you need to wg.Done() if the first attempt of receiving from channel was unsuccessful (because channel already closed)
http://play.golang.org/p/qdEIEfY-kl
There are couple of things:
You need to close the channel once sender is completed.
In receiver, range over channel
Don't need to add 1 to wait group and call Done in sender
Please refer to http://play.golang.org/p/vz39RY6WA7
package main
import (
"fmt"
"runtime"
"sync"
"time"
)
func main() {
intInputChan := make(chan int, 50)
var wg sync.WaitGroup
for i := 0; i < 3; i++ {
wg.Add(1)
go worker(intInputChan, wg)
}
for i := 1; i < 51; i++ {
fmt.Printf("Inputs. %d \n", i)
intInputChan <- i
}
close(intInputChan)
wg.Wait()
fmt.Println("Existing Main App... ")
panic("---------------")
}
func worker(input chan int, wg sync.WaitGroup) {
defer func() {
fmt.Println("Executing defer..")
wg.Done()
}()
for {
select {
case intVal, ok := <-input:
time.Sleep(100 * time.Millisecond)
if !ok {
input = nil
return
}
fmt.Printf("%d %v\n", intVal, ok)
default:
runtime.Gosched()
}
}
}
error thrown is.
fatal error: all goroutines are asleep - deadlock!
goroutine 1 [semacquire]:
sync.(*WaitGroup).Wait(0xc082004600)
c:/go/src/sync/waitgroup.go:132 +0x170
main.main()
E:/Go/go_projects/go/src/Test.go:22 +0x21a
I just tried it (playground) passing a wg *sync.WaitGroup and it works.
Passing sync.WaitGroup means passing a copy of the sync.WaitGroup (passing by value): the goroutine mentions Done() to a different sync.WaitGroup.
var wg sync.WaitGroup
for i := 0; i < 3; i++ {
wg.Add(1)
go worker(intInputChan, &wg)
}
Note the &wg: you are passing by value the pointer to the original sync.WaitGroup, for the goroutine to use.
As mentioned, don't pass types from the sync package around by value, right near the top of the sync package documentation: "Values containing the types defined in this package should not be copied." That also includes the types themselves (sync.Mutex, sync.WaitGroup, etc).
However, several notes:
You can use just a single call to wg.Add if you know how many you're going to add (but as documented make sure it's done before anything can call Wait).
You don't want to call runtime.Gosched like that; it makes the workers busy loop.
You can use range to read from the channel to simplify stopping when it's closed.
For small functions you can use a closure and not bother to pass the channel or wait group at all.
That turns it into this:
package main
import (
"fmt"
"sync"
"time"
)
func main() {
const numWorkers = 3
c := make(chan int, 10)
var wg sync.WaitGroup
wg.Add(numWorkers)
for i := 0; i < numWorkers; i++ {
go func() {
defer func() {
fmt.Println("Executing defer…")
wg.Done()
}()
for v := range c {
fmt.Println("recv:", v)
time.Sleep(100 * time.Millisecond)
}
}()
}
for i := 1; i < 51; i++ {
fmt.Println("send:", i)
c <- i
}
fmt.Println("closing…")
close(c)
fmt.Println("waiting…")
wg.Wait()
fmt.Println("Exiting Main App... ")
}
playground