Golang: newbie -- Master-Worker concurrency - go

I'm having an issue trying to implement this (all goroutines asleep - deadlock!)
Here's the gist of the code:
var workers = runtime.NumCPU()
func main() {
jobs := make(chan *myStruct, workers)
done := make(chan *myStruct, workers)
go produceWork(file_with_jobs, jobs)
for i := 0; i < runtime.NumCPU(); i++ {
go Worker(jobs, done)
}
consumeWork(done)
}
func produceWork(vf string, jobs chan *utils.DigSigEntries) {
defer close(jobs)
// load file with jobs
file, err := ini.LoadFile(vf)
// get data for processing
for data, _ := range file {
// ...
jobs <- &myStruct{data1, data2, data3, false}
}
}
func Worker(in, out chan *myStruct) {
for {
item, open := <-in
if !open {
break
}
process(item)
out <- item
}
// close(out) --> tried closing the out channel, but then not all items are processed
// though no panics occur.
}
func process(item *myStruct) {
//...modify the item
item.status = true
}
func consumeWork(done chan *myStruct) {
for val := range done {
if !val.status {
fmt.Println(val)
}
}
}
I'm mainly trying to understand how to do this without using the sync/Wait stuff - just pure channels - is this possible? The goal of this routine is to have a single producer load up items that are processed by N workers - appreciate any pointers / help.

You can, as siritinga suggested, use a third signalling or counter channel, eg signal chan boolean, where the produceWork goroutine would add a value before each job entered into the jobs channel. So, an equal count of values will be passed to signal as to jobs:
func produceWork(vf string, jobs chan *utils.DigSigEntries, signal chan boolean) {
defer close(jobs)
// load file with jobs
file, err := ini.LoadFile(vf)
// get data for processing
for data, _ := range file {
// ...
signal <- true
jobs <- &myStruct{data1, data2, data3, false}
}
close(signal)
}
The consume would then start by reading from the signal channel. If there is a value, it can be certain there will be a value to read from the out channel (once a worker has passed it on). If the signal is closed, then all is done. We can close the remaining done channel:
func consumeWork(done chan *myStruct, signal chan boolean) {
for _ := range signal {
val <- done
if !val.status {
fmt.Println(val)
}
}
close(done)
}
While this is possible, I would not really recommend it. It doesn't make the code more clear than when using a sync.WaitGroup. After all, the signal channel would basically only work as a counter. A WaitGroup would have the same purpose and would do it cheaper.
But your question was not about how to solve the problem, but rather if it was possible to do it with pure channels.

Sorry, i didn't notice that you wanted to skip /sync :/
I'll leave the answer, maybe someone is looking for this.
import (
"sync"
)
func main() {
jobs := make(chan *myStruct, workers)
done := make(chan *myStruct, workers)
var workerWg sync.WaitGroup // waitGroup for workers
var consumeWg sync.WaitGroup // waitGroup for consumer
consumeWg.Add(1) // add one active Consumer
for i := 0; i < runtime.NumCPU(); i++ {
go Worker(&workerWg, jobs, done)
workerWg.Add(1)
}
go consumeWork(&consumeWg, done)
produceWork(file_with_jobs, jobs)
close(jobs)
workerWg.Wait()
close(done)
consumeWg.Wait()
}
func produceWork(vf string, jobs chan *utils.DigSigEntries) {
// load file with jobs
file, err := ini.LoadFile(vf)
// get data for processing
for data, _ := range file {
// ...
jobs <- &myStruct{data1, data2, data3, false}
}
}
func Worker(wg *sync.WaitGroup, done chan *myStruct) {
defer wg.Done()
for job := range jobs {
result := process(job)
out <- result
}
// close(out) --> tried closing the out channel, but then not all items are processed
// though no panics occur.
}
func process(item *myStruct) {
//...modify the item
item.status = true
}
func consumeWork(wg *sync.WaitGroup, done chan *myStruct) {
defer wg.Done()
for val := range done {
if !val.status {
fmt.Println(val)
}
}
}

Related

Execute multiple independent jobs continuously

I have a set of jobs which are independent of each other. Hence each of these jobs can be run concurrently using goroutines. Note that once a single job completes, it should wait for few seconds and start again (applies to all the jobs) and this goes on in a loop until the Go API service stops. Also note that all these jobs execute the same goroutine (makes a REST call). What would be the best pattern to implement this in Go. Please note that I would also want to wait for currently executing jobs to complete before my service shuts down.
If I got you right, you are looking for something likes this
This is a service with a consumer pool to execute jobs concurrently. When a job is done, it will repeat again after a interval until you stop the service.
type job struct {
id int
result chan error
}
func newJob(id int) job {
return job{
id: id,
result: make(chan error, 1),
}
}
type service struct {
pending chan job
consumerLimit int
repeatInterval time.Duration
isClosed chan struct{}
shutdown chan chan error
}
func newService(repeatInterval time.Duration, consumerLimit int, pendingChannelSize int) *service {
s := &service{
pending: make(chan job, pendingChannelSize),
consumerLimit: consumerLimit,
repeatInterval: repeatInterval,
isClosed: make(chan struct{}, consumerLimit),
shutdown: make(chan chan error),
}
for i := 0; i < s.consumerLimit; i++ {
go s.consumer()
}
return s
}
func (s *service) do(ctx context.Context, job job) error {
select {
case <-ctx.Done():
return ctx.Err()
case s.pending <- job:
return <-job.result
case <-s.isClosed:
return errors.New("service has been shut down")
}
}
func (s *service) consumer() {
for {
select {
case j := <-s.pending:
//Simulate working process
time.Sleep(time.Duration(rand.Intn(200)) + 200)
j.result <- nil
fmt.Println(fmt.Sprintf("job %v is done", j.id))
go func() {
//Repeat after a time
time.Sleep(s.repeatInterval)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
if err := s.do(ctx, newJob(j.id)); err != nil {
fmt.Println(fmt.Errorf("failed to send job to repeat: %v", err))
}
}()
case result := <-s.shutdown:
result <- nil
return
}
}
}
func (s *service) close() error {
result := make(chan error, 1)
for i := 0; i < s.consumerLimit; i++ {
s.shutdown <- result
}
close(s.isClosed)
return <-result
}
func main() {
interrupt := make(chan os.Signal, 1)
signal.Notify(interrupt, os.Interrupt)
service := newService(time.Second, 5, 1000)
//Assign jobs
for i := 1; i < 10; i++ {
go func(i int) {
if err := service.do(context.Background(), newJob(i)); err != nil {
fmt.Println(fmt.Errorf("failed to send job: %v", err))
}
}(i)
}
select {
case <-interrupt:
switch err := service.close(); err {
case nil:
fmt.Println("service has been shutdown successfully")
default:
fmt.Println(fmt.Errorf("failed to graceful shut down service: %w", err))
}
return
}
}
If I understand correctly, you are looking for something like this.
This code will run the workers in a loop, the workers run parallel as a group until you exit the program sending an end signal, but wait for the current loop to finihish before exiting.
func main() {
srv := server{
workers: 5,
}
srv.Run()
}
// inspired by: https://goinbigdata.com/golang-wait-for-all-goroutines-to-finish/#:~:text=A%20WaitGroup%20allows%20to%20wait,until%20all%20goroutines%20have%20finished.
func work(wg *sync.WaitGroup, i int) {
defer wg.Done()
rand.Seed(time.Now().UnixNano())
n := rand.Intn(10)
fmt.Printf("Worker %v: Started\n", i)
time.Sleep(time.Duration(n) * time.Second)
fmt.Printf("Worker %v: Finished\n", i)
}
type server struct {
running bool
workers int
}
func (srv *server) Run() {
done := make(chan bool, 1) // this channel
signalCh := make(chan os.Signal, 1) // this channel will get a signal on system call
signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM)
go func() {
<-signalCh
srv.running = false
done <- true
}()
srv.running = true
for srv.running {
var wg sync.WaitGroup
for i := 0; i < srv.workers; i++ {
wg.Add(1)
go work(&wg, i)
}
wg.Wait()
}
<-done
}
You want to implement worker pools. Here is the simple way to create pools of worker. You can customize worker method and jobs type according to your requirement.
package main
import (
"fmt"
"sync"
)
type Jobs struct {
ID string
// or anything you want to add
}
func main() {
jobs := make(Jobs)
var wg sync.WaitGroup
numWorker := 16
for i := 0; i < numWorker; i++ {
wg.Add(1)
go func() {
worker(jobs)
wg.Done()
}()
}
tasks := []Jobs{}
// inset your task here
for _, i := range tasks {
jobs <- i
}
close(jobs)
wg.Wait()
}
func worker(jobs chan Jobs) {
for job := range jobs {
// do whatever you want to do
doSomething(job)
}
}
func doSomething(job Jobs) {
fmt.Println(job)
}

Channel-based pipeline getting stuck

I am trying to build a pipeline for ingesting data with Go. The 3 stages are "Download batches", "Transform each message", and "Enqueue messages into a queue".
The logic which seemed natural to me is to create 3 functions for each stage, and to tie these functions with unbuffered channels.
Somewhere in my code, I am not implementing channels correctly, or not using waitgroups? As only 1 message gets to the final stage and program seems to stop/block.
func (c *worker) startWork() {
// channel for messages to be sent to the queue
chMessagesToEnqueue := make(chan types.Foo)
// channel for messages to be transformed
chMessagesToTransform := make(chan []types.UpstreamFooType)
// start the goroutines with the channels
go c.startTransformer(chMessagesToTransform, chMessagesToEnqueue)
go c.startEnqueuer(chMessagesToEnqueue)
go c.startDownloader(chMessagesToTransform)
}
func (c *worker) startDownloader(out chan []types.UpstreamFooType) {
// https://github.com/SebastiaanKlippert/go-soda
// uses a library here to fetch data from upstream APIs, but the gist is:
var wg sync.WaitGroup
for i := 0; i < c.workerCount; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for {
// i've cut out some meat out to make more concise
var results []types.UpstreamFooType
err = json.NewDecoder(resp.Body).Decode(&results)
out <- results
}
}()
}
wg.Wait()
}
func (c *worker) startTransformer(in <-chan []types.UpstreamFooType, out chan types.Foo) {
data := <-in
for _, record := range data {
msg := types.Foo{
name: record.fifa,
}
out <- msg
}
}
func (c *worker) startEnqueuer(in <-chan []types.Foo) {
data := <-in
c.logger.Infow("startEnqueuer", "data", data)
}

Multiple producers, single consumer: all goroutines are asleep - deadlock

I have been following a pattern of checking if there is anything in the channel before proceeding with work:
func consume(msg <-chan message) {
for {
if m, ok := <-msg; ok {
fmt.Println("More messages:", m)
} else {
break
}
}
}
that is based on this video. Here is my full code:
package main
import (
"fmt"
"strconv"
"strings"
"sync"
)
type message struct {
body string
code int
}
var markets []string = []string{"BTC", "ETH", "LTC"}
// produces messages into the chan
func produce(n int, market string, msg chan<- message, wg *sync.WaitGroup) {
// for i := 0; i < n; i++ {
var msgToSend = message{
body: strings.Join([]string{"market: ", market, ", #", strconv.Itoa(1)}, ""),
code: 1,
}
fmt.Println("Producing:", msgToSend)
msg <- msgToSend
// }
wg.Done()
}
func receive(msg <-chan message, wg *sync.WaitGroup) {
for {
if m, ok := <-msg; ok {
fmt.Println("Received:", m)
} else {
fmt.Println("Breaking from receiving")
break
}
}
wg.Done()
}
func main() {
wg := sync.WaitGroup{}
msgC := make(chan message, 100)
defer func() {
close(msgC)
}()
for ix, market := range markets {
wg.Add(1)
go produce(ix+1, market, msgC, &wg)
}
wg.Add(1)
go receive(msgC, &wg)
wg.Wait()
}
If you try to run it, we get the deadlock at the very end before we ever print a message that we are about to break. Which, tbh, makes sense, since the last time, when there is nothing else in the chan, we are trying to pull the value out, and so we get this error. But then this pattern isn't workable if m, ok := <- msg; ok. How do I make this code work & why do I get this deadlock error (presumably this pattern should work?).
Given that you do have multiple writers on a single channel, you have a bit of a challenge, because the easy way to do this in Go in general is to have a single writer on a single channel, and then have that single writer close the channel upon sending the last datum:
func produce(... args including channel) {
defer close(ch)
for stuff_to_produce {
ch <- item
}
}
This pattern has the nice property that no matter how you get out of produce, the channel gets closed, signalling the end of production.
You're not using this pattern—you deliver one channel to many goroutines, each of which can send one message—so you need to move the close (or, of course, use yet some other pattern). The simplest way to express the pattern you need is this:
func overall_produce(... args including channel ...) {
var pg sync.WaitGroup
defer close(ch)
for stuff_to_produce {
pg.Add(1)
go produceInParallel(ch, &pg) // add more args if appropriate
}
pg.Wait()
}
The pg counter accumulates active producers. Each must call pg.Done() to indicate that it is done using ch. The overall producer now waits for them all to be done, then it closes the channel on its way out.
(If you write the inner produceInParallel function as a closure, you don't need to pass ch and pg to it explicitly. You may also write overallProducer as a closure.)
Note that your single consumer's loop is probably best expressed using the for ... range construct:
func receive(msg <-chan message, wg *sync.WaitGroup) {
for m := range msg {
fmt.Println("Received:", m)
}
wg.Done()
}
(You mention an intent to add a select to the loop so that you can do some other computing if a message is not yet ready. If that code cannot be spun off into independent goroutines, you will in fact need the fancier m, ok := <-msg construct.)
Note also that the wg for receive—which may turn out to be unnecessary, depending on how you structure other things—is quite independent from the wait-group pg for the producers. While it's true that, as written, the consumer cannot be done until all the producers are done, we'd like to wait independently for the producers to be done, so that we can close the channel in the overall-producer wrapper.
Try this code, I have made few fixes that made it work:
package main
import (
"fmt"
"strconv"
"strings"
"sync"
)
type message struct {
body string
code int
}
var markets []string = []string{"BTC", "ETH", "LTC"}
// produces messages into the chan
func produce(n int, market string, msg chan<- message, wg *sync.WaitGroup) {
// for i := 0; i < n; i++ {
var msgToSend = message{
body: strings.Join([]string{"market: ", market, ", #", strconv.Itoa(1)}, ""),
code: 1,
}
fmt.Println("Producing:", msgToSend)
msg <- msgToSend
// }
}
func receive(msg <-chan message, wg *sync.WaitGroup) {
for {
if m, ok := <-msg; ok {
fmt.Println("Received:", m)
wg.Done()
}
}
}
func consume(msg <-chan message) {
for {
if m, ok := <-msg; ok {
fmt.Println("More messages:", m)
} else {
break
}
}
}
func main() {
wg := sync.WaitGroup{}
msgC := make(chan message, 100)
defer func() {
close(msgC)
}()
for ix, market := range markets {
wg.Add(1)
go produce(ix+1, market, msgC, &wg)
}
go receive(msgC, &wg)
wg.Wait()
fmt.Println("Breaking from receiving")
}
Only when main returns, you can close(msgC), but meanwhile receive is waiting for close signal, that's why DeadLock occurs. After producing messages, close the channel.
package main
import (
"fmt"
"strconv"
"strings"
"sync"
)
type message struct {
body string
code int
}
var markets []string = []string{"BTC", "ETH", "LTC"}
// produces messages into the chan
func produce(n int, market string, msg chan<- message, wg *sync.WaitGroup) {
// for i := 0; i < n; i++ {
var msgToSend = message{
body: strings.Join([]string{"market: ", market, ", #", strconv.Itoa(1)}, ""),
code: 1,
}
fmt.Println("Producing:", msgToSend)
msg <- msgToSend
// }
wg.Done()
}
func receive(msg <-chan message, wg *sync.WaitGroup) {
for {
if m, ok := <-msg; ok {
fmt.Println("Received:", m)
} else {
fmt.Println("Breaking from receiving")
break
}
}
wg.Done()
}
func main() {
wg := sync.WaitGroup{}
msgC := make(chan message, 100)
// defer func() {
// close(msgC)
// }()
for ix, market := range markets {
wg.Add(1)
go produce(ix+1, market, msgC, &wg)
}
wg.Wait() // wait for producer
close(msgC)
wg.Add(1)
go receive(msgC, &wg)
wg.Wait()
}

Get responses from multiple go routines into an array

I need to fetch responses from multiple go routines and put them into an array. I know that channels could be used for this, however I am not sure how I can make sure that all go routines have finished processing the results. Thus I am using a waitgroup.
Code
func main() {
log.Info("Collecting ints")
var results []int32
for _, broker := range e.BrokersByBrokerID {
wg.Add(1)
go getInt32(&wg)
}
wg.Wait()
log.info("Collected")
}
func getInt32(wg *sync.WaitGroup) (int32, error) {
defer wg.Done()
// Just to show that this method may just return an error and no int32
err := broker.Open(config)
if err != nil && err != sarama.ErrAlreadyConnected {
return 0, fmt.Errorf("Cannot connect to broker '%v': %s", broker.ID(), err)
}
defer broker.Close()
return 1003, nil
}
My question
How can I put all the response int32 (which may return an error) into my int32 array, making sure that all go routines have finished their processing work and returned either the error or the int?
If you don't process the return values of the function launched as a goroutine, they are discarded. See What happens to return value from goroutine.
You may use a slice to collect the results, where each goroutine could receive the index to put the results to, or alternatively the address of the element. See Can I concurrently write different slice elements. Note that if you use this, the slice must be pre-allocated and only the element belonging to the goroutine may be written, you can't "touch" other elements and you can't append to the slice.
Or you may use a channel, on which the goroutines send values that include the index or ID of the item they processed, so the collecting goroutine can identify or order them. See How to collect values from N goroutines executed in a specific order?
If processing should stop on the first error encountered, see Close multiple goroutine if an error occurs in one in go
Here's an example how it could look like when using a channel. Note that no waitgroup is needed here, because we know that we expect as many values on the channel as many goroutines we launch.
type result struct {
task int32
data int32
err error
}
func main() {
tasks := []int32{1, 2, 3, 4}
ch := make(chan result)
for _, task := range tasks {
go calcTask(task, ch)
}
// Collect results:
results := make([]result, len(tasks))
for i := range results {
results[i] = <-ch
}
fmt.Printf("Results: %+v\n", results)
}
func calcTask(task int32, ch chan<- result) {
if task > 2 {
// Simulate failure
ch <- result{task: task, err: fmt.Errorf("task %v failed", task)}
return
}
// Simulate success
ch <- result{task: task, data: task * 2, err: nil}
}
Output (try ot on the Go Playground):
Results: [{task:4 data:0 err:0x40e130} {task:1 data:2 err:<nil>} {task:2 data:4 err:<nil>} {task:3 data:0 err:0x40e138}]
I also believe you have to use channel, it must be something like this:
package main
import (
"fmt"
"log"
"sync"
)
var (
BrokersByBrokerID = []int32{1, 2, 3}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup
var results []result
ch := make(chan result)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go getInt32(ch, &wg, broker)
}
go func() {
for v := range ch {
results = append(results, v)
}
}()
wg.Wait()
close(ch)
log.Printf("collected %v", results)
}
func getInt32(ch chan result, wg *sync.WaitGroup, broker int32) {
defer wg.Done()
if broker == 1 {
ch <- result{err: fmt.Sprintf("error: gor broker 1")}
return
}
ch <- result{data: fmt.Sprintf("broker %d - ok", broker)}
}
Result will look like this:
2019/02/05 15:26:28 collected [{broker 3 - ok } {broker 2 - ok } { error: gor broker 1}]
package main
import (
"fmt"
"log"
"sync"
)
var (
BrokersByBrokerID = []int{1, 2, 3, 4}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup
var results []int
ch := make(chan int)
done := make(chan bool)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go func(i int) {
defer wg.Done()
ch <- i
if i == 4 {
done <- true
}
}(broker)
}
L:
for {
select {
case v := <-ch:
results = append(results, v)
if len(results) == 4 {
//<-done
close(ch)
break L
}
case _ = <-done:
break
}
}
fmt.Println("STOPPED")
//<-done
wg.Wait()
log.Printf("collected %v", results)
}
Thank cn007b and Edenshaw. My answer is based on their answers.
As Edenshaw commented, need another sync.Waitgroup for goroutine which getting results from channel, or you may get an incomplete array.
package main
import (
"fmt"
"sync"
"encoding/json"
)
type Resp struct {
id int
}
func main() {
var wg sync.WaitGroup
chanRes := make(chan interface{}, 3)
for i := 0; i < 3; i++ {
wg.Add(1)
resp := &Resp{}
go func(i int, resp *Resp) {
defer wg.Done()
resp.id = i
chanRes <- resp
}(i, resp)
}
res := make([]interface{}, 0)
var wg2 sync.WaitGroup
wg2.Add(1)
go func() {
defer wg2.Done()
for v := range chanRes {
res = append(res, v.(*Resp).id)
}
}()
wg.Wait()
close(chanRes)
wg2.Wait()
resStr, _ := json.Marshal(res)
fmt.Println(string(resStr))
}
package main
import (
"fmt"
"log"
"sync"
"time"
)
var (
BrokersByBrokerID = []int{1, 2, 3, 4}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup.
var results []int
ch := make(chan int)
done := make(chan bool)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go func(i int) {
defer wg.Done()
ch <- i
if i == 4 {
done <- true
}
}(broker)
}
for v := range ch {
results = append(results, v)
if len(results) == 4 {
close(ch)
}
}
fmt.Println("STOPPED")
<-done
wg.Wait()
log.Printf("collected %v", results)
}
</pre>

go routine deadlock with single channel

I started learning go recently and I am stuck on a problem.
I have a simple go routine which either returns or pushes value to a channel.
And my main fn delegates work to this routine till it meets condition or data is exhausted.
This code seem to deadlock on "found" channel. What am I doing wrong?
There are multiple workers
Item can be found in more than one worker at the same time
Once item is found, all workers should be stopped.
.
func workerRoutine(data Data, found chan bool, wg *sync.WaitGroup){
defer (*wg).Done()
// data processing
// return on false
// multiple routines can set this at the same time
found <-true
}
func main {
// ....
found:=make(chan bool)
var wg sync.WaitGroup
itemFound:=false
Loop:
for i:=0; i<limit; i++ {
select {
case <-found:
itemFound = true
break Loop
default:
if(some_check) {
wg.Add(1)
go workerRoutine(mdata,found,&wg)
}
}
}
wg.Wait()
// use itemFound
}
One possible solution is to avoid select statement and use separate goroutine for receiver (or sender, or both).
Example:
package main
import "sync"
func worker(res chan bool, wg *sync.WaitGroup) {
res <- true
wg.Done()
}
func receiver(res chan bool, wg *sync.WaitGroup) {
for range res {
}
wg.Done()
}
func main() {
var wg, wg2 sync.WaitGroup
wg.Add(1)
wg2.Add(10)
found := make(chan bool)
go receiver(found, &wg)
for i := 0; i < 10; i++ {
go worker(found, &wg2)
}
wg2.Wait()
close(found)
wg.Done()
}

Resources