I'm trying to fetch the content of an API with numerous goroutines.
I'm using a for loop to iterate over different character, but it seems like the forloop reaches its final value, before the requests are sent off.
package main
import (
"encoding/json"
"fmt"
"net/http"
"sync"
)
type people struct {
Name string `json:"name"`
}
func main(){
names := make(chan string, 25)
var wg sync.WaitGroup
for i := 0; i < 25; i++ {
wg.Add(1)
go func() {
defer wg.Done()
var p people
url := fmt.Sprintf("https://swapi.dev/api/people/%d", i)
getJSON(url, &p)
names <- p.Name
}()
}
name := <-names
fmt.Println(name)
wg.Wait()
}
func getJSON(url string, target interface{}) error {
r, err := http.Get(url)
if err != nil {
return err
}
defer r.Body.Close()
json.NewDecoder(r.Body).Decode(target)
return nil
}
Also, if somebody could improve my code quality, I'd be very grateful, I'm very new to Golang and don't have anybody to learn from!
You go routines are all using the same variable i. So on the first loop, you launch a goroutine that makes a url from i, and on the next loop i is incremented before that routine has a chance to run.
It's a common mistake in GoLang. The solution is to make a variable for each loop, and pass that one forward. You can either do it with a closure like this (playground).
for i := 0; i < 25; i++ {
wg.Add(1)
localI := i
go func() {
defer wg.Done()
var p people
// Use LocalI here
url := fmt.Sprintf("https://swapi.dev/api/people/%d", localI)
getJSON(url, &p)
names <- p.Name
}()
}
Or as an argument to the function (playground)
for i := 0; i < 25; i++ {
wg.Add(1)
localI := i
go func(localI int) {
defer wg.Done()
var p people
// Use LocalI here
url := fmt.Sprintf("https://swapi.dev/api/people/%d", localI)
getJSON(url, &p)
names <- p.Name
// Pass i here. Since I is a primitive, it is passed by value, not reference.
// Meaning a copy is made.
}(i)
}
Here is a good writeup on the mistake you made:
https://github.com/golang/go/wiki/CommonMistakes#using-goroutines-on-loop-iterator-variables
And the one above it is good to read too!
Related
I'm writing a program that reads a list of order numbers in a file called orders.csv and compares it with the other csv files that are present in the folder.
The problem is that it goes into deadlock even using waitgroup and I don't know why.
For some reason stackoverflow says that my post is mostly code, so I have to add this line, because the whole code is necessary if someone wants to help me debug this problem I'm having.
package main
import (
"bufio"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
)
type Files struct {
filenames []string
}
type Orders struct {
ID []string
}
var ordersFilename string = "orders.csv"
func main() {
var (
ordersFile *os.File
files Files
orders Orders
err error
)
mu := new(sync.Mutex)
wg := &sync.WaitGroup{}
wg.Add(1)
if ordersFile, err = os.Open(ordersFilename); err != nil {
log.Fatalln("Could not open file: " + ordersFilename)
}
orders = getOrderIDs(ordersFile)
files.filenames = getCSVsFromCurrentDir()
var filenamesSize = len(files.filenames)
var ch = make(chan map[string][]string, filenamesSize)
var done = make(chan bool)
for i, filename := range files.filenames {
go func(currentFilename string, ch chan<- map[string][]string, i int, orders Orders, wg *sync.WaitGroup, filenamesSize *int, mu *sync.Mutex, done chan<- bool) {
wg.Add(1)
defer wg.Done()
checkFile(currentFilename, orders, ch)
mu.Lock()
*filenamesSize--
mu.Unlock()
if i == *filenamesSize {
done <- true
close(done)
}
}(filename, ch, i, orders, wg, &filenamesSize, mu, done)
}
select {
case str := <-ch:
fmt.Printf("%+v\n", str)
case <-done:
wg.Done()
break
}
wg.Wait()
close(ch)
}
// getCSVsFromCurrentDir returns a string slice
// with the filenames of csv files inside the
// current directory that are not "orders.csv"
func getCSVsFromCurrentDir() []string {
var filenames []string
err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error {
if path != "." && strings.HasSuffix(path, ".csv") && path != ordersFilename {
filenames = append(filenames, path)
}
return nil
})
if err != nil {
log.Fatalln("Could not read file names in current dir")
}
return filenames
}
// getOrderIDs returns an Orders struct filled
// with order IDs retrieved from the file
func getOrderIDs(file *os.File) Orders {
var (
orders Orders
err error
fileContent string
)
reader := bufio.NewReader(file)
if fileContent, err = readLine(reader); err != nil {
log.Fatalln("Could not read file: " + ordersFilename)
}
for err == nil {
orders.ID = append(orders.ID, fileContent)
fileContent, err = readLine(reader)
}
return orders
}
func checkFile(filename string, orders Orders, ch chan<- map[string][]string) {
var (
err error
file *os.File
fileContent string
orderFilesMap map[string][]string
counter int
)
orderFilesMap = make(map[string][]string)
if file, err = os.Open(filename); err != nil {
log.Fatalln("Could not read file: " + filename)
}
reader := bufio.NewReader(file)
if fileContent, err = readLine(reader); err != nil {
log.Fatalln("Could not read file: " + filename)
}
for err == nil {
if containedInSlice(fileContent, orders.ID) && !containedInSlice(fileContent, orderFilesMap[filename]) {
orderFilesMap[filename] = append(orderFilesMap[filename], fileContent)
// fmt.Println("Found: ", fileContent, " in ", filename)
} else {
// fmt.Printf("Could not find: '%s' in '%s'\n", fileContent, filename)
}
counter++
fileContent, err = readLine(reader)
}
ch <- orderFilesMap
}
// containedInSlice returns true or false
// based on whether the string is contained
// in the slice
func containedInSlice(str string, slice []string) bool {
for _, ID := range slice {
if ID == str {
return true
}
}
return false
}
// readLine returns a line from the passed reader
func readLine(r *bufio.Reader) (string, error) {
var (
isPrefix bool = true
err error = nil
line, ln []byte
)
for isPrefix && err == nil {
line, isPrefix, err = r.ReadLine()
ln = append(ln, line...)
}
return string(ln), err
}
The first issue is the wg.Add always must be outside of the goroutine(s) it stands for. If it isn't, the
wg.Wait call might be called before the goutine(s) have actually started running (and called wg.Add) and therefore will "think"
that there is nothing to wait for.
The second issue with the code is that there are multiple ways it waits for the routines to be done. There is
the WaitGroup and there is the done channel. Use only one of them. Which one depends also on how the results of the
goroutines are used. Here we come to the next problem.
The third issue is with gathering the results. Currently the code only prints / uses a single result from the goroutines.
Put a for { ... } loop around the select and use return to break out of the loop if the done channel is closed.
(Note that you don't need to send anything on the done channel, closing it is enough.)
Improved Version 0.0.1
So here the first version (including some other "code cleanup") with a done channel used for closing and the WaitGroup removed:
func main() {
ordersFile, err := os.Open(ordersFilename)
if err != nil {
log.Fatalln("Could not open file: " + ordersFilename)
}
orders := getOrderIDs(ordersFile)
files := Files{
filenames: getCSVsFromCurrentDir(),
}
var (
mu = new(sync.Mutex)
filenamesSize = len(files.filenames)
ch = make(chan map[string][]string, filenamesSize)
done = make(chan bool)
)
for i, filename := range files.filenames {
go func(currentFilename string, ch chan<- map[string][]string, i int, orders Orders, filenamesSize *int, mu *sync.Mutex, done chan<- bool) {
checkFile(currentFilename, orders, ch)
mu.Lock()
*filenamesSize--
mu.Unlock()
// TODO: This also accesses filenamesSize, so it also needs to be protected with the mutex:
if i == *filenamesSize {
done <- true
close(done)
}
}(filename, ch, i, orders, &filenamesSize, mu, done)
}
// Note: closing a channel is not really needed, so you can omit this:
defer close(ch)
for {
select {
case str := <-ch:
fmt.Printf("%+v\n", str)
case <-done:
return
}
}
}
Improved Version 0.0.2
In your case we have some advantage however. We know exactly how many goroutines we started and therefore also how
many results we expect. (Of course if each goroutine returns a result which currently this code does.) That gives
us another option as we can collect the results with another for loop having the same amount of iterations:
func main() {
ordersFile, err := os.Open(ordersFilename)
if err != nil {
log.Fatalln("Could not open file: " + ordersFilename)
}
orders := getOrderIDs(ordersFile)
files := Files{
filenames: getCSVsFromCurrentDir(),
}
var (
// Note: a buffered channel helps speed things up. The size does not need to match the size of the items that will
// be passed through the channel. A fixed, small size is perfect here.
ch = make(chan map[string][]string, 5)
)
for _, filename := range files.filenames {
go func(filename string) {
// orders and channel are not variables of the loop and can be used without copying
checkFile(filename, orders, ch)
}(filename)
}
for range files.filenames {
str := <-ch
fmt.Printf("%+v\n", str)
}
}
A lot simpler, isn't it? Hope that helps!
There is a lot wrong with this code.
You're using the WaitGroup wrong. Add has to be called in the main goroutine, else there is a chance that Wait is called before all Add calls complete.
There's an extraneous Add(1) call right after initializing the WaitGroup that isn't matched by a Done() call, so Wait will never return (assuming the point above is fixed).
You're using both a WaitGroup and a done channel to signal completion. This is redundant at best.
You're reading filenamesSize while not holding the lock (in the if i == *filenamesSize statement). This is a race condition.
The i == *filenamesSize condition makes no sense in the first place. Goroutines execute in an arbitrary order, so you can't be sure that the goroutine with i == 0 is the last one to decrement filenamesSize
This can all be simplified by getting rid of most if the synchronization primitives and simply closing the ch channel when all goroutines are done:
func main() {
ch := make(chan map[string][]string)
var wg WaitGroup
for _, filename := range getCSVsFromCurrentDir() {
filename := filename // capture loop var
wg.Add(1)
go func() {
checkFile(filename, orders, ch)
wg.Done()
}()
}
go func() {
wg.Wait() // after all goroutines are done...
close(ch) // let range loop below exit
}()
for str := range ch {
// ...
}
}
not an answer, but some comments that does not fit the comment box.
In this part of the code
func main() {
var (
ordersFile *os.File
files Files
orders Orders
err error
)
mu := new(sync.Mutex)
wg := &sync.WaitGroup{}
wg.Add(1)
The last statement is a call to wg.Add that appears dangling. By that i mean we can hardly understand what will trigger the required wg.Done counter part. This is a mistake to call for wg.Add without a wg.Done, this is prone to errors to not write them in such way we can not immediately find them in pair.
In that part of the code, it is clearly wrong
go func(currentFilename string, ch chan<- map[string][]string, i int, orders Orders, wg *sync.WaitGroup, filenamesSize *int, mu *sync.Mutex, done chan<- bool) {
wg.Add(1)
defer wg.Done()
Consider that by the time the routine is executed, and that you added 1 to the waitgroup, the parent routine continues to execute. See this example: https://play.golang.org/p/N9Chaqkv4bd
The main routine does not wait for the waitgroup because it does not have time to increment.
There is more to say but i find it hard to understand the purpose of your code so i am not sure how to help you further without basically rewrite it.
I want to use goroutines to batch requests from different customers' with different date.
I mean 50 consumer goroutines to consume all customers from db, and 2 date consumer goroutines to consume date slice.
Main codes as below, but it hung and didn't exit as expected.
Why doesn't it exit as expected?
func Run(){
var syncWg sync.WaitGroup
syncWg.Add(1)
go SyncCustomerMetricsHistory(&syncWg)
syncWg.Wait()
}
func SyncCustomerMetricsHistory(wg *sync.WaitGroup){
defer wg.Done()
odb := orm.NewOrm()
start := time.Now()
logs.Info("start sync customer metrics, time:[%v]", start)
qs := odb.QueryTable("gg_customer")
var customers []*db.GgCustomer
if num, err := qs.All(&customers); err != nil || num == 0 {
logs.Error("Get customer error, rows:[%v], err:[%v]", num, err)
}
customersChan := make(chan *db.GgCustomer, 50)
var wgC sync.WaitGroup
wgC.Add(50)
for i := 0; i < 50; i++ {
go syncCustomerMetricsHistory(customersChan, &wgC)
}
go func() {
for _, customer := range customers {
customersChan <- customer
}
close(customersChan)
}()
wgC.Wait()
}
func syncCustomerMetricsHistory(customerChan <- chan *db.GgCustomer, wg *sync.WaitGroup){
defer wg.Done()
for customer := range customerChan{
dateChan := make(chan string, 2)
var wgD sync.WaitGroup
wgD.Add(2)
for i := 1; i < 2; i++{
go test(dateChan, customer, &wgD)
}
go func(){
for _, date := range GetAllYearDate(){
dateChan <- date
}
close(dateChan)
}()
wgD.Wait()
}
}
}
func test(dateChan <- chan string, customer *db.GgCustomer, wg *sync.WaitGroup){
defer wg.Done()
for date := range dateChan{
fmt.Println(date, customer)
}
}
func GetAllYearDate() []string{
return []string{"2019-10-01", "2019-10-02"}
}
I have not tried to run this (as it requires additional code) but believe your issue is:
wgD.Add(2)
for i := 1; i < 2; i++{
go test(dateChan, customer, &wgD)
}
That for loop will only iterate once but you called wgD.Add(2) (I think you probably meant the loop to iterate twice; try i <= 2).
One other bit of feedback; the way you are using waitgroups will work but is hard to follow (perhaps leading to you not spotting the issue); how about something like:
func Run(){
SyncCustomerMetricsHistory() // No wait group needed as this will not return before done
}
func SyncCustomerMetricsHistory(){
odb := orm.NewOrm()
start := time.Now()
logs.Info("start sync customer metrics, time:[%v]", start)
qs := odb.QueryTable("gg_customer")
var customers []*db.GgCustomer
if num, err := qs.All(&customers); err != nil || num == 0 {
logs.Error("Get customer error, rows:[%v], err:[%v]", num, err)
}
customersChan := make(chan *db.GgCustomer, 50)
var wgC sync.WaitGroup
wgC.Add(50)
for i := 0; i < 50; i++ {
go func() {
syncCustomerMetricsHistory(customersChan)
wgC.Done()
}()
}
go func() {
for _, customer := range customers {
customersChan <- customer
}
close(customersChan)
}()
wgC.Wait()
}
func syncCustomerMetricsHistory(customerChan <- chan *db.GgCustomer){
for customer := range customerChan{
dateChan := make(chan string, 2)
var wgD sync.WaitGroup
wgD.Add(2)
for i := 1; i < 2; i++{
go func() {
test(dateChan, customer)
wgD.Done()
}()
}
go func(){
for _, date := range GetAllYearDate(){
dateChan <- date
}
close(dateChan)
}()
wgD.Wait()
}
}
}
I think this is easier to follow because you can see where wg.Done() is being called. It's also really easy to stick some fmt.Println commands on either side which makes it simpler to debug this kind of issue.
I'm using goroutines in my project and I want to to assign the values to the struct fields but I don't know that how I will assign the values get by using mongodb quires to the struct fields I'm showing my struct and the query too.
type AppLoadNew struct{
StripeTestKey string `json:"stripe_test_key" bson:"stripe_test_key,omitempty"`
Locations []Locations `json:"location" bson:"location,omitempty"`
}
type Locations struct{
Id int `json:"_id" bson:"_id"`
Location string `json:"location" bson:"location"`
}
func GoRoutine(){
values := AppLoadNew{}
go func() {
data, err := GetStripeTestKey(bson.M{"is_default": true})
if err == nil {
values.StripeTestKey := data.TestStripePublishKey
}
}()
go func() {
location, err := GetFormLocation(bson.M{"is_default": true})
if err == nil {
values.Locations := location
}
}()
fmt.Println(values) // Here it will nothing
// empty
}
Can you please help me that I will assign all the values to the AppLoadNew struct.
In Go no value is safe for concurrent read and write (from multiple goroutines). You must synchronize access.
Reading and writing variables from multiple goroutines can be protected using sync.Mutex or sync.RWMutex, but in your case there is something else involved: you should wait for the 2 launched goroutines to complete. For that, the go-to solution is sync.WaitGroup.
And since the 2 goroutines write 2 different fields of a struct (which act as 2 distinct variables), they don't have to be synchronized to each other (see more on this here: Can I concurrently write different slice elements). Which means using a sync.WaitGroup is sufficient.
This is how you can make it safe and correct:
func GoRoutine() {
values := AppLoadNew{}
wg := &sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
data, err := GetStripeTestKey(bson.M{"is_default": true})
if err == nil {
values.StripeTestKey = data.StripeTestKey
}
}()
wg.Add(1)
go func() {
defer wg.Done()
location, err := GetFormLocation(bson.M{"is_default": true})
if err == nil {
values.Locations = location
}
}()
wg.Wait()
fmt.Println(values)
}
See a (slightly modified) working example on the Go Playground.
See related / similar questions:
Reading values from a different thread
golang struct concurrent read and write without Lock is also running ok?
How to make a variable thread-safe
You can use sync package with WaitGroup, here is an example:
package main
import (
"fmt"
"sync"
"time"
)
type Foo struct {
One string
Two string
}
func main() {
f := Foo{}
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
// Perform long calculations
<-time.After(time.Second * 1)
f.One = "foo"
}()
wg.Add(1)
go func() {
defer wg.Done()
// Perform long calculations
<-time.After(time.Second * 2)
f.Two = "bar"
}()
fmt.Printf("Before %+v\n", f)
wg.Wait()
fmt.Printf("After %+v\n", f)
}
The output:
Before {One: Two:}
After {One:foo Two:bar}
I'm currently staring at a beefed up version of the following code:
func embarrassing(data []string) []string {
resultChan := make(chan string)
var waitGroup sync.WaitGroup
for _, item := range data {
waitGroup.Add(1)
go func(item string) {
defer waitGroup.Done()
resultChan <- doWork(item)
}(item)
}
go func() {
waitGroup.Wait()
close(resultChan)
}()
var results []string
for result := range resultChan {
results = append(results, result)
}
return results
}
This is just blowing my mind. All this is doing can be expressed in other languages as
results = parallelMap(data, doWork)
Even if it can't be done quite this easily in Go, isn't there still a better way than the above?
If you need all the results, you don't need the channel (and the extra goroutine to close it) to communicate the results, you can write directly into the results slice:
func cleaner(data []string) []string {
results := make([]string, len(data))
wg := &sync.WaitGroup{}
wg.Add(len(data))
for i, item := range data {
go func(i int, item string) {
defer wg.Done()
results[i] = doWork(item)
}(i, item)
}
wg.Wait()
return results
}
This is possible because slice elements act as distinct variables, and thus can be written individually without synchronization. For details, see Can I concurrently write different slice elements. You also get the results in the same order as your input for free.
Anoter variation: if doWork() would not return the result but get the address where the result should be "placed", and additionally the sync.WaitGroup to signal completion, that doWork() function could be executed "directly" as a new goroutine.
We can create a reusable wrapper for doWork():
func doWork2(item string, result *string, wg *sync.WaitGroup) {
defer wg.Done()
*result = doWork(item)
}
If you have the processing logic in such format, this is how it can be executed concurrently:
func cleanest(data []string) []string {
results := make([]string, len(data))
wg := &sync.WaitGroup{}
wg.Add(len(data))
for i, item := range data {
go doWork2(item, &results[i], wg)
}
wg.Wait()
return results
}
Yet another variation could be to pass a channel to doWork() on which it is supposed to deliver the result. This solution doesn't even require a sync.Waitgroup, as we know how many elements we want to receive from the channel:
func cleanest2(data []string) []string {
ch := make(chan string)
for _, item := range data {
go doWork3(item, ch)
}
results := make([]string, len(data))
for i := range results {
results[i] = <-ch
}
return results
}
func doWork3(item string, res chan<- string) {
res <- "done:" + item
}
"Weakness" of this last solution is that it may collect the result "out-of-order" (which may or may not be a problem). This approach can be improved to retain order by letting doWork() receive and return the index of the item. For details and examples, see How to collect values from N goroutines executed in a specific order?
You can also use reflection to achieve something similar.
In this example it distribute the handler function over 4 goroutines and returns the results in a new instance of the given source slice type.
package main
import (
"fmt"
"reflect"
"strings"
"sync"
)
func parralelMap(some interface{}, handle interface{}) interface{} {
rSlice := reflect.ValueOf(some)
rFn := reflect.ValueOf(handle)
dChan := make(chan reflect.Value, 4)
rChan := make(chan []reflect.Value, 4)
var waitGroup sync.WaitGroup
for i := 0; i < 4; i++ {
waitGroup.Add(1)
go func() {
defer waitGroup.Done()
for v := range dChan {
rChan <- rFn.Call([]reflect.Value{v})
}
}()
}
nSlice := reflect.MakeSlice(rSlice.Type(), rSlice.Len(), rSlice.Cap())
for i := 0; i < rSlice.Len(); i++ {
dChan <- rSlice.Index(i)
}
close(dChan)
go func() {
waitGroup.Wait()
close(rChan)
}()
i := 0
for v := range rChan {
nSlice.Index(i).Set(v[0])
i++
}
return nSlice.Interface()
}
func main() {
fmt.Println(
parralelMap([]string{"what", "ever"}, strings.ToUpper),
)
}
Test here https://play.golang.org/p/iUPHqswx8iS
I see lots of tutorials and examples on how to make Go wait for x number of goroutines to finish, but what I'm trying to do is have ensure there are always x number running, so a new goroutine is launched as soon as one ends.
Specifically I have a few hundred thousand 'things to do' which is processing some stuff that is coming out of MySQL. So it works like this:
db, err := sql.Open("mysql", connection_string)
checkErr(err)
defer db.Close()
rows,err := db.Query(`SELECT id FROM table`)
checkErr(err)
defer rows.Close()
var id uint
for rows.Next() {
err := rows.Scan(&id)
checkErr(err)
go processTheThing(id)
}
checkErr(err)
rows.Close()
Currently that will launch several hundred thousand threads of processTheThing(). What I need is that a maximum of x number (we'll call it 20) goroutines are launched. So it starts by launching 20 for the first 20 rows, and from then on it will launch a new goroutine for the next id the moment that one of the current goroutines has finished. So at any point in time there are always 20 running.
I'm sure this is quite simple/standard, but I can't seem to find a good explanation on any of the tutorials or examples or how this is done.
You may find Go Concurrency Patterns article interesting, especially Bounded parallelism section, it explains the exact pattern you need.
You can use channel of empty structs as a limiting guard to control number of concurrent worker goroutines:
package main
import "fmt"
func main() {
maxGoroutines := 10
guard := make(chan struct{}, maxGoroutines)
for i := 0; i < 30; i++ {
guard <- struct{}{} // would block if guard channel is already filled
go func(n int) {
worker(n)
<-guard
}(i)
}
}
func worker(i int) { fmt.Println("doing work on", i) }
Here I think something simple like this will work :
package main
import "fmt"
const MAX = 20
func main() {
sem := make(chan int, MAX)
for {
sem <- 1 // will block if there is MAX ints in sem
go func() {
fmt.Println("hello again, world")
<-sem // removes an int from sem, allowing another to proceed
}()
}
}
Thanks to everyone for helping me out with this. However, I don't feel that anyone really provided something that both worked and was simple/understandable, although you did all help me understand the technique.
What I have done in the end is I think much more understandable and practical as an answer to my specific question, so I will post it here in case anyone else has the same question.
Somehow this ended up looking a lot like what OneOfOne posted, which is great because now I understand that. But OneOfOne's code I found very difficult to understand at first because of the passing functions to functions made it quite confusing to understand what bit was for what. I think this way makes a lot more sense:
package main
import (
"fmt"
"sync"
)
const xthreads = 5 // Total number of threads to use, excluding the main() thread
func doSomething(a int) {
fmt.Println("My job is",a)
return
}
func main() {
var ch = make(chan int, 50) // This number 50 can be anything as long as it's larger than xthreads
var wg sync.WaitGroup
// This starts xthreads number of goroutines that wait for something to do
wg.Add(xthreads)
for i:=0; i<xthreads; i++ {
go func() {
for {
a, ok := <-ch
if !ok { // if there is nothing to do and the channel has been closed then end the goroutine
wg.Done()
return
}
doSomething(a) // do the thing
}
}()
}
// Now the jobs can be added to the channel, which is used as a queue
for i:=0; i<50; i++ {
ch <- i // add i to the queue
}
close(ch) // This tells the goroutines there's nothing else to do
wg.Wait() // Wait for the threads to finish
}
Create channel for passing data to goroutines.
Start 20 goroutines that processes the data from channel in a loop.
Send the data to the channel instead of starting a new goroutine.
Grzegorz Żur's answer is the most efficient way to do it, but for a newcomer it could be hard to implement without reading code, so here's a very simple implementation:
type idProcessor func(id uint)
func SpawnStuff(limit uint, proc idProcessor) chan<- uint {
ch := make(chan uint)
for i := uint(0); i < limit; i++ {
go func() {
for {
id, ok := <-ch
if !ok {
return
}
proc(id)
}
}()
}
return ch
}
func main() {
runtime.GOMAXPROCS(4)
var wg sync.WaitGroup //this is just for the demo, otherwise main will return
fn := func(id uint) {
fmt.Println(id)
wg.Done()
}
wg.Add(1000)
ch := SpawnStuff(10, fn)
for i := uint(0); i < 1000; i++ {
ch <- i
}
close(ch) //should do this to make all the goroutines exit gracefully
wg.Wait()
}
playground
This is a simple producer-consumer problem, which in Go can be easily solved using channels to buffer the paquets.
To put it simple: create a channel that accept your IDs. Run a number of routines which will read from the channel in a loop then process the ID. Then run your loop that will feed IDs to the channel.
Example:
func producer() {
var buffer = make(chan uint)
for i := 0; i < 20; i++ {
go consumer(buffer)
}
for _, id := range IDs {
buffer <- id
}
}
func consumer(buffer chan uint) {
for {
id := <- buffer
// Do your things here
}
}
Things to know:
Unbuffered channels are blocking: if the item wrote into the channel isn't accepted, the routine feeding the item will block until it is
My example lack a closing mechanism: you must find a way to make the producer to wait for all consumers to end their loop before returning. The simplest way to do this is with another channel. I let you think about it.
I've wrote a simple package to handle concurrency for Golang. This package will help you limit the number of goroutines that are allowed to run concurrently:
https://github.com/zenthangplus/goccm
Example:
package main
import (
"fmt"
"goccm"
"time"
)
func main() {
// Limit 3 goroutines to run concurrently.
c := goccm.New(3)
for i := 1; i <= 10; i++ {
// This function have to call before any goroutine
c.Wait()
go func(i int) {
fmt.Printf("Job %d is running\n", i)
time.Sleep(2 * time.Second)
// This function have to when a goroutine has finished
// Or you can use `defer c.Done()` at the top of goroutine.
c.Done()
}(i)
}
// This function have to call to ensure all goroutines have finished
// after close the main program.
c.WaitAllDone()
}
Also can take a look here: https://github.com/LiangfengChen/goutil/blob/main/concurrent.go
The example can refer the test case.
func TestParallelCall(t *testing.T) {
format := "test:%d"
data := make(map[int]bool)
mutex := sync.Mutex{}
val, err := ParallelCall(1000, 10, func(pos int) (interface{}, error) {
mutex.Lock()
defer mutex.Unlock()
data[pos] = true
return pos, errors.New(fmt.Sprintf(format, pos))
})
for i := 0; i < 1000; i++ {
if _, ok := data[i]; !ok {
t.Errorf("TestParallelCall pos not found: %d", i)
}
if val[i] != i {
t.Errorf("TestParallelCall return value is not right (%d,%v)", i, val[i])
}
if err[i].Error() != fmt.Sprintf(format, i) {
t.Errorf("TestParallelCall error msg is not correct (%d,%v)", i, err[i])
}
}
}