How to limit goroutines - Go

I'm developing a Gmail client based on the Google API.
I have a list of labels obtained through this call:
r, err := s.gClient.Service.Users.Labels.List(s.gClient.User).Do()
Then, for every label I need to get its details:
for _, l := range r.Labels {
    d, err := s.gClient.Service.Users.Labels.Get(s.gClient.User, l.Id).Do()
}
I'd like to handle the loop in a more efficient way, so I start a goroutine for each iteration:
ch := make(chan label.Label)
for _, l := range r.Labels {
    go func(gmailLabels *gmailclient.Label, gClient *gmail.Client, ch chan<- label.Label) {
        d, err := gClient.Service.Users.Labels.Get(gClient.User, gmailLabels.Id).Do()
        if err != nil {
            panic(err)
        }
        // Performs some operation with the label `d`
        preparedLabel := ....
        ch <- preparedLabel
    }(l, s.gClient, ch)
}
for i := 0; i < len(r.Labels); i++ {
    lab := <-ch
    fmt.Printf("Processed %v\n", lab.LabelID)
}
The problem with this code is that the Gmail API has a rate limit, so I get this error:
panic: googleapi: Error 429: Too many concurrent requests for user, rateLimitExceeded
What is the correct way to handle this situation?

How about starting only a fixed number of goroutines, e.g. 10, and feeding them values from another goroutine's loop? The channels get a small buffer to decrease synchronisation time.
chIn := make(chan *gmailclient.Label, 20)
chOut := make(chan label.Label, 20)
for i := 0; i < 10; i++ {
    go func(gClient *gmail.Client, chIn <-chan *gmailclient.Label, chOut chan<- label.Label) {
        for gmailLabel := range chIn {
            d, err := gClient.Service.Users.Labels.Get(gClient.User, gmailLabel.Id).Do()
            if err != nil {
                panic(err)
            }
            // Performs some operation with the label `d`
            preparedLabel := ....
            chOut <- preparedLabel
        }
    }(s.gClient, chIn, chOut)
}
go func(chIn chan<- *gmailclient.Label) {
    defer close(chIn)
    for _, l := range r.Labels {
        chIn <- l
    }
}(chIn)
for i := 0; i < len(r.Labels); i++ {
    lab := <-chOut
    fmt.Printf("Processed %v\n", lab.LabelID)
}
EDIT:
Here is a playground sample.
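Beyond capping the number of workers, you can also throttle the request rate itself. Here is a minimal sketch using the golang.org/x/time/rate and context packages; the limit of 10 requests per second is an assumed figure for illustration, not Gmail's documented quota:
// Allow on average 10 requests per second, with bursts of 1 (assumed limit).
limiter := rate.NewLimiter(rate.Limit(10), 1)
for _, l := range r.Labels {
    // Wait blocks until the limiter permits another request.
    if err := limiter.Wait(context.Background()); err != nil {
        panic(err)
    }
    d, err := s.gClient.Service.Users.Labels.Get(s.gClient.User, l.Id).Do()
    // process d and err as before
}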

Concurrency not running any faster

I have written some code and tried to use concurrency, but it doesn't run any faster. How can I improve it?
package main

import (
    "bufio"
    "fmt"
    "os"
    "strings"
    "sync"
)

var wg sync.WaitGroup

func checkerr(e error) {
    if e != nil {
        fmt.Println(e)
    }
}

func readFile() {
    file, err := os.Open("data.txt")
    checkerr(err)
    fres, err := os.Create("resdef.txt")
    checkerr(err)
    defer file.Close()
    defer fres.Close()
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        wg.Add(1)
        go func() {
            words := strings.Fields(scanner.Text())
            shellsort(words)
            writeToFile(fres, words)
            wg.Done()
        }()
        wg.Wait()
    }
}

func shellsort(words []string) {
    for inc := len(words) / 2; inc > 0; inc = (inc + 1) * 5 / 11 {
        for i := inc; i < len(words); i++ {
            j, temp := i, words[i]
            for ; j >= inc && strings.ToLower(words[j-inc]) > strings.ToLower(temp); j -= inc {
                words[j] = words[j-inc]
            }
            words[j] = temp
        }
    }
}

func writeToFile(f *os.File, words []string) {
    datawriter := bufio.NewWriter(f)
    for _, s := range words {
        datawriter.WriteString(s + " ")
    }
    datawriter.WriteString("\n")
    datawriter.Flush()
}

func main() {
    readFile()
}
Everything works, except that it takes the same time to do everything as it does without concurrency.
You must place wg.Wait() after the for loop:
for condition {
    wg.Add(1)
    go func() {
        // a concurrent job here
        wg.Done()
    }()
}
wg.Wait()
Note: the work itself should have a concurrent nature.
Here is my tested solution: read from the input file sequentially, then run n concurrent sort tasks, and finally write to the output file sequentially, in order. Try this:
package main

import (
    "bufio"
    "fmt"
    "log"
    "os"
    "runtime"
    "sort"
    "strings"
    "sync"
)

type sortQueue struct {
    index int
    data  []string
}

func main() {
    n := runtime.NumCPU()
    a := make(chan sortQueue, n)
    b := make(chan sortQueue, n)
    var wg sync.WaitGroup
    for i := 0; i < n; i++ {
        wg.Add(1)
        go parSort(a, b, &wg)
    }
    go func() {
        file, err := os.Open("data.txt")
        if err != nil {
            log.Fatal(err)
        }
        defer file.Close()
        scanner := bufio.NewScanner(file)
        i := 0
        for scanner.Scan() {
            a <- sortQueue{index: i, data: strings.Fields(scanner.Text())}
            i++
        }
        close(a)
        err = scanner.Err()
        if err != nil {
            log.Fatal(err)
        }
    }()
    fres, err := os.Create("resdef.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer fres.Close()
    go func() {
        wg.Wait()
        close(b)
    }()
    writeToFile(fres, b, n)
}

func writeToFile(f *os.File, b chan sortQueue, n int) {
    m := make(map[int][]string, n)
    order := 0
    for v := range b {
        m[v.index] = v.data
        var slice []string
        exist := true
        for exist {
            slice, exist = m[order]
            if exist {
                delete(m, order)
                order++
                s := strings.Join(slice, " ")
                fmt.Println(s)
                _, err := f.WriteString(s + "\n")
                if err != nil {
                    log.Fatal(err)
                }
            }
        }
    }
}

func parSort(a, b chan sortQueue, wg *sync.WaitGroup) {
    defer wg.Done()
    for q := range a {
        sort.Slice(q.data, func(i, j int) bool { return q.data[i] < q.data[j] })
        b <- q
    }
}
data.txt file:
1 2 0 3
a 1 b d 0 c
aa cc bb
Output:
0 1 2 3
0 1 a b c d
aa bb cc
You're not parallelizing anything, because for every call to wg.Add(1) you have a matching call to wg.Wait(). It's one-to-one: you spawn a goroutine, and then immediately block the main goroutine waiting for the newly spawned one to finish.
The point of a WaitGroup is to wait for many things to finish, with a single call to wg.Wait() after all the goroutines have been spawned.
However, in addition to fixing your call to wg.Wait, you need to control concurrent access to your scanner. One approach might be to use a channel for your scanner to emit lines of text to waiting goroutines:
lines := make(chan string)
go func() {
    for line := range lines {
        go func(line string) {
            words := strings.Fields(line)
            shellsort(words)
            writeToFile(fres, words)
        }(line)
    }
}()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
    lines <- scanner.Text()
}
close(lines)
Note that this may lead to garbled output in your file, as you have many concurrent goroutines all writing their results at the same time. You can control the output through a second channel:
lines := make(chan string)
out := make(chan []string)
var wg sync.WaitGroup
go func() {
    for line := range lines {
        wg.Add(1)
        go func(line string) {
            defer wg.Done()
            words := strings.Fields(line)
            shellsort(words)
            out <- words
        }(line)
    }
    // close out only once every sorter has finished sending
    wg.Wait()
    close(out)
}()
go func() {
    for words := range out {
        writeToFile(fres, words)
    }
}()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
    lines <- scanner.Text()
}
close(lines)
At this point, you can refactor into a "reader", a "processor" and a "writer", which form a pipeline that communicates via channels.
The reader and writer each use a single goroutine to prevent concurrent access to a resource, while the processor spawns many goroutines (currently unbounded) to "fan out" the work across many processors:
package main

import (
    "bufio"
    "os"
    "strings"
    "sync"
)

func main() {
    lines := reader()
    out := processor(lines)
    writer(out)
}

func reader() <-chan string {
    lines := make(chan string)
    file, err := os.Open("data.txt")
    checkerr(err)
    go func() {
        scanner := bufio.NewScanner(file)
        for scanner.Scan() {
            lines <- scanner.Text()
        }
        close(lines)
    }()
    return lines
}

func processor(lines <-chan string) <-chan []string {
    out := make(chan []string)
    go func() {
        var wg sync.WaitGroup
        for line := range lines {
            wg.Add(1)
            go func(line string) {
                defer wg.Done()
                words := strings.Fields(line)
                shellsort(words)
                out <- words
            }(line)
        }
        // close out only after all the sorters have sent their results
        wg.Wait()
        close(out)
    }()
    return out
}

func writer(out <-chan []string) {
    fres, err := os.Create("resdef.txt")
    checkerr(err)
    for words := range out {
        writeToFile(fres, words)
    }
}
As other answers have said, by waiting on the WaitGroup each loop iteration, you're limiting your concurrency to 1 (no concurrency). There are a number of ways to solve this, but what's correct depends entirely on what is taking time, and that hasn't been shown in the question. Concurrency doesn't magically make things faster; it just lets things happen at the same time, which only makes things faster if things that take a lot of time can happen concurrently.
Presumably, in your code, the thing that takes a long time is the sort. If that is the case, you could do something like this:
results := make(chan []string)
for scanner.Scan() {
    wg.Add(1)
    go func(line string) {
        defer wg.Done()
        words := strings.Fields(line)
        shellsort(words)
        results <- words
    }(scanner.Text())
}
go func() {
    wg.Wait()
    close(results)
}()
for words := range results {
    writeToFile(fres, words)
}
This moves the Wait to where it should be, and avoids concurrent use of the scanner and writer. This should be faster than serial processing, if the sort is taking a significant amount of processing time.
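If the input has very many lines, you may also want to bound the number of concurrent sorters. Here is a sketch of the same loop using a buffered channel as a semaphore; the bound of 8 is an arbitrary choice:
sem := make(chan struct{}, 8) // at most 8 sorters at once (arbitrary bound)
for scanner.Scan() {
    wg.Add(1)
    sem <- struct{}{} // acquire a slot before spawning
    go func(line string) {
        defer wg.Done()
        defer func() { <-sem }() // release the slot
        words := strings.Fields(line)
        shellsort(words)
        results <- words
    }(scanner.Text())
}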

Slice automatically sorted?

While creating my own pipeline to practice with goroutines, I ran into something particularly weird.
I use rand.Perm to generate some int numbers, randomly obviously, write them to an io.Writer, and then read them back from an io.Reader as a binary source. But when I print them, they are sorted!
Here's the code:
func RandomSource(tally int) chan int {
    out := make(chan int)
    sli := rand.Perm(tally)
    fmt.Println(sli)
    go func() {
        for num := range sli {
            out <- num
        }
        close(out)
    }()
    return out
}

func ReaderSource(reader io.Reader) chan int {
    out := make(chan int)
    go func() {
        buffer := make([]byte, 8)
        for {
            n, err := reader.Read(buffer)
            if n > 0 {
                v := int(binary.BigEndian.Uint64(buffer))
                out <- v
            }
            if err != nil {
                break
            }
        }
        close(out)
    }()
    return out
}

func WriterSink(writer io.Writer, in chan int) {
    for v := range in {
        buffer := make([]byte, 8)
        binary.BigEndian.PutUint64(buffer, uint64(v))
        writer.Write(buffer)
    }
}

func main() {
    fileName := "small.in"
    file, err := os.Create(fileName)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()
    p := RandomSource(500)
    WriterSink(file, p)
    file, err = os.Open(fileName)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()
    p = ReaderSource(file)
    for v := range p {
        fmt.Println(v)
    }
}
range returns an index as the first value for an array or slice, which always goes from 0 up to len - 1. Use for _, num := range sli { if you want to iterate over the values themselves rather than the set of indices.
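A minimal sketch of the difference, assuming a small rand.Perm slice:
sli := rand.Perm(5) // e.g. [3 0 4 1 2]
for num := range sli {
    fmt.Println(num) // prints the indices: 0 1 2 3 4 -- always "sorted"
}
for _, num := range sli {
    fmt.Println(num) // prints the values: 3 0 4 1 2
}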

How to implement concurrent goroutines (and/or limit them) properly to yield consistent results?

I'm using this (symbols is a []string, as is filteredSymbols):
concurrency := 5
sem := make(chan bool, concurrency)
for i := range symbols {
    sem <- true
    go func(int) {
        defer func() { <-sem }()
        rows, err := stmt.Query(symbols[i])
        if <some condition is true> {
            filteredSymbols = append(filteredSymbols, symbols[i])
        }
    }(i)
}
for i := 0; i < cap(sem); i++ {
    sem <- true
}
to limit the number of goroutines running concurrently. I need to limit them because every goroutine interacts with a Postgres database, and sometimes I have more than 3000 symbols to evaluate. The code is for analysing big financial data, stocks and other securities. I'm also using the same code to get OHLC and pre-calculated data from the db. Is this a modern approach? I'm asking because WaitGroups already exist and I'm looking for a way to use those instead.
Also, I observed that my method above sometimes yields different results. I had code where the resulting number of filteredSymbols was sometimes 1409; without changing the parameters, it would then yield 1407 results, then 1408 at times.
I even had code with a big deficit in results.
The code below was very inconsistent, so I removed the concurrency. (Note that in the code below I don't even need to limit concurrent goroutines, since they only use in-memory resources.) Removing the concurrency fixed it:
func getCommonSymbols(symbols1 []string, symbols2 []string) (symbols []string) {
    defer timeTrack(time.Now(), "Get common symbols")
    // concurrency := len(symbols1)
    // sem := make(chan bool, concurrency)
    // for _, s := range symbols1 {
    for _, sym := range symbols1 {
        // sym := s
        // sem <- true
        // go func(string) {
        // defer func() { <-sem }()
        for k := range symbols2 {
            if sym == symbols2[k] {
                symbols = append(symbols, sym)
                break
            }
        }
        // }(sym)
    }
    // for i := 0; i < cap(sem); i++ {
    // sem <- true
    // }
    return
}
You have a data race: multiple goroutines are updating filteredSymbols at the same time. The smallest change you can make to fix it is to add a mutex lock around the append call, e.g.:
concurrency := 5
sem := make(chan bool, concurrency)
l := sync.Mutex{}
for i := range symbols {
    sem <- true
    go func(i int) {
        defer func() { <-sem }()
        rows, err := stmt.Query(symbols[i])
        if <some condition is true> {
            l.Lock()
            filteredSymbols = append(filteredSymbols, symbols[i])
            l.Unlock()
        }
    }(i)
}
for i := 0; i < cap(sem); i++ {
    sem <- true
}
The race detector could have helped you spot this as well.
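For example, running your tests or program with the standard -race flag enables it:
go test -race ./...
go run -race main.go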
One common alternative would be to use a channel to get work into a goroutine, and a channel to get the results out, something like:
concurrency := 5
workCh := make(chan string, concurrency)
resCh := make(chan string, concurrency)
workersWg := sync.WaitGroup{}
// start the required number of workers, use the WaitGroup to see when they're done
for i := 0; i < concurrency; i++ {
    workersWg.Add(1)
    go func() {
        defer workersWg.Done()
        for symbol := range workCh {
            // do some work
            if cond {
                resCh <- symbol
            }
        }
    }()
}
go func() {
    // when all the workers are done, close resCh
    workersWg.Wait()
    close(resCh)
}()
// submit all the work
for _, s := range symbols {
    workCh <- s
}
close(workCh)
// collect up the results
for r := range resCh {
    filteredSymbols = append(filteredSymbols, r)
}

Solving data race in benchmark function that simulates stream

I'm trying to write a function that benchmarks streaming CSV to an HTTP endpoint.
To do this I want to generate data and POST that data.
However, Go's data race detector says that there is a data race, and the benchmark finishes faster than seems reasonable, so I suspect the HTTP request is not properly processed.
How should I structure my test code to avoid this?
Is there a way to wait until the HTTP client call has been processed?
func BenchmarkStream(b *testing.B) {
    header := "header\n"
    buf := bytes.NewBufferString(header)
    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        for i := 0; i < b.N; i++ {
            buf.WriteString(fmt.Sprintf("%d\n", i+1))
        }
        wg.Done()
    }() // <-- this line is mentioned by the data race detector
    w := httptest.NewRecorder()
    r, _ := http.NewRequest("POST", "/", buf)
    h := &MyHandler{}
    h.ServeHTTP(w, r)
    wg.Wait()
    if w.Code != 200 {
        b.Errorf("test failed")
    }
}
EDIT: Grzegorz Żur's comment made me question my approach to begin with, so I refactored it with an io.Pipe:
func BenchmarkStream(b *testing.B) {
    pr, pw := io.Pipe()
    go func() {
        defer pw.Close() // close the pipe so the handler sees EOF
        pw.Write([]byte("header\n"))
        for i := 0; i < b.N; i++ {
            pw.Write([]byte(fmt.Sprintf("%d\n", i+1)))
        }
    }()
    w := httptest.NewRecorder()
    r, _ := http.NewRequest("POST", "/", pr)
    h := &MyHandler{}
    h.ServeHTTP(w, r)
    if w.Code != 200 {
        b.Errorf("test failed")
    }
}
You are sharing buf between two goroutines.
You're not going to get useful benchmark results if you only invoke the handler once. Build the request body once and then invoke your handler over and over again.
buf := &bytes.Buffer{}
buf.WriteString("header\n")
buf.WriteString(strings.Repeat("1\n", 1000))
body := buf.Bytes()
b.ResetTimer()
for i := 0; i < b.N; i++ {
    w := httptest.NewRecorder()
    r, err := http.NewRequest("POST", "/", bytes.NewReader(body))
    if err != nil {
        b.Fatal(err)
    }
    h := &MyHandler{}
    h.ServeHTTP(w, r)
    if w.Code != 200 {
        b.Errorf("test failed")
    }
}

write string to file in goroutine

I am using goroutines in my code as follows:
c := make(chan string)
work := make(chan string, 1000)
clvl := runtime.NumCPU()
for i := 0; i < clvl; i++ {
    go func(i int) {
        f, err := os.Create(fmt.Sprintf("/tmp/sample_match_%d.csv", i))
        if nil != err {
            panic(err)
        }
        defer f.Close()
        w := bufio.NewWriter(f)
        for jdId := range work {
            for range itemIdList {
                w.WriteString("test")
            }
            w.Flush()
            c <- fmt.Sprintf("done %s", jdId)
        }
    }(i)
}
go func() {
    for _, jdId := range jdIdList {
        work <- jdId
    }
    close(work)
}()
for resp := range c {
    fmt.Println(resp)
}
This is OK, but can all the goroutines just write to one file? Like this:
c := make(chan string)
work := make(chan string, 1000)
clvl := runtime.NumCPU()
f, err := os.Create("/tmp/sample_match_%d.csv")
if nil != err {
    panic(err)
}
defer f.Close()
w := bufio.NewWriter(f)
for i := 0; i < clvl; i++ {
    go func(i int) {
        for jdId := range work {
            for range itemIdList {
                w.WriteString("test")
            }
            w.Flush()
            c <- fmt.Sprintf("done %s", jdId)
        }
    }(i)
}
This cannot work; I get this error: panic: runtime error: slice bounds out of range
The bufio.Writer type does not support concurrent access. Protect it with a mutex.
Because the short strings are flushed on every write, there's no point in using a bufio.Writer. Write to the file directly (and protect it with a mutex).
There's no code to ensure that the goroutines complete before the file is closed or the program exits. Use a sync.WaitGroup.
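A minimal sketch putting those three points together; work, clvl, and jdId are taken from the question, and the written line is a stand-in for the real payload:
var (
    mu sync.Mutex
    wg sync.WaitGroup
)
f, err := os.Create("/tmp/sample_match.csv")
if err != nil {
    panic(err)
}
defer f.Close()
for i := 0; i < clvl; i++ {
    wg.Add(1)
    go func() {
        defer wg.Done()
        for jdId := range work {
            mu.Lock() // serialize writes: the *os.File is shared by all workers
            f.WriteString(fmt.Sprintf("done %s\n", jdId)) // write directly, no bufio
            mu.Unlock()
        }
    }()
}
wg.Wait() // make sure every goroutine finishes before the file is closed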
