While building my own pipeline to practice with goroutines, I ran into something particularly weird.
I use rand.Perm to generate some random ints, write them to an io.Writer as binary, and then read them back from an io.Reader, and when I print them they come out sorted!
Here's the code:
func RandomSource(tally int) chan int {
    out := make(chan int)
    sli := rand.Perm(tally)
    fmt.Println(sli)
    go func() {
        for num := range sli {
            out <- num
        }
        close(out)
    }()
    return out
}

func ReaderSource(reader io.Reader) chan int {
    out := make(chan int)
    go func() {
        buffer := make([]byte, 8)
        for {
            n, err := reader.Read(buffer)
            if n > 0 {
                v := int(binary.BigEndian.Uint64(buffer))
                out <- v
            }
            if err != nil {
                break
            }
        }
        close(out)
    }()
    return out
}

func WriterSink(writer io.Writer, in chan int) {
    for v := range in {
        buffer := make([]byte, 8)
        binary.BigEndian.PutUint64(buffer, uint64(v))
        writer.Write(buffer)
    }
}

func main() {
    fileName := "small.in"
    file, err := os.Create(fileName)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()
    p := RandomSource(500)
    WriterSink(file, p)
    file, err = os.Open(fileName)
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()
    p = ReaderSource(file)
    for v := range p {
        fmt.Println(v)
    }
}
range over an array or slice yields the index as its first value, which always runs from 0 up to len-1; that is why your output is sorted: you are sending the indices, not the shuffled values. Use for _, num := range sli { if you want to iterate over the values themselves rather than the indices.
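For reference, here is RandomSource with that one change applied; everything else is exactly as in the question:

func RandomSource(tally int) chan int {
    out := make(chan int)
    sli := rand.Perm(tally)
    fmt.Println(sli)
    go func() {
        // Send the shuffled values themselves, not their indices.
        for _, num := range sli {
            out <- num
        }
        close(out)
    }()
    return out
}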
Suppose I have a helper function helper(n int) which returns a slice of integers of variable length. I would like to run helper(n) in parallel for various values of n and collect the output in one big slice. My first attempt at this is the following:
package main

import (
    "fmt"

    "golang.org/x/sync/errgroup"
)

func main() {
    out := make([]int, 0)
    ch := make(chan int)
    go func() {
        for i := range ch {
            out = append(out, i)
        }
    }()
    g := new(errgroup.Group)
    for n := 2; n <= 3; n++ {
        n := n
        g.Go(func() error {
            for _, i := range helper(n) {
                ch <- i
            }
            return nil
        })
    }
    if err := g.Wait(); err != nil {
        panic(err)
    }
    close(ch)
    // time.Sleep(time.Second)
    fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}

func helper(n int) []int {
    out := make([]int, 0)
    for i := 0; i < n; i++ {
        out = append(out, i)
    }
    return out
}
However, if I run this example I do not get all 5 expected values; instead I get
[0 1 0 1]
(If I uncomment the time.Sleep I do get all five values, [0 1 2 0 1], but this is not an acceptable solution).
It seems that the problem with this is that out is being updated in a goroutine, but the main function returns before it is done updating.
One thing that would work is using a buffered channel of size 5:
func main() {
    ch := make(chan int, 5)
    g := new(errgroup.Group)
    for n := 2; n <= 3; n++ {
        n := n
        g.Go(func() error {
            for _, i := range helper(n) {
                ch <- i
            }
            return nil
        })
    }
    if err := g.Wait(); err != nil {
        panic(err)
    }
    close(ch)
    out := make([]int, 0)
    for i := range ch {
        out = append(out, i)
    }
    fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}
However, although in this simplified example I know what the size of the output should be, in my actual application this is not known a priori. Essentially what I would like is an 'infinite' buffer such that sending to the channel never blocks, or a more idiomatic way to achieve the same thing; I've read https://blog.golang.org/pipelines but wasn't able to find a close match to my use case. Any ideas?
In this version of the code, main blocks on the range over ch until ch is closed.
ch is closed at the end of the goroutine that is responsible for pushing into it, once all of the producers have finished. Because the program pushes to ch from a separate goroutine, there is no need for a buffered channel.
package main

import (
    "fmt"

    "golang.org/x/sync/errgroup"
)

func main() {
    ch := make(chan int)
    go func() {
        g := new(errgroup.Group)
        for n := 2; n <= 3; n++ {
            n := n
            g.Go(func() error {
                for _, i := range helper(n) {
                    ch <- i
                }
                return nil
            })
        }
        if err := g.Wait(); err != nil {
            panic(err)
        }
        close(ch)
    }()
    out := make([]int, 0)
    for i := range ch {
        out = append(out, i)
    }
    fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}

func helper(n int) []int {
    out := make([]int, 0)
    for i := 0; i < n; i++ {
        out = append(out, i)
    }
    return out
}
Here is a fixed version of the first snippet; it is more convoluted, but it demonstrates the use of sync.WaitGroup.
package main

import (
    "fmt"
    "sync"

    "golang.org/x/sync/errgroup"
)

func main() {
    out := make([]int, 0)
    ch := make(chan int)
    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        for i := range ch {
            out = append(out, i)
        }
    }()
    g := new(errgroup.Group)
    for n := 2; n <= 3; n++ {
        n := n
        g.Go(func() error {
            for _, i := range helper(n) {
                ch <- i
            }
            return nil
        })
    }
    if err := g.Wait(); err != nil {
        panic(err)
    }
    close(ch)
    wg.Wait()
    // time.Sleep(time.Second)
    fmt.Println(out) // should have the same elements as [0 1 0 1 2]
}

func helper(n int) []int {
    out := make([]int, 0)
    for i := 0; i < n; i++ {
        out = append(out, i)
    }
    return out
}
I have the following code, which ends up in an infinite loop. The values received from the channel are correct, and the value of the variable sum is also correct. All of the goroutines finish without errors.
func responseHandler(w http.ResponseWriter, r *http.Request) {
    var c = make(chan string)
    for i := 0; i < 100; i++ {
        url := fmt.Sprintf("someurl/page%v/etc", i)
        go parse(url, i, c)
        if i%5 == 0 {
            time.Sleep(1000 * time.Millisecond)
        }
    }
    for range c {
        sum = append(sum, <-c)
    }
    fmt.Println("Exit from channel wait")
    fmt.Fprintln(w, sum)
}
func parse(url string, num int, c chan string) {
    response, err1 := http.Get(url)
    if err1 != nil {
        log.Fatal(err1)
    }
    defer response.Body.Close()
    if response.StatusCode != 200 {
        log.Fatalf("status code error: %d %s", response.StatusCode, response.Status)
    }
    res, err := DecodeHTMLBody(response.Body, "windows-1251")
    doc, err := goquery.NewDocumentFromReader(res)
    if err != nil {
        log.Fatal(err)
    }
    doc.Find(".b-advItem__content").Each(func(i int, s *goquery.Selection) {
        title := strings.TrimSpace(s.Find(".someclass").Text())
        price := strings.TrimSpace(s.Find(".someclass").Text())
        formatPrice := parsePrice(price)
        c <- fmt.Sprintf("output %d: %s:%s\n", i, title, formatPrice)
    })
    fmt.Printf("Channel %d - exit\n", num)
}
sum is a global []string.
The range statement over a channel exits only when the channel is closed (well, think about it: how else would the range detect that there is no more data to fetch?), and nothing closes the channel in your code.
func responseHandler(w http.ResponseWriter, r *http.Request) {
    ...
    for range c {
        sum = append(sum, <-c)
        if len(aa) == 100 {
            close(c)
        }
    }
    fmt.Fprintln(w, sum)
}

func parse(...) {
    ...
    aa = append(aa, num)
}
Adding such a check allows you to exit the loop correctly.
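An alternative sketch (not part of the original answer) is to close c from a helper goroutine once every parse goroutine has returned, using a sync.WaitGroup; this also receives each value exactly once instead of discarding the one the range expression already took:

func responseHandler(w http.ResponseWriter, r *http.Request) {
    c := make(chan string)
    var wg sync.WaitGroup
    for i := 0; i < 100; i++ {
        url := fmt.Sprintf("someurl/page%v/etc", i)
        wg.Add(1)
        go func(url string, i int) {
            defer wg.Done()
            parse(url, i, c)
        }(url, i)
        if i%5 == 0 {
            time.Sleep(1000 * time.Millisecond)
        }
    }
    // Close c only after every parse goroutine has finished sending,
    // so the range below can terminate.
    go func() {
        wg.Wait()
        close(c)
    }()
    for s := range c {
        sum = append(sum, s)
    }
    fmt.Println("Exit from channel wait")
    fmt.Fprintln(w, sum)
}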
I'm developing a Gmail client based on the Google API.
I have a list of labels obtained through this call:
r, err := s.gClient.Service.Users.Labels.List(s.gClient.User).Do()
Then, for every label I need to get details
for _, l := range r.Labels {
    d, err := s.gClient.Service.Users.Labels.Get(s.gClient.User, l.Id).Do()
}
I'd like to handle the loop in a more efficient way, so I start a goroutine in each iteration:
ch := make(chan label.Label)
for _, l := range r.Labels {
    go func(gmailLabels *gmailclient.Label, gClient *gmail.Client, ch chan<- label.Label) {
        d, err := s.gClient.Service.Users.Labels.Get(s.gClient.User, l.Id).Do()
        if err != nil {
            panic(err)
        }
        // Performs some operation with the label `d`
        preparedLabel := ....
        ch <- preparedLabel
    }(l, s.gClient, ch)
}
for i := 0; i < len(r.Labels); i++ {
    lab := <-ch
    fmt.Printf("Processed %v\n", lab.LabelID)
}
The problem with this code is that the Gmail API has a rate limit, so I get this error:
panic: googleapi: Error 429: Too many concurrent requests for user, rateLimitExceeded
What is the correct way to handle this situation?
How about starting only, say, 10 worker goroutines and feeding them the values from a for loop in another goroutine? The channels have a small buffer to reduce synchronisation time.
chIn := make(chan label.Label, 20)
chOut := make(chan label.Label, 20)
for i := 0; i < 10; i++ {
    go func(gClient *gmail.Client, chIn chan label.Label, chOut chan<- label.Label) {
        for gmailLabels := range chIn {
            d, err := gClient.Service.Users.Labels.Get(gClient.User, gmailLabels.Id).Do()
            if err != nil {
                panic(err)
            }
            // Performs some operation with the label `d`
            preparedLabel := ....
            chOut <- preparedLabel
        }
    }(s.gClient, chIn, chOut)
}
go func(chIn chan label.Label) {
    defer close(chIn)
    for _, l := range r.Labels {
        chIn <- l
    }
}(chIn)
for i := 0; i < len(r.Labels); i++ {
    lab := <-chOut
    fmt.Printf("Processed %v\n", lab.LabelID)
}
EDIT:
Here is a playground sample.
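If you would rather keep one goroutine per label, a buffered channel used as a semaphore caps the number of requests in flight in a similar way. This is only a sketch: prepareLabel stands in for whatever processing the question elides when building the label.Label to send on the channel.

sem := make(chan struct{}, 10) // allow at most 10 requests in flight
ch := make(chan label.Label)
for _, l := range r.Labels {
    go func(l *gmailclient.Label) {
        sem <- struct{}{}        // acquire a slot
        defer func() { <-sem }() // release it when the request is done
        d, err := s.gClient.Service.Users.Labels.Get(s.gClient.User, l.Id).Do()
        if err != nil {
            panic(err)
        }
        ch <- prepareLabel(d) // hypothetical helper that builds a label.Label from d
    }(l)
}
for i := 0; i < len(r.Labels); i++ {
    lab := <-ch
    fmt.Printf("Processed %v\n", lab.LabelID)
}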
I am using goroutines in my code as follows:
c := make(chan string)
work := make(chan string, 1000)
clvl := runtime.NumCPU()
for i := 0; i < clvl; i++ {
    go func(i int) {
        f, err := os.Create(fmt.Sprintf("/tmp/sample_match_%d.csv", i))
        if nil != err {
            panic(err)
        }
        defer f.Close()
        w := bufio.NewWriter(f)
        for jdId := range work {
            for _, itemId := range itemIdList {
                w.WriteString("test")
            }
            w.Flush()
            c <- fmt.Sprintf("done %s", jdId)
        }
    }(i)
}
go func() {
    for _, jdId := range jdIdList {
        work <- jdId
    }
    close(work)
}()
for resp := range c {
    fmt.Println(resp)
}
This is OK, but can all the goroutines just write to one file? Like this:
c := make(chan string)
work := make(chan string, 1000)
clvl := runtime.NumCPU()
f, err := os.Create("/tmp/sample_match_%d.csv")
if nil != err {
    panic(err)
}
defer f.Close()
w := bufio.NewWriter(f)
for i := 0; i < clvl; i++ {
    go func(i int) {
        for jdId := range work {
            for _, itemId := range itemIdList {
                w.WriteString("test")
            }
            w.Flush()
            c <- fmt.Sprintf("done %s", jdId)
        }
    }(i)
}
This does not work; it fails with: panic: runtime error: slice bounds out of range
The bufio.Writer type does not support concurrent access. Protect it with a mutex.
Because the short strings are flushed on every write, there's no point in using a bufio.Writer. Write to the file directly (and protect it with a mutex).
There's no code to ensure that the goroutines complete before the file is closed or the program exits. Use a sync.WaitGroup.
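A minimal sketch of that advice, reusing jdIdList and the rest of the question's setup, could look like this:

f, err := os.Create("/tmp/sample_match.csv")
if nil != err {
    panic(err)
}
defer f.Close()

var (
    mu sync.Mutex     // serialises writes to the shared file
    wg sync.WaitGroup // lets us wait for all workers before the file is closed
)
work := make(chan string, 1000)

for i := 0; i < runtime.NumCPU(); i++ {
    wg.Add(1)
    go func() {
        defer wg.Done()
        for jdId := range work {
            line := fmt.Sprintf("done %s\n", jdId) // build the record for this ID
            mu.Lock()
            f.WriteString(line) // write directly to the file, no bufio needed
            mu.Unlock()
        }
    }()
}

for _, jdId := range jdIdList {
    work <- jdId
}
close(work)
wg.Wait() // every worker is done before the deferred f.Close runs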
I have a line containing 3 numbers that I want to read from stdin with fmt.Scanln() but this code won't work:
X := make([]int, 3)
fmt.Scanln(X...)
fmt.Printf("%v\n", X)
I get this error message:
cannot use X (type []int) as type []interface {} in function argument
I don't get it.
Idiomatic Go would be:
func read(n int) ([]int, error) {
    in := make([]int, n)
    for i := range in {
        _, err := fmt.Scan(&in[i])
        if err != nil {
            return in[:i], err
        }
    }
    return in, nil
}
interface{} says nothing about the value it holds. Please don't use it if you don't have to.
For example,
package main

import "fmt"

func intScanln(n int) ([]int, error) {
    x := make([]int, n)
    y := make([]interface{}, len(x))
    for i := range x {
        y[i] = &x[i]
    }
    n, err := fmt.Scanln(y...)
    x = x[:n]
    return x, err
}

func main() {
    x, err := intScanln(3)
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Printf("%v\n", x)
}
Input:
1 2 3
Output:
[1 2 3]
I think the correct version should be
X := make([]int, 3)
fmt.Scanln(&X[0], &X[1], &X[2])
fmt.Printf("%v\n", X)
This error message occurs because there is no implicit conversion from []int to []interface{}. Note that this is about the slice types: the ... syntax you are using is correct, but fmt.Scanln expects a []interface{}. This has implications outside of package fmt.
The usual explanation is that Go gives you control over memory layout, and []int and []interface{} lay out their elements differently, so the conversion would require copying; the language therefore does not do it for you. This means you need to do the conversion manually before passing the slice to a function that expects []interface{}. For example:
package main

import (
    "fmt"
)

func main() {
    x := make([]int, 3)
    y := make([]interface{}, 3)
    y[0] = x[0]
    y[1] = x[1]
    y[2] = x[2]
    fmt.Println(y...)
}
Or something a little more general:
x := make([]int, 3)
y := make([]interface{}, len(x))
for i, v := range x {
    y[i] = v
}
fmt.Println(y...)
Regarding your specific issue, see the following:
x := make([]*int, 3)
for i := range x {
    x[i] = new(int)
}
y := make([]interface{}, 3)
for i, v := range x {
    y[i] = v
}
if _, err := fmt.Scanln(y...); err != nil {
    fmt.Println("Scanln err: ", err)
}
for _, v := range y {
    val := v.(*int)
    fmt.Println(*val)
}
I saw in a comment that you said the lines can have different lengths. In that case you can implement your own fmt.Scanner:
package main

import (
    "bytes"
    "fmt"
)

type slice struct {
    tok []int
}

func (s *slice) Scan(state fmt.ScanState, verb rune) error {
    tok, err := state.Token(false, func(r rune) bool { return r != '\n' })
    if err != nil {
        return err
    }
    if _, _, err := state.ReadRune(); err != nil {
        if len(tok) == 0 {
            panic(err)
        }
    }
    b := bytes.NewReader(tok)
    for {
        var d int
        _, err := fmt.Fscan(b, &d)
        if err != nil {
            break
        }
        s.tok = append(s.tok, d)
    }
    return nil
}

func main() {
    var s slice
    fmt.Scan(&s)
    fmt.Println(s.tok)
}
https://golang.org/pkg/fmt#Scanner