How to write a better two channel select - go

In the following code there are two channels A & B that contain work, in the real code they are different structures, the workers need to drain both channels before quitting. The workers need the information coming in from both channels. The two select statements work but it's very clumsy. If I add default: to make them non-blocking then the code fails to drain the channels. Is there a better way of writing the selects?
Right now if channel A has no work then channel B does not get serviced either. Another problem to solve, but not my main concern.
playground for testing following code:
package main
import (
"fmt"
"time"
)
const (
fillCount = 10 // number of elements in each input channel
numWorkers = 3 // number of consumers.
)
// Wait pauses the calling goroutine for two seconds.
func Wait() {
	const pause = 2 * time.Second
	time.Sleep(pause)
}
// fillChannel sends fillCount items named "<name>0", "<name>1", ... on work
// and then closes the channel so receivers can tell that no more work follows.
func fillChannel(work chan string, name string) {
	// Closing on return is the "we're finished" signal for the consumers.
	defer close(work)
	for n := 0; n < fillCount; n++ {
		item := fmt.Sprintf("%s%d", name, n)
		work <- item
	}
}
// doWork consumes items from ch1 and ch2 until BOTH channels have been closed
// and drained, then reports completion by sending true on done.
//
// id only labels the log output. Every pass of the loop performs one blocking
// receive on each channel that is still open, so a channel with no pending
// work delays servicing of the other one.
func doWork(id int, ch1 chan string, ch2 chan string, done chan bool) {
	fmt.Println("Running worker", id)
	// Deferred: this is printed only after the send on done below has completed.
	defer fmt.Println("Ending worker", id)
	// Keep looping while EITHER channel is still open. Stopping as soon as one
	// of them closes would leave items stranded in the other channel.
	for ch1Open, ch2Open := true, true; ch1Open || ch2Open; {
		// Number of buffered items sampled before receiving; informational only.
		cnt1 := len(ch1)
		cnt2 := len(ch2)
		if ch1Open {
			select {
			case str, more := <-ch1:
				if more {
					fmt.Printf("%d: ch1(%d) %s\n", id, cnt1, str)
				} else {
					fmt.Printf("%d: ch1 closed\n", id)
					ch1Open = false
				}
			}
		}
		if ch2Open {
			select {
			case str, more := <-ch2:
				if more {
					fmt.Printf("%d: ch2(%d) %s\n", id, cnt2, str)
				} else {
					fmt.Printf("%d: ch2 closed\n", id)
					ch2Open = false
				}
			}
		}
	}
	done <- true
}
// main starts two producers and numWorkers consumers, then waits for one
// completion signal per consumer before exiting.
func main() {
a := make(chan string, 2) // a small channel
b := make(chan string, 5) // a bigger channel
// generate work
go fillChannel(a, "A")
go fillChannel(b, "B")
// launch the consumers
done := make(chan bool)
for i := 0; i < numWorkers; i++ {
go doWork(i, a, b, done)
}
// wait for the goroutines to finish.
for i := 0; i < numWorkers; i++ {
<-done
}
fmt.Println("All workers done.")
// Each worker's deferred "Ending worker" print runs only after its send on
// done, so without this pause main may exit before those prints are written.
Wait()
}

Select on both channels in a loop. When a channel is closed, set the channel variable to nil to make receive on that channel not ready. Break out of the loop when both channels are nil.
http://play.golang.org/p/9gRY1yKqJ9
package main
import (
"fmt"
"time"
)
const (
fillCount = 10 // number of elements in each input channel
numWorkers = 3 // number of consumers.
)
// fillChannel sends fillCount items named "<name>0", "<name>1", ... on work
// and then closes the channel so receivers can tell that no more work follows.
func fillChannel(work chan string, name string) {
	// Closing on return is the "we're finished" signal for the consumers.
	defer close(work)
	for n := 0; n < fillCount; n++ {
		item := fmt.Sprintf("%s%d", name, n)
		work <- item
	}
}
// doWork receives from ch1 and ch2 with a single select until both channels
// have been closed, then sends true on done. id only labels the log output.
//
// A closed channel is replaced by nil in the local variable: a receive from a
// nil channel is never ready, so the select stops considering that case.
func doWork(id int, ch1 chan string, ch2 chan string, done chan bool) {
	fmt.Println("Running worker", id)
	for {
		if ch1 == nil && ch2 == nil {
			break
		}
		select {
		case item, open := <-ch1:
			if !open {
				ch1 = nil
				fmt.Printf("%d: ch1 closed\n", id)
				continue
			}
			fmt.Printf("%d: ch1(%d) %s\n", id, len(ch1), item)
		case item, open := <-ch2:
			if !open {
				ch2 = nil
				fmt.Printf("%d: ch2 closed\n", id)
				continue
			}
			fmt.Printf("%d: ch2(%d) %s\n", id, len(ch2), item)
		}
	}
	fmt.Println("Ending worker", id)
	done <- true
}
// main feeds two buffered channels from separate producers, runs numWorkers
// consumers over them and returns once every consumer has reported on done.
func main() {
	// A small and a bigger work queue.
	a, b := make(chan string, 2), make(chan string, 5)
	go fillChannel(a, "A")
	go fillChannel(b, "B")

	done := make(chan bool)
	for id := 0; id < numWorkers; id++ {
		go doWork(id, a, b, done)
	}
	// One completion signal is expected per consumer.
	for pending := numWorkers; pending > 0; pending-- {
		<-done
	}
	fmt.Println("All workers done.")
}

Related

Stuck in infinite loop in for select in Golang

The code given below is the sample code for my use case. I want to read data from ch1 and ch2 but got stuck into infinite loop.
package main
import "fmt"
// main starts both workers and prints every value it receives on ch1 and ch2.
func main() {
// The closure creates both channels, launches the workers on them and hands
// the channels back as receive-only values.
ch1, ch2 := func() (<-chan int, <-chan int) {
ch_1 := make(chan int)
ch_2 := make(chan int)
go worker_1(ch_1, ch_2)
go worker_2(ch_1, ch_2)
return ch_1, ch_2
}()
// trying to read this way but it is not working
// NOTE(review): this loop has no exit path. Because of the default case the
// select never blocks, so "done" is printed on every pass in which neither
// channel has a value ready, and the loop keeps spinning after the workers
// have returned.
for {
select {
case a := <-ch1:
fmt.Println("from ch1", a)
case a := <-ch2:
fmt.Println("from ch2", a)
default:
fmt.Println("done")
}
}
}
// worker_1 sends the integers 0..99 in ascending order, even values on ch1
// and odd values on ch2.
func worker_1(ch1, ch2 chan int) {
	for n := 0; n < 100; n++ {
		target := ch2
		if n%2 == 0 {
			target = ch1
		}
		target <- n
	}
}
// worker_2 sends the integers 101..199 in ascending order, even values on ch1
// and odd values on ch2.
func worker_2(ch1, ch2 chan int) {
	for n := 101; n < 200; n++ {
		target := ch2
		if n%2 == 0 {
			target = ch1
		}
		target <- n
	}
}
Here is one solution:
package main
import (
"fmt"
"sync"
)
// main runs two producers and one reader per channel. It closes the channels
// once both producers have returned and exits after both readers have
// finished printing.
func main() {
	ch1, ch2 := make(chan int), make(chan int)

	// Producers: each covers its own half-open range [from, to).
	var wgWorkers sync.WaitGroup
	for _, span := range [][2]int{{0, 100}, {101, 200}} {
		wgWorkers.Add(1)
		go worker(&wgWorkers, ch1, ch2, span[0], span[1])
	}

	// Readers: one per channel, numbered from 1.
	var wgReader sync.WaitGroup
	for idx, ch := range []<-chan int{ch1, ch2} {
		wgReader.Add(1)
		go reader(&wgReader, ch, idx+1)
	}

	// Nothing sends any more once the producers are done, so the channels can
	// be closed; that ends each reader's loop after its last element.
	wgWorkers.Wait()
	close(ch1)
	close(ch2)

	// Do not exit before the readers have processed everything.
	wgReader.Wait()
}
// worker sends every integer in [from, to) in ascending order: even values go
// to ch1, all other values to ch2. It calls wg.Done when it returns.
func worker(wg *sync.WaitGroup, ch1, ch2 chan<- int, from, to int) {
	defer wg.Done()
	for n := from; n < to; n++ {
		switch n % 2 {
		case 0:
			ch1 <- n
		default:
			ch2 <- n
		}
	}
}
// reader prints every value received on ch, labelled with chNum, and returns
// when ch has been closed and drained. It calls wg.Done when it returns.
func reader(wg *sync.WaitGroup, ch <-chan int, chNum int) {
	defer wg.Done()
	for {
		v, open := <-ch
		if !open {
			// Channel closed and empty: nothing left to print.
			return
		}
		fmt.Printf("from ch%d: %d\n", chNum, v)
	}
}
Explanations are in the code comments.
Close the channels when the workers are done. Break out of the receive loop after both channels are closed.
package main
import (
"fmt"
"sync"
)
// main prints every value produced by the two workers and returns once both
// channels have been closed.
func main() {
	ch1, ch2 := func() (<-chan int, <-chan int) {
		var wg sync.WaitGroup
		first, second := make(chan int), make(chan int)
		wg.Add(2)
		go worker_1(&wg, first, second)
		go worker_2(&wg, first, second)
		// Close both channels after the worker goroutines have completed.
		go func() {
			wg.Wait()
			close(first)
			close(second)
		}()
		return first, second
	}()

	// Keep receiving until both channels have been seen closed.
	for {
		if ch1 == nil && ch2 == nil {
			return
		}
		select {
		case v, open := <-ch1:
			if !open {
				// Closed: a nil channel is never ready in a select.
				ch1 = nil
				continue
			}
			fmt.Println("from ch1", v)
		case v, open := <-ch2:
			if !open {
				// Closed: a nil channel is never ready in a select.
				ch2 = nil
				continue
			}
			fmt.Println("from ch2", v)
		}
	}
}
// worker_1 sends the integers 0..99 in ascending order, even values on ch1
// and odd values on ch2, and calls wg.Done when it returns.
func worker_1(wg *sync.WaitGroup, ch1, ch2 chan int) {
	defer wg.Done()
	for n := 0; n < 100; n++ {
		target := ch2
		if n%2 == 0 {
			target = ch1
		}
		target <- n
	}
}
// worker_2 sends the integers 101..199 in ascending order, even values on ch1
// and odd values on ch2, and calls wg.Done when it returns.
func worker_2(wg *sync.WaitGroup, ch1, ch2 chan int) {
	defer wg.Done()
	for n := 101; n < 200; n++ {
		target := ch2
		if n%2 == 0 {
			target = ch1
		}
		target <- n
	}
}

Get responses from multiple go routines into an array

I need to fetch responses from multiple go routines and put them into an array. I know that channels could be used for this, however I am not sure how I can make sure that all go routines have finished processing the results. Thus I am using a waitgroup.
Code
func main() {
log.Info("Collecting ints")
var results []int32
for _, broker := range e.BrokersByBrokerID {
wg.Add(1)
go getInt32(&wg)
}
wg.Wait()
log.info("Collected")
}
func getInt32(wg *sync.WaitGroup) (int32, error) {
defer wg.Done()
// Just to show that this method may just return an error and no int32
err := broker.Open(config)
if err != nil && err != sarama.ErrAlreadyConnected {
return 0, fmt.Errorf("Cannot connect to broker '%v': %s", broker.ID(), err)
}
defer broker.Close()
return 1003, nil
}
My question
How can I put all the response int32 (which may return an error) into my int32 array, making sure that all go routines have finished their processing work and returned either the error or the int?
If you don't process the return values of the function launched as a goroutine, they are discarded. See What happens to return value from goroutine.
You may use a slice to collect the results, where each goroutine could receive the index to put the results to, or alternatively the address of the element. See Can I concurrently write different slice elements. Note that if you use this, the slice must be pre-allocated and only the element belonging to the goroutine may be written, you can't "touch" other elements and you can't append to the slice.
Or you may use a channel, on which the goroutines send values that include the index or ID of the item they processed, so the collecting goroutine can identify or order them. See How to collect values from N goroutines executed in a specific order?
If processing should stop on the first error encountered, see Close multiple goroutine if an error occurs in one in go
Here's an example how it could look like when using a channel. Note that no waitgroup is needed here, because we know that we expect as many values on the channel as many goroutines we launch.
type result struct {
task int32
data int32
err error
}
// main launches one calcTask goroutine per task and collects exactly one
// result per task from the shared channel, in arrival order.
func main() {
	tasks := []int32{1, 2, 3, 4}
	ch := make(chan result)
	for i := range tasks {
		go calcTask(tasks[i], ch)
	}
	// No WaitGroup needed: every goroutine sends exactly once, so receiving
	// len(tasks) values means all of them have reported.
	results := make([]result, 0, len(tasks))
	for len(results) < len(tasks) {
		results = append(results, <-ch)
	}
	fmt.Printf("Results: %+v\n", results)
}
// calcTask sends exactly one result for task on ch: a simulated failure for
// task numbers above 2, otherwise a result whose data is twice the task number.
func calcTask(task int32, ch chan<- result) {
	res := result{task: task}
	if task > 2 {
		// Simulate failure
		res.err = fmt.Errorf("task %v failed", task)
	} else {
		// Simulate success
		res.data = task * 2
	}
	ch <- res
}
Output (try it on the Go Playground):
Results: [{task:4 data:0 err:0x40e130} {task:1 data:2 err:<nil>} {task:2 data:4 err:<nil>} {task:3 data:0 err:0x40e138}]
I also believe you have to use channel, it must be something like this:
package main
import (
"fmt"
"log"
"sync"
)
var (
BrokersByBrokerID = []int32{1, 2, 3}
)
type result struct {
data string
err string // you must use error type here
}
// main starts one getInt32 goroutine per broker, gathers everything they send
// on ch into results and logs the collected slice.
func main() {
	var wg sync.WaitGroup
	var results []result
	ch := make(chan result)
	for _, broker := range BrokersByBrokerID {
		wg.Add(1)
		go getInt32(ch, &wg, broker)
	}
	// The collector owns results while it runs; collected is closed when it
	// has consumed the last value.
	collected := make(chan struct{})
	go func() {
		defer close(collected)
		for v := range ch {
			results = append(results, v)
		}
	}()
	wg.Wait()
	close(ch)
	// Wait for the collector to finish before reading results. Without this,
	// the read below races with the final append and may miss values.
	<-collected
	log.Printf("collected %v", results)
}
// getInt32 sends exactly one result for broker on ch and then calls wg.Done.
// Broker 1 yields a result carrying only an error text; every other broker
// yields a result carrying only data.
func getInt32(ch chan result, wg *sync.WaitGroup, broker int32) {
	defer wg.Done()
	var res result
	if broker == 1 {
		res.err = "error: gor broker 1"
	} else {
		res.data = fmt.Sprintf("broker %d - ok", broker)
	}
	ch <- res
}
Result will look like this:
2019/02/05 15:26:28 collected [{broker 3 - ok } {broker 2 - ok } { error: gor broker 1}]
package main
import (
"fmt"
"log"
"sync"
)
var (
BrokersByBrokerID = []int{1, 2, 3, 4}
)
type result struct {
data string
err string // you must use error type here
}
// main sends every broker ID through ch from its own goroutine, collects the
// values into results and logs them.
func main() {
	var wg sync.WaitGroup
	var results []int
	ch := make(chan int)
	for _, broker := range BrokersByBrokerID {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			ch <- i
		}(broker)
	}
	// Close ch from the sending side once every sender has finished. The
	// collecting loop below then ends by itself, with no hard-coded result
	// count and no separate done channel that a sender could block on forever.
	go func() {
		wg.Wait()
		close(ch)
	}()
	for v := range ch {
		results = append(results, v)
	}
	fmt.Println("STOPPED")
	log.Printf("collected %v", results)
}
Thank cn007b and Edenshaw. My answer is based on their answers.
As Edenshaw commented, you need another sync.WaitGroup for the goroutine that collects the results from the channel; otherwise you may get an incomplete array.
package main
import (
"fmt"
"sync"
"encoding/json"
)
type Resp struct {
id int
}
// main starts three goroutines that each send a *Resp on chanRes, collects
// the ids into res and prints them as a JSON array.
func main() {
	var wg sync.WaitGroup
	chanRes := make(chan interface{}, 3)
	for i := 0; i < 3; i++ {
		wg.Add(1)
		resp := &Resp{}
		go func(i int, resp *Resp) {
			defer wg.Done()
			resp.id = i
			chanRes <- resp
		}(i, resp)
	}
	res := make([]interface{}, 0)
	// wg2 tracks the collector so that res is read only after the collector
	// has appended the last value.
	var wg2 sync.WaitGroup
	wg2.Add(1)
	go func() {
		defer wg2.Done()
		for v := range chanRes {
			res = append(res, v.(*Resp).id)
		}
	}()
	wg.Wait()
	// All senders are done; closing ends the collector's range loop.
	close(chanRes)
	wg2.Wait()
	resStr, err := json.Marshal(res)
	if err != nil {
		// Report the failure instead of printing an empty line.
		fmt.Println("cannot encode results:", err)
		return
	}
	fmt.Println(string(resStr))
}
package main
import (
"fmt"
"log"
"sync"
"time"
)
var (
BrokersByBrokerID = []int{1, 2, 3, 4}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup.
var results []int
ch := make(chan int)
done := make(chan bool)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go func(i int) {
defer wg.Done()
ch <- i
if i == 4 {
done <- true
}
}(broker)
}
for v := range ch {
results = append(results, v)
if len(results) == 4 {
close(ch)
}
}
fmt.Println("STOPPED")
<-done
wg.Wait()
log.Printf("collected %v", results)
}
</pre>

Synchronization for several goroutines using channels

I need to start a number of workers with single task queue and single result queue. Each worker should be started in different goroutine. And I need to wait till all workers will be finished and task queue will be empty before exiting from program.
I have prepared a small example of goroutine synchronization.
The main idea is that we count the tasks in the queue and wait for all workers to finish their jobs. But the current implementation sometimes misses values.
Why does this happen, and how can the problem be solved?
The sample code:
import (
"fmt"
"os"
"os/signal"
"strconv"
)
const num_workers = 5
type workerChannel chan uint64
// Make channel for tasks
var workCh workerChannel
// Make channel for task counter
var cntChannel chan int
// Task counter
var tskCnt int64
// InitWorker loops forever: it takes a value from input, records the pickup
// through getTask and sends "Worker <num>:<value>" on result.
func InitWorker(input workerChannel, result chan string, num int) {
	prefix := "Worker " + strconv.Itoa(num) + ":"
	for {
		inp := <-input
		getTask()
		result <- (prefix + strconv.FormatUint(inp, 10))
	}
}
// taskCounter loops forever, adding every value received on inp to the
// package-level tskCnt. It is meant to run in a single dedicated goroutine.
func taskCounter(inp chan int) {
	for {
		tskCnt += int64(<-inp)
	}
}
// putTask logs val, reports +1 on cntChannel and then places val on workCh.
// Both sends happen in the calling goroutine.
func putTask(val uint64) {
	fmt.Println("Put ", val)
	cntChannel <- 1
	workCh <- val
}
// getTask reports -1 on cntChannel to record that a task was taken from the
// queue.
func getTask() {
	cntChannel <- -1
}
func main() {
// Init service channels
abort := make(chan os.Signal)
done := make(chan bool)
// init queue for results
result := make(chan string)
// init task queue
workCh = make(workerChannel)
// start some workers
for i := uint(0); i < num_workers; i++ {
go InitWorker(workCh, result, int(i))
}
// init counter for synchro
cntChannel = make(chan int)
go taskCounter(cntChannel)
// goroutine that put some tasks into queue
go func() {
for i := uint(0); i < 21; i++ {
putTask(uint64(i))
}
// wait for processing all tasks and close application
for len(cntChannel) != 0 {}
for tskCnt != 0 {}
for len(workCh) != 0 {}
for len(result) != 0 {}
// send signal for close
done <- true
}()
signal.Notify(abort, os.Interrupt)
for {
select {
case <-abort:
fmt.Println("Aborted.")
os.Exit(0)
// print results
case res := <-result:
fmt.Println(res)
case <-done:
fmt.Println("Done")
os.Exit(0)
}
}
}
Use sync.WaitGroup to wait for goroutines to complete. Close channels to cause loops reading on channels to exit.
package main
import (
"fmt"
"sync"
)
type workerChannel chan uint64
const num_workers = 5
// main runs num_workers workers over a stream of 21 tasks and prints every
// result. Shutdown is driven purely by closing channels: closing workCh stops
// the workers, and closing results (after all workers are done) stops main.
func main() {
	workCh := make(workerChannel)
	results := make(chan string)

	// Start workers; each one processes tasks until workCh is closed.
	var wg sync.WaitGroup
	for id := 0; id < num_workers; id++ {
		wg.Add(1)
		go func(num int) {
			defer wg.Done()
			for w := range workCh {
				results <- fmt.Sprintf("worker %d, task %d", num, w)
			}
		}(id)
	}

	// Close the result channel once every worker has returned.
	go func() {
		defer close(results)
		wg.Wait()
	}()

	// Feed the tasks, then close workCh so the workers leave their loops.
	go func() {
		defer close(workCh)
		for task := 0; task < 21; task++ {
			workCh <- uint64(task)
		}
	}()

	// Print results until the result channel is closed.
	for r := range results {
		fmt.Println(r)
	}
}
https://play.golang.org/p/ZifpzsP6fNv
I suggest using close(chan) for this kind of tasks.
WaitGroup version.
package main
import (
"log"
"sync"
)
// worker logs every value received on in until the channel is closed and
// drained, then calls wg.Done.
func worker(in chan int, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		v, open := <-in
		if !open {
			return
		}
		log.Println(v)
	}
}
// main starts 25 workers on one unbuffered channel, sends the values 0..30,
// closes the channel and waits until every worker has returned.
func main() {
	const (
		lc       = 25 // number of workers
		maxValue = 30 // last value sent (inclusive)
	)
	in := make(chan int)
	var wg sync.WaitGroup
	for started := 0; started < lc; started++ {
		wg.Add(1)
		go worker(in, &wg)
	}
	for c := 0; c <= maxValue; c++ {
		in <- c
	}
	// Closing in ends every worker's receive loop.
	close(in)
	wg.Wait()
}
Channel version
package main
import (
"log"
"os"
)
// worker logs every value received on in until the channel is closed and
// drained, then signals completion by sending one empty struct on end.
func worker(in chan int, end chan struct{}) {
	defer func() { end <- struct{}{} }()
	for {
		v, open := <-in
		if !open {
			return
		}
		log.Println(v)
	}
}
func main() {
in := make(chan int)
lc := 25
maxValue := 30
end := make(chan struct{})
var fin int
go func() {
for {
<-end
fin++
log.Println(`fin`, fin)
if fin == lc {
break
}
}
close(end)
os.Exit(0)
}()
for i := 0; i < lc; i++ {
go worker(in, end)
}
for c := 0; c <= maxValue; c++ {
in <- c
}
close(in)
<-make(chan struct{})
}

Understanding correct use of channels in golang concurrent context

I am writing a go project which is a simple web crawler to crawl links on the website. I want to experiment the concurrent features such as goroutines and channels. But when I run it it didn't go through. Nothing is showed as if there is nothing happening. I have no idea what went wrong. Can somebody point it out for me?
It works and shows all the crawled links if I remove the channels logic but I want it to send the links into a buffered channel and then display the links before ending the program. The program is supposed to be able to go to any depth as specified in the program. Currently the depth is 1.
package main
import (
"fmt"
"log"
"net/http"
"os"
"strings"
"time"
"golang.org/x/net/html"
)
// Link type to be sent over channel
type Link struct {
URL string
ok bool
}
// main crawls the URL given as the only command-line argument and prints the
// distinct link URLs received on ch.
func main() {
// NOTE(review): the usage message is printed but execution continues, so
// os.Args[1] below is still evaluated when no argument was given.
if len(os.Args) != 2 {
fmt.Println("Usage: crawl [URL].")
}
url := os.Args[1]
if !strings.HasPrefix(url, "http://") {
url = "http://" + url
}
ch := make(chan *Link, 5)
// NOTE(review): crawl runs synchronously here and sends on ch, which has room
// for only 5 links and has no reader yet, so this call blocks as soon as it
// tries to send a sixth link.
crawl(url, 1, ch)
visited := make(map[string]bool)
time.Sleep(2 * time.Second)
// NOTE(review): a range over a channel ends only when the channel is closed,
// but close(ch) is placed after this loop, so the loop cannot finish.
for link := range ch {
if _, ok := visited[link.URL]; !ok {
visited[link.URL] = true
}
}
close(ch)
for l := range visited {
fmt.Println(l)
}
}
func crawl(url string, n int, ch chan *Link) {
if n < 1 {
return
}
resp, err := http.Get(url)
if err != nil {
log.Fatalf("Can not reach the site. Error = %v\n", err)
os.Exit(1)
}
b := resp.Body
defer b.Close()
z := html.NewTokenizer(b)
nextN := n - 1
for {
token := z.Next()
switch token {
case html.ErrorToken:
return
case html.StartTagToken:
current := z.Token()
if current.Data != "a" {
continue
}
result, ok := getHrefTag(current)
if !ok {
continue
}
hasProto := strings.HasPrefix(result, "http")
if hasProto {
go crawl(result, nextN, ch)
ch <- &Link{result, true}
}
}
}
}
// getHrefTag returns the value of the first "href" attribute of token.
// ok is false, and result empty, when the token has no such attribute.
func getHrefTag(token html.Token) (result string, ok bool) {
	for i := range token.Attr {
		if attr := token.Attr[i]; attr.Key == "href" {
			return attr.Val, true
		}
	}
	return "", false
}
UPDATED:
After some fiddling I figured out to change the code to remove the data races, however I still don't know how to avoid crawling urls that were visited previously (maybe I should start another question?):
package main
import (
"fmt"
"log"
"net/http"
"os"
"strings"
"golang.org/x/net/html"
)
// main crawls the URL given as the only command-line argument and prints
// every link delivered by newCrawl.
func main() {
	if len(os.Args) != 2 {
		fmt.Println("Usage: crawl [URL].")
		// Stop here: continuing would index os.Args[1], which does not exist
		// when no argument was given.
		os.Exit(1)
	}
	url := os.Args[1]
	if !strings.HasPrefix(url, "http://") {
		url = "http://" + url
	}
	for link := range newCrawl(url, 1) {
		fmt.Println(link)
	}
}
// newCrawl starts crawling url in the background down to depth num and
// returns the channel on which found links are delivered. The channel is
// closed when the crawl has finished, so callers can simply range over it.
func newCrawl(url string, num int) chan string {
	ch := make(chan string, 20)
	go func() {
		// Close after the crawl so that a range over ch terminates.
		defer close(ch)
		// Honour the requested depth; it used to be hard-coded to 1.
		crawl(url, num, ch)
	}()
	return ch
}
func crawl(url string, n int, ch chan string) {
if n < 1 {
return
}
resp, err := http.Get(url)
if err != nil {
log.Fatalf("Can not reach the site. Error = %v\n", err)
os.Exit(1)
}
b := resp.Body
defer b.Close()
z := html.NewTokenizer(b)
nextN := n - 1
for {
token := z.Next()
switch token {
case html.ErrorToken:
return
case html.StartTagToken:
current := z.Token()
if current.Data != "a" {
continue
}
result, ok := getHrefTag(current)
if !ok {
continue
}
hasProto := strings.HasPrefix(result, "http")
if hasProto {
done := make(chan struct{})
go func() {
crawl(result, nextN, ch)
close(done)
}()
<-done
ch <- result
}
}
}
}
// getHrefTag returns the value of the first "href" attribute of token.
// ok is false, and result empty, when the token has no such attribute.
func getHrefTag(token html.Token) (result string, ok bool) {
	for i := range token.Attr {
		if attr := token.Attr[i]; attr.Key == "href" {
			return attr.Val, true
		}
	}
	return "", false
}
I think that recursively spawning goroutines is not a good idea. It can easily get out of control. I would prefer a flatter model like this:
package main
import (
"fmt"
"log"
"net/http"
"os"
"strings"
"sync"
"golang.org/x/net/html"
)
// main crawls the URL given as the only command-line argument with a work
// group of maximum depth 1 and prints every recorded URL with its depth.
func main() {
	if len(os.Args) != 2 {
		fmt.Println("Usage: crawl [URL].")
		// Stop here: continuing would index os.Args[1], which does not exist
		// when no argument was given.
		os.Exit(1)
	}
	url := os.Args[1]
	if !strings.HasPrefix(url, "http://") {
		url = "http://" + url
	}
	wg := NewWorkGroup(1)
	wg.Crawl(url)
	for k, v := range wg.urlMap {
		fmt.Printf("%s: %d\n", k, v)
	}
}
// represents single link and its deph
type Link struct {
url string
deph uint32
}
// wraps all around to group
type WorkGroup struct {
*sync.WaitGroup
maxDeph uint32
numW int
pool chan *Worker
linkQ chan Link
urlMap map[string]uint32
}
type Worker struct {
result chan []Link
}
func newWorker() *Worker {
return &Worker{
result: make(chan []Link),
}
}
// NewWorkGroup builds a WorkGroup that follows links up to maxDeph levels.
// The number of workers equals maxDeph, capped at 10; the worker pool is
// buffered for that many workers and the link queue holds up to 100 links.
func NewWorkGroup(maxDeph uint32) *WorkGroup {
	const workerCap = 10
	workers := workerCap
	if maxDeph <= workerCap {
		workers = int(maxDeph)
	}

	group := new(WorkGroup)
	group.WaitGroup = new(sync.WaitGroup)
	group.maxDeph = maxDeph
	group.numW = workers
	group.pool = make(chan *Worker, workers)
	group.linkQ = make(chan Link, 100)
	group.urlMap = make(map[string]uint32)
	return group
}
// spawnDispatcher starts the goroutine that consumes crawl results: it takes
// workers from pool in the order they announced themselves, reads each
// worker's result, records URLs not seen before in urlMap and queues those
// below the maximum depth on linkQ. The goroutine is registered on the
// embedded WaitGroup and closes linkQ when it returns.
func (wg *WorkGroup) spawnDispatcher() {
wg.Add(1)
go func() {
defer wg.Done()
// Closing linkQ ends the crawlers' range loops.
defer close(wg.linkQ)
for w := range wg.pool {
links := <-w.result
for i := 0; i < len(links); i++ {
if _, ok := wg.urlMap[links[i].url]; !ok {
wg.urlMap[links[i].url] = links[i].deph
// Links that already sit at the maximum depth are recorded but not queued.
if links[i].deph < wg.maxDeph {
select {
case wg.linkQ <- links[i]:
// Queued successfully; move on to the next link.
continue
default:
// linkQ is full; do not block here, to avoid a possible deadlock.
}
// Reached only when linkQ was full: the remaining links of this result are
// dropped.
break
}
}
}
// An empty link queue with no announced worker is treated as the end of
// the crawl.
// NOTE(review): this looks like it can race with a crawler that has already
// taken a link from linkQ but has not yet sent itself on pool; confirm.
if len(wg.linkQ) == 0 && len(wg.pool) == 0 {
return
}
}
}()
}
// Crawl starts the crawler and dispatcher goroutines, seeds the link queue
// with url at depth 0 and blocks until all of those goroutines have returned.
func (wg *WorkGroup) Crawl(url string) {
// pool is closed only after Wait has returned, i.e. once no goroutine uses it.
defer close(wg.pool)
wg.spawnCrawlers()
wg.spawnDispatcher()
wg.linkQ <- Link{url: url, deph: 0}
wg.Wait()
}
func (wg *WorkGroup) spawnCrawlers() {
// custom num of workers, used maxDeph
for i := 0; i < wg.numW; i++ {
wg.newCrawler()
}
}
func (wg *WorkGroup) newCrawler() {
wg.Add(1)
go func(w *Worker) {
defer wg.Done()
defer close(w.result)
for link := range wg.linkQ {
wg.pool <- w
w.result <- getExternalUrls(link)
}
}(newWorker())
}
// getExternalUrls fetches source.url and returns one Link, at depth
// source.deph+1, for every <a> tag whose href starts with "http".
// It logs the error and returns nil when the page cannot be fetched.
func getExternalUrls(source Link) []Link {
	resp, err := http.Get(source.url)
	if err != nil {
		log.Printf("Can not reach the site. Error = %v\n", err)
		return nil
	}
	defer resp.Body.Close()
	z := html.NewTokenizer(resp.Body)
	links := []Link{}
	// The loop is left only through the ErrorToken case, so no statement is
	// needed (or reachable) after it.
	for {
		switch z.Next() {
		case html.ErrorToken:
			return links
		case html.StartTagToken:
			current := z.Token()
			if current.Data != "a" {
				continue
			}
			if url, ok := getHrefTag(current); ok && strings.HasPrefix(url, "http") {
				links = append(links, Link{url: url, deph: source.deph + 1})
			}
		}
	}
}
// getHrefTag returns the value of the first "href" attribute of token.
// ok is false, and result empty, when the token has no such attribute.
func getHrefTag(token html.Token) (result string, ok bool) {
	for i := range token.Attr {
		if attr := token.Attr[i]; attr.Key == "href" {
			return attr.Val, true
		}
	}
	return "", false
}

Go: One producer many consumers

So I have seen a lot of ways of implementing one consumer and many producers in Go - the classic fanIn function from the Concurrency in Go talk.
What I want is a fanOut function. It takes as a parameter a channel it reads a value from and returns a slice of channels that it writes copies of this value to.
Is there a correct/recommended way of implementing this?
You pretty much described the best way to do it but here is a small sample of code that does it.
Go playground: https://play.golang.org/p/jwdtDXVHJk
package main
import (
"fmt"
"time"
)
// producer returns a channel on which a background goroutine sends
// 0, 1, ..., iters-1, sleeping one second after each send, and which is
// closed after the last value.
func producer(iters int) <-chan int {
	out := make(chan int)
	go func() {
		defer close(out)
		for n := 0; n < iters; n++ {
			out <- n
			time.Sleep(time.Second)
		}
	}()
	return out
}
// consumer prints every value received on cin and returns once cin has been
// closed and drained.
func consumer(cin <-chan int) {
	for {
		v, open := <-cin
		if !open {
			return
		}
		fmt.Println(v)
	}
}
// fanOut copies every value received on ch to each of size output channels
// and returns those channels. Each output has a buffer of lag elements, which
// bounds how far one receiver may fall behind the others before the copying
// goroutine blocks. All outputs are closed once ch is closed and drained.
func fanOut(ch <-chan int, size, lag int) []chan int {
	outputs := make([]chan int, size)
	for idx := 0; idx < len(outputs); idx++ {
		outputs[idx] = make(chan int, lag)
	}
	go func() {
		for {
			v, open := <-ch
			if !open {
				break
			}
			for idx := range outputs {
				outputs[idx] <- v
			}
		}
		// Input exhausted: tell every receiver that nothing more will arrive.
		for idx := range outputs {
			close(outputs[idx])
		}
	}()
	return outputs
}
// fanOutUnbuffered copies every value received on ch to each of size
// unbuffered output channels and returns those channels. All outputs are
// closed once ch is closed and drained.
func fanOutUnbuffered(ch <-chan int, size int) []chan int {
	cs := make([]chan int, size)
	for i := range cs {
		// Unbuffered: every send waits for its receiver, so the receivers
		// proceed in lock-step and a slow one holds up all the others.
		cs[i] = make(chan int)
	}
	go func() {
		for i := range ch {
			for _, c := range cs {
				c <- i
			}
		}
		for _, c := range cs {
			// close all our fanOut channels when the input channel is exhausted.
			close(c)
		}
	}()
	return cs
}
func main() {
c := producer(10)
chans := fanOutUnbuffered(c, 3)
go consumer(chans[0])
go consumer(chans[1])
consumer(chans[2])
}
The important part to note is how we close the output channels once the input channel has been exhausted. Also if one of the output channels blocks on the send it will hold up the send on the other output channels. We control the amount of lag by setting the buffer size of the channels.
This solution below is a bit contrived, but it works for me:
package main
import (
"fmt"
"time"
"crypto/rand"
"encoding/binary"
)
// handleNewChannels keeps the list of known channels. It first publishes the
// empty list on arrchangen and then, forever, appends each channel received
// on intchangen and publishes the grown list again.
func handleNewChannels(arrchangen chan [](chan uint32),
	intchangen chan (chan uint32)) {
	known := []chan uint32{}
	for {
		arrchangen <- known
		known = append(known, <-intchangen)
	}
}
// sendToChannels loops forever. Once per second it generates a random uint32
// and sends it to every channel in the current list; whenever a new list
// arrives on arrchangen it replaces the current one. The initial list is read
// from arrchangen before the loop starts.
func sendToChannels(arrchangen chan [](chan uint32)) {
	tick := time.Tick(1 * time.Second)
	currarr := <-arrchangen
	for {
		select {
		case <-tick:
			var n uint32
			if err := binary.Read(rand.Reader, binary.LittleEndian, &n); err != nil {
				// Skip this tick rather than broadcast a value that was never
				// filled in.
				fmt.Println("Cannot generate a random value:", err)
				continue
			}
			for _, c := range currarr {
				c <- n
			}
			// Report only when at least one channel received the value.
			if len(currarr) > 0 {
				fmt.Println("Sent generated ", n)
			}
		case newarr := <-arrchangen:
			currarr = newarr
		}
	}
}
// handleChannel loops forever, printing every value received on tchan.
func handleChannel(tchan chan uint32) {
	for {
		fmt.Println("Got the value ", <-tchan)
	}
}
func createChannels(intchangen chan (chan uint32)) {
othertick := time.Tick(5 * time.Second)
for {
<-othertick
fmt.Println("Creating new channel! ")
newchan := make(chan uint32)
intchangen <- newchan
go handleChannel(newchan)
}
}
func main() {
arrchangen := make(chan [](chan uint32))
intchangen := make(chan (chan uint32))
go handleNewChannels(arrchangen, intchangen)
go sendToChannels(arrchangen)
createChannels(intchangen)
}
First, see related question What is the neatest idiom for producer/consumer in Go? and One thread showing interest in another thread (consumer / producer). Also, take a look to producer-consumer problem. About concurrency see how to achieve concurrency In Google Go.
We can handle multiple consumers without making the copy of channel data for each consumer.
Go playground: https://play.golang.org/p/yOKindnqiZv
package main
import (
"fmt"
"sync"
)
type data struct {
msg string
consumers int
}
// main passes a single *data value through consumerCount consumers over one
// shared channel: each consumer receives it, decrements the remaining-consumer
// counter and forwards it unless it was the last receiver.
func main() {
	// Unbuffered here; the original author notes that a buffered channel
	// works as well.
	ch := make(chan *data)
	consumerCount := 3 // specify no. of consumers
	var wg sync.WaitGroup

	// Producer: publishes the one shared message.
	go func() {
		ch <- &data{
			msg:       "hello everyone!",
			consumers: consumerCount,
		}
	}()

	for i := 1; i <= consumerCount; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			obj := <-ch
			fmt.Printf("consumer %d received data %v\n", idx, obj)
			obj.consumers--
			if obj.consumers <= 0 {
				fmt.Printf("last receiver: %d\n", idx)
				return
			}
			ch <- obj // forward to others
		}(i)
	}
	wg.Wait()
}

Resources