Synchronize buffered channel and WaitGroup - Go

I am having an issue using a WaitGroup with a buffered channel. The problem is that the WaitGroup completes before the channel is fully read, which leaves my channel half-read and the loop breaking partway through.
func main() {
    var wg sync.WaitGroup
    var err error
    start := time.Now()
    students := make([]studentDetails, 0)
    studentCh := make(chan studentDetail, 10000)
    errorCh := make(chan error, 1)

    wg.Add(1)
    go s.getDetailStudents(rCtx, studentCh, errorCh, &wg, s.Link, false)

    go func(ch chan studentDetail, e chan error) {
    LOOP:
        for {
            select {
            case p, ok := <-ch:
                if ok {
                    L.Printf("Links %s: [%s]\n", p.title, p.link)
                    students = append(students, p)
                } else {
                    L.Print("Closed channel")
                    break LOOP
                }
            case err = <-e:
                if err != nil {
                    break
                }
            }
        }
    }(studentCh, errorCh)

    wg.Wait()
    close(studentCh)
    close(errorCh)
    L.Warnln("closed: all wait-groups completed!")
    L.Warnf("total items fetched: %d", len(students))
    elapsed := time.Since(start)
    L.Warnf("operation took %s", elapsed)
}
The complication is that this function is recursive: it makes an HTTP call to fetch students and then makes further calls depending on a condition in the response.
func (s Student) getDetailStudents(rCtx context.Context, content chan<- studentDetail, errorCh chan<- error, wg *sync.WaitGroup, url string, subSection bool) {
    util.MustNotNil(rCtx)
    L := logger.GetLogger(rCtx)
    defer func() {
        L.Println("Closing all waitgroup!")
        wg.Done()
    }()

    wc := getWC()
    httpClient := wc.Registry.MustHTTPClient()
    res, err := httpClient.Get(url)
    if err != nil {
        L.Fatal(err)
    }
    defer res.Body.Close()
    if res.StatusCode != 200 {
        L.Errorf("status code error: %d %s", res.StatusCode, res.Status)
        errorCh <- errors.New("service_status_code")
        return
    }

    // parse response and return error if found some through errorCh as done above.
    // decide page subSection based on response if it is more.
    if !subSection {
        wg.Add(1)
        go s.getDetailStudents(rCtx, content, errorCh, wg, link, true)
        // L.Warnf("total pages found %d", pageSub.Length()+1)
    }
    // Find students from response list and parse each Student
    students := s.parseStudentItemList(rCtx, item)
    for _, student := range students {
        content <- student
    }
    L.Warnf("Calling HTTP Service for %q with total %d record", url, elementsSub.Length())
}
Variable names have been changed to avoid exposing the original code base.
The problem is that reading stops at an arbitrary point as soon as the WaitGroup completes. I expect execution to hold until all students are read; in case of an error it should break as soon as the error is encountered.

You need to know when the receiving goroutine completes. The WaitGroup does that for the generating goroutine, so you can use two WaitGroups:
var wgReader sync.WaitGroup

wg.Add(1)
go s.getDetailStudents(rCtx, studentCh, errorCh, &wg, s.Link, false)

wgReader.Add(1)
go func(ch chan studentDetail, e chan error) {
    defer wgReader.Done()
    ...
}(studentCh, errorCh)

wg.Wait()
close(studentCh)
close(errorCh)
wgReader.Wait() // Wait for the reader to complete
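For completeness, here is a self-contained sketch of the two-WaitGroup pattern, with a hypothetical producer and consumer standing in for the student code (the int payload and the counts are made up for illustration):
package main

import (
    "fmt"
    "sync"
)

func main() {
    var wgWriter, wgReader sync.WaitGroup
    ch := make(chan int, 10)

    // Producer: wgWriter tracks when all sends are done.
    wgWriter.Add(1)
    go func() {
        defer wgWriter.Done()
        for i := 0; i < 5; i++ {
            ch <- i
        }
    }()

    // Consumer: wgReader tracks when all receives are done.
    wgReader.Add(1)
    go func() {
        defer wgReader.Done()
        for v := range ch { // range ends once ch is closed and drained
            fmt.Println("got", v)
        }
    }()

    wgWriter.Wait() // all writers finished, so it is safe to close
    close(ch)
    wgReader.Wait() // wait for the reader to drain the channel
}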

Since you are using buffered channels, you can retrieve the remaining values after closing the channel. You will also need a mechanism to prevent your main function from exiting too early while the reader is still doing work, as @Burak Serdar has advised.
I restructured the code into a working example, but it should get the point across.
package main

import (
    "context"
    "log"
    "sync"
    "time"
)

type studentDetails struct {
    title string
    link  string
}

func main() {
    var wg sync.WaitGroup
    var err error

    students := make([]studentDetails, 0)
    studentCh := make(chan studentDetails, 10000)
    errorCh := make(chan error, 1)

    start := time.Now()

    wg.Add(1)
    go getDetailStudents(context.TODO(), studentCh, errorCh, &wg, "http://example.com", false)

    // create the wait channel once: calling wrapWait inside the select
    // would spawn a new goroutine on every iteration and could fire a
    // second time, closing studentCh twice
    wgDone := wrapWait(&wg)

LOOP:
    for {
        select {
        case p, ok := <-studentCh:
            if ok {
                log.Printf("Links %s: [%s]\n", p.title, p.link)
                students = append(students, p)
            } else {
                log.Println("Draining student channel")
                for p := range studentCh {
                    log.Printf("Links %s: [%s]\n", p.title, p.link)
                    students = append(students, p)
                }
                break LOOP
            }
        case err = <-errorCh:
            if err != nil {
                break LOOP
            }
        case <-wgDone:
            close(studentCh)
        }
    }

    close(errorCh)

    elapsed := time.Since(start)
    log.Printf("operation took %s", elapsed)
}

func getDetailStudents(rCtx context.Context, content chan<- studentDetails, errorCh chan<- error, wg *sync.WaitGroup, url string, subSection bool) {
    defer func() {
        log.Println("Closing")
        wg.Done()
    }()
    if !subSection {
        wg.Add(1)
        go getDetailStudents(rCtx, content, errorCh, wg, url, true)
        // L.Warnf("total pages found %d", pageSub.Length()+1)
    }
    content <- studentDetails{
        title: "title",
        link:  "link",
    }
}
// helper function to allow using WaitGroup in a select
func wrapWait(wg *sync.WaitGroup) <-chan struct{} {
    out := make(chan struct{})
    go func() {
        wg.Wait()
        out <- struct{}{}
    }()
    return out
}
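As an aside, the point about buffered channels is easy to verify in isolation; a minimal sketch showing that values buffered before close can still be received:
package main

import "fmt"

func main() {
    ch := make(chan int, 3)
    ch <- 1
    ch <- 2
    close(ch) // buffered values survive the close

    for v := range ch { // range drains the remaining buffer, then exits
        fmt.Println(v) // prints 1, then 2
    }

    v, ok := <-ch      // a drained, closed channel yields the zero value
    fmt.Println(v, ok) // 0 false
}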

wg.Add(1)
go func() {
    defer wg.Done()
    // I do not think you need a recursive function;
    // this function is overcomplicated.
    s.getDetailStudents(rCtx, studentCh, errorCh, &wg, s.Link, false)
}()
wg.Add(1)
go func(ch chan studentDetail, e chan error) {
    defer wg.Done()
    ...
}(...)
wg.Wait()
close(studentCh)
close(errorCh)
wg.Wait()
close(studentCh)
close(errorCh)
This should solve the problem. The s.getDetailStudents function should be simplified; making it recursive has no benefit.
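To make that suggestion concrete, here is a rough sketch of a flattened, page-based version; fetchPage and the two-page cutoff are hypothetical stand-ins for the redacted HTTP logic:
package main

import (
    "context"
    "fmt"
    "sync"
)

type studentDetail struct{ title, link string }

// fetchPage is a hypothetical helper standing in for the redacted HTTP
// call; it returns one page of students and whether more pages exist.
func fetchPage(ctx context.Context, url string, page int) ([]studentDetail, bool, error) {
    if page >= 2 {
        return nil, false, nil // pretend there are exactly two pages
    }
    return []studentDetail{{title: "title", link: "link"}}, true, nil
}

// fetchStudents loops over pages instead of recursing, so the caller does
// a single wg.Add(1) and there is no nested WaitGroup bookkeeping.
func fetchStudents(ctx context.Context, content chan<- studentDetail, errorCh chan<- error, wg *sync.WaitGroup, url string) {
    defer wg.Done()
    for page := 1; ; page++ {
        students, hasMore, err := fetchPage(ctx, url, page)
        if err != nil {
            errorCh <- err
            return
        }
        for _, s := range students {
            content <- s
        }
        if !hasMore {
            return
        }
    }
}

func main() {
    var wg sync.WaitGroup
    content := make(chan studentDetail, 100)
    errorCh := make(chan error, 1)

    wg.Add(1)
    go fetchStudents(context.Background(), content, errorCh, &wg, "http://example.com")

    go func() {
        wg.Wait()
        close(content)
    }()
    for s := range content {
        fmt.Println(s.title, s.link) // collect/process each student
    }
    select {
    case err := <-errorCh:
        fmt.Println("error:", err)
    default:
    }
}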

Related

Questions about goroutines performance

I'm new to Go and would like to understand more about goroutines. I'll leave two examples; I'd like opinions on which of the two performs better, and why.
func doRequest(method string, url string, body io.Reader) (*http.Response, error) {
    request, _ := http.NewRequest(method, url, body)
    response, err := c.httpClient.Do(request)
    request.Close = true
    c.httpClient.CloseIdleConnections()
    return response, err
}
First:
func test() {
    var wg *sync.WaitGroup = new(sync.WaitGroup)
    qtd := 5
    wg.Add(qtd)
    for i := 0; i < qtd; i++ {
        go func(wg *sync.WaitGroup) {
            defer wg.Done()
            doRequest(http.MethodGet, "http://test.com", nil)
        }(wg)
    }
    wg.Wait()
}
Second:
func test() {
    var wg *sync.WaitGroup = new(sync.WaitGroup)
    wg.Add(1)
    go func(wg *sync.WaitGroup) {
        defer wg.Done()
        for i := 0; i < 5; i++ {
            doRequest(http.MethodGet, "http://test.com", nil)
        }
    }(wg)
    wg.Wait()
}
Is there a better way than these two?
If not, which of the two is more performant?
The go keyword before a function call creates a goroutine.
First example: 5 goroutines, 1 doRequest() per goroutine.
Second example: 1 goroutine, 5 doRequest() calls in that goroutine.
In the first case the 5 requests run concurrently, so the total time is roughly that of the slowest request; in the second case they run sequentially, so the times add up.
Check out concurrency in: A Tour of Go
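To make the difference concrete, here is a minimal sketch that swaps the HTTP call for a fixed 100ms delay (an arbitrary stand-in for a request):
package main

import (
    "fmt"
    "sync"
    "time"
)

func fakeRequest() { time.Sleep(100 * time.Millisecond) } // stand-in for doRequest

func main() {
    // Concurrent: 5 goroutines, one fakeRequest each.
    start := time.Now()
    var wg sync.WaitGroup
    for i := 0; i < 5; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            fakeRequest()
        }()
    }
    wg.Wait()
    fmt.Println("concurrent:", time.Since(start)) // ~100ms

    // Sequential: one goroutine, 5 fakeRequest calls.
    start = time.Now()
    for i := 0; i < 5; i++ {
        fakeRequest()
    }
    fmt.Println("sequential:", time.Since(start)) // ~500ms
}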

How to return the success or failure result in the goroutine to the main function and fmt.Println the number of successes

I want to write the contents of the process function in a function separate from main.
I want to count the number of URLs that did not result in an error from resp, err := client.Do(req).
I want to print the number of successes before fmt.Println("Finish!") in the main function.
What should I do?
func main() {
    site_list := [][]string{
        {"site1", "https://www.aaaa"},
        {"site2", "https://www.bbbb"},
        {"site3", "https://www.cccc"},
        {"site4", "https://www.dddd"},
    }
    var wg sync.WaitGroup
    maxChan := make(chan bool, 2)
    for _, v := range site_list {
        maxChan <- true
        wg.Add(1)
        go process(v[1], maxChan, &wg)
    }
    wg.Wait()
    fmt.Println("Finish!")
}
func process(site_url string, maxChan chan bool, wg *sync.WaitGroup) {
    defer wg.Done()
    defer func(maxChan chan bool) {
        <-maxChan
    }(maxChan)
    req, err := http.NewRequest("GET", site_url, nil)
    client := new(http.Client)
    resp, err := client.Do(req)
    if err != nil {
        fmt.Println("client.Do error: " + site_url)
        return
    }
    defer resp.Body.Close()
}
As per my comment above, create a custom work item struct:
type urlItem struct {
    Name string
    Url  string
    Err  error
}
Use pointers, so that the process() function can update the underlying struct:
site_list := []*urlItem{
    &urlItem{"site1", "https://www.aaaa", nil},
    &urlItem{"site2", "https://www.bbbb", nil},
    &urlItem{"site3", "https://www.cccc", nil},
    &urlItem{"site4", "https://www.dddd", nil},
}
process() can then safely update its particular work item (without the need for any mutexes or synchronization), as each goroutine gets a unique work item.
func process(site *urlItem, maxChan chan bool, wg *sync.WaitGroup) { /* ... */ }
Then tally the goroutine errors at the end:
var failCount int
for _, v := range site_list {
    if v.Err != nil {
        failCount++
    }
}
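Pieced together, a runnable sketch of this pattern might look like the following (the URLs are placeholders from the question, and http.Get stands in for the original request code):
package main

import (
    "fmt"
    "net/http"
    "sync"
)

type urlItem struct {
    Name string
    Url  string
    Err  error
}

func process(site *urlItem, maxChan chan bool, wg *sync.WaitGroup) {
    defer wg.Done()
    defer func() { <-maxChan }() // release the semaphore slot
    resp, err := http.Get(site.Url)
    if err != nil {
        site.Err = err // each goroutine owns its item, so no locking is needed
        return
    }
    resp.Body.Close()
}

func main() {
    site_list := []*urlItem{
        {"site1", "https://www.aaaa", nil},
        {"site2", "https://www.bbbb", nil},
        {"site3", "https://www.cccc", nil},
        {"site4", "https://www.dddd", nil},
    }
    var wg sync.WaitGroup
    maxChan := make(chan bool, 2) // at most 2 concurrent requests
    for _, v := range site_list {
        maxChan <- true
        wg.Add(1)
        go process(v, maxChan, &wg)
    }
    wg.Wait()

    var failCount int
    for _, v := range site_list {
        if v.Err != nil {
            failCount++
        }
    }
    fmt.Printf("%d succeeded, %d failed\n", len(site_list)-failCount, failCount)
    fmt.Println("Finish!")
}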

Confusion regarding channel directions and blocking in Go

In a function definition, if a channel is an argument without a direction, does it have to send or receive something?
func makeRequest(url string, ch chan<- string, results chan<- string) {
    start := time.Now()
    resp, err := http.Get(url)
    defer resp.Body.Close()
    if err != nil {
        fmt.Printf("%v", err)
    }
    resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
    defer resp.Body.Close()
    secs := time.Since(start).Seconds()
    if err != nil {
        fmt.Printf("%v", err)
    }
    // Cannot move past this.
    ch <- fmt.Sprintf("%f", secs)
    results <- <-ch
}

func MakeRequestHelper(url string, ch chan string, results chan string, iterations int) {
    for i := 0; i < iterations; i++ {
        makeRequest(url, ch, results)
    }
    for i := 0; i < iterations; i++ {
        fmt.Println(<-ch)
    }
}

func main() {
    args := os.Args[1:]
    threadString := args[0]
    iterationString := args[1]
    url := args[2]
    threads, err := strconv.Atoi(threadString)
    if err != nil {
        fmt.Printf("%v", err)
    }
    iterations, err := strconv.Atoi(iterationString)
    if err != nil {
        fmt.Printf("%v", err)
    }
    channels := make([]chan string, 100)
    for i := range channels {
        channels[i] = make(chan string)
    }
    // results aggregates all the things received by channels in all goroutines
    results := make(chan string, iterations*threads)
    for i := 0; i < threads; i++ {
        go MakeRequestHelper(url, channels[i], results, iterations)
    }
    resultSlice := make([]string, threads*iterations)
    for i := 0; i < threads*iterations; i++ {
        resultSlice[i] = <-results
    }
}
In the above code,
ch <- or <-results
seems to be blocking every goroutine that executes makeRequest.
I am new to the concurrency model of Go. I understand that sending to and receiving from a channel blocks, but I find it difficult to tell what is blocking what in this code.
I'm not really sure what you are doing... It seems really convoluted. I suggest you read up on how to use channels.
https://tour.golang.org/concurrency/2
That being said, there is so much going on in your code that it was much easier to just gut it down to something a bit simpler (it can be simplified further). I left comments to explain the code.
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "sync"
    "time"
)

// using structs is a nice way to organize your code
type Worker struct {
    wg        sync.WaitGroup
    semaphore chan struct{}
    result    chan Result
    client    http.Client
}

// group returns so that you don't have to send to many channels
type Result struct {
    duration float64
    results  string
}

// closing your channels will stop the for loop in main
func (w *Worker) Close() {
    close(w.semaphore)
    close(w.result)
}

func (w *Worker) MakeRequest(url string) {
    // a semaphore is a simple way to limit the number of goroutines
    // running at any single point in time; google them, Go uses them often
    w.semaphore <- struct{}{}
    defer func() {
        w.wg.Done()
        <-w.semaphore
    }()

    start := time.Now()
    resp, err := w.client.Get(url)
    if err != nil {
        log.Println("error", err)
        return
    }
    defer resp.Body.Close()

    // don't have any examples where I need to also POST anything but the point should be made
    // resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
    // if err != nil {
    //     log.Println("error", err)
    //     return
    // }
    // defer resp.Body.Close()

    secs := time.Since(start).Seconds()
    b, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Println("error", err)
        return
    }
    w.result <- Result{duration: secs, results: string(b)}
}

func main() {
    urls := []string{
        "https://facebook.com/", "https://twitter.com/", "https://google.com/", "https://youtube.com/", "https://linkedin.com/", "https://wordpress.org/",
        "https://instagram.com/", "https://pinterest.com/", "https://wikipedia.org/", "https://wordpress.com/", "https://blogspot.com/", "https://apple.com/",
    }
    workerNumber := 5
    worker := Worker{
        semaphore: make(chan struct{}, workerNumber),
        result:    make(chan Result),
        client:    http.Client{Timeout: 5 * time.Second},
    }
    // use a wait group to allow your code to wait for
    // all your goroutines to finish
    for _, url := range urls {
        worker.wg.Add(1)
        go worker.MakeRequest(url)
    }
    // by declaring Wait and Close in a separate goroutine
    // I can get to the for loop below and iterate on the results
    // in a non-blocking fashion
    go func() {
        worker.wg.Wait()
        worker.Close()
    }()
    // do something with the results channel
    for res := range worker.result {
        fmt.Printf("Request took %2.f seconds.\nResults: %s\n\n", res.duration, res.results)
    }
}
The channels in the channels slice are nil if no make is executed for each element (making the slice does not make the channels it holds), and any send or receive on a nil channel will block forever. I'm not sure exactly what you're trying to do here, but that's the basic problem.
See https://golang.org/doc/effective_go.html#channels for an explanation of how channels work.
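To see the blocking behavior of a nil channel in isolation, here is a tiny sketch; the one-second timeout exists only so the demonstration can exit:
package main

import (
    "fmt"
    "time"
)

func main() {
    var ch chan string // nil: what a slice element looks like before make(chan string)

    select {
    case ch <- "hello": // send on a nil channel blocks forever
        fmt.Println("sent")
    case <-time.After(time.Second):
        fmt.Println("send on nil channel blocked") // this prints
    }
}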

Defer never called when writing to channel

I'm trying to write to a channel as the last action in a goroutine.
Unfortunately this is not working, and the WaitGroup is never done.
package main

import (
    "fmt"
    "sync"

    "github.com/SlyMarbo/rss"
)

func main() {
    urls := []string{"http://rss.cnn.com/rss/edition.rss", "http://rss.time.com/web/time/rss/top/index.xml"}
    var c = make(chan string)
    var wg sync.WaitGroup
    for _, url := range urls {
        wg.Add(1)
        go receiveRss(url, &wg, c)
    }
    wg.Wait()
    fmt.Println("==============DONE=================")
}
func receiveRss(url string, wg *sync.WaitGroup, c chan string) {
    defer wg.Done()
    feed, err := rss.Fetch(url)
    if err != nil {
        fmt.Println("Failed to retrieve RSS feed", err)
    }
    items := feed.Items
    for _, item := range items {
        c <- item.Title
    }
}
When replacing c <- item.Title with fmt.Println(item.Title) the deferred function is called and DONE is printed.
The problem was that I was only writing to the channel, never reading from it; without reading from it, the channel is useless.
The solution is to read from the channel after the loop which starts the goroutines:
for title := range c {
    fmt.Println(title)
}
This then causes an endless loop if the channel is never closed, so the channel has to be closed once every writer is done:
close(c)
Here is the whole code:
func main() {
    urls := []string{"http://rss.cnn.com/rss/edition.rss", "http://rss.time.com/web/time/rss/top/index.xml"}
    var c = make(chan []string)
    var wg sync.WaitGroup
    for _, url := range urls {
        wg.Add(1)
        go receiveRss(url, &wg, c)
    }
    // close the channel once all writers are done; closing it inside
    // receiveRss would panic as soon as a second goroutine closes it
    // or sends on the already-closed channel
    go func() {
        wg.Wait()
        close(c)
    }()
    for title := range c {
        fmt.Println(title)
    }
    fmt.Println("==============DONE=================")
}

func receiveRss(url string, wg *sync.WaitGroup, c chan []string) {
    defer wg.Done()
    feed, err := rss.Fetch(url)
    if err != nil {
        fmt.Println("Failed to retrieve RSS feed", err)
        return // without this, feed.Items below would panic on a nil feed
    }
    items := feed.Items
    var titles []string
    for _, item := range items {
        titles = append(titles, item.Title)
    }
    c <- titles
}

Golang program hangs without finishing execution

I have the following Go program:
package main

import (
    "fmt"
    "net/http"
    "time"
)

var urls = []string{
    "http://www.google.com/",
    "http://golang.org/",
    "http://yahoo.com/",
}

type HttpResponse struct {
    url      string
    response *http.Response
    err      error
    status   string
}

func asyncHttpGets(url string, ch chan *HttpResponse) {
    client := http.Client{}
    if url == "http://www.google.com/" {
        time.Sleep(500 * time.Millisecond) //google is down
    }
    fmt.Printf("Fetching %s \n", url)
    resp, err := client.Get(url)
    u := &HttpResponse{url, resp, err, "fetched"}
    ch <- u
    fmt.Println("sent to chan")
}

func main() {
    fmt.Println("start")
    ch := make(chan *HttpResponse, len(urls))
    for _, url := range urls {
        go asyncHttpGets(url, ch)
    }
    for i := range ch {
        fmt.Println(i)
    }
    fmt.Println("Im done")
}
However, when I run it, it hangs (i.e. the last part, which ought to print Im done, never runs).
Here's the terminal output:
$ go run get.go
start
Fetching http://yahoo.com/
Fetching http://golang.org/
Fetching http://www.google.com/
sent to chan
&{http://www.google.com/ 0xc820144120 fetched}
sent to chan
&{http://golang.org/ 0xc82008b710 fetched}
sent to chan
&{http://yahoo.com/ 0xc82008b7a0 fetched}
The problem is that ranging over a channel in a for loop will continue forever unless the channel is closed. If you want to read precisely len(urls) values from the channel, you should loop that many times:
for i := 0; i < len(urls); i++ {
    fmt.Println(<-ch)
}
Another good, if slightly devious, trick is to use a sync.WaitGroup: increment it per goroutine, then monitor it with Wait; once it is done, it closes your channel, allowing the next block of code to run. The reason I offer this approach is that it gets away from using a static number like len(urls) in a loop, so you can have a dynamic slice that might change.
The reason Wait and close are in their own goroutine is so that your code can reach the for loop and range over the channel.
package main

import (
    "fmt"
    "net/http"
    "sync"
    "time"
)

var urls = []string{
    "http://www.google.com/",
    "http://golang.org/",
    "http://yahoo.com/",
}

type HttpResponse struct {
    url      string
    response *http.Response
    err      error
    status   string
}

func asyncHttpGets(url string, ch chan *HttpResponse, wg *sync.WaitGroup) {
    client := http.Client{}
    if url == "http://www.google.com/" {
        time.Sleep(500 * time.Millisecond) //google is down
    }
    fmt.Printf("Fetching %s \n", url)
    resp, err := client.Get(url)
    u := &HttpResponse{url, resp, err, "fetched"}
    ch <- u
    fmt.Println("sent to chan")
    wg.Done()
}

func main() {
    fmt.Println("start")
    ch := make(chan *HttpResponse, len(urls))
    var wg sync.WaitGroup
    for _, url := range urls {
        wg.Add(1)
        go asyncHttpGets(url, ch, &wg)
    }
    go func() {
        wg.Wait()
        close(ch)
    }()
    for i := range ch {
        fmt.Println(i)
    }
    fmt.Println("Im done")
}
