Any better way to keep track of goroutine responses? - go

I'm trying to get my head around goroutines. I've created a simple program that performs the same search in parallel across multiple search engines. At the moment to keep track of the number of responses, I count the number I've received. It seems a bit amateur though.
Is there a better way of knowing when I've received a response from all of the goroutines in the following code?
package main
import (
"fmt"
"net/http"
"log"
)
type Query struct {
url string
status string
}
func search (url string, out chan Query) {
fmt.Printf("Fetching URL %s\n", url)
resp, err := http.Get(url)
if err != nil {
log.Fatal(err)
}
defer resp.Body.Close()
out <- Query{url, resp.Status}
}
func main() {
searchTerm := "carrot"
fmt.Println("Hello world! Searching for ", searchTerm)
searchEngines := []string{
"http://www.bing.co.uk/?q=",
"http://www.google.co.uk/?q=",
"http://www.yahoo.co.uk/?q="}
out := make(chan Query)
for i := 0; i < len(searchEngines); i++ {
go search(searchEngines[i] + searchTerm, out)
}
progress := 0
for {
// is there a better way of doing this step?
if progress >= len(searchEngines) {
break
}
fmt.Println("Polling...")
query := <-out
fmt.Printf("Status from %s was %s\n", query.url, query.status)
progress++
}
}

Please use sync.WaitGroup, there is an example in the pkg doc
searchEngines := []string{
"http://www.bing.co.uk/?q=",
"http://www.google.co.uk/?q=",
"http://www.yahoo.co.uk/?q="}
var wg sync.WaitGroup
out := make(chan Query)
for i := 0; i < len(searchEngines); i++ {
wg.Add(1)
go func (url string) {
defer wg.Done()
fmt.Printf("Fetching URL %s\n", url)
resp, err := http.Get(url)
if err != nil {
log.Fatal(err)
}
defer resp.Body.Close()
out <- Query{url, resp.Status}
}(searchEngines[i] + searchTerm)
}
wg.Wait()

Related

Go: negative WaitGroup counter

I'm somewhat new to go and am reworking code that I found somewhere else to fit my needs. Because of that, I don't totally understand what is happening here, although I get the general idea.
I'm running a few websocket clients using go routines, but I'm getting an unexpected error that causes the program to crash. My program seems to close one too many threads (excuse me if this is the wrong terminology) when there is an error reading a message from the websocket (check the conn.ReadMessage() func in the readHandler func). Any ideas on how would I work around this issue? I would really appreciate anyone taking the time to look through it. Thanks in advance!
package main
import (
"context"
"fmt"
"os"
"time"
"os/signal"
"syscall"
"sync"
"net/url"
"github.com/gorilla/websocket"
"strconv"
"encoding/json"
"log"
"bytes"
"compress/gzip"
"io/ioutil"
)
// Structs
type Ping struct {
Ping int64 `json:"ping"`
}
type Pong struct {
Pong int64 `json:"pong"`
}
type SubParams struct {
Sub string `json:"sub"`
ID string `json:"id"`
}
func InitSub(subType string, pair string, i int) []byte {
var idInt string = "id" + strconv.Itoa(i)
subStr := "market." + pair + "." + subType
sub := &SubParams{
Sub: subStr,
ID: idInt,
}
out, err := json.MarshalIndent(sub, "", " ")
if err != nil {
log.Println(err);
}
//log.Println(string(out))
return out
}
// main func
func main() {
var server string = "api.huobi.pro"
pairs := []string{"btcusdt", "ethusdt", "ltcusdt"}
comms := make(chan os.Signal, 1)
signal.Notify(comms, os.Interrupt, syscall.SIGTERM)
ctx := context.Background()
ctx, cancel := context.WithCancel(ctx)
var wg sync.WaitGroup
for x, pair := range pairs {
wg.Add(1)
go control(server, "ws", pair, ctx, &wg, x+1)
}
<-comms
cancel()
wg.Wait()
}
func control(server string, path string, pair string, ctx context.Context, wg *sync.WaitGroup, i int) {
fmt.Printf("Started control for %s\n", server)
url := url.URL {
Scheme: "wss",
Host: server,
Path: path,
}
fmt.Println(url.String())
conn, _, err := websocket.DefaultDialer.Dial(url.String(), nil)
if err != nil {
panic(err)
}
subscribe(conn, pair, i)
defer conn.Close()
var localwg sync.WaitGroup
localwg.Add(1)
go readHandler(ctx, conn, &localwg, server)
<- ctx.Done()
localwg.Wait()
wg.Done()
return
}
func readHandler(ctx context.Context, conn *websocket.Conn, wg *sync.WaitGroup, server string) {
for {
select {
case <- ctx.Done():
wg.Done()
return
default:
_, p, err := conn.ReadMessage()
if err != nil {
wg.Done()
fmt.Println(err)
}
r, err := gzip.NewReader(bytes.NewReader(p))
if(err == nil) {
result, err := ioutil.ReadAll(r)
if(err != nil) {
fmt.Println(err)
}
d := string(result)
fmt.Println(d)
var ping Ping
json.Unmarshal([]byte(d), &ping)
if (ping.Ping > 0) {
str := Pong{Pong: ping.Ping}
msg, err := json.Marshal(str)
if (err == nil) {
fmt.Println(string(msg))
conn.WriteMessage(websocket.TextMessage, []byte(msg))
}
}
}
}
}
}
func subscribe(conn *websocket.Conn, pair string, id int) {
sub := string(InitSub("trade.detail", pair, id))
err := conn.WriteMessage(websocket.TextMessage, []byte(sub))
if err != nil {
panic(err)
}
}
Break out of the readHandler loop when the connection fails:
_, p, err := conn.ReadMessage()
if err != nil {
wg.Done()
fmt.Println(err)
return // <--- add this line
}
Without the return, the function spins in a tight loop reading errors until the panic.
Use defer wg.Done() at the beginning of the goroutine to ensure that Done is called exactly once.
func readHandler(ctx context.Context, conn *websocket.Conn, wg *sync.WaitGroup, server string) {
defer wg.Done()
for {
select {
case <-ctx.Done():
return
default:
_, p, err := conn.ReadMessage()
if err != nil {
fmt.Println(err)
return
}
...
Update the control function also.
Because the caller does not execute any code concurrently with readHander, there's no value in running readHandler is a goroutine. Remove all references to wait groups from readHandler and call the function directly: change go readHandler(ctx, conn, &localwg, server) to readHandler(ctx, conn, server).
There are more issues, but this should move you further along.

Go is not writing complete data to text file

I am trying to explore Go concurrency. Here Grabber() prints and writes the result of the execution. The program prints the expected result, but does not write it to urls.txt. Can anyone explain to me what i am missing here?
main.go
package main
import (
"bufio"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"regexp"
"strings"
"sync"
)
var wg sync.WaitGroup
var mt sync.Mutex
// Final Literation
func main() {
file, err := os.Open("ip.txt")
if err != nil {
log.Fatal(err)
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
go Grabber(scanner.Text())
wg.Add(1)
}
wg.Wait()
if err := scanner.Err(); err != nil {
log.Fatal(err)
}
}
// stringInArray do If string in list return true false otherwise.
func stringInArray(a string, list []string) bool {
for _, b := range list {
if b == a {
return true
}
}
return false
}
// Grabber Do Search the bing and collect array of sitelist
func Grabber(ip string) {
defer wg.Done()
var output []string
outfile, err := os.Create("urls.txt")
if err != nil {
log.Fatal(err)
}
defer outfile.Close()
if ip == "" {
}
page := 1
for page < 251 {
client := &http.Client{}
req, err := http.NewRequest(
http.MethodGet,
fmt.Sprintf(
"http://www.bing.com/search?q=ip:%s+&count=50&first=1",
ip,
),
nil,
)
if err != nil {
}
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:57.0) Gecko/20100101 Firefox/57.0")
res, err := client.Do(req)
if err != nil {
fmt.Println("Invalid Request")
}
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
fmt.Println("Couldn't Read")
}
re := regexp.MustCompile(`<h2><a href="(.*?)"`)
links := re.FindAllString(string(body), -1)
if links != nil {
for l := range links {
o := strings.Split(links[l], `"`)
d := strings.Split(o[1], "/")
s := d[0] + "//" + d[2]
if !stringInArray(s, output) {
output = append(output, s)
}
}
}
page = page + 50
}
for _, links := range output {
fmt.Println(links)
fmt.Fprintln(outfile, links)
}
}
Ip.txt as input
103.253.145.129
103.253.146.125
103.253.146.239
103.253.147.72
146.185.176.79
146.185.176.45
146.185.179.250
146.185.180.35
146.185.180.185
146.185.180.113
146.185.181.51
146.185.183.107
146.185.183.202
146.185.183.248
146.185.183.219
146.185.184.69
146.185.185.169
git repo URLGrabber
You are calling create in each goroutine, which will truncate the file. Instead, create the file outside, and serialize the writes to it using another goroutine:
outfile, err := os.Create("urls.txt")
results:=make(chan []string)
go func() {
for output:=range results {
for _, links := range output {
fmt.Fprintln(outfile, links)
}
}
}()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
go Grabber(scanner.Text(), results)
wg.Add(1)
}
wg.Wait()
close(results)
When you get the results in Grabber, instead of writing it to the file, write it to the channel:
results<-output
for _, links := range output {
fmt.Println(links)
}

Confusion regarding channel directions and blocking in Go

In a function definition, if a channel is an argument without a direction, does it have to send or receive something?
func makeRequest(url string, ch chan<- string, results chan<- string) {
start := time.Now()
resp, err := http.Get(url)
defer resp.Body.Close()
if err != nil {
fmt.Printf("%v", err)
}
resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
defer resp.Body.Close()
secs := time.Since(start).Seconds()
if err != nil {
fmt.Printf("%v", err)
}
// Cannot move past this.
ch <- fmt.Sprintf("%f", secs)
results <- <- ch
}
func MakeRequestHelper(url string, ch chan string, results chan string, iterations int) {
for i := 0; i < iterations; i++ {
makeRequest(url, ch, results)
}
for i := 0; i < iterations; i++ {
fmt.Println(<-ch)
}
}
func main() {
args := os.Args[1:]
threadString := args[0]
iterationString := args[1]
url := args[2]
threads, err := strconv.Atoi(threadString)
if err != nil {
fmt.Printf("%v", err)
}
iterations, err := strconv.Atoi(iterationString)
if err != nil {
fmt.Printf("%v", err)
}
channels := make([]chan string, 100)
for i := range channels {
channels[i] = make(chan string)
}
// results aggregate all the things received by channels in all goroutines
results := make(chan string, iterations*threads)
for i := 0; i < threads; i++ {
go MakeRequestHelper(url, channels[i], results, iterations)
}
resultSlice := make([]string, threads*iterations)
for i := 0; i < threads*iterations; i++ {
resultSlice[i] = <-results
}
}
In the above code,
ch <- or <-results
seems to be blocking every goroutine that executes makeRequest.
I am new to concurrency model of Go. I understand that sending to and receiving from a channel blocks but find it difficult what is blocking what in this code.
I'm not really sure that you are doing... It seems really convoluted. I suggest you read up on how to use channels.
https://tour.golang.org/concurrency/2
That being said you have so much going on in your code that it was much easier to just gut it to something a bit simpler. (It can be simplified further). I left comments to understand the code.
package main
import (
"fmt"
"io/ioutil"
"log"
"net/http"
"sync"
"time"
)
// using structs is a nice way to organize your code
type Worker struct {
wg sync.WaitGroup
semaphore chan struct{}
result chan Result
client http.Client
}
// group returns so that you don't have to send to many channels
type Result struct {
duration float64
results string
}
// closing your channels will stop the for loop in main
func (w *Worker) Close() {
close(w.semaphore)
close(w.result)
}
func (w *Worker) MakeRequest(url string) {
// a semaphore is a simple way to rate limit the amount of goroutines running at any single point of time
// google them, Go uses them often
w.semaphore <- struct{}{}
defer func() {
w.wg.Done()
<-w.semaphore
}()
start := time.Now()
resp, err := w.client.Get(url)
if err != nil {
log.Println("error", err)
return
}
defer resp.Body.Close()
// don't have any examples where I need to also POST anything but the point should be made
// resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
// if err != nil {
// log.Println("error", err)
// return
// }
// defer resp.Body.Close()
secs := time.Since(start).Seconds()
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("error", err)
return
}
w.result <- Result{duration: secs, results: string(b)}
}
func main() {
urls := []string{"https://facebook.com/", "https://twitter.com/", "https://google.com/", "https://youtube.com/", "https://linkedin.com/", "https://wordpress.org/",
"https://instagram.com/", "https://pinterest.com/", "https://wikipedia.org/", "https://wordpress.com/", "https://blogspot.com/", "https://apple.com/",
}
workerNumber := 5
worker := Worker{
semaphore: make(chan struct{}, workerNumber),
result: make(chan Result),
client: http.Client{Timeout: 5 * time.Second},
}
// use sync groups to allow your code to wait for
// all your goroutines to finish
for _, url := range urls {
worker.wg.Add(1)
go worker.MakeRequest(url)
}
// by declaring wait and close as a seperate goroutine
// I can get to the for loop below and iterate on the results
// in a non blocking fashion
go func() {
worker.wg.Wait()
worker.Close()
}()
// do something with the results channel
for res := range worker.result {
fmt.Printf("Request took %2.f seconds.\nResults: %s\n\n", res.duration, res.results)
}
}
The channels in channels are nil (no make is executed; you make the slice but not the channels), so any send or receive will block. I'm not sure exactly what you're trying to do here, but that's the basic problem.
See https://golang.org/doc/effective_go.html#channels for an explanation of how channels work.

How to close a channel

I try to adapt this example:
https://gobyexample.com/worker-pools
But I don't know how to stop the channel because program don't exit at the end of the channel loop.
Can you explain how to exit the program?
package main
import (
"github.com/SlyMarbo/rss"
"bufio"
"fmt"
"log"
"os"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
func worker(id int, jobs <-chan string, results chan<- string) {
for url := range jobs {
fmt.Println("worker", id, "processing job", url)
feed, err := rss.Fetch(url)
if err != nil {
fmt.Println("Error on: ", url)
continue
}
borne := 0
for _, value := range feed.Items {
if borne < 5 {
results <- value.Link
borne = borne +1
} else {
continue
}
}
}
}
func main() {
jobs := make(chan string)
results := make(chan string)
for w := 1; w <= 16; w++ {
go worker(w, jobs, results)
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}
The flux.txt is a flat text file like :
http://blog.case.edu/news/feed.atom
...
The problem is that, in the example you are referring to, the worker pool reads from results 9 times:
for a := 1; a <= 9; a++ {
<-results
}
Your program, on the other hand, does a range loop over the results which has a different semantics in go. The range operator does not stop until the channel is closed.
for msg := range results {
fmt.Println(msg)
}
To fix your problem you'd need to close the results channel. However, if you just call close(results) before the for loop, you most probably will
get a panic, because the workers might be writing on results.
To fix this problem, you need to add another channel to be notified when all the workers are done. You can do this either using a sync.WaitGroup or :
const (
workers = 16
)
func main() {
jobs := make(chan string, 100)
results := make(chan string, 100)
var wg sync.WaitGroup
for w := 0; w < workers; w++ {
go func() {
wg.Add(1)
defer wg.Done()
worker(w, jobs, results)
}()
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
wg.Wait()
close(results)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}
Or a done channel:
package main
import (
"bufio"
"fmt"
"github.com/SlyMarbo/rss"
"log"
"os"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
func worker(id int, jobs <-chan string, results chan<- string, done chan struct{}) {
for url := range jobs {
fmt.Println("worker", id, "processing job", url)
feed, err := rss.Fetch(url)
if err != nil {
fmt.Println("Error on: ", url)
continue
}
borne := 0
for _, value := range feed.Items {
if borne < 5 {
results <- value.Link
borne = borne + 1
} else {
continue
}
}
}
close(done)
}
const (
workers = 16
)
func main() {
jobs := make(chan string, 100)
results := make(chan string, 100)
dones := make([]chan struct{}, workers)
for w := 0; w < workers; w++ {
dones[w] = make(chan struct{})
go worker(w, jobs, results, dones[w])
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
for _, done := range dones {
<-done
}
close(results)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}

golang sync.WaitGroup never completes

I have the below code that fetches a list of URL's and then conditionally downloads a file and saves it to the filesystem. The files are fetched concurrently and the main goroutine waits for all the files to be fetched. But, the program never exits (and there are no errors) after completing all the requests.
What I think is happening is that somehow the amount of go routines in the WaitGroup is either incremented by too many to begin with (via Add) or not decremented by enough (a Done call is not happening).
Is there something I am obviously doing wrong? How would I inspect how many go routines are presently in the WaitGroup so I can better debug what's happening?
package main
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strings"
"sync"
)
func main() {
links := parseLinks()
var wg sync.WaitGroup
for _, url := range links {
if isExcelDocument(url) {
wg.Add(1)
go downloadFromURL(url, wg)
} else {
fmt.Printf("Skipping: %v \n", url)
}
}
wg.Wait()
}
func downloadFromURL(url string, wg sync.WaitGroup) error {
tokens := strings.Split(url, "/")
fileName := tokens[len(tokens)-1]
fmt.Printf("Downloading %v to %v \n", url, fileName)
content, err := os.Create("temp_docs/" + fileName)
if err != nil {
fmt.Printf("Error while creating %v because of %v", fileName, err)
return err
}
resp, err := http.Get(url)
if err != nil {
fmt.Printf("Could not fetch %v because %v", url, err)
return err
}
defer resp.Body.Close()
_, err = io.Copy(content, resp.Body)
if err != nil {
fmt.Printf("Error while saving %v from %v", fileName, url)
return err
}
fmt.Printf("Download complete for %v \n", fileName)
defer wg.Done()
return nil
}
func isExcelDocument(url string) bool {
return strings.HasSuffix(url, ".xlsx") || strings.HasSuffix(url, ".xls")
}
func parseLinks() []string {
linksData, err := ioutil.ReadFile("links.txt")
if err != nil {
fmt.Printf("Trouble reading file: %v", err)
}
links := strings.Split(string(linksData), ", ")
return links
}
There are two problems with this code. First, you have to pass a pointer to the WaitGroup to downloadFromURL(), otherwise the object will be copied and Done() will not be visible in main().
See:
func main() {
...
go downloadFromURL(url, &wg)
...
}
Second, defer wg.Done() should be one of the first statements in downloadFromURL(), otherwise if you return from the function before that statement, it won't get "registered" and won't get called.
func downloadFromURL(url string, wg *sync.WaitGroup) error {
defer wg.Done()
...
}
Arguments in Go are always passed by value. Use a pointer when an argument may be modified. Also, make sure that you always execute wg.Done().For example,
package main
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strings"
"sync"
)
func main() {
links := parseLinks()
wg := new(sync.WaitGroup)
for _, url := range links {
if isExcelDocument(url) {
wg.Add(1)
go downloadFromURL(url, wg)
} else {
fmt.Printf("Skipping: %v \n", url)
}
}
wg.Wait()
}
func downloadFromURL(url string, wg *sync.WaitGroup) error {
defer wg.Done()
tokens := strings.Split(url, "/")
fileName := tokens[len(tokens)-1]
fmt.Printf("Downloading %v to %v \n", url, fileName)
content, err := os.Create("temp_docs/" + fileName)
if err != nil {
fmt.Printf("Error while creating %v because of %v", fileName, err)
return err
}
resp, err := http.Get(url)
if err != nil {
fmt.Printf("Could not fetch %v because %v", url, err)
return err
}
defer resp.Body.Close()
_, err = io.Copy(content, resp.Body)
if err != nil {
fmt.Printf("Error while saving %v from %v", fileName, url)
return err
}
fmt.Printf("Download complete for %v \n", fileName)
return nil
}
func isExcelDocument(url string) bool {
return strings.HasSuffix(url, ".xlsx") || strings.HasSuffix(url, ".xls")
}
func parseLinks() []string {
linksData, err := ioutil.ReadFile("links.txt")
if err != nil {
fmt.Printf("Trouble reading file: %v", err)
}
links := strings.Split(string(linksData), ", ")
return links
}
As #Bartosz mentioned, you will need to pass a reference to your WaitGroup object. He did a great job discussing the importance of defer ws.Done()
I like WaitGroup's simplicity. However, I do not like that we need to pass the reference to the goroutine because that would mean that the concurrency logic would be mixed with your business logic.
So I came up with this generic function to solve this problem for me:
// Parallelize parallelizes the function calls
func Parallelize(functions ...func()) {
var waitGroup sync.WaitGroup
waitGroup.Add(len(functions))
defer waitGroup.Wait()
for _, function := range functions {
go func(copy func()) {
defer waitGroup.Done()
copy()
}(function)
}
}
So your example could be solved this way:
func main() {
links := parseLinks()
functions := []func(){}
for _, url := range links {
if isExcelDocument(url) {
function := func(url string){
return func() { downloadFromURL(url) }
}(url)
functions = append(functions, function)
} else {
fmt.Printf("Skipping: %v \n", url)
}
}
Parallelize(functions...)
}
func downloadFromURL(url string) {
...
}
If you would like to use it, you can find it here https://github.com/shomali11/util

Resources