Is it a better way to do parallel programming that this? - go

I made this script for getting the follower count of "influencers" from instagram
the "runtime" number I am getting from it is between 550-750ms.
It is not that bad, but I am wondering whether it could be better or not (as I am a golang noob - learning it 3 weeks only)
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(in <-chan string) <-chan int {
out := make(chan int)
go func() {
for un := range in {
URL := "https://www.instagram.com/" + un + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
// handle error
fmt.Println(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
var u user
err = json.Unmarshal(body, &u)
if err != nil {
fmt.Println(err)
}
// return u.User.Followers.Count
out <- u.User.Followers.Count
}
close(out)
}()
return out
}
func merge(cs ...<-chan int) <-chan int {
var wg sync.WaitGroup
out := make(chan int)
output := func(c <-chan int) {
for n := range c {
out <- n
}
wg.Done()
}
wg.Add(len(cs))
for _, c := range cs {
go output(c)
}
go func() {
wg.Wait()
close(out)
}()
return out
}
func gen(users ...string) <-chan string {
out := make(chan string)
go func() {
for _, u := range users {
out <- u
}
close(out)
}()
return out
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
in := gen(usrs...)
d1 := getFollowerCount(in)
d2 := getFollowerCount(in)
d3 := getFollowerCount(in)
d4 := getFollowerCount(in)
d5 := getFollowerCount(in)
d6 := getFollowerCount(in)
d7 := getFollowerCount(in)
d8 := getFollowerCount(in)
d9 := getFollowerCount(in)
d10 := getFollowerCount(in)
for d := range merge(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10) {
fmt.Println(d)
}
elapsed := time.Since(start)
log.Println("runtime", elapsed)
}

I agree with jeevatkm, there are numerous way to implement your task and improve it. Some notes:
Separate the function that actually do the job (i.e. fetch result from remote service) and the function which is responsible for coordinating all the jobs.
It is a good practice to propagate an errorto the caller instead of consumes (handles) it in a function to be called.
Since the jobs are done in parallel, the result could be returned in undetermined order. Thus, besides follower count, result should contains other related information(s).
The following implementation may be one alternative:
package main
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
//Wrap username, count, and error. See (3) above.
type follower struct {
Username string
Count int
Error error
}
//GetFollowerCountFunc is a function for
//fetching follower count of a specific user.
type GetFollowerCountFunc func(string) (int, error)
//Mockup function for test
func mockGetFollowerCountFor(userName string) (int, error) {
if len(userName) < 9 {
return -1, errors.New("mocking error in get follower count")
}
return 10, nil
}
//Fetch result from remote service. See (1) above.
func getFollowerCountFor(userName string) (int, error) {
URL := "https://www.instagram.com/" + userName + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
return -1, err
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
return -1, err
}
return u.User.Followers.Count, nil
}
//Function that coordinates/distributes the jobs. See (1), (2) above.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
//allocate channels for storing result
//number of allocated channels define the maximum *parallel* worker
followers := make(chan follower, len(users))
//The following is also valid
//followers := make(chan follower, 5)
//Do the job distribution in goroutine (Asynchronously)
go func() {
var wg sync.WaitGroup
wg.Add(len(users))
for _, u := range users {
//Run a *parallel* worker
go func(uid string) {
cnt, err := fn(uid)
if err != nil {
followers <- follower{uid, -1, err}
} else {
followers <- follower{uid, cnt, nil}
}
wg.Done()
}(u)
}
//wait all workers finish
wg.Wait()
//close the channels so the `for ... range` will exit gracefully
close(followers)
}()
//This function will returns immediately
return followers
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
results := getFollowersAsync(usrs, getFollowerCountFor)
//For TESTING:
//results := getFollowersAsync(usrs, mockGetFollowerCountFor)
for r := range results {
if r.Error != nil {
fmt.Printf("Error for user '%s' => %v", r.Username, r.Error)
} else {
fmt.Printf("%s: %d\n", r.Username, r.Count)
}
}
elapsed := time.Since(start)
fmt.Println("runtime", elapsed)
}

Welcome to Go, happy learning.
You're doing good, you can improve your program many ways (such as json decoder, less no of chan, etc). Following is one of the approach. Execution time is between 352-446ms (take it with grain of salt, since network call is involved in your code. Might vary based on server response time).
Your updated code:
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
defer wg.Done()
reqURL := "https://www.instagram.com/" + username + "/?__a=1"
resp, err := http.Get(reqURL)
if err != nil {
log.Println(err)
return
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
log.Println(err)
return
}
result <- u.User.Followers.Count
}
func execute(users []string, result chan<- int) {
wg := &sync.WaitGroup{}
for _, username := range users {
wg.Add(1)
go getFollowerCount(username, result, wg)
}
wg.Wait()
result <- -1
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
result := make(chan int)
go execute(usrs, result)
for v := range result {
if v == -1 {
break
}
fmt.Println(v)
}
elapsed := time.Since(start)
fmt.Println("runtime:", elapsed)
}

Related

Go: negative WaitGroup counter

I'm somewhat new to go and am reworking code that I found somewhere else to fit my needs. Because of that, I don't totally understand what is happening here, although I get the general idea.
I'm running a few websocket clients using go routines, but I'm getting an unexpected error that causes the program to crash. My program seems to close one too many threads (excuse me if this is the wrong terminology) when there is an error reading a message from the websocket (check the conn.ReadMessage() func in the readHandler func). Any ideas on how would I work around this issue? I would really appreciate anyone taking the time to look through it. Thanks in advance!
package main
import (
"context"
"fmt"
"os"
"time"
"os/signal"
"syscall"
"sync"
"net/url"
"github.com/gorilla/websocket"
"strconv"
"encoding/json"
"log"
"bytes"
"compress/gzip"
"io/ioutil"
)
// Structs
type Ping struct {
Ping int64 `json:"ping"`
}
type Pong struct {
Pong int64 `json:"pong"`
}
type SubParams struct {
Sub string `json:"sub"`
ID string `json:"id"`
}
func InitSub(subType string, pair string, i int) []byte {
var idInt string = "id" + strconv.Itoa(i)
subStr := "market." + pair + "." + subType
sub := &SubParams{
Sub: subStr,
ID: idInt,
}
out, err := json.MarshalIndent(sub, "", " ")
if err != nil {
log.Println(err);
}
//log.Println(string(out))
return out
}
// main func
func main() {
var server string = "api.huobi.pro"
pairs := []string{"btcusdt", "ethusdt", "ltcusdt"}
comms := make(chan os.Signal, 1)
signal.Notify(comms, os.Interrupt, syscall.SIGTERM)
ctx := context.Background()
ctx, cancel := context.WithCancel(ctx)
var wg sync.WaitGroup
for x, pair := range pairs {
wg.Add(1)
go control(server, "ws", pair, ctx, &wg, x+1)
}
<-comms
cancel()
wg.Wait()
}
func control(server string, path string, pair string, ctx context.Context, wg *sync.WaitGroup, i int) {
fmt.Printf("Started control for %s\n", server)
url := url.URL {
Scheme: "wss",
Host: server,
Path: path,
}
fmt.Println(url.String())
conn, _, err := websocket.DefaultDialer.Dial(url.String(), nil)
if err != nil {
panic(err)
}
subscribe(conn, pair, i)
defer conn.Close()
var localwg sync.WaitGroup
localwg.Add(1)
go readHandler(ctx, conn, &localwg, server)
<- ctx.Done()
localwg.Wait()
wg.Done()
return
}
func readHandler(ctx context.Context, conn *websocket.Conn, wg *sync.WaitGroup, server string) {
for {
select {
case <- ctx.Done():
wg.Done()
return
default:
_, p, err := conn.ReadMessage()
if err != nil {
wg.Done()
fmt.Println(err)
}
r, err := gzip.NewReader(bytes.NewReader(p))
if(err == nil) {
result, err := ioutil.ReadAll(r)
if(err != nil) {
fmt.Println(err)
}
d := string(result)
fmt.Println(d)
var ping Ping
json.Unmarshal([]byte(d), &ping)
if (ping.Ping > 0) {
str := Pong{Pong: ping.Ping}
msg, err := json.Marshal(str)
if (err == nil) {
fmt.Println(string(msg))
conn.WriteMessage(websocket.TextMessage, []byte(msg))
}
}
}
}
}
}
func subscribe(conn *websocket.Conn, pair string, id int) {
sub := string(InitSub("trade.detail", pair, id))
err := conn.WriteMessage(websocket.TextMessage, []byte(sub))
if err != nil {
panic(err)
}
}
Break out of the readHandler loop when the connection fails:
_, p, err := conn.ReadMessage()
if err != nil {
wg.Done()
fmt.Println(err)
return // <--- add this line
}
Without the return, the function spins in a tight loop reading errors until the panic.
Use defer wg.Done() at the beginning of the goroutine to ensure that Done is called exactly once.
func readHandler(ctx context.Context, conn *websocket.Conn, wg *sync.WaitGroup, server string) {
defer wg.Done()
for {
select {
case <-ctx.Done():
return
default:
_, p, err := conn.ReadMessage()
if err != nil {
fmt.Println(err)
return
}
...
Update the control function also.
Because the caller does not execute any code concurrently with readHander, there's no value in running readHandler is a goroutine. Remove all references to wait groups from readHandler and call the function directly: change go readHandler(ctx, conn, &localwg, server) to readHandler(ctx, conn, server).
There are more issues, but this should move you further along.

Get responses from multiple go routines into an array

I need to fetch responses from multiple go routines and put them into an array. I know that channels could be used for this, however I am not sure how I can make sure that all go routines have finished processing the results. Thus I am using a waitgroup.
Code
func main() {
log.Info("Collecting ints")
var results []int32
for _, broker := range e.BrokersByBrokerID {
wg.Add(1)
go getInt32(&wg)
}
wg.Wait()
log.info("Collected")
}
func getInt32(wg *sync.WaitGroup) (int32, error) {
defer wg.Done()
// Just to show that this method may just return an error and no int32
err := broker.Open(config)
if err != nil && err != sarama.ErrAlreadyConnected {
return 0, fmt.Errorf("Cannot connect to broker '%v': %s", broker.ID(), err)
}
defer broker.Close()
return 1003, nil
}
My question
How can I put all the response int32 (which may return an error) into my int32 array, making sure that all go routines have finished their processing work and returned either the error or the int?
If you don't process the return values of the function launched as a goroutine, they are discarded. See What happens to return value from goroutine.
You may use a slice to collect the results, where each goroutine could receive the index to put the results to, or alternatively the address of the element. See Can I concurrently write different slice elements. Note that if you use this, the slice must be pre-allocated and only the element belonging to the goroutine may be written, you can't "touch" other elements and you can't append to the slice.
Or you may use a channel, on which the goroutines send values that include the index or ID of the item they processed, so the collecting goroutine can identify or order them. See How to collect values from N goroutines executed in a specific order?
If processing should stop on the first error encountered, see Close multiple goroutine if an error occurs in one in go
Here's an example how it could look like when using a channel. Note that no waitgroup is needed here, because we know that we expect as many values on the channel as many goroutines we launch.
type result struct {
task int32
data int32
err error
}
func main() {
tasks := []int32{1, 2, 3, 4}
ch := make(chan result)
for _, task := range tasks {
go calcTask(task, ch)
}
// Collect results:
results := make([]result, len(tasks))
for i := range results {
results[i] = <-ch
}
fmt.Printf("Results: %+v\n", results)
}
func calcTask(task int32, ch chan<- result) {
if task > 2 {
// Simulate failure
ch <- result{task: task, err: fmt.Errorf("task %v failed", task)}
return
}
// Simulate success
ch <- result{task: task, data: task * 2, err: nil}
}
Output (try ot on the Go Playground):
Results: [{task:4 data:0 err:0x40e130} {task:1 data:2 err:<nil>} {task:2 data:4 err:<nil>} {task:3 data:0 err:0x40e138}]
I also believe you have to use channel, it must be something like this:
package main
import (
"fmt"
"log"
"sync"
)
var (
BrokersByBrokerID = []int32{1, 2, 3}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup
var results []result
ch := make(chan result)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go getInt32(ch, &wg, broker)
}
go func() {
for v := range ch {
results = append(results, v)
}
}()
wg.Wait()
close(ch)
log.Printf("collected %v", results)
}
func getInt32(ch chan result, wg *sync.WaitGroup, broker int32) {
defer wg.Done()
if broker == 1 {
ch <- result{err: fmt.Sprintf("error: gor broker 1")}
return
}
ch <- result{data: fmt.Sprintf("broker %d - ok", broker)}
}
Result will look like this:
2019/02/05 15:26:28 collected [{broker 3 - ok } {broker 2 - ok } { error: gor broker 1}]
package main
import (
"fmt"
"log"
"sync"
)
var (
BrokersByBrokerID = []int{1, 2, 3, 4}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup
var results []int
ch := make(chan int)
done := make(chan bool)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go func(i int) {
defer wg.Done()
ch <- i
if i == 4 {
done <- true
}
}(broker)
}
L:
for {
select {
case v := <-ch:
results = append(results, v)
if len(results) == 4 {
//<-done
close(ch)
break L
}
case _ = <-done:
break
}
}
fmt.Println("STOPPED")
//<-done
wg.Wait()
log.Printf("collected %v", results)
}
Thank cn007b and Edenshaw. My answer is based on their answers.
As Edenshaw commented, need another sync.Waitgroup for goroutine which getting results from channel, or you may get an incomplete array.
package main
import (
"fmt"
"sync"
"encoding/json"
)
type Resp struct {
id int
}
func main() {
var wg sync.WaitGroup
chanRes := make(chan interface{}, 3)
for i := 0; i < 3; i++ {
wg.Add(1)
resp := &Resp{}
go func(i int, resp *Resp) {
defer wg.Done()
resp.id = i
chanRes <- resp
}(i, resp)
}
res := make([]interface{}, 0)
var wg2 sync.WaitGroup
wg2.Add(1)
go func() {
defer wg2.Done()
for v := range chanRes {
res = append(res, v.(*Resp).id)
}
}()
wg.Wait()
close(chanRes)
wg2.Wait()
resStr, _ := json.Marshal(res)
fmt.Println(string(resStr))
}
package main
import (
"fmt"
"log"
"sync"
"time"
)
var (
BrokersByBrokerID = []int{1, 2, 3, 4}
)
type result struct {
data string
err string // you must use error type here
}
func main() {
var wg sync.WaitGroup.
var results []int
ch := make(chan int)
done := make(chan bool)
for _, broker := range BrokersByBrokerID {
wg.Add(1)
go func(i int) {
defer wg.Done()
ch <- i
if i == 4 {
done <- true
}
}(broker)
}
for v := range ch {
results = append(results, v)
if len(results) == 4 {
close(ch)
}
}
fmt.Println("STOPPED")
<-done
wg.Wait()
log.Printf("collected %v", results)
}
</pre>

Confusion regarding channel directions and blocking in Go

In a function definition, if a channel is an argument without a direction, does it have to send or receive something?
func makeRequest(url string, ch chan<- string, results chan<- string) {
start := time.Now()
resp, err := http.Get(url)
defer resp.Body.Close()
if err != nil {
fmt.Printf("%v", err)
}
resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
defer resp.Body.Close()
secs := time.Since(start).Seconds()
if err != nil {
fmt.Printf("%v", err)
}
// Cannot move past this.
ch <- fmt.Sprintf("%f", secs)
results <- <- ch
}
func MakeRequestHelper(url string, ch chan string, results chan string, iterations int) {
for i := 0; i < iterations; i++ {
makeRequest(url, ch, results)
}
for i := 0; i < iterations; i++ {
fmt.Println(<-ch)
}
}
func main() {
args := os.Args[1:]
threadString := args[0]
iterationString := args[1]
url := args[2]
threads, err := strconv.Atoi(threadString)
if err != nil {
fmt.Printf("%v", err)
}
iterations, err := strconv.Atoi(iterationString)
if err != nil {
fmt.Printf("%v", err)
}
channels := make([]chan string, 100)
for i := range channels {
channels[i] = make(chan string)
}
// results aggregate all the things received by channels in all goroutines
results := make(chan string, iterations*threads)
for i := 0; i < threads; i++ {
go MakeRequestHelper(url, channels[i], results, iterations)
}
resultSlice := make([]string, threads*iterations)
for i := 0; i < threads*iterations; i++ {
resultSlice[i] = <-results
}
}
In the above code,
ch <- or <-results
seems to be blocking every goroutine that executes makeRequest.
I am new to concurrency model of Go. I understand that sending to and receiving from a channel blocks but find it difficult what is blocking what in this code.
I'm not really sure that you are doing... It seems really convoluted. I suggest you read up on how to use channels.
https://tour.golang.org/concurrency/2
That being said you have so much going on in your code that it was much easier to just gut it to something a bit simpler. (It can be simplified further). I left comments to understand the code.
package main
import (
"fmt"
"io/ioutil"
"log"
"net/http"
"sync"
"time"
)
// using structs is a nice way to organize your code
type Worker struct {
wg sync.WaitGroup
semaphore chan struct{}
result chan Result
client http.Client
}
// group returns so that you don't have to send to many channels
type Result struct {
duration float64
results string
}
// closing your channels will stop the for loop in main
func (w *Worker) Close() {
close(w.semaphore)
close(w.result)
}
func (w *Worker) MakeRequest(url string) {
// a semaphore is a simple way to rate limit the amount of goroutines running at any single point of time
// google them, Go uses them often
w.semaphore <- struct{}{}
defer func() {
w.wg.Done()
<-w.semaphore
}()
start := time.Now()
resp, err := w.client.Get(url)
if err != nil {
log.Println("error", err)
return
}
defer resp.Body.Close()
// don't have any examples where I need to also POST anything but the point should be made
// resp, err = http.Post(url, "text/plain", bytes.NewBuffer([]byte("Hey")))
// if err != nil {
// log.Println("error", err)
// return
// }
// defer resp.Body.Close()
secs := time.Since(start).Seconds()
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("error", err)
return
}
w.result <- Result{duration: secs, results: string(b)}
}
func main() {
urls := []string{"https://facebook.com/", "https://twitter.com/", "https://google.com/", "https://youtube.com/", "https://linkedin.com/", "https://wordpress.org/",
"https://instagram.com/", "https://pinterest.com/", "https://wikipedia.org/", "https://wordpress.com/", "https://blogspot.com/", "https://apple.com/",
}
workerNumber := 5
worker := Worker{
semaphore: make(chan struct{}, workerNumber),
result: make(chan Result),
client: http.Client{Timeout: 5 * time.Second},
}
// use sync groups to allow your code to wait for
// all your goroutines to finish
for _, url := range urls {
worker.wg.Add(1)
go worker.MakeRequest(url)
}
// by declaring wait and close as a seperate goroutine
// I can get to the for loop below and iterate on the results
// in a non blocking fashion
go func() {
worker.wg.Wait()
worker.Close()
}()
// do something with the results channel
for res := range worker.result {
fmt.Printf("Request took %2.f seconds.\nResults: %s\n\n", res.duration, res.results)
}
}
The channels in channels are nil (no make is executed; you make the slice but not the channels), so any send or receive will block. I'm not sure exactly what you're trying to do here, but that's the basic problem.
See https://golang.org/doc/effective_go.html#channels for an explanation of how channels work.

why `sync.WaitGroup` can't finish?

Here is my code:
package main
import (
"bytes"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"runtime"
"sync"
)
type Data struct {
Link string `json:"url"`
}
type Result struct {
Code uint32
Msg string `json:"msg"`
Data Data `json:"data"`
}
const (
URL = "http://qiye.wxsdc.ediankai.com/api/v1/suppliers/1/staff/1/box/get"
SIGNKEY = "i5OqMrNXVyOJ5GEMYoEtRHqN1P9ghk6I"
DATA_ID = "2965612126"
EQU_ID = "1482806063"
)
func getMD5Hash(text string) string {
hasher := md5.New()
hasher.Write([]byte(text))
return hex.EncodeToString(hasher.Sum(nil))
}
func getUrl(payload []byte, wg *sync.WaitGroup, result chan string) {
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload))
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
panic(err)
}
var res Result
json.Unmarshal(body, &res)
log.Println(res.Data.Link)
result <- res.Data.Link
wg.Add(-1)
}
func main() {
parameterStr := fmt.Sprintf("%vdata_id%vequ_id%v%v", SIGNKEY, DATA_ID, EQU_ID, SIGNKEY)
log.Println(parameterStr)
sign := getMD5Hash(parameterStr)
log.Println(sign)
var payload map[string]string = make(map[string]string)
payload["equ_id"] = EQU_ID
payload["data_id"] = DATA_ID
payload["sign"] = sign
payloadJson, err := json.Marshal(payload)
if err != nil {
log.Fatalln("convet paylod failed!")
}
log.Println(string(payloadJson))
runtime.GOMAXPROCS(runtime.NumCPU())
var wg sync.WaitGroup
result := make(chan string)
for i := 0; i < 10; i++ {
wg.Add(1)
go getUrl(payloadJson, &wg, result)
}
wg.Wait()
for link := range result {
fmt.Println(link)
}
log.Println("Done!")
}
But:
for link := range result {
fmt.Println(link)
}
log.Println("Done!")
can't be executed, what's the reason?
You need to close the result channel so the for loop which reads from it is interrupted when its finished. For that you could rewrite the last part as:
var wg sync.WaitGroup
result := make(chan string)
go func() {
for link := range result {
fmt.Println(link)
}
}()
for i := 0; i < 10; i++ {
wg.Add(1)
go getUrl(payloadJson, &wg, result)
}
wg.Wait()
close(result)
Your for loop never stop. When all the getUrl finish, it continues to wait on result. Try this:
wg.Wait()
close(result)
for link := range result {
fmt.Println(link)
}
log.Println("Done!")
If you want to print the result, you could do like:
package main
import (
"bytes"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"runtime"
"sync"
)
type Data struct {
Link string `json:"url"`
}
type Result struct {
Code uint32
Msg string `json:"msg"`
Data Data `json:"data"`
}
const (
URL = "http://qiye.wxsdc.ediankai.com/api/v1/suppliers/1/staff/1/box/get"
SIGNKEY = "i5OqMrNXVyOJ5GEMYoEtRHqN1P9ghk6I"
DATA_ID = "2965612126"
EQU_ID = "1482806063"
)
func getMD5Hash(text string) string {
hasher := md5.New()
hasher.Write([]byte(text))
return hex.EncodeToString(hasher.Sum(nil))
}
func getUrl(payload []byte, wg *sync.WaitGroup, result chan string) {
defer func(){
wg.Done()
}();
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload))
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return
}
var res Result
json.Unmarshal(body, &res)
log.Println(res.Data.Link)
result <- res.Data.Link
}
func monitor(wg *sync.WaitGroup, cs chan string) {
wg.Wait()
close(cs)
}
func printResult(result <-chan string, done chan<- bool) {
for i := range result {
fmt.Println(i)
}
done <- true
}
func main() {
parameterStr := fmt.Sprintf("%vdata_id%vequ_id%v%v", SIGNKEY, DATA_ID, EQU_ID, SIGNKEY)
log.Println(parameterStr)
sign := getMD5Hash(parameterStr)
log.Println(sign)
var payload map[string]string = make(map[string]string)
payload["equ_id"] = EQU_ID
payload["data_id"] = DATA_ID
payload["sign"] = sign
payloadJson, err := json.Marshal(payload)
if err != nil {
log.Fatalln("convet paylod failed!")
}
log.Println(string(payloadJson))
runtime.GOMAXPROCS(runtime.NumCPU())
var wg sync.WaitGroup
result := make(chan string)
for i := 0; i < 10; i++ {
wg.Add(1)
go getUrl(payloadJson, &wg, result)
}
go monitor(&wg, result)
done := make(chan bool, 1)
go printResult(result, done)
<-done
log.Println("Done!")
}

How to close a channel

I try to adapt this example:
https://gobyexample.com/worker-pools
But I don't know how to stop the channel because program don't exit at the end of the channel loop.
Can you explain how to exit the program?
package main
import (
"github.com/SlyMarbo/rss"
"bufio"
"fmt"
"log"
"os"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
func worker(id int, jobs <-chan string, results chan<- string) {
for url := range jobs {
fmt.Println("worker", id, "processing job", url)
feed, err := rss.Fetch(url)
if err != nil {
fmt.Println("Error on: ", url)
continue
}
borne := 0
for _, value := range feed.Items {
if borne < 5 {
results <- value.Link
borne = borne +1
} else {
continue
}
}
}
}
func main() {
jobs := make(chan string)
results := make(chan string)
for w := 1; w <= 16; w++ {
go worker(w, jobs, results)
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}
The flux.txt is a flat text file like :
http://blog.case.edu/news/feed.atom
...
The problem is that, in the example you are referring to, the worker pool reads from results 9 times:
for a := 1; a <= 9; a++ {
<-results
}
Your program, on the other hand, does a range loop over the results which has a different semantics in go. The range operator does not stop until the channel is closed.
for msg := range results {
fmt.Println(msg)
}
To fix your problem you'd need to close the results channel. However, if you just call close(results) before the for loop, you most probably will
get a panic, because the workers might be writing on results.
To fix this problem, you need to add another channel to be notified when all the workers are done. You can do this either using a sync.WaitGroup or :
const (
workers = 16
)
func main() {
jobs := make(chan string, 100)
results := make(chan string, 100)
var wg sync.WaitGroup
for w := 0; w < workers; w++ {
go func() {
wg.Add(1)
defer wg.Done()
worker(w, jobs, results)
}()
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
wg.Wait()
close(results)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}
Or a done channel:
package main
import (
"bufio"
"fmt"
"github.com/SlyMarbo/rss"
"log"
"os"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
func worker(id int, jobs <-chan string, results chan<- string, done chan struct{}) {
for url := range jobs {
fmt.Println("worker", id, "processing job", url)
feed, err := rss.Fetch(url)
if err != nil {
fmt.Println("Error on: ", url)
continue
}
borne := 0
for _, value := range feed.Items {
if borne < 5 {
results <- value.Link
borne = borne + 1
} else {
continue
}
}
}
close(done)
}
const (
workers = 16
)
func main() {
jobs := make(chan string, 100)
results := make(chan string, 100)
dones := make([]chan struct{}, workers)
for w := 0; w < workers; w++ {
dones[w] = make(chan struct{})
go worker(w, jobs, results, dones[w])
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
for _, done := range dones {
<-done
}
close(results)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}

Resources