why `sync.WaitGroup` can't finish? - go

Here is my code:
package main
import (
"bytes"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"runtime"
"sync"
)
type Data struct {
Link string `json:"url"`
}
type Result struct {
Code uint32
Msg string `json:"msg"`
Data Data `json:"data"`
}
const (
URL = "http://qiye.wxsdc.ediankai.com/api/v1/suppliers/1/staff/1/box/get"
SIGNKEY = "i5OqMrNXVyOJ5GEMYoEtRHqN1P9ghk6I"
DATA_ID = "2965612126"
EQU_ID = "1482806063"
)
func getMD5Hash(text string) string {
hasher := md5.New()
hasher.Write([]byte(text))
return hex.EncodeToString(hasher.Sum(nil))
}
func getUrl(payload []byte, wg *sync.WaitGroup, result chan string) {
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload))
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
panic(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
panic(err)
}
var res Result
json.Unmarshal(body, &res)
log.Println(res.Data.Link)
result <- res.Data.Link
wg.Add(-1)
}
func main() {
parameterStr := fmt.Sprintf("%vdata_id%vequ_id%v%v", SIGNKEY, DATA_ID, EQU_ID, SIGNKEY)
log.Println(parameterStr)
sign := getMD5Hash(parameterStr)
log.Println(sign)
var payload map[string]string = make(map[string]string)
payload["equ_id"] = EQU_ID
payload["data_id"] = DATA_ID
payload["sign"] = sign
payloadJson, err := json.Marshal(payload)
if err != nil {
log.Fatalln("convet paylod failed!")
}
log.Println(string(payloadJson))
runtime.GOMAXPROCS(runtime.NumCPU())
var wg sync.WaitGroup
result := make(chan string)
for i := 0; i < 10; i++ {
wg.Add(1)
go getUrl(payloadJson, &wg, result)
}
wg.Wait()
for link := range result {
fmt.Println(link)
}
log.Println("Done!")
}
But:
for link := range result {
fmt.Println(link)
}
log.Println("Done!")
can't be executed, what's the reason?

You need to close the result channel so the for loop which reads from it is interrupted when its finished. For that you could rewrite the last part as:
var wg sync.WaitGroup
result := make(chan string)
go func() {
for link := range result {
fmt.Println(link)
}
}()
for i := 0; i < 10; i++ {
wg.Add(1)
go getUrl(payloadJson, &wg, result)
}
wg.Wait()
close(result)

Your for loop never stop. When all the getUrl finish, it continues to wait on result. Try this:
wg.Wait()
close(result)
for link := range result {
fmt.Println(link)
}
log.Println("Done!")
If you want to print the result, you could do like:
package main
import (
"bytes"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"runtime"
"sync"
)
type Data struct {
Link string `json:"url"`
}
type Result struct {
Code uint32
Msg string `json:"msg"`
Data Data `json:"data"`
}
const (
URL = "http://qiye.wxsdc.ediankai.com/api/v1/suppliers/1/staff/1/box/get"
SIGNKEY = "i5OqMrNXVyOJ5GEMYoEtRHqN1P9ghk6I"
DATA_ID = "2965612126"
EQU_ID = "1482806063"
)
func getMD5Hash(text string) string {
hasher := md5.New()
hasher.Write([]byte(text))
return hex.EncodeToString(hasher.Sum(nil))
}
func getUrl(payload []byte, wg *sync.WaitGroup, result chan string) {
defer func(){
wg.Done()
}();
req, err := http.NewRequest("POST", URL, bytes.NewBuffer(payload))
req.Header.Set("Content-Type", "application/json")
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return
}
var res Result
json.Unmarshal(body, &res)
log.Println(res.Data.Link)
result <- res.Data.Link
}
func monitor(wg *sync.WaitGroup, cs chan string) {
wg.Wait()
close(cs)
}
func printResult(result <-chan string, done chan<- bool) {
for i := range result {
fmt.Println(i)
}
done <- true
}
func main() {
parameterStr := fmt.Sprintf("%vdata_id%vequ_id%v%v", SIGNKEY, DATA_ID, EQU_ID, SIGNKEY)
log.Println(parameterStr)
sign := getMD5Hash(parameterStr)
log.Println(sign)
var payload map[string]string = make(map[string]string)
payload["equ_id"] = EQU_ID
payload["data_id"] = DATA_ID
payload["sign"] = sign
payloadJson, err := json.Marshal(payload)
if err != nil {
log.Fatalln("convet paylod failed!")
}
log.Println(string(payloadJson))
runtime.GOMAXPROCS(runtime.NumCPU())
var wg sync.WaitGroup
result := make(chan string)
for i := 0; i < 10; i++ {
wg.Add(1)
go getUrl(payloadJson, &wg, result)
}
go monitor(&wg, result)
done := make(chan bool, 1)
go printResult(result, done)
<-done
log.Println("Done!")
}

Related

Is it possible to terminate a Go channel to a sink goroutine?

I have a goroutine that reads a file in chunks and passes them through a channel to another goroutine that calculates a checksum for the file. The consuming goroutine is kind of a sink for the channel.
Is it possible to have the consumer return the checksum string after all of the bytes have been received from the channel, or must I use a string stream to return the value? For the former, I get a deadlock, but I am not using any waitgroups; Not sure how to apply them in this case.
I'd appreciate your comments and thank you for your help.
// main()
var done = make(chan bool)
defer close(done)
checksum := FileCheckSum(ReadFileToChannel("mydata4.bin", done), done)
fmt.Println("Final Checksum: ", checksum)
// FileCheckSum()
import (
"crypto/sha256"
"encoding/hex"
"log"
)
func FileCheckSum(cin <-chan []byte, done <-chan bool) string {
chunkStream := make(chan []byte)
checksum := func(in <-chan []byte, done <-chan bool) string {
defer close(chunkStream)
hasher := sha256.New()
for chunk := range in {
_, err := hasher.Write(chunk[:len(chunk)])
if err != nil {
log.Fatal(err)
}
select {
case <-done:
return ""
}
}
return hex.EncodeToString(hasher.Sum(nil))
}(cin, done)
return checksum
}
Yes, and let me simplify your code - and read comments inside the following code.
Try this:
package main
import (
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
)
func FileCheckSum(cin <-chan []byte) string {
h := sha256.New()
for buf := range cin {
_, err := h.Write(buf)
if err != nil {
log.Fatal(err)
}
}
return hex.EncodeToString(h.Sum(nil))
}
func ReadFileToChannel(filename string) chan []byte {
gen := make(chan []byte)
go func() { // goroutine
defer close(gen) // signal end of reading file
for i := 0; i < 10; i++ { // e.g. read from file
b := make([]byte, 16) // make new slice every time
_, err := rand.Read(b) // fill it
if err != nil {
log.Fatal(err)
}
gen <- b // send it
}
}()
return gen
}
func main() {
ch := ReadFileToChannel("mydata4.bin")
crc := FileCheckSum(ch)
fmt.Println("Final Checksum: ", crc)
}
Output:
Final Checksum: 1e0ad2ec11bfe77833af670c6de296f530c2217d18aa1b8e600feddf6998fb95
Note
Your code needs a code review, you may head over here for the code review.

Go: negative WaitGroup counter

I'm somewhat new to go and am reworking code that I found somewhere else to fit my needs. Because of that, I don't totally understand what is happening here, although I get the general idea.
I'm running a few websocket clients using go routines, but I'm getting an unexpected error that causes the program to crash. My program seems to close one too many threads (excuse me if this is the wrong terminology) when there is an error reading a message from the websocket (check the conn.ReadMessage() func in the readHandler func). Any ideas on how would I work around this issue? I would really appreciate anyone taking the time to look through it. Thanks in advance!
package main
import (
"context"
"fmt"
"os"
"time"
"os/signal"
"syscall"
"sync"
"net/url"
"github.com/gorilla/websocket"
"strconv"
"encoding/json"
"log"
"bytes"
"compress/gzip"
"io/ioutil"
)
// Structs
type Ping struct {
Ping int64 `json:"ping"`
}
type Pong struct {
Pong int64 `json:"pong"`
}
type SubParams struct {
Sub string `json:"sub"`
ID string `json:"id"`
}
func InitSub(subType string, pair string, i int) []byte {
var idInt string = "id" + strconv.Itoa(i)
subStr := "market." + pair + "." + subType
sub := &SubParams{
Sub: subStr,
ID: idInt,
}
out, err := json.MarshalIndent(sub, "", " ")
if err != nil {
log.Println(err);
}
//log.Println(string(out))
return out
}
// main func
func main() {
var server string = "api.huobi.pro"
pairs := []string{"btcusdt", "ethusdt", "ltcusdt"}
comms := make(chan os.Signal, 1)
signal.Notify(comms, os.Interrupt, syscall.SIGTERM)
ctx := context.Background()
ctx, cancel := context.WithCancel(ctx)
var wg sync.WaitGroup
for x, pair := range pairs {
wg.Add(1)
go control(server, "ws", pair, ctx, &wg, x+1)
}
<-comms
cancel()
wg.Wait()
}
func control(server string, path string, pair string, ctx context.Context, wg *sync.WaitGroup, i int) {
fmt.Printf("Started control for %s\n", server)
url := url.URL {
Scheme: "wss",
Host: server,
Path: path,
}
fmt.Println(url.String())
conn, _, err := websocket.DefaultDialer.Dial(url.String(), nil)
if err != nil {
panic(err)
}
subscribe(conn, pair, i)
defer conn.Close()
var localwg sync.WaitGroup
localwg.Add(1)
go readHandler(ctx, conn, &localwg, server)
<- ctx.Done()
localwg.Wait()
wg.Done()
return
}
func readHandler(ctx context.Context, conn *websocket.Conn, wg *sync.WaitGroup, server string) {
for {
select {
case <- ctx.Done():
wg.Done()
return
default:
_, p, err := conn.ReadMessage()
if err != nil {
wg.Done()
fmt.Println(err)
}
r, err := gzip.NewReader(bytes.NewReader(p))
if(err == nil) {
result, err := ioutil.ReadAll(r)
if(err != nil) {
fmt.Println(err)
}
d := string(result)
fmt.Println(d)
var ping Ping
json.Unmarshal([]byte(d), &ping)
if (ping.Ping > 0) {
str := Pong{Pong: ping.Ping}
msg, err := json.Marshal(str)
if (err == nil) {
fmt.Println(string(msg))
conn.WriteMessage(websocket.TextMessage, []byte(msg))
}
}
}
}
}
}
func subscribe(conn *websocket.Conn, pair string, id int) {
sub := string(InitSub("trade.detail", pair, id))
err := conn.WriteMessage(websocket.TextMessage, []byte(sub))
if err != nil {
panic(err)
}
}
Break out of the readHandler loop when the connection fails:
_, p, err := conn.ReadMessage()
if err != nil {
wg.Done()
fmt.Println(err)
return // <--- add this line
}
Without the return, the function spins in a tight loop reading errors until the panic.
Use defer wg.Done() at the beginning of the goroutine to ensure that Done is called exactly once.
func readHandler(ctx context.Context, conn *websocket.Conn, wg *sync.WaitGroup, server string) {
defer wg.Done()
for {
select {
case <-ctx.Done():
return
default:
_, p, err := conn.ReadMessage()
if err != nil {
fmt.Println(err)
return
}
...
Update the control function also.
Because the caller does not execute any code concurrently with readHander, there's no value in running readHandler is a goroutine. Remove all references to wait groups from readHandler and call the function directly: change go readHandler(ctx, conn, &localwg, server) to readHandler(ctx, conn, server).
There are more issues, but this should move you further along.

How to write a directory traversing program which can feedback the total number of subdirectories, files, etc?

Try to write a directory traversing program by goroutine and channel, but unable to get the needed results. Expect to get the number of total sub-directory, files count. But when I run the code below, it will stuck in "dirCount <-1". PS: is that possible to write such a program with infinite depth traversing
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"github.com/gorilla/mux"
)
type DirectoryItem struct {
Name string `json:"name,omitemty"`
IsDir bool `json:"isDir,omitempty"`
Size int64 `json:"size,omitempty"`
}
type DirectoryInfo struct {
Path string `json:"path,omitemty"`
Dirs []DirectoryItem `json:"dirs,omitempty"`
}
var dirItems []DirectoryItem
var dirInfo DirectoryInfo
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
fpath := "E:\\"
query := req.URL.Query()
path := query["path"][0]
fpath = fpath + path
dirInfo, _ := CheckEachItem(fpath)
json.NewEncoder(w).Encode(dirInfo)
}
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
var items []DirectoryItem
dir, err := ioutil.ReadDir(dirPath)
if err != nil {
return directory, err
}
for _, fi := range dir {
if fi.IsDir() {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: true, Size: 0})
} else {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: false, Size: fi.Size()})
}
}
directory = DirectoryInfo{Path: dirPath, Dirs: items}
return directory, nil
}
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
query := req.URL.Query()
path := query["path"][0]
url := "http://localhost:8090/api/GetOneDirItems?path="
url += path
dirCount := make(chan int)
fileCount := make(chan int)
go Recr(url, dirCount, fileCount)
//
dirTotalCount := 0
for i := range dirCount {
dirTotalCount += i
}
fmt.Println(dirTotalCount)
}
func Recr(url string, dirCount chan int, fileCount chan int) {
fmt.Println(url)
resp, _ := http.Get(url)
dirInfo = DirectoryInfo{}
body, _ := ioutil.ReadAll(resp.Body)
defer resp.Body.Close()
json.Unmarshal([]byte(body), &dirInfo)
for _, itm := range dirInfo.Dirs {
fmt.Println("--")
if itm.IsDir {
newUrl := url + "/" + itm.Name
//// looks like stuck in here
dirCount <- 1
go Recr(newUrl, dirCount, fileCount)
} else {
fileCount <- 1
}
}
}
func main() {
router := mux.NewRouter()
//#1 func one:
//result sample:
//{"path":"E:\\code","dirs":[{"name":"A","isDir":true},{"name":"B","isDir":false}]}
router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")
//#2 2nd api to invoke 1st api recursively
//expected result
//{"path":"E:\\code","dirCount":2, "fileCount":3]}
router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")
log.Fatal(http.ListenAndServe(":8090", router))
}
find some code example but not feedback the right number...
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"path/filepath"
"sync"
"github.com/gorilla/mux"
)
//!+1
var done = make(chan struct{})
func cancelled() bool {
select {
case <-done:
return true
default:
return false
}
}
//!-1
type DirectoryItem struct {
Name string `json:"name,omitemty"`
IsDir bool `json:"isDir,omitempty"`
Size int64 `json:"size,omitempty"`
}
type DirectoryInfo struct {
Path string `json:"path,omitemty"`
Dirs []DirectoryItem `json:"dirs,omitempty"`
}
var dirItems []DirectoryItem
var dirInfo DirectoryInfo
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
fpath := "E:\\"
query := req.URL.Query()
path := query["path"][0]
fpath = fpath + path
dirInfo, _ := CheckEachItem(fpath)
json.NewEncoder(w).Encode(dirInfo)
}
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
var items []DirectoryItem
dir, err := ioutil.ReadDir(dirPath)
if err != nil {
return directory, err
}
for _, fi := range dir {
if fi.IsDir() {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: true, Size: 0})
} else {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: false, Size: fi.Size()})
}
}
directory = DirectoryInfo{Path: dirPath, Dirs: items}
return directory, nil
}
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
query := req.URL.Query()
path := query["path"][0]
url := "http://localhost:8090/api/GetOneDirItems?path="
url += path
fpath := "E:\\"
fpath = fpath + path
dirInfo, _ := CheckEachItem(fpath)
fileSizes := make(chan int64)
dirCount := make(chan int, 100)
var n sync.WaitGroup
for _, item := range dirInfo.Dirs {
n.Add(1)
url = url + "/" + item.Name
go Recr(url, &n, dirCount, fileSizes)
}
go func() {
n.Wait()
close(fileSizes)
close(dirCount)
}()
// Print the results periodically.
// tick := time.Tick(500 * time.Millisecond)
var nfiles, ndirs, nbytes int64
loop:
//!+3
for {
select {
case <-done:
// Drain fileSizes to allow existing goroutines to finish.
for range fileSizes {
// Do nothing.
}
return
case size, ok := <-fileSizes:
// ...
//!-3
if !ok {
break loop // fileSizes was closed
}
nfiles++
nbytes += size
case _, ok := <-dirCount:
// ...
//!-3
if !ok {
break loop // dirCount was closed
}
ndirs++
// case <-tick:
// printDiskUsage(nfiles, ndirs, nbytes)
}
}
printDiskUsage(nfiles, ndirs, nbytes) // final totals
}
func Recr(url string, n *sync.WaitGroup, dirCount chan<- int, fileSizes chan<- int64) {
defer n.Done()
resp, _ := http.Get(url)
dirInfo = DirectoryInfo{}
body, _ := ioutil.ReadAll(resp.Body)
defer resp.Body.Close()
json.Unmarshal([]byte(body), &dirInfo)
for _, itm := range dirInfo.Dirs {
if itm.IsDir {
dirCount <- 1
n.Add(1)
newUrl := url + "/" + itm.Name
go Recr(newUrl, n, dirCount, fileSizes)
} else {
fileSizes <- itm.Size
}
}
}
func main() {
// Determine the initial directories.
roots := os.Args[1:]
if len(roots) == 0 {
roots = []string{"."}
}
// API Services
router := mux.NewRouter()
router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")
router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")
log.Fatal(http.ListenAndServe(":8090", router))
}
func printDiskUsage(nfiles, ndirs, nbytes int64) {
fmt.Printf("%d files %.1f GB %d dirs\n", nfiles, float64(nbytes)/1e9, ndirs)
}
// walkDir recursively walks the file tree rooted at dir
// and sends the size of each found file on fileSizes.
//!+4
func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64, dirCount chan<- int) {
defer n.Done()
if cancelled() {
return
}
for _, entry := range dirents(dir) {
// ...
//!-4
if entry.IsDir() {
dirCount <- 1
n.Add(1)
subdir := filepath.Join(dir, entry.Name())
go walkDir(subdir, n, fileSizes, dirCount)
} else {
fileSizes <- entry.Size()
}
//!+4
}
}
//!-4
var sema = make(chan struct{}, 20) // concurrency-limiting counting semaphore
// dirents returns the entries of directory dir.
//!+5
func dirents(dir string) []os.FileInfo {
select {
case sema <- struct{}{}: // acquire token
case <-done:
return nil // cancelled
}
defer func() { <-sema }() // release token
// ...read directory...
//!-5
f, err := os.Open(dir)
if err != nil {
fmt.Fprintf(os.Stderr, "du: %v\n", err)
return nil
}
defer f.Close()
entries, err := f.Readdir(0) // 0 => no limit; read all entries
if err != nil {
fmt.Fprintf(os.Stderr, "du: %v\n", err)
// Don't return: Readdir may return partial results.
}
return entries
}
The problem here is the your program has no way of ending. Basically whenever the code recurses into another directory, you need to count that, and then when it finishes processing the directory, you need to push 1 to a done channel. When the count of directories recursed into == the number done, you can exit the channel select (that's the other missing part):
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"github.com/gorilla/mux"
)
type DirectoryItem struct {
Name string `json:"name,omitemty"`
IsDir bool `json:"isDir,omitempty"`
Size int64 `json:"size,omitempty"`
}
type DirectoryInfo struct {
Path string `json:"path,omitemty"`
Dirs []DirectoryItem `json:"dirs,omitempty"`
}
var dirItems []DirectoryItem
var dirInfo DirectoryInfo
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
fpath := "E:\\"
query := req.URL.Query()
path := query["path"][0]
fpath = fpath + path
dirInfo, err := CheckEachItem(fpath)
if err != nil {
panic(err)
}
json.NewEncoder(w).Encode(dirInfo)
}
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
var items []DirectoryItem
dir, err := ioutil.ReadDir(dirPath)
if err != nil {
return directory, err
}
for _, fi := range dir {
if fi.IsDir() {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: true, Size: 0})
} else {
items = append(items, DirectoryItem{Name: fi.Name(), IsDir: false, Size: fi.Size()})
}
}
directory = DirectoryInfo{Path: dirPath, Dirs: items}
return directory, nil
}
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
query := req.URL.Query()
path := query["path"][0]
url := "http://localhost:8090/api/GetOneDirItems?path="
url += path
dirCount := make(chan int, 10)
fileCount := make(chan int, 10)
doneCount := make(chan int, 10)
go Recr(url, dirCount, fileCount, doneCount)
//
dirTotalCount := 0
doneTotalCount := 0
out:
for {
select {
case dir := <-dirCount:
dirTotalCount += dir
fmt.Printf("dirTotalCount=%d\n", dirTotalCount)
case <-fileCount:
case done := <-doneCount:
doneTotalCount += done
fmt.Printf("doneTotalCount=%d dirTotalCount=%d\n", doneTotalCount, dirTotalCount)
if doneTotalCount == dirTotalCount+1 { // need -1 because of the root dir, which is not counted as a subdirectory
break out
}
}
}
fmt.Println("ALL DONE")
fmt.Printf("TOTAL=%d\n", dirTotalCount)
}
func Recr(url string, dirCount chan int, fileCount chan int, doneCount chan int) {
// fmt.Printf("url=%s\n", url)
resp, _ := http.Get(url)
dirInfo = DirectoryInfo{}
body, _ := ioutil.ReadAll(resp.Body)
defer resp.Body.Close()
json.Unmarshal([]byte(body), &dirInfo)
// fmt.Printf("dirInfo=%+v body=%s", dirInfo, string(body))
for _, itm := range dirInfo.Dirs {
if itm.IsDir {
newUrl := url + "/" + itm.Name
//// looks like stuck in here
// fmt.Printf("pushing one dir from %s\n", url)
dirCount <- 1
go Recr(newUrl, dirCount, fileCount, doneCount)
} else {
// fmt.Println("pushing one file")
fileCount <- 1
}
}
doneCount <- 1
}
func main() {
router := mux.NewRouter()
//#1 func one:
//result sample:
//{"path":"E:\\code","dirs":[{"name":"A","isDir":true},{"name":"B","isDir":false}]}
router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")
//#2 2nd api to invoke 1st api recursively
//expected result
//{"path":"E:\\code","dirCount":2, "fileCount":3]}
router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")
log.Fatal(http.ListenAndServe(":8090", router))
}

Is it a better way to do parallel programming that this?

I made this script for getting the follower count of "influencers" from instagram
the "runtime" number I am getting from it is between 550-750ms.
It is not that bad, but I am wondering whether it could be better or not (as I am a golang noob - learning it 3 weeks only)
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(in <-chan string) <-chan int {
out := make(chan int)
go func() {
for un := range in {
URL := "https://www.instagram.com/" + un + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
// handle error
fmt.Println(err)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
var u user
err = json.Unmarshal(body, &u)
if err != nil {
fmt.Println(err)
}
// return u.User.Followers.Count
out <- u.User.Followers.Count
}
close(out)
}()
return out
}
func merge(cs ...<-chan int) <-chan int {
var wg sync.WaitGroup
out := make(chan int)
output := func(c <-chan int) {
for n := range c {
out <- n
}
wg.Done()
}
wg.Add(len(cs))
for _, c := range cs {
go output(c)
}
go func() {
wg.Wait()
close(out)
}()
return out
}
func gen(users ...string) <-chan string {
out := make(chan string)
go func() {
for _, u := range users {
out <- u
}
close(out)
}()
return out
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
in := gen(usrs...)
d1 := getFollowerCount(in)
d2 := getFollowerCount(in)
d3 := getFollowerCount(in)
d4 := getFollowerCount(in)
d5 := getFollowerCount(in)
d6 := getFollowerCount(in)
d7 := getFollowerCount(in)
d8 := getFollowerCount(in)
d9 := getFollowerCount(in)
d10 := getFollowerCount(in)
for d := range merge(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10) {
fmt.Println(d)
}
elapsed := time.Since(start)
log.Println("runtime", elapsed)
}
I agree with jeevatkm, there are numerous way to implement your task and improve it. Some notes:
Separate the function that actually do the job (i.e. fetch result from remote service) and the function which is responsible for coordinating all the jobs.
It is a good practice to propagate an errorto the caller instead of consumes (handles) it in a function to be called.
Since the jobs are done in parallel, the result could be returned in undetermined order. Thus, besides follower count, result should contains other related information(s).
The following implementation may be one alternative:
package main
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
//Wrap username, count, and error. See (3) above.
type follower struct {
Username string
Count int
Error error
}
//GetFollowerCountFunc is a function for
//fetching follower count of a specific user.
type GetFollowerCountFunc func(string) (int, error)
//Mockup function for test
func mockGetFollowerCountFor(userName string) (int, error) {
if len(userName) < 9 {
return -1, errors.New("mocking error in get follower count")
}
return 10, nil
}
//Fetch result from remote service. See (1) above.
func getFollowerCountFor(userName string) (int, error) {
URL := "https://www.instagram.com/" + userName + "/?__a=1"
resp, err := http.Get(URL)
if err != nil {
return -1, err
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
return -1, err
}
return u.User.Followers.Count, nil
}
//Function that coordinates/distributes the jobs. See (1), (2) above.
func getFollowersAsync(users []string, fn GetFollowerCountFunc) <-chan follower {
//allocate channels for storing result
//number of allocated channels define the maximum *parallel* worker
followers := make(chan follower, len(users))
//The following is also valid
//followers := make(chan follower, 5)
//Do the job distribution in goroutine (Asynchronously)
go func() {
var wg sync.WaitGroup
wg.Add(len(users))
for _, u := range users {
//Run a *parallel* worker
go func(uid string) {
cnt, err := fn(uid)
if err != nil {
followers <- follower{uid, -1, err}
} else {
followers <- follower{uid, cnt, nil}
}
wg.Done()
}(u)
}
//wait all workers finish
wg.Wait()
//close the channels so the `for ... range` will exit gracefully
close(followers)
}()
//This function will returns immediately
return followers
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
results := getFollowersAsync(usrs, getFollowerCountFor)
//For TESTING:
//results := getFollowersAsync(usrs, mockGetFollowerCountFor)
for r := range results {
if r.Error != nil {
fmt.Printf("Error for user '%s' => %v", r.Username, r.Error)
} else {
fmt.Printf("%s: %d\n", r.Username, r.Count)
}
}
elapsed := time.Since(start)
fmt.Println("runtime", elapsed)
}
Welcome to Go, happy learning.
You're doing good, you can improve your program many ways (such as json decoder, less no of chan, etc). Following is one of the approach. Execution time is between 352-446ms (take it with grain of salt, since network call is involved in your code. Might vary based on server response time).
Your updated code:
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"sync"
"time"
)
type user struct {
User userData `json:"user"`
}
type userData struct {
Followers count `json:"followed_by"`
}
type count struct {
Count int `json:"count"`
}
func getFollowerCount(username string, result chan<- int, wg *sync.WaitGroup) {
defer wg.Done()
reqURL := "https://www.instagram.com/" + username + "/?__a=1"
resp, err := http.Get(reqURL)
if err != nil {
log.Println(err)
return
}
defer resp.Body.Close()
var u user
if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
log.Println(err)
return
}
result <- u.User.Followers.Count
}
func execute(users []string, result chan<- int) {
wg := &sync.WaitGroup{}
for _, username := range users {
wg.Add(1)
go getFollowerCount(username, result, wg)
}
wg.Wait()
result <- -1
}
func main() {
start := time.Now()
fmt.Println("STARTING UP")
usrs := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
result := make(chan int)
go execute(usrs, result)
for v := range result {
if v == -1 {
break
}
fmt.Println(v)
}
elapsed := time.Since(start)
fmt.Println("runtime:", elapsed)
}

How to close a channel

I try to adapt this example:
https://gobyexample.com/worker-pools
But I don't know how to stop the channel because program don't exit at the end of the channel loop.
Can you explain how to exit the program?
package main
import (
"github.com/SlyMarbo/rss"
"bufio"
"fmt"
"log"
"os"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
func worker(id int, jobs <-chan string, results chan<- string) {
for url := range jobs {
fmt.Println("worker", id, "processing job", url)
feed, err := rss.Fetch(url)
if err != nil {
fmt.Println("Error on: ", url)
continue
}
borne := 0
for _, value := range feed.Items {
if borne < 5 {
results <- value.Link
borne = borne +1
} else {
continue
}
}
}
}
func main() {
jobs := make(chan string)
results := make(chan string)
for w := 1; w <= 16; w++ {
go worker(w, jobs, results)
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}
The flux.txt is a flat text file like :
http://blog.case.edu/news/feed.atom
...
The problem is that, in the example you are referring to, the worker pool reads from results 9 times:
for a := 1; a <= 9; a++ {
<-results
}
Your program, on the other hand, does a range loop over the results which has a different semantics in go. The range operator does not stop until the channel is closed.
for msg := range results {
fmt.Println(msg)
}
To fix your problem you'd need to close the results channel. However, if you just call close(results) before the for loop, you most probably will
get a panic, because the workers might be writing on results.
To fix this problem, you need to add another channel to be notified when all the workers are done. You can do this either using a sync.WaitGroup or :
const (
workers = 16
)
func main() {
jobs := make(chan string, 100)
results := make(chan string, 100)
var wg sync.WaitGroup
for w := 0; w < workers; w++ {
go func() {
wg.Add(1)
defer wg.Done()
worker(w, jobs, results)
}()
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
wg.Wait()
close(results)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}
Or a done channel:
package main
import (
"bufio"
"fmt"
"github.com/SlyMarbo/rss"
"log"
"os"
)
func readLines(path string) ([]string, error) {
file, err := os.Open(path)
if err != nil {
return nil, err
}
defer file.Close()
var lines []string
scanner := bufio.NewScanner(file)
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
return lines, scanner.Err()
}
func worker(id int, jobs <-chan string, results chan<- string, done chan struct{}) {
for url := range jobs {
fmt.Println("worker", id, "processing job", url)
feed, err := rss.Fetch(url)
if err != nil {
fmt.Println("Error on: ", url)
continue
}
borne := 0
for _, value := range feed.Items {
if borne < 5 {
results <- value.Link
borne = borne + 1
} else {
continue
}
}
}
close(done)
}
const (
workers = 16
)
func main() {
jobs := make(chan string, 100)
results := make(chan string, 100)
dones := make([]chan struct{}, workers)
for w := 0; w < workers; w++ {
dones[w] = make(chan struct{})
go worker(w, jobs, results, dones[w])
}
urls, err := readLines("flux.txt")
if err != nil {
log.Fatalf("readLines: %s", err)
}
for _, url := range urls {
jobs <- url
}
close(jobs)
for _, done := range dones {
<-done
}
close(results)
// it seems program runs over...
for msg := range results {
fmt.Println(msg)
}
}

Resources