I'm working with this sample code but cannot understand how this function is called and which argument belongs to which parameter.
go func(r []string) {
processData(r)
ch <- r
}(record)
Function closures (from the Tour of Go):
Go functions may be closures. A closure is a function value that
references variables from outside its body. The function may access
and assign to the referenced variables; in this sense the function is
"bound" to the variables.
To understand this:
go func(r []string) {
processData(r)
ch <- r
}(record)
let's first declare this function:
func routine(r []string) {
processData(r)
ch <- r
}
and this global variable:
var ch = make(chan []string)
Now you may call it:
go routine(record)
This calls the function named routine as a goroutine, passing the variable record as its argument.
And see: https://gobyexample.com/goroutines
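Because Go function literals are closures, the original literal could also have captured record directly instead of taking it as a parameter (a sketch only; inside a loop it is usually safer to pass it as an argument, as the original code does):
go func() {
	processData(record) // record and ch are captured from the enclosing scope
	ch <- record
}()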
Try it on The Go Playground:
package main
import (
"encoding/csv"
"flag"
"fmt"
"io"
"os"
"strings"
"time"
)
func routine(r []string) {
processData(r)
ch <- r
}
var ch = make(chan []string)
func main() {
start := time.Now()
flag.Parse()
fmt.Print(strings.Join(flag.Args(), "\n"))
if *filename == "REQUIRED" {
return
}
csvfile, err := os.Open(*filename)
if err != nil {
fmt.Println(err)
return
}
defer csvfile.Close()
reader := csv.NewReader(csvfile)
i := 0
for {
record, err := reader.Read()
if err == io.EOF {
break
} else if err != nil {
fmt.Println(err)
return
}
i++
go routine(record)
fmt.Printf("go %d %s\n", i, record)
}
for ; i > 0; i-- { // receive exactly one result per goroutine started
fmt.Printf("<- %d %s\n", i, <-ch)
}
fmt.Printf("\n%.2fs", time.Since(start).Seconds())
}
func processData([]string) {
time.Sleep(10 * time.Millisecond)
}
var filename = flag.String("f", "REQUIRED", "source CSV file")
var numChannels = flag.Int("c", 4, "num of parallel channels")
//var bufferedChannels = flag.Bool("b", false, "enable buffered channels")
Related
I'm attempting to make a concurrent version of grep. The program walks directories/subdirectories and returns any strings that match a provided pattern.
I am attempting to run the file search concurrently once I have all the files to search (see the searchPaths function). Originally I was getting:
fatal error: all goroutines are asleep - deadlock
Until I added the close(out) at the end of searchPaths, to which it now returns:
panic: send on closed channel when running the goroutine in the for loop
I am attempting to implement something similar to:
https://go.dev/blog/pipelines#fan-out-fan-in
Is it the case that I am closing the channel at the wrong point?
package main
import (
"fmt"
"io/fs"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
"sync"
)
type SearchResult struct {
line string
lineNumber int
}
type Display struct {
filePath string
SearchResult
}
var wg sync.WaitGroup
func (d Display) PrettyPrint() {
fmt.Printf("Line Number: %v\nFilePath: %v\nLine: %v\n\n", d.lineNumber, d.filePath, d.line)
}
func searchLine(pattern string, line string, lineNumber int) (SearchResult, bool) {
if strings.Contains(line, pattern) {
return SearchResult{lineNumber: lineNumber + 1, line: line}, true
}
return SearchResult{}, false
}
func splitIntoLines(file string) []string {
lines := strings.Split(file, "\n")
return lines
}
func fileFromPath(path string) string {
fileContent, err := ioutil.ReadFile(path)
if err != nil {
log.Fatal(err)
}
return string(fileContent)
}
func getRecursiveFilePaths(inputDir string) []string {
var paths []string
err := filepath.Walk(inputDir, func(path string, info fs.FileInfo, err error) error {
if err != nil {
fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err)
return err
}
if !info.IsDir() {
paths = append(paths, path)
}
return nil
})
if err != nil {
fmt.Printf("Error walking the path %q: %v\n", inputDir, err)
}
return paths
}
func searchPaths(paths []string, pattern string) <-chan Display {
out := make(chan Display)
for _, path := range paths {
wg.Add(1)
go func() {
defer wg.Done()
for _, display := range searchFile(path, pattern) {
out <- display
}
}()
}
close(out)
return out
}
func searchFile(path string, pattern string) []Display {
var out []Display
input := fileFromPath(path)
lines := splitIntoLines(input)
for index, line := range lines {
if searchResult, ok := searchLine(pattern, line, index); ok {
out = append(out, Display{path, searchResult})
}
}
return out
}
func main() {
pattern := os.Args[1]
dirPath := os.Args[2]
paths := getRecursiveFilePaths(dirPath)
out := searchPaths(paths, pattern)
wg.Wait()
for d := range out {
d.PrettyPrint()
}
}
There were 2 main issues with this code:
You need to close the channel only after wg.Wait() completes. You can do this in a separate goroutine, as shown below.
The path variable in the searchPaths func is reassigned on every iteration of the for loop, so it is not good practice to use that variable directly in the goroutines; a better approach is to pass it as an argument.
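Condensed, those two changes look like this (a sketch only; the full corrected program follows):
out := make(chan Display)
for _, path := range paths {
	wg.Add(1)
	go func(p string) { // pass the loop variable as an argument
		defer wg.Done()
		for _, display := range searchFile(p, pattern) {
			out <- display
		}
	}(path)
}
go func() {
	wg.Wait() // close only after every sender has finished
	close(out)
}()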
package main
import (
"fmt"
"io/fs"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
"sync"
)
type SearchResult struct {
line string
lineNumber int
}
type Display struct {
filePath string
SearchResult
}
var wg sync.WaitGroup
func (d Display) PrettyPrint() {
fmt.Printf("Line Number: %v\nFilePath: %v\nLine: %v\n\n", d.lineNumber, d.filePath, d.line)
}
func searchLine(pattern string, line string, lineNumber int) (SearchResult, bool) {
if strings.Contains(line, pattern) {
return SearchResult{lineNumber: lineNumber + 1, line: line}, true
}
return SearchResult{}, false
}
func splitIntoLines(file string) []string {
lines := strings.Split(file, "\n")
return lines
}
func fileFromPath(path string) string {
fileContent, err := ioutil.ReadFile(path)
if err != nil {
log.Fatal(err)
}
return string(fileContent)
}
func getRecursiveFilePaths(inputDir string) []string {
var paths []string
err := filepath.Walk(inputDir, func(path string, info fs.FileInfo, err error) error {
if err != nil {
fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err)
return err
}
if !info.IsDir() {
paths = append(paths, path)
}
return nil
})
if err != nil {
fmt.Printf("Error walking the path %q: %v\n", inputDir, err)
}
return paths
}
func searchPaths(paths []string, pattern string) chan Display {
out := make(chan Display)
for _, path := range paths {
wg.Add(1)
go func(p string, w *sync.WaitGroup) { // as the path var changes value in the loop, it's better to pass it as an argument to the goroutine
defer w.Done()
for _, display := range searchFile(p, pattern) {
out <- display
}
}(path, &wg)
}
return out
}
func searchFile(path string, pattern string) []Display {
var out []Display
input := fileFromPath(path)
lines := splitIntoLines(input)
for index, line := range lines {
if searchResult, ok := searchLine(pattern, line, index); ok {
out = append(out, Display{path, searchResult})
}
}
return out
}
func main() {
pattern := os.Args[1]
dirPath := os.Args[2]
paths := getRecursiveFilePaths(dirPath)
out := searchPaths(paths, pattern)
go func(){
wg.Wait() // waiting before closing the channel
close(out)
}()
count := 0
for d := range out {
fmt.Println(count)
d.PrettyPrint()
count += 1
}
}
I want to loop through the menu's options. However, it stops at the first option, since the select without "default:" is blocking and it does not know more options will appear dynamically.
Below is the broken code:
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os/exec"
"strings"
"time"
"github.com/getlantern/systray"
"gopkg.in/yaml.v3"
)
var menuItensPtr []*systray.MenuItem
var config map[string]string
var commands []string
func main() {
config = readconfig()
systray.Run(onReady, onExit)
}
func onReady() {
systray.SetIcon(getIcon("assets/menu.ico"))
menuItensPtr = make([]*systray.MenuItem,0)
commands = make([]string,0)
for k, v := range config {
menuItemPtr := systray.AddMenuItem(k, k)
menuItensPtr = append(menuItensPtr, menuItemPtr)
commands = append(commands, v)
}
systray.AddSeparator()
// mQuit := systray.AddMenuItem("Quit", "Quits this app")
go func() {
for {
systray.SetTitle("My tray menu")
systray.SetTooltip("https://github.com/evandrojr/my-tray-menu")
time.Sleep(1 * time.Second)
}
}()
go func() {
for{
for i, menuItenPtr := range menuItensPtr {
select {
/// EXECUTION GETS STUCK HERE!!!!!!!
case<-menuItenPtr.ClickedCh:
execute(commands[i])
}
}
// select {
// case <-mQuit.ClickedCh:
// systray.Quit()
// return
// // default:
// }
}
}()
}
func onExit() {
// Cleaning stuff will go here.
}
func getIcon(s string) []byte {
b, err := ioutil.ReadFile(s)
if err != nil {
fmt.Print(err)
}
return b
}
func execute(commands string){
command_array:= strings.Split(commands, " ")
command:=""
command, command_array = command_array[0], command_array[1:]
cmd := exec.Command(command, command_array ...)
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
log.Fatal(err)
}
// fmt.Printf("Output %s\n", out.String())
}
func readconfig() map[string]string{
yfile, err := ioutil.ReadFile("my-tray-menu.yaml")
if err != nil {
log.Fatal(err)
}
data := make(map[string]string)
err2 := yaml.Unmarshal(yfile, &data)
if err2 != nil {
log.Fatal(err2)
}
for k, v := range data {
fmt.Printf("%s -> %s\n", k, v)
}
return data
}
Below is the ugly workaround that works:
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"github.com/getlantern/systray"
"gopkg.in/yaml.v3"
)
var menuItensPtr []*systray.MenuItem
var config map[string]string
var commands []string
var labels []string
var programPath string
func main() {
setProgramPath()
config = readconfig()
time.Sleep(1 * time.Second)
systray.Run(onReady, onExit)
}
func onReady() {
systray.SetIcon(getIcon(filepath.Join(programPath,"assets/menu.ico")))
menuItensPtr = make([]*systray.MenuItem, 0)
i := 0
op0 := systray.AddMenuItem(labels[i], commands[i])
i++
op1 := systray.AddMenuItem(labels[i], commands[i])
i++
op2 := systray.AddMenuItem(labels[i], commands[i])
i++
op3 := systray.AddMenuItem(labels[i], commands[i])
i++
systray.AddSeparator()
mQuit := systray.AddMenuItem("Quit", "Quits this app")
go func() {
for {
systray.SetTitle("My tray menu")
systray.SetTooltip("https://github.com/evandrojr/my-tray-menu")
time.Sleep(1 * time.Second)
}
}()
go func() {
for {
select {
// HERE DOES NOT GET STUCK!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
case <-op0.ClickedCh:
execute(commands[0])
case <-op1.ClickedCh:
execute(commands[1])
case <-op2.ClickedCh:
execute(commands[2])
case <-op3.ClickedCh:
execute(commands[3])
case <-mQuit.ClickedCh:
systray.Quit()
return
}
}
}()
}
func onExit() {
// Cleaning stuff will go here.
}
func getIcon(s string) []byte {
b, err := ioutil.ReadFile(s)
if err != nil {
fmt.Print(err)
}
return b
}
func setProgramPath(){
ex, err := os.Executable()
if err != nil {
panic(err)
}
programPath = filepath.Dir(ex)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
}
func execute(commands string) {
command_array := strings.Split(commands, " ")
command := ""
command, command_array = command_array[0], command_array[1:]
cmd := exec.Command(command, command_array...)
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
log.Fatal(err)
}
fmt.Printf("Output %s\n", out.String())
}
func readconfig() map[string]string {
yfile, err := ioutil.ReadFile(filepath.Join(programPath,"my-tray-menu.yaml"))
if err != nil {
log.Fatal(err)
}
data := make(map[string]string)
err2 := yaml.Unmarshal(yfile, &data)
if err2 != nil {
log.Fatal(err2)
}
labels = make([]string, 0)
commands = make([]string, 0)
for k, v := range data {
labels = append(labels, k)
commands = append(commands, v)
fmt.Printf("%s -> %s\n", k, v)
}
fmt.Print(len(labels))
return data
}
Full source code here:
https://github.com/evandrojr/my-tray-menu
select "chooses which of a set of possible send or receive operations will proceed". The spec sets out how this choice is made:
If one or more of the communications can proceed, a single one that can proceed is chosen via a uniform pseudo-random selection. Otherwise, if there is a default case, that case is chosen. If there is no default case, the "select" statement blocks until at least one of the communications can proceed.
Your working example:
select {
case <-op0.ClickedCh:
execute(commands[0])
case <-op1.ClickedCh:
execute(commands[1])
// ...
}
uses select successfully to choose between the offered options. However, if you pass a single option, e.g.
select {
case <-menuItenPtr.ClickedCh:
    execute(commands[i])
}
The select will block until <-menuItenPtr.ClickedCh is ready to proceed (i.e. until something is received). This is effectively the same as not using a select:
<-menuItenPtr.ClickedCh
execute(commands[i])
The result you were expecting can be achieved by providing a default option:
select {
case <-menuItenPtr.ClickedCh:
    execute(commands[i])
default:
}
As per the quote from the spec above, the default option will be chosen if none of the other options can proceed. While this may work, it's not a very good solution, because you effectively end up with:
for {
// Check if event happened (not blocking)
}
This will tie up CPU time unnecessarily as it continually loops checking for events. A better solution would be to start a goroutine to monitor each channel:
for i, menuItenPtr := range menuItensPtr {
go func(c chan struct{}, cmd string) {
for range c { execute(cmd) }
}(menuItenPtr.ClickedCh, commands[i])
}
// Start another goroutine to handle quit
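That quit handler could be as simple as this sketch (reusing the mQuit item from the workaround above):
go func() {
	<-mQuit.ClickedCh
	systray.Quit()
}()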
The above will probably work but does lead to the possibility that execute will be called concurrently (which might cause issues if your code is not threadsafe). One way around this is to use the "fan in" pattern (as suggested by #kostix and in the Rob Pike video suggested by #John); something like:
cmdChan := make(chan string)
for i, menuItenPtr := range menuItensPtr {
go func(c chan struct{}, cmd string) {
for range c { cmdChan <- cmd }
}(menuItenPtr.ClickedCh, commands[i])
}
go func() {
for {
select {
case cmd := <- cmdChan:
execute(cmd) // Handle command
case <-mQuit.ClickedCh:
systray.Quit()
return
}
}
}()
Note: all of the code above was entered directly into this answer, so please treat it as pseudo code!
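For reference, here is a self-contained sketch of the same fan-in idea using plain channels instead of systray, so it can be run as-is; all names here are invented for illustration:
package main

import (
	"fmt"
	"time"
)

func main() {
	// Two "click" channels standing in for menu items.
	clicks := []chan struct{}{make(chan struct{}), make(chan struct{})}
	commands := []string{"echo one", "echo two"}
	quit := make(chan struct{})

	// Fan in: one forwarding goroutine per source channel.
	cmdChan := make(chan string)
	for i, c := range clicks {
		go func(c chan struct{}, cmd string) {
			for range c {
				cmdChan <- cmd
			}
		}(c, commands[i])
	}

	// Simulate two clicks, then quit.
	go func() {
		clicks[0] <- struct{}{}
		clicks[1] <- struct{}{}
		time.Sleep(100 * time.Millisecond) // give the consumer time to drain
		close(quit)
	}()

	// Single consumer: commands are handled one at a time, never concurrently.
	for {
		select {
		case cmd := <-cmdChan:
			fmt.Println("execute:", cmd)
		case <-quit:
			return
		}
	}
}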
I have a goroutine that reads a file in chunks and passes them through a channel to another goroutine that calculates a checksum for the file. The consuming goroutine is kind of a sink for the channel.
Is it possible to have the consumer return the checksum string after all of the bytes have been received from the channel, or must I use a string stream to return the value? For the former, I get a deadlock, but I am not using any waitgroups; I'm not sure how to apply them in this case.
I'd appreciate your comments and thank you for your help.
// main()
var done = make(chan bool)
defer close(done)
checksum := FileCheckSum(ReadFileToChannel("mydata4.bin", done), done)
fmt.Println("Final Checksum: ", checksum)
// FileCheckSum()
import (
"crypto/sha256"
"encoding/hex"
"log"
)
func FileCheckSum(cin <-chan []byte, done <-chan bool) string {
chunkStream := make(chan []byte)
checksum := func(in <-chan []byte, done <-chan bool) string {
defer close(chunkStream)
hasher := sha256.New()
for chunk := range in {
_, err := hasher.Write(chunk[:len(chunk)])
if err != nil {
log.Fatal(err)
}
select {
case <-done:
return ""
}
}
return hex.EncodeToString(hasher.Sum(nil))
}(cin, done)
return checksum
}
Yes. Let me simplify your code - read the comments inside the following code.
Try this:
package main
import (
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"fmt"
"log"
)
func FileCheckSum(cin <-chan []byte) string {
h := sha256.New()
for buf := range cin {
_, err := h.Write(buf)
if err != nil {
log.Fatal(err)
}
}
return hex.EncodeToString(h.Sum(nil))
}
func ReadFileToChannel(filename string) chan []byte {
gen := make(chan []byte)
go func() { // goroutine
defer close(gen) // signal end of reading file
for i := 0; i < 10; i++ { // e.g. read from file
b := make([]byte, 16) // make new slice every time
_, err := rand.Read(b) // fill it
if err != nil {
log.Fatal(err)
}
gen <- b // send it
}
}()
return gen
}
func main() {
ch := ReadFileToChannel("mydata4.bin")
crc := FileCheckSum(ch)
fmt.Println("Final Checksum: ", crc)
}
Output:
Final Checksum: 1e0ad2ec11bfe77833af670c6de296f530c2217d18aa1b8e600feddf6998fb95
Note
Your code needs a code review; you may head over here for that.
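One more point: if you do want to keep a done channel for early cancellation (as in your original code), the select needs a case for the data channel as well; a select with only the done case blocks forever once the first chunk has been written. A sketch of that variant, using the same imports as above and assuming the sender also stops on done:
func FileCheckSum(cin <-chan []byte, done <-chan bool) string {
	h := sha256.New()
	for {
		select {
		case buf, ok := <-cin:
			if !ok { // channel closed: every chunk has been received
				return hex.EncodeToString(h.Sum(nil))
			}
			if _, err := h.Write(buf); err != nil {
				log.Fatal(err)
			}
		case <-done:
			return "" // cancelled early
		}
	}
}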
I have a function like this:
package main
import (
"fmt"
)
// PrintSomething prints some thing
func PrintSomething() {
fmt.Println("print something")
}
func main() {
PrintSomething()
}
How do I wrap PrintSomething in another function called CaptureSomething that saves the string "print something" to a variable and returns it?
Create a pipe and set stdout to the pipe writer. Start a goroutine to copy from the pipe reader to a buffer. When done, close the pipe writer and wait for the goroutine to finish reading. Return the buffer as a string.
// capture replaces os.Stdout with a writer that buffers any data written
// to os.Stdout. Call the returned function to cleanup and get the data
// as a string.
func capture() func() (string, error) {
r, w, err := os.Pipe()
if err != nil {
panic(err)
}
done := make(chan error, 1)
save := os.Stdout
os.Stdout = w
var buf strings.Builder
go func() {
_, err := io.Copy(&buf, r)
r.Close()
done <- err
}()
return func() (string, error) {
os.Stdout = save
w.Close()
err := <-done
return buf.String(), err
}
}
Use it like this:
done := capture()
fmt.Println("Hello, playground")
capturedOutput, err := done()
if err != nil {
// handle error
}
playground example
For example,
package main
import (
"fmt"
"io/ioutil"
"os"
)
// PrintSomething prints some thing
func PrintSomething() {
fmt.Println("print something")
}
func CaptureSomething() (string, error) {
defer func(stdout *os.File) {
os.Stdout = stdout
}(os.Stdout)
out, err := ioutil.TempFile("", "stdout")
if err != nil {
return "", err
}
defer out.Close()
outname := out.Name()
os.Stdout = out
PrintSomething()
err = out.Close()
if err != nil {
return "", err
}
data, err := ioutil.ReadFile(outname)
if err != nil {
return "", err
}
os.Remove(outname)
return string(data), nil
}
func main() {
s, err := CaptureSomething()
if err != nil {
fmt.Println(err)
} else {
fmt.Print(s)
}
}
Playground: https://play.golang.org/p/O2kSegxYeGy
Output:
print something
Use one of these, whichever works for you:
package main
import (
"bytes"
"fmt"
"io"
"os"
"strings"
)
func PrintSomething() {
fmt.Println("print something")
}
func PrintSomethingBig() {
for i := 0; i < 100000; i++ {
fmt.Println("print something")
}
}
func PrintSomethingOut(out io.Writer) {
fmt.Fprintln(out, "print something to io.Writer")
}
func PrintSomethingString() string {
return fmt.Sprintln("print something into a string")
}
// not thread safe
// modified by zlynx#acm.org from original at http://craigwickesser.com/2015/01/capture-stdout-in-go/
func captureStdout(f func()) string {
old := os.Stdout
r, w, _ := os.Pipe()
os.Stdout = w
go func() {
f()
w.Close()
}()
buf := &bytes.Buffer{}
// Will complete when the goroutine calls w.Close()
io.Copy(buf, r)
// Clean up.
os.Stdout = old
r.Close()
return buf.String()
}
func main() {
str1 := &strings.Builder{}
str2 := PrintSomethingString()
PrintSomethingOut(str1)
PrintSomethingOut(os.Stdout)
str3 := captureStdout(PrintSomething)
str4 := captureStdout(PrintSomethingBig)
fmt.Println("string 1 is", str1)
fmt.Println("string 2 is", str2)
fmt.Println("string 3 is", str3)
fmt.Println("string 4 len", len(str4))
}
I have a task written in Go to get a unique list from a bunch of text files. I put in some parallelization using channels and am now getting inconsistent results - a variance of about 5 records output (or not output) each time with the same input files.
I am testing it with go run process.go | wc -l on Fedora x86_64, go1.1.2, 8-core AMD.
The code is:
package main
import (
"fmt"
"os"
"io"
"encoding/csv"
"regexp"
"log"
)
var (
cleanRe *regexp.Regexp = regexp.MustCompile("[^0-9]+")
comma rune ='\t'
fieldsPerRecord=-1
)
func clean(s string) string {
clean:=cleanRe.ReplaceAllLiteralString(s,"")
if len(clean)<6 {return ""}
return clean
}
func uniqueChannel(inputChan chan []string, controlChan chan string) {
defer func(){controlChan<-"Input digester."}()
uniq:=make(map[string]map[string]bool)
i:=0
for record:= range inputChan {
i++
id,v:=record[0],record[1]
if uniq[id]==nil {
uniq[id]=make(map[string]bool)
} else if !uniq[id][v] {
uniq[id][v]=true
fmt.Println(id,string(comma),v)
}
}
log.Println("digest ", i)
}
func processFile(fileName string, outputChan chan []string, controlChan chan string) {
defer func(){controlChan<-fileName}()
f,err:=os.Open(fileName)
if err!=nil{log.Fatal(err)}
r:=csv.NewReader(f)
r.FieldsPerRecord = fieldsPerRecord
r.Comma = comma
// Process the records
i:=0
for record,err:=r.Read();err!=io.EOF;record,err=r.Read() {
if err!=nil{continue}
id:=record[0]
for _,v:=range record[1:] {
if cleanV:=clean(v);cleanV!=""{
i++
outputChan<-[]string{id,cleanV}
}
}
}
log.Println(fileName,i)
}
func main() {
inputs:=[]string{}
recordChan:=make(chan []string,100)
processesLeft:=len(inputs)+1
controlChan:=make(chan string,processesLeft)
// Ingest the inputs
for _,fName:=range inputs {
go processFile(fName,recordChan,controlChan)
}
// This is the loop to ensure it's all unique
go uniqueChannel(recordChan,controlChan)
// Make sure all the channels close up
for processesLeft>0 {
if processesLeft==1{
close(recordChan)
}
c:=<-controlChan
log.Println(c)
processesLeft--
}
close(controlChan)
}
It seems like it closes the channel before it's empty and quits. Without the closing mechanism I was getting deadlocks - I'm out of ideas.
You could ditch the control channel and use a sync.WaitGroup:
package main
import (
"encoding/csv"
"fmt"
"io"
"log"
"os"
"regexp"
"sync"
)
var (
cleanRe *regexp.Regexp = regexp.MustCompile("[^0-9]+")
comma rune = '\t'
fieldsPerRecord = -1
)
func clean(s string) string {
clean := cleanRe.ReplaceAllLiteralString(s, "")
if len(clean) < 6 {
return ""
}
return clean
}
func uniqueChannel(inputChan chan []string) {
uniq := make(map[string]map[string]bool)
i := 0
for record := range inputChan {
i++
id, v := record[0], record[1]
if uniq[id] == nil {
uniq[id] = make(map[string]bool)
} else if !uniq[id][v] {
uniq[id][v] = true
fmt.Println(id, string(comma), v)
}
}
log.Println("digest ", i)
}
func processFile(fileName string, outputChan chan []string) {
f, err := os.Open(fileName)
if err != nil {
log.Fatal(err)
}
r := csv.NewReader(f)
r.FieldsPerRecord = fieldsPerRecord
r.Comma = comma
// Process the records
for record, err := r.Read(); err != io.EOF; record, err = r.Read() {
if err != nil {
continue
}
id := record[0]
for _, v := range record[1:] {
if cleanV := clean(v); cleanV != "" {
outputChan <- []string{id, cleanV}
}
}
}
}
func main() {
inputs := []string{"ex.tsv"}
recordChan := make(chan []string)
var wg sync.WaitGroup
// Ingest the inputs
for _, fName := range inputs {
wg.Add(1)
go func(fName string) { // pass the loop variable as an argument so each goroutine gets its own copy
processFile(fName, recordChan)
wg.Done()
}(fName)
}
go func() {
wg.Wait()
close(recordChan)
}()
// This is the loop to ensure it's all unique
uniqueChannel(recordChan)
}