goroutines/channel - program not exiting without Ctrl-C - go

Please consider the following Go code, which I wrote to scan a TCP port (os.Args[2]) on each hostname listed in a given file (os.Args[1]). It reads each hostname and tries to connect. If the connection fails, it appends the failed hostname to outfile.
package main
import(
"fmt"
"os"
"log"
"bufio"
"time"
"net"
)
// main scans one TCP port on every hostname listed in a file.
// Usage: <program> <hosts-file> <port>. One check_sender goroutine is started
// per input line; hosts that fail to connect are appended by check_sender to
// /tmp/port_check<unix-nanos>.txt, and every checked host is printed here.
func main(){
argc := len(os.Args)
if argc < 3 {
fmt.Printf("Invalid usage")
log.Fatal("Invalid usage")
}
// The nanosecond timestamp gives each run its own output file name.
stamp := time.Now().UnixNano()
outfile := fmt.Sprintf("%s%d.txt", "/tmp/port_check", stamp)
filename := os.Args[1]
file, err := os.Open(filename)
checkerr(err)
f, err := os.OpenFile(outfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
checkerr(err)
defer f.Close()
port := os.Args[2]
// Buffered channel (capacity 17) on which each goroutine reports its hostname.
channel := make(chan string,17)
fscanner := bufio.NewScanner(file)
// Fan out: one goroutine per line of the input file. The number of goroutines
// started is not recorded, and fscanner.Err() is not checked afterwards.
for fscanner.Scan(){
_line := fscanner.Text()
go check_sender(_line,port,f,channel)
}
_count := 0
// A range over a channel ends only when the channel is closed. Nothing closes
// it while this loop runs, so after the last goroutine has reported, the
// receive blocks indefinitely.
for out := range channel{
fmt.Println("_count is:", _count, out)
_count += 1
}
// Only reached once the loop above has ended, i.e. after the channel has
// already been closed by something else.
close(channel)
}
// checkerr aborts the program when err is non-nil: the error is echoed on
// standard output and then handed to log.Fatal, which terminates the process.
// A nil err is a no-op.
func checkerr(err error) {
	if err == nil {
		return
	}
	fmt.Println(err)
	log.Fatal(err)
}
// check_sender probes TCP port `port` on host `sender` and reports back.
//
// If no connection can be established within four seconds, sender is appended
// to f as a line of its own. Whether or not the dial succeeded, sender is then
// sent on channel so the caller can account for the finished check.
func check_sender(sender string, port string, f *os.File, channel chan string) {
	// JoinHostPort brackets IPv6 literals, which a plain "%s:%s" would not.
	address_string := net.JoinHostPort(sender, port)
	conn, err := net.DialTimeout("tcp", address_string, 4*time.Second)
	if err != nil {
		// Record the unreachable host; surface a failed write instead of
		// silently dropping the result.
		if _, werr := fmt.Fprintln(f, sender); werr != nil {
			fmt.Fprintln(os.Stderr, "recording", sender, "failed:", werr)
		}
	} else {
		// The connection was only a probe: close it right away so the socket
		// is not held open for the rest of the run.
		conn.Close()
	}
	channel <- sender
}
Generate some content for it to operate on:
$ for i in `seq 1 5`; do echo "someblog$RANDOM$RANDOM.blogspot.com"; done > /tmp/meh.txt
And when run as:
$ go run port_scan.go /tmp/meh.txt 80
_count is: 0 someblog50063432.blogspot.com
_count is: 1 someblog922816893.blogspot.com
_count is: 2 someblog622823698.blogspot.com
_count is: 3 someblog1074223783.blogspot.com
_count is: 4 someblog1876411881.blogspot.com
^Csignal: interrupt < ----------------------- this
It hangs after the last hostname and does not exit until Ctrl-C is sent.
I would like it to exit by itself, what am I doing wrong here?
update 1:
channel := make(chan string,17)
fscanner := bufio.NewScanner(file)
+ spin := 0
for fscanner.Scan(){
_line := fscanner.Text()
go check_sender(_line,port,f,channel)
+ spin += 1
}
_count := 0
for out := range channel{
fmt.Println("_count is:", _count, out)
_count += 1
+ if _count == spin {
+ close(channel)
+ }
}
- close(channel)
}

Related

io.Pipe() causes WaitGroup to get stuck

I am processing a huge data file which is approx. 100 GB. Each line in that huge file is a JSON piece of data which I'd like to read, compress, and store in an in memory database.
var wg sync.WaitGroup
for {
line, err := reader.ReadString('\n')
if err != nil {
break
}
go func(index int) {
wg.Add(1)
pr, pw := io.Pipe()
zw := lzw.NewWriter(pw, lzw.LSB, 8)
_, err := io.Copy(zw, strings.NewReader(line))
pw.Close()
zw.Close()
if err != nil {
fmt.Println(err.Error())
}
b, err := io.ReadAll(pr)
if err != nil {
fmt.Println(err.Error())
}
client.Set(ctx, fmt.Sprintf("%d", index), base64.StdEncoding.EncodeToString(b), time.Hour*1000)
pr.Close()
wg.Done()
}(index)
if index%10000 == 0 {
fmt.Println(index)
wg.Wait()
}
index += 1
}
However, this code stops after processing the first 10000 lines. When I move the wg.Add(1) down after the zw.Close(), it keeps on processing the rest of the lines (but becomes unstable). Without the lzw and io.Pipe(), when I try to store the exact values uncompressed, everything works without any issue.
I am not sure whether I am not using the WaitGroup correctly or there is something associated with the io.Pipe() which I am not aware of yet.
TLDR:
1- Removing pr, pw := io.Pipe() makes the code more simple, since it is superfluous,
try this:
line, err := reader.ReadString('\n')
if err == io.EOF {
wg.Wait()
break
}
if err != nil {
log.Fatal(err)
}
wg.Add(1)
go func(index int) {
var buf bytes.Buffer
{ // lexical scoping (static scoping)
zw := lzw.NewWriter(&buf, lzw.LSB, 8)
n, err := zw.Write([]byte(line)) // n, err := io.Copy(zw, strings.NewReader(line))
if err != nil {
log.Fatal(err)
}
if int(n) != len(line) {
log.Fatal(n, len(line))
}
// It is the caller's responsibility to call Close on the WriteCloser when finished writing.
if err = zw.Close(); err != nil {
log.Fatal(err)
}
}
ctx, cancelFunc := context.WithTimeout(context.Background(), 100*time.Millisecond)
client.Set(ctx, fmt.Sprintf("%d", index), base64.StdEncoding.EncodeToString(buf.Bytes()), 1000*time.Hour)
cancelFunc()
wg.Done()
}(index)
if index%tenThousand == 0 {
wg.Wait()
}
2- You need to put the wg.Add(1) before go func(index int) {:
wg.Add(1)
go func(index int) {
3- The wg.Wait() logic:
if index%10000 == 0 {
fmt.Println(index)
wg.Wait()
}
What happens on the last iteration if index%10000 != 0? Nothing waits for the goroutines that are still running.
So here when err == io.EOF you need to wg.Wait() for all goroutines to join:
if err == io.EOF {
wg.Wait()
fmt.Println("\n**** All done **** index =", index)
break
}
4- You may use lexical scoping (static scoping) to limit some variables scope and make the code more manageable - and to know when to Close the lzw.NewWriter :
{ // lexical scoping (static scoping)
zw := lzw.NewWriter(bufio.NewWriter(&buf), lzw.LSB, 8)
n, err := io.Copy(zw, strings.NewReader(line))
if err != nil {
log.Fatal(err)
}
if int(n) != len(line) {
log.Fatal(n, len(line))
}
// It is the caller's responsibility to call Close on the WriteCloser when finished writing.
if err = zw.Close(); err != nil {
log.Fatal(err)
}
}
5- Always check the errors, e.g.:
if err = zw.Close(); err != nil {
log.Fatal(err)
}
This is a working version that stays close to your code. Try it only to experiment with the concurrency logic and see what happens (it is not recommended, since it has superfluous goroutines and io.Pipe; it just works):
package main
import (
"bufio"
"compress/lzw"
"context"
"encoding/base64"
"fmt"
"io"
"log"
"strings"
"sync"
"time"
)
// main reads the package-level `file` line by line and, for every line, starts
// a goroutine that LZW-compresses the line through an io.Pipe, base64-encodes
// the compressed bytes and passes them to client.Set under the line's index.
// Progress messages are sent on the package-level `msg` channel. After every
// batch of tenThousand lines, and again at end of input, it waits for all
// started goroutines before going on.
func main() {
index := 0
client := &myClient{}
reader := bufio.NewReader(file)
// your code:
var wg sync.WaitGroup
for {
// index is incremented before the read, so the first line has index 1.
index++
line, err := reader.ReadString('\n')
// Any read error (io.EOF included) ends the loop, but only after all
// goroutines started so far have finished.
if err != nil {
msg <- fmt.Sprint(index, " Done not waiting with err: ", err, time.Now())
wg.Wait() // break waiting // if index%tenThousand != 0
break
}
// Add is called here, before the go statement, so a later wg.Wait cannot
// run ahead of the goroutine registering itself.
wg.Add(1)
go func(i int) {
msg <- fmt.Sprint(i, " Enter running ... ", time.Now())
asyncReader, asyncWriter := io.Pipe() // make it async to read and write
zipWriter := lzw.NewWriter(asyncWriter, lzw.LSB, 8)
// The compressing writer runs in its own goroutine while this goroutine
// drains the read side with io.ReadAll below.
go func() { // async
_, err := io.Copy(zipWriter, strings.NewReader(line))
if err != nil {
log.Fatal(err)
}
// Close the compressor first, then the pipe writer; closing the pipe writer
// is what lets io.ReadAll on the read side return. Both errors are discarded.
_ = zipWriter.Close()
_ = asyncWriter.Close() // for io.ReadAll
}()
b, err := io.ReadAll(asyncReader)
if err != nil {
log.Fatal(err)
}
client.Set(context.Background(), fmt.Sprintf("%d", i), base64.StdEncoding.EncodeToString(b), time.Hour*1000)
asyncReader.Close()
// NOTE(review): presumably an artificial delay so the demo output shows
// the batching; confirm before reusing this code.
time.Sleep(1 * time.Second)
msg <- fmt.Sprint(i, " Exit running ... ", time.Now())
wg.Done()
}(index)
msg <- fmt.Sprint(index, " ", index%tenThousand == 0, " after go call")
// Batch boundary: block until every goroutine started so far is done.
if index%tenThousand == 0 {
wg.Wait()
msg <- fmt.Sprint("..", index, " Done waiting after go call. ", time.Now())
}
}
msg <- "Bye forever."
wg.Wait()
// Close msg and then wait on wgMsg before returning from main.
close(msg)
wgMsg.Wait()
}
// just for the Go Playground:
const tenThousand = 2
// myClient is the placeholder client used by this example; it stores nothing.
type myClient struct{}

// Set discards a, b and t. Its only effect is to print ctx's error when the
// context has already been cancelled or has expired.
func (c *myClient) Set(ctx context.Context, a, b string, t time.Duration) {
	if err := ctx.Err(); err != nil {
		fmt.Println(err)
	}
}
var file, myw = io.Pipe()
func init() {
go func() {
for i := 1; i <= tenThousand+1; i++ {
fmt.Fprintf(myw, "%d text to compress aaaaaaaaaaaaaa\n", i)
}
myw.Close()
}()
wgMsg.Add(1)
go func() {
defer wgMsg.Done()
for s := range msg {
fmt.Println(s)
}
}()
}
var msg = make(chan string, 100)
var wgMsg sync.WaitGroup
Output:
1 false after go call
2 true after go call
1 Enter running ... 2009-11-10 23:00:00 +0000 UTC m=+0.000000001
2 Enter running ... 2009-11-10 23:00:00 +0000 UTC m=+0.000000001
1 Exit running ... 2009-11-10 23:00:01 +0000 UTC m=+1.000000001
2 Exit running ... 2009-11-10 23:00:01 +0000 UTC m=+1.000000001
..2 Done waiting after go call. 2009-11-10 23:00:01 +0000 UTC m=+1.000000001
3 false after go call
3 Enter running ... 2009-11-10 23:00:01 +0000 UTC m=+1.000000001
4 Done not waiting with err: EOF 2009-11-10 23:00:01 +0000 UTC m=+1.000000001
3 Exit running ... 2009-11-10 23:00:02 +0000 UTC m=+2.000000001
Bye forever.

golang: `os.Stdin.Read` doesn't handle input consisting of a sole EOF?

I am writing a cat with a timeout on receiving the first byte. I have it working except that it can't handle echo -n:
❯ echo -n | time possiblycat 1000 # 1000 is the timeout in milliseconds
possiblycat 1000 0.00s user 0.00s system 0% cpu 1.008 total; max RSS 1864
cat itself has no issues with this; It notices the EOF and exits immediately:
❯ echo -n | time cat
cat 0.00s user 0.00s system 71% cpu 0.003 total; max RSS 664
This is the whole source of possiblycat:
package main
import (
"io"
"io/ioutil"
"os"
"strconv"
"time"
)
// main copies standard input to standard output, but gives up with exit
// status 1 if the first byte does not arrive within the timeout. The timeout
// is os.Args[1] in milliseconds and defaults to 10.
func main() {
wait := 10
if len(os.Args) >= 2 {
waitDummy, err := strconv.Atoi(os.Args[1])
if err != nil {
panic(err)
}
wait = waitDummy
}
// scan delivers the first byte of stdin on b from a separate goroutine.
b := make(chan byte, 1)
go scan(b)
// Only two ways out of this select: a byte arrives on b, or the timer fires.
// If nothing is ever sent on b, the program always runs into the timeout.
select {
case res := <-b:
// First byte received in time: read the rest of stdin and emit it after
// that first byte.
inBytes, err := ioutil.ReadAll(os.Stdin)
if err != nil {
panic(err)
}
stdin := append([]byte{res}, inBytes...)
_, err2 := os.Stdout.Write(stdin)
if err2 != nil {
panic(err2)
}
case <-time.After(time.Duration(wait) * time.Millisecond):
os.Exit(1)
}
}
// scan reads a single byte from standard input and sends it on out.
// On io.EOF it returns without sending anything and without closing out;
// any other read error causes a panic.
func scan(out chan byte) {
	buf := make([]byte, 1)
	if _, err := os.Stdin.Read(buf); err != nil {
		if err == io.EOF {
			return
		}
		panic(err)
	}
	out <- buf[0]
}
Related:
Does echo -n | … send an EOF to the pipe?
When os.Stdin.Read returns EOF, you exit the scan function which is running in its own goroutine.
However, nothing is being done to tell the main goroutine that all input has been processed. It is waiting for data on channel b, or for the timeout. Since there is no data coming on b, the timeout gets reached.
To properly handle this, the err == io.EOF case should signal the main goroutine that there is no more work to be done. A common pattern (but certainly not the only one) is to have a done channel indicating that all work is finished.
done := make(chan bool, 1)
go scan(b, done)
select {
case res := <-b:
...
case <-done:
os.Exit(1)
case <-time.After(time.Duration(wait) * time.Millisecond):
os.Exit(1)
}
}
func scan(out chan byte, done chan bool) {
var b []byte = make([]byte, 1)
_, err := os.Stdin.Read(b)
if err == io.EOF {
fmt.Println("got EOF, exiting")
done <- true
return
} else if err != nil {
...
}
Another (even simpler) alternative is to simply close the data channel when you're done:
// scan reads a single byte from standard input and sends it on out.
// On io.EOF it prints a notice and closes out, so a receiver is released
// instead of waiting for data that will never come. Any other read error
// causes a panic.
func scan(out chan byte) {
	buf := make([]byte, 1)
	_, err := os.Stdin.Read(buf)
	switch {
	case err == io.EOF:
		fmt.Println("got EOF, exiting")
		close(out)
	case err != nil:
		panic(err)
	default:
		out <- buf[0]
	}
}

Goroutines stuck after execution

I want to have a limited number of goroutines that do some computation (func worker() does the computation and places the result in a channel). I also have another channel that holds the "jobs" for my workers. I can see that all jobs are computed correctly, but after the computation the execution gets stuck.
package main
import (
"bufio"
"fmt"
"os"
"net/http"
"io/ioutil"
"strings"
"time"
)
// worker processes jobs from urls until that channel is closed. Each job is
// either an HTTP(S) URL or a local file path; the worker counts occurrences of
// "Go" in its content and sends the count on results.
//
// Exactly one value is sent on results per job, even when the job fails (the
// error is printed and the count is 0), so a caller that receives once per
// submitted job never waits for a missing result.
func worker(id int, urls <-chan string, results chan<- int) {
	for url := range urls {
		fmt.Println("worker", id, "started job", url)
		data, err := fetch(url)
		if err != nil {
			fmt.Println(err)
		}
		number := strings.Count(data, "Go")
		fmt.Println("worker", id, "finished job", url, "Number of Go is", number)
		results <- number
	}
}

// fetch returns the content behind src: the body of an HTTP GET when src
// starts with "http" (which also covers "https"), otherwise the local file of
// that name. On any error the returned string is empty.
func fetch(src string) (string, error) {
	if !strings.HasPrefix(src, "http") {
		body, err := ioutil.ReadFile(src)
		if err != nil {
			return "", err
		}
		return string(body), nil
	}

	resp, err := http.Get(src)
	if err != nil {
		// resp is nil here; touching resp.Body would panic.
		return "", err
	}
	// Deferring inside this helper closes the body once per job rather than
	// only when the worker's whole loop ends.
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(body), nil
}
// main reads one job (URL or file path) per line from standard input, hands
// the jobs to a fixed pool of worker goroutines and sums the counts they
// report, then prints the total and the elapsed time.
func main() {
final_result := 0
maxNbConcurrentGoroutines := 5
numJobs := 0
// Both channels are unbuffered: every send waits for a matching receive.
urls := make(chan string)
results := make(chan int)
scanner := bufio.NewScanner(os.Stdin)
start := time.Now()
for w := 1; w <= maxNbConcurrentGoroutines; w++ {
go worker(w, urls, results)
}
// Feed the jobs. Nothing receives from results while this loop runs.
for scanner.Scan() {
url := (scanner.Text())
urls <- url
numJobs += 1
}
// Closing urls lets each worker's range loop finish.
close(urls)
// A range over a channel ends only when the channel is closed; results is
// never closed in this function, so after the last result has been received
// this loop keeps waiting.
for num := range results {
final_result += num
}
t := time.Now()
elapsed := t.Sub(start)
// Second collection loop: expects numJobs further values on results, in
// addition to whatever the range loop above already consumed.
for i := 1; i <= numJobs; i++ {
one_result := <- results
final_result += one_result
}
fmt.Println("Number = ", final_result)
fmt.Println("Time = ", elapsed)
// The scanner's error is only inspected here, after all results are printed.
if err := scanner.Err(); err != nil {
fmt.Fprintln(os.Stderr, "error:", err)
os.Exit(1)
}
}
I tried to use https://gobyexample.com/worker-pools to extract all the values from the results channel, but did not succeed. What should I do to get it unstuck so that it proceeds? Here is an example of how to run it:
echo -e 'https://golang.org\n/etc/passwd\nhttps://golang.org\nhttps://golang.org' | go run 1.go
Your program doesn't return because the range loop waits for the results channel to be closed, and nothing ever closes it.
In https://gobyexample.com/worker-pools the loop for getting results is different:
for a := 1; a <= numJobs; a++ {
<-results
}
If you want to use for num := range results, you need to call close(results) and determine when to call it (after all workers have finished sending).
You can view another example using WaitGroup at https://gobyexample.com/waitgroups

How can I skip the first line of a file in Go?

How can I read a file in Go and skip the first line / headers?
In Python I know I could do
counter = 0
with open("my_file_path", "r") as fo:
try:
next(fo)
except:
pass
for _ in fo:
counter = counter + 1
This is my Go application
package main
import (
"bufio"
"flag"
"os"
)
// readFile returns the number of lines in the file at fileLocation, counting
// every line including the first. The error from os.Open is discarded; a file
// that cannot be opened yields a count of 0.
func readFile(fileLocation string) int {
	src, _ := os.Open(fileLocation)
	defer src.Close()

	lines := 0
	for sc := bufio.NewScanner(src); sc.Scan(); {
		lines++
	}
	return lines
}
// main parses the -file_location flag and prints the line count that readFile
// reports for that path, using the println builtin.
func main() {
	var fileLocation string
	flag.StringVar(&fileLocation, "file_location", "default value", "file path to count lines")
	flag.Parse()
	println(readFile(fileLocation))
}
I will be reading a huge file and don't want to be evaluating each line if the index is 0.
How about advancing to the next token once before the loop?
scanner := bufio.NewScanner(file)
scanner.Scan() // this moves to the next token
for scanner.Scan() {
fmt.Println(scanner.Text())
}
file
1
2
3
output
2
3
https://play.golang.org/p/I2w50zFdcg0
For example,
package main
import (
"bufio"
"fmt"
"os"
)
// readFile returns the number of lines in filename after the first one, i.e.
// the header line is skipped and not counted. An empty file yields 0. It
// returns 0 and the error if the file cannot be opened or scanning fails.
func readFile(filename string) (int, error) {
	f, err := os.Open(filename)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	// Count every line first, then discount the header if there was one.
	sc := bufio.NewScanner(f)
	total := 0
	for sc.Scan() {
		total++
	}
	if scanErr := sc.Err(); scanErr != nil {
		return 0, scanErr
	}
	if total == 0 {
		return 0, nil
	}
	return total - 1, nil
}
// main counts the non-header lines of test.file via readFile and prints the
// count; a readFile error is written to standard error instead.
func main() {
	const filename = `test.file`
	switch count, err := readFile(filename); {
	case err != nil:
		fmt.Fprintln(os.Stderr, err)
	default:
		fmt.Println(count)
	}
}
Output:
$ cat test.file
1234567890
abc
$ go run count.go
1
$
you can try something like this
// readFile returns the number of lines in the file at fileLocation, not
// counting the first (header) line. A file that cannot be opened, or that is
// empty, yields 0.
func readFile(fileLocation string) int {
	fileOpen, err := os.Open(fileLocation)
	if err != nil {
		return 0
	}
	defer fileOpen.Close()

	fileScanner := bufio.NewScanner(fileOpen)
	// Consume the first line and ignore it. If the file is empty this returns
	// false, and so does the Scan in the loop below.
	fileScanner.Scan()

	counter := 0
	for fileScanner.Scan() {
		// read remaining lines and process
		txt := fileScanner.Text()
		_ = txt // do something with text (an unused variable does not compile)
		counter = counter + 1
	}
	return counter
}
Edit:
// readFile returns the number of lines in the file at fileLocation, not
// counting the first (header) line. A file that cannot be opened, or that has
// no lines at all, yields 0.
func readFile(fileLocation string) int {
	fileOpen, err := os.Open(fileLocation)
	if err != nil {
		return 0
	}
	defer fileOpen.Close()

	fileScanner := bufio.NewScanner(fileOpen)
	// read first line and ignore; nothing to count if there is none
	if !fileScanner.Scan() {
		return 0
	}

	counter := 0
	for fileScanner.Scan() {
		// read remaining lines and process
		txt := fileScanner.Text()
		_ = txt // do something with text (an unused variable does not compile)
		counter = counter + 1
	}
	return counter
}

Os/exec elegant, loop compatible stdin and stdout input/output

Example script is just wrapper to "wc -m" command, simple symbol counter.
I am just trying to feed its input with the elements of the "teststrings" slice and to receive the character count of each string in the output-listener goroutine. I am looking for a way to make "wc" keep listening for input indefinitely. I've noticed that when I increase the sleep to
time.Sleep(6000 * time.Nanosecond)
wc don't wait for input.
package main
import (
"bytes"
"fmt"
"os/exec"
"time"
)
// main starts "wc -m" with in-memory buffers as its stdin and stdout, writes a
// few test strings into the input buffer after the command has been started,
// and prints whatever lines show up in the output buffer until the user
// presses Enter.
func main() {
BashCommand := exec.Command("wc", "-m")
InputBytes := &bytes.Buffer{}
OutputBytes := &bytes.Buffer{}
BashCommand.Stdin = InputBytes
BashCommand.Stdout = OutputBytes
// The command is started before anything has been written to InputBytes;
// the start error is only inspected a few lines further down.
e := BashCommand.Start()
time.Sleep(1 * time.Nanosecond)
_, _ = InputBytes.Write([]byte("13symbolsting"))
if e != nil {
fmt.Println(e)
}
fmt.Println("after run")
teststrings := []string{
"one",
"twoo",
"threeeee",
}
// These writes also happen after Start; their results are discarded.
for _, s := range teststrings {
_, _ = InputBytes.Write([]byte(s))
}
//result printer
// This goroutine polls OutputBytes in a loop with no exit condition and no
// pause, discarding ReadString's error.
// NOTE(review): bytes.Buffer is not documented as safe for concurrent use;
// confirm whether the running command can write to it while this reads.
go func() {
for {
line, _ := OutputBytes.ReadString('\n')
if line != "" {
fmt.Println(line)
}
}
}()
var input string
fmt.Scanln(&input) //dont exit until keypress
}
If you increase the sleep to a large value, the goroutine started by the command to pump InputBytes to the process runs before data is written to InputBytes. The goroutine closes the pipe to the child and exits without having read any data.
Use pipes instead of bytes.Buffer:
c := exec.Command("wc", "-m")
w, _ := c.StdinPipe()
r, _ := c.StdoutPipe()
if err := c.Start(); err != nil {
log.Fatal(err)
}
w.Write([]byte("13symbolsting"))
teststrings := []string{
"one",
"twoo",
"threeeee",
}
for _, s := range teststrings {
w.Write([]byte(s))
}
w.Close() // Close pipe to indicate input is done.
var wg sync.WaitGroup
wg.Add(1)
go func() {
s := bufio.NewScanner(r)
for s.Scan() {
fmt.Println(s.Text())
}
wg.Done()
}()
wg.Wait()
Another option is to write to the bytes.Buffer before starting the command and wait for command to complete before reading the output:
c := exec.Command("wc", "-m")
var w, r bytes.Buffer
c.Stdin = &w
c.Stdout = &r
// Write data before starting command.
w.Write([]byte("13symbolsting"))
teststrings := []string{
"one",
"twoo",
"threeeee",
}
for _, s := range teststrings {
w.Write([]byte(s))
}
if err := c.Start(); err != nil {
log.Fatal(err)
}
// Wait for command to complete before reading data.
if err := c.Wait(); err != nil {
log.Fatal(err)
}
s := bufio.NewScanner(&r)
for s.Scan() {
fmt.Println(s.Text())
}

Resources