How to do nested goroutines - go

Here is a snippet of my code so far:
func DownloadFile(filepath string, url string) {
    // wg is assumed to be a package-level sync.WaitGroup
    defer wg.Done()
    resp, err := http.Get(url)
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()
    out, err := os.Create(filepath)
    if err != nil {
        log.Fatal(err)
    }
    defer out.Close()
    _, err = io.Copy(out, resp.Body)
    if err != nil {
        log.Fatal(err)
    }
}
func main() {
    for {
        fp := <computes filepath>
        u := <computes url>
        zero := len(x.String())
        j := 1
        for {
            wg.Add(1)
            pad := fmt.Sprintf("%0"+strconv.Itoa(zero)+"d", j)
            go DownloadFile(fp+string(os.PathSeparator)+pad+".txt", u)
            j = j + 1
        }
        wg.Wait()
    }
}
Right now, goroutines are launched in the inner for loop. I want to make my program faster by adding goroutines in the outer for loop as well.
EDIT : Progress Bar
The progress bar is currently incremented on every pass through the inner for loop. I want it to increment only once the inner for loop has completed.
func DownloadFile(filepath string, url string) {
    // wg is assumed to be a package-level sync.WaitGroup
    defer wg.Done()
    resp, err := http.Get(url)
    if err != nil {
        log.Fatal(err)
    }
    defer resp.Body.Close()
    out, err := os.Create(filepath)
    if err != nil {
        log.Fatal(err)
    }
    defer out.Close()
    _, err = io.Copy(out, resp.Body)
    if err != nil {
        log.Fatal(err)
    }
}
func main() {
    for {
        fp := <computes filepath>
        u := <computes url>
        zero := len(x.String())
        count := int(x.Int())
        bar := pb.StartNew(count)
        j := 1
        for {
            bar.Increment()
            wg.Add(1)
            pad := fmt.Sprintf("%0"+strconv.Itoa(zero)+"d", j)
            go DownloadFile(fp+string(os.PathSeparator)+pad+".txt", u)
            j = j + 1
        }
    }
    wg.Wait()
    bar.Finish()
}
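One way to get both behaviors is sketched below, with the assumptions stated loudly: pb is assumed to be github.com/cheggaaa/pb, totalBatches stands in for however many outer iterations you actually run, and DownloadFile is assumed to no longer call wg.Done() itself (the caller owns the WaitGroup). Each outer iteration gets its own WaitGroup, the whole inner batch runs from one goroutine, and the bar advances only after that batch's Wait() returns:

func downloadBatch(dir, u string, count, width int, bar *pb.ProgressBar, outer *sync.WaitGroup) {
    defer outer.Done()
    var inner sync.WaitGroup
    for j := 1; j <= count; j++ {
        inner.Add(1)
        name := dir + string(os.PathSeparator) + fmt.Sprintf("%0*d", width, j) + ".txt"
        go func(name string) {
            defer inner.Done()
            DownloadFile(name, u) // assumed: no wg.Done() inside anymore
        }(name)
    }
    inner.Wait()    // block until the whole inner batch is done...
    bar.Increment() // ...then advance the bar exactly once
}

func main() {
    var outer sync.WaitGroup
    bar := pb.StartNew(totalBatches) // totalBatches: placeholder for the number of outer iterations
    for {
        fp := <computes filepath>
        u := <computes url>
        zero := len(x.String())
        count := int(x.Int())
        outer.Add(1)
        go downloadBatch(fp, u, count, zero, bar, &outer)
    }
    outer.Wait()
    bar.Finish()
}

Note that log.Fatal inside DownloadFile still aborts the whole process on the first failed download; returning an error and handling it per batch would be gentler.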


Concurrency but only do each task once

func main() {
    //switch statement here that runs grabusernames()
}
func grabusernames() {
    f, err := os.OpenFile("longlist.txt", os.O_RDONLY, os.ModePerm)
    if err != nil {
        log.Fatalf("open file error: %v", err)
        return
    }
    defer f.Close()
    rd := bufio.NewReader(f)
    for {
        line, err := rd.ReadString('\n')
        line2 := strings.TrimSpace(line)
        if err != nil {
            if err == io.EOF {
                break
            }
            log.Fatalf("read file line error: %v", err)
            return
        }
        tellonym(line2)
    }
}
func tellonym(line2 string) {
    threads := 10
    swg := sizedwaitgroup.New(threads)
    for i := 0; i < 1000; i++ {
        swg.Add()
        go func(i int) {
            defer swg.Done()
            var client http.Client
            resp, err := client.Get("https://tellonym.me/" + line2)
            if err != nil {
                log.Fatal(err)
            }
            defer resp.Body.Close()
            //fmt.Println("Response code: ", resp.StatusCode)
            if resp.StatusCode == 404 {
                fmt.Println("Username " + line2 + " not taken")
            } else if resp.StatusCode == 200 {
                fmt.Println("username " + line2 + " taken")
            } else {
                fmt.Println("Something else, response code: ", resp.StatusCode)
            }
        }(i)
    }
    swg.Wait() // wait for the in-flight checks before returning
}
The issue with the code above is that it checks the same username 1,000 times.
I'd like it to check each username in longlist.txt once, but concurrently (it's a long list and I'd like it to be fast).
Current output:
Username causenot taken
Username causenot taken
Username causenot taken
Username causenot taken
Desired output:
Username causenot taken
Username billybob taken
Username something taken
Username stacker taken
You have to use goroutines on the tellonym(line2) call. In your for loop you are checking the same username 1,000 times.
func main() {
    //switch statement here that runs grabusernames()
}
func grabusernames() {
    f, err := os.OpenFile("longlist.txt", os.O_RDONLY, os.ModePerm)
    if err != nil {
        log.Fatalf("open file error: %v", err)
        return
    }
    defer f.Close()
    rd := bufio.NewReader(f)
    for {
        line, err := rd.ReadString('\n')
        line2 := strings.TrimSpace(line)
        if err != nil {
            if err == io.EOF {
                break
            }
            log.Fatalf("read file line error: %v", err)
            return
        }
        go tellonym(line2) // use goroutines in here
    }
}
Also take note of these details:
If you're reading from an io.Reader, consider it reading from a stream. It's a single input source, which you can't 'read in parallel' by its nature: under the hood you get a byte, wait for another one, get one more, and so on. Tokenizing it into words comes later, in a buffer.
Second, I hope you're not trying to use goroutines as a 'silver bullet' in a 'let's add goroutines and everything will just speed up' manner. Just because Go gives you such an easy way to use concurrency doesn't mean you should use it everywhere.
And finally, if you really need to split a huge file into words in parallel and you think the splitting part will be the bottleneck (I don't know your case, but I really doubt it), then you have to invent your own algorithm and use the 'os' package to Seek()/Read() parts of the file, each processed by its own goroutine, tracking somehow which parts were already processed.
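For illustration only, a bare-bones sketch of that chunked approach. It uses ReadAt instead of a shared Seek()/Read(), since *os.File.ReadAt is safe for concurrent use; byte-offset chunks mean words spanning chunk boundaries would still need stitching, which is exactly the invent-your-own-algorithm part:

package main

import (
    "fmt"
    "os"
    "sync"
)

func main() {
    const chunkSize = 1 << 20 // 1 MiB per goroutine
    f, err := os.Open("longlist.txt")
    if err != nil {
        panic(err)
    }
    defer f.Close()
    fi, err := f.Stat()
    if err != nil {
        panic(err)
    }
    var wg sync.WaitGroup
    for off := int64(0); off < fi.Size(); off += chunkSize {
        wg.Add(1)
        go func(off int64) {
            defer wg.Done()
            buf := make([]byte, chunkSize)
            n, _ := f.ReadAt(buf, off) // positional read, no shared file offset
            fmt.Printf("chunk at %d: %d bytes\n", off, n)
        }(off)
    }
    wg.Wait()
}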
Try this:
func grabusernames() {
    f, err := os.OpenFile("longlist.txt", os.O_RDONLY, os.ModePerm)
    if err != nil {
        log.Fatalf("open file error: %v", err)
        return
    }
    defer f.Close()
    rd := bufio.NewReader(f)
    ch := make(chan struct{}, 10) // semaphore: at most 10 checks in flight
    var sem sync.WaitGroup
    for {
        line, err := rd.ReadString('\n')
        line2 := strings.TrimSpace(line)
        if err != nil {
            if err == io.EOF {
                break
            }
            log.Fatalf("read file line error: %v", err)
            return
        }
        ch <- struct{}{} // blocks while 10 goroutines are already running
        sem.Add(1)
        go tellonym(line2, ch, &sem)
    }
    sem.Wait()
}
func tellonym(line2 string, ch chan struct{}, sem *sync.WaitGroup) {
    defer func() {
        sem.Done()
        <-ch // release the semaphore slot
    }()
    var client http.Client
    resp, err := client.Get("https://tellonym.me/" + line2)
    if err != nil {
        log.Fatal(err) // note: this exits the whole process on the first failed request
    }
    defer resp.Body.Close()
    //fmt.Println("Response code: ", resp.StatusCode)
    if resp.StatusCode == 404 {
        fmt.Println("Username " + line2 + " not taken")
    } else if resp.StatusCode == 200 {
        fmt.Println("username " + line2 + " taken")
    } else {
        fmt.Println("Something else, response code: ", resp.StatusCode)
    }
}
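An alternative sketch without the third-party sized wait group: golang.org/x/sync/errgroup has a built-in concurrency limit via SetLimit, which plays the role of the buffered channel here. Everything else (the file format, the URL) is assumed to be the same setup as above:

package main

import (
    "bufio"
    "fmt"
    "log"
    "net/http"
    "os"
    "strings"

    "golang.org/x/sync/errgroup"
)

func main() {
    f, err := os.Open("longlist.txt")
    if err != nil {
        log.Fatalf("open file error: %v", err)
    }
    defer f.Close()

    var g errgroup.Group
    g.SetLimit(10) // at most 10 checks in flight

    sc := bufio.NewScanner(f)
    for sc.Scan() {
        name := strings.TrimSpace(sc.Text())
        if name == "" {
            continue
        }
        g.Go(func() error { // blocks while the limit is reached
            resp, err := http.Get("https://tellonym.me/" + name)
            if err != nil {
                return err
            }
            defer resp.Body.Close()
            switch resp.StatusCode {
            case 404:
                fmt.Println("Username " + name + " not taken")
            case 200:
                fmt.Println("Username " + name + " taken")
            default:
                fmt.Println("Something else, response code: ", resp.StatusCode)
            }
            return nil
        })
    }
    if err := sc.Err(); err != nil {
        log.Fatalf("read file error: %v", err)
    }
    if err := g.Wait(); err != nil { // first error, if any, surfaces here
        log.Println(err)
    }
}

Unlike log.Fatal inside a goroutine, a returned error here kills nothing else and is simply reported after g.Wait().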

How do I call a database insert operation using ants in Go

I have a for loop that inserts data into 2 different tables. How can I use ants (package below) in this case?
GH Package Ref: https://github.com/panjf2000/ants
for _, row := range rows {
    user := User{}
    user.Name = row.Name
    user.Email = row.Email
    err := dm.Insert(&user)
    if err != nil {
        panic(err)
    }
    address := Address{}
    address.Address1 = row.Address1
    address.Address2 = row.Address2
    address.PinCode = row.PinCode
    address.City = row.City
    err = dm.Insert(&address)
    if err != nil {
        panic(err)
    }
}
Something like:
func insertRow() {
    // TODO add code to get 'rows'
    const workerCount = 10
    p, err := ants.NewPool(workerCount)
    if err != nil {
        panic(err)
    }
    defer p.Release()
    rowChan := make(chan RowType) // RowType is whatever type 'rows' holds
    var wg sync.WaitGroup
    insertRecords := func() {
        defer wg.Done()
        row := <-rowChan
        user := User{}
        user.Name = row.Name
        user.Email = row.Email
        err := dm.Insert(&user)
        if err != nil {
            panic(err)
        }
        address := Address{}
        address.Address1 = row.Address1
        address.Address2 = row.Address2
        address.PinCode = row.PinCode
        address.City = row.City
        err = dm.Insert(&address)
        if err != nil {
            panic(err)
        }
    }
    for _, row := range rows {
        wg.Add(1)
        _ = p.Submit(insertRecords)
        rowChan <- row
    }
    wg.Wait()
}
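ants also provides NewPoolWithFunc, which hands each Invoke argument to a pooled worker and avoids the hand-rolled channel entirely. A sketch under the same assumptions as above (dm, User, Address, and the placeholder RowType come from the question):

func insertRows(rows []RowType) {
    var wg sync.WaitGroup
    p, err := ants.NewPoolWithFunc(10, func(arg interface{}) {
        defer wg.Done()
        row := arg.(RowType) // each Invoke delivers one row here
        user := User{Name: row.Name, Email: row.Email}
        if err := dm.Insert(&user); err != nil {
            panic(err)
        }
        address := Address{
            Address1: row.Address1,
            Address2: row.Address2,
            PinCode:  row.PinCode,
            City:     row.City,
        }
        if err := dm.Insert(&address); err != nil {
            panic(err)
        }
    })
    if err != nil {
        panic(err)
    }
    defer p.Release()
    for _, row := range rows {
        wg.Add(1)
        if err := p.Invoke(row); err != nil {
            wg.Done() // the task was never queued
            panic(err)
        }
    }
    wg.Wait()
}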

Client stuck when trying to read with io.CopyN() in golang

I am trying to make a TCP server for transferring files, using io.CopyN for reading and writing. The server sends all bytes fine, but the client gets stuck after reading a couple of 1000000-byte chunks. Sometimes it works fine and sometimes it gets stuck. I am testing with a 300 MB pdf. Any help is appreciated; code and output are below.
server
package main

import (
    "fmt"
    "io"
    "log"
    "net"
    "os"
    "strconv"
    "strings"
)

func main() {
    ls, err := net.Listen("tcp", ":1234")
    errFunc(err)
    defer ls.Close()
    conn, _ := ls.Accept()
    defer conn.Close()
    for {
        file, err := os.Open(strings.TrimSpace("./" + "Mag" + ".pdf"))
        errFunc(err)
        defer file.Close()
        fileInfo, err := file.Stat()
        errFunc(err)
        size := fileInfo.Size()
        numberOfTime := size / 1000000
        leftByte := size - numberOfTime*1000000
        numberOfTimeString := strconv.Itoa(int(numberOfTime))
        leftByteString := strconv.Itoa(int(leftByte))
        fmt.Println("1000000 times : ", numberOfTimeString)
        fmt.Println("Left Bytes : ", leftByteString)
        _, err = fmt.Fprintf(conn, numberOfTimeString+"\n")
        errFunc(err)
        _, err = fmt.Fprintf(conn, leftByteString+"\n")
        errFunc(err)
        fileWriter := io.Writer(conn)
        for i := 0; i < int(numberOfTime); i++ {
            n, err := io.CopyN(conn, file, 1000000)
            if i >= 30 {
                fmt.Println(err, n)
            }
        }
        n, err := io.CopyN(fileWriter, file, leftByte+1)
        if err == io.EOF {
            fmt.Println(err, n)
        }
        fmt.Printf("Succefully bytes sent : %v \n\n\n\n\n", n)
        file.Close()
    }
}

func errFunc(err error) {
    if err != nil {
        log.Fatal(err)
    }
}
client
package main

import (
    "bufio"
    "fmt"
    "io"
    "net"
    "os"
    "os/signal"
    "strconv"
    "strings"
    "syscall"
)

func main() {
    c := make(chan os.Signal, 15)
    signal.Notify(c, syscall.SIGINT)
    go func() {
        for {
            s := <-c
            switch s {
            case syscall.SIGINT:
                os.Exit(1)
            }
        }
    }()
    conn, _ := net.Dial("tcp", ":1234")
    defer conn.Close()
    connReadWrite := bufio.NewReader(io.Reader(conn))
    var i int
    var filename string
    for {
        i++
        nu := strconv.Itoa(i)
        filename = "image" + nu + ".pdf"
        file, err := os.Create(filename)
        defer file.Close()
        numberOfTimeString, err := connReadWrite.ReadString('\n')
        if err != nil {
            fmt.Println(err)
        }
        println("1000000 times :", numberOfTimeString)
        numberOfTimeString = strings.TrimSuffix(numberOfTimeString, "\n")
        numberOfTime, err := strconv.Atoi(numberOfTimeString)
        if err != nil {
            fmt.Println(err)
        }
        leftByteString, err := connReadWrite.ReadString('\n')
        if err != nil {
            println(err)
        }
        println("Left Bytes :", leftByteString)
        leftByteString = strings.TrimSuffix(leftByteString, "\n")
        leftByte, err := strconv.Atoi(leftByteString)
        if err != nil {
            panic(err)
        }
        fmt.Println("After convert in Num :", numberOfTime, leftByte)
        newFileWriter := io.Writer(file)
        newFileReader := io.Reader(conn)
        for i := 0; i < numberOfTime; i++ {
            n, err := io.CopyN(newFileWriter, newFileReader, 1000000)
            if i >= 30 {
                errFun(err, n)
            }
        }
        n, err := io.CopyN(newFileWriter, newFileReader, int64(leftByte))
        errFun(err, n)
        fmt.Printf("sucessfully Transfered ---> \n\n\n\n\n\n")
    }
}

func errFun(err error, n int64) {
    if err == io.EOF {
        fmt.Println("End of file : ", n)
        return
    } else if n == 0 {
        fmt.Println("n is : ", n)
        return
    } else if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println(err, " : ", n)
}
input/output
From the server side we first send the number of chunks the client needs to read; the client receives that count and then reads the file. In the picture, I was able to send the file once, but the second time it got stuck (sometimes it gets stuck the first time too). The server sends the chunk count the second time as well, but the client doesn't read it as a number: it reads something like "%PDF..." and doesn't even print "1000000 times :" correctly, it prints "%???00 times :". I just don't understand this.
I believe the issue is that you're wrapping the connection in a bufio.Reader in the client:
connReadWrite := bufio.NewReader(io.Reader(conn))
But you aren't using it with the later CopyN calls, which read from conn directly. The bufio.Reader reads ahead, so part of the file body is already sitting in its buffer; those bytes never arrive via conn, and a later header read sees file bytes like "%PDF" instead:
newFileWriter := io.Writer(file)
newFileReader := io.Reader(conn)
for i := 0; i < numberOfTime; i++ {
    _, err := io.CopyN(newFileWriter, newFileReader, 1000000)
    if err != nil {
        log.Fatalln(err)
    }
}
Using:
newFileWriter := io.Writer(file)
for i := 0; i < numberOfTime; i++ {
    _, err := io.CopyN(newFileWriter, connReadWrite, 1000000)
    if err != nil {
        log.Fatalln(err)
    }
}
May fix it.
If you have control over the protocol you are using to send the file, I recommend doing something simpler: for example, a big-endian int64 length prefix.
Send:
func sendFile(name string, conn net.Conn) error {
    f, err := os.Open(name)
    if err != nil {
        return err
    }
    defer f.Close()
    fi, err := f.Stat()
    if err != nil {
        return err
    }
    sz := fi.Size()
    buf := bufio.NewWriter(conn)
    err = binary.Write(buf, binary.BigEndian, sz)
    if err != nil {
        return err
    }
    _, err = io.CopyN(buf, f, sz)
    if err != nil {
        return err
    }
    return buf.Flush()
}
Receive:
func recvFile(name string, conn net.Conn) error {
    f, err := os.Create(name)
    if err != nil {
        return err
    }
    defer f.Close()
    buf := bufio.NewReader(conn)
    var sz int64
    err = binary.Read(buf, binary.BigEndian, &sz)
    if err != nil {
        return err
    }
    _, err = io.CopyN(f, buf, sz)
    if err != nil {
        return err
    }
    return nil
}
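A minimal sketch of wiring these together, assuming one client and the same port as the question. Because each file is framed by its length prefix, both sides can call these repeatedly over the same connection:

// Server side: accept one client, then send the file.
ls, err := net.Listen("tcp", ":1234")
if err != nil {
    log.Fatal(err)
}
defer ls.Close()
conn, err := ls.Accept()
if err != nil {
    log.Fatal(err)
}
defer conn.Close()
if err := sendFile("Mag.pdf", conn); err != nil {
    log.Fatal(err)
}

// Client side: dial, then receive into a new file.
conn, err := net.Dial("tcp", ":1234")
if err != nil {
    log.Fatal(err)
}
defer conn.Close()
if err := recvFile("image1.pdf", conn); err != nil {
    log.Fatal(err)
}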

I'm encountering multiple errors when trying to concurrently parse sites with Go

Following some of @Sam Whited's advice and doing some research on Stack Overflow, I've rewritten my code (see below). This version seems more stable; however, every once in a while I get a slew of TCP errors, as if I'm not closing my requests. I've throttled the requests by adding a sleep, which seems to help a bit.
func main() {
    runtime.GOMAXPROCS(maxParallelism())
    var file = flag.String("f", "", "Enter new line deliminated text file")
    var fileName = flag.String("s", "contact_bot.csv", "Enter new line deliminated text file")
    flag.Parse()
    if *file != "" {
        counter := 0
        filters = []string{"info", "ads", "sales", "sale", "info", "media", "mediarelations", "media_relations", "contact", "contacts", "contactus", "contact_us", "contact-us", "about_us", "general", "advertise", "support", "systems", "system"}
        emailRE = regexp.MustCompile(`([a-z0-9!#$%&'*+\/=?^_{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_{|}~-]+)*(#|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)`)
        seedUrls, err := readLines(*file)
        checkErr(err)
        numberOfUrls := len(seedUrls)
        usr, err := user.Current()
        checkErr(err)
        parentPath := filepath.Join(usr.HomeDir, "/Desktop/"+*fileName)
        file, err := os.Create(parentPath)
        checkErr(err)
        defer file.Close()
        writer := csv.NewWriter(file)
        defer writer.Flush()
        var header = []string{"URL", "EMAILS"}
        err = writer.Write(header)
        checkErr(err)
        data = make(chan *HTTPResponse)
        go asyncHTTPGets(seedUrls)
    loop:
        for result := range data {
            counter++
            emails := findEmails(result.HTML, filters)
            fmt.Printf("%s, %s, %s\n", result.URL, emails, strconv.Itoa(numberOfUrls))
            var row = []string{result.URL, strings.Join(emails, ",")}
            err := writer.Write(row)
            // writer.Flush()
            checkErr(err)
            if counter == len(seedUrls) {
                break loop
            }
            numberOfUrls--
        }
    }
}
// AsyncHTTPGets ...
func asyncHTTPGets(urls []string) {
    counter := 0
    for _, url := range urls {
        counter++
        if counter%10 == 0 {
            time.Sleep(1 * time.Second)
        }
        go func(url string) {
            fmt.Printf("Fetching %s \n", url)
            resp, err := http.Get(url)
            if err != nil {
                fmt.Println(err.Error())
                data <- &HTTPResponse{url, err.Error()}
                return
            }
            b := resp.Body
            buf := new(bytes.Buffer)
            buf.ReadFrom(b)
            resp.Body.Close()
            myHTML := buf.String()
            data <- &HTTPResponse{url, myHTML}
        }(url)
    }
}
func findEmails(html string, filters []string) []string {
    emails := emailRE.FindAllString(html, -1)
    filteredEmails := []string{}
    for _, email := range emails {
        if stringInSlice(email, filters) {
            if !stringInSlice(email, filteredEmails) {
                filteredEmails = append(filteredEmails, email)
            }
        }
    }
    sort.Strings(filteredEmails)
    return filteredEmails
}
The application will open a large number of sockets and possibly breach the file descriptor limit. I suggest limiting the number of concurrent requests to prevent this:
var (
    requestMu    sync.Mutex // protects requestCount
    requestCount int        // incremented on each request
)

// Create 10 workers. Adjust up or down as needed.
for w := 0; w < 10; w++ {
    go func() {
        for {
            // Increment request count. Exit at end.
            requestMu.Lock()
            i := requestCount
            requestCount++
            requestMu.Unlock()
            if i >= len(seedUrls) {
                return
            }
            // Fetch the current URL.
            myURL := seedUrls[i]
            resp, err := http.Get(myURL)
            if err != nil {
                fmt.Println(myURL, err.Error(), i)
                data <- &HTTPResponse{myURL, err.Error()}
                continue
            }
            // Read body and close.
            b, err := ioutil.ReadAll(resp.Body)
            resp.Body.Close()
            if err != nil {
                fmt.Println(myURL, err.Error(), i)
                data <- &HTTPResponse{myURL, err.Error()}
                continue
            }
            myHTML := string(b)
            data <- &HTTPResponse{myURL, myHTML}
        }
    }()
}

// Receive the expected number of results.
for i := 0; i < len(seedUrls); i++ {
    result := <-data
    emails := findEmails(result.HTML, filters)
    fmt.Printf("%s, %s, %d\n", result.URL, emails, i)
    var row = []string{result.URL, strings.Join(emails, ",")}
    err := writer.Write(row)
    writer.Flush()
    if err != nil {
        panic(err)
    }
}
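The same throttling can also be expressed by feeding the workers from a channel rather than sharing a guarded counter, which drops the mutex. A sketch reusing the seedUrls, data channel, and HTTPResponse type from above:

urlChan := make(chan string)
go func() {
    for _, u := range seedUrls {
        urlChan <- u
    }
    close(urlChan) // lets the workers' range loops end
}()
for w := 0; w < 10; w++ {
    go func() {
        for u := range urlChan {
            resp, err := http.Get(u)
            if err != nil {
                data <- &HTTPResponse{u, err.Error()}
                continue
            }
            b, err := ioutil.ReadAll(resp.Body)
            resp.Body.Close() // always close, even on read error
            if err != nil {
                data <- &HTTPResponse{u, err.Error()}
                continue
            }
            data <- &HTTPResponse{u, string(b)}
        }
    }()
}

The receive loop stays exactly as above.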

Memory management in Go

I have a closure where I declare and define local variables:
func writer_factory() func() *net.TCPConn {
    response_port := "localhost:8000"
    tcpAddr_res, err := net.ResolveTCPAddr("tcp4", response_port)
    checkError(err)
    var response_writer *net.TCPConn
    checkError(err)
    return func() *net.TCPConn {
        if response_writer == nil {
            response_writer, err = net.DialTCP("tcp", nil, tcpAddr_res)
            checkError(err)
        }
        return response_writer
    }
}
Now my question is, if I call this writer_factory multiple times will I get a memory leak?
So specifically, will I have a memory leak using writer_factory in this program:
package main

import (
    "fmt"
    "net"
    "os"
    "strings"
    // "io/ioutil"
)

//fmt.Printf("messages are (1) %q\n", messages)
func main() {
    end_of_message_terminator := "||"
    beginning_of_next_message := ""
    request := make([]byte, 512)
    service_port := ":7777"
    tcpAddr, err := net.ResolveTCPAddr("tcp4", service_port)
    checkError(err)
    listener, err := net.ListenTCP("tcp", tcpAddr)
    checkError(err)
    for {
        response_writer := writer_factory()
        conn, err := listener.Accept()
        if err != nil {
            continue
        }
        read_len, err := conn.Read(request)
        if read_len == 0 {
            continue
        }
        request_string := string(request[:read_len])
        messages := strings.Split(request_string, end_of_message_terminator)
        messages[0] = beginning_of_next_message + messages[0]
        if messages[len(messages)-1] != "" {
            beginning_of_next_message = messages[len(messages)-1]
            messages[len(messages)-1] = ""
        }
        if len(messages) == 1 {
            continue
        }
        rw := response_writer()
        join_channel := make(chan struct{})
        for i := 0; i < len(messages); i++ {
            go func(i int, rw *net.TCPConn) {
                respond_to_message(messages[i], rw)
                join_channel <- struct{}{}
            }(i, rw)
        }
        go func() {
            for i := 0; i < len(messages); i++ {
                <-join_channel
            }
            rw.Close()
        }()
        conn.Close()
    }
}

func writer_factory() func() *net.TCPConn {
    response_port := "localhost:8000"
    tcpAddr_res, err := net.ResolveTCPAddr("tcp4", response_port)
    checkError(err)
    var response_writer *net.TCPConn
    checkError(err)
    return func() *net.TCPConn {
        if response_writer == nil {
            response_writer, err = net.DialTCP("tcp", nil, tcpAddr_res)
            checkError(err)
        }
        return response_writer
    }
}

func respond_to_message(message string, response_writer *net.TCPConn) {
    message_parameters := strings.Split(message, "|")
    //response_writer.Write([]byte("asti de chris"))
    for i := range message_parameters {
        param_parts := strings.Split(message_parameters[i], "=")
        param_name := param_parts[0]
        //param_value := param_parts[1]
        response_writer.Write([]byte(param_name))
        //fmt.Println(string(result))
    }
}

func checkError(err error) {
    if err != nil {
        fmt.Fprintf(os.Stderr, "Fatal error: %s", err.Error())
        os.Exit(1)
    }
}
To exercise this code, launch the program, then run netcat -l -p 8000, and then run printf "asti||" | netcat localhost 7777.
You can definitely improve performance by using one connection or, if one connection is not thread-safe, a sync.Pool. That would save you memory allocations (and with them, garbage collection) and also the time wasted opening all those TCP connections.
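A minimal sketch of the single-connection suggestion, with illustrative names (writerFactory, the port, and the message format are placeholders, not the question's exact code): the factory is called once, outside any loop, and the closure caches the one dialed connection, so repeated use allocates nothing new:

package main

import (
    "fmt"
    "net"
)

// writerFactory returns a closure that dials on first use and then
// keeps handing back the same cached connection.
func writerFactory(addr string) func() (*net.TCPConn, error) {
    tcpAddr, err := net.ResolveTCPAddr("tcp4", addr)
    var cached *net.TCPConn
    return func() (*net.TCPConn, error) {
        if err != nil {
            return nil, err
        }
        if cached == nil {
            cached, err = net.DialTCP("tcp", nil, tcpAddr)
        }
        return cached, err
    }
}

func main() {
    writer := writerFactory("localhost:8000") // once, hoisted out of the loop
    for i := 0; i < 3; i++ {
        w, err := writer() // dials on the first call, reuses afterwards
        if err != nil {
            fmt.Println("dial:", err)
            return
        }
        fmt.Fprintf(w, "message %d||", i)
    }
}

As for the original question: each writer_factory call only allocates a small closure plus, on first use, one connection. The closures are garbage-collected once unreachable, so repeated calls are not a leak in the GC sense, but any connections that were dialed and never Close()d stay open until the process exits.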
