How to determine which goroutine is blocking execution? - go

Hello all.
I have a small parser that writes found data to Postgres; as the database driver I use https://github.com/jackc/pgx.
I write parsed data to an unbuffered channel from various goroutines.
I have a special goroutine where I read data from this channel and write it to the database.
While debugging, the application hangs forever at some point (perhaps waiting for a free database connection from the pool).
How to determine which goroutine is blocking execution?
I've heard there is pprof, but I've never used it.
Thanks.
Minimal example:
I have a struct like this:
type ParsingResults struct {
	parser DataParser
	data   []*common.Data
	err    error
}
In a separate goroutine I initialize an unbuffered channel like this:
results = make(chan *ParsingResults)
then I start various goroutines, where I run parsers:
go fetcher.Parse(results)
each parser gathers data and passes it to the channel like this:
var (
	results     chan<- *ParsingResults
	pageResults *ParsingResults
)

results <- pageResults
if pageResults.err != nil {
	return
}
time.Sleep(p.provider.DelayBetweenPages)
and in a separate goroutine such a function is launched:
func (fetcher *Fetcher) waitForResults(ctx context.Context) {
	for {
		select {
		case results := <-fetcher.resultsChannel:
			provider := results.parser.GetProvider()
			if results.err != nil {
				common.Logger.Errorw("failed to fetch data from provider",
					"provider", provider.Url,
					"error", results.err)
				continue
			}
			data := fetcher.removeDuplicates(results.data)
			common.Logger.Infow("fetched some data",
				"provider", provider.Url,
				"rows_count", len(results.data),
				"unique_rows_count", len(data))
			_, err := fetcher.Repo.SaveFetchedData(ctx, data)
			if err != nil {
				common.Logger.Errorw("failed to save fetched data",
					"provider", provider.Url,
					"error", err)
				continue
			}
			common.Logger.Infow("fetched data were saved successfully",
				"provider", provider.Url,
				"rows_count", len(results.data),
				"unique_rows_count", len(data))
		case <-ctx.Done():
			return
		default:
			common.Logger.Infow("for debugging's sake! waiting for some data to arrive!")
		}
	}
}
the data is stored in the database in this function:
func (repo *Repository) SaveFetchedData(ctx context.Context, rows []*common.Data) (int64, error) {
	if len(rows) == 0 {
		return 0, nil
	}
	baseQB := sq.Insert(db.DataTableName).
		Columns(saveFetchedDataCols...).
		PlaceholderFormat(sq.Dollar)
	batch := &pgx.Batch{}
	for _, p := range rows {
		curQB := baseQB.Values(p.Row1, p.Row2, sq.Expr("NOW()"))
		curQuery, curArgs, err := curQB.ToSql()
		if err != nil {
			return 0, fmt.Errorf("failed to generate SQL query: %w", err)
		}
		batch.Queue(curQuery, curArgs...)
	}
	br := repo.pool.SendBatch(ctx, batch)
	ct, err := br.Exec()
	if err != nil {
		return 0, fmt.Errorf("failed to run SQL query batch: %w", err)
	}
	return ct.RowsAffected(), nil
}

I checked the full goroutine stack dump in pprof. The error was that I did not release the connection back to the pool after processing the result of the batch request.
So after 10 requests the pool was completely exhausted and the writer goroutine blocked. Guys, y'all are the best. Thanks for the help.
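For anyone hitting the same issue, here is a minimal sketch of the two pieces involved; the HTTP listen address is just an example, and the rest follows the code above. First, exposing the full goroutine dump via net/http/pprof:

import (
	"net/http"
	_ "net/http/pprof" // registers the /debug/pprof handlers
)

func init() {
	go func() {
		// Open http://localhost:6060/debug/pprof/goroutine?debug=2 to see
		// every goroutine's stack and what it is currently blocked on.
		http.ListenAndServe("localhost:6060", nil)
	}()
}

And second, the fix itself: close the batch results so the connection is returned to the pool, roughly:

br := repo.pool.SendBatch(ctx, batch)
defer br.Close() // releases the connection back to the pool

ct, err := br.Exec()
if err != nil {
	return 0, fmt.Errorf("failed to run SQL query batch: %w", err)
}
return ct.RowsAffected(), nil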

Related

automatic gRPC unix reconnect after EOF

I have an application (let's call it client) connecting to another process (let's call it server) on the same machine via gRPC. The communication goes over unix socket.
If server is restarted, my client gets an EOF and does not re-establish the connection, although I expected the clientConn to handle the reconnection automatically.
Why isn't the dialer taking care of the reconnection?
I expect it to do so with the backoff params I passed.
Below is a pseudo-MWE.
Run establishes the initial connection, then spawns goroutineOne
goroutineOne waits for the connection to be ready and delegates the send to fooUpdater
fooUpdater streams the data, or returns in case of errors
for waitUntilReady I used the pseudo-code referenced by this answer to get a new stream.
func main() {
	go func() {
		if err := Run(ctx); err != nil {
			log.Errorf("connection error: %v", err)
		}
		ctxCancel()
	}()
	// some wait logic
}

func Run(ctx context.Context) error {
	backoffConfig := backoff.Config{
		BaseDelay:  time.Duration(1 * time.Second),
		Multiplier: backoff.DefaultConfig.Multiplier,
		Jitter:     backoff.DefaultConfig.Jitter,
		MaxDelay:   time.Duration(120 * time.Second),
	}
	myConn, err := grpc.DialContext(ctx,
		"/var/run/foo.bar",
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithConnectParams(grpc.ConnectParams{Backoff: backoffConfig, MinConnectTimeout: time.Duration(1 * time.Second)}),
		grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
			d := net.Dialer{}
			c, err := d.DialContext(ctx, "unix", addr)
			if err != nil {
				return nil, fmt.Errorf("connection to unix://%s failed: %w", addr, err)
			}
			return c, nil
		}),
	)
	if err != nil {
		return fmt.Errorf("could not establish socket for foo: %w", err)
	}
	defer myConn.Close()
	return goroutineOne()
}

func goroutineOne() error {
	reconnect := make(chan struct{})
	for {
		if ready := waitUntilReady(ctx, myConn, time.Duration(2*time.Minute)); !ready {
			return fmt.Errorf("myConn: %w, timeout: %s", ErrWaitReadyTimeout, "2m")
		}
		go func() {
			if err := fooUpdater(ctx, dataBuffer, myConn); err != nil {
				log.Errorf("foo updater: %v", err)
			}
			reconnect <- struct{}{}
		}()
		select {
		case <-ctx.Done():
			return nil
		case <-reconnect:
		}
	}
}

func fooUpdater(ctx context.Context, dataBuffer custom.CircularBuffer, myConn *grpc.ClientConn) error {
	clientStream, err := myConn.Stream(ctx) // custom pb code, returns grpc.ClientConn.NewStream(...)
	if err != nil {
		return fmt.Errorf("could not obtain stream: %w", err)
	}
	for {
		select {
		case <-ctx.Done():
			return nil
		case data := <-dataBuffer:
			if err := clientStream.Send(data); err != nil {
				return fmt.Errorf("could not send data: %w", err)
			}
		}
	}
}

func waitUntilReady(ctx context.Context, conn *grpc.ClientConn, maxTimeout time.Duration) bool {
	ctx, cancel := context.WithTimeout(ctx, maxTimeout)
	defer cancel()
	currentState := conn.GetState()
	timeoutValid := true
	for currentState != connectivity.Ready && timeoutValid {
		timeoutValid = conn.WaitForStateChange(ctx, currentState)
		currentState = conn.GetState()
		// debug print currentState -> prints IDLE
	}
	return currentState == connectivity.Ready
}
Debugging hints also welcome :)
Based on the provided code and information, there might be an issue with how ctx.Done is being utilized.
ctx.Done() is used in both the fooUpdater and goroutineOne functions. When the connection breaks, I believe ctx.Done() fires in both, with the following execution order:
The connection breaks, the ctx.Done case in the fooUpdater function is selected and the function exits. The select statement in the goroutineOne function also executes its ctx.Done case, which exits the function, and the client doesn't reconnect.
Try debugging it to check whether both select case blocks get executed, but I believe that is the issue here.
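A minimal way to check that (the log lines are illustrative, not from the original code) is to record ctx.Err() in each ctx.Done branch and see whether both fire:

// In fooUpdater's select:
case <-ctx.Done():
	log.Errorf("fooUpdater: context done: %v", ctx.Err())
	return nil

// In goroutineOne's select:
case <-ctx.Done():
	log.Errorf("goroutineOne: context done: %v", ctx.Err())
	return nil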
According to the gRPC documentation, the connection is re-established only if the failure is transient; otherwise it fails immediately. You can try to verify that the failure is transient by printing the connectivity state.
You should also print the error code to understand why the RPC failed.
Maybe what you have tried is not considered a transient failure.
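A quick sketch of both checks (the logging is illustrative, not from the original code): inside fooUpdater, when the send fails, print the gRPC status code and the connection state using google.golang.org/grpc/status:

if err := clientStream.Send(data); err != nil {
	st, _ := status.FromError(err)
	log.Errorf("send failed: code=%s msg=%q conn state=%s",
		st.Code(), st.Message(), myConn.GetState())
	return fmt.Errorf("could not send data: %w", err)
}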
Also, according to the following entry, retry logic does not work with streams: grpc-java: Proper handling of retry on client for service streaming call
Here are the links to the corresponding docs:
https://grpc.github.io/grpc/core/md_doc_connectivity-semantics-and-api.html
https://pkg.go.dev/google.golang.org/grpc#section-readme
Also, check the following entry:
Ways to wait if server is not available in gRPC from client side

Connect kafka in go(sarama), the consumer can not get message through topic

I just want to follow a demo to try using Kafka in Go. I can successfully produce messages with sarama, but when I try to consume them, I get nothing.
package main

import (
	"fmt"

	"github.com/Shopify/sarama"
)

// kafka consumer
func main() {
	consumer, err := sarama.NewConsumer([]string{"127.0.0.1:9092"}, nil)
	if err != nil {
		fmt.Printf("fail to start consumer, err:%v\n", err)
		return
	}
	partitionList, err := consumer.Partitions("test")
	if err != nil {
		fmt.Printf("fail to get list of partition:err%v\n", err)
		return
	}
	fmt.Println(partitionList)
	for partition := range partitionList {
		pc, err := consumer.ConsumePartition("test", int32(partition), sarama.OffsetNewest)
		if err != nil {
			fmt.Printf("failed to start consumer for partition %d,err:%v\n", partition, err)
			return
		}
		defer pc.AsyncClose()
		go func(sarama.PartitionConsumer) {
			for msg := range pc.Messages() {
				fmt.Printf("Partition:%d Offset:%d Key:%v Value:%v", msg.Partition, msg.Offset, msg.Key, msg.Value)
			}
		}(pc)
	}
}
The output of the code is
[0]
-1
But I can actually get the messages through kafka-console-consumer.
I believe you are not waiting for the messages to arrive.
Here is the list of issues in your code:
defer pc.AsyncClose() triggers on function exit, not scope exit.
The goroutine is launched into nowhere: nothing blocks or waits for the results to come.
go func(sarama.PartitionConsumer) {
	for msg := range pc.Messages() {
		fmt.Printf("Partition:%d Offset:%d Key:%v Value:%v", msg.Partition, msg.Offset, msg.Key, msg.Value)
	}
}(pc)
The argument is not actually bound inside the goroutine: go func(sarama.PartitionConsumer) { declares only a type, no parameter name; it should be go func(pc sarama.PartitionConsumer) {.
Remove the goroutine and just read from the consumer channel directly if you want a hello-world example.
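A minimal sketch of that hello-world variant (same broker and topic as above, reading only partition 0 and blocking on the message channel; adjust as needed):

package main

import (
	"fmt"

	"github.com/Shopify/sarama"
)

func main() {
	consumer, err := sarama.NewConsumer([]string{"127.0.0.1:9092"}, nil)
	if err != nil {
		fmt.Printf("fail to start consumer, err:%v\n", err)
		return
	}
	defer consumer.Close()

	// No goroutine: block on the partition consumer's channel directly.
	pc, err := consumer.ConsumePartition("test", 0, sarama.OffsetNewest)
	if err != nil {
		fmt.Printf("failed to start partition consumer, err:%v\n", err)
		return
	}
	defer pc.Close()

	for msg := range pc.Messages() {
		fmt.Printf("Partition:%d Offset:%d Key:%s Value:%s\n",
			msg.Partition, msg.Offset, msg.Key, msg.Value)
	}
}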

how to cleanly stop goroutines internally on error

All,
I'm writing a program involving tcp traffic that has several points of failure, and
I'd like to be able to exit out of a goroutine smoothly in an error condition without incurring coding overhead.
Here's some pseudocode:
func main() {
	l, err := net.Listen(CONN_TYPE, CONN_HOST+":"+CONN_PORT)
	for {
		// Listen for an incoming connection.
		conn, err := l.Accept()
		if err != nil {
			fmt.Println("Error accepting: ", err.Error())
			os.Exit(1)
		}
		done_flag := make(chan bool, 1)
		// Handle connections in a new goroutine.
		go func() {
			conn.Write([]byte("string1\n"))
			conn.Write([]byte("string2\n"))
			...
		}()
	}
}
Now, what I'm trying to avoid is wrapping each connection statement in error handling inside the goroutine, something like the following:
go func() {
	if _err := _send_ack(conn, "string1\n"); _err != nil {
		done_flag <- true
	}
	if _err := _send_ack(conn, "string2\n"); _err != nil {
		done_flag <- true
	}
}()
Instead, if there is a connection issue, I'd rather short-circuit the whole thing and just exit the goroutine with an error right then and there, without having to worry about how I structure the code. I could perhaps further wrap _send_ack and pass the channel as a function parameter, but that gets iffy if the program becomes highly hierarchical. For example, I might have a goroutine composed of several funcs, each of which handles a different TCP conversation, and I don't want to litter my subroutines with an extra channel parameter just to propagate the channel up and down the call stack in case I have to set a done flag. Plus there is the question of what happens to the goroutine after the done flag is set and how to handle it in the caller.
If I were working in Python, Perl, or C++, I'd throw an exception, which carries a stack trace of where the error occurred, and process that error in the caller. But since Go doesn't have exceptions, I was hoping for a way to just stop the goroutine cold without exiting the main program, i.e. set a channel to the relevant error and then just stop execution at that point.
I see the panic function, but I'm not sure of its side effects. Can you panic() out of a goroutine without affecting the main program, or is there a way to intelligently short-circuit a goroutine without side effects, perhaps returning something akin to an exception, with a stack trace and error? Or what is the suggested way to handle errors cleanly in a hierarchical program like this?
Thanks much for any help - I'm new to golang and it probably shows.
Ed
Go suggests using explicit errors instead of implicit exceptions.
// for code simplicity
func doSendACKImpl(conn net.Conn) error {
	if err := _send_ack(conn, "string1\n"); err != nil {
		return err
	}
	if err := _send_ack(conn, "string2\n"); err != nil {
		return err
	}
	return nil
}

func main() {
	l, err := net.Listen(CONN_TYPE, CONN_HOST+":"+CONN_PORT)
	for {
		// Listen for an incoming connection.
		conn, err := l.Accept()
		if err != nil {
			fmt.Println("Error accepting: ", err.Error())
			os.Exit(1)
		}
		// can change to self defined ResponseType, here use error for demo
		workRes := make(chan error, 1)
		go func() {
			// return write back to channel
			workRes <- doSendACKImpl(conn)
		}()
		select {
		// read result back
		case resError := <-workRes:
			fmt.Printf("meet error %s", resError)
		}
	}
}
For more concurrency, use a larger channel buffer and move the result handling into another goroutine:
func main() {
	l, _ := net.Listen(CONN_TYPE, CONN_HOST+":"+CONN_PORT)
	// more result buffer size
	const workSize int = 100
	// can change to self defined ResponseType, here use error for demo
	workResBuffer := make(chan error, workSize)
	// goroutine collect result
	go func() {
		// get all result from worker responses
		for resError := range workResBuffer {
			fmt.Printf("meet error %s", resError)
		}
	}()
	for {
		// Listen for an incoming connection.
		conn, err := l.Accept()
		if err != nil {
			fmt.Println("Error accepting: ", err.Error())
			os.Exit(1)
		}
		// TODO: limit the goroutine number
		go func() {
			// return write back to channel
			workResBuffer <- doSendACKImpl(conn)
		}()
	}
}
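One way to address the "limit the goroutine number" TODO is a buffered channel used as a counting semaphore; a minimal sketch with an arbitrary limit of 10 concurrent handlers:

// sem acts as a counting semaphore limiting concurrent handlers.
sem := make(chan struct{}, 10)
for {
	conn, err := l.Accept()
	if err != nil {
		fmt.Println("Error accepting: ", err.Error())
		os.Exit(1)
	}
	sem <- struct{}{} // blocks if 10 handlers are already running
	go func(conn net.Conn) {
		defer func() { <-sem }() // free the slot when done
		workResBuffer <- doSendACKImpl(conn)
	}(conn)
}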

Multiple docker container logs

I'm trying to get the logs from multiple docker containers at once (order doesn't matter). This works as expected if types.ContainerLogsOptions.Follow is set to false.
If types.ContainerLogsOptions.Follow is set to true, sometimes the log output gets stuck after a few logs and no follow-up logs are printed to stdout.
If the output doesn't get stuck, it works as expected.
Additionally, if I restart one or all of the containers, the command doesn't exit like docker logs -f containerName does.
func (w *Whatever) Logs(options LogOptions) {
	readers := []io.Reader{}
	for _, container := range options.Containers {
		responseBody, err := w.Docker.Client.ContainerLogs(context.Background(), container, types.ContainerLogsOptions{
			ShowStdout: true,
			ShowStderr: true,
			Follow:     options.Follow,
		})
		if err != nil {
			log.Fatal(err)
		}
		defer responseBody.Close()
		readers = append(readers, responseBody)
	}

	// concatenate all readers to one
	multiReader := io.MultiReader(readers...)
	_, err := stdcopy.StdCopy(os.Stdout, os.Stderr, multiReader)
	if err != nil && err != io.EOF {
		log.Fatal(err)
	}
}
Basically there is no great difference between my implementation and that of docker logs https://github.com/docker/docker/blob/master/cli/command/container/logs.go, hence I'm wondering what causes this issue.
As JimB commented, that method won't work because of how io.MultiReader operates: it reads each reader sequentially until EOF, so a followed log that never ends blocks every reader after it. What you need to do is read from each response individually and combine the output. Since you're dealing with logs, it would make sense to break up the reads on newlines. bufio.Scanner does this for a single io.Reader, so one option would be to create a new type that scans multiple readers concurrently.
You could use it like this:
scanner := NewConcurrentScanner(readers...)
for scanner.Scan() {
	fmt.Println(scanner.Text())
}
if err := scanner.Err(); err != nil {
	log.Fatalln(err)
}
Example implementation of a concurrent scanner:
// ConcurrentScanner works like bufio.Scanner, but with multiple io.Readers
type ConcurrentScanner struct {
	scans  chan []byte   // Scanned data from readers
	errors chan error    // Errors from readers
	done   chan struct{} // Signal that all readers have completed
	cancel func()        // Cancel all readers (stop on first error)
	data   []byte        // Last scanned value
	err    error
}

// NewConcurrentScanner starts scanning each reader in a separate goroutine
// and returns a *ConcurrentScanner.
func NewConcurrentScanner(readers ...io.Reader) *ConcurrentScanner {
	ctx, cancel := context.WithCancel(context.Background())
	s := &ConcurrentScanner{
		scans:  make(chan []byte),
		errors: make(chan error),
		done:   make(chan struct{}),
		cancel: cancel,
	}
	var wg sync.WaitGroup
	wg.Add(len(readers))
	for _, reader := range readers {
		// Start a scanner for each reader in its own goroutine.
		go func(reader io.Reader) {
			defer wg.Done()
			scanner := bufio.NewScanner(reader)
			for scanner.Scan() {
				select {
				case s.scans <- scanner.Bytes():
					// While there is data, send it to s.scans,
					// this will block until Scan() is called.
				case <-ctx.Done():
					// This fires when the context is cancelled,
					// indicating that we should exit now.
					return
				}
			}
			if err := scanner.Err(); err != nil {
				select {
				case s.errors <- err:
					// Report that we got an error
				case <-ctx.Done():
					// Exit now if the context was cancelled; otherwise
					// sending the error would block and this goroutine
					// would never exit.
					return
				}
			}
		}(reader)
	}
	go func() {
		// Signal that all scanners have completed
		wg.Wait()
		close(s.done)
	}()
	return s
}

func (s *ConcurrentScanner) Scan() bool {
	select {
	case s.data = <-s.scans:
		// Got data from a scanner
		return true
	case <-s.done:
		// All scanners are done, nothing to do.
	case s.err = <-s.errors:
		// One of the scanners errored, we're done.
	}
	s.cancel() // Cancel context regardless of how we exited.
	return false
}

func (s *ConcurrentScanner) Bytes() []byte {
	return s.data
}

func (s *ConcurrentScanner) Text() string {
	return string(s.data)
}

func (s *ConcurrentScanner) Err() error {
	return s.err
}
Here's an example of it working in the Go Playground: https://play.golang.org/p/EUB0K2V7iT
You can see that the concurrent scanner output is interleaved, rather than reading all of one reader and then moving on to the next, as happens with io.MultiReader.

goroutine deadlock: In an app that reads from a blockchain and writes to rethinkdb, have

Okay, so
My situation is this: It's been three weeks and some-odd hours since I've become entranced by golang. I'm working on a blockchain dump tool for steem, and I'm probably going to give a touch of gjson to github.com/go-steem/rpc, the library I currently rely on. Now, with this said, this question is about the goroutines for my current blockchain reader. Here it is (sorry a tad on the beefy side, but you'll see the part that I want to pull back into the library, too):
// Keep processing incoming blocks forever.
fmt.Println("---> Entering the block processing loop")
for {
	// Get current properties.
	props, err := Client.Database.GetDynamicGlobalProperties()
	if err != nil {
		fmt.Println(err)
	}
	// Process blocks.
	for I := uint32(1); I <= props.LastIrreversibleBlockNum; I++ {
		go getblock(I, Client, Rsession)
	}
	if err != nil {
		fmt.Println(err)
	}
}
}

func getblock(I uint32, Client *rpc.Client, Rsession *r.Session) {
	block, err := Client.Database.GetBlock(I)
	fmt.Println(I)
	writeBlock(block, Rsession)
	if err != nil {
		fmt.Println(err)
	}
}

func writeBlock(block *d.Block, Rsession *r.Session) {
	// rethinkdb writes
	r.Table("transactions").
		Insert(block.Transactions).
		Exec(Rsession)
	r.Table("blocks").
		Insert(block).
		Exec(Rsession)
}
I just made a third edit to this, which was to call the function writeBlock from the goroutine getblock instead of the way I was doing things before.
Okay, so that is now resolved, but this is going to spawn another question, unfortunately.
I've got the application working with the goroutine, however it hasn't increased performance any.
The way I got it to work was by not spawning a goroutine from a goroutine, and instead calling writeBlock as a plain function from the goroutine getblock, exactly as in the code above.
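For reference, one option for the performance follow-up is a bounded worker pool instead of one goroutine per block. A rough sketch (the worker count of 8 is arbitrary, and it assumes Client and Rsession are safe for concurrent use, which is worth verifying for these libraries):

blockNums := make(chan uint32)
var wg sync.WaitGroup

// Start a fixed number of workers that fetch and write blocks.
for w := 0; w < 8; w++ {
	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := range blockNums {
			getblock(i, Client, Rsession)
		}
	}()
}

// Feed block numbers to the workers.
for i := uint32(1); i <= props.LastIrreversibleBlockNum; i++ {
	blockNums <- i
}
close(blockNums)
wg.Wait()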
fmt.Println("---> Entering the block processing loop")
for {
// Get current properties.
props, err := Client.Database.GetDynamicGlobalProperties()
if err != nil {
fmt.Println(err)
}
// Process blocks.
for I := uint32(1); I <= props.LastIrreversibleBlockNum; I++ {
go getblock(I, Client, Rsession)
}
if err != nil {
fmt.Println(err)
}
}
}
func getblock(I uint32, Client *rpc.Client, Rsession *r.Session) {
block, err := Client.Database.GetBlock(I)
fmt.Println(I)
writeBlock(block, Rsession)
if err != nil {
fmt.Println(err)
}
}
func writeBlock(block *d.Block, Rsession *r.Session) {
//rethinkdb writes
r.Table("transactions").
Insert(block.Transactions).
Exec(Rsession)
r.Table("blocks").
Insert(block).
Exec(Rsession)
}
