Is there any possibility of a goroutine interrupt without panic? - go

I'm setting up a service that provides an HTTP server and also runs goroutines to process some jobs (see the code below).
Once, during a cycle, a sub-job appeared to be interrupted: there are no logs after a certain function call.
No panic was caught, and the deferred function does not seem to have run, because the mutex is still locked.
The logging itself was not interrupted or lost:
the logs of the other jobs are complete.
There was no restart, exit, or OOM kill at that time.
This is on CentOS 7.5, and my service runs in Docker.
go1.11
docker 18.09
This is an occasional bug. I have added more logging and enabled pprof, and I am trying to reproduce it.
main.go
// main starts the cyclic background job and then serves HTTP on the configured port.
func main() {
....
// Register and start the cyclic job, which re-runs at a custom interval.
router.Cycle(r)
....
// Listen on the port taken from conf.Conf.Port.
endless.ListenAndServe(":"+conf.Conf.Port, r)
}
router/cycle.go
// Cycle registers schedule.RunSomeDeal as a looping job and starts it.
// Per the author: when a run ends, the scheduler sleeps for the configured
// interval (time.Second here) and runs the job again; it is implemented by
// wrapping a goroutine and creating a context.
func Cycle(g *gin.Engine) {
cyclec := cli.InitCycle(g)
cyclec.AddFunc(time.Second, schedule.RunSomeDeal)
cyclec.Start()
}
// RunSomeDeal launches one sub-job per mission through helpers.Job.Run and
// returns nil without waiting for those sub-jobs.
func RunSomeDeal(c *gin.Context) error {
...
// Process the sub-jobs.
for i := 0; i < missionLen; i++ {
// A one-shot job: like the cycle job, but it runs only once.
// Per the author, a new context is derived from the existing one and a
// goroutine executes the callback.
helpers.Job.Run(c, func(newCtx *gin.Context) error {
return DealMission(newCtx, someparams...)
})
}
return nil
}
// Run wraps f in an Entry and executes it asynchronously via runWithRecovery.
// When a getJobContext hook is configured, its result for ctx is stored on
// the entry as the span before the goroutine is started.
func (c *Job) Run(ctx *gin.Context, f func(ctx *gin.Context) error) {
	entry := new(Entry)
	entry.Job = FuncJob(f)
	if spanFor := c.getJobContext; spanFor != nil {
		entry.span = spanFor(ctx)
	}
	go c.runWithRecovery(entry)
}
// runWithRecovery executes the entry's job on a freshly created gin context.
// A deferred function logs any recovered panic and recycles the context when
// this function returns.
func (c *Job) runWithRecovery(e *Entry) {
ctx := gin.CreateNewContext(c.gin)
...
// recover() only intercepts a panic raised on this goroutine.
defer func() {
if r := recover(); r != nil {
// Capture up to 64 KiB of the current goroutine's stack for the log entry.
const size = 64 << 10
buf := make([]byte, size)
buf = buf[:runtime.Stack(buf, false)]
requestId, _ := ctx.Get("requestId")
handleName := ctx.CustomContext.HandlerName()
info, _ := json.Marshal(map[string]interface{}{
...some kv for log
})
log.Printf(...)
}
// Runs whether or not a panic was recovered.
gin.RecycleContext(c.gin, ctx)
}()
// Optional pre-hook; a false result skips the job (the deferred cleanup still runs).
if c.beforeRun != nil {
ok := c.beforeRun(ctx, e.span)
if !ok {
return
}
}
// NOTE(review): the local name "error" shadows the built-in error type for
// the rest of this function.
error := e.Job.Run(ctx)
...
// Optional post-hook; reached only when the job returned without panicking.
if c.afterRun != nil {
c.afterRun(ctx)
}
}
// DealMission takes a lock, calls SomeOtherFunc, and releases the lock in a
// deferred function.
func DealMission(c *gin.Context, params...) {
// Acquire the lock (the author says it uses sync.Mutex).
doSomeLock()
// The deferred unlock runs only when DealMission returns or panics; it does
// not run while the SomeOtherFunc call below is still blocked.
defer func() {
// Reported as never reached in the failing run.
unlockErr := unlockxxxxx(...)
if unlockErr != nil {
panic("some error info")
}
} ()
base.DebugLog(...)
err := SomeOtherFunc(c, params...)
base.DebugLog(...)
}
// SomeOtherFunc makes three CallOther calls in sequence, logging after each.
// NOTE(review): as written, the repeated "err :=" would not compile; this is
// presumably abbreviated pseudo-code.
func SomeOtherFunc(ctx *gin.Context, params...) error {
err := CallOther()
base.DebugLog(...)
err := CallOther()
base.DebugLog(...)
// No logs appear after this call, and Job.runWithRecovery does not catch any panic.
err := CallOther()
// Log the result.
base.DebugLog(...)
}
In this sub-job the log stops at a certain line, with no panic and no error, and the deferred function does not seem to have run, because the mutex is still locked.
The logs of the other jobs are fine, and the logs of the next cycle of this job are fine too.

Related

How to exit a time-out task in goroutine

I want run(v, ctx) to exit when it runs out of time. Here is the code I wrote; can you help me see whether there is a problem, or whether there is a better way to do this?
package main
import (
"context"
"log"
"math/rand"
"time"
)
// main fans six values out to three workers under a shared 3-second timeout
// and exits once every worker has finished, instead of sleeping a fixed 20 s.
func main() {
	do := make(chan int)
	rand.Seed(time.Now().Unix())
	s := []int{100, 200, 300, 400, 500, 600}
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
	defer cancel()

	const workers = 3
	// finished receives one signal per worker when its range loop ends.
	finished := make(chan struct{})
	work := func(ctx context.Context) {
		defer func() { finished <- struct{}{} }()
		for v := range do {
			run(v, ctx)
		}
	}
	for i := 0; i < workers; i++ {
		go work(ctx)
	}
	for _, v := range s {
		do <- v
	}
	// Closing the channel ends each worker's range loop so no goroutine leaks.
	close(do)
	for i := 0; i < workers; i++ {
		<-finished
	}
}
// run logs v and then simulates 1–5 seconds of work. It returns early,
// logging "timeout", if ctx is already done on entry or becomes done while
// the simulated work is in progress.
func run(v int, ctx context.Context) {
	if ctx.Err() != nil {
		log.Print("timeout")
		return
	}
	// do something
	log.Print(v)
	// Wait on a timer rather than time.Sleep so cancellation can interrupt the wait.
	timer := time.NewTimer(time.Duration(rand.Intn(5)+1) * time.Second)
	defer timer.Stop()
	select {
	case <-timer.C:
	case <-ctx.Done():
		log.Print("timeout")
	}
}
I'm a beginner. I have run my code, but I'm not sure whether it has a problem. Can you give me some advice?
There are a couple techniques you can use.
First, I would recommend avoiding range over the channel since it doesn't support contexts. Use a loop around select instead:
// work receives values from do and passes each to run until ctx is done.
work := func(ctx context.Context) {
for {
// Wait for either cancellation or the next value, whichever comes first.
select {
case <-ctx.Done():
return
case v := <-do:
run(ctx, v) // ctx should conventionally be first parameter.
}
}
}
Within run and descendent functions you can detect cancellation via ctx.Done() as above, or call ctx.Err() and return:
if err := ctx.Err(); err != nil {
return // err -- you should capture and return errors from your worker function.
}
golang.org/x/sync/errgroup.Group can be used to manage starting goroutines, waiting for them to complete, and capturing the first (if any) error worker functions:
// Start three workers that share a context derived by errgroup.WithContext.
// NOTE(review): this assumes work returns an error; the work closure shown
// earlier has no return value, so its signature would need to change.
g, ctx := errgroup.WithContext(ctx)
for i := 0; i < 3; i++ {
g.Go(func() error {
return work(ctx)
})
}
// Wait blocks until every worker has returned and yields the first non-nil error.
if err := g.Wait(); err != nil {
fmt.Println("Failed:", err)
}

How to return the error from the goroutine inside a loop early?

I have a goroutine inside a loop and the way I am handling the error is that I add it to a channel and after all the goroutines are finished, I check if there was an error and I return accordingly.
The issue with this is that I want to return an error as soon as I get it so that I don't spend time waiting for all the goroutines to finish as it would be inefficient.
I tried adding the select statement but it doesn't work and I can't add the select statement inside the goroutines since I want to exit the for loop and the try function too.
How can I do this?
Here is the code:
package main
import (
"sync"
"runtime"
"fmt"
"errors"
)
// try runs one goroutine per element of s and returns (false, err) as soon as
// any goroutine reports an error, without waiting for the rest. It returns
// (true, nil) once every goroutine has finished without error.
func try() (bool, error) {
	s := []int{0, 1, 2, 3, 4, 5}
	// Buffered to len(s) so a goroutine can always deliver its error and exit,
	// even after try has already returned.
	ec := make(chan error, len(s))
	var wg sync.WaitGroup
	for i, val := range s {
		wg.Add(1)
		go func(i int, val int) {
			defer wg.Done()
			if i == 3 {
				ec <- errors.New("one error")
			}
		}(i, val)
	}
	// Close ec once all goroutines are done so the receive below also
	// unblocks when nobody failed.
	go func() {
		wg.Wait()
		close(ec)
	}()
	// Blocks until the first error arrives or the channel is closed.
	if err, ok := <-ec; ok {
		return false, err
	}
	return true, nil
}
// main sets GOMAXPROCS to the CPU count, calls try, and prints the error
// together with the boolean result when try fails.
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())
	ok, err := try()
	if err == nil {
		return
	}
	fmt.Println(err.Error(), ok)
}
This is the go playground link
With wg.Wait() before your select statement, you are effectively waiting for all goroutines to return.
The issue with this is that I want to return an error as soon as I get it
I assume that with this you mean stopping running goroutines as soon as any one of them returns an error.
In this case, you could use context.Context to manage cancellation, but even better is an errgroup.Group, which nicely combines context functionality and synchronization:
Package errgroup provides synchronization, error propagation, and Context cancelation for groups of goroutines working on subtasks of a common task.
In particular Group.Go:
The first call to return a non-nil error cancels the group; its error will be returned by Wait.
import (
"sync"
"runtime"
"fmt"
"errors"
"golang.org/x/sync/errgroup"
)
// try runs one task per element of s in an errgroup and returns (false, err)
// with the first non-nil error reported by Wait, or (true, nil) when every
// task succeeds.
func try() (bool, error) {
	errg := new(errgroup.Group)
	s := []int{0, 1, 2, 3, 4, 5}
	for i, val := range s {
		// Shadow the loop variables so each closure sees its own copy.
		i, val := i, val
		errg.Go(func() error {
			return func(i int, val int) error {
				if i == 3 {
					return errors.New("one error")
				}
				return nil
			}(i, val)
		})
	}
	// Propagate the failure instead of discarding it and reporting success.
	if err := errg.Wait(); err != nil {
		return false, err
	}
	return true, nil
}
https://play.golang.org/p/lSIIFJqXf0W
I have found tomb to be useful for this. Below is a stripped-down non-working example that shows the gist, without handling things like variable encapsulation in the loop. It should give you the idea, but I'm happy to clarify on any points.
package main
import (
"fmt"
"gopkg.in/tomb.v2"
"sync"
)
// main sketches fail-fast goroutines with tomb: each tracked goroutine either
// finishes its work or stops when the tomb starts dying.
// NOTE(review): the author marks this as a non-working example; i and v are
// unused and waitingForWork is not defined here.
func main() {
ts := tomb.Tomb{}
s := []int{0,1,2,3,4,5}
for i, v := range s {
ts.Go(func() error {
// Do some work here or return an error. Watch the Dying channel: if it
// closes, one of the other goroutines has failed.
select {
case <- ts.Dying():
return nil
case err := <- waitingForWork():
if err != nil {
return err
}
return nil
}
})
}
// If an error appears here, one of the goroutines must have failed.
err := ts.Wait()
if err != nil {
fmt.Println(err)
}
}

How to kill running goroutines from outside? [duplicate]

This question already has answers here:
cancel a blocking operation in Go
(2 answers)
Closed 3 years ago.
I have a pool of goroutines, and I want to kill a specific one of them from outside.
Thanks for your help
You cannot "kill" a goroutine. You can use one of the synchronization primitives, or a context, to tell the goroutine that you want it to end.
ctx, cancel := context.WithCancel(context.Background())
go func(ctx context.Context) {
	for {
		// Non-blocking check: return once the context is cancelled, otherwise
		// fall through and do one more unit of work.
		select {
		case <-ctx.Done():
			return
		default:
		}
		// Do stuff
	}
}(ctx)
...
// Tell that goroutine to stop
cancel()
You can use sync/atomic as well:
// die holds one stop flag per goroutine; a value of 1 tells that goroutine to return.
die:=make([]int32,nGoroutines)
go func(i int) {
// Check the flag before each unit of work.
if atomic.LoadInt32(&die[i])==1 {
return
}
// Do stuff
if atomic.LoadInt32(&die[i])==1 {
return
}
// Do more stuff
}(0) // 0'th goroutine
...
// Tell goroutine n to stop
atomic.StoreInt32(&die[n],1)
I found an answer. There is a concept named Context, which carries deadlines, cancellation signals, and other request-scoped values across API boundaries and between processes. Here is some sample code:
// Job pairs a job ID with the context handed to its worker and the function
// that cancels that context.
type Job struct {
ID int
Context context.Context
Cancel context.CancelFunc
}
// jobs maps a job ID to its Job record.
// NOTE(review): no lock is visible around this map, yet it is read and
// written from HTTP handlers — confirm whether it needs a mutex.
var jobs = make(map[int] Job)
// worker announces the job with the given index and then blocks until ctx is done.
func worker(ctx context.Context, index int) {
fmt.Printf("starting job with id %d\n", index)
<-ctx.Done()
}
// main serves two endpoints: POST /start creates a cancellable job and runs
// its worker in a goroutine; GET /cancel/:id cancels the job with that id.
//
// NOTE(review): jobs and id are shared between handler goroutines without a
// lock. Guarding them needs a sync.Mutex, which is not added here because
// this snippet shows no import block to extend.
func main() {
	id := 0
	r := gin.Default()
	r.POST("/start", func(c *gin.Context) {
		var job Job
		// Best-effort decode: a failure is only logged and the job still starts.
		if err := json.NewDecoder(c.Request.Body).Decode(&job); err != nil {
			fmt.Println(err)
		}
		ctx, cancel := context.WithCancel(context.Background())
		job.ID = id
		job.Context = ctx
		job.Cancel = cancel
		jobs[job.ID] = job
		c.JSON(http.StatusOK, gin.H{"message": "job received"})
		go worker(ctx, job.ID)
		id++
	})
	r.GET("/cancel/:id", func(c *gin.Context) {
		idInt, err := strconv.Atoi(c.Param("id"))
		if err != nil {
			fmt.Println(err)
			c.JSON(http.StatusBadRequest, gin.H{"message": "invalid job id"})
			return
		}
		// An unknown id yields the zero Job, whose Cancel is nil; calling it
		// would panic, so answer 404 instead.
		job, ok := jobs[idInt]
		if !ok {
			c.JSON(http.StatusNotFound, gin.H{"message": "job not found"})
			return
		}
		job.Cancel()
	})
	if err := endless.ListenAndServe(":8080", r); err != nil {
		fmt.Printf("Could not run server : %v", err.Error())
	}
}
You could use a channel
endRoutine:= make(chan bool)
go func() {
<-endRoutine
}()
//To end one thread waiting on the channel
endRoutine<-true
https://play.golang.org/p/IRARfywOLZX
If you are processing data in a loop and need to check if you should exit on each iteration, you can use a switch
go func() {
	for {
		// This must be select, not switch: "switch { case <-endRoutine: }"
		// evaluates the receive as an ordinary expression and blocks on it,
		// so the default branch would never give a non-blocking check.
		select {
		case <-endRoutine:
			return // exit
		default:
			// No stop signal yet; carry on with the next iteration.
		}
	}
}()
https://play.golang.org/p/18eOHpilnFi

Multiple docker container logs

I'm trying to get the logs from multiple docker containers at once (order doesn't matter). This works as expected if types.ContainerLogsOption.Follow is set to false.
If types.ContainerLogsOption.Follow is set to true sometimes the log output get stuck after a few logs and no follow up logs are printed to stdout.
If the output doesn't get stuck it works as expected.
Additionally if I restart one or all of the containers the command doesn't exit like docker logs -f containerName does.
// Logs requests the log stream of every container in options.Containers and
// copies the demultiplexed output to os.Stdout and os.Stderr. Any error is
// fatal.
//
// NOTE(review): io.MultiReader drains its readers one after another, so with
// options.Follow set the first stream never reaches EOF and later containers
// are never read; the streams would have to be read concurrently to fix that.
func (w *Whatever) Logs(options LogOptions) {
	readers := make([]io.Reader, 0, len(options.Containers))
	for _, container := range options.Containers {
		responseBody, err := w.Docker.Client.ContainerLogs(context.Background(), container, types.ContainerLogsOptions{
			ShowStdout: true,
			ShowStderr: true,
			Follow:     options.Follow,
		})
		// Check the error before touching responseBody: deferring Close on a
		// failed call would dereference a nil body.
		if err != nil {
			log.Fatal(err)
		}
		// Deferred on purpose: every stream must stay open until the copy
		// below has finished.
		defer responseBody.Close()
		readers = append(readers, responseBody)
	}
	// concatenate all readers to one
	multiReader := io.MultiReader(readers...)
	_, err := stdcopy.StdCopy(os.Stdout, os.Stderr, multiReader)
	if err != nil && err != io.EOF {
		log.Fatal(err)
	}
}
Basically there is no great difference in my implementation from that of docker logs https://github.com/docker/docker/blob/master/cli/command/container/logs.go, hence I'm wondering what causes this issues.
As JimB commented, that method won't work because of how io.MultiReader operates: it reads its readers one after another. What you need to do is read from each response individually and combine the output. Since you're dealing with logs, it would make sense to break up the reads on newlines. bufio.Scanner does this for a single io.Reader. So one option would be to create a new type that scans multiple readers concurrently.
You could use it like this:
scanner := NewConcurrentScanner(readers...)
for scanner.Scan() {
fmt.Println(scanner.Text())
}
if err := scanner.Err(); err != nil {
log.Fatalln(err)
}
Example implementation of a concurrent scanner:
// ConcurrentScanner works like bufio.Scanner, but with multiple io.Readers.
type ConcurrentScanner struct {
scans chan []byte // Scanned data from readers
errors chan error // Errors from readers
done chan struct{} // Closed once all readers have completed
cancel func() // Cancel all readers (stop on first error)
data []byte // Last scanned value, returned by Bytes and Text
err error // Error received from a reader, returned by Err
}
// NewConcurrentScanner starts scanning each reader in a separate goroutine
// and returns a *ConcurrentScanner. Each scanned token is copied before it is
// handed over, so the slice returned by Bytes stays valid while the producing
// goroutine scans ahead.
func NewConcurrentScanner(readers ...io.Reader) *ConcurrentScanner {
	ctx, cancel := context.WithCancel(context.Background())
	s := &ConcurrentScanner{
		scans:  make(chan []byte),
		errors: make(chan error),
		done:   make(chan struct{}),
		cancel: cancel,
	}
	var wg sync.WaitGroup
	wg.Add(len(readers))
	for _, reader := range readers {
		// Start a scanner for each reader in its own goroutine.
		go func(reader io.Reader) {
			defer wg.Done()
			scanner := bufio.NewScanner(reader)
			for scanner.Scan() {
				// scanner.Bytes() points into the scanner's internal buffer,
				// which the next Scan may overwrite; send a private copy.
				line := append([]byte(nil), scanner.Bytes()...)
				select {
				case s.scans <- line:
					// While there is data, send it to s.scans;
					// this blocks until Scan() is called.
				case <-ctx.Done():
					// The context was cancelled, so exit now.
					return
				}
			}
			if err := scanner.Err(); err != nil {
				select {
				case s.errors <- err:
					// Report that we got an error.
				case <-ctx.Done():
					// Exit if the context was cancelled; otherwise the send
					// above would block and this goroutine would never exit.
					return
				}
			}
		}(reader)
	}
	go func() {
		// Signal that all scanners have completed.
		wg.Wait()
		close(s.done)
	}()
	return s
}
// Scan waits for the next token from any reader. It stores the token and
// returns true when one arrives. When a reader reports an error (recorded for
// Err) or all readers have finished, it cancels the shared context and
// returns false.
func (s *ConcurrentScanner) Scan() bool {
	select {
	case line := <-s.scans:
		s.data = line
		return true
	case scanErr := <-s.errors:
		// A reader failed; remember why and stop.
		s.err = scanErr
	case <-s.done:
		// Every reader is finished; nothing more will arrive.
	}
	s.cancel()
	return false
}
// Bytes returns the value stored by the most recent successful Scan.
func (s *ConcurrentScanner) Bytes() []byte {
return s.data
}
// Text returns the value stored by the most recent successful Scan as a string.
func (s *ConcurrentScanner) Text() string {
return string(s.data)
}
// Err returns the error recorded by Scan, or nil if no reader reported one.
func (s *ConcurrentScanner) Err() error {
return s.err
}
Here's an example of it working in the Go Playground: https://play.golang.org/p/EUB0K2V7iT
You can see that the concurrent scanner output is interleaved. Rather than reading all of one reader, then moving on to the next, as is seen with io.MultiReader.

How do I handle errors in a worker pool using WaitGroup?

I have a problem using sync.WaitGroup and select together. If you take a look at the following HTTP request pool, you will notice that if an error occurs it will never be reported, because wg.Wait() blocks and nothing reads from the channel anymore.
package pool
import (
"fmt"
"log"
"net/http"
"sync"
)
var (
	// MaxPoolQueue is the capacity of the request queue.
	MaxPoolQueue = 100
	// MaxPoolWorker is the number of worker goroutines started by Run.
	MaxPoolWorker = 10
)

// Pool sends queued HTTP requests from a fixed number of worker goroutines.
type Pool struct {
	wg     *sync.WaitGroup    // counts requests added but not yet processed
	queue  chan *http.Request // pending requests
	errors chan error         // request failures reported by workers
}

// NewPool returns an empty Pool whose queue holds up to MaxPoolQueue requests.
func NewPool() *Pool {
	return &Pool{
		wg:     &sync.WaitGroup{},
		queue:  make(chan *http.Request, MaxPoolQueue),
		errors: make(chan error),
	}
}

// Add enqueues r. It blocks once MaxPoolQueue requests are already waiting.
func (p *Pool) Add(r *http.Request) {
	p.wg.Add(1)
	p.queue <- r
}

// Run starts MaxPoolWorker workers and blocks until either a request fails,
// in which case that first error is returned, or every request added so far
// has been processed, in which case it returns nil.
func (p *Pool) Run() error {
	for i := 0; i < MaxPoolWorker; i++ {
		go p.doWork()
	}
	// Turn WaitGroup completion into a channel so it can be selected on
	// together with the error channel.
	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()
	select {
	case err := <-p.errors:
		// Keep receiving later errors until the outstanding requests finish,
		// so workers never block on the unbuffered errors channel.
		go func() {
			for {
				select {
				case <-p.errors:
				case <-done:
					return
				}
			}
		}()
		return err
	case <-done:
		return nil
	}
}

// doWork processes queued requests until the queue is closed. A request is
// marked done only after it has completed, and a failure is sent before
// Done so Run cannot observe completion ahead of the error.
func (p *Pool) doWork() {
	for r := range p.queue {
		fmt.Printf("Request to %s\n", r.Host)
		resp, err := http.DefaultClient.Do(r)
		if err != nil {
			// Log without exiting; log.Fatal here would kill the process
			// before the error could be reported to Run.
			log.Printf("request to %s failed: %v", r.Host, err)
			p.errors <- err
			p.wg.Done()
			continue
		}
		// Close the body so the transport can release the connection.
		resp.Body.Close()
		fmt.Printf("no error\n")
		p.wg.Done()
	}
}
Source can be found here
How can I still use WaitGroup but also get errors from go routines?
I found the answer myself while writing the question, and as I think it is an interesting case, I would like to share it with you.
The trick to use sync.WaitGroup and chan together is that we wrap:
select {
case err := <-p.errors:
return err
default:
p.wg.Done()
}
Together in a for loop:
// Poll for an error on every iteration and return the first one received.
// NOTE(review): the default branch calls p.wg.Done() on each pass; the
// accompanying text says the loop should "wait", so p.wg.Wait() may have
// been intended — confirm before using this.
for {
select {
case err := <-p.errors:
return err
default:
p.wg.Done()
}
}
In this case select will always check for errors and wait if nothing happens :)
It looks a bit like the fail-fast mechanism enabled by the Tomb library (Tomb V2 GoDoc):
The tomb package handles clean goroutine tracking and termination.
If any of the tracked goroutines returns a non-nil error, or the Kill or Killf method is called by any goroutine in the system (tracked or not), the tomb Err is set, Alive is set to false, and the Dying channel is closed to flag that all tracked goroutines are supposed to willingly terminate as soon as possible.
Once all tracked goroutines terminate, the Dead channel is closed, and Wait unblocks and returns the first non-nil error presented to the tomb via a result or an explicit Kill or Killf method call, or nil if there were no errors.
You can see an example in this playground:
(extract)
// start runs all the given functions concurrently
// until either they all complete or one returns an
// error, in which case it returns that error.
//
// The functions are passed a channel which will be closed
// when the function should stop.
func start(funcs []func(stop <-chan struct{}) error) error {
// NOTE(review): this local variable shadows the imported tomb package for the
// rest of the function.
var tomb tomb.Tomb
var wg sync.WaitGroup
allDone := make(chan struct{})
// Start all the functions.
for _, f := range funcs {
// Shadow f so each goroutine captures its own function value.
f := f
wg.Add(1)
go func() {
defer wg.Done()
// A failing function kills the tomb with its error.
if err := f(tomb.Dying()); err != nil {
tomb.Kill(err)
}
}()
}
// Start a goroutine to wait for them all to finish.
go func() {
wg.Wait()
close(allDone)
}()
// Wait for them all to finish, or one to fail
select {
case <-allDone:
case <-tomb.Dying():
}
tomb.Done()
return tomb.Err()
}
A simpler implementation would be like below. (Check in play.golang: https://play.golang.org/p/TYxxsDRt5Wu)
package main
import "fmt"
import "sync"
import "time"
// Error is a minimal error type that carries only a message.
type Error struct {
message string
}
// Error returns the stored message, satisfying the error interface.
func (e Error) Error() string {
return e.message
}
// main launches eight goroutines, some of which report an error, prints
// every reported error, and then prints a completion message.
func main() {
	var wg sync.WaitGroup
	waitGroupLength := 8
	// One slot per goroutine: a send never blocks, and no error can be lost
	// in a race between "all done" and a still-buffered error.
	errChannel := make(chan error, waitGroupLength)
	// Set up the wait group to match the number of goroutines launched below.
	wg.Add(waitGroupLength)
	for i := 0; i < waitGroupLength; i++ {
		go func(i int) {
			// Mark the wait group as done even on an early exit so it does not hang.
			defer wg.Done()
			fmt.Printf("Go routine %d executed\n", i+1)
			time.Sleep(time.Duration(waitGroupLength - i))
			if i%4 == 1 {
				errChannel <- Error{fmt.Sprintf("Errored on routine %d", i+1)}
			}
		}(i)
	}
	// Close the channel once every goroutine has finished; this ends the
	// range loop below after all buffered errors have been delivered.
	go func() {
		wg.Wait()
		close(errChannel)
	}()
	for err := range errChannel {
		fmt.Println("error ", err)
		// handle your error
	}
	fmt.Println("Executed all go routines")
}

Resources