My process leaves too many open file descriptors.
When I run lsof -p $pid, most of the results (about 80%) look like this:
points2 28360 root 911u sock 0,9 0t0 42082509
protocol: TCPv6
Before the FD type turns to 'sock', it stays in CLOSE_WAIT for a while. What I noticed is that some of these 'sock' FDs eventually disappear, but some stay forever.
The number of open files increases gradually, with small fluctuations, until it reaches the maximum of 1024. For now I have set the allowed maximum number of open files to 4096, to make the process run longer.
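For watching the growth without repeatedly running lsof, here is a small Linux-only sketch (an assumption: the server runs on Linux, as the lsof output suggests) that counts the process's open descriptors by listing /proc/self/fd:

func countOpenFDs() (int, error) {
    // each entry in /proc/self/fd is one open descriptor of this process;
    // uses only the standard "os" package (Go 1.16+)
    entries, err := os.ReadDir("/proc/self/fd")
    if err != nil {
        return 0, err
    }
    return len(entries), nil
}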
srvTLS := &http.Server{
    Addr:         utils.PortSocketTLS,
    ReadTimeout:  10 * time.Second,
    WriteTimeout: 15 * time.Second,
}
srvTLS.SetKeepAlivesEnabled(false)
Handler:
func WsRoom(w http.ResponseWriter, r *http.Request) {
    ws, err := websocket.Upgrade(w, r, nil, 1024, 1024)
    if _, ok := err.(websocket.HandshakeError); ok {
        http.Error(w, "Not a websocket handshake", 400)
        return
    } else if err != nil {
        return
    }
    ...other stuff
}
Writer to the connection:
func PlayerWriter(pc *model.PlayerConn) {
    ticker := time.NewTicker(utils.PingPeriod)
    defer func() {
        ticker.Stop()
        pc.WS.Close()
    }()
    for {
        select {
        case message, ok := <-pc.Ch:
            pc.WS.SetWriteDeadline(time.Now().Add(utils.WriteWait))
            if !ok {
                pc.WS.WriteMessage(websocket.CloseMessage, []byte{})
                return
            }
            err := pc.WS.WriteMessage(websocket.TextMessage, message)
            if err != nil {
                return
            }
            inst := &model.PlayerLeftInstruction{}
            _ = json.Unmarshal(message, inst)
            if inst.Instruction == utils.UtilAFK || inst.Instruction == utils.RoomMoneyLess {
                return
            }
        case <-ticker.C:
            pc.WS.SetWriteDeadline(time.Now().Add(utils.WriteWait))
            if err := pc.WS.WriteMessage(websocket.PingMessage, nil); err != nil {
                return
            }
        }
    }
}
Connection listener:
func PlayerListener(pc *model.PlayerConn) {
    defer func() {
        if r := recover(); r != nil {
        }
        close(pc.Ch)
        pc.Room.Leave <- pc
        pc.WS.Close()
    }()
    pc.WS.SetReadLimit(utils.MaxMessageSize)
    pc.WS.SetReadDeadline(time.Now().Add(utils.PongWait))
    pc.WS.SetPongHandler(func(string) error { pc.WS.SetReadDeadline(time.Now().Add(utils.PongWait)); return nil })
    for {
        _, command, err := pc.WS.ReadMessage()
        if err != nil {
            break
        }
        si := model.StatusInstruction{}
        json.Unmarshal(command, &si)
        z := make([]byte, len(command))
        copy(z, command)
        switch si.Status {
        case utils.PlayerLeft:
            goto Exit
        case utils.MoveTurn, utils.LocalTurn, utils.LocalBetFold, utils.LocalBet, utils.MoveTurnBet:
            pc.Room.RoomData.GameBridger.InstCh <- z
        ...some stuff
        default:
            log.Printf("Unexpected command received: %s", string(command))
            goto Exit
        }
    }
Exit:
    return // a label must precede a statement; the deferred func does the cleanup
}
If needed, I can share more code. I think the problem is with timeouts or something related to them, but I don't know exactly where I'm going wrong.
In lsof, TYPE 'sock' usually means a socket that hasn't received anything.
I had a logical error in my code: in some cases I was NOT closing the established websocket connection.
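The exact missing Close isn't shown here, but as a minimal sketch of the pattern (assuming the gorilla/websocket API used above, and that the handler owns the connection until handing it off): defer the Close right after a successful upgrade, so every later return path releases the descriptor.

func WsRoomSketch(w http.ResponseWriter, r *http.Request) {
    ws, err := websocket.Upgrade(w, r, nil, 1024, 1024)
    if _, ok := err.(websocket.HandshakeError); ok {
        http.Error(w, "Not a websocket handshake", 400)
        return
    } else if err != nil {
        return
    }
    // guarantee the fd is released on every exit path from here on
    defer ws.Close()
    // ... handle the connection (or, if goroutines take ownership of ws,
    // make sure each of their exit paths calls ws.Close instead)
}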
I also had goroutine leaks (although they didn't affect the lsof count); pprof helped to detect them.
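For anyone hunting similar leaks, the usual way to expose pprof in a long-running server is the standard net/http/pprof package (the port here is an arbitrary choice):

import (
    "log"
    "net/http"
    _ "net/http/pprof" // registers the /debug/pprof/* handlers on the default mux
)

func init() {
    go func() {
        // then inspect http://localhost:6060/debug/pprof/goroutine?debug=1
        log.Println(http.ListenAndServe("localhost:6060", nil))
    }()
}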
I have a Go API project where I also run a RabbitMQ worker. I just discovered that my worker and my HTTP ListenAndServe do not work together: the moment I run the worker, the API port is never reached.
Here is what my code looks like.
app.go
func (a *App) StartWorker() {
    connection, err := amqp091.Dial(os.Getenv("AMQP_URL"))
    if err != nil {
        panic(err)
    }
    defer connection.Close()
    consumer, err := events.NewConsumer(connection, database.GetDatabase(a.Database))
    if err != nil {
        panic(err)
    }
    consumer.Listen(os.Args[1:])
}
func (a *App) Run(addr string) {
    logs := log.New(os.Stdout, "my-service", log.LstdFlags)
    server := &http.Server{
        Addr:         addr,
        Handler:      a.Router,
        ErrorLog:     logs,
        IdleTimeout:  120 * time.Second, // max time for connections using TCP Keep-Alive
        ReadTimeout:  5 * time.Second,
        WriteTimeout: 10 * time.Second,
    }
    go func() {
        if err := server.ListenAndServe(); err != nil {
            logs.Fatal(err)
        }
    }()
    // trap sigterm or interrupt and gracefully shutdown the server
    c := make(chan os.Signal, 1) // buffered, as signal.Notify requires
    signal.Notify(c, os.Interrupt)
    signal.Notify(c, os.Kill) // note: os.Kill cannot actually be trapped
    sig := <-c
    logs.Println("Received terminate, graceful shutdown", sig)
    tc, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    defer cancel()
    server.Shutdown(tc)
}
Here is my
consumer.go
// NewConsumer returns a new Consumer
func NewConsumer(conn *amqp.Connection, db *mongo.Database) (Consumer, error) {
    consumer := Consumer{
        conn: conn,
        db:   db,
    }
    err := consumer.setup()
    if err != nil {
        return Consumer{}, err
    }
    return consumer, nil
}
// Listen will listen for all new Queue publications
// and print them to the console.
func (consumer *Consumer) Listen(topics []string) error {
    ch, err := consumer.conn.Channel()
    if err != nil {
        return err
    }
    defer ch.Close()
    msgs, err := ch.Consume("update.package.rating", "", true, false, false, false, nil)
    if err != nil {
        return err
    }
    forever := make(chan bool)
    go func() {
        for msg := range msgs {
            switch msg.RoutingKey {
            case "update.package.rating":
                worker.RatePackage(packageRepo.NewPackagesRepository(consumer.db), msg.Body)
            }
            // acknowledge received event
            log.Printf("Received a message: %s", msg.Body)
        }
    }()
    log.Printf("[*] Waiting for message [Exchange, Queue][%s, %s]. To exit press CTRL+C", getExchangeName(), "update.package.rating")
    <-forever
    return nil
}
main.go
func main() {
    start := app.App{}
    start.StartApp()
    start.StartWorker()
    start.Run(":3006")
}
The port 3006 is never reached.
I am using gin-gonic to serve my HTTP requests.
Any help is welcome.
I had a similar problem while using the gin framework. I solved the issue by running my consumer inside a goroutine. I invoked my consumer like below:
go notificationCallback.ConsumeBankTransaction()
and both the server and the RabbitMQ consumer run seamlessly. I'm still monitoring performance to see if it is robust and resilient enough.
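Applied to the main from the question, the change is just the go keyword in front of StartWorker (a sketch reusing the asker's App type), so the blocking consumer no longer prevents Run from starting:

func main() {
    start := app.App{}
    start.StartApp()
    // run the RabbitMQ consumer concurrently so that the blocking
    // consumer.Listen call does not keep Run from ever executing
    go start.StartWorker()
    start.Run(":3006")
}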
I am creating a Go application whose clients are Android phones. I am able to handle connections: if the user closes the Android application, the connection is dropped with an EOF.
My problem is that if the client just turns off wifi, the connection stays alive.
Here is my code
func main() {
    fmt.Println("Starting server...")
    listener, err := net.Listen("tcp", ":4406")
    if err != nil {
        fmt.Println(err)
    }
    defer listener.Close()
    manager := ClientManager{
        clients:   make(map[*Client]bool),
        broadcast: make(chan []byte),
        register:  make(chan *Client),
    }
    go manager.start()
    for {
        conn, err := listener.Accept() // don't shadow the listener or drop the error
        if err != nil {
            fmt.Println(err)
            continue
        }
        client := &Client{socket: conn, data: make(chan []byte), uuid: conn.RemoteAddr().String()}
        manager.register <- client
        go manager.receive(client)
        go handleConnection(client)
    }
}
Handling connections:
func handleConnection(client *Client) {
    conn := client.socket
    defer conn.Close()
    notify := make(chan error)
    go func() {
        buf := make([]byte, 1024)
        for {
            n, err := conn.Read(buf)
            if err != nil {
                notify <- err
                return
            }
            if n > 0 {
                fmt.Printf("unexpected data: %s\n", buf[:n])
            }
        }
    }()
    for {
        select {
        case err := <-notify:
            if err != nil {
                fmt.Println("connection dropped message", err)
                return
            }
        case <-time.After(time.Second * 1):
            fmt.Println("timeout 1, still alive")
        }
    }
}
When the remote wifi is off (or the cable is removed) I want to disconnect the user. I tried reading a byte every second and the read still succeeds; I sent a byte and it is sent as well.
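One common approach is to enforce a read deadline, so that a peer that silently vanishes makes Read fail instead of blocking forever. A sketch of the reader goroutine from handleConnection above, using only the standard net deadline API and an assumed 30-second heartbeat interval that the Android client would have to honor:

go func() {
    buf := make([]byte, 1024)
    for {
        // require some traffic (e.g. a client heartbeat) at least every
        // 30s; if the wifi drops, Read returns a timeout error instead
        // of blocking indefinitely
        conn.SetReadDeadline(time.Now().Add(30 * time.Second))
        if _, err := conn.Read(buf); err != nil {
            notify <- err
            return
        }
    }
}()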
I am experimenting with Go and would like to create a TCP server which I can telnet to, send commands to, and receive responses from.
const (
    CONN_HOST = "localhost"
    CONN_PORT = "3333"
    CONN_TYPE = "tcp"
)
func main() {
    listener, err := net.Listen(CONN_TYPE, fmt.Sprintf("%s:%s", CONN_HOST, CONN_PORT))
    if err != nil {
        log.Panicln(err)
    }
    defer listener.Close()
    for {
        conn, err := listener.Accept()
        if err != nil {
            log.Panicln(err)
        }
        go handleRequest(conn)
    }
}

func handleRequest(conn net.Conn) {
    buffer := make([]byte, 1024)
    length, err := conn.Read(buffer)
    if err != nil {
        log.Panicln(err)
    }
    str := string(buffer[:length])
    fmt.Println(conn.RemoteAddr().String())
    fmt.Printf("Received command %d\t:%s\n", length, str)
    switch str {
    case "PING\r\n":
        sendResponse("PONG", conn)
    case "PUSH\r\n":
        sendResponse("GOT PUSH", conn)
    default:
        conn.Write([]byte(fmt.Sprintf("UNKNOWN_COMMAND: %s\n", str)))
    }
    conn.Close() // closes the connection
}

func sendResponse(res string, conn net.Conn) {
    conn.Write([]byte(res + "\n"))
}
The above snippet closes the connection every time, kicking me out of the terminal session. But what I actually want is to keep the connection open for more I/O operations. If I simply remove the conn.Close(), the server appears to hang somewhere, as it does not respond any more.
The way I have resolved this is to have my handleRequest method loop endlessly, so that it never exits until it receives a QUIT\r\n message. Is this appropriate, or is there a better way of achieving it?
func handleRequest(conn net.Conn) {
    for {
        log.Println("Handling Request")
        buffer := make([]byte, 1024)
        length, err := conn.Read(buffer)
        if err != nil {
            log.Panicln(err)
        }
        str := string(buffer[:length])
        fmt.Println(conn.RemoteAddr().String())
        fmt.Printf("Received command %d\t:%s\n", length, str)
        switch str {
        case "PING\r\n":
            sendResponse("PONG", conn)
        case "PUSH\r\n":
            sendResponse("GOT PUSH", conn)
        case "QUIT\r\n":
            sendResponse("Goodbye", conn)
            conn.Close()
            return // without this, the next Read on the closed conn panics
        default:
            conn.Write([]byte(fmt.Sprintf("UNKNOWN_COMMAND: %s\n", str)))
        }
    }
}
Your second example with the loop is already what you want. You simply loop and read as long as you want (or probably until some read/write timeout or an external cancellation signal).
However, it still has an error in it:
TCP gives you a stream of bytes, where it is not guaranteed that one write from one side will yield exactly one read on the other side with the same data length. This means that if the client writes PING\r\n, you could still receive only PI in the first read. You can fix that by using a bufio.Scanner and always reading up to the first newline.
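A sketch of that fix, keeping the command names from the question (note that the scanner strips the trailing \r\n, so the cases match the bare words):

func handleRequest(conn net.Conn) {
    defer conn.Close()
    scanner := bufio.NewScanner(conn)
    // Scan returns one full line per iteration, no matter how the bytes
    // were split across TCP segments
    for scanner.Scan() {
        switch scanner.Text() {
        case "PING":
            sendResponse("PONG", conn)
        case "PUSH":
            sendResponse("GOT PUSH", conn)
        case "QUIT":
            sendResponse("Goodbye", conn)
            return
        default:
            fmt.Fprintf(conn, "UNKNOWN_COMMAND: %s\n", scanner.Text())
        }
    }
    if err := scanner.Err(); err != nil {
        log.Println("read error:", err)
    }
}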
Not sure if this is what you're looking for. Taken from the net/http implementation, wrapping your net.TCPListener's Accept method:
tcpKeepAliveListener{listener.(*net.TCPListener)}
type tcpKeepAliveListener struct {
    *net.TCPListener
}

func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
    tc, err := ln.AcceptTCP()
    if err != nil {
        return
    }
    tc.SetKeepAlive(true)
    tc.SetKeepAlivePeriod(3 * time.Minute)
    return tc, nil
}
Refer: Link 1 & Link 2
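Wiring it into the telnet server from the question might look like this (a sketch reusing CONN_TYPE, CONN_HOST, CONN_PORT and handleRequest from above):

listener, err := net.Listen(CONN_TYPE, fmt.Sprintf("%s:%s", CONN_HOST, CONN_PORT))
if err != nil {
    log.Panicln(err)
}
kaListener := tcpKeepAliveListener{listener.(*net.TCPListener)}
for {
    conn, err := kaListener.Accept() // each accepted conn now has keep-alive enabled
    if err != nil {
        log.Panicln(err)
    }
    go handleRequest(conn)
}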
I have an application that makes about 400 reads per second and 100 writes per second to redis (hosted on redislabs). The application uses the github.com/garyburd/redigo package as its redis client.
I have two functions which are the only ones being used to read and write:
func getCachedVPAIDConfig(key string) chan *cachedVPAIDConfig {
    c := make(chan *cachedVPAIDConfig)
    go func() {
        p := pool.Get()
        defer p.Close()
        switch p.Err() {
        case nil:
            item, err := redis.Bytes(p.Do("GET", key))
            if err != nil {
                c <- &cachedVPAIDConfig{nil, err}
                return
            }
            c <- &cachedVPAIDConfig{item, nil}
        default:
            c <- &cachedVPAIDConfig{nil, p.Err()}
            return
        }
    }()
    return c
}
func setCachedVPAIDConfig(key string, j []byte) chan error {
    c := make(chan error)
    go func() {
        p := pool.Get()
        defer p.Close()
        switch p.Err() {
        case nil:
            _, err := p.Do("SET", key, j)
            if err != nil {
                c <- err
                return
            }
            c <- nil
        default:
            c <- p.Err()
            return
        }
    }()
    return c
}
As you can see, I'm using the recommended connection pooling mechanism (http://godoc.org/github.com/garyburd/redigo/redis#Pool).
I'm calling these functions on every HTTP request that an endpoint on the application receives. The problem is: once the application starts getting requests, it immediately starts throwing the error
dial tcp 54.160.xxx.xx:yyyy: connect: cannot assign requested address
(54.160.xxx.xx:yyyy is the redis host)
I can see on redis that there are only about 600 connections when this starts to happen, which doesn't sound like a lot.
I tried playing with the MaxActive setting of the pool, setting it anywhere between 1000 and 50K, but the result is the same.
Any ideas?
EDIT
Here's my pool initialization code (done in func init):
pool = redis.Pool{
    MaxActive: 1000, // note: I tried changing this to 50K, result the same
    Dial: func() (redis.Conn, error) {
        c, err := redis.Dial("tcp", redisHost)
        if err != nil {
            return nil, err
        }
        if _, err := c.Do("AUTH", redisPassword); err != nil {
            c.Close()
            return nil, err
        }
        return c, err
    },
}
Edit 2:
Issue solved by applying the suggestions in the answer below!
New code for pool init:
pool = redis.Pool{
    MaxActive:   500,
    MaxIdle:     500,
    IdleTimeout: 5 * time.Second,
    Dial: func() (redis.Conn, error) {
        c, err := redis.DialTimeout("tcp", redisHost, 100*time.Millisecond, 100*time.Millisecond, 100*time.Millisecond)
        if err != nil {
            return nil, err
        }
        if _, err := c.Do("AUTH", redisPassword); err != nil {
            c.Close()
            return nil, err
        }
        return c, err
    },
}
With this new init, the get and set timeouts are handled by redigo internally, so I no longer need to return a channel from the getCachedVPAIDConfig and setCachedVPAIDConfig funcs. This is how they look now:
func setCachedVPAIDConfig(key string, j []byte) error {
    p := pool.Get()
    switch p.Err() {
    case nil:
        _, err := p.Do("SET", key, j)
        p.Close()
        return err
    default:
        p.Close()
        return p.Err()
    }
}

func getCachedVPAIDConfig(key string) ([]byte, error) {
    p := pool.Get()
    switch p.Err() {
    case nil:
        item, err := redis.Bytes(p.Do("GET", key))
        p.Close()
        return item, err
    default:
        p.Close()
        return nil, p.Err()
    }
}
You're closing the connection after sending on the channels; if the channel send blocks, the deferred close never runs and connections pile up, which would result in the error you're seeing. So don't just defer; close the connection explicitly.
I don't think it's the problem, but a good idea regardless: set a timeout on your connections with DialTimeout.
Make sure you have a proper TestOnBorrow function to get rid of dead connections, especially if you have a timeout. I usually do a PING if the connection has been idle for more than 3 seconds (the hook receives the time the connection was last returned to the pool, from which you can compute the idle time).
Try setting MaxIdle to a larger number as well; I remember having pooling problems that were resolved by increasing that parameter.
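A sketch of such a hook against the pool from the question (redigo's TestOnBorrow receives the connection and the time it was returned to the pool, so the idle time is time.Since(t); the 3-second threshold is the one mentioned above):

pool.TestOnBorrow = func(c redis.Conn, t time.Time) error {
    if time.Since(t) < 3*time.Second {
        // recently used, assume it's still healthy
        return nil
    }
    _, err := c.Do("PING")
    return err // a non-nil error makes the pool discard this connection
}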
I'm trying to connect a computer behind NAT with the internet through a 3rd-party server (a reverse connection). I'm listening on two ports: on one port (dstNet) the machine behind NAT connects, and on the other port the internet clients connect.
The issue is that I don't know how to handle a disconnection of the machine behind NAT. Even if the machine connects again, the traffic is no longer handled (sent/written); I get [DEBUG] socks: Copied 0 bytes to client, which is my warning of course. Below is the code. It's quite long, but I can't find what to trim.
// Make a bridge between dstNet, which is
// usually behind NAT, and srcNet, which is usually a client
// that wants to route traffic through the NAT machine.
package main

import (
    "bufio"
    "errors"
    log "github.com/golang/glog"
    "io"
    "net"
    "time"
)

const (
    // listen on dstNet so that we can
    // create a connection with the NAT client
    dstNet = "0.0.0.0:9000"
    // listen on srcNet so that we can get traffic
    // to forward to dstNet
    srcNet = "0.0.0.0:9001"
)
var errCh = make(chan error, 1)

// make a channel to send the reverse connections
var lrCh = make(chan net.Conn, 1)

func listenDst() {
    // Listen on dstNet
    lr, err := net.Listen("tcp", dstNet)
    if err != nil {
        log.Error(err)
        errCh <- err
        return
    }
    // accept connections
    for {
        lrConn, err := lr.Accept()
        if err != nil {
            log.Error(err)
            errCh <- err
            return
        }
        log.Errorf("sent connection")
        // lrConn.SetReadDeadline(time.Now().Add(10 * time.Second))
        lrCh <- lrConn
    }
}
func main() {
    go func() {
        for err := range errCh {
            if err != nil {
                panic(err)
            }
        }
    }()
    // listen for the nat server
    go listenDst()
    // listen for clients to connect
    l, err := net.Listen("tcp", srcNet)
    if err != nil {
        log.Error(err)
        panic(err)
    }
    // accept the connection
    for {
        conn, err := l.Accept()
        if err != nil {
            log.Error(err)
            panic(err)
        }
        // serve the connection
        go func(conn net.Conn) {
            defer conn.Close()
            bufConn := bufio.NewReader(conn)
            dst := <-lrCh
            defer dst.Close()
            // Start proxying
            errCh2 := make(chan error, 2)
            go proxy("target", dst, bufConn, errCh2)
            go proxy("client", conn, dst, errCh2)
            // Wait
            var ei int
            for err = range errCh2 {
                switch {
                case err != nil && err.Error() == "no byte":
                    log.Error(err)
                case err != nil && err.Error() == "use of closed network connection":
                    // if the connection is closed we restart it.
                    log.Error(err)
                    // BUG() attempt to write again the bytes
                case err != nil:
                    log.Error(err)
                    errCh <- err
                }
                if ei == 1 {
                    log.Errorf("done with errors")
                    close(errCh2)
                }
                ei++
            }
        }(conn)
    }
}
// proxy is used to shuffle data from src to destination, and sends errors
// down a dedicated channel
func proxy(name string, dst io.Writer, src io.Reader, errCh2 chan error) {
    n, err := io.Copy(dst, src)
    // Log, and sleep. This is jank but allows the other side
    // to finish a pending copy
    log.Errorf("[DEBUG] socks: Copied %d bytes to %s", n, name)
    time.Sleep(10 * time.Millisecond)
    // Send any errors
    switch {
    case err != nil:
        log.Error(err)
        errCh2 <- err
    case n < 1:
        errCh2 <- errors.New("no byte")
    default:
        errCh2 <- nil
    }
}
The only time you can reuse a connection after an error is if it is a temporary condition:
if err, ok := err.(net.Error); ok && err.Temporary() {
}
If you are trying to proxy a TCP connection and there is any other error (checking for Temporary may not even be that useful), you need to drop the whole thing and start over. You have no idea what the state of the remote server is, or how many packets are in flight or lost, and trying harder will only cause more difficult bugs. (Tip: don't hide concurrency or timing problems with a sleep; that just makes things harder in the long run.)
Here is a much simpler proxy pattern for Go if you want to reference it:
https://gist.github.com/jbardin/821d08cb64c01c84b81a
func Proxy(srvConn, cliConn *net.TCPConn) {
    // channels to wait on the close event for each connection
    serverClosed := make(chan struct{}, 1)
    clientClosed := make(chan struct{}, 1)
    go broker(srvConn, cliConn, clientClosed)
    go broker(cliConn, srvConn, serverClosed)
    // wait for one half of the proxy to exit, then trigger a shutdown of the
    // other half by calling CloseRead(). This will break the read loop in the
    // broker and allow us to fully close the connection cleanly without a
    // "use of closed network connection" error.
    var waitFor chan struct{}
    select {
    case <-clientClosed:
        // the client closed first and any more packets from the server aren't
        // useful, so we can optionally SetLinger(0) here to recycle the port
        // faster.
        srvConn.SetLinger(0)
        srvConn.CloseRead()
        waitFor = serverClosed
    case <-serverClosed:
        cliConn.CloseRead()
        waitFor = clientClosed
    }
    // Wait for the other connection to close.
    // This "waitFor" pattern isn't required, but gives us a way to track the
    // connection and ensure all copies terminate correctly; we can trigger
    // stats on entry and deferred exit of this function.
    <-waitFor
}

// This does the actual data transfer.
// The broker only closes the Read side.
func broker(dst, src net.Conn, srcClosed chan struct{}) {
    // We can handle errors in a finer-grained manner by inlining io.Copy (it's
    // simple, and we drop the ReaderFrom or WriterTo checks for
    // net.Conn->net.Conn transfers, which aren't needed). This would also let
    // us adjust buffersize.
    _, err := io.Copy(dst, src)
    if err != nil {
        log.Printf("Copy error: %s", err)
    }
    if err := src.Close(); err != nil {
        log.Printf("Close error: %s", err)
    }
    srcClosed <- struct{}{}
}
It turned out that I had to restart the listener, not just close the connection. I've modified the broker function to reset the dstNet listener if it can't write (i.e. writes 0 bytes) to src. I'm still not sure this is the right way to do it (closing the listener seems bad in a multi-connection scenario, since I guess it resets all the client connections dialing that address), but so far this is the best fix I could find.
if n == 0 {
    lrNewCh <- 1
}
Here is all the code. All the credit goes to @JimB.
// Make a bridge between dstNet, which is
// usually behind NAT, and srcNet, which is usually a client
// that wants to route traffic through the NAT machine.
package main

import (
    log "github.com/golang/glog"
    "io"
    "net"
)

// listen on dstNet so that we can
// create a connection with the NAT client
var dstNet *net.TCPAddr = &net.TCPAddr{IP: net.ParseIP("0.0.0.0"), Port: 9000}

// listen on srcNet so that we can get traffic
// to forward to dstNet
var srcNet *net.TCPAddr = &net.TCPAddr{IP: net.ParseIP("0.0.0.0"), Port: 9001}

var errCh = make(chan error, 1)

// make a channel to send the reverse connections
var lrCh = make(chan *net.TCPConn, 1)
var lrNewCh = make(chan int, 1)

func listenDst() {
    // Listen on dstNet
    lr, err := net.ListenTCP("tcp", dstNet)
    if err != nil {
        log.Error(err)
        errCh <- err
        return
    }
    // accept connections
    for {
        lrConn, err := lr.AcceptTCP()
        if err != nil {
            log.Error(err)
            //errCh <- err
            //return
        }
        status := <-lrNewCh
        log.Errorf("status request is %v", status)
        if status == 1 {
            log.Errorf("we close and restart the listener and the connection")
            if err = lrConn.Close(); err != nil {
                log.Error(err)
            }
            if err = lr.Close(); err != nil {
                log.Error(err)
            }
            lr, err = net.ListenTCP("tcp", dstNet)
            if err != nil {
                log.Error(err)
                errCh <- err
                return
            }
            lrConn, err = lr.AcceptTCP()
            if err != nil {
                log.Error(err)
                errCh <- err
            }
        } else {
            log.Errorf("new connection on its way")
            lrCh <- lrConn
        }
        // default:
        //     log.Errorf("accepting new connections")
    }
}
func main() {
    go func() {
        for err := range errCh {
            if err != nil {
                panic(err)
            }
        }
    }()
    // listen for the nat server
    go listenDst()
    // listen for clients to connect
    l, err := net.ListenTCP("tcp", srcNet)
    if err != nil {
        log.Error(err)
        panic(err)
    }
    // accept the connection
    for {
        conn, err := l.AcceptTCP()
        if err != nil {
            log.Error(err)
            panic(err)
        }
        // serve the connection
        go func(conn *net.TCPConn) {
            defer conn.Close()
            lrNewCh <- 0
            dst := <-lrCh
            defer dst.Close()
            proxy(dst, conn)
        }(conn)
    }
}
func proxy(srvConn, cliConn *net.TCPConn) {
    // channels to wait on the close event for each connection
    serverClosed := make(chan struct{}, 1)
    clientClosed := make(chan struct{}, 1)
    go broker(srvConn, cliConn, clientClosed)
    go broker(cliConn, srvConn, serverClosed)
    // wait for one half of the proxy to exit, then trigger a shutdown of the
    // other half by calling CloseRead(). This will break the read loop in the
    // broker and allow us to fully close the connection cleanly without a
    // "use of closed network connection" error.
    var waitFor chan struct{}
    select {
    case <-clientClosed:
        // the client closed first and any more packets from the server aren't
        // useful, so we can optionally SetLinger(0) here to recycle the port
        // faster.
        srvConn.SetLinger(0)
        srvConn.CloseRead()
        waitFor = serverClosed
    case <-serverClosed:
        cliConn.CloseRead()
        waitFor = clientClosed
    }
    // Wait for the other connection to close.
    // This "waitFor" pattern isn't required, but gives us a way to track the
    // connection and ensure all copies terminate correctly; we can trigger
    // stats on entry and deferred exit of this function.
    <-waitFor
}

// This does the actual data transfer.
// The broker only closes the Read side.
func broker(dst, src net.Conn, srcClosed chan struct{}) {
    // We can handle errors in a finer-grained manner by inlining io.Copy (it's
    // simple, and we drop the ReaderFrom or WriterTo checks for
    // net.Conn->net.Conn transfers, which aren't needed). This would also let
    // us adjust buffersize.
    n, err := io.Copy(dst, src)
    log.Errorf(" %v bytes copied", n)
    if err != nil {
        log.Errorf("Copy error: %s", err)
        // errCh <- err
    }
    if err := src.Close(); err != nil {
        log.Errorf("Close error: %s", err)
        errCh <- err
    }
    if n == 0 {
        lrNewCh <- 1
    }
    srcClosed <- struct{}{}
}