I'm facing an issue with Rancher 2.5.15 where the main Rancher pod and eks-config-operator are constantly crashing and restarting. This started around 2 weeks ago without any change to the deployed Rancher application; we confirmed this by restoring a snapshot taken 5 days before the issue first appeared, and the crashes continued unchanged.
I assume the issue is caused by a registered EKS cluster that fails when returning its cluster information, because the panic occurs in a function called BuildUpstreamClusterState; however, the log doesn't indicate which cluster is responsible. Deleting the clusters from Rancher is not an option, as there are several EKS clusters added, and since some of them were provisioned from Rancher itself, deleting the clusters from the Rancher UI will also delete them from AWS.
The clusters themselves are working correctly, including the cluster where Rancher is running.
Full log of the panic event:
E0722 12:59:58.631866 7 runtime.go:78] Observed a panic: runtime.boundsError{x:0, y:0, signed:true, code:0x0} (runtime error: index out of range [0] with length 0)
goroutine 1523 [running]:
k8s.io/apimachinery/pkg/util/runtime.logPanic(0x3658a80, 0xc09820cba0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/runtime/runtime.go:74 +0x95
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/runtime/runtime.go:48 +0x86
panic(0x3658a80, 0xc09820cba0)
/usr/local/go/src/runtime/panic.go:965 +0x1b9
github.com/rancher/eks-operator/controller.BuildUpstreamClusterState(0xc037b89ae0, 0x20, 0xc0631001c8, 0x14, 0xc0453237d0, 0xc0818104b8, 0x1, 0x1, 0xc0818104c8, 0xc062c43f00, ...)
/go/pkg/mod/github.com/rancher/eks-operator#v1.0.9/controller/eks-cluster-config-handler.go:865 +0x1931
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.BuildEKSUpstreamSpec(0x3f3d538, 0xc04cdf3050, 0xc04a2ad100, 0x30, 0xc0d89d79a0, 0x1)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/eks_upstream_spec.go:80 +0xc92
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.getComparableUpstreamSpec(0x3f3d538, 0xc04cdf3050, 0xc04a2ad100, 0x30, 0xc0d89d79a0, 0x1)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/cluster_upstream_refresher.go:263 +0x14c
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.(*clusterRefreshController).refreshClusterUpstreamSpec(0xc04cdb7740, 0xc04a2ad100, 0x38cc6bf, 0x3, 0xa303efde8, 0x59d1700, 0x1)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/cluster_upstream_refresher.go:147 +0xef
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.(*clusterRefreshController).onClusterChange(0xc04cdb7740, 0xc0065a5200, 0x7, 0xc04a2ad100, 0x37761a0, 0x383d380, 0x7ff83d307a98)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/cluster_upstream_refresher.go:75 +0x225
github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3.FromClusterHandlerToHandler.func1(0xc0065a5200, 0x7, 0x3ef5738, 0xc04a2ad100, 0xc04a2ad100, 0x7ff83d307a98, 0xc04a2ad100, 0x1)
/go/src/github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3/cluster.go:105 +0x6b
github.com/rancher/lasso/pkg/controller.SharedControllerHandlerFunc.OnChange(0xc04d858010, 0xc0065a5200, 0x7, 0x3ef5738, 0xc04a2ad100, 0x0, 0xc04a2ad100, 0x0, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/sharedcontroller.go:29 +0x4e
github.com/rancher/lasso/pkg/controller.(*SharedHandler).OnChange(0xc001b351a0, 0xc0065a5200, 0x7, 0x3ef5738, 0xc04a2ad100, 0xc022e42c01, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/sharedhandler.go:66 +0x123
github.com/rancher/lasso/pkg/controller.(*controller).syncHandler(0xc000dbd340, 0xc0065a5200, 0x7, 0xc032d3be58, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:210 +0xd1
github.com/rancher/lasso/pkg/controller.(*controller).processSingleItem(0xc000dbd340, 0x3089ba0, 0xc04da70400, 0x0, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:192 +0xe7
github.com/rancher/lasso/pkg/controller.(*controller).processNextWorkItem(0xc000dbd340, 0x203001)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:169 +0x54
github.com/rancher/lasso/pkg/controller.(*controller).runWorker(...)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:158
k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc010473450)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:155 +0x5f
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc010473450, 0x3ed54c0, 0xc022aa9050, 0x1, 0xc0019563c0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:156 +0x9b
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc010473450, 0x3b9aca00, 0x0, 0x1, 0xc0019563c0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:133 +0x98
k8s.io/apimachinery/pkg/util/wait.Until(0xc010473450, 0x3b9aca00, 0xc0019563c0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:90 +0x4d
created by github.com/rancher/lasso/pkg/controller.(*controller).run
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:129 +0x33b
panic: runtime error: index out of range [0] with length 0 [recovered]
panic: runtime error: index out of range [0] with length 0
goroutine 1523 [running]:
k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/runtime/runtime.go:55 +0x109
panic(0x3658a80, 0xc09820cba0)
/usr/local/go/src/runtime/panic.go:965 +0x1b9
github.com/rancher/eks-operator/controller.BuildUpstreamClusterState(0xc037b89ae0, 0x20, 0xc0631001c8, 0x14, 0xc0453237d0, 0xc0818104b8, 0x1, 0x1, 0xc0818104c8, 0xc062c43f00, ...)
/go/pkg/mod/github.com/rancher/eks-operator#v1.0.9/controller/eks-cluster-config-handler.go:865 +0x1931
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.BuildEKSUpstreamSpec(0x3f3d538, 0xc04cdf3050, 0xc04a2ad100, 0x30, 0xc0d89d79a0, 0x1)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/eks_upstream_spec.go:80 +0xc92
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.getComparableUpstreamSpec(0x3f3d538, 0xc04cdf3050, 0xc04a2ad100, 0x30, 0xc0d89d79a0, 0x1)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/cluster_upstream_refresher.go:263 +0x14c
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.(*clusterRefreshController).refreshClusterUpstreamSpec(0xc04cdb7740, 0xc04a2ad100, 0x38cc6bf, 0x3, 0xa303efde8, 0x59d1700, 0x1)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/cluster_upstream_refresher.go:147 +0xef
github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher.(*clusterRefreshController).onClusterChange(0xc04cdb7740, 0xc0065a5200, 0x7, 0xc04a2ad100, 0x37761a0, 0x383d380, 0x7ff83d307a98)
/go/src/github.com/rancher/rancher/pkg/controllers/management/clusterupstreamrefresher/cluster_upstream_refresher.go:75 +0x225
github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3.FromClusterHandlerToHandler.func1(0xc0065a5200, 0x7, 0x3ef5738, 0xc04a2ad100, 0xc04a2ad100, 0x7ff83d307a98, 0xc04a2ad100, 0x1)
/go/src/github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3/cluster.go:105 +0x6b
github.com/rancher/lasso/pkg/controller.SharedControllerHandlerFunc.OnChange(0xc04d858010, 0xc0065a5200, 0x7, 0x3ef5738, 0xc04a2ad100, 0x0, 0xc04a2ad100, 0x0, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/sharedcontroller.go:29 +0x4e
github.com/rancher/lasso/pkg/controller.(*SharedHandler).OnChange(0xc001b351a0, 0xc0065a5200, 0x7, 0x3ef5738, 0xc04a2ad100, 0xc022e42c01, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/sharedhandler.go:66 +0x123
github.com/rancher/lasso/pkg/controller.(*controller).syncHandler(0xc000dbd340, 0xc0065a5200, 0x7, 0xc032d3be58, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:210 +0xd1
github.com/rancher/lasso/pkg/controller.(*controller).processSingleItem(0xc000dbd340, 0x3089ba0, 0xc04da70400, 0x0, 0x0)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:192 +0xe7
github.com/rancher/lasso/pkg/controller.(*controller).processNextWorkItem(0xc000dbd340, 0x203001)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:169 +0x54
github.com/rancher/lasso/pkg/controller.(*controller).runWorker(...)
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:158
k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0xc010473450)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:155 +0x5f
k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc010473450, 0x3ed54c0, 0xc022aa9050, 0x1, 0xc0019563c0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:156 +0x9b
k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc010473450, 0x3b9aca00, 0x0, 0x1, 0xc0019563c0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:133 +0x98
k8s.io/apimachinery/pkg/util/wait.Until(0xc010473450, 0x3b9aca00, 0xc0019563c0)
/go/pkg/mod/k8s.io/apimachinery#v0.20.6/pkg/util/wait/wait.go:90 +0x4d
created by github.com/rancher/lasso/pkg/controller.(*controller).run
/go/pkg/mod/github.com/rancher/lasso#v0.0.0-20210408231703-9ddd9378d08d/pkg/controller/controller.go:129 +0x33b
1.9 GiB database size on disk.
60 GiB memory.
After startup, cockroach keeps using more memory and its goroutine count keeps increasing, until it consumes all available memory.
gossip client (0/3 cur/max conns)
gossip server (0/3 cur/max conns, infos 0/0 sent/received, bytes 0B/0B sent/received)
W200117 11:21:26.697551 195 storage/store.go:3926 [n1,s1] handle raft ready: 0.6s [processed=1]
W200117 11:21:26.839611 177 storage/store.go:3926 [n1,s1] handle raft ready: 0.6s [processed=1]
W200117 11:21:26.839933 206 storage/store.go:3926 [n1,s1] handle raft ready: 0.7s [processed=1]
W200117 11:21:26.840083 170 storage/store.go:3926 [n1,s1] handle raft ready: 0.6s [processed=1]
W200117 11:21:27.036814 190 storage/store.go:3926 [n1,s1] handle raft ready: 0.6s [processed=1]
W200117 11:21:29.764620 198 storage/store.go:3926 [n1,s1] handle raft ready: 0.7s [processed=1]
W200117 11:21:34.446099 167 storage/store.go:3926 [n1,s1] handle raft ready: 0.7s [processed=1]
W200117 11:21:34.563275 182 storage/store.go:3926 [n1,s1] handle raft ready: 1.0s [processed=1]
I200117 11:21:35.034645 276 server/status/runtime.go:465 [n1] runtime stats: 60 GiB RSS, 9012 goroutines, 52 GiB/3.6 GiB/59 GiB GO alloc/idle/total, 761 MiB/852 MiB CGO alloc/total, 11145.7 CGO/sec, 732.2/48.3 %(u/s)time, 0.1 %gc (1x), 411 KiB/571 KiB (r/w)net
W200117 11:21:36.774832 170 storage/store.go:3926 [n1,s1] handle raft ready: 0.7s [processed=1]
I200117 11:21:45.128897 276 server/status/runtime.go:465 [n1] runtime stats: 61 GiB RSS, 9111 goroutines, 53 GiB/4.0 GiB/60 GiB GO alloc/idle/total, 445 MiB/539 MiB CGO alloc/total, 23047.0 CGO/sec, 754.2/28.9 %(u/s)time, 0.0 %gc (0x), 362 KiB/455 KiB (r/w)net
fatal error: runtime: out of memory
After that there are only errors in the log:
runtime stack:
runtime.throw(0x2b00eec, 0x16)
/usr/local/go/src/runtime/panic.go:616 +0x81
runtime.sysMap(0xd2d44b0000, 0x2db0000, 0x0, 0x4882bf8)
/usr/local/go/src/runtime/mem_linux.go:216 +0x20a
runtime.(*mheap).sysAlloc(0x48410a0, 0x2db0000, 0x20224af01)
/usr/local/go/src/runtime/malloc.go:470 +0xd4
runtime.(*mheap).grow(0x48410a0, 0x16d7, 0x0)
/usr/local/go/src/runtime/mheap.go:907 +0x60
runtime.(*mheap).allocSpanLocked(0x48410a0, 0x16d7, 0x4882c08, 0x2050e5e)
/usr/local/go/src/runtime/mheap.go:820 +0x301
runtime.(*mheap).alloc_m(0x48410a0, 0x16d7, 0x101, 0x0)
/usr/local/go/src/runtime/mheap.go:686 +0x118
runtime.(*mheap).alloc.func1()
/usr/local/go/src/runtime/mheap.go:753 +0x4d
runtime.(*mheap).alloc(0x48410a0, 0x16d7, 0xd00f000101, 0xc4204a4180)
/usr/local/go/src/runtime/mheap.go:752 +0x8a
runtime.largeAlloc(0x2dae000, 0xd00f970100, 0x0)
/usr/local/go/src/runtime/malloc.go:826 +0x94
runtime.mallocgc.func1()
/usr/local/go/src/runtime/malloc.go:721 +0x46
runtime.systemstack(0x7f1b00000000)
/usr/local/go/src/runtime/asm_amd64.s:409 +0x79
runtime.mstart()
/usr/local/go/src/runtime/proc.go:1175
goroutine 16380 [running]:
runtime.systemstack_switch()
/usr/local/go/src/runtime/asm_amd64.s:363 fp=0xc9995451b8 sp=0xc9995451b0 pc=0x70b970
runtime.mallocgc(0x2dae000, 0x0, 0x0, 0x0)
/usr/local/go/src/runtime/malloc.go:720 +0x8a2 fp=0xc999545258 sp=0xc9995451b8 pc=0x6c46e2
runtime.growslice(0x268d940, 0xd29e7c0000, 0xd146e4, 0xd146e4, 0x2dad295, 0x0, 0x0, 0x0)
/usr/local/go/src/runtime/slice.go:172 +0x21d fp=0xc9995452c0 sp=0xc999545258 pc=0x6f5c2d
github.com/cockroachdb/cockroach/pkg/roachpb.(*ScanResponse).combine(0xd064b26ee8, 0x302bdc0, 0xd0bc2462b0, 0x302bdc0, 0xd0bc2462b0)
/go/src/github.com/cockroachdb/cockroach/pkg/roachpb/api.go:259 +0x3fb fp=0xc9995453e0 sp=0xc9995452c0 pc=0xda0efb
github.com/cockroachdb/cockroach/pkg/roachpb.(*BatchResponse).Combine(0xcae57fb7a0, 0xc503e11da0, 0xc7b2972a40, 0x6, 0x8, 0x0, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/roachpb/batch.go:360 +0x32e fp=0xc9995454c8 sp=0xc9995453e0 pc=0xe4d50e
github.com/cockroachdb/cockroach/pkg/kv.(*DistSender).divideAndSendBatchToRanges.func1(0xc999545848, 0xc999545b28, 0xc999545a70, 0xc999545b20, 0xc99954578f, 0xc999545830, 0xc999545794)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/dist_sender.go:821 +0x273 fp=0xc999545680 sp=0xc9995454c8 pc=0x1767443
github.com/cockroachdb/cockroach/pkg/kv.(*DistSender).divideAndSendBatchToRanges(0xc42077a700, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc4de731900, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/dist_sender.go:1000 +0x797 fp=0xc999545a58 sp=0xc999545680 pc=0x174b557
github.com/cockroachdb/cockroach/pkg/kv.(*DistSender).Send(0xc42077a700, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/dist_sender.go:683 +0x4c7 fp=0xc999545d58 sp=0xc999545a58 pc=0x174a497
github.com/cockroachdb/cockroach/pkg/kv.(*txnLockGatekeeper).SendLocked(0xcb19e3abc8, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_coord_sender.go:234 +0xf5 fp=0xc999545e08 sp=0xc999545d58 pc=0x17588a5
github.com/cockroachdb/cockroach/pkg/kv.(*txnMetrics).SendLocked(0xcb19e3ab90, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_metrics.go:58 +0x12d fp=0xc999545ed8 sp=0xc999545e08 pc=0x1761f7d
github.com/cockroachdb/cockroach/pkg/kv.(*txnSpanRefresher).sendLockedWithRefreshAttempts(0xcb19e3aaf8, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_span_refresher.go:167 +0x98 fp=0xc999545fb8 sp=0xc999545ed8 pc=0x17645a8
github.com/cockroachdb/cockroach/pkg/kv.(*txnSpanRefresher).SendLocked(0xcb19e3aaf8, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_span_refresher.go:105 +0x11e fp=0xc9995460a0 sp=0xc999545fb8 pc=0x176404e
github.com/cockroachdb/cockroach/pkg/kv.(*txnPipeliner).SendLocked(0xcb19e3aa78, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_pipeliner.go:169 +0x165 fp=0xc9995461b8 sp=0xc9995460a0 pc=0x1762555
github.com/cockroachdb/cockroach/pkg/kv.(*txnIntentCollector).SendLocked(0xcb19e3aa38, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_intent_collector.go:106 +0x45d fp=0xc999546338 sp=0xc9995461b8 pc=0x17605ad
github.com/cockroachdb/cockroach/pkg/kv.(*txnSeqNumAllocator).SendLocked(0xcb19e3ab78, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_sequence_nums.go:62 +0x1f3 fp=0xc999546450 sp=0xc999546338 pc=0x1763e33
github.com/cockroachdb/cockroach/pkg/kv.(*txnHeartbeat).SendLocked(0xcb19e3a990, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_interceptor_heartbeat.go:230 +0x542 fp=0xc999546e00 sp=0xc999546450 pc=0x175df82
github.com/cockroachdb/cockroach/pkg/kv.(*TxnCoordSender).Send(0xcb19e3a800, 0x305a3c0, 0xcb14dc0d20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc67b3dbd00, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/kv/txn_coord_sender.go:648 +0x456 fp=0xc999547160 sp=0xc999546e00 pc=0x175b136
github.com/cockroachdb/cockroach/pkg/internal/client.(*DB).sendUsingSender(0xc420778280, 0x305a3c0, 0xc9d8025380, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/internal/client/db.go:623 +0x135 fp=0xc999547228 sp=0xc999547160 pc=0xfdc3f5
github.com/cockroachdb/cockroach/pkg/internal/client.(*Txn).Send(0xc909d72990, 0x305a3c0, 0xc9d8025380, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/internal/client/txn.go:805 +0x14c fp=0xc999547380 sp=0xc999547228 pc=0xfe64ac
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*txnKVFetcher).fetch(0xc6b75efd50, 0x305a3c0, 0xc9d8025380, 0x0, 0xc9995479c0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/kvfetcher.go:327 +0x597 fp=0xc9995477f8 sp=0xc999547380 pc=0x15708a7
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*txnKVFetcher).nextBatch(0xc6b75efd50, 0x305a3c0, 0xc9d8025380, 0x10000cc88b49400, 0x0, 0x15, 0xcc88b49420, 0x0, 0xc42b83d400, 0x2, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/kvfetcher.go:394 +0x6d fp=0xc999547878 sp=0xc9995477f8 pc=0x157112d
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*RowFetcher).nextKV(0xcad73e5f18, 0x305a3c0, 0xc9d8025380, 0x3f, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/rowfetcher.go:479 +0x370 fp=0xc999547a20 sp=0xc999547878 pc=0x157a730
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*RowFetcher).NextKey(0xcad73e5f18, 0x305a3c0, 0xc9d8025380, 0x0, 0x0, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/rowfetcher.go:498 +0x8a fp=0xc999547b00 sp=0xc999547a20 pc=0x157a9da
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*RowFetcher).StartScanFrom(0xcad73e5f18, 0x305a3c0, 0xc9d8025380, 0x30387c0, 0xc6b75efd50, 0x0, 0x1)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/rowfetcher.go:447 +0xd9 fp=0xc999547b40 sp=0xc999547b00 pc=0x157a359
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*RowFetcher).StartScan(0xcad73e5f18, 0x305a3c0, 0xc9d8025380, 0xc909d72990, 0xc9f8754400, 0x3f, 0x40, 0x0, 0x0, 0x0, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/rowfetcher.go:435 +0x208 fp=0xc999547c88 sp=0xc999547b40 pc=0x157a1c8
github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*joinReader).readInput(0xcad73e5500, 0x0, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/joinreader.go:440 +0x92c fp=0xc999547e38 sp=0xc999547c88 pc=0x17c8b3c
github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*joinReader).Next(0xcad73e5500, 0x0, 0x0, 0x0, 0xcb15328e10)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/joinreader.go:356 +0x2df fp=0xc999547e90 sp=0xc999547e38 pc=0x17c81af
github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.Run(0x305a3c0, 0xc9d8025380, 0x3063380, 0xcad73e5500, 0x3037440, 0xcb19e92e00)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/base.go:172 +0x35 fp=0xc999547ed0 sp=0xc999547e90 pc=0x1789045
github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*ProcessorBase).Run(0xcad73e5500, 0x305a3c0, 0xc9d8025380, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/processors.go:731 +0x98 fp=0xc999547f10 sp=0xc999547ed0 pc=0x17d4b58
github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*Flow).StartSync(0xcad736dc00, 0x305a3c0, 0xc9d8025380, 0x2c375e8, 0xcad72389a0, 0x3037140)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/flow.go:581 +0x191 fp=0xc999547f60 sp=0xc999547f10 pc=0x17aaa41
github.com/cockroachdb/cockroach/pkg/sql.(*DistSQLPlanner).Run(0xc4208e7700, 0xc4c10cb020, 0xc909d72360, 0xc999548898, 0xcb19e8a500, 0xc8e49839b0, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsql_running.go:253 +0x886 fp=0xc999548790 sp=0xc999547f60 pc=0x1b119a6
github.com/cockroachdb/cockroach/pkg/sql.(*DistSQLPlanner).PlanAndRun(0xc4208e7700, 0x305a3c0, 0xc9d8024b10, 0xc8e49839b0, 0xc4c10cb020, 0xc909d72360, 0x304d340, 0xcad73ecdc0, 0xcb19e8a500)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/distsql_running.go:756 +0x24c fp=0xc999548a38 sp=0xc999548790 pc=0x1b14fac
github.com/cockroachdb/cockroach/pkg/sql.(*connExecutor).execWithDistSQLEngine(0xc8e4983500, 0x305a3c0, 0xc9d8024b10, 0xc8e4983918, 0x3, 0x7f1b1a3c03c0, 0xc909d72480, 0xcabe553601, 0xd, 0xd)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:971 +0x2d8 fp=0xc999548c10 sp=0xc999548a38 pc=0x1ac15d8
github.com/cockroachdb/cockroach/pkg/sql.(*connExecutor).dispatchToExecutionEngine(0xc8e4983500, 0x305a3c0, 0xc9d8024b10, 0x305da40, 0xcafa482f80, 0xcabe553680, 0xd, 0xd, 0xc90a26f440, 0x119, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:818 +0xa7e fp=0xc999548dd8 sp=0xc999548c10 pc=0x1ac096e
github.com/cockroachdb/cockroach/pkg/sql.(*connExecutor).execStmtInOpenState(0xc8e4983500, 0x305a3c0, 0xc9d8024b10, 0x305da40, 0xcafa482f80, 0xcabe553680, 0xd, 0xd, 0xc90a26f440, 0x119, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:396 +0xa8f fp=0xc9995495b0 sp=0xc999548dd8 pc=0x1abc96f
github.com/cockroachdb/cockroach/pkg/sql.(*connExecutor).execStmt(0xc8e4983500, 0x305a3c0, 0xc9d8024b10, 0x305da40, 0xcafa482f80, 0xcabe553680, 0xd, 0xd, 0xc90a26f440, 0x119, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor_exec.go:96 +0x341 fp=0xc999549748 sp=0xc9995495b0 pc=0x1abbac1
github.com/cockroachdb/cockroach/pkg/sql.(*connExecutor).run(0xc8e4983500, 0x305a300, 0xc9e1391740, 0xc4207bee58, 0x5400, 0x15000, 0xc4207beef0, 0xc4c0d0c790, 0x0, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:1180 +0x1400 fp=0xc999549e88 sp=0xc999549748 pc=0x1ab3560
github.com/cockroachdb/cockroach/pkg/sql.(*Server).ServeConn(0xc42087e780, 0x305a300, 0xc9e1391740, 0xc8e4983500, 0x5400, 0x15000, 0xc4207beef0, 0xc4c0d0c790, 0x0, 0x0)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:391 +0xce fp=0xc999549ee8 sp=0xc999549e88 pc=0x1aaf1de
github.com/cockroachdb/cockroach/pkg/sql/pgwire.(*conn).processCommandsAsync.func1(0xc66dbb98c1, 0xc719da4640, 0x305a300, 0xc9e1391740, 0xc4c0d0c790, 0x304f900, 0xc719da4620, 0xc9c20116e0, 0xc426828a00, 0x0, ...)
/go/src/github.com/cockroachdb/cockroach/pkg/sql/pgwire/conn.go:520 +0x1e7 fp=0xc999549f78 sp=0xc999549ee8 pc=0x1f07747
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:2361 +0x1 fp=0xc999549f80 sp=0xc999549f78 pc=0x70e501
created by github.com/cockroachdb/cockroach/pkg/sql/pgwire.(*conn).processCommandsAsync
/go/src/github.com/cockroachdb/cockroach/pkg/sql/pgwire/conn.go:475 +0x17b
I've used this configuration: max-sql-memory=.3 and cache=.40.
From the handle raft ready messages, it seems like your disk is underprovisioned on IOPS or slow to handle requests, which is causing KV operations to queue up in memory.
One workaround is to limit the amount of memory that Cockroach's SQL processors and storage-level caches use, via the flags --max-sql-memory and --cache respectively. This will not solve the problem entirely, as raft operations will still queue up until the disk is correctly provisioned, but it will help.
The below command will set both SQL and cache memory limits to 25% of total system memory:
cockroach start --store=<dir> --max-sql-memory=0.25 --cache=0.25
More on memory limits: https://www.cockroachlabs.com/blog/memory-usage-cockroachdb/
We use github.com/gogo/protobuf, and our service crashes in proto.Marshal(). This happens almost once a day, but I did not receive a memory alarm. It looks like a memory leak. Has anyone encountered this situation?
fatal error: runtime: out of memory
runtime stack:
runtime.throw(0xe25648, 0x16)
/usr/local/go/src/runtime/panic.go:617 +0x72
runtime.sysMap(0xc5cc000000, 0xc490000000, 0x18671b8)
/usr/local/go/src/runtime/mem_linux.go:170 +0xc7
runtime.(*mheap).sysAlloc(0x184e180, 0xc48d3d8000, 0x184e190, 0x62469ec)
/usr/local/go/src/runtime/malloc.go:633 +0x1cd
runtime.(*mheap).grow(0x184e180, 0x62469ec, 0x0)
/usr/local/go/src/runtime/mheap.go:1222 +0x42
runtime.(*mheap).allocSpanLocked(0x184e180, 0x62469ec, 0x18671c8, 0x7f213328e6d8)
/usr/local/go/src/runtime/mheap.go:1150 +0x37f
runtime.(*mheap).alloc_m(0x184e180, 0x62469ec, 0x7f21bed20101, 0x184e190)
/usr/local/go/src/runtime/mheap.go:977 +0xc2
runtime.(*mheap).alloc.func1()
/usr/local/go/src/runtime/mheap.go:1048 +0x4c
runtime.(*mheap).alloc(0x184e180, 0x62469ec, 0xc000000101, 0x7f21bed2a0e0)
/usr/local/go/src/runtime/mheap.go:1047 +0x8a
runtime.largeAlloc(0xc48d3d8000, 0xffffffffffff0100, 0x7f21bed2a0e0)
/usr/local/go/src/runtime/malloc.go:1055 +0x99
runtime.mallocgc.func1()
/usr/local/go/src/runtime/malloc.go:950 +0x46
runtime.systemstack(0x0)
/usr/local/go/src/runtime/asm_amd64.s:351 +0x66
runtime.mstart()
/usr/local/go/src/runtime/proc.go:1153
goroutine 521107 [running]:
runtime.systemstack_switch()
/usr/local/go/src/runtime/asm_amd64.s:311 fp=0xc0982eae30 sp=0xc0982eae28 pc=0x45cdf0
runtime.mallocgc(0xc48d3d8000, 0x0, 0x1700, 0xc492070000)
/usr/local/go/src/runtime/malloc.go:949 +0x872 fp=0xc0982eaed0 sp=0xc0982eae30 pc=0x40e6d2
runtime.growslice(0xca6f00, 0xc492070000, 0x1c43, 0x2000, 0xc48d3d6e73, 0x0, 0x0, 0x0)
/usr/local/go/src/runtime/slice.go:175 +0x151 fp=0xc0982eaf38 sp=0xc0982eaed0 pc=0x4462a1
xxxx/vendor/github.com/gogo/protobuf/proto.(*Buffer).EncodeStringBytes(...)
xxxx/vendor/github.com/gogo/protobuf/proto.Marshal(0xf58840, 0xc45eda2a00, 0x2d, 0x1, 0xc1e72e7801, 0x1, 0xc4860713e0)
/opt/go/src/xxxx/vendor/github.com/gogo/protobuf/proto/encode.go:236 +0x92 fp=0xc0982eb9a8 sp=0xc0982eb960 pc=0x79dd12
xxxx/feature_server_user/get_feature.(*GetUserFeatureAction).Handler(0x1864ca8, 0xc46a746a00, 0x17, 0x200, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
I encountered a segmentation violation error when using Sprintf in my Go code. Does anybody know what's wrong with my code?
My code is as follows:
// createTrainingPath returns a slash-separated path of the form
// "<trainingPrefix>/<codec>/<YYYYMMDD>/<uuid>", where the date component is
// the current local date at the time of the call.
func createTrainingPath(trainingPrefix string, uuid string, codec string) string {
	// "20060102" is Go's reference-time layout for a compact YYYYMMDD date.
	const dayLayout = "20060102"
	today := time.Now().Format(dayLayout)

	// Note the argument order: codec comes before the date, uuid comes last.
	return fmt.Sprintf("%s/%s/%s/%s", trainingPrefix, codec, today, uuid)
}
The error is as follows:
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x6ca1f7]
goroutine 7483565 [running]:
panic(0x1162ea0, 0x19558e0)
/home/wenchuang.liu/go/src/runtime/panic.go:540 +0x45e fp=0xc494a569f0 sp=0xc494a56948 pc=0x697f4e
runtime.panicmem()
/home/wenchuang.liu/go/src/runtime/panic.go:63 +0x5e fp=0xc494a56a10 sp=0xc494a569f0 pc=0x696c7e
runtime.sigpanic()
/home/wenchuang.liu/go/src/runtime/signal_unix.go:367 +0x17c fp=0xc494a56a60 sp=0xc494a56a10 pc=0x6af2dc
runtime.memmove(0xc4200e2419, 0x0, 0x20)
/home/wenchuang.liu/go/src/runtime/memmove_amd64.s:178 +0x147 fp=0xc494a56a68 sp=0xc494a56a60 pc=0x6ca1f7
fmt.(*buffer).WriteString(...)
/home/wenchuang.liu/go/src/fmt/print.go:82
fmt.(*fmt).padString(0xc47dafe040, 0x0, 0x20)
/home/wenchuang.liu/go/src/fmt/format.go:110 +0x9c fp=0xc494a56af0 sp=0xc494a56a68 pc=0x7365fc
fmt.(*fmt).fmt_s(0xc47dafe040, 0x0, 0x20)
/home/wenchuang.liu/go/src/fmt/format.go:328 +0x61 fp=0xc494a56b28 sp=0xc494a56af0 pc=0x7373a1
fmt.(*pp).fmtString(0xc47dafe000, 0x0, 0x20, 0x7f3300000073)
/home/wenchuang.liu/go/src/fmt/print.go:433 +0x197 fp=0xc494a56b60 sp=0xc494a56b28 pc=0x73b077
fmt.(*pp).printArg(0xc47dafe000, 0x1131bc0, 0xc4c6f94200, 0x73)
/home/wenchuang.liu/go/src/fmt/print.go:664 +0x7b5 fp=0xc494a56be0 sp=0xc494a56b60 pc=0x73d355
fmt.(*pp).doPrintf(0xc47dafe000, 0x1232d53, 0xb, 0xc494a56dd0, 0x4, 0x4)
/home/wenchuang.liu/go/src/fmt/print.go:996 +0x15a fp=0xc494a56d10 sp=0xc494a56be0 pc=0x740cea
fmt.Sprintf(0x1232d53, 0xb, 0xc494a56dd0, 0x4, 0x4, 0xc4c48478d8, 0x8)
/home/wenchuang.liu/go/src/fmt/print.go:196 +0x66 fp=0xc494a56d68 sp=0xc494a56d10 pc=0x739226
i7zuoye.com/ago/gomark/service/utils.createTrainingPath(0x0, 0x20, 0x122f9f1, 0x6, 0xc42c2c9560, 0xc4209e0f68)
/home/wenchuang.liu/gohere/src/i7zuoye.com/ago/gomark/service/utils/media.go:210 +0x190 fp=0xc494a56e20 sp=0xc494a56d68 pc=0xbdc6f0
i7zuoye.com/ago/gomark/service/utils.SaveResult(0x0, 0x20, 0xc4909e4a00, 0xf5e, 0x134f, 0x865823, 0xc4206b0301, 0xc4209e0fa8, 0x865823)
/home/wenchuang.liu/gohere/src/i7zuoye.com/ago/gomark/service/utils/media.go:116 +0x51 fp=0xc494a56e98 sp=0xc494a56e20 pc=0xbdaf41
i7zuoye.com/ago/gomark/service/evaluate/stream.sendResponse.func1(0xc4acdfbe60, 0xc4909e4a00, 0xf5e, 0x134f, 0xc4371bc410, 0xc8, 0x0, 0x0, 0x0, 0x12392fd, ...)
/home/wenchuang.liu/gohere/src/i7zuoye.com/ago/gomark/service/evaluate/stream/streamserver.go:390 +0x73 fp=0xc494a56f40 sp=0xc494a56e98 pc=0xbf3593
runtime.goexit()
/home/wenchuang.liu/go/src/runtime/asm_amd64.s:2337 +0x1 fp=0xc494a56f48 sp=0xc494a56f40 pc=0x6c9901
created by i7zuoye.com/ago/gomark/service/evaluate/stream.sendResponse
/home/wenchuang.liu/gohere/src/i7zuoye.com/ago/gomark/service/evaluate/stream/streamserver.go:389 +0x179
Any help would be appreciated.