Skip to content

Commit b2e0a54

Browse files
richhxRichard Huang
and
Richard Huang
authored
v23/context: fix goroutine leak in WithRootCancel (#214)
Whenever a child context is created via `WithRootCancel`, a goroutine spawns to gracefully handle closing the child context whenever the root context gets canceled. However, the current implementation leaks goroutines whenever the child context exits before the root context exits. This pull request looks to fix the problem by exiting the goroutine whenever the child context is done. See `TestRootCancelGoroutineLeak` for the reproduction use case and test that demonstrates the leak. This is also easily visible via `pprof` using the `goroutine` module. Co-authored-by: Richard Huang <rhuang@grailbio.com>
1 parent fa39730 commit b2e0a54

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

v23/context/context.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,12 @@ func WithRootCancel(parent *T) (*T, CancelFunc) {
303303
// Forward the cancelation from the root context to the newly
304304
// created context.
305305
go func() {
306-
<-rootCtx.Done()
307-
cancel()
306+
select {
307+
case <-rootCtx.Done():
308+
cancel()
309+
case <-ctx.Done():
310+
cancel()
311+
}
308312
}()
309313
} else if atomic.AddInt32(&nRootCancelWarning, 1) < 3 {
310314
vlog.Errorf("context.WithRootCancel: context %+v is not derived from root v23 context.\n", parent)

v23/context/context_test.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99
gocontext "context"
1010
"fmt"
1111
"os"
12+
"runtime"
13+
"strings"
1214
"sync"
1315
"testing"
1416
"time"
@@ -320,6 +322,36 @@ func TestRootCancelChain(t *testing.T) {
320322
}
321323
}
322324

325+
func TestRootCancelGoroutineLeak(t *testing.T) {
326+
rootCtx, rootcancel := context.RootContext()
327+
const iterations = 1024
328+
for i := 0; i != iterations; i++ {
329+
_, cancel := context.WithRootCancel(rootCtx)
330+
cancel()
331+
}
332+
333+
// Arbitrary threshold to wait for the goroutines in the created contexts
334+
// above to exit. This threshold was arbitrarily created after running
335+
// `go test -count=10000 -run TestRootCancelGoroutineLeak$` and verifying
336+
// that the tests did not fail flakily.
337+
const waitThreshold = 8*time.Millisecond
338+
time.Sleep(waitThreshold)
339+
340+
// Verify that goroutines no longer exist in the runtime stack.
341+
buf := make([]byte, 2<<20)
342+
buf = buf[:runtime.Stack(buf, true)]
343+
count := 0
344+
for _, g := range strings.Split(string(buf), "\n\n") {
345+
if strings.Contains(g, "v.io/v23/context.WithRootCancel.func1") {
346+
count++
347+
}
348+
}
349+
if count != 0 {
350+
t.Errorf("expected 0 but got %d: goroutine leaking in WithRootCancel", count)
351+
}
352+
rootcancel()
353+
}
354+
323355
func TestRootCancel_GoContext(t *testing.T) {
324356
root, rootcancel := context.RootContext()
325357

0 commit comments

Comments
 (0)