Skip to content

Commit 99bbca3

Browse files
authored
feat: add automatic heap snapshots to help debug memory issues (#5920)
1 parent 5ec931b commit 99bbca3

File tree

7 files changed

+120
-1
lines changed

7 files changed

+120
-1
lines changed

packages/dd-trace/src/config.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,9 @@ class Config {
529529
defaults['grpc.client.error.statuses'] = GRPC_CLIENT_ERROR_STATUSES
530530
defaults['grpc.server.error.statuses'] = GRPC_SERVER_ERROR_STATUSES
531531
defaults.headerTags = []
532+
defaults['heapSnapshot.count'] = 0
533+
defaults['heapSnapshot.destination'] = ''
534+
defaults['heapSnapshot.interval'] = 3600
532535
defaults.hostname = '127.0.0.1'
533536
defaults['iast.dbRowsToTaint'] = 1
534537
defaults['iast.deduplicationEnabled'] = true
@@ -713,6 +716,9 @@ class Config {
713716
DD_GRPC_CLIENT_ERROR_STATUSES,
714717
DD_GRPC_SERVER_ERROR_STATUSES,
715718
JEST_WORKER_ID,
719+
DD_HEAP_SNAPSHOT_COUNT,
720+
DD_HEAP_SNAPSHOT_DESTINATION,
721+
DD_HEAP_SNAPSHOT_INTERVAL,
716722
DD_IAST_DB_ROWS_TO_TAINT,
717723
DD_IAST_DEDUPLICATION_ENABLED,
718724
DD_IAST_ENABLED,
@@ -896,6 +902,9 @@ class Config {
896902
this._setIntegerRangeSet(env, 'grpc.client.error.statuses', DD_GRPC_CLIENT_ERROR_STATUSES)
897903
this._setIntegerRangeSet(env, 'grpc.server.error.statuses', DD_GRPC_SERVER_ERROR_STATUSES)
898904
this._setArray(env, 'headerTags', DD_TRACE_HEADER_TAGS)
905+
env['heapSnapshot.count'] = maybeInt(DD_HEAP_SNAPSHOT_COUNT)
906+
this._setString(env, 'heapSnapshot.destination', DD_HEAP_SNAPSHOT_DESTINATION)
907+
env['heapSnapshot.interval'] = maybeInt(DD_HEAP_SNAPSHOT_INTERVAL)
899908
this._setString(env, 'hostname', DD_AGENT_HOST)
900909
env['iast.dbRowsToTaint'] = maybeInt(DD_IAST_DB_ROWS_TO_TAINT)
901910
this._setBoolean(env, 'iast.deduplicationEnabled', DD_IAST_DEDUPLICATION_ENABLED)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
'use strict'
2+
3+
const { join } = require('path')
4+
const { setImmediate, setTimeout } = require('timers/promises')
5+
const { format } = require('util')
6+
const { writeHeapSnapshot } = require('v8')
7+
const { threadId } = require('worker_threads')
8+
const log = require('./log')
9+
10+
/**
 * Writes heap snapshots one after another until the configured count is reached.
 *
 * Before every snapshot it waits `config.heapSnapshot.interval` seconds on a
 * timer created with `ref: false`, then attempts a garbage collection and
 * writes the snapshot synchronously.
 *
 * @param {object} config - Tracer configuration; reads `heapSnapshot.count`,
 *   `heapSnapshot.interval` (seconds) and `heapSnapshot.destination`.
 * @param {number} total - 1-based ordinal of the next snapshot to take.
 * @returns {Promise<void>} Settles once no further snapshot is due; rejects if
 *   waiting, collecting or writing fails.
 */
async function scheduleSnapshot (config, total) {
  let ordinal = total

  // Negated `>` (rather than `<=`) keeps the original comparison semantics.
  while (!(ordinal > config.heapSnapshot.count)) {
    const delayMs = config.heapSnapshot.interval * 1000

    await setTimeout(delayMs, null, { ref: false })
    await clearMemory()

    const target = getName(config.heapSnapshot.destination)
    writeHeapSnapshot(target)

    ordinal += 1
  }
}
18+
19+
/**
 * Forces two garbage collection passes when `globalThis.gc` is available.
 *
 * Does nothing when `globalThis.gc` is not set. Between the two passes it
 * yields to the event loop once via `setImmediate`.
 *
 * @returns {Promise<void>}
 */
async function clearMemory () {
  if (globalThis.gc) {
    globalThis.gc()

    // Yield to the event loop before collecting again.
    await setImmediate()

    // Second full GC for anything missed by the first one.
    globalThis.gc()
  }
}
25+
26+
/**
 * Left-pads a value with zeros to a minimum width of two characters.
 *
 * @param {*} value - Value to format; converted with `String()`.
 * @returns {string} The value as a string, at least two characters long.
 */
function pad (value) {
  // `padStart` expects a string filler; pass '0' instead of relying on
  // implicit number-to-string coercion.
  return String(value).padStart(2, '0')
}
29+
30+
/**
 * Builds the path of a new heap snapshot file inside `destination`.
 *
 * The file name has the form
 * `Heap-YYYYMMDD-HHMMSS-<pid>-<threadId>.heapsnapshot`, using the current
 * local date and time.
 *
 * @param {string} destination - Directory the file name is joined onto.
 * @returns {string} `destination` joined with the generated file name.
 */
function getName (destination) {
  const date = new Date()
  const filename = format(
    'Heap-%s%s%s-%s%s%s-%s-%s.heapsnapshot',
    date.getFullYear(),
    pad(date.getMonth() + 1), // getMonth() is zero-based (January is 0).
    pad(date.getDate()),
    pad(date.getHours()),
    pad(date.getMinutes()),
    pad(date.getSeconds()),
    process.pid,
    threadId
  )

  return join(destination, filename)
}
46+
47+
/**
 * Starts taking heap snapshots as described by `config.heapSnapshot`.
 *
 * Failures are logged rather than propagated, so the returned promise does
 * not reject because of a snapshot error.
 *
 * @param {object} config - Tracer configuration; `heapSnapshot.destination`
 *   is used in the log messages and the whole object is handed to
 *   `scheduleSnapshot`.
 * @returns {Promise<void>} Settles after all snapshots were attempted.
 */
async function start (config) {
  const { destination } = config.heapSnapshot

  try {
    // Snapshot numbering is 1-based.
    await scheduleSnapshot(config, 1)
    log.debug('Wrote heap snapshots to %s.', destination)
  } catch (e) {
    log.error('Failed to write heap snapshots to %s.', destination, e)
  }
}

module.exports = { start }

packages/dd-trace/src/proxy.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ class Tracer extends NoopProxy {
100100
require('./crashtracking').start(config)
101101
}
102102

103+
if (config.heapSnapshot.count > 0) {
104+
require('./heap_snapshots').start(config)
105+
}
106+
103107
telemetry.start(config, this._pluginManager)
104108

105109
if (config.dogstatsd) {

packages/dd-trace/src/supported-configurations.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@
7878
"DD_GIT_PULL_REQUEST_BASE_BRANCH_SHA": ["A"],
7979
"DD_GRPC_CLIENT_ERROR_STATUSES": ["A"],
8080
"DD_GRPC_SERVER_ERROR_STATUSES": ["A"],
81+
"DD_HEAP_SNAPSHOT_COUNT": ["A"],
82+
"DD_HEAP_SNAPSHOT_INTERVAL": ["A"],
83+
"DD_HEAP_SNAPSHOT_DESTINATION": ["A"],
8184
"DD_IAST_DB_ROWS_TO_TAINT": ["A"],
8285
"DD_IAST_DEDUPLICATION_ENABLED": ["A"],
8386
"DD_IAST_ENABLED": ["A"],

packages/dd-trace/test/config.spec.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,9 @@ describe('Config', () => {
303303
expect(config).to.have.property('flushMinSpans', 1000)
304304
expect(config.grpc.client.error.statuses).to.deep.equal(GRPC_CLIENT_ERROR_STATUSES)
305305
expect(config.grpc.server.error.statuses).to.deep.equal(GRPC_SERVER_ERROR_STATUSES)
306+
expect(config).to.have.nested.property('heapSnapshot.count', 0)
307+
expect(config).to.have.nested.property('heapSnapshot.destination', '')
308+
expect(config).to.have.nested.property('heapSnapshot.interval', 3600)
306309
expect(config).to.have.nested.property('iast.enabled', false)
307310
expect(config).to.have.nested.property('iast.redactionEnabled', true)
308311
expect(config).to.have.nested.property('iast.redactionNamePattern', null)
@@ -557,6 +560,9 @@ describe('Config', () => {
557560
process.env.DD_ENV = 'test'
558561
process.env.DD_GRPC_CLIENT_ERROR_STATUSES = '3,13,400-403'
559562
process.env.DD_GRPC_SERVER_ERROR_STATUSES = '3,13,400-403'
563+
process.env.DD_HEAP_SNAPSHOT_COUNT = '1'
564+
process.env.DD_HEAP_SNAPSHOT_DESTINATION = '/tmp'
565+
process.env.DD_HEAP_SNAPSHOT_INTERVAL = '1800'
560566
process.env.DD_IAST_DB_ROWS_TO_TAINT = 2
561567
process.env.DD_IAST_DEDUPLICATION_ENABLED = false
562568
process.env.DD_IAST_ENABLED = 'true'
@@ -675,6 +681,9 @@ describe('Config', () => {
675681
expect(config.grpc.client.error.statuses).to.deep.equal([3, 13, 400, 401, 402, 403])
676682
expect(config.grpc.server.error.statuses).to.deep.equal([3, 13, 400, 401, 402, 403])
677683
expect(config).to.have.property('hostname', 'agent')
684+
expect(config).to.have.nested.property('heapSnapshot.count', 1)
685+
expect(config).to.have.nested.property('heapSnapshot.destination', '/tmp')
686+
expect(config).to.have.nested.property('heapSnapshot.interval', 1800)
678687
expect(config).to.have.nested.property('iast.dbRowsToTaint', 2)
679688
expect(config).to.have.nested.property('iast.deduplicationEnabled', false)
680689
expect(config).to.have.nested.property('iast.enabled', true)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
'use strict'
2+
3+
require('./setup/tap')
4+
5+
const { mkdtempSync, readdirSync } = require('fs')
6+
const { tmpdir } = require('os')
7+
const { join } = require('path')
8+
const { threadId } = require('worker_threads')
9+
const { start } = require('../src/heap_snapshots')
10+
11+
// Fresh temporary directory so only files written by this test are counted.
const destination = mkdtempSync(join(tmpdir(), 'dd-trace-heap-snapshot-'))

describe('Heap Snapshots', () => {
  it('should take heap snapshots over time', async () => {
    // Keep process alive since `start` uses an unref timer.
    const interval = setInterval(() => {}, 1000)

    await start({
      heapSnapshot: {
        count: 3,
        destination,
        interval: 1
      }
    })

    clearInterval(interval)

    const pattern = new RegExp(`^Heap-\\d{8}-\\d{6}-${process.pid}-${threadId}\\.heapsnapshot$`)
    const files = readdirSync(destination)

    expect(files).to.have.length(3)

    // Validate every snapshot name, not just the first two.
    for (const file of files) {
      expect(file).to.match(pattern)
    }
  })
})

packages/dd-trace/test/proxy.spec.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ describe('TracerProxy', () => {
139139
enabled: false
140140
},
141141
configure: sinon.spy(),
142-
llmobs: {}
142+
llmobs: {},
143+
heapSnapshot: {}
143144
}
144145
Config = sinon.stub().returns(config)
145146

0 commit comments

Comments
 (0)