From 11288652d564a86ac9243c67eaf92a7137668064 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 15 May 2025 16:58:17 +0200 Subject: [PATCH 1/8] Revert "simpler adding --no-async-context-frame (#5717)" This reverts commit fcf95b27a843cee9f61fea5e5613e57e2155b03a. --- packages/dd-trace/test/setup/core.js | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/packages/dd-trace/test/setup/core.js b/packages/dd-trace/test/setup/core.js index ed090d5afd7..37249fb0088 100644 --- a/packages/dd-trace/test/setup/core.js +++ b/packages/dd-trace/test/setup/core.js @@ -59,23 +59,29 @@ if (NODE_MAJOR >= 24 && !process.env.OPTIONS_OVERRIDE) { const childProcess = require('child_process') const { exec, fork } = childProcess - function addAsyncContextFrame (fn, thisArg, args) { + childProcess.exec = function (...args) { const opts = args[1] if (opts) { - const env = opts.env ||= {} - env.NODE_OPTIONS ||= '' - if (!env.NODE_OPTIONS.includes('--no-async-context-frame')) { - env.NODE_OPTIONS += ' --no-async-context-frame' + if (opts?.env?.NODE_OPTIONS && !opts.env.NODE_OPTIONS.includes('--no-async-context-frame')) { + opts.env.NODE_OPTIONS += ' --no-async-context-frame' + } else { + opts.env ||= {} + opts.env.NODE_OPTIONS = '--no-async-context-frame' } } - return fn.apply(thisArg, args) + return exec.apply(this, args) } - childProcess.exec = function () { - return addAsyncContextFrame(exec, this, arguments) - } - - childProcess.fork = function () { - return addAsyncContextFrame(fork, this, arguments) + childProcess.fork = function (...args) { + const opts = args[1] + if (opts) { + if (opts?.env?.NODE_OPTIONS && !opts.env.NODE_OPTIONS.includes('--no-async-context-frame')) { + opts.env.NODE_OPTIONS += ' --no-async-context-frame' + } else { + opts.env ||= {} + opts.env.NODE_OPTIONS = '--no-async-context-frame' + } + } + return fork.apply(this, args) } } From 5b1c69622f7186629c7c07a8dab4cc6ea078d4eb Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 15 May 2025 16:58:35 +0200 Subject: [PATCH 2/8] Revert "fix child_process test on node 24 (#5722)" This reverts commit 87fcd71d12f24e6d1d7d640ac79ddfa935298561. --- .github/workflows/apm-integrations.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/apm-integrations.yml b/.github/workflows/apm-integrations.yml index 82d26ff8576..c6b600d03cb 100644 --- a/.github/workflows/apm-integrations.yml +++ b/.github/workflows/apm-integrations.yml @@ -230,8 +230,6 @@ jobs: - run: yarn test:plugins:ci - uses: ./.github/actions/node/latest - run: yarn test:plugins:ci - env: - OPTIONS_OVERRIDE: 1 - uses: codecov/codecov-action@18283e04ce6e62d37312384ff67231eb8fd56d24 # v5.4.3 confluentinc-kafka-javascript: From 77eb88a45c719bf25d91c7f2295f946b1929cc76 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 15 May 2025 16:58:44 +0200 Subject: [PATCH 3/8] Revert "use no async context frame option for now (#5706)" This reverts commit f4ee8443bfabbb8a39e3165c5f2da64bf3176f01. --- .github/workflows/debugger.yml | 2 -- packages/dd-trace/test/setup/core.js | 33 ---------------------------- 2 files changed, 35 deletions(-) diff --git a/.github/workflows/debugger.yml b/.github/workflows/debugger.yml index 88b288a7970..6d84ad62eb6 100644 --- a/.github/workflows/debugger.yml +++ b/.github/workflows/debugger.yml @@ -26,8 +26,6 @@ jobs: - uses: ./.github/actions/install - run: yarn test:debugger:ci - run: yarn test:integration:debugger - env: - OPTIONS_OVERRIDE: 1 - if: always() uses: ./.github/actions/testagent/logs with: diff --git a/packages/dd-trace/test/setup/core.js b/packages/dd-trace/test/setup/core.js index 37249fb0088..c15a282b37e 100644 --- a/packages/dd-trace/test/setup/core.js +++ b/packages/dd-trace/test/setup/core.js @@ -5,7 +5,6 @@ const chai = require('chai') const sinonChai = require('sinon-chai') const { setTimeout } = require('timers/promises') const proxyquire = require('../proxyquire') -const { NODE_MAJOR } = require('../../../../version') { // get-port can often return a port that is already in use, thanks to a race @@ -53,35 +52,3 @@ if (/^v\d+\.x$/.test(process.env.GITHUB_BASE_REF || '')) { process.env.DD_INJECTION_ENABLED = 'true' process.env.DD_INJECT_FORCE = 'true' } - -// TODO(bengl): remove this block once we can properly support Node.js 24 without it -if (NODE_MAJOR >= 24 && !process.env.OPTIONS_OVERRIDE) { - const childProcess = require('child_process') - const { exec, fork } = childProcess - - childProcess.exec = function (...args) { - const opts = args[1] - if (opts) { - if (opts?.env?.NODE_OPTIONS && !opts.env.NODE_OPTIONS.includes('--no-async-context-frame')) { - opts.env.NODE_OPTIONS += ' --no-async-context-frame' - } else { - opts.env ||= {} - opts.env.NODE_OPTIONS = '--no-async-context-frame' - } - } - return exec.apply(this, args) - } - - childProcess.fork = function (...args) { - const opts = args[1] - if (opts) { - if (opts?.env?.NODE_OPTIONS && !opts.env.NODE_OPTIONS.includes('--no-async-context-frame')) { - opts.env.NODE_OPTIONS += ' --no-async-context-frame' - } else { - opts.env ||= {} - opts.env.NODE_OPTIONS = '--no-async-context-frame' - } - } - return fork.apply(this, args) - } -} From efefba5d02b6ffc0707b9dbdb7a554d249515150 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Fri, 16 May 2025 12:13:47 +0200 Subject: [PATCH 4/8] DEV: pin node version to 24 --- package.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/package.json b/package.json index b16b1cbec9b..870c10abaa7 100644 --- a/package.json +++ b/package.json @@ -156,5 +156,8 @@ "tiktoken": "^1.0.21", "yaml": "^2.8.0", "yarn-deduplicate": "^6.0.2" + }, + "volta": { + "node": "24.2.0" } } From f236bca78016a9a9ecc139bd4b5e1473d78d495f Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 5 May 2025 15:08:15 +0200 Subject: [PATCH 5/8] Allow the CPU profiler to store sample context in the async context frame --- packages/dd-trace/src/profiling/config.js | 25 ++++ .../dd-trace/src/profiling/profilers/wall.js | 134 +++++++++++++----- .../src/supported-configurations.json | 1 + .../test/profiling/profilers/wall.spec.js | 9 +- 4 files changed, 129 insertions(+), 40 deletions(-) diff --git a/packages/dd-trace/src/profiling/config.js b/packages/dd-trace/src/profiling/config.js index 9cddf3769c7..0271a19e95a 100644 --- a/packages/dd-trace/src/profiling/config.js +++ b/packages/dd-trace/src/profiling/config.js @@ -16,6 +16,7 @@ const { tagger } = require('./tagger') const { isFalse, isTrue } = require('../util') const { getAzureTagsFromMetadata, getAzureAppMetadata } = require('../azure_metadata') const { getEnvironmentVariables } = require('../config-helper') +const satisfies = require('semifies') class Config { constructor (options = {}) { @@ -41,6 +42,7 @@ class Config { DD_PROFILING_TIMELINE_ENABLED, DD_PROFILING_UPLOAD_PERIOD, DD_PROFILING_UPLOAD_TIMEOUT, + DD_PROFILING_USE_ASYNC_CONTEXT_FRAME, DD_PROFILING_V8_PROFILER_BUG_WORKAROUND, DD_PROFILING_WALLTIME_ENABLED, DD_SERVICE, @@ -207,6 +209,29 @@ class Config { this.uploadCompression = { method: uploadCompression, level } + const that = this + function turnOffAsyncContextFrame (msg) { + that.logger.warn( + `DD_PROFILING_USE_ASYNC_CONTEXT_FRAME was set ${msg}, it will have no effect.`) + that.useAsyncContextFrame = false + } + + this.useAsyncContextFrame = isTrue(coalesce(options.useAsyncContextFrame, + DD_PROFILING_USE_ASYNC_CONTEXT_FRAME, false)) + if (this.useAsyncContextFrame) { + if (satisfies(process.versions.node, '>=24.0.0')) { + if (process.execArgv.includes('--no-async-context-frame')) { + turnOffAsyncContextFrame('with --no-async-context-frame') + } + } else if (satisfies(process.versions.node, '>=23.0.0')) { + if (!process.execArgv.includes('--experimental-async-context-frame')) { + turnOffAsyncContextFrame('without --experimental-async-context-frame') + } + } else { + turnOffAsyncContextFrame('but it requires at least Node 23') + } + } + this.profilers = ensureProfilers(profilers, this) } } diff --git a/packages/dd-trace/src/profiling/profilers/wall.js b/packages/dd-trace/src/profiling/profilers/wall.js index c39550fc654..7a639c469fb 100644 --- a/packages/dd-trace/src/profiling/profilers/wall.js +++ b/packages/dd-trace/src/profiling/profilers/wall.js @@ -16,7 +16,7 @@ const { const { isWebServerSpan, endpointNameFromTags, getStartedSpans } = require('../webspan-utils') -const beforeCh = dc.channel('dd-trace:storage:before') +let beforeCh const enterCh = dc.channel('dd-trace:storage:enter') const spanFinishCh = dc.channel('dd-trace:span:finish') const profilerTelemetryMetrics = telemetryMetrics.manager.namespace('profilers') @@ -30,14 +30,39 @@ function getActiveSpan () { return store && store.span } +function updateContext (context) { + // Converting spanIDs to strings is not necessary as generateLabels will do it + // too. When we don't use async context frame, we can convert them when the + // sample is taken though so we amortize the latency of operations. It is an + // optimization. + if (typeof context.spanId === 'object') { + context.spanId = context.spanId.toString(10) + } + if (typeof context.rootSpanId === 'object') { + context.rootSpanId = context.rootSpanId.toString(10) + } + if (context.webTags !== undefined && context.endpoint === undefined) { + // endpoint may not be determined yet, but keep it as fallback + // if tags are not available anymore during serialization + context.endpoint = endpointNameFromTags(context.webTags) + } +} + let channelsActivated = false -function ensureChannelsActivated () { +function ensureChannelsActivated (useAsyncContextFrame) { if (channelsActivated) return - const { AsyncLocalStorage, createHook } = require('async_hooks') const shimmer = require('../../../../datadog-shimmer') - createHook({ before: () => beforeCh.publish() }).enable() + // When using the async context frame to store sample context (available with + // Node 24), we do not need to use the async hooks anymore. + if (!useAsyncContextFrame) { + const { createHook } = require('async_hooks') + beforeCh = dc.channel('dd-trace:storage:before') + createHook({ before: () => beforeCh.publish() }).enable() + } + + const { AsyncLocalStorage } = require('async_hooks') let inRun = false shimmer.wrap(AsyncLocalStorage.prototype, 'enterWith', function (original) { @@ -48,22 +73,27 @@ function ensureChannelsActivated () { } }) - shimmer.wrap(AsyncLocalStorage.prototype, 'run', function (original) { - return function (store, callback, ...args) { - const wrappedCb = shimmer.wrapFunction(callback, cb => function (...args) { - inRun = false - enterCh.publish() - const retVal = cb.apply(this, args) + // The AsyncContextFrame-based implementation of AsyncLocalStorage.run() + // delegates to AsyncLocalStorage.enterWith() so it doesn't need to be + // separately instrumented. + if (!useAsyncContextFrame) { + shimmer.wrap(AsyncLocalStorage.prototype, 'run', function (original) { + return function (store, callback, ...args) { + const wrappedCb = shimmer.wrapFunction(callback, cb => function (...args) { + inRun = false + enterCh.publish() + const retVal = cb.apply(this, args) + inRun = true + return retVal + }) inRun = true + const retVal = original.call(this, store, wrappedCb, ...args) + enterCh.publish() + inRun = false return retVal - }) - inRun = true - const retVal = original.call(this, store, wrappedCb, ...args) - enterCh.publish() - inRun = false - return retVal - } - }) + } + }) + } channelsActivated = true } @@ -77,6 +107,8 @@ class NativeWallProfiler { this._endpointCollectionEnabled = !!options.endpointCollectionEnabled this._timelineEnabled = !!options.timelineEnabled this._cpuProfilingEnabled = !!options.cpuProfilingEnabled + this._useAsyncContextFrame = !!options.useAsyncContextFrame + // We need to capture span data into the sample context for either code hotspots // or endpoint collection. this._captureSpanData = this._codeHotspotsEnabled || this._endpointCollectionEnabled @@ -130,19 +162,24 @@ class NativeWallProfiler { withContexts: this._withContexts, lineNumbers: false, workaroundV8Bug: this._v8ProfilerBugWorkaroundEnabled, - collectCpuTime: this._cpuProfilingEnabled + collectCpuTime: this._cpuProfilingEnabled, + useCPED: this._useAsyncContextFrame }) if (this._withContexts) { - this._setNewContext() + if (!this._useAsyncContextFrame) { + this._setNewContext() + } if (this._captureSpanData) { this._profilerState = this._pprof.time.getState() this._lastSampleCount = 0 - ensureChannelsActivated() + ensureChannelsActivated(this._useAsyncContextFrame) - beforeCh.subscribe(this._enter) + if (!this._useAsyncContextFrame) { + beforeCh.subscribe(this._enter) + } enterCh.subscribe(this._enter) spanFinishCh.subscribe(this._spanFinished) } @@ -154,17 +191,37 @@ class NativeWallProfiler { _enter () { if (!this._started) return - const sampleCount = this._profilerState[kSampleCount] - if (sampleCount !== this._lastSampleCount) { - this._lastSampleCount = sampleCount - const context = this._currentContext.ref - this._setNewContext() + const span = getActiveSpan() + const sampleContext = span ? this._getProfilingContext(span) : {} + + // Note that we store the sample context differently with and without the + // async context frame. With the async context frame, we tell the profiler + // to store the sample context directly in the frame on each enterWith. + // Without the async context frame, we store one holder object as the + // profiler's single sample context, and reassign its "ref" property on + // every async context change. Then when we detect that the profiler took a + // sample (and thus bound the holder as that sample's context), we create a + // new holder object so that we no longer mutate the old one. This is really + // an optimization to avoid going to profiler's native SetContext every + // time. With async context frame however, we can't have that optimization, + // as we can't tell from which async context frame was the sampling context + // taken. For the same reason we can't call updateContext() on the old + // context -- we simply can't tell which one it might've been across all + // possible async context frames. + if (this._useAsyncContextFrame) { + this._pprof.time.setContext(sampleContext) + } else { + const sampleCount = this._profilerState[kSampleCount] + if (sampleCount !== this._lastSampleCount) { + this._lastSampleCount = sampleCount + const context = this._currentContext.ref + this._setNewContext() - this._updateContext(context) - } + updateContext(context) + } - const span = getActiveSpan() - this._currentContext.ref = span ? this._getProfilingContext(span) : {} + this._currentContext.ref = sampleContext + } } _getProfilingContext (span) { @@ -245,7 +302,7 @@ class NativeWallProfiler { _stop (restart) { if (!this._started) return - if (this._captureSpanData) { + if (this._captureSpanData && !this._useAsyncContextFrame) { // update last sample context if needed this._enter() this._lastSampleCount = 0 @@ -259,7 +316,9 @@ class NativeWallProfiler { } } else { if (this._captureSpanData) { - beforeCh.unsubscribe(this._enter) + if (!this._useAsyncContextFrame) { + beforeCh.unsubscribe(this._enter) + } enterCh.unsubscribe(this._enter) spanFinishCh.unsubscribe(this._spanFinished) this._profilerState = undefined @@ -296,8 +355,9 @@ class NativeWallProfiler { } // Native profiler doesn't set context.context for some samples, such as idle samples or when - // the context was otherwise unavailable when the sample was taken. - const ref = context.context?.ref + // the context was otherwise unavailable when the sample was taken. Note that with async context + // frame, we don't use the "ref" indirection. + const ref = this._useAsyncContextFrame ? context.context : context.context?.ref if (typeof ref !== 'object') { return labels } @@ -305,10 +365,10 @@ class NativeWallProfiler { const { spanId, rootSpanId, webTags, endpoint } = ref if (spanId !== undefined) { - labels[SPAN_ID_LABEL] = spanId + labels[SPAN_ID_LABEL] = typeof spanId === 'object' ? spanId.toString(10) : spanId } if (rootSpanId !== undefined) { - labels[LOCAL_ROOT_SPAN_ID_LABEL] = rootSpanId + labels[LOCAL_ROOT_SPAN_ID_LABEL] = typeof rootSpanId === 'object' ? rootSpanId.toString(10) : rootSpanId } if (webTags !== undefined && Object.keys(webTags).length !== 0) { labels['trace endpoint'] = endpointNameFromTags(webTags) diff --git a/packages/dd-trace/src/supported-configurations.json b/packages/dd-trace/src/supported-configurations.json index fa37dd54e10..6ac54892752 100644 --- a/packages/dd-trace/src/supported-configurations.json +++ b/packages/dd-trace/src/supported-configurations.json @@ -126,6 +126,7 @@ "DD_PROFILING_PROFILERS": ["A"], "DD_PROFILING_SOURCE_MAP": ["A"], "DD_PROFILING_UPLOAD_PERIOD": ["A"], + "DD_PROFILING_USE_ASYNC_CONTEXT_FRAME": ["A"], "DD_PROFILING_V8_PROFILER_BUG_WORKAROUND": ["A"], "DD_PROFILING_WALLTIME_ENABLED": ["A"], "DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS": ["A"], diff --git a/packages/dd-trace/test/profiling/profilers/wall.spec.js b/packages/dd-trace/test/profiling/profilers/wall.spec.js index dd8183c656f..671d4b92867 100644 --- a/packages/dd-trace/test/profiling/profilers/wall.spec.js +++ b/packages/dd-trace/test/profiling/profilers/wall.spec.js @@ -61,7 +61,8 @@ describe('profilers/native/wall', () => { withContexts: false, lineNumbers: false, workaroundV8Bug: false, - collectCpuTime: false + collectCpuTime: false, + useCPED: false }) }) @@ -80,7 +81,8 @@ describe('profilers/native/wall', () => { withContexts: false, lineNumbers: false, workaroundV8Bug: false, - collectCpuTime: false + collectCpuTime: false, + useCPED: false }) }) @@ -176,7 +178,8 @@ describe('profilers/native/wall', () => { withContexts: false, lineNumbers: false, workaroundV8Bug: false, - collectCpuTime: false + collectCpuTime: false, + useCPED: false }) }) From 1be0573912cb199e441b93f0de807050ce2ca443 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Tue, 6 May 2025 15:05:33 +0200 Subject: [PATCH 6/8] Test the async context frame with Node 23 and later --- integration-tests/profiler/profiler.spec.js | 25 ++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/integration-tests/profiler/profiler.spec.js b/integration-tests/profiler/profiler.spec.js index c7a52eecdfd..f2907c247cf 100644 --- a/integration-tests/profiler/profiler.spec.js +++ b/integration-tests/profiler/profiler.spec.js @@ -13,6 +13,7 @@ const fsync = require('fs') const net = require('net') const zlib = require('zlib') const { Profile } = require('pprof-format') +const satisfies = require('semifies') const DEFAULT_PROFILE_TYPES = ['wall', 'space'] if (process.platform !== 'win32') { @@ -332,15 +333,23 @@ describe('profiler', () => { // with recomputed busyCycleTimeNs, but let's give ourselves more leeway. this.retries(9) const procStart = BigInt(Date.now() * 1000000) - const proc = fork(path.join(cwd, 'profiler/codehotspots.js'), { - cwd, - env: { - DD_PROFILING_EXPORTERS: 'file', - DD_PROFILING_ENABLED: 1, - BUSY_CYCLE_TIME: (busyCycleTimeNs | 0).toString(), - DD_TRACE_AGENT_PORT: agent.port + const env = { + DD_PROFILING_EXPORTERS: 'file', + DD_PROFILING_ENABLED: 1, + BUSY_CYCLE_TIME: (busyCycleTimeNs | 0).toString(), + DD_TRACE_AGENT_PORT: agent.port + } + // With Node 23 or later, test the profiler with async context frame use. + const execArgv = [] + if (satisfies(process.versions.node, '>=23.0.0')) { + env.DD_PROFILING_USE_ASYNC_CONTEXT_FRAME = 1 + if (!satisfies(process.versions.node, '>=24.0.0')) { + // For Node 23, use the experimental command line flag for Node to enable + // async context frame. Node 24 has it enabled by default. + execArgv.push('--experimental-async-context-frame') } - }) + } + const proc = fork(path.join(cwd, 'profiler/codehotspots.js'), { cwd, env, execArgv }) await processExitPromise(proc, timeout) const procEnd = BigInt(Date.now() * 1000000) From 680248b1d0841fad73b408e870a49d01e2c5003c Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 30 Jun 2025 17:16:18 +0200 Subject: [PATCH 7/8] Add telemetry for number of live CPED native sample context objects --- integration-tests/profiler/profiler.spec.js | 45 +++++++++++++++---- packages/dd-trace/src/profiler.js | 5 ++- packages/dd-trace/src/profiling/config.js | 2 + .../dd-trace/src/profiling/profilers/wall.js | 16 ++++++- 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/integration-tests/profiler/profiler.spec.js b/integration-tests/profiler/profiler.spec.js index f2907c247cf..fac6ceb9d58 100644 --- a/integration-tests/profiler/profiler.spec.js +++ b/integration-tests/profiler/profiler.spec.js @@ -650,7 +650,7 @@ describe('profiler', () => { }) }) - context('Profiler API telemetry', () => { + context('Profiler telemetry', () => { beforeEach(async () => { agent = await new FakeAgent().start() }) @@ -678,21 +678,20 @@ describe('profiler', () => { const pp = payload.payload assert.equal(pp.namespace, 'profilers') const series = pp.series - assert.lengthOf(series, 2) - assert.equal(series[0].metric, 'profile_api.requests') - assert.equal(series[0].type, 'count') + const requests = series.find(s => s.metric === 'profile_api.requests') + assert.equal(requests.type, 'count') // There's a race between metrics and on-shutdown profile, so metric // value will be between 1 and 3 - requestCount = series[0].points[0][1] + requestCount = requests.points[0][1] assert.isAtLeast(requestCount, 1) assert.isAtMost(requestCount, 3) - assert.equal(series[1].metric, 'profile_api.responses') - assert.equal(series[1].type, 'count') - assert.include(series[1].tags, 'status_code:200') + const responses = series.find(s => s.metric === 'profile_api.responses') + assert.equal(responses.type, 'count') + assert.include(responses.tags, 'status_code:200') // Same number of requests and responses - assert.equal(series[1].points[0][1], requestCount) + assert.equal(responses.points[0][1], requestCount) }, 'generate-metrics', timeout) const checkDistributions = agent.assertTelemetryReceived(({ _, payload }) => { @@ -713,6 +712,34 @@ describe('profiler', () => { // Same number of requests and points assert.equal(requestCount, pointsCount) }) + + it('sends wall profiler sample context telemetry', async function () { + if (satisfies(process.versions.node, '<24.0.0')) { + this.skip() // Wall profiler context count telemetry is not supported in Node < 24 + } + proc = fork(profilerTestFile, { + cwd, + env: { + DD_TRACE_AGENT_PORT: agent.port, + DD_PROFILING_ENABLED: 1, + DD_PROFILING_UPLOAD_PERIOD: 1, + DD_PROFILING_USE_ASYNC_CONTEXT_FRAME: 1, + DD_TELEMETRY_HEARTBEAT_INTERVAL: 1, // every second + TEST_DURATION_MS: 1500 + } + }) + + const checkMetrics = agent.assertTelemetryReceived(({ _, payload }) => { + const pp = payload.payload + assert.equal(pp.namespace, 'profilers') + const sampleContexts = pp.series.find(s => s.metric === 'wall.sample_contexts') + assert.isDefined(sampleContexts) + assert.equal(sampleContexts.type, 'gauge') + assert.isAtLeast(sampleContexts.points[0][1], 1) + }, 'generate-metrics', timeout) + + await Promise.all([checkProfiles(agent, proc, timeout), checkMetrics]) + }) }) function forkSsi (args) { diff --git a/packages/dd-trace/src/profiler.js b/packages/dd-trace/src/profiler.js index 3752af18d30..697ad2a68c1 100644 --- a/packages/dd-trace/src/profiler.js +++ b/packages/dd-trace/src/profiler.js @@ -10,6 +10,8 @@ module.exports = { start: config => { const { service, version, env, url, hostname, port, tags, repositoryUrl, commitSHA, injectionEnabled } = config const { enabled, sourceMap, exporters } = config.profiling + const { heartbeatInterval } = config.telemetry + const logger = { debug: (message) => log.debug(message), info: (message) => log.info(message), @@ -39,7 +41,8 @@ module.exports = { repositoryUrl, commitSHA, libraryInjected, - activation + activation, + heartbeatInterval }) }, diff --git a/packages/dd-trace/src/profiling/config.js b/packages/dd-trace/src/profiling/config.js index 0271a19e95a..f6a83f824ee 100644 --- a/packages/dd-trace/src/profiling/config.js +++ b/packages/dd-trace/src/profiling/config.js @@ -232,6 +232,8 @@ class Config { } } + this.heartbeatInterval = options.heartbeatInterval || 60 * 1000 // 1 minute + this.profilers = ensureProfilers(profilers, this) } } diff --git a/packages/dd-trace/src/profiling/profilers/wall.js b/packages/dd-trace/src/profiling/profilers/wall.js index 7a639c469fb..9058e79cec8 100644 --- a/packages/dd-trace/src/profiling/profilers/wall.js +++ b/packages/dd-trace/src/profiling/profilers/wall.js @@ -108,6 +108,7 @@ class NativeWallProfiler { this._timelineEnabled = !!options.timelineEnabled this._cpuProfilingEnabled = !!options.cpuProfilingEnabled this._useAsyncContextFrame = !!options.useAsyncContextFrame + this._telemetryHeartbeatIntervalMillis = options.heartbeatInterval || 60 * 1e3 // 60 seconds // We need to capture span data into the sample context for either code hotspots // or endpoint collection. @@ -177,7 +178,9 @@ class NativeWallProfiler { ensureChannelsActivated(this._useAsyncContextFrame) - if (!this._useAsyncContextFrame) { + if (this._useAsyncContextFrame) { + this._setupTelemetryMetrics() + } else { beforeCh.subscribe(this._enter) } enterCh.subscribe(this._enter) @@ -188,6 +191,16 @@ class NativeWallProfiler { this._started = true } + _setupTelemetryMetrics () { + const contextCountGauge = profilerTelemetryMetrics.gauge('wall.sample_contexts') + + const that = this + this._contextCountGaugeUpdater = setInterval(() => { + contextCountGauge.mark(that._profilerState[kCPEDContextCount]) + }, that._telemetryHeartbeatIntervalMillis) + this._contextCountGaugeUpdater.unref() + } + _enter () { if (!this._started) return @@ -315,6 +328,7 @@ class NativeWallProfiler { this._reportV8bug(v8BugDetected === 1) } } else { + clearInterval(this._contextCountGaugeUpdater) if (this._captureSpanData) { if (!this._useAsyncContextFrame) { beforeCh.unsubscribe(this._enter) From 23f6b47e6cd09c6934737b5708d0f043fdefa4bb Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Fri, 16 May 2025 12:14:12 +0200 Subject: [PATCH 8/8] DEBUG --- integration-tests/profiler/codehotspots.js | 19 +++++++++-- integration-tests/profiler/profiler.spec.js | 1 + .../dd-trace/src/profiling/profilers/wall.js | 34 ++++++++++++++++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/integration-tests/profiler/codehotspots.js b/integration-tests/profiler/codehotspots.js index 8926c896ac1..531a6eeafea 100644 --- a/integration-tests/profiler/codehotspots.js +++ b/integration-tests/profiler/codehotspots.js @@ -2,6 +2,7 @@ const DDTrace = require('dd-trace') const tracer = DDTrace.init() +const NativeWallProfiler = require('dd-trace/packages/dd-trace/src/profiling/profilers/wall') // Busy cycle duration is communicated in nanoseconds through the environment // variable by the test. On first execution, it'll be 10 * the sampling period @@ -29,12 +30,18 @@ function busyLoop () { let counter = 0 function runBusySpans () { - tracer.trace('x' + counter, { type: 'web', resource: `endpoint-${counter}` }, (_, done) => { + const id1 = `x-${counter}` + tracer.trace(id1, { type: 'web', resource: `endpoint-${counter}` }, (_, done) => { + logData(id1) setImmediate(() => { + logData(`${id1} timeout`) for (let i = 0; i < 3; ++i) { const z = i - tracer.trace('y' + i, (_, done2) => { + const id2 = `y-${counter}-${i}` + tracer.trace(id2, (_, done2) => { + logData(id2) const busyWork = () => { + logData(`${id2}-timeout`) busyLoop() done2() if (z === 2) { @@ -60,4 +67,12 @@ function runBusySpans () { }) } +function logData (codeContext) { + const active = NativeWallProfiler.prototype.getActiveSpan() + const sampleContext = NativeWallProfiler.prototype.getSampleContext() + const indicator = (active.spanId === sampleContext.spanId) ? '✅' : '❌' + const CPEDContextCount = NativeWallProfiler.prototype.getCPEDContextCount() + console.log(indicator, codeContext, 'activeSpan:', active.spanId, ', sampleContext:', sampleContext.spanId, ', CPEDContextCount:', CPEDContextCount) +} + tracer.profilerStarted().then(runBusySpans) diff --git a/integration-tests/profiler/profiler.spec.js b/integration-tests/profiler/profiler.spec.js index fac6ceb9d58..8b8fd211115 100644 --- a/integration-tests/profiler/profiler.spec.js +++ b/integration-tests/profiler/profiler.spec.js @@ -349,6 +349,7 @@ describe('profiler', () => { execArgv.push('--experimental-async-context-frame') } } + console.log({path: path.join(cwd, 'profiler/codehotspots.js'), env, execArgv }) const proc = fork(path.join(cwd, 'profiler/codehotspots.js'), { cwd, env, execArgv }) await processExitPromise(proc, timeout) diff --git a/packages/dd-trace/src/profiling/profilers/wall.js b/packages/dd-trace/src/profiling/profilers/wall.js index 9058e79cec8..72b95967ec3 100644 --- a/packages/dd-trace/src/profiling/profilers/wall.js +++ b/packages/dd-trace/src/profiling/profilers/wall.js @@ -17,13 +17,14 @@ const { const { isWebServerSpan, endpointNameFromTags, getStartedSpans } = require('../webspan-utils') let beforeCh +let lastInstance const enterCh = dc.channel('dd-trace:storage:enter') const spanFinishCh = dc.channel('dd-trace:span:finish') const profilerTelemetryMetrics = telemetryMetrics.manager.namespace('profilers') const ProfilingContext = Symbol('NativeWallProfiler.ProfilingContext') -let kSampleCount +let kSampleCount, kCPEDContextCount function getActiveSpan () { const store = storage('legacy').getStore() @@ -131,6 +132,7 @@ class NativeWallProfiler { this._logger = options.logger this._started = false + lastInstance = this } codeHotspotsEnabled () { @@ -147,6 +149,7 @@ class NativeWallProfiler { this._mapper = mapper this._pprof = require('@datadog/pprof') kSampleCount = this._pprof.time.constants.kSampleCount + kCPEDContextCount = this._pprof.time.constants.kCPEDContextCount // pprof otherwise crashes in worker threads if (!process._startProfilerIdleNotifier) { @@ -274,6 +277,15 @@ class NativeWallProfiler { return profilingContext } + _getSampleContext () { + const context = this._pprof.time.getContext() + return this._useAsyncContextFrame ? context : context.ref + } + + _getCPEDContextCount () { + return this._profilerState[kCPEDContextCount] + } + _setNewContext () { this._pprof.time.setContext( this._currentContext = { @@ -411,4 +423,24 @@ class NativeWallProfiler { } } +NativeWallProfiler.prototype.getActiveSpan = function () { + const span = getActiveSpan() + if (span === undefined) { + return {} + } + const spanData = lastInstance._getProfilingContext(span) + updateContext(spanData) + return { spanId: spanData.spanId, rootSpanId: spanData.rootSpanId } +} +NativeWallProfiler.prototype.getSampleContext = function () { + const ctx = lastInstance._getSampleContext() + if (ctx === undefined) { + return {} + } + updateContext(ctx) + return { spanId: ctx.spanId, rootSpanId: ctx.rootSpanId } +} +NativeWallProfiler.prototype.getCPEDContextCount = function () { + return lastInstance._getCPEDContextCount() +} module.exports = NativeWallProfiler