From d0537a9b575be97f1e40305d8efb7e5c4d07bd67 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Fri, 2 May 2025 14:31:52 +0200 Subject: [PATCH 01/11] format --- bindings/profilers/wall.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 938aa9e6..f7385576 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -14,6 +14,9 @@ * limitations under the License. */ +#include +#include +#include #include #include #include @@ -22,10 +25,6 @@ #include #include -#include -#include -#include - #include "per-isolate-data.hh" #include "translate-time-profile.hh" #include "wall.hh" From 916d921212d265d42db3fae47d93eda1bb034994 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Tue, 20 May 2025 13:46:37 +0200 Subject: [PATCH 02/11] Encapsulate the pair of pointers with atomic switch into a class --- bindings/profilers/wall.cc | 24 ++++----------------- bindings/profilers/wall.hh | 43 +++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index f7385576..ddad89d9 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -560,7 +560,6 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, contexts_.reserve(duration * 2 / samplingPeriod); } - curContext_.store(&context1_, std::memory_order_relaxed); collectionMode_.store(CollectionMode::kNoCollect, std::memory_order_relaxed); gcCount.store(0, std::memory_order_relaxed); @@ -1006,28 +1005,13 @@ v8::CpuProfiler* WallProfiler::CreateV8CpuProfiler() { } v8::Local WallProfiler::GetContext(Isolate* isolate) { - auto context = *curContext_.load(std::memory_order_relaxed); + auto context = curContext_.Get(); if (!context) return v8::Undefined(isolate); return context->Get(isolate); } void WallProfiler::SetContext(Isolate* isolate, Local value) { - // Need to be careful here, because we might be interrupted by a - // signal handler that will make use of curContext_. - // Update of shared_ptr is not atomic, so instead we use a pointer - // (curContext_) that points on two shared_ptr (context1_ and context2_), - // update the shared_ptr that is not currently in use and then atomically - // update curContext_. - auto newCurContext = curContext_.load(std::memory_order_relaxed) == &context1_ - ? &context2_ - : &context1_; - if (!value->IsNullOrUndefined()) { - *newCurContext = std::make_shared>(isolate, value); - } else { - newCurContext->reset(); - } - std::atomic_signal_fence(std::memory_order_release); - curContext_.store(newCurContext, std::memory_order_relaxed); + curContext_.Set(isolate, value); } NAN_GETTER(WallProfiler::GetContext) { @@ -1102,10 +1086,10 @@ void WallProfiler::PushContext(int64_t time_from, // Be careful this is called in a signal handler context therefore all // operations must be async signal safe (in particular no allocations). // Our ring buffer avoids allocations. - auto context = curContext_.load(std::memory_order_relaxed); + auto context = curContext_.Get(); std::atomic_signal_fence(std::memory_order_acquire); if (contexts_.size() < contexts_.capacity()) { - contexts_.push_back({*context, time_from, time_to, cpu_time, async_id}); + contexts_.push_back({context, time_from, time_to, cpu_time, async_id}); std::atomic_fetch_add_explicit( reinterpret_cast*>(&fields_[kSampleCount]), 1U, diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index caea8b5c..38dc7e5a 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -36,6 +36,41 @@ struct Result { std::string msg; }; +using ContextPtr = std::shared_ptr>; + +/** + * Class that allows atomic updates to a ContextPtr. Since update of shared_ptr + * is not atomic, we use a pointer that alternates between pointing to one of + * two shared_ptrs instead, and we use atomic operations to update the pointer. + */ +class AtomicContextPtr { + ContextPtr ptr1; + ContextPtr ptr2; + std::atomic currentPtr = &ptr1; + + void Set(v8::Isolate* isolate, v8::Local value) { + auto oldPtr = currentPtr.load(std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_acquire); + auto newPtr = oldPtr == &ptr1 ? &ptr2 : &ptr1; + if (!value->IsNullOrUndefined()) { + *newPtr = std::make_shared>(isolate, value); + } else { + newPtr->reset(); + } + std::atomic_signal_fence(std::memory_order_release); + currentPtr.store(newPtr, std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_release); + } + + ContextPtr Get() { + auto ptr = currentPtr.load(std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_acquire); + return ptr ? *ptr : ContextPtr(); + } + + friend class WallProfiler; +}; + class WallProfiler : public Nan::ObjectWrap { public: enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts }; @@ -43,19 +78,13 @@ class WallProfiler : public Nan::ObjectWrap { private: enum Fields { kSampleCount, kFieldCount }; - using ContextPtr = std::shared_ptr>; - std::chrono::microseconds samplingPeriod_{0}; v8::CpuProfiler* cpuProfiler_ = nullptr; // TODO: Investigate use of v8::Persistent instead of shared_ptr to // avoid heap allocation. Need to figure out the right move/copy semantics in // and out of the ring buffer. - // We're using a pair of shared pointers and an atomic pointer-to-current as - // a way to ensure signal safety on update. - ContextPtr context1_; - ContextPtr context2_; - std::atomic curContext_; + AtomicContextPtr curContext_; std::atomic gcCount = 0; double gcAsyncId; From c837e594889a0ca316d2cbe4c53d1497cd683054 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Tue, 20 May 2025 13:48:49 +0200 Subject: [PATCH 03/11] Allow use of CPED to store sampling context --- bindings/profilers/wall.cc | 223 ++++++++++++++++++++++++++++++++++--- bindings/profilers/wall.hh | 30 ++++- ts/src/time-profiler.ts | 6 +- 3 files changed, 236 insertions(+), 23 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index ddad89d9..5f2b2777 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -58,6 +58,8 @@ using namespace v8; namespace dd { +using ContextPtr = std::shared_ptr>; + // Maximum number of rounds in the GetV8ToEpochOffset static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20; @@ -331,8 +333,7 @@ void SignalHandler::HandleProfilerSignal(int sig, auto time_from = Now(); old_handler(sig, info, context); auto time_to = Now(); - auto async_id = prof->GetAsyncId(isolate); - prof->PushContext(time_from, time_to, cpu_time, async_id); + prof->PushContext(time_from, time_to, cpu_time, isolate); } #else class SignalHandler { @@ -543,8 +544,10 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, bool workaroundV8Bug, bool collectCpuTime, bool collectAsyncId, - bool isMainThread) + bool isMainThread, + bool useCPED) : samplingPeriod_(samplingPeriod), + useCPED_(useCPED), includeLines_(includeLines), withContexts_(withContexts), isMainThread_(isMainThread) { @@ -555,6 +558,11 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, workaroundV8Bug_ = workaroundV8Bug && DD_WALL_USE_SIGPROF && detectV8Bug_; collectCpuTime_ = collectCpuTime && withContexts; collectAsyncId_ = collectAsyncId && withContexts; +#if NODE_MAJOR_VERSION >= 23 + useCPED_ = useCPED && withContexts; +#else + useCPED_ = false; +#endif if (withContexts_) { contexts_.reserve(duration * 2 / samplingPeriod); @@ -574,10 +582,18 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, jsArray_ = v8::Global(isolate, jsArray); std::fill(fields_, fields_ + kFieldCount, 0); - if (collectAsyncId_) { + if (collectAsyncId_ || useCPED_) { isolate->AddGCPrologueCallback(&GCPrologueCallback, this); isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this); } + + if (useCPED_) { + cpedSymbol_.Reset( + isolate, + Private::ForApi(isolate, + String::NewFromUtf8Literal( + isolate, "dd::WallProfiler::cpedSymbol_"))); + } } void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { @@ -589,16 +605,56 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { g_profilers.RemoveProfiler(isolate, this); } - if (collectAsyncId_) { + if (collectAsyncId_ || useCPED_) { isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this); isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this); } node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); + + for (auto it = liveContextPtrs_.begin(); it != liveContextPtrs_.end();) { + auto ptr = *it; + ptr->UnregisterFromGC(); + delete ptr; + ++it; + } + liveContextPtrs_.clear(); + deadContextPtrs_.clear(); } } +class PersistentContextPtr : AtomicContextPtr { + std::vector* dead; + Persistent per; + + PersistentContextPtr(std::vector* dead) : dead(dead) {} + + void UnregisterFromGC() { + if (!per.IsEmpty()) { + per.ClearWeak(); + per.Reset(); + } + } + + void MarkDead() { dead->push_back(this); } + + void RegisterForGC(Isolate* isolate, const Local& obj) { + // Register a callback to delete this object when the object is GCed + per.Reset(isolate, obj); + per.SetWeak( + this, + [](const WeakCallbackInfo& data) { + auto ptr = data.GetParameter(); + ptr->MarkDead(); + ptr->UnregisterFromGC(); + }, + WeakCallbackType::kParameter); + } + + friend class WallProfiler; +}; + #define DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(name) \ auto name##Value = \ Nan::Get(arg, Nan::New(#name).ToLocalChecked()); \ @@ -651,6 +707,14 @@ NAN_METHOD(WallProfiler::New) { DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(collectCpuTime); DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(collectAsyncId); DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread); + DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(useCPED); + +#if NODE_MAJOR_VERSION < 23 + if (useCPED) { + return Nan::ThrowTypeError( + "useCPED is not supported on this Node.js version."); + } +#endif if (withContexts && !DD_WALL_USE_SIGPROF) { return Nan::ThrowTypeError("Contexts are not supported."); @@ -690,7 +754,8 @@ NAN_METHOD(WallProfiler::New) { workaroundV8Bug, collectCpuTime, collectAsyncId, - isMainThread); + isMainThread, + useCPED); obj->Wrap(info.This()); info.GetReturnValue().Set(info.This()); } else { @@ -1004,14 +1069,124 @@ v8::CpuProfiler* WallProfiler::CreateV8CpuProfiler() { return cpuProfiler_; } -v8::Local WallProfiler::GetContext(Isolate* isolate) { - auto context = curContext_.Get(); - if (!context) return v8::Undefined(isolate); - return context->Get(isolate); +Local WallProfiler::GetContext(Isolate* isolate) { + auto context = GetContextPtr(isolate); + if (context) { + return context->Get(isolate); + } + return Undefined(isolate); } void WallProfiler::SetContext(Isolate* isolate, Local value) { +#if NODE_MAJOR_VERSION >= 23 + if (!useCPED_) { + curContext_.Set(isolate, value); + return; + } + + // Clean up dead context pointers + for (auto it = deadContextPtrs_.begin(); it != deadContextPtrs_.end();) { + auto ptr = *it; + liveContextPtrs_.erase(ptr); + delete ptr; + ++it; + } + deadContextPtrs_.clear(); + + auto cped = isolate->GetContinuationPreservedEmbedderData(); + // No Node AsyncContextFrame in this continuation yet + if (!cped->IsObject()) return; + + auto v8Ctx = isolate->GetCurrentContext(); + // This should always be called from a V8 context, but check just in case. + if (v8Ctx.IsEmpty()) return; + + auto cpedObj = cped.As(); + auto localSymbol = cpedSymbol_.Get(isolate); + auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); + if (maybeProfData.IsEmpty()) return; + + PersistentContextPtr* contextPtr = nullptr; + auto profData = maybeProfData.ToLocalChecked(); + if (profData->IsUndefined()) { + contextPtr = new PersistentContextPtr(&deadContextPtrs_); + + auto external = External::New(isolate, contextPtr); + setInProgress.store(true, std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_release); + auto maybeSetResult = cpedObj->SetPrivate(v8Ctx, localSymbol, external); + std::atomic_signal_fence(std::memory_order_release); + setInProgress.store(false, std::memory_order_relaxed); + if (maybeSetResult.IsNothing()) { + delete contextPtr; + return; + } + liveContextPtrs_.insert(contextPtr); + contextPtr->RegisterForGC(isolate, cpedObj); + } else { + contextPtr = + static_cast(profData.As()->Value()); + } + + contextPtr->Set(isolate, value); +#else curContext_.Set(isolate, value); +#endif +} + +ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { + auto isSetInProgress = setInProgress.load(std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_acquire); + if (isSetInProgress) { + // New sample context is being set. Safe behavior is to not try attempt + // Object::Get on it and just return empty right now. + return ContextPtr(); + } + + if (useCPED_) { + auto curGcCount = gcCount.load(std::memory_order_relaxed); + std::atomic_signal_fence(std::memory_order_acquire); + if (curGcCount > 0) { + return gcContext; + } + } + + return GetContextPtr(isolate); +} + +ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { +#if NODE_MAJOR_VERSION >= 23 + if (!useCPED_) { + return curContext_.Get(); + } + + if (!isolate->IsInUse()) { + // Must not try to create a handle scope if isolate is not in use. + return ContextPtr(); + } + HandleScope scope(isolate); + + auto cped = isolate->GetContinuationPreservedEmbedderData(); + if (cped->IsObject()) { + auto v8Ctx = isolate->GetEnteredOrMicrotaskContext(); + if (!v8Ctx.IsEmpty()) { + auto cpedObj = cped.As(); + auto localSymbol = cpedSymbol_.Get(isolate); + auto maybeProfData = cpedObj->GetPrivate(v8Ctx, localSymbol); + if (!maybeProfData.IsEmpty()) { + auto profData = maybeProfData.ToLocalChecked(); + if (!profData->IsUndefined()) { + return static_cast( + profData.As()->Value()) + ->Get(); + } + } + } + } + return ContextPtr(); +#else + return curContext_.Get(); +#endif } NAN_GETTER(WallProfiler::GetContext) { @@ -1044,6 +1219,10 @@ NAN_METHOD(WallProfiler::Dispose) { } double GetAsyncIdNoGC(v8::Isolate* isolate) { + if (!isolate->IsInUse()) { + // Must not try to create a handle scope if isolate is not in use. + return -1; + } #if NODE_MAJOR_VERSION >= 24 HandleScope scope(isolate); auto context = isolate->GetEnteredOrMicrotaskContext(); @@ -1069,27 +1248,39 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) { auto curCount = gcCount.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); if (curCount == 0) { - gcAsyncId = GetAsyncIdNoGC(isolate); + if (collectAsyncId_) { + gcAsyncId = GetAsyncIdNoGC(isolate); + } + if (useCPED_) { + gcContext = GetContextPtrSignalSafe(isolate); + } } std::atomic_signal_fence(std::memory_order_release); gcCount.store(curCount + 1, std::memory_order_relaxed); } void WallProfiler::OnGCEnd() { - gcCount.fetch_sub(1, std::memory_order_relaxed); + auto oldCount = gcCount.fetch_sub(1, std::memory_order_relaxed); + if (oldCount == 1 && useCPED_) { + // Not strictly necessary, as we'll reset it to something else on next GC, + // but why retain it longer than needed? + gcContext.reset(); + } } void WallProfiler::PushContext(int64_t time_from, int64_t time_to, int64_t cpu_time, - double async_id) { + Isolate* isolate) { // Be careful this is called in a signal handler context therefore all // operations must be async signal safe (in particular no allocations). // Our ring buffer avoids allocations. - auto context = curContext_.Get(); - std::atomic_signal_fence(std::memory_order_acquire); if (contexts_.size() < contexts_.capacity()) { - contexts_.push_back({context, time_from, time_to, cpu_time, async_id}); + contexts_.push_back({GetContextPtrSignalSafe(isolate), + time_from, + time_to, + cpu_time, + GetAsyncId(isolate)}); std::atomic_fetch_add_explicit( reinterpret_cast*>(&fields_[kSampleCount]), 1U, diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 38dc7e5a..a6136b35 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -71,6 +71,8 @@ class AtomicContextPtr { friend class WallProfiler; }; +class PersistentContextPtr; + class WallProfiler : public Nan::ObjectWrap { public: enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts }; @@ -80,14 +82,24 @@ class WallProfiler : public Nan::ObjectWrap { std::chrono::microseconds samplingPeriod_{0}; v8::CpuProfiler* cpuProfiler_ = nullptr; - // TODO: Investigate use of v8::Persistent instead of shared_ptr to - // avoid heap allocation. Need to figure out the right move/copy semantics in - // and out of the ring buffer. + bool useCPED_ = false; + // If we aren't using the CPED, we use a single context ptr stored here. AtomicContextPtr curContext_; + // Otherwise we'll use a private symbol to store the context in CPED objects. + v8::Global cpedSymbol_; + // We track live context pointers in a set to avoid memory leaks. They will + // be deleted when the profiler is disposed. + std::unordered_set liveContextPtrs_; + // Context pointers belonging to GC'd CPED objects register themselves here. + // They will be deleted and removed from liveContextPtrs_ next time SetContext + // is invoked. + std::vector deadContextPtrs_; std::atomic gcCount = 0; + std::atomic setInProgress = false; double gcAsyncId; + ContextPtr gcContext; std::atomic collectionMode_; std::atomic noCollectCallCount_; @@ -136,6 +148,9 @@ class WallProfiler : public Nan::ObjectWrap { static void CleanupHook(void* data); void Cleanup(v8::Isolate* isolate); + ContextPtr GetContextPtr(v8::Isolate* isolate); + ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate); + public: /** * @param samplingPeriodMicros sampling interval, in microseconds @@ -143,6 +158,10 @@ class WallProfiler : public Nan::ObjectWrap { * parameter is informative; it is up to the caller to call the Stop method * every period. The parameter is used to preallocate data structures that * should not be reallocated in async signal safe code. + * @param useCPED whether to use the V8 ContinuationPreservedEmbedderData to + * store the current sampling context. It can be used if AsyncLocalStorage + * uses the AsyncContextFrame implementation (experimental in Node 23, default + * in Node 24.) */ explicit WallProfiler(std::chrono::microseconds samplingPeriod, std::chrono::microseconds duration, @@ -151,14 +170,15 @@ class WallProfiler : public Nan::ObjectWrap { bool workaroundV8bug, bool collectCpuTime, bool collectAsyncId, - bool isMainThread); + bool isMainThread, + bool useCPED); v8::Local GetContext(v8::Isolate*); void SetContext(v8::Isolate*, v8::Local); void PushContext(int64_t time_from, int64_t time_to, int64_t cpu_time, - double async_id); + v8::Isolate* isolate); Result StartImpl(); v8::ProfilerId StartInternal(); Result StopImpl(bool restart, v8::Local& profile); diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index b9cd299d..6c6893cd 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -67,6 +67,7 @@ export interface TimeProfilerOptions { workaroundV8Bug?: boolean; collectCpuTime?: boolean; collectAsyncId?: boolean; + useCPED?: boolean; } const DEFAULT_OPTIONS: TimeProfilerOptions = { @@ -77,6 +78,7 @@ const DEFAULT_OPTIONS: TimeProfilerOptions = { workaroundV8Bug: true, collectCpuTime: false, collectAsyncId: false, + useCPED: false, }; export async function profile(options: TimeProfilerOptions = {}) { @@ -100,8 +102,8 @@ export function start(options: TimeProfilerOptions = {}) { gProfiler.start(); - // If contexts are enabled, set an initial empty context - if (options.withContexts) { + // If contexts are enabled without using CPED, set an initial empty context + if (options.withContexts && !options.useCPED) { setContext({}); } } From 23889f0931b05dff91c11e8bc28c524b0c6eb07d Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Fri, 16 May 2025 16:18:38 +0200 Subject: [PATCH 04/11] Test useCPED when on Node 24 --- package.json | 1 + ts/test/test-time-profiler.ts | 16 ++++++++++++++++ ts/test/worker.ts | 17 ++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 0ba2a0f4..6a5b6e02 100644 --- a/package.json +++ b/package.json @@ -57,6 +57,7 @@ "mocha": "^10.2.0", "nan": "^2.22.2", "nyc": "^15.1.0", + "semver": "^7.7.2", "sinon": "^15.2.0", "source-map-support": "^0.5.21", "tmp": "0.2.1", diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index 2434c813..5a14e8cf 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -23,9 +23,15 @@ import {hrtime} from 'process'; import {Label, Profile} from 'pprof-format'; import {AssertionError} from 'assert'; import {GenerateTimeLabelsArgs, LabelSet} from '../src/v8-types'; +import {AsyncLocalStorage} from 'async_hooks'; +import {satisfies} from 'semver'; const assert = require('assert'); +const useCPED = + satisfies(process.versions.node, '>=24.0.0') && + !process.execArgv.includes('--no-async-context-frame'); + const PROFILE_OPTIONS = { durationMillis: 500, intervalMicros: 1000, @@ -44,11 +50,16 @@ describe('Time Profiler', () => { this.skip(); } const startTime = BigInt(Date.now()) * 1000n; + if (useCPED) { + // Ensure an async context frame is created to hold the profiler context. + new AsyncLocalStorage().enterWith(1); + } time.start({ intervalMicros: 20 * 1_000, durationMillis: PROFILE_OPTIONS.durationMillis, withContexts: true, lineNumbers: false, + useCPED, }); const initialContext: {[key: string]: string} = {}; time.setContext(initialContext); @@ -97,11 +108,16 @@ describe('Time Profiler', () => { this.timeout(3000); const intervalNanos = PROFILE_OPTIONS.intervalMicros * 1_000; + if (useCPED) { + // Ensure an async context frame is created to hold the profiler context. + new AsyncLocalStorage().enterWith(1); + } time.start({ intervalMicros: PROFILE_OPTIONS.intervalMicros, durationMillis: PROFILE_OPTIONS.durationMillis, withContexts: true, lineNumbers: false, + useCPED, }); // By repeating the test few times, we also exercise the profiler // start-stop overlap behavior. diff --git a/ts/test/worker.ts b/ts/test/worker.ts index a5f5a1bd..155c89f5 100644 --- a/ts/test/worker.ts +++ b/ts/test/worker.ts @@ -3,13 +3,18 @@ import {pbkdf2} from 'crypto'; import {time} from '../src/index'; import {Profile, ValueType} from 'pprof-format'; import {getAndVerifyPresence, getAndVerifyString} from './profiles-for-tests'; +import {satisfies} from 'semver'; import assert from 'assert'; +import {AsyncLocalStorage} from 'async_hooks'; const DURATION_MILLIS = 1000; const intervalMicros = 10000; const withContexts = process.platform === 'darwin' || process.platform === 'linux'; +const useCPED = + satisfies(process.versions.node, '>=24.0.0') && + !process.execArgv.includes('--no-async-context-frame'); function createWorker(durationMs: number): Promise { return new Promise((resolve, reject) => { @@ -52,15 +57,20 @@ function getCpuUsage() { } async function main(durationMs: number) { + if (useCPED) new AsyncLocalStorage().enterWith(1); time.start({ durationMillis: durationMs * 3, intervalMicros, withContexts, collectCpuTime: withContexts, + useCPED: useCPED, }); + if (withContexts) { + time.setContext({}); + } const cpu0 = getCpuUsage(); - const nbWorkers = Number(process.argv[2] || 2); + const nbWorkers = Number(process.argv[2] ?? 2); // start workers const workers = executeWorkers(nbWorkers, durationMs); @@ -91,12 +101,17 @@ async function main(durationMs: number) { } async function worker(durationMs: number) { + if (useCPED) new AsyncLocalStorage().enterWith(1); time.start({ durationMillis: durationMs, intervalMicros, withContexts, collectCpuTime: withContexts, + useCPED: useCPED, }); + if (withContexts) { + time.setContext({}); + } const deadline = Date.now() + durationMs; await Promise.all([bar(deadline), foo(deadline)]); From aa86b6322e4f82ba274102a555107e1a9d8f70b2 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 19 May 2025 18:34:17 +0200 Subject: [PATCH 05/11] Design doc explaining how the sampling context works in continuation-preserved embedder data --- doc/sample_context_in_cped.md | 190 ++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 doc/sample_context_in_cped.md diff --git a/doc/sample_context_in_cped.md b/doc/sample_context_in_cped.md new file mode 100644 index 00000000..259c7749 --- /dev/null +++ b/doc/sample_context_in_cped.md @@ -0,0 +1,190 @@ +# Storing Sample Context in V8 Continuation-Preserved Embedder Data + +## What is the Sample Context? +Datadog's Node.js profiler has the ability to store a custom object that it will +then associate with collected CPU samples. We refer to this object as the +"sample context." A higher-level embedding (typically, dd-trace-js) will then +update the sample context to keep it current with changes in the execution. A +typical piece of data sample context stores is the tracing span ID, so whenever +it changes, the sample context needs to be updated. + +## How is the Sample Context stored and updated? +Before Node 23, the sample context would be stored in a +`std::shared_ptr>` field on the C++ `WallProfiler` +instance. (In fact, due to the need for ensuring atomic updates and shared +pointers not being effectively updateable atomically it's actually a pair of +fields with an atomic pointer-to-shared-pointer switching between them, but I +digress.) Due to it being a single piece of instance state, it had to be updated +every time the active span changed, possibly on every invocation of +`AsyncLocalStorage.enterWith` and `.run`, but even more importantly on every +async context change, and for that we needed to register a "before" callback +with `async_hooks.createHook`. This meant that we needed to both update the +sample context on every async context change, but more importantly it also meant +we needed to use `async_hooks.createHook` which is getting deprecated in Node. +Current documentation for it is not exactly a shining endorsement: +> Please migrate away from this API, if you can. We do not recommend using the +> createHook, AsyncHook, and executionAsyncResource APIs as they have usability +> issues, safety risks, and performance implications. + +Fortunately, first the V8 engine and then Node.js gave us building blocks for a +better solution. + +## V8 Continuation-Preserved Embedder Data and Node.js Async Context Frame +In the V8 engine starting from version 12 (the one shipping with Node 22) +`v8::Isolate` exposes an API to set and get embedder-specific data on it so that +it is preserved across executions that are logical continuations of each other +(essentially: across promise chains; this includes await expressions.) Even +though the APIs are exposed on the isolate, the data is stored on a +per-continuation basis and the engine takes care to return the right one when +`Isolate::GetContinuationPreservedEmbedderData()` method is invoked. We will +refer to continuation-preserved embedder data as "CPED" from now on. + +Starting with Node.js 23, CPED is used to implement data storage behind Node.js +`AsyncLocalStorage` API. This dovetails nicely with our needs as all the +span-related data we set on the sample context is normally managed in an async +local storage (ALS) by the tracer. An application can create any number of +ALSes, and each ALS manages a single value per async context. This value is +somewhat confusingly called the "store" of the async local storage, making it +important to not confuse the terms "storage" (an identity with multiple values, +one per async context) and "store", which is a value of a storage within a +particular async context. + +The new implementation for storing ALS stores introduces an internal Node.js +class named `AsyncContextFrame` (ACF) which is a map that uses ALSes as keys, +and their stores as the map values, essentially providing a mapping from an ALS +to its store in the current async context. (This implementation is very similar +to how e.g. Java implements `ThreadLocal`, which is a close analogue to ALS in +Node.js.) ACF instances are then stored in CPED. + +## Storing the Sample Context in CPED +Node.js – as the embedder of V8 – commandeers the CPED to store instances of +ACF in it. This means that our profiler can't directly store our sample context +in the CPED, because then we'd overwrite the ACF reference already in there and +break Node.js. Fortunately, since ACF is "just" an ordinary JavaScript object, +we can define a new property on it, and store our sample context in it! +JavaScript properties can have strings, numbers, or symbols as their keys, with +symbols being the recommended practice to define properties that are hidden from +unrelated code as symbols are private to their creator and only compare equal to +themselves. Thus we create a private symbol in the profiler instance for our +property key, and our logic for storing the sample context thus becomes: +* get the CPED from the V8 isolate +* if it is not an object, do nothing (we can't set the sample context) +* otherwise set the sample context as a value in the object with our property + key. + +The reality is a bit thornier, though. Imagine what happens if while we're +setting the property, we get interrupted by a PROF signal and the signal handler +tries to read the property value? It could easily observe an inconsistent state +and crash. But even if it reads a property value, which one did it read? Still +the old one, already the new one, or maybe a torn value between the two? + +Fortunately, we had the exact same problem with our previous approach where we +only stored one sample context in the profiler instances, and the solution is +the same. We encapsulate the pair of shared pointers to a V8 `Global` and an +atomic pointer-to-pointer in a class named `AtomicContextPtr`, which looks like +this: +``` +using ContextPtr = std::shared_ptr>; + +class AtomicContextPtr { + ContextPtr ptr1; + ContextPtr ptr2; + std::atomic currentPtr = &ptr1; + ... +``` +A `Set` method on this class will first store the newly passed sample context in +either `ptr1` or `ptr2` – whichever `currentPtr` is _not_ pointing to at the +moment. Subsequently it atomically updates `currentPtr` to now point to it. + +Instead of storing the current sample context in the ACF property directly, +we want to store an `AtomicContextPtr` (ACP.) The only problem? This is a C++ +class, and properties of JavaScript objects can only be JavaScript values. +Fortunately, V8 gives us a solution for this as well: the `v8::External` type is +a V8 value type that wraps a `void *`. +So now the algorithm for setting a sample context is: +* get the CPED from the V8 isolate +* if it is not an object, do nothing (we can't set the sample context) +* Retrieve the property value. If there is one, it's the `External` wrapping the + pointer to the ACP we use. +* If there is none, allocate a new ACP on C++ heap, create a `v8::External` to + hold its pointer, and store it as a property in the ACF. +* Set the sample context as a value on the either retrieved or created ACP. + +The chain of data now looks something like this: +``` +v8::Isolate (from Isolate::GetCurrent()) + +-> current continuation (internally managed by V8) + +-> node::AsyncContextFrame (in continuation's CPED field) + +-> v8::External (in AsyncContextFrame's private property) + +-> dd::AsyncContextPtr (in External's data field) + +-> std::shared_ptr> (in either AsyncContextPtr::ptr1 or ptr2) + +-> v8::Global (in shared_ptr) + +-> v8::Value (the actual sample context object) +``` +The last 3-4 steps were the same in the previous code version as well, except +`ptr1` and `ptr2` were directly represented in the `WallProfiler`, so then it +looked like this: +``` +dd::WallProfiler + +-> std::shared_ptr> (in either WallProfiler::ptr1 or ptr2) + +-> v8::Global (in shared_ptr) + +-> v8::Value (the actual sample context object) +``` +The difference between the two diagrams shows how we encapsulated the +`(ptr1, ptr2, currentPtr)` tuple into a separate class and moved it out from +being an instance state of `WallProfiler` to being a property of every ACF we +encounter. + +## Odds and ends +And that's mostly it! There are few more small odds and ends to make it work +safely. We still need to guard writing the property value to the ACF against +concurrent access by the signal handler, but now it happens only once for every +ACF, when we create its ACP. We guard by introducing an atomic boolean and +proper signal fencing. + +The signal handler code also needs to be prevented from trying to access the +data while a GC is in progress. With this new model, the signal handler +unfortunately needs to do a small number of V8 API invocations. It needs to +retrieve the current V8 `Context`, it needs to obtain a `Local` for the property +key, and finally it needs to use both in an `Object::Get` call on the CPED. +Calling a property getter on an object is reentrancy into V8, which is advised +against, but this being an ordinary property it ends up being a single dependent +load, which turns out to work safely… unless there's GC happening. For this +reason, we register GC prologue and epilogue callbacks with the V8 isolate so we +can know when GCs are ongoing and the signal handler will refrain from touching +CPED during them. We'll however grab the current sample context from the CPED +and store it in a profiler instance field in the GC prologue and use it for any +samples taken during GC. + +Speaking of GC, we can now have an unbounded number of ACPs – one for each live +ACF. Each ACP is allocated on the C++ heap, and needs to be deleted eventually. +The profiler tracks every ACP it creates in an internal set of live ACPs and +deletes them all when it itself gets disposed. This would still allow for +unbounded growth so we additionally register a V8 GC finalization callback for +every ACF. When V8 collects an ACF instance its finalization callback will put +that ACF's ACP into the profiler's internal vector of ready-to-delete ACPs and +the profiler processes that vector (both deletes the ACP and removes it from the +live set) on each call to `SetContext`. + +## Changes in dd-trace-js +For completeness, we'll describe the changes in dd-trace-js here as well. The +main change is that with Node 24, we no longer require async hooks. The +instrumentation points for `AsyncLocalStorage.enterWith` and +`AsyncLocalStorage.run` remain in place – they are the only ones that are needed +now. + +There are some small performance optimizations that no longer apply with the new +approach, though. For one, with the old approach we did some data conversions +(span IDs to string, a tag array to endpoint string) in a sample when a sample +was captured. With the new approach, we do these conversions for all sample +contexts during profile serialization. Doing them after each sample capture +amortized their cost possibly minimally reducing the latency induced at +serialization time. With the old approach we also called `SetContext` only once +per sampling – we'd install a sample context to be used for the next sample, and +then kept updating a `ref` field in it with a reference to the actual data. +Since we no longer have a single sample context (but one per continuation) we +can not do this anymore, and we need to call `SetContext` on every ACF change. +The cost of this (basically, going into a native call from JavaScript) are still +well offset by not having to use async hooks and do work on every async context +change. We could arguably simplify the code by removing those small +optimizations. From 79114c1a709c273ebfa1b029bc36c1dbe548fd66 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Tue, 3 Jun 2025 16:35:01 +0200 Subject: [PATCH 06/11] Addressing simpler points of review feedback --- bindings/profilers/wall.cc | 22 ++++++++-------------- bindings/profilers/wall.hh | 4 ++-- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 5f2b2777..010eb71e 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -58,8 +58,6 @@ using namespace v8; namespace dd { -using ContextPtr = std::shared_ptr>; - // Maximum number of rounds in the GetV8ToEpochOffset static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20; @@ -613,11 +611,9 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); - for (auto it = liveContextPtrs_.begin(); it != liveContextPtrs_.end();) { - auto ptr = *it; + for (auto ptr : liveContextPtrs_) { ptr->UnregisterFromGC(); delete ptr; - ++it; } liveContextPtrs_.clear(); deadContextPtrs_.clear(); @@ -1085,11 +1081,9 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { } // Clean up dead context pointers - for (auto it = deadContextPtrs_.begin(); it != deadContextPtrs_.end();) { - auto ptr = *it; + for (auto ptr : deadContextPtrs_) { liveContextPtrs_.erase(ptr); delete ptr; - ++it; } deadContextPtrs_.clear(); @@ -1112,11 +1106,11 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { contextPtr = new PersistentContextPtr(&deadContextPtrs_); auto external = External::New(isolate, contextPtr); - setInProgress.store(true, std::memory_order_relaxed); + setInProgress_.store(true, std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_release); auto maybeSetResult = cpedObj->SetPrivate(v8Ctx, localSymbol, external); std::atomic_signal_fence(std::memory_order_release); - setInProgress.store(false, std::memory_order_relaxed); + setInProgress_.store(false, std::memory_order_relaxed); if (maybeSetResult.IsNothing()) { delete contextPtr; return; @@ -1135,7 +1129,7 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { } ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { - auto isSetInProgress = setInProgress.load(std::memory_order_relaxed); + auto isSetInProgress = setInProgress_.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); if (isSetInProgress) { // New sample context is being set. Safe behavior is to not try attempt @@ -1147,7 +1141,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { auto curGcCount = gcCount.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); if (curGcCount > 0) { - return gcContext; + return gcContext_; } } @@ -1252,7 +1246,7 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) { gcAsyncId = GetAsyncIdNoGC(isolate); } if (useCPED_) { - gcContext = GetContextPtrSignalSafe(isolate); + gcContext_ = GetContextPtrSignalSafe(isolate); } } std::atomic_signal_fence(std::memory_order_release); @@ -1264,7 +1258,7 @@ void WallProfiler::OnGCEnd() { if (oldCount == 1 && useCPED_) { // Not strictly necessary, as we'll reset it to something else on next GC, // but why retain it longer than needed? - gcContext.reset(); + gcContext_.reset(); } } diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index a6136b35..b9d988f3 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -97,9 +97,9 @@ class WallProfiler : public Nan::ObjectWrap { std::vector deadContextPtrs_; std::atomic gcCount = 0; - std::atomic setInProgress = false; + std::atomic setInProgress_ = false; double gcAsyncId; - ContextPtr gcContext; + ContextPtr gcContext_; std::atomic collectionMode_; std::atomic noCollectCallCount_; From 9ae8154e7f0e3d2ab795e3de4fbf744953de63fa Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 4 Jun 2025 11:12:20 +0200 Subject: [PATCH 07/11] Remove AtomicContextPtr in favor of setInProgress_ --- bindings/profilers/wall.cc | 53 +++++++++++++++++++++++++++++++------- bindings/profilers/wall.hh | 37 +++----------------------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 010eb71e..7e1c27f4 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -58,6 +58,20 @@ using namespace v8; namespace dd { +class SignalMutex { + std::atomic& mutex_; + + inline void store(bool value) { + std::atomic_signal_fence(std::memory_order_release); + mutex_.store(value, std::memory_order_relaxed); + } + + public: + inline SignalMutex(std::atomic& mutex) : mutex_(mutex) { store(true); } + + inline ~SignalMutex() { store(false); } +}; + // Maximum number of rounds in the GetV8ToEpochOffset static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20; @@ -620,7 +634,18 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { } } -class PersistentContextPtr : AtomicContextPtr { +void SetContextPtr(ContextPtr& contextPtr, + Isolate* isolate, + Local value) { + if (contextPtr) { + contextPtr = std::make_shared>(isolate, value); + } else { + contextPtr.reset(); + } +} + +class PersistentContextPtr { + ContextPtr context; std::vector* dead; Persistent per; @@ -648,6 +673,14 @@ class PersistentContextPtr : AtomicContextPtr { WeakCallbackType::kParameter); } + void Set(Isolate* isolate, const Local& value) { + SetContextPtr(context, isolate, value); + } + + ContextPtr Get() const { + return context; + } + friend class WallProfiler; }; @@ -1073,10 +1106,15 @@ Local WallProfiler::GetContext(Isolate* isolate) { return Undefined(isolate); } +void WallProfiler::SetCurrentContextPtr(Isolate* isolate, Local value) { + SignalMutex m(setInProgress_); + SetContextPtr(curContext_, isolate, value); +} + void WallProfiler::SetContext(Isolate* isolate, Local value) { #if NODE_MAJOR_VERSION >= 23 if (!useCPED_) { - curContext_.Set(isolate, value); + SetCurrentContextPtr(isolate, value); return; } @@ -1102,15 +1140,12 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { PersistentContextPtr* contextPtr = nullptr; auto profData = maybeProfData.ToLocalChecked(); + SignalMutex m(setInProgress_); if (profData->IsUndefined()) { contextPtr = new PersistentContextPtr(&deadContextPtrs_); auto external = External::New(isolate, contextPtr); - setInProgress_.store(true, std::memory_order_relaxed); - std::atomic_signal_fence(std::memory_order_release); auto maybeSetResult = cpedObj->SetPrivate(v8Ctx, localSymbol, external); - std::atomic_signal_fence(std::memory_order_release); - setInProgress_.store(false, std::memory_order_relaxed); if (maybeSetResult.IsNothing()) { delete contextPtr; return; @@ -1124,7 +1159,7 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { contextPtr->Set(isolate, value); #else - curContext_.Set(isolate, value); + SetCurrentContextPtr(isolate, value); #endif } @@ -1151,7 +1186,7 @@ ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) { ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { #if NODE_MAJOR_VERSION >= 23 if (!useCPED_) { - return curContext_.Get(); + return curContext_; } if (!isolate->IsInUse()) { @@ -1179,7 +1214,7 @@ ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) { } return ContextPtr(); #else - return curContext_.Get(); + return curContext_; #endif } diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index b9d988f3..f234d70c 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -38,39 +38,6 @@ struct Result { using ContextPtr = std::shared_ptr>; -/** - * Class that allows atomic updates to a ContextPtr. Since update of shared_ptr - * is not atomic, we use a pointer that alternates between pointing to one of - * two shared_ptrs instead, and we use atomic operations to update the pointer. - */ -class AtomicContextPtr { - ContextPtr ptr1; - ContextPtr ptr2; - std::atomic currentPtr = &ptr1; - - void Set(v8::Isolate* isolate, v8::Local value) { - auto oldPtr = currentPtr.load(std::memory_order_relaxed); - std::atomic_signal_fence(std::memory_order_acquire); - auto newPtr = oldPtr == &ptr1 ? &ptr2 : &ptr1; - if (!value->IsNullOrUndefined()) { - *newPtr = std::make_shared>(isolate, value); - } else { - newPtr->reset(); - } - std::atomic_signal_fence(std::memory_order_release); - currentPtr.store(newPtr, std::memory_order_relaxed); - std::atomic_signal_fence(std::memory_order_release); - } - - ContextPtr Get() { - auto ptr = currentPtr.load(std::memory_order_relaxed); - std::atomic_signal_fence(std::memory_order_acquire); - return ptr ? *ptr : ContextPtr(); - } - - friend class WallProfiler; -}; - class PersistentContextPtr; class WallProfiler : public Nan::ObjectWrap { @@ -85,7 +52,7 @@ class WallProfiler : public Nan::ObjectWrap { bool useCPED_ = false; // If we aren't using the CPED, we use a single context ptr stored here. - AtomicContextPtr curContext_; + ContextPtr curContext_; // Otherwise we'll use a private symbol to store the context in CPED objects. v8::Global cpedSymbol_; // We track live context pointers in a set to avoid memory leaks. They will @@ -151,6 +118,8 @@ class WallProfiler : public Nan::ObjectWrap { ContextPtr GetContextPtr(v8::Isolate* isolate); ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate); + void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); + public: /** * @param samplingPeriodMicros sampling interval, in microseconds From 5e2043c372e80cbf1240e2e1811b18c50b4ed56c Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 4 Jun 2025 17:20:37 +0200 Subject: [PATCH 08/11] Move PersistentContextPtr to top --- bindings/profilers/wall.cc | 98 +++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 7e1c27f4..23d5b527 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -72,6 +72,54 @@ class SignalMutex { inline ~SignalMutex() { store(false); } }; +void SetContextPtr(ContextPtr& contextPtr, + Isolate* isolate, + Local value) { + if (!value->IsNullOrUndefined()) { + contextPtr = std::make_shared>(isolate, value); + } else { + contextPtr.reset(); + } +} + +class PersistentContextPtr { + ContextPtr context; + std::vector* dead; + Persistent per; + + PersistentContextPtr(std::vector* dead) : dead(dead) {} + + void UnregisterFromGC() { + if (!per.IsEmpty()) { + per.ClearWeak(); + per.Reset(); + } + } + + void MarkDead() { dead->push_back(this); } + + void RegisterForGC(Isolate* isolate, const Local& obj) { + // Register a callback to delete this object when the object is GCed + per.Reset(isolate, obj); + per.SetWeak( + this, + [](const WeakCallbackInfo& data) { + auto ptr = data.GetParameter(); + ptr->MarkDead(); + ptr->UnregisterFromGC(); + }, + WeakCallbackType::kParameter); + } + + void Set(Isolate* isolate, const Local& value) { + SetContextPtr(context, isolate, value); + } + + ContextPtr Get() const { return context; } + + friend class WallProfiler; +}; + // Maximum number of rounds in the GetV8ToEpochOffset static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20; @@ -634,56 +682,6 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { } } -void SetContextPtr(ContextPtr& contextPtr, - Isolate* isolate, - Local value) { - if (contextPtr) { - contextPtr = std::make_shared>(isolate, value); - } else { - contextPtr.reset(); - } -} - -class PersistentContextPtr { - ContextPtr context; - std::vector* dead; - Persistent per; - - PersistentContextPtr(std::vector* dead) : dead(dead) {} - - void UnregisterFromGC() { - if (!per.IsEmpty()) { - per.ClearWeak(); - per.Reset(); - } - } - - void MarkDead() { dead->push_back(this); } - - void RegisterForGC(Isolate* isolate, const Local& obj) { - // Register a callback to delete this object when the object is GCed - per.Reset(isolate, obj); - per.SetWeak( - this, - [](const WeakCallbackInfo& data) { - auto ptr = data.GetParameter(); - ptr->MarkDead(); - ptr->UnregisterFromGC(); - }, - WeakCallbackType::kParameter); - } - - void Set(Isolate* isolate, const Local& value) { - SetContextPtr(context, isolate, value); - } - - ContextPtr Get() const { - return context; - } - - friend class WallProfiler; -}; - #define DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(name) \ auto name##Value = \ Nan::Get(arg, Nan::New(#name).ToLocalChecked()); \ From 5b06bbe3c0fd6ace434b64a9f632dd5c7582ffc4 Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Thu, 5 Jun 2025 15:08:13 +0200 Subject: [PATCH 09/11] Expose a counter of number of live context pointers. Also optimize away some setting of null/undefined as context --- bindings/profilers/wall.cc | 36 ++++++++++++++++++++++++++++++++++++ bindings/profilers/wall.hh | 3 +-- ts/src/time-profiler.ts | 3 ++- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 23d5b527..79e201bd 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -72,6 +72,27 @@ class SignalMutex { inline ~SignalMutex() { store(false); } }; +// This class is used to update the context count metric when it goes out of +// scope. +class ContextCountMetricUpdater { + uint32_t* fields_; + std::unordered_set& liveContextPtrs_; + + public: + inline ContextCountMetricUpdater( + uint32_t* fields, + std::unordered_set& liveContextPtrs) + : fields_(fields), liveContextPtrs_(liveContextPtrs) {} + + inline ~ContextCountMetricUpdater() { + std::atomic_store_explicit( + reinterpret_cast*>( + &fields_[WallProfiler::Fields::kCPEDContextCount]), + liveContextPtrs_.size(), + std::memory_order_relaxed); + } +}; + void SetContextPtr(ContextPtr& contextPtr, Isolate* isolate, Local value) { @@ -673,6 +694,7 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); + ContextCountMetricUpdater updater(fields_, liveContextPtrs_); for (auto ptr : liveContextPtrs_) { ptr->UnregisterFromGC(); delete ptr; @@ -853,6 +875,7 @@ v8::ProfilerId WallProfiler::StartInternal() { if (withContexts_ || workaroundV8Bug_) { SignalHandler::IncreaseUseCount(); fields_[kSampleCount] = 0; + fields_[kCPEDContextCount] = 0; } if (collectCpuTime_) { @@ -1072,6 +1095,11 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::New(kSampleCount), ReadOnlyDontDelete) .FromJust(); + Nan::DefineOwnProperty(constants, + Nan::New("kCPEDContextCount").ToLocalChecked(), + Nan::New(kCPEDContextCount), + ReadOnlyDontDelete) + .FromJust(); Nan::DefineOwnProperty(target, Nan::New("constants").ToLocalChecked(), constants, @@ -1116,6 +1144,8 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { return; } + ContextCountMetricUpdater updater(fields_, liveContextPtrs_); + // Clean up dead context pointers for (auto ptr : deadContextPtrs_) { liveContextPtrs_.erase(ptr); @@ -1140,6 +1170,12 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { auto profData = maybeProfData.ToLocalChecked(); SignalMutex m(setInProgress_); if (profData->IsUndefined()) { + if (value->IsNullOrUndefined()) { + // Don't go to the trouble of mutating the CPED for null or undefined as + // the absence of a sample context will be interpreted as undefined in + // GetContextPtr anyway. + return; + } contextPtr = new PersistentContextPtr(&deadContextPtrs_); auto external = External::New(isolate, contextPtr); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index f234d70c..2916edd8 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -43,10 +43,9 @@ class PersistentContextPtr; class WallProfiler : public Nan::ObjectWrap { public: enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts }; + enum Fields { kSampleCount, kCPEDContextCount, kFieldCount }; private: - enum Fields { kSampleCount, kFieldCount }; - std::chrono::microseconds samplingPeriod_{0}; v8::CpuProfiler* cpuProfiler_ = nullptr; diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index 6c6893cd..caf50301 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -30,7 +30,7 @@ import { import {GenerateTimeLabelsFunction} from './v8-types'; import {isMainThread} from 'worker_threads'; -const {kSampleCount} = profilerConstants; +const {kSampleCount, kCPEDContextCount} = profilerConstants; const DEFAULT_INTERVAL_MICROS: Microseconds = 1000; const DEFAULT_DURATION_MILLIS: Milliseconds = 60000; @@ -178,6 +178,7 @@ export function v8ProfilerStuckEventLoopDetected() { export const constants = { kSampleCount, + kCPEDContextCount, GARBAGE_COLLECTION_FUNCTION_NAME, NON_JS_THREADS_FUNCTION_NAME, }; From 604518f340de49e1db36cd4463297117de664c7c Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 25 Jun 2025 11:18:58 +0200 Subject: [PATCH 10/11] Review feedback: - SignalMutex -> SignalGuard - Use defer instead of ContextCountMetricUpdater - make PersistentContextPtr methods public instead of declaring a friend class --- bindings/profilers/wall.cc | 53 +++++++++++++++----------------------- bindings/profilers/wall.hh | 1 + 2 files changed, 22 insertions(+), 32 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index 79e201bd..be9c033b 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -25,6 +25,7 @@ #include #include +#include "defer.hh" #include "per-isolate-data.hh" #include "translate-time-profile.hh" #include "wall.hh" @@ -58,39 +59,18 @@ using namespace v8; namespace dd { -class SignalMutex { - std::atomic& mutex_; +class SignalGuard { + std::atomic& guard_; inline void store(bool value) { std::atomic_signal_fence(std::memory_order_release); - mutex_.store(value, std::memory_order_relaxed); + guard_.store(value, std::memory_order_relaxed); } public: - inline SignalMutex(std::atomic& mutex) : mutex_(mutex) { store(true); } + inline SignalGuard(std::atomic& guard) : guard_(guard) { store(true); } - inline ~SignalMutex() { store(false); } -}; - -// This class is used to update the context count metric when it goes out of -// scope. -class ContextCountMetricUpdater { - uint32_t* fields_; - std::unordered_set& liveContextPtrs_; - - public: - inline ContextCountMetricUpdater( - uint32_t* fields, - std::unordered_set& liveContextPtrs) - : fields_(fields), liveContextPtrs_(liveContextPtrs) {} - - inline ~ContextCountMetricUpdater() { - std::atomic_store_explicit( - reinterpret_cast*>( - &fields_[WallProfiler::Fields::kCPEDContextCount]), - liveContextPtrs_.size(), - std::memory_order_relaxed); - } + inline ~SignalGuard() { store(false); } }; void SetContextPtr(ContextPtr& contextPtr, @@ -108,6 +88,7 @@ class PersistentContextPtr { std::vector* dead; Persistent per; + public: PersistentContextPtr(std::vector* dead) : dead(dead) {} void UnregisterFromGC() { @@ -137,8 +118,6 @@ class PersistentContextPtr { } ContextPtr Get() const { return context; } - - friend class WallProfiler; }; // Maximum number of rounds in the GetV8ToEpochOffset @@ -677,6 +656,14 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, } } +void WallProfiler::UpdateContextCount() { + std::atomic_store_explicit( + reinterpret_cast*>( + &fields_[WallProfiler::Fields::kCPEDContextCount]), + liveContextPtrs_.size(), + std::memory_order_relaxed); +} + void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { if (cpuProfiler_ != nullptr) { cpuProfiler_->Dispose(); @@ -694,13 +681,13 @@ void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { node::RemoveEnvironmentCleanupHook( isolate, &WallProfiler::CleanupHook, isolate); - ContextCountMetricUpdater updater(fields_, liveContextPtrs_); for (auto ptr : liveContextPtrs_) { ptr->UnregisterFromGC(); delete ptr; } liveContextPtrs_.clear(); deadContextPtrs_.clear(); + UpdateContextCount(); } } @@ -1133,7 +1120,7 @@ Local WallProfiler::GetContext(Isolate* isolate) { } void WallProfiler::SetCurrentContextPtr(Isolate* isolate, Local value) { - SignalMutex m(setInProgress_); + SignalGuard m(setInProgress_); SetContextPtr(curContext_, isolate, value); } @@ -1144,7 +1131,9 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { return; } - ContextCountMetricUpdater updater(fields_, liveContextPtrs_); + defer { + UpdateContextCount(); + }; // Clean up dead context pointers for (auto ptr : deadContextPtrs_) { @@ -1168,7 +1157,7 @@ void WallProfiler::SetContext(Isolate* isolate, Local value) { PersistentContextPtr* contextPtr = nullptr; auto profData = maybeProfData.ToLocalChecked(); - SignalMutex m(setInProgress_); + SignalGuard m(setInProgress_); if (profData->IsUndefined()) { if (value->IsNullOrUndefined()) { // Don't go to the trouble of mutating the CPED for null or undefined as diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 2916edd8..15d5c17e 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -118,6 +118,7 @@ class WallProfiler : public Nan::ObjectWrap { ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate); void SetCurrentContextPtr(v8::Isolate* isolate, v8::Local context); + void UpdateContextCount(); public: /** From 4916ae7c1d08787d39a3048fb56ef2791f6bdabc Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Wed, 25 Jun 2025 14:10:43 +0200 Subject: [PATCH 11/11] Fix format --- bindings/profilers/wall.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index be9c033b..7ab05a9c 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -657,11 +657,11 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, } void WallProfiler::UpdateContextCount() { - std::atomic_store_explicit( - reinterpret_cast*>( - &fields_[WallProfiler::Fields::kCPEDContextCount]), - liveContextPtrs_.size(), - std::memory_order_relaxed); + std::atomic_store_explicit( + reinterpret_cast*>( + &fields_[WallProfiler::Fields::kCPEDContextCount]), + liveContextPtrs_.size(), + std::memory_order_relaxed); } void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) {