Skip to content

Commit c656e99

Browse files
committed
Allow use of CPED to store sampling context
1 parent d213a01 commit c656e99

File tree

3 files changed

+169
-41
lines changed

3 files changed

+169
-41
lines changed

bindings/profilers/wall.cc

Lines changed: 125 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ using namespace v8;
5858

5959
namespace dd {
6060

61+
using ContextPtr = std::shared_ptr<Global<Value>>;
62+
6163
// Maximum number of rounds in the GetV8ToEpochOffset
6264
static constexpr int MAX_EPOCH_OFFSET_ATTEMPTS = 20;
6365

@@ -318,8 +320,7 @@ void SignalHandler::HandleProfilerSignal(int sig,
318320
auto time_from = Now();
319321
old_handler(sig, info, context);
320322
auto time_to = Now();
321-
auto async_id = prof->GetAsyncId(isolate);
322-
prof->PushContext(time_from, time_to, cpu_time, async_id);
323+
prof->PushContext(time_from, time_to, cpu_time, isolate);
323324
}
324325
#else
325326
class SignalHandler {
@@ -516,8 +517,10 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
516517
bool workaroundV8Bug,
517518
bool collectCpuTime,
518519
bool collectAsyncId,
519-
bool isMainThread)
520+
bool isMainThread,
521+
bool useCPED)
520522
: samplingPeriod_(samplingPeriod),
523+
useCPED_(useCPED),
521524
includeLines_(includeLines),
522525
withContexts_(withContexts),
523526
isMainThread_(isMainThread) {
@@ -533,7 +536,6 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
533536
contexts_.reserve(duration * 2 / samplingPeriod);
534537
}
535538

536-
curContext_.store(&context1_, std::memory_order_relaxed);
537539
collectionMode_.store(CollectionMode::kNoCollect, std::memory_order_relaxed);
538540
gcCount.store(0, std::memory_order_relaxed);
539541

@@ -548,7 +550,7 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
548550
jsArray_ = v8::Global<v8::Uint32Array>(isolate, jsArray);
549551
std::fill(fields_, fields_ + kFieldCount, 0);
550552

551-
if (collectAsyncId_) {
553+
if (collectAsyncId_ || useCPED_) {
552554
isolate->AddGCPrologueCallback(&GCPrologueCallback, this);
553555
isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this);
554556
}
@@ -624,6 +626,7 @@ NAN_METHOD(WallProfiler::New) {
624626
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(collectCpuTime);
625627
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(collectAsyncId);
626628
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread);
629+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(useCPED);
627630

628631
if (withContexts && !DD_WALL_USE_SIGPROF) {
629632
return Nan::ThrowTypeError("Contexts are not supported.");
@@ -663,7 +666,8 @@ NAN_METHOD(WallProfiler::New) {
663666
workaroundV8Bug,
664667
collectCpuTime,
665668
collectAsyncId,
666-
isMainThread);
669+
isMainThread,
670+
useCPED);
667671
obj->Wrap(info.This());
668672
info.GetReturnValue().Set(info.This());
669673
} else {
@@ -978,28 +982,111 @@ v8::CpuProfiler* WallProfiler::CreateV8CpuProfiler() {
978982
}
979983

980984
v8::Local<v8::Value> WallProfiler::GetContext(Isolate* isolate) {
981-
auto context = *curContext_.load(std::memory_order_relaxed);
985+
auto context = GetContextPtr(isolate);
982986
if (!context) return v8::Undefined(isolate);
983987
return context->Get(isolate);
984988
}
985989

990+
class PersistentContextPtr : AtomicContextPtr {
991+
Persistent<Object> per;
992+
993+
void BindLifecycleTo(Isolate* isolate, Local<Object>& obj) {
994+
// Register a callback to delete this object when the object is GCed
995+
per.Reset(isolate, obj);
996+
per.SetWeak(
997+
this,
998+
[](const WeakCallbackInfo<PersistentContextPtr>& data) {
999+
auto& per = data.GetParameter()->per;
1000+
if (!per.IsEmpty()) {
1001+
per.ClearWeak();
1002+
per.Reset();
1003+
}
1004+
// Using SetSecondPassCallback as shared_ptr can trigger ~Global and
1005+
// any V8 API use needs to be in the second pass
1006+
data.SetSecondPassCallback(
1007+
[](const WeakCallbackInfo<PersistentContextPtr>& data) {
1008+
delete data.GetParameter();
1009+
});
1010+
},
1011+
WeakCallbackType::kParameter);
1012+
}
1013+
1014+
friend class WallProfiler;
1015+
};
1016+
9861017
void WallProfiler::SetContext(Isolate* isolate, Local<Value> value) {
987-
// Need to be careful here, because we might be interrupted by a
988-
// signal handler that will make use of curContext_.
989-
// Update of shared_ptr is not atomic, so instead we use a pointer
990-
// (curContext_) that points on two shared_ptr (context1_ and context2_),
991-
// update the shared_ptr that is not currently in use and then atomically
992-
// update curContext_.
993-
auto newCurContext = curContext_.load(std::memory_order_relaxed) == &context1_
994-
? &context2_
995-
: &context1_;
996-
if (!value->IsNullOrUndefined()) {
997-
*newCurContext = std::make_shared<Global<Value>>(isolate, value);
1018+
if (!useCPED_) {
1019+
curContext_.Set(isolate, value);
1020+
return;
1021+
}
1022+
1023+
auto cped = isolate->GetContinuationPreservedEmbedderData();
1024+
// No Node AsyncContextFrame in this continuation yet
1025+
if (!cped->IsObject()) return;
1026+
1027+
auto cpedObj = cped.As<Object>();
1028+
auto localSymbol = cpedSymbol_.Get(isolate);
1029+
auto v8Ctx = isolate->GetCurrentContext();
1030+
auto maybeProfData = cpedObj->Get(v8Ctx, localSymbol);
1031+
if (maybeProfData.IsEmpty()) return;
1032+
auto profData = maybeProfData.ToLocalChecked();
1033+
1034+
PersistentContextPtr* contextPtr = nullptr;
1035+
if (profData->IsUndefined()) {
1036+
contextPtr = new PersistentContextPtr();
1037+
1038+
auto maybeSetResult =
1039+
cpedObj->Set(v8Ctx, localSymbol, External::New(isolate, contextPtr));
1040+
if (maybeSetResult.IsNothing()) {
1041+
delete contextPtr;
1042+
return;
1043+
}
1044+
contextPtr->BindLifecycleTo(isolate, cpedObj);
9981045
} else {
999-
newCurContext->reset();
1046+
contextPtr =
1047+
static_cast<PersistentContextPtr*>(profData.As<External>()->Value());
10001048
}
1001-
std::atomic_signal_fence(std::memory_order_release);
1002-
curContext_.store(newCurContext, std::memory_order_relaxed);
1049+
1050+
contextPtr->Set(isolate, value);
1051+
}
1052+
1053+
/**
 * Variant of GetContextPtr() used by PushContext() (which runs in the
 * profiler signal handler) and by OnGCStart().
 *
 * When CPED is in use it returns the context saved at GC start while a GC is
 * in progress, an empty pointer when the isolate is not inside a V8 context,
 * and otherwise the result of GetContextPtr() evaluated inside a fresh
 * HandleScope.
 */
ContextPtr WallProfiler::GetContextPtrSignalSafe(Isolate* isolate) {
  // GetContextPtr() would return the same value here; answering directly
  // just avoids creating a HandleScope for the non-CPED case.
  if (!useCPED_) return curContext_.Get();

  const auto activeGcCount = gcCount.load(std::memory_order_relaxed);
  std::atomic_signal_fence(std::memory_order_acquire);

  // A GC is running: use the context stored in gcContext by OnGCStart().
  if (activeGcCount > 0) return gcContext;

  // not in a V8 Context
  if (!isolate->InContext()) return std::shared_ptr<Global<Value>>();

  HandleScope handleScope(isolate);
  return GetContextPtr(isolate);
}
1071+
1072+
/**
 * Returns the current sampling context.
 *
 * Without CPED this is the profiler-wide curContext_. With CPED the context
 * is looked up on the isolate's continuation-preserved embedder data: that
 * object carries, under cpedSymbol_, an External wrapping the
 * PersistentContextPtr that SetContext() installed.
 *
 * @param isolate the isolate whose current continuation is inspected.
 * @return the stored context, or an empty pointer when no context is
 *         available (no CPED object, failed property lookup, or no External
 *         stored under cpedSymbol_).
 */
ContextPtr WallProfiler::GetContextPtr(Isolate* isolate) {
  if (!useCPED_) {
    return curContext_.Get();
  }

  auto cped = isolate->GetContinuationPreservedEmbedderData();
  if (!cped->IsObject()) return ContextPtr();

  auto cpedObj = cped.As<Object>();
  auto localSymbol = cpedSymbol_.Get(isolate);
  Local<Value> profData;
  if (!cpedObj->Get(isolate->GetCurrentContext(), localSymbol)
           .ToLocal(&profData)) {
    return ContextPtr();
  }

  // Only an External can hold our PersistentContextPtr. This covers the
  // "nothing stored yet" (undefined) case and also refuses to cast and
  // dereference any other kind of value found under the symbol.
  if (!profData->IsExternal()) return ContextPtr();

  return static_cast<PersistentContextPtr*>(profData.As<External>()->Value())
      ->Get();
}
10041091

10051092
NAN_GETTER(WallProfiler::GetContext) {
@@ -1050,8 +1137,13 @@ void WallProfiler::OnGCStart(v8::Isolate* isolate) {
10501137
auto curCount = gcCount.load(std::memory_order_relaxed);
10511138
std::atomic_signal_fence(std::memory_order_acquire);
10521139
if (curCount == 0) {
1053-
gcAsyncId = GetAsyncIdNoGC(isolate);
1054-
}
1140+
if (collectAsyncId_) {
1141+
gcAsyncId = GetAsyncIdNoGC(isolate);
1142+
}
1143+
if (useCPED_) {
1144+
gcContext = GetContextPtrSignalSafe(isolate);
1145+
}
1146+
}
10551147
gcCount.store(curCount + 1, std::memory_order_relaxed);
10561148
std::atomic_signal_fence(std::memory_order_release);
10571149
}
@@ -1060,23 +1152,28 @@ void WallProfiler::OnGCEnd() {
10601152
auto newCount = gcCount.load(std::memory_order_relaxed) - 1;
10611153
std::atomic_signal_fence(std::memory_order_acquire);
10621154
gcCount.store(newCount, std::memory_order_relaxed);
1063-
std::atomic_signal_fence(std::memory_order_release);
10641155
if (newCount == 0) {
10651156
gcAsyncId = -1;
1157+
if (useCPED_) {
1158+
gcContext.reset();
1159+
}
10661160
}
1161+
std::atomic_signal_fence(std::memory_order_release);
10671162
}
10681163

10691164
void WallProfiler::PushContext(int64_t time_from,
10701165
int64_t time_to,
10711166
int64_t cpu_time,
1072-
int64_t async_id) {
1167+
Isolate* isolate) {
10731168
// Be careful this is called in a signal handler context therefore all
10741169
// operations must be async signal safe (in particular no allocations).
10751170
// Our ring buffer avoids allocations.
1076-
auto context = curContext_.load(std::memory_order_relaxed);
1077-
std::atomic_signal_fence(std::memory_order_acquire);
10781171
if (contexts_.size() < contexts_.capacity()) {
1079-
contexts_.push_back({*context, time_from, time_to, cpu_time, async_id});
1172+
contexts_.push_back({GetContextPtrSignalSafe(isolate),
1173+
time_from,
1174+
time_to,
1175+
cpu_time,
1176+
GetAsyncId(isolate)});
10801177
std::atomic_fetch_add_explicit(
10811178
reinterpret_cast<std::atomic<uint32_t>*>(&fields_[kSampleCount]),
10821179
1U,

bindings/profilers/wall.hh

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,29 +37,53 @@ struct Result {
3737
std::string msg;
3838
};
3939

40+
using ContextPtr = std::shared_ptr<v8::Global<v8::Value>>;
41+
42+
class AtomicContextPtr {
43+
ContextPtr ptr1;
44+
ContextPtr ptr2;
45+
std::atomic<ContextPtr*> currentPtr = &ptr1;
46+
47+
void Set(v8::Isolate* isolate, v8::Local<v8::Value> value) {
48+
auto newPtr =
49+
currentPtr.load(std::memory_order_relaxed) == &ptr1 ? &ptr2 : &ptr1;
50+
if (!value->IsNullOrUndefined()) {
51+
*newPtr = std::make_shared<v8::Global<v8::Value>>(isolate, value);
52+
} else {
53+
newPtr->reset();
54+
}
55+
std::atomic_signal_fence(std::memory_order_release);
56+
currentPtr.store(newPtr, std::memory_order_relaxed);
57+
}
58+
59+
ContextPtr Get() {
60+
auto ptr = currentPtr.load(std::memory_order_relaxed);
61+
std::atomic_signal_fence(std::memory_order_acquire);
62+
return ptr ? *ptr : std::shared_ptr<v8::Global<v8::Value>>();
63+
}
64+
65+
friend class WallProfiler;
66+
};
67+
4068
class WallProfiler : public Nan::ObjectWrap {
4169
public:
4270
enum class CollectionMode { kNoCollect, kPassThrough, kCollectContexts };
4371

4472
private:
4573
enum Fields { kSampleCount, kFieldCount };
4674

47-
using ContextPtr = std::shared_ptr<v8::Global<v8::Value>>;
48-
4975
std::chrono::microseconds samplingPeriod_{0};
5076
v8::CpuProfiler* cpuProfiler_ = nullptr;
51-
// TODO: Investigate use of v8::Persistent instead of shared_ptr<Global> to
52-
// avoid heap allocation. Need to figure out the right move/copy semantics in
53-
// and out of the ring buffer.
5477

55-
// We're using a pair of shared pointers and an atomic pointer-to-current as
56-
// a way to ensure signal safety on update.
57-
ContextPtr context1_;
58-
ContextPtr context2_;
59-
std::atomic<ContextPtr*> curContext_;
78+
bool useCPED_ = false;
79+
// If we aren't using the CPED, we use a single context ptr stored here.
80+
AtomicContextPtr curContext_;
81+
// Otherwise we'll use a private symbol to store the context in CPED objects.
82+
v8::Global<v8::Symbol> cpedSymbol_;
6083

6184
std::atomic<int> gcCount = 0;
6285
int64_t gcAsyncId;
86+
ContextPtr gcContext;
6387

6488
std::atomic<CollectionMode> collectionMode_;
6589
std::atomic<uint64_t> noCollectCallCount_;
@@ -106,6 +130,8 @@ class WallProfiler : public Nan::ObjectWrap {
106130
GENERAL_REGS_ONLY;
107131

108132
bool waitForSignal(uint64_t targetCallCount = 0);
133+
ContextPtr GetContextPtr(v8::Isolate* isolate);
134+
ContextPtr GetContextPtrSignalSafe(v8::Isolate* isolate);
109135

110136
public:
111137
/**
@@ -114,6 +140,10 @@ class WallProfiler : public Nan::ObjectWrap {
114140
* parameter is informative; it is up to the caller to call the Stop method
115141
* every period. The parameter is used to preallocate data structures that
116142
* should not be reallocated in async signal safe code.
143+
* @param useCPED whether to use the V8 ContinuationPreservedEmbedderData
144+
* to store the current sampling context. It can be used if AsyncLocalStorage
145+
* uses the AsyncContextFrame implementation (experimental in Node 23, default
146+
* in Node 24.)
117147
*/
118148
explicit WallProfiler(std::chrono::microseconds samplingPeriod,
119149
std::chrono::microseconds duration,
@@ -122,14 +152,15 @@ class WallProfiler : public Nan::ObjectWrap {
122152
bool workaroundV8bug,
123153
bool collectCpuTime,
124154
bool collectAsyncId,
125-
bool isMainThread);
155+
bool isMainThread,
156+
bool useCPED);
126157

127158
v8::Local<v8::Value> GetContext(v8::Isolate*);
128159
void SetContext(v8::Isolate*, v8::Local<v8::Value>);
129160
void PushContext(int64_t time_from,
130161
int64_t time_to,
131162
int64_t cpu_time,
132-
int64_t async_id);
163+
v8::Isolate* isolate);
133164
Result StartImpl();
134165
std::string StartInternal();
135166
Result StopImpl(bool restart,

ts/src/time-profiler.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ export function start(options: TimeProfilerOptions = {}) {
9393
throw new Error('Wall profiler is already started');
9494
}
9595

96-
gProfiler = new TimeProfiler({...options, isMainThread});
96+
gProfiler = new TimeProfiler({...options, isMainThread, useCPED: false});
9797
gSourceMapper = options.sourceMapper;
9898
gIntervalMicros = options.intervalMicros!;
9999
gV8ProfilerStuckEventLoopDetected = 0;

0 commit comments

Comments
 (0)