Skip to content

Commit d213a01

Browse files
authored
Only collect async ID when it's safe to do so (#197)
* Guard against collecting async ID while performing GC * Additional sanity checks: - don't sample a dead isolate - only try to retrieve an async ID when there's a context * Move GetAsyncId and GC callbacks out of the header file. Use relaxed load/store with appropriate signal fences for cheaper reading/writing of gcCount. * Have a config option for collecting async IDs
1 parent 51951b7 commit d213a01

File tree

4 files changed

+159
-58
lines changed

4 files changed

+159
-58
lines changed

bindings/profilers/wall.cc

Lines changed: 99 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,9 @@ void SignalHandler::HandleProfilerSignal(int sig,
291291
return;
292292
}
293293
auto isolate = Isolate::GetCurrent();
294+
if (!isolate || isolate->IsDead()) {
295+
return;
296+
}
294297
WallProfiler* prof = g_profilers.GetProfiler(isolate);
295298

296299
if (!prof) {
@@ -315,9 +318,7 @@ void SignalHandler::HandleProfilerSignal(int sig,
315318
auto time_from = Now();
316319
old_handler(sig, info, context);
317320
auto time_to = Now();
318-
int64_t async_id = -1;
319-
// don't capture for now until we work out the issues with GC and thread start
320-
// static_cast<int64_t>(node::AsyncHooksGetExecutionAsyncId(isolate));
321+
auto async_id = prof->GetAsyncId(isolate);
321322
prof->PushContext(time_from, time_to, cpu_time, async_id);
322323
}
323324
#else
@@ -473,11 +474,13 @@ std::shared_ptr<ContextsByNode> WallProfiler::GetContextsByNode(
473474
sampleContext.context.get()->Get(isolate))
474475
.Check();
475476
}
476-
timedContext
477-
->Set(v8Context,
478-
asyncIdKey,
479-
NewNumberFromInt64(isolate, sampleContext.async_id))
480-
.Check();
477+
if (collectAsyncId_) {
478+
timedContext
479+
->Set(v8Context,
480+
asyncIdKey,
481+
NewNumberFromInt64(isolate, sampleContext.async_id))
482+
.Check();
483+
}
481484
}
482485
}
483486
array->Set(v8Context, array->Length(), timedContext).Check();
@@ -492,12 +495,27 @@ std::shared_ptr<ContextsByNode> WallProfiler::GetContextsByNode(
492495
return contextsByNode;
493496
}
494497

498+
void GCPrologueCallback(Isolate* isolate,
499+
GCType type,
500+
GCCallbackFlags flags,
501+
void* data) {
502+
static_cast<WallProfiler*>(data)->OnGCStart(isolate);
503+
}
504+
505+
void GCEpilogueCallback(Isolate* isolate,
506+
GCType type,
507+
GCCallbackFlags flags,
508+
void* data) {
509+
static_cast<WallProfiler*>(data)->OnGCEnd();
510+
}
511+
495512
WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
496513
std::chrono::microseconds duration,
497514
bool includeLines,
498515
bool withContexts,
499516
bool workaroundV8Bug,
500517
bool collectCpuTime,
518+
bool collectAsyncId,
501519
bool isMainThread)
502520
: samplingPeriod_(samplingPeriod),
503521
includeLines_(includeLines),
@@ -509,14 +527,17 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
509527
// event just after triggers the issue.
510528
workaroundV8Bug_ = workaroundV8Bug && DD_WALL_USE_SIGPROF && detectV8Bug_;
511529
collectCpuTime_ = collectCpuTime && withContexts;
530+
collectAsyncId_ = collectAsyncId && withContexts;
512531

513532
if (withContexts_) {
514533
contexts_.reserve(duration * 2 / samplingPeriod);
515534
}
516535

517536
curContext_.store(&context1_, std::memory_order_relaxed);
518537
collectionMode_.store(CollectionMode::kNoCollect, std::memory_order_relaxed);
538+
gcCount.store(0, std::memory_order_relaxed);
519539

540+
// TODO: bind to this isolate? Would fix the Dispose(nullptr) issue.
520541
auto isolate = v8::Isolate::GetCurrent();
521542
v8::Local<v8::ArrayBuffer> buffer =
522543
v8::ArrayBuffer::New(isolate, sizeof(uint32_t) * kFieldCount);
@@ -526,6 +547,11 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod,
526547
fields_ = static_cast<uint32_t*>(buffer->GetBackingStore()->Data());
527548
jsArray_ = v8::Global<v8::Uint32Array>(isolate, jsArray);
528549
std::fill(fields_, fields_ + kFieldCount, 0);
550+
551+
if (collectAsyncId_) {
552+
isolate->AddGCPrologueCallback(&GCPrologueCallback, this);
553+
isolate->AddGCEpilogueCallback(&GCEpilogueCallback, this);
554+
}
529555
}
530556

531557
WallProfiler::~WallProfiler() {
@@ -538,9 +564,22 @@ void WallProfiler::Dispose(Isolate* isolate) {
538564
cpuProfiler_ = nullptr;
539565

540566
g_profilers.RemoveProfiler(isolate, this);
567+
568+
if (isolate != nullptr && collectAsyncId_) {
569+
isolate->RemoveGCPrologueCallback(&GCPrologueCallback, this);
570+
isolate->RemoveGCEpilogueCallback(&GCEpilogueCallback, this);
571+
}
541572
}
542573
}
543574

575+
#define DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(name) \
576+
auto name##Value = \
577+
Nan::Get(arg, Nan::New<v8::String>(#name).ToLocalChecked()); \
578+
if (name##Value.IsEmpty() || !name##Value.ToLocalChecked()->IsBoolean()) { \
579+
return Nan::ThrowTypeError(#name " must be a boolean."); \
580+
} \
581+
bool name = name##Value.ToLocalChecked().As<v8::Boolean>()->Value();
582+
544583
NAN_METHOD(WallProfiler::New) {
545584
if (info.Length() != 1 || !info[0]->IsObject()) {
546585
return Nan::ThrowTypeError("WallProfiler must have one object argument.");
@@ -579,50 +618,12 @@ NAN_METHOD(WallProfiler::New) {
579618
return Nan::ThrowTypeError("Duration must not be less than sample rate.");
580619
}
581620

582-
auto lineNumbersValue =
583-
Nan::Get(arg, Nan::New<v8::String>("lineNumbers").ToLocalChecked());
584-
if (lineNumbersValue.IsEmpty() ||
585-
!lineNumbersValue.ToLocalChecked()->IsBoolean()) {
586-
return Nan::ThrowTypeError("lineNumbers must be a boolean.");
587-
}
588-
bool lineNumbers =
589-
lineNumbersValue.ToLocalChecked().As<v8::Boolean>()->Value();
590-
591-
auto withContextsValue =
592-
Nan::Get(arg, Nan::New<v8::String>("withContexts").ToLocalChecked());
593-
if (withContextsValue.IsEmpty() ||
594-
!withContextsValue.ToLocalChecked()->IsBoolean()) {
595-
return Nan::ThrowTypeError("withContext must be a boolean.");
596-
}
597-
bool withContexts =
598-
withContextsValue.ToLocalChecked().As<v8::Boolean>()->Value();
599-
600-
auto workaroundV8BugValue =
601-
Nan::Get(arg, Nan::New<v8::String>("workaroundV8Bug").ToLocalChecked());
602-
if (workaroundV8BugValue.IsEmpty() ||
603-
!workaroundV8BugValue.ToLocalChecked()->IsBoolean()) {
604-
return Nan::ThrowTypeError("workaroundV8Bug must be a boolean.");
605-
}
606-
bool workaroundV8Bug =
607-
workaroundV8BugValue.ToLocalChecked().As<v8::Boolean>()->Value();
608-
609-
auto collectCpuTimeValue =
610-
Nan::Get(arg, Nan::New<v8::String>("collectCpuTime").ToLocalChecked());
611-
if (collectCpuTimeValue.IsEmpty() ||
612-
!collectCpuTimeValue.ToLocalChecked()->IsBoolean()) {
613-
return Nan::ThrowTypeError("collectCpuTime must be a boolean.");
614-
}
615-
bool collectCpuTime =
616-
collectCpuTimeValue.ToLocalChecked().As<v8::Boolean>()->Value();
617-
618-
auto isMainThreadValue =
619-
Nan::Get(arg, Nan::New<v8::String>("isMainThread").ToLocalChecked());
620-
if (isMainThreadValue.IsEmpty() ||
621-
!isMainThreadValue.ToLocalChecked()->IsBoolean()) {
622-
return Nan::ThrowTypeError("isMainThread must be a boolean.");
623-
}
624-
bool isMainThread =
625-
isMainThreadValue.ToLocalChecked().As<v8::Boolean>()->Value();
621+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(lineNumbers);
622+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(withContexts);
623+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(workaroundV8Bug);
624+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(collectCpuTime);
625+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(collectAsyncId);
626+
DD_WALL_PROFILER_GET_BOOLEAN_CONFIG(isMainThread);
626627

627628
if (withContexts && !DD_WALL_USE_SIGPROF) {
628629
return Nan::ThrowTypeError("Contexts are not supported.");
@@ -632,6 +633,10 @@ NAN_METHOD(WallProfiler::New) {
632633
return Nan::ThrowTypeError("Cpu time collection requires contexts.");
633634
}
634635

636+
if (collectAsyncId && !withContexts) {
637+
return Nan::ThrowTypeError("Async ID collection requires contexts.");
638+
}
639+
635640
if (lineNumbers && withContexts) {
636641
// Currently custom contexts are not compatible with caller line
637642
// information, because it's not possible to associate context with line
@@ -657,6 +662,7 @@ NAN_METHOD(WallProfiler::New) {
657662
withContexts,
658663
workaroundV8Bug,
659664
collectCpuTime,
665+
collectAsyncId,
660666
isMainThread);
661667
obj->Wrap(info.This());
662668
info.GetReturnValue().Set(info.This());
@@ -668,6 +674,8 @@ NAN_METHOD(WallProfiler::New) {
668674
}
669675
}
670676

677+
#undef DD_WALL_PROFILER_GET_BOOLEAN_CONFIG
678+
671679
NAN_METHOD(WallProfiler::Start) {
672680
WallProfiler* wallProfiler =
673681
Nan::ObjectWrap::Unwrap<WallProfiler>(info.Holder());
@@ -1019,6 +1027,45 @@ NAN_METHOD(WallProfiler::Dispose) {
10191027
delete profiler;
10201028
}
10211029

1030+
int64_t GetAsyncIdNoGC(v8::Isolate* isolate) {
1031+
return isolate->InContext()
1032+
? static_cast<int64_t>(
1033+
node::AsyncHooksGetExecutionAsyncId(isolate))
1034+
: -1;
1035+
}
1036+
1037+
int64_t WallProfiler::GetAsyncId(v8::Isolate* isolate) {
1038+
if (!collectAsyncId_) {
1039+
return -1;
1040+
}
1041+
auto curGcCount = gcCount.load(std::memory_order_relaxed);
1042+
std::atomic_signal_fence(std::memory_order_acquire);
1043+
if (curGcCount > 0) {
1044+
return gcAsyncId;
1045+
}
1046+
return GetAsyncIdNoGC(isolate);
1047+
}
1048+
1049+
void WallProfiler::OnGCStart(v8::Isolate* isolate) {
1050+
auto curCount = gcCount.load(std::memory_order_relaxed);
1051+
std::atomic_signal_fence(std::memory_order_acquire);
1052+
if (curCount == 0) {
1053+
gcAsyncId = GetAsyncIdNoGC(isolate);
1054+
}
1055+
gcCount.store(curCount + 1, std::memory_order_relaxed);
1056+
std::atomic_signal_fence(std::memory_order_release);
1057+
}
1058+
1059+
void WallProfiler::OnGCEnd() {
1060+
auto newCount = gcCount.load(std::memory_order_relaxed) - 1;
1061+
std::atomic_signal_fence(std::memory_order_acquire);
1062+
gcCount.store(newCount, std::memory_order_relaxed);
1063+
std::atomic_signal_fence(std::memory_order_release);
1064+
if (newCount == 0) {
1065+
gcAsyncId = -1;
1066+
}
1067+
}
1068+
10221069
void WallProfiler::PushContext(int64_t time_from,
10231070
int64_t time_to,
10241071
int64_t cpu_time,

bindings/profilers/wall.hh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ class WallProfiler : public Nan::ObjectWrap {
5858
ContextPtr context2_;
5959
std::atomic<ContextPtr*> curContext_;
6060

61+
std::atomic<int> gcCount = 0;
62+
int64_t gcAsyncId;
63+
6164
std::atomic<CollectionMode> collectionMode_;
6265
std::atomic<uint64_t> noCollectCallCount_;
6366
std::string profileId_;
@@ -68,6 +71,7 @@ class WallProfiler : public Nan::ObjectWrap {
6871
bool workaroundV8Bug_;
6972
static inline constexpr bool detectV8Bug_ = true;
7073
bool collectCpuTime_;
74+
bool collectAsyncId_;
7175
bool isMainThread_;
7276
int v8ProfilerStuckEventLoopDetected_ = 0;
7377
ProcessCpuClock::time_point startProcessCpuTime_{};
@@ -117,6 +121,7 @@ class WallProfiler : public Nan::ObjectWrap {
117121
bool withContexts,
118122
bool workaroundV8bug,
119123
bool collectCpuTime,
124+
bool collectAsyncId,
120125
bool isMainThread);
121126

122127
v8::Local<v8::Value> GetContext(v8::Isolate*);
@@ -149,6 +154,10 @@ class WallProfiler : public Nan::ObjectWrap {
149154
return threadCpuStopWatch_.GetAndReset();
150155
}
151156

157+
int64_t GetAsyncId(v8::Isolate* isolate);
158+
void OnGCStart(v8::Isolate* isolate);
159+
void OnGCEnd();
160+
152161
static NAN_METHOD(New) GENERAL_REGS_ONLY;
153162
static NAN_METHOD(Start);
154163
static NAN_METHOD(Stop);

ts/src/time-profiler.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ export interface TimeProfilerOptions {
6666
withContexts?: boolean;
6767
workaroundV8Bug?: boolean;
6868
collectCpuTime?: boolean;
69+
collectAsyncId?: boolean;
6970
}
7071

7172
const DEFAULT_OPTIONS: TimeProfilerOptions = {
@@ -75,6 +76,7 @@ const DEFAULT_OPTIONS: TimeProfilerOptions = {
7576
withContexts: false,
7677
workaroundV8Bug: true,
7778
collectCpuTime: false,
79+
collectAsyncId: false,
7880
};
7981

8082
export async function profile(options: TimeProfilerOptions = {}) {

ts/test/test-time-profiler.ts

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ describe('Time Profiler', () => {
3939
assert.equal(profile.stringTable.strings!.indexOf('(program)'), -1);
4040
});
4141

42-
it('should update state', function () {
42+
it('should update state', function shouldUpdateState() {
4343
if (process.platform !== 'darwin' && process.platform !== 'linux') {
4444
this.skip();
4545
}
@@ -90,7 +90,7 @@ describe('Time Profiler', () => {
9090
assert(checked, 'No context found');
9191
});
9292

93-
it('should assign labels', function () {
93+
it('should have labels', function shouldHaveLabels() {
9494
if (process.platform !== 'darwin' && process.platform !== 'linux') {
9595
this.skip();
9696
}
@@ -129,10 +129,8 @@ describe('Time Profiler', () => {
129129
if (!context) {
130130
return {};
131131
}
132-
assert(
133-
typeof context.asyncId === 'number',
134-
'context.asyncId should be a number'
135-
);
132+
// Does not collect async IDs by default
133+
assert(typeof context.asyncId === 'undefined');
136134
const labels: LabelSet = {};
137135
for (const [key, value] of Object.entries(context.context ?? {})) {
138136
if (typeof value === 'string') {
@@ -356,6 +354,51 @@ describe('Time Profiler', () => {
356354
});
357355
});
358356

357+
it('should have async IDs when enabled', async function shouldCollectAsyncIDs() {
358+
if (process.platform !== 'darwin' && process.platform !== 'linux') {
359+
this.skip();
360+
}
361+
this.timeout(3000);
362+
363+
time.start({
364+
intervalMicros: PROFILE_OPTIONS.intervalMicros,
365+
durationMillis: PROFILE_OPTIONS.durationMillis,
366+
withContexts: true,
367+
lineNumbers: false,
368+
collectAsyncId: true,
369+
});
370+
let setDone: () => void;
371+
const done = new Promise<void>(resolve => {
372+
setDone = resolve;
373+
});
374+
375+
const testStart = hrtime.bigint();
376+
const testDurationNanos = PROFILE_OPTIONS.durationMillis * 1_000_000;
377+
setTimeout(loop, 0);
378+
379+
function loop() {
380+
const loopDurationNanos = PROFILE_OPTIONS.intervalMicros * 1_000;
381+
const loopStart = hrtime.bigint();
382+
while (hrtime.bigint() - loopStart < loopDurationNanos);
383+
if (hrtime.bigint() - testStart < testDurationNanos) {
384+
setTimeout(loop, 0);
385+
} else {
386+
setDone();
387+
}
388+
}
389+
390+
await done;
391+
392+
let asyncIdObserved = false;
393+
time.stop(false, ({context}: GenerateTimeLabelsArgs) => {
394+
if (!asyncIdObserved && typeof context?.asyncId === 'number') {
395+
asyncIdObserved = context?.asyncId !== -1;
396+
}
397+
return {};
398+
});
399+
assert(asyncIdObserved, 'Async ID was not observed');
400+
});
401+
359402
describe('profile (w/ stubs)', () => {
360403
// eslint-disable-next-line @typescript-eslint/no-explicit-any
361404
const sinonStubs: Array<sinon.SinonStub<any, any>> = [];

0 commit comments

Comments
 (0)