Skip to content

Commit 5327824

Browse files
authored
tasks,debugging: make it possible to get the backtrace of a task (#32283)
This should work for any non-copy-stack task. To make it work better, this now switches to JL_HAVE_UNW_CONTEXT by default on some platforms. It also exports the list of all live (currently running or suspended) tasks that have real stacks (i.e. the non-copy-stack tasks) and were started by the current thread.
1 parent fad55ab commit 5327824

File tree

8 files changed

+332
-223
lines changed

8 files changed

+332
-223
lines changed

src/gc-stacks.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,12 @@ void sweep_stack_pools(void)
216216
continue;
217217
while (1) {
218218
jl_task_t *t = (jl_task_t*)lst[n];
219+
assert(jl_is_task(t));
219220
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
220-
n++;
221+
if (t->stkbuf == NULL)
222+
ndel++; // jl_release_task_stack called
223+
else
224+
n++;
221225
}
222226
else {
223227
ndel++;
@@ -243,3 +247,30 @@ void sweep_stack_pools(void)
243247
live_tasks->len -= ndel;
244248
}
245249
}
250+
251+
// Returns a newly allocated array (from jl_alloc_vec_any) whose first element is
// ptls->root_task, followed by every entry of ptls->heap.live_tasks that still
// has a non-NULL stkbuf. The array is trimmed to the number of slots actually filled.
JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
252+
{
253+
jl_ptls_t ptls = jl_get_ptls_states();
254+
arraylist_t *live_tasks = &ptls->heap.live_tasks;
255+
size_t i, j, l;
256+
jl_array_t *a;
257+
// Size the array for the root task plus every list entry. The allocation may
// run the GC (see the comment below), so the list length is re-checked and the
// allocation retried while the list is more than one entry longer than sized for.
// NOTE(review): if the list grows by exactly one entry during the allocation,
// the loop exits with only l + 1 slots while up to l + 2 could be written below
// -- confirm whether live_tasks->len can grow here.
do {
258+
l = live_tasks->len;
259+
a = jl_alloc_vec_any(l + 1); // may gc
260+
} while (l + 1 < live_tasks->len);
261+
// Re-read the length: it may differ from the value used to size the array.
l = live_tasks->len;
262+
void **lst = live_tasks->items;
263+
j = 0;
264+
// Slot 0 always holds the root task; it is stored unconditionally.
((void**)jl_array_data(a))[j++] = ptls->root_task;
265+
for (i = 0; i < l; i++) {
266+
// Skip entries without a stack buffer (the sweep_stack_pools hunk treats a
// NULL stkbuf as "jl_release_task_stack called").
if (((jl_task_t*)lst[i])->stkbuf != NULL)
267+
((void**)jl_array_data(a))[j++] = lst[i];
268+
}
269+
// Drop the unused trailing slots; `a` is pushed as a GC root for the duration
// of the jl_array_del_end call.
l = jl_array_len(a);
270+
if (j < l) {
271+
JL_GC_PUSH1(&a);
272+
jl_array_del_end(a, l - j);
273+
JL_GC_POP();
274+
}
275+
return a;
276+
}

src/julia.expmap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
add_library_mapping;
3333
utf8proc_*;
3434
jlbacktrace;
35+
jlbacktracet;
3536
julia_type_to_llvm;
3637
_IO_stdin_used;
3738
__ZN4llvm23createLowerSimdLoopPassEv;

src/julia.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1762,7 +1762,17 @@ typedef struct _jl_task_t {
17621762
// current exception handler
17631763
jl_handler_t *eh;
17641764

1765-
jl_ucontext_t ctx; // saved thread state
1765+
union {
1766+
jl_ucontext_t ctx; // saved thread state
1767+
#ifdef _OS_WINDOWS_
1768+
jl_ucontext_t copy_stack_ctx;
1769+
#else
1770+
struct jl_stack_context_t copy_stack_ctx;
1771+
#endif
1772+
};
1773+
#if defined(JL_TSAN_ENABLED)
1774+
void *tsan_state;
1775+
#endif
17661776
void *stkbuf; // malloc'd memory (either copybuf or stack)
17671777
size_t bufsz; // actual sizeof stkbuf
17681778
unsigned int copy_stack:31; // sizeof stack for copybuf

src/julia_threads.h

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616

1717
// Options for task switching algorithm (in order of preference):
1818
// JL_HAVE_ASM -- mostly setjmp
19-
// JL_HAVE_ASYNCIFY -- task switching based on the binaryen asyncify transform
20-
// JL_HAVE_UNW_CONTEXT -- hybrid of libunwind for start, setjmp for resume
19+
// JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based
20+
// JL_HAVE_UNW_CONTEXT -- libunwind-based
21+
// JL_HAVE_ASYNCIFY -- task switching based on the binaryen asyncify transform
2122
// JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume
2223
// JL_HAVE_SIGALTSTACK -- requires several syscall for start, setjmp for resume
2324

@@ -33,24 +34,25 @@ typedef win32_ucontext_t jl_ucontext_t;
3334
#if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) || \
3435
defined(_CPU_ARM_) || defined(_CPU_PPC64_))
3536
#define JL_HAVE_ASM
36-
#elif defined(_OS_DARWIN_)
37+
#endif
38+
#if defined(_OS_DARWIN_)
3739
#define JL_HAVE_UNW_CONTEXT
38-
#elif defined(_OS_LINUX_)
39-
#define JL_HAVE_UCONTEXT
40+
//#elif defined(_OS_LINUX_)
41+
//#define JL_HAVE_UNW_CONTEXT // very slow, but more debugging
4042
#elif defined(_OS_EMSCRIPTEN_)
4143
#define JL_HAVE_ASYNCIFY
42-
#else
43-
#define JL_HAVE_UNW_CONTEXT
44+
#elif !defined(JL_HAVE_ASM)
45+
#define JL_HAVE_UNW_CONTEXT // optimistically?
4446
#endif
4547
#endif
4648

47-
#if defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK)
48-
typedef struct {
49+
50+
struct jl_stack_context_t {
4951
jl_jmp_buf uc_mcontext;
50-
#if defined(JL_TSAN_ENABLED)
51-
void *tsan_state;
52-
#endif
53-
} jl_ucontext_t;
52+
};
53+
54+
#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
55+
typedef struct jl_stack_context_t jl_ucontext_t;
5456
#endif
5557
#if defined(JL_HAVE_ASYNCIFY)
5658
#if defined(JL_TSAN_ENABLED)
@@ -65,15 +67,14 @@ typedef struct {
6567
void *stacktop;
6668
} jl_ucontext_t;
6769
#endif
68-
#if defined(JL_HAVE_UCONTEXT) || defined(JL_HAVE_UNW_CONTEXT)
70+
#if defined(JL_HAVE_UNW_CONTEXT)
6971
#define UNW_LOCAL_ONLY
7072
#include <libunwind.h>
71-
typedef struct {
72-
ucontext_t ctx;
73-
#if defined(JL_TSAN_ENABLED)
74-
void *tsan_state;
73+
typedef unw_context_t jl_ucontext_t;
7574
#endif
76-
} jl_ucontext_t;
75+
#if defined(JL_HAVE_UCONTEXT)
76+
#include <ucontext.h>
77+
typedef ucontext_t jl_ucontext_t;
7778
#endif
7879
#endif
7980

@@ -210,7 +211,15 @@ struct _jl_tls_states_t {
210211
struct _jl_timing_block_t *timing_stack;
211212
void *stackbase;
212213
size_t stacksize;
213-
jl_ucontext_t base_ctx; // base context of stack
214+
union {
215+
jl_ucontext_t base_ctx; // base context of stack
216+
// This hack is needed to support always_copy_stacks:
217+
#ifdef _OS_WINDOWS_
218+
jl_ucontext_t copy_stack_ctx;
219+
#else
220+
struct jl_stack_context_t copy_stack_ctx;
221+
#endif
222+
};
214223
jl_jmp_buf *safe_restore;
215224
// Temp storage for exception thrown in signal handler. Not rooted.
216225
struct _jl_value_t *sig_exception;

src/partr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ uint64_t io_wakeup_leave;
4040
JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT
4141
{
4242
// Try to acquire the lock on this task.
43-
int16_t was = task->tid;
43+
int16_t was = jl_atomic_load_relaxed(&task->tid);
4444
if (was == tid)
4545
return 1;
4646
if (was == -1)

src/signals-unix.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343

4444
#include "julia_assert.h"
4545

46+
// helper function for returning the unw_context_t inside a ucontext_t
4647
static bt_context_t *jl_to_bt_context(void *sigctx)
4748
{
4849
#ifdef __APPLE__

src/stackwalk.c

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@
1414
// returning from the callee function will invalidate the context
1515
#ifdef _OS_WINDOWS_
1616
jl_mutex_t jl_in_stackwalk;
17-
#define jl_unw_get(context) RtlCaptureContext(context)
17+
#define jl_unw_get(context) (RtlCaptureContext(context), 0)
1818
#elif !defined(JL_DISABLE_LIBUNWIND)
1919
#define jl_unw_get(context) unw_getcontext(context)
2020
#else
21-
void jl_unw_get(void *context) {};
21+
int jl_unw_get(void *context) { return -1; }
2222
#endif
2323

2424
#ifdef __cplusplus
@@ -204,7 +204,9 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip
204204
{
205205
bt_context_t context;
206206
memset(&context, 0, sizeof(context));
207-
jl_unw_get(&context);
207+
int r = jl_unw_get(&context);
208+
if (r < 0)
209+
return 0;
208210
jl_gcframe_t *pgcstack = jl_pgcstack;
209211
bt_cursor_t cursor;
210212
if (!jl_unw_init(&cursor, &context))
@@ -239,9 +241,9 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
239241
bt_context_t context;
240242
bt_cursor_t cursor;
241243
memset(&context, 0, sizeof(context));
242-
jl_unw_get(&context);
244+
int r = jl_unw_get(&context);
243245
jl_gcframe_t *pgcstack = jl_pgcstack;
244-
if (jl_unw_init(&cursor, &context)) {
246+
if (r == 0 && jl_unw_init(&cursor, &context)) {
245247
// Skip frame for jl_backtrace_from_here itself
246248
skip += 1;
247249
size_t offset = 0;
@@ -688,8 +690,59 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
688690
}
689691
}
690692

693+
extern bt_context_t *jl_to_bt_context(void *sigctx);
694+
695+
// Records a backtrace for task `t` into the calling thread's ptls->bt_data and
// sets ptls->bt_size to the number of elements written.
// ptls->bt_size is left at 0 when no backtrace is taken: `t` is a copy-stack
// task, has not started, has no stkbuf, is held by a different tid, or the build
// configuration provides no way to derive an unwind context from t->ctx.
void jl_rec_backtrace(jl_task_t *t)
696+
{
697+
jl_ptls_t ptls = jl_get_ptls_states();
698+
ptls->bt_size = 0;
699+
// The current task is unwound directly from here.
if (t == ptls->current_task) {
700+
ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
701+
return;
702+
}
703+
if (t->copy_stack || !t->started || t->stkbuf == NULL)
704+
return;
705+
// Try to claim the task by swapping its tid from -1 to ours.
// NOTE(review): presumably jl_atomic_compare_exchange returns the previous
// value of t->tid -- confirm. Bail out if another tid already holds the task.
int old = jl_atomic_compare_exchange(&t->tid, -1, ptls->tid);
706+
if (old != -1 && old != ptls->tid)
707+
return;
708+
bt_context_t *context = NULL;
709+
#if defined(_OS_WINDOWS_)
710+
// Build a zeroed bt_context_t and fill it from the registers stored in the
// task's saved jump buffer.
bt_context_t c;
711+
memset(&c, 0, sizeof(c));
712+
_JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.uc_mcontext;
713+
#if defined(_CPU_X86_64_)
714+
c.Rbx = mctx->Rbx;
715+
c.Rsp = mctx->Rsp;
716+
c.Rbp = mctx->Rbp;
717+
c.Rsi = mctx->Rsi;
718+
c.Rdi = mctx->Rdi;
719+
c.R12 = mctx->R12;
720+
c.R13 = mctx->R13;
721+
c.R14 = mctx->R14;
722+
c.R15 = mctx->R15;
723+
c.Rip = mctx->Rip;
724+
memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15
725+
#else
726+
c.Eip = mctx->Eip;
727+
c.Esp = mctx->Esp;
728+
c.Ebp = mctx->Ebp;
729+
#endif
730+
context = &c;
731+
#elif defined(JL_HAVE_UNW_CONTEXT)
732+
// The saved task context is used directly as the unwind context.
context = &t->ctx;
733+
#elif defined(JL_HAVE_UCONTEXT)
734+
// NOTE(review): the signals-unix.c hunk shows jl_to_bt_context declared
// `static`; confirm it is actually linkable from this file.
context = jl_to_bt_context(&t->ctx);
735+
#else
736+
// No supported way to obtain a context here: `context` stays NULL and nothing
// is recorded.
#endif
737+
if (context)
738+
ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack);
739+
// Release the task again only if this call was the one that claimed it.
if (old == -1)
740+
jl_atomic_store_relaxed(&t->tid, old);
741+
}
742+
691743
//--------------------------------------------------
692744
// Tools for interactive debugging in gdb
745+
693746
JL_DLLEXPORT void jl_gdblookup(void* ip)
694747
{
695748
jl_print_native_codeloc((uintptr_t)ip);
@@ -701,9 +754,19 @@ JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
701754
jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
702755
if (!s)
703756
return;
704-
size_t bt_size = jl_excstack_bt_size(s, s->top);
757+
size_t i, bt_size = jl_excstack_bt_size(s, s->top);
705758
jl_bt_element_t *bt_data = jl_excstack_bt_data(s, s->top);
706-
for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
759+
for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
760+
jl_print_bt_entry_codeloc(bt_data + i);
761+
}
762+
}
763+
// Debugging helper: records the backtrace of task `t` with jl_rec_backtrace and
// prints the code location of each recorded entry. Prints nothing when
// jl_rec_backtrace leaves ptls->bt_size at 0.
JL_DLLEXPORT void jlbacktracet(jl_task_t *t)
764+
{
765+
jl_ptls_t ptls = jl_get_ptls_states();
766+
// Fills ptls->bt_data / ptls->bt_size for the calling thread.
jl_rec_backtrace(t);
767+
size_t i, bt_size = ptls->bt_size;
768+
jl_bt_element_t *bt_data = ptls->bt_data;
769+
// Entries are variable-sized; advance by the size of the entry just printed.
for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
707770
jl_print_bt_entry_codeloc(bt_data + i);
708771
}
709772
}

0 commit comments

Comments
 (0)