Skip to content

Commit a4641c8

Browse files
authored
threading: improve safety posture of modifying global bindings (#35761)
Ensures that we are holding some lock (a new one) while mutating the internal global `jl_current_modules` table. While reading these binding values is not memory-safe to do simultaneously (it may invent pointers from thin-air and segfault), this commit ensures that a data conflict during the writes to them will not corrupt the values written into memory.
1 parent c3d6a46 commit a4641c8

File tree

8 files changed

+129
-50
lines changed

8 files changed

+129
-50
lines changed

doc/src/devdocs/locks.md

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@ The following are definitely leaf locks (level 1), and must not try to acquire a
3333
The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally:
3434

3535
> * typecache
36-
37-
The following is a level 2 lock:
38-
3936
> * Module->lock
4037
4138
The following is a level 3 lock, which can only acquire level 1 or level 2 locks internally:
@@ -48,9 +45,10 @@ The following is a level 4 lock, which can only recurse to acquire level 1, 2, o
4845
4946
No Julia code may be called while holding a lock above this point.
5047

51-
The following is a level 6 lock, which can only recurse to acquire locks at lower levels:
48+
The following are a level 6 lock, which can only recurse to acquire locks at lower levels:
5249

5350
> * codegen
51+
> * jl_modules_mutex
5452
5553
The following is an almost root lock (level end-1), meaning only the root look may be held when
5654
trying to acquire it:
@@ -94,6 +92,19 @@ The following locks are broken:
9492
>
9593
> fix: create it
9694
95+
* Module->lock
96+
97+
> This is vulnerable to deadlocks since it can't be certain it is acquired in sequence.
98+
> Some operations (such as `import_module`) are missing a lock.
99+
>
100+
> fix: replace with `jl_modules_mutex`?
101+
102+
* loading.jl: `require` and `register_root_module`
103+
104+
> This file potentially has numerous problems.
105+
>
106+
> fix: needs locks
107+
97108
## Shared Global Data Structures
98109

99110
These data structures each need locks due to being shared mutable global state. It is the inverse

doc/src/manual/multi-threading.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,51 @@ julia> Threads.threadid()
6262
three processes have 2 threads enabled. For more fine grained control over worker
6363
threads use [`addprocs`](@ref) and pass `-t`/`--threads` as `exeflags`.
6464

65+
## Data-race freedom
66+
67+
You are entirely responsible for ensuring that your program is data-race free,
68+
and nothing promised here can be assumed if you do not observe that
69+
requirement. The observed results may be highly unintuitive.
70+
71+
The best way to ensure this is to acquire a lock around any access to data that
72+
can be observed from multiple threads. For example, in most cases you should
73+
use the following code pattern:
74+
75+
```julia-repl
76+
julia> lock(a) do
77+
use(a)
78+
end
79+
80+
julia> begin
81+
lock(a)
82+
try
83+
use(a)
84+
finally
85+
unlock(a)
86+
end
87+
end
88+
```
89+
90+
Additionally, Julia is not memory safe in the presence of a data race. Be very
91+
careful about reading a global variable (or closure variable) if another thread
92+
might write to it! Instead, always use the lock pattern above when changing any
93+
data (such as assigning to a global) visible to multiple threads.
94+
95+
```julia
96+
Thread 1:
97+
global b = false
98+
global a = rand()
99+
global b = true
100+
101+
Thread 2:
102+
while !b; end
103+
bad(a) # it is NOT safe to access `a` here!
104+
105+
Thread 3:
106+
while !@isdefined(a); end
107+
use(a) # it is NOT safe to access `a` here
108+
```
109+
65110
## The `@threads` Macro
66111

67112
Let's work a simple example using our native threads. Let us create an array of zeros:

src/atomics.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@
7373
// TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
7474
# define jl_atomic_store(obj, val) \
7575
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
76+
# define jl_atomic_store_relaxed(obj, val) \
77+
__atomic_store_n(obj, val, __ATOMIC_RELAXED)
7678
# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
7779
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
7880
// ICC and Clang doesn't have this bug...
@@ -264,6 +266,10 @@ static inline void jl_atomic_store_release(volatile T *obj, T2 val)
264266
jl_signal_fence();
265267
*obj = (T)val;
266268
}
269+
static inline void jl_atomic_store_relaxed(volatile T *obj, T2 val)
270+
{
271+
*obj = (T)val;
272+
}
267273
// atomic loads
268274
template<typename T>
269275
static inline T jl_atomic_load(volatile T *obj)

src/codegen.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,6 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *
16921692
{
16931693
jl_binding_t *bnd = NULL;
16941694
Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false);
1695-
// TODO: refactor. this partially duplicates code in emit_var
16961695
if (bnd && bnd->value != NULL) {
16971696
if (bnd->constp) {
16981697
return mark_julia_const(bnd->value);

src/init.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,8 @@ static void jl_set_io_wait(int v)
618618
ptls->io_wait = v;
619619
}
620620

621+
extern jl_mutex_t jl_modules_mutex;
622+
621623
void _julia_init(JL_IMAGE_SEARCH rel)
622624
{
623625
jl_init_timing();
@@ -628,6 +630,7 @@ void _julia_init(JL_IMAGE_SEARCH rel)
628630
jl_safepoint_init();
629631
libsupport_init();
630632
htable_new(&jl_current_modules, 0);
633+
JL_MUTEX_INIT(&jl_modules_mutex);
631634
ios_set_io_wait_func = jl_set_io_wait;
632635
jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.),
633636
// best to call this first, since it also initializes libuv

src/julia.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,13 +1472,9 @@ JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSA
14721472
JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
14731473
JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var);
14741474
JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
1475-
JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT,
1476-
jl_sym_t *var,
1477-
jl_value_t *val JL_ROOTED_ARGUMENT);
1478-
JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT,
1479-
jl_sym_t *var,
1480-
jl_value_t *val JL_ROOTED_ARGUMENT);
1481-
JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs);
1475+
JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
1476+
JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
1477+
JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT);
14821478
JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b);
14831479
JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from);
14841480
JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s);

src/module.c

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ static jl_binding_t *new_binding(jl_sym_t *name)
118118
}
119119

120120
// get binding for assignment
121-
JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m, jl_sym_t *var, int error)
121+
JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int error)
122122
{
123123
JL_LOCK_NOGC(&m->lock);
124124
jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&m->bindings, var);
@@ -567,24 +567,23 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
567567
JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
568568
{
569569
jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
570-
// In a release build, simply ignore conflicting assignments (for backwards compatibility).
571-
// However, we want to start asserting that they do not occur, since that can cause `val`
572-
// not to be rooted when the caller expected it to be.
573-
assert(!bp->constp);
574-
if (!bp->constp) {
575-
bp->value = val;
576-
jl_gc_wb(m, val);
577-
}
570+
JL_GC_PROMISE_ROOTED(bp);
571+
jl_checked_assignment(bp, val);
578572
}
579573

580574
JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
581575
{
582576
jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
583-
assert(!bp->constp);
584-
if (jl_atomic_compare_exchange(&bp->constp, 0, 1) == 0) {
585-
bp->value = val;
586-
jl_gc_wb(m, val);
577+
if (bp->value == NULL) {
578+
if (jl_atomic_bool_compare_exchange(&bp->constp, 0, 1)) {
579+
if (jl_atomic_bool_compare_exchange(&bp->value, NULL, val)) {
580+
jl_gc_wb_binding(bp, val);
581+
return;
582+
}
583+
}
587584
}
585+
jl_errorf("invalid redefinition of constant %s",
586+
jl_symbol_name(bp->name));
588587
}
589588

590589
JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var)
@@ -700,18 +699,22 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b)
700699

701700
JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs)
702701
{
703-
if (b->constp && b->value != NULL) {
704-
if (!jl_egal(rhs, b->value)) {
705-
if (jl_typeof(rhs) != jl_typeof(b->value) ||
706-
jl_is_type(rhs) /*|| jl_is_function(rhs)*/ || jl_is_module(rhs)) {
707-
jl_errorf("invalid redefinition of constant %s",
708-
jl_symbol_name(b->name));
709-
}
710-
jl_printf(JL_STDERR, "WARNING: redefinition of constant %s. This may fail, cause incorrect answers, or produce other errors.\n",
702+
if (b->constp) {
703+
jl_value_t *old = jl_atomic_compare_exchange(&b->value, NULL, rhs);
704+
if (old == NULL) {
705+
jl_gc_wb_binding(b, rhs);
706+
return;
707+
}
708+
if (jl_egal(rhs, old))
709+
return;
710+
if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs)) {
711+
jl_errorf("invalid redefinition of constant %s",
711712
jl_symbol_name(b->name));
712713
}
714+
jl_printf(JL_STDERR, "WARNING: redefinition of constant %s. This may fail, cause incorrect answers, or produce other errors.\n",
715+
jl_symbol_name(b->name));
713716
}
714-
b->value = rhs;
717+
jl_atomic_store_relaxed(&b->value, rhs);
715718
jl_gc_wb_binding(b, rhs);
716719
}
717720

src/toplevel.c

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ JL_DLLEXPORT int jl_lineno = 0; // need to update jl_critical_error if this is T
3232
JL_DLLEXPORT const char *jl_filename = "none"; // need to update jl_critical_error if this is TLS
3333

3434
htable_t jl_current_modules;
35+
jl_mutex_t jl_modules_mutex;
3536

3637
JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m)
3738
{
@@ -135,7 +136,9 @@ jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
135136
jl_module_t *newm = jl_new_module(name);
136137
jl_value_t *form = (jl_value_t*)newm;
137138
JL_GC_PUSH1(&form);
139+
JL_LOCK(&jl_modules_mutex);
138140
ptrhash_put(&jl_current_modules, (void*)newm, (void*)((uintptr_t)HT_NOTFOUND + 1));
141+
JL_UNLOCK(&jl_modules_mutex);
139142

140143
// copy parent environment info into submodule
141144
newm->uuid = parent_module->uuid;
@@ -144,23 +147,27 @@ jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
144147
jl_register_root_module(newm);
145148
}
146149
else {
150+
newm->parent = parent_module;
147151
jl_binding_t *b = jl_get_binding_wr(parent_module, name, 1);
148152
jl_declare_constant(b);
149-
if (b->value != NULL) {
150-
if (!jl_is_module(b->value)) {
153+
jl_value_t *old = jl_atomic_compare_exchange(&b->value, NULL, (jl_value_t*)newm);
154+
if (old != NULL) {
155+
if (!jl_is_module(old)) {
151156
jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
152157
}
153-
if (jl_generating_output()) {
158+
if (jl_generating_output())
154159
jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name));
155-
}
156160
jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name));
161+
old = jl_atomic_exchange(&b->value, (jl_value_t*)newm);
162+
}
163+
jl_gc_wb_binding(b, newm);
164+
if (old != NULL) {
157165
// create a hidden gc root for the old module
158-
uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)b->value);
166+
JL_LOCK(&jl_modules_mutex);
167+
uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)old);
159168
*refcnt += 1;
169+
JL_UNLOCK(&jl_modules_mutex);
160170
}
161-
newm->parent = parent_module;
162-
b->value = (jl_value_t*)newm;
163-
jl_gc_wb_binding(b, newm);
164171
}
165172

166173
if (parent_module == jl_main_module && name == jl_symbol("Base")) {
@@ -213,6 +220,7 @@ jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
213220
}
214221
#endif
215222

223+
JL_LOCK(&jl_modules_mutex);
216224
uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)newm);
217225
assert(*refcnt > (uintptr_t)HT_NOTFOUND);
218226
*refcnt -= 1;
@@ -225,6 +233,7 @@ jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
225233
// defer init of children until parent is done being defined
226234
// then initialize all in definition-finished order
227235
// at build time, don't run them at all (defer for runtime)
236+
form = NULL;
228237
if (!jl_generating_output()) {
229238
if (!ptrhash_has(&jl_current_modules, (void*)newm->parent)) {
230239
size_t i, l = jl_array_len(jl_module_init_order);
@@ -241,12 +250,16 @@ jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
241250
}
242251
if (ns < l)
243252
jl_array_del_end(jl_module_init_order, l - ns);
244-
l = jl_array_len(form);
245-
for (i = 0; i < l; i++) {
246-
jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(form, i);
247-
JL_GC_PROMISE_ROOTED(m);
248-
jl_module_run_initializer(m);
249-
}
253+
}
254+
}
255+
JL_UNLOCK(&jl_modules_mutex);
256+
257+
if (form) {
258+
size_t i, l = jl_array_len(form);
259+
for (i = 0; i < l; i++) {
260+
jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(form, i);
261+
JL_GC_PROMISE_ROOTED(m);
262+
jl_module_run_initializer(m);
250263
}
251264
}
252265

@@ -840,8 +853,11 @@ JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v)
840853
void jl_check_open_for(jl_module_t *m, const char* funcname)
841854
{
842855
if (jl_options.incremental && jl_generating_output()) {
843-
if (!ptrhash_has(&jl_current_modules, (void*)m) && !jl_is__toplevel__mod(m)) {
844-
if (m != jl_main_module) { // TODO: this was grand-fathered in
856+
if (m != jl_main_module) { // TODO: this was grand-fathered in
857+
JL_LOCK(&jl_modules_mutex);
858+
int open = ptrhash_has(&jl_current_modules, (void*)m);
859+
JL_UNLOCK(&jl_modules_mutex);
860+
if (!open && !jl_is__toplevel__mod(m)) {
845861
const char* name = jl_symbol_name(m->name);
846862
jl_errorf("Evaluation into the closed module `%s` breaks incremental compilation "
847863
"because the side effects will not be permanent. "

0 commit comments

Comments
 (0)