Skip to content

Commit fc8e6e7

Browse files
authored
bpart: Fix a hang in a particular corner case (#58271)
The `jl_get_binding_partition_with_hint` version of the bpart lookup did not properly set the `max_world` of the gap, leaving it at `~(size_t)0`. Having a `gap` with that `max_world`, but with `gap.replace` set is inconsistent and ended up causing an integer overflow downstream in the computation of `expected_prev_min_world`, which looked like a concurrent modification causing an infinite retry. Fixes #58257
1 parent 1f18391 commit fc8e6e7

File tree

2 files changed

+28
-9
lines changed

2 files changed

+28
-9
lines changed

Compiler/test/inference.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6428,4 +6428,21 @@ global invalid_setglobal!_exct_modeling::Int
64286428
setglobal!(@__MODULE__, :invalid_setglobal!_exct_modeling, x)
64296429
end == ErrorException
64306430

6431+
# Issue #58257 - Hang in inference during BindingPartition resolution
6432+
module A58257
6433+
module B58257
6434+
using ..A58257
6435+
# World age here is N
6436+
end
6437+
using .B58257
6438+
# World age here is N+1
6439+
@eval f() = $(GlobalRef(B58257, :get!))
6440+
end
6441+
6442+
## The sequence of events is critical here.
6443+
A58257.get! # Creates binding partition in A, N+1:∞
6444+
A58257.B58257.get! # Creates binding partition in A.B, N+1:∞
6445+
Base.invoke_in_world(UInt(38678), getglobal, A58257, :get!) # Expands binding partition in A through <N
6446+
@test Base.infer_return_type(A58257.f) == typeof(Base.get!) # Attempt to lookup A.B in world age N hangs
6447+
64316448
end # module inference

src/module.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,15 +136,18 @@ static jl_binding_partition_t *jl_implicit_import_resolved(jl_binding_t *b, stru
136136
if (jl_is_binding_partition(gap.parent)) {
137137
// Check if we can merge this into the previous binding partition
138138
jl_binding_partition_t *prev = (jl_binding_partition_t *)gap.parent;
139+
assert(new_max_world != ~(size_t)0); // It is inconsistent to have a gap with `gap.parent` set, but max_world == ~(size_t)0
139140
size_t expected_prev_min_world = new_max_world + 1;
140141
if (prev->restriction == resolution.binding_or_const && prev->kind == new_kind) {
142+
retry:
141143
if (!jl_atomic_cmpswap(&prev->min_world, &expected_prev_min_world, new_min_world)) {
142144
if (expected_prev_min_world <= new_min_world) {
143145
return prev;
144146
}
145147
else if (expected_prev_min_world <= new_max_world) {
146-
// Concurrent modification by another thread - bail.
147-
return NULL;
148+
// Concurrent modification of the partition. However, our lookup is still valid,
149+
// so we should still be able to extend the partition.
150+
goto retry;
148151
}
149152
// There remains a gap - proceed
150153
} else {
@@ -154,7 +157,7 @@ static jl_binding_partition_t *jl_implicit_import_resolved(jl_binding_t *b, stru
154157
for (;;) {
155158
// We've updated the previous partition - check if we've closed a gap
156159
size_t next_max_world = jl_atomic_load_relaxed(&next->max_world);
157-
if (next_max_world == expected_prev_min_world-1 && next->kind == new_kind && next->restriction == resolution.binding_or_const) {
160+
if (next_max_world >= expected_prev_min_world-1 && next->kind == new_kind && next->restriction == resolution.binding_or_const) {
158161
if (jl_atomic_cmpswap(&prev->min_world, &expected_prev_min_world, next_min_world)) {
159162
jl_binding_partition_t *nextnext = jl_atomic_load_relaxed(&next->next);
160163
if (!jl_atomic_cmpswap(&prev->next, &next, nextnext)) {
@@ -370,15 +373,15 @@ JL_DLLEXPORT void jl_update_loaded_bpart(jl_binding_t *b, jl_binding_partition_t
370373
bpart->kind = resolution.ultimate_kind;
371374
}
372375

373-
STATIC_INLINE jl_binding_partition_t *jl_get_binding_partition_(jl_binding_t *b JL_PROPAGATES_ROOT, jl_value_t *parent, _Atomic(jl_binding_partition_t *)*insert, size_t world, modstack_t *st) JL_GLOBALLY_ROOTED
376+
STATIC_INLINE jl_binding_partition_t *jl_get_binding_partition_(jl_binding_t *b JL_PROPAGATES_ROOT, jl_value_t *parent, _Atomic(jl_binding_partition_t *)*insert, size_t world, size_t max_world, modstack_t *st) JL_GLOBALLY_ROOTED
374377
{
375378
assert(jl_is_binding(b));
376379
struct implicit_search_gap gap;
377380
gap.parent = parent;
378381
gap.insert = insert;
379382
gap.inherited_flags = 0;
380383
gap.min_world = 0;
381-
gap.max_world = ~(size_t)0;
384+
gap.max_world = max_world;
382385
while (1) {
383386
gap.replace = jl_atomic_load_relaxed(gap.insert);
384387
jl_binding_partition_t *bpart = jl_get_binding_partition__(b, world, &gap);
@@ -395,15 +398,14 @@ jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world)
395398
if (!b)
396399
return NULL;
397400
// Duplicate the code for the entry frame for branch prediction
398-
return jl_get_binding_partition_(b, (jl_value_t*)b, &b->partitions, world, NULL);
401+
return jl_get_binding_partition_(b, (jl_value_t*)b, &b->partitions, world, ~(size_t)0, NULL);
399402
}
400403

401404
jl_binding_partition_t *jl_get_binding_partition_with_hint(jl_binding_t *b, jl_binding_partition_t *prev, size_t world) JL_GLOBALLY_ROOTED {
402405
// Helper for getting a binding partition for an older world after we've already looked up the partition for a newer world
403406
assert(b);
404-
// TODO: Is it possible for a concurrent lookup to have expanded this bpart, making this false?
405-
assert(jl_atomic_load_relaxed(&prev->min_world) > world);
406-
return jl_get_binding_partition_(b, (jl_value_t*)prev, &prev->next, world, NULL);
407+
size_t prev_min_world = jl_atomic_load_relaxed(&prev->min_world);
408+
return jl_get_binding_partition_(b, (jl_value_t*)prev, &prev->next, world, prev_min_world-1, NULL);
407409
}
408410

409411
jl_binding_partition_t *jl_get_binding_partition_all(jl_binding_t *b, size_t min_world, size_t max_world) {

0 commit comments

Comments
 (0)