Skip to content

Commit 3306a8b

Browse files
authored
[Distributed] Fix finalizer_ref usage of lock/wait resulting in failed task switches (#41846)
1 parent cd43dda commit 3306a8b

File tree

1 file changed

+44
-22
lines changed

1 file changed

+44
-22
lines changed

stdlib/Distributed/src/remotecall.jl

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,24 @@ end
8484

8585
function finalize_ref(r::AbstractRemoteRef)
8686
if r.where > 0 # Handle the case of the finalizer having been called manually
87-
if islocked(client_refs)
88-
# delay finalizer for later, when it's not already locked
87+
if trylock(client_refs.lock) # trylock doesn't call wait which causes yields
88+
try
89+
delete!(client_refs.ht, r) # direct removal avoiding locks
90+
if isa(r, RemoteChannel)
91+
send_del_client_no_lock(r)
92+
else
93+
# send_del_client only if the reference has not been set
94+
r.v === nothing && send_del_client_no_lock(r)
95+
r.v = nothing
96+
end
97+
r.where = 0
98+
finally
99+
unlock(client_refs.lock)
100+
end
101+
else
89102
finalizer(finalize_ref, r)
90103
return nothing
91104
end
92-
delete!(client_refs, r)
93-
if isa(r, RemoteChannel)
94-
send_del_client(r)
95-
else
96-
# send_del_client only if the reference has not been set
97-
r.v === nothing && send_del_client(r)
98-
r.v = nothing
99-
end
100-
r.where = 0
101105
end
102106
nothing
103107
end
@@ -229,13 +233,18 @@ del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid())
229233
del_client(id, client) = del_client(PGRP, id, client)
230234
function del_client(pg, id, client)
231235
lock(client_refs) do
232-
rv = get(pg.refs, id, false)
233-
if rv !== false
234-
delete!(rv.clientset, client)
235-
if isempty(rv.clientset)
236-
delete!(pg.refs, id)
237-
#print("$(myid()) collected $id\n")
238-
end
236+
_del_client(pg, id, client)
237+
end
238+
nothing
239+
end
240+
241+
function _del_client(pg, id, client)
242+
rv = get(pg.refs, id, false)
243+
if rv !== false
244+
delete!(rv.clientset, client)
245+
if isempty(rv.clientset)
246+
delete!(pg.refs, id)
247+
#print("$(myid()) collected $id\n")
239248
end
240249
end
241250
nothing
@@ -259,13 +268,26 @@ function send_del_client(rr)
259268
if rr.where == myid()
260269
del_client(rr)
261270
elseif id_in_procs(rr.where) # process only if a valid worker
262-
w = worker_from_id(rr.where)::Worker
263-
push!(w.del_msgs, (remoteref_id(rr), myid()))
264-
w.gcflag = true
265-
notify(any_gc_flag)
271+
process_worker(rr)
266272
end
267273
end
268274

275+
function send_del_client_no_lock(rr)
276+
# for gc context to avoid yields
277+
if rr.where == myid()
278+
_del_client(PGRP, remoteref_id(rr), myid())
279+
elseif id_in_procs(rr.where) # process only if a valid worker
280+
process_worker(rr)
281+
end
282+
end
283+
284+
function process_worker(rr)
285+
w = worker_from_id(rr.where)::Worker
286+
push!(w.del_msgs, (remoteref_id(rr), myid()))
287+
w.gcflag = true
288+
notify(any_gc_flag)
289+
end
290+
269291
function add_client(id, client)
270292
lock(client_refs) do
271293
rv = lookup_ref(id)

0 commit comments

Comments
 (0)