Skip to content

Commit 37bcabd

Browse files
committed
Add a watcher mechanism to detect when Distributed might be in use
This should help users figure out if one of their packages is using Distributed and another is using DistributedNext.
1 parent 13ce55b commit 37bcabd

File tree

6 files changed

+67
-2
lines changed

6 files changed

+67
-2
lines changed

Project.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
77
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
88
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
99

10+
[compat]
11+
Distributed = "1"
12+
1013
[extras]
14+
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
1115
LibSSH = "00483490-30f8-4353-8aba-35b82f51f4d0"
1216
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1317
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
1418

1519
[targets]
16-
test = ["LinearAlgebra", "Test", "LibSSH"]
20+
test = ["LinearAlgebra", "Test", "LibSSH", "Distributed"]

docs/src/_changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ This documents notable changes in DistributedNext.jl. The format is based on
99

1010
## Unreleased
1111

12+
### Added
13+
- A watcher mechanism has been added to detect when both the Distributed stdlib
14+
and DistributedNext may be active and adding workers. This should help prevent
15+
incompatibilities from both libraries being used simultaneously ([#10]).
16+
1217
### Fixed
1318
- Fixed behaviour of `isempty(::RemoteChannel)`, which previously had the
1419
side-effect of taking an element from the channel ([#3]).

src/DistributedNext.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,17 @@ export
7272
# Used only by shared arrays.
7373
check_same_host
7474

75+
# Handle to the Distributed stdlib module if it is currently loaded, otherwise
# `nothing`. Refreshed by `_find_distributed_stdlib()`.
distributed_module::Union{Module, Nothing} = nothing

# Look up the Distributed stdlib (by its UUID and name) among the loaded
# modules, store the result in the `distributed_module` global and return it.
# The result is `nothing` when Distributed has not been loaded.
function _find_distributed_stdlib()
    global distributed_module

    stdlib_uuid = Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b")
    stdlib_id = Base.PkgId(stdlib_uuid, "Distributed")
    found = get(Base.loaded_modules, stdlib_id, nothing)

    distributed_module = found
    return found
end
82+
7583
function _require_callback(mod::Base.PkgId)
84+
_find_distributed_stdlib()
85+
7686
if Base.toplevel_load[] && myid() == 1 && nprocs() > 1
7787
# broadcast top-level (e.g. from Main) import/using from node 1 (only)
7888
@sync for p in procs()
@@ -116,6 +126,24 @@ include("precompile.jl")
116126

117127
# Module initialization hook: calls `init_parallel()` and then starts a
# background task that warns once if the Distributed stdlib appears to be in use
# at the same time as DistributedNext.
function __init__()
    init_parallel()

    # Only start the watcher in a regular session. When Julia is generating
    # output (e.g. this module was loaded while precompiling a dependent
    # package) a never-ending polling task could keep that process from exiting.
    if ccall(:jl_generating_output, Cint, ()) == 0
        # Start a task to watch for the Distributed stdlib being loaded and
        # initialized to support multiple workers. We do this by checking if the
        # cluster cookie has been set, which is most likely to have been done
        # through Distributed.init_multi() (for example via
        # Distributed.addprocs()).
        _find_distributed_stdlib()
        watcher_task = Threads.@spawn while true
            if !isnothing(distributed_module)
                # Warn only when Distributed has a cookie set *and* our own
                # `inited[]` flag is set (presumably meaning DistributedNext has
                # been initialized for multiple workers too -- confirm against
                # init_multi()).
                if isdefined(distributed_module.LPROC, :cookie) && inited[]
                    @warn "DistributedNext has detected that the Distributed stdlib may be in use. Be aware that these libraries are not compatible, you should use either one or the other."
                    # One warning is enough, stop watching.
                    return
                end
            end

            # Poll twice a second.
            sleep(0.5)
        end

        # Make sure an unexpected failure of the watcher is logged rather than
        # silently dropped.
        errormonitor(watcher_task)
    end

    return nothing
end
120148

121149
end

test/distributed_exec.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3-
using Test, DistributedNext, Random, Serialization, Sockets
3+
using DistributedNext, Random, Serialization, Sockets
44
import DistributedNext: launch, manage
55

66

test/distributed_stdlib_detection.jl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using DistributedNext
2+
3+
@testset "Distributed.jl detection" begin
    # Run `code` in a fresh Julia process (using the test suite's executable and
    # flags) and return everything the child process wrote to stderr.
    function child_stderr(code)
        errbuf = IOBuffer()
        run(pipeline(`$test_exename $test_exeflags -e $code`; stderr=errbuf))
        return String(take!(errbuf))
    end

    warning = "DistributedNext has detected that the Distributed stdlib may be in use"

    # Merely loading Distributed next to DistributedNext must stay silent
    @test isempty(child_stderr("using Distributed, DistributedNext; sleep(1)"))

    # Initializing only one of the two must stay silent as well
    @test isempty(child_stderr("using Distributed, DistributedNext; Distributed.init_multi(); sleep(1)"))

    # Initializing both in the same process has to produce the warning
    @test contains(child_stderr("using Distributed, DistributedNext; Distributed.init_multi(); DistributedNext.init_multi(); sleep(1)"), warning)
end

test/runtests.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3+
using Test
4+
35
# Run the distributed test outside of the main driver since it needs its own
46
# set of dedicated workers.
57
include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
@@ -18,3 +20,5 @@ end
1820
include("distributed_exec.jl")
1921

2022
include("managers.jl")
23+
24+
include("distributed_stdlib_detection.jl")

0 commit comments

Comments
 (0)