Skip to content

Commit fe0e79a

Browse files
committed
Add a watcher mechanism to detect when Distributed might be in use
This should help users figure out if one of their packages is using Distributed and another is using DistributedNext.
1 parent c68111a commit fe0e79a

File tree

6 files changed

+64
-2
lines changed

6 files changed

+64
-2
lines changed

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,17 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
88
Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
99

1010
[compat]
11+
Distributed = "1"
1112
Random = "1"
1213
Serialization = "1"
1314
Sockets = "1"
1415
julia = "1.9"
1516

1617
[extras]
18+
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
1719
LibSSH = "00483490-30f8-4353-8aba-35b82f51f4d0"
1820
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1921
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2022

2123
[targets]
22-
test = ["LinearAlgebra", "Test", "LibSSH"]
24+
test = ["LinearAlgebra", "Test", "LibSSH", "Distributed"]

docs/src/_changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ This documents notable changes in DistributedNext.jl. The format is based on
99

1010
## [v1.0.0] - 2024-12-02
1111

12+
### Added
13+
- A watcher mechanism has been added to detect when both the Distributed stdlib
14+
and DistributedNext may be active and adding workers. This should help prevent
15+
incompatibilities from both libraries being used simultaneously ([#10]).
16+
1217
### Fixed
1318
- Fixed behaviour of `isempty(::RemoteChannel)`, which previously had the
1419
side-effect of taking an element from the channel ([#3]).

src/DistributedNext.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,21 @@ export
7272
# Used only by shared arrays.
7373
check_same_host
7474

75+
"""
    _check_distributed_active() -> Bool

Check whether the Distributed stdlib appears to be in use alongside this package.

Returns `true`, after emitting a warning, when all of the following hold:

- the Distributed stdlib (looked up by its package UUID) is present in
  `Base.loaded_modules`,
- its `LPROC` object has its `cookie` field defined, and
- this module's `inited[]` flag is `true`.

Returns `false` in every other case, without logging anything.
"""
function _check_distributed_active()
    distributed_pkgid = Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed")

    # Look the module up exactly once. A `haskey` check followed by a separate
    # indexing operation is a check-then-act pair of lookups, which is fragile
    # because this function is polled from a background task while packages
    # may still be loading.
    distributed = get(Base.loaded_modules, distributed_pkgid, nothing)
    if distributed === nothing
        return false
    end

    # The cookie field only becomes defined once Distributed has been set up
    # for multiple processes; `inited[]` is the corresponding flag on our side.
    if isdefined(distributed.LPROC, :cookie) && inited[]
        @warn "DistributedNext has detected that the Distributed stdlib may be in use. Be aware that these libraries are not compatible, you should use either one or the other."
        return true
    end

    return false
end
89+
7590
function _require_callback(mod::Base.PkgId)
7691
if Base.toplevel_load[] && myid() == 1 && nprocs() > 1
7792
# broadcast top-level (e.g. from Main) import/using from node 1 (only)
@@ -116,6 +131,20 @@ include("precompile.jl")
116131

117132
# Module initializer: sets up the parallel machinery and, outside of
# precompilation, starts a background task that watches for the Distributed
# stdlib being used at the same time as this package.
function __init__()
    init_parallel()

    # Skip the watcher while Julia is generating output, i.e. while this
    # package or one of its dependents is being precompiled. The task below
    # polls indefinitely, and a process that only exists to write a cache file
    # should not be left with a never-ending background task.
    if ccall(:jl_generating_output, Cint, ()) == 0
        # Watch for the Distributed stdlib being loaded and initialized to
        # support multiple workers. We detect this by checking whether its
        # cluster cookie has been set, which most likely happens through
        # Distributed.init_multi() (e.g. when Distributed.addprocs() is called).
        # The task stops as soon as the check succeeds, so the warning is
        # emitted at most once by the watcher.
        watcher_task = Threads.@spawn while true
            if _check_distributed_active()
                return
            end

            sleep(1)
        end

        # Surface unexpected failures of the watcher instead of losing them.
        errormonitor(watcher_task)
    end

    return nothing
end
120149

121150
end

test/distributed_exec.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3-
using Test, DistributedNext, Random, Serialization, Sockets
3+
using DistributedNext, Random, Serialization, Sockets
44
import DistributedNext: launch, manage
55

66

test/distributed_stdlib_detection.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
@testset "Distributed.jl detection" begin
    # Build the command that evaluates `script` in a fresh Julia process using
    # the test executable and flags.
    julia_eval(script) = `$test_exename $test_exeflags -e $script`

    # Run `script` in a child process and return everything it wrote to stderr.
    # `run` throws if the child exits with a failure status, so a failed
    # `@assert` inside the script fails the test as well.
    function stderr_of(script)
        captured = IOBuffer()
        run(pipeline(julia_eval(script); stderr=captured))
        return String(take!(captured))
    end

    # None of these situations should produce any output on stderr:
    silent_scripts = [
        # Just loading Distributed should do nothing
        "using Distributed, DistributedNext; @assert !DistributedNext._check_distributed_active()",
        # Only one of the two being active should also do nothing
        "using Distributed, DistributedNext; Distributed.init_multi(); @assert !DistributedNext._check_distributed_active()",
        "using Distributed, DistributedNext; DistributedNext.init_multi(); @assert !DistributedNext._check_distributed_active()",
    ]
    for script in silent_scripts
        @test isempty(stderr_of(script))
    end

    # But both being active at the same time should trigger a warning
    both_active = "using Distributed, DistributedNext; Distributed.init_multi(); DistributedNext.init_multi(); @assert DistributedNext._check_distributed_active()"
    @test contains(stderr_of(both_active), "DistributedNext has detected that the Distributed stdlib may be in use")
end

test/runtests.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3+
using Test
4+
35
# Run the distributed test outside of the main driver since it needs its own
46
# set of dedicated workers.
57
include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
@@ -18,3 +20,5 @@ end
1820
include("distributed_exec.jl")
1921

2022
include("managers.jl")
23+
24+
include("distributed_stdlib_detection.jl")

0 commit comments

Comments
 (0)