From 15f6afbb8d3696cf0dc99b618a2c07ef1fc14799 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Sat, 14 Oct 2023 10:18:56 -0700 Subject: [PATCH 1/9] Add gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index de6b1c6..6e6862d 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ docs/src/changelog.md +Manifest.toml +*.swp From 853974ae5812ab02eec96b3eb2b2928ec77379fb Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 13 Sep 2021 13:53:41 -0400 Subject: [PATCH 2/9] Make worker state variable threadsafe --- .github/workflows/ci.yml | 2 +- src/cluster.jl | 72 ++++++++++++++++++++++++++++------------ src/managers.jl | 2 +- src/messages.jl | 2 +- src/process_messages.jl | 2 +- test/distributed_exec.jl | 5 ++- test/threads.jl | 64 +++++++++++++++++++++++++++++++++++ 7 files changed, 122 insertions(+), 27 deletions(-) create mode 100644 test/threads.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d4104fd..1dee688 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,7 +54,7 @@ jobs: - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: - JULIA_DISTRIBUTED_TESTING_STANDALONE: 1 + JULIA_NUM_THREADS: 4 - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v4 with: diff --git a/src/cluster.jl b/src/cluster.jl index 2444695..653be62 100644 --- a/src/cluster.jl +++ b/src/cluster.jl @@ -99,10 +99,10 @@ mutable struct Worker del_msgs::Array{Any,1} # XXX: Could del_msgs and add_msgs be Channels? add_msgs::Array{Any,1} @atomic gcflag::Bool - state::WorkerState - c_state::Condition # wait for state changes - ct_time::Float64 # creation time - conn_func::Any # used to setup connections lazily + @atomic state::WorkerState + c_state::Threads.Condition # wait for state changes, lock for state + ct_time::Float64 # creation time + conn_func::Any # used to setup connections lazily r_stream::IO w_stream::IO @@ -134,7 +134,7 @@ mutable struct Worker if haskey(map_pid_wrkr, id) return map_pid_wrkr[id] end - w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Condition(), time(), conn_func) + w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Threads.Condition(), time(), conn_func) w.initialized = Event() register_worker(w) w @@ -144,12 +144,14 @@ mutable struct Worker end function set_worker_state(w, state) - w.state = state - notify(w.c_state; all=true) + lock(w.c_state) do + @atomic w.state = state + notify(w.c_state; all=true) + end end function check_worker_state(w::Worker) - if w.state === W_CREATED + if (@atomic w.state) === W_CREATED if !isclusterlazy() if PGRP.topology === :all_to_all # Since higher pids connect with lower pids, the remote worker @@ -170,6 +172,7 @@ function check_worker_state(w::Worker) wait_for_conn(w) end end + return nothing end exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id)::Worker) @@ -187,13 +190,21 @@ function exec_conn_func(w::Worker) end function wait_for_conn(w) - if w.state === W_CREATED + if (@atomic w.state) === W_CREATED timeout = worker_timeout() - (time() - w.ct_time) timeout <= 0 && error("peer $(w.id) has not connected to $(myid())") - @async (sleep(timeout); notify(w.c_state; all=true)) - wait(w.c_state) - w.state === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds") + T = Threads.@spawn begin + sleep($timeout) + lock(w.c_state) do + notify(w.c_state; all=true) + end + end + errormonitor(T) + lock(w.c_state) do + wait(w.c_state) + (@atomic 
w.state) === W_CREATED && error("peer $(w.id) didn't connect to $(myid()) within $timeout seconds") + end end nothing end @@ -491,7 +502,10 @@ function addprocs_locked(manager::ClusterManager; kwargs...) while true if isempty(launched) istaskdone(t_launch) && break - @async (sleep(1); notify(launch_ntfy)) + @async begin + sleep(1) + notify(launch_ntfy) + end wait(launch_ntfy) end @@ -645,7 +659,12 @@ function create_worker(manager, wconfig) # require the value of config.connect_at which is set only upon connection completion for jw in PGRP.workers if (jw.id != 1) && (jw.id < w.id) - (jw.state === W_CREATED) && wait(jw.c_state) + lock(jw.c_state) do + # wait for wl to join + if (@atomic jw.state) === W_CREATED + wait(jw.c_state) + end + end push!(join_list, jw) end end @@ -668,7 +687,12 @@ function create_worker(manager, wconfig) end for wl in wlist - (wl.state === W_CREATED) && wait(wl.c_state) + lock(wl.c_state) do + if (@atomic wl.state) === W_CREATED + # wait for wl to join + wait(wl.c_state) + end + end push!(join_list, wl) end end @@ -685,7 +709,11 @@ function create_worker(manager, wconfig) @async manage(w.manager, w.id, w.config, :register) # wait for rr_ntfy_join with timeout timedout = false - @async (sleep($timeout); timedout = true; put!(rr_ntfy_join, 1)) + @async begin + sleep($timeout) + timedout = true + put!(rr_ntfy_join, 1) + end wait(rr_ntfy_join) if timedout error("worker did not connect within $timeout seconds") @@ -870,7 +898,7 @@ function nprocs() n = length(PGRP.workers) # filter out workers in the process of being setup/shutdown. for jw in PGRP.workers - if !isa(jw, LocalProcess) && (jw.state !== W_CONNECTED) + if !isa(jw, LocalProcess) && ((@atomic jw.state) !== W_CONNECTED) n = n - 1 end end @@ -921,7 +949,7 @@ julia> procs() function procs() if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) # filter out workers in the process of being setup/shutdown. - return Int[x.id for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)] + return Int[x.id for x in PGRP.workers if isa(x, LocalProcess) || ((@atomic x.state) === W_CONNECTED)] else return Int[x.id for x in PGRP.workers] end @@ -930,7 +958,7 @@ end function id_in_procs(id) # faster version of `id in procs()` if myid() == 1 || (PGRP.topology === :all_to_all && !isclusterlazy()) for x in PGRP.workers - if (x.id::Int) == id && (isa(x, LocalProcess) || (x::Worker).state === W_CONNECTED) + if (x.id::Int) == id && (isa(x, LocalProcess) || (@atomic (x::Worker).state) === W_CONNECTED) return true end end @@ -952,7 +980,7 @@ Specifically all workers bound to the same ip-address as `pid` are returned. 
""" function procs(pid::Integer) if myid() == 1 - all_workers = [x for x in PGRP.workers if isa(x, LocalProcess) || (x.state === W_CONNECTED)] + all_workers = [x for x in PGRP.workers if isa(x, LocalProcess) || ((@atomic x.state) === W_CONNECTED)] if (pid == 1) || (isa(map_pid_wrkr[pid].manager, LocalManager)) Int[x.id for x in filter(w -> (w.id==1) || (isa(w.manager, LocalManager)), all_workers)] else @@ -1059,11 +1087,11 @@ function _rmprocs(pids, waitfor) start = time_ns() while (time_ns() - start) < waitfor*1e9 - all(w -> w.state === W_TERMINATED, rmprocset) && break + all(w -> (@atomic w.state) === W_TERMINATED, rmprocset) && break sleep(min(0.1, waitfor - (time_ns() - start)/1e9)) end - unremoved = [wrkr.id for wrkr in filter(w -> w.state !== W_TERMINATED, rmprocset)] + unremoved = [wrkr.id for wrkr in filter(w -> (@atomic w.state) !== W_TERMINATED, rmprocset)] if length(unremoved) > 0 estr = string("rmprocs: pids ", unremoved, " not terminated after ", waitfor, " seconds.") throw(ErrorException(estr)) diff --git a/src/managers.jl b/src/managers.jl index 129b65c..56d1f78 100644 --- a/src/managers.jl +++ b/src/managers.jl @@ -183,7 +183,7 @@ function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy: # Wait for all launches to complete. @sync for (i, (machine, cnt)) in enumerate(manager.machines) let machine=machine, cnt=cnt - @async try + @async try launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy) catch e print(stderr, "exception launching on machine $(machine) : $(e)\n") diff --git a/src/messages.jl b/src/messages.jl index fe3e5ab..6e895f0 100644 --- a/src/messages.jl +++ b/src/messages.jl @@ -194,7 +194,7 @@ end function flush_gc_msgs() try for w in (PGRP::ProcessGroup).workers - if isa(w,Worker) && (w.state == W_CONNECTED) && w.gcflag + if isa(w,Worker) && ((@atomic w.state) == W_CONNECTED) && w.gcflag flush_gc_msgs(w) end end diff --git a/src/process_messages.jl b/src/process_messages.jl index 3032917..a444651 100644 --- a/src/process_messages.jl +++ b/src/process_messages.jl @@ -222,7 +222,7 @@ function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool) println(stderr, "Process($(myid())) - Unknown remote, closing connection.") elseif !(wpid in map_del_wrkr) werr = worker_from_id(wpid) - oldstate = werr.state + oldstate = @atomic werr.state set_worker_state(werr, W_TERMINATED) # If unhandleable error occurred talking to pid 1, exit diff --git a/test/distributed_exec.jl b/test/distributed_exec.jl index a5b833b..fb0caba 100644 --- a/test/distributed_exec.jl +++ b/test/distributed_exec.jl @@ -1991,5 +1991,8 @@ end # Run topology tests last after removing all workers, since a given # cluster at any time only supports a single topology. -nprocs() > 1 && rmprocs(workers()) +if nprocs() > 1 + rmprocs(workers()) +end +include("threads.jl") include("topology.jl") diff --git a/test/threads.jl b/test/threads.jl new file mode 100644 index 0000000..9d1d6d4 --- /dev/null +++ b/test/threads.jl @@ -0,0 +1,64 @@ +using Test +using DistributedNext, Base.Threads +using Base.Iterators: product + +exeflags = ("--startup-file=no", + "--check-bounds=yes", + "--depwarn=error", + "--threads=2") + +function call_on(f, wid, tid) + remotecall(wid) do + t = Task(f) + ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid - 1) + schedule(t) + @assert threadid(t) == tid + t + end +end + +# Run function on process holding the data to only serialize the result of f. +# This becomes useful for things that cannot be serialized (e.g. 
running tasks) +# or that would be unnecessarily big if serialized. +fetch_from_owner(f, rr) = remotecall_fetch(f ∘ fetch, rr.where, rr) + +isdone(rr) = fetch_from_owner(istaskdone, rr) +isfailed(rr) = fetch_from_owner(istaskfailed, rr) + +@testset "RemoteChannel allows put!/take! from thread other than 1" begin + ws = ts = product(1:2, 1:2) + @testset "from worker $w1 to $w2 via 1" for (w1, w2) in ws + @testset "from thread $w1.$t1 to $w2.$t2" for (t1, t2) in ts + # We want (the default) laziness, so that we wait for `Worker.c_state`! + procs_added = addprocs(2; exeflags, lazy=true) + @everywhere procs_added using Base.Threads + + p1 = procs_added[w1] + p2 = procs_added[w2] + chan_id = first(procs_added) + chan = RemoteChannel(chan_id) + send = call_on(p1, t1) do + put!(chan, nothing) + end + recv = call_on(p2, t2) do + take!(chan) + end + + # Wait on the spawned tasks on the owner. Note that we use + # timedwait() instead of @sync to avoid deadlocks. + t1 = Threads.@spawn fetch_from_owner(wait, recv) + t2 = Threads.@spawn fetch_from_owner(wait, send) + @test timedwait(() -> istaskdone(t1), 5) == :ok + @test timedwait(() -> istaskdone(t2), 5) == :ok + + # Check the tasks + @test isdone(send) + @test isdone(recv) + + @test !isfailed(send) + @test !isfailed(recv) + + rmprocs(procs_added) + end + end +end From a15baf428831ccb473bbe14475e0247aa7784d2d Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Sat, 14 Oct 2023 10:18:05 -0700 Subject: [PATCH 3/9] init_multi: Be more thread-safe --- src/cluster.jl | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/cluster.jl b/src/cluster.jl index 653be62..c58a79b 100644 --- a/src/cluster.jl +++ b/src/cluster.jl @@ -1318,18 +1318,16 @@ end using Random: randstring -let inited = false - # do initialization that's only needed when there is more than 1 processor - global function init_multi() - if !inited - inited = true - push!(Base.package_callbacks, _require_callback) - atexit(terminate_all_workers) - init_bind_addr() - cluster_cookie(randstring(HDR_COOKIE_LEN)) - end - return nothing +# do initialization that's only needed when there is more than 1 processor +const inited = Threads.Atomic{Bool}(false) +function init_multi() + if !Threads.atomic_cas!(inited, false, true) + push!(Base.package_callbacks, _require_callback) + atexit(terminate_all_workers) + init_bind_addr() + cluster_cookie(randstring(HDR_COOKIE_LEN)) end + return nothing end function init_parallel() From 1e4b52f7b6397b61afcb03c7bd6552dd2e0908a8 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Sun, 2 Jun 2024 19:52:32 +0200 Subject: [PATCH 4/9] Use errormonitor() in a few places --- src/cluster.jl | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/cluster.jl b/src/cluster.jl index c58a79b..bb08d9b 100644 --- a/src/cluster.jl +++ b/src/cluster.jl @@ -706,14 +706,16 @@ function create_worker(manager, wconfig) join_message = JoinPGRPMsg(w.id, all_locs, PGRP.topology, enable_threaded_blas, isclusterlazy()) send_msg_now(w, MsgHeader(RRID(0,0), ntfy_oid), join_message) - @async manage(w.manager, w.id, w.config, :register) + errormonitor(@async manage(w.manager, w.id, w.config, :register)) # wait for rr_ntfy_join with timeout timedout = false - @async begin - sleep($timeout) - timedout = true - put!(rr_ntfy_join, 1) - end + errormonitor( + @async begin + sleep($timeout) + timedout = true + put!(rr_ntfy_join, 1) + end + ) wait(rr_ntfy_join) if timedout error("worker did not connect within $timeout 
seconds") @@ -763,17 +765,20 @@ function check_master_connect() if ccall(:jl_running_on_valgrind,Cint,()) != 0 return end - @async begin - start = time_ns() - while !haskey(map_pid_wrkr, 1) && (time_ns() - start) < timeout - sleep(1.0) - end - if !haskey(map_pid_wrkr, 1) - print(stderr, "Master process (id 1) could not connect within $(timeout/1e9) seconds.\nexiting.\n") - exit(1) + errormonitor( + @async begin + start = time_ns() + while !haskey(map_pid_wrkr, 1) && (time_ns() - start) < timeout + sleep(1.0) + end + + if !haskey(map_pid_wrkr, 1) + print(stderr, "Master process (id 1) could not connect within $(timeout/1e9) seconds.\nexiting.\n") + exit(1) + end end - end + ) end From 3c9080d3af93e33655bbb8a012fab9a650c194bc Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Tue, 29 Oct 2024 21:34:52 +0100 Subject: [PATCH 5/9] Update changelog --- docs/src/_changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/_changelog.md b/docs/src/_changelog.md index fe80fb4..d9d6c95 100644 --- a/docs/src/_changelog.md +++ b/docs/src/_changelog.md @@ -12,6 +12,8 @@ This documents notable changes in DistributedNext.jl. The format is based on ### Fixed - Fixed behaviour of `isempty(::RemoteChannel)`, which previously had the side-effect of taking an element from the channel ([#3]). +- Improved thread-safety, such that it should be safe to start workers with + multiple threads and send messages between them ([#4]). ### Changed - Added a `project` argument to [`addprocs(::AbstractVector)`](@ref) to specify From f6892b5640c3601cfc7f1afa7dea38071b538216 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Thu, 31 Oct 2024 23:26:22 +0100 Subject: [PATCH 6/9] Always run multi-threaded tests --- test/distributed_exec.jl | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/test/distributed_exec.jl b/test/distributed_exec.jl index fb0caba..1b438a0 100644 --- a/test/distributed_exec.jl +++ b/test/distributed_exec.jl @@ -147,27 +147,6 @@ function poll_while(f::Function; timeout_seconds::Integer = 120) return true end -function _getenv_include_thread_unsafe() - environment_variable_name = "JULIA_TEST_INCLUDE_THREAD_UNSAFE" - default_value = "false" - environment_variable_value = strip(get(ENV, environment_variable_name, default_value)) - b = parse(Bool, environment_variable_value)::Bool - return b -end -const _env_include_thread_unsafe = _getenv_include_thread_unsafe() -function include_thread_unsafe_tests() - if Threads.maxthreadid() > 1 - if _env_include_thread_unsafe - return true - end - msg = "Skipping a thread-unsafe test because `Threads.maxthreadid() > 1`" - @warn msg Threads.maxthreadid() - Test.@test_broken false - return false - end - return true -end - # DistributedNext GC tests for Futures function test_futures_dgc(id) f = remotecall(myid, id) @@ -290,14 +269,10 @@ let wid1 = workers()[1], fstore = RemoteChannel(wid2) put!(fstore, rr) - if include_thread_unsafe_tests() - @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true - end + @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true finalize(rr) # finalize locally yield() # flush gc msgs - if include_thread_unsafe_tests() - @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true - end + @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely sleep(0.5) # to ensure that wid2 
messages have been executed on wid1 @test poll_while(() -> remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid)) From da533c1bca1c54dcf47d5a42facb426348ff4372 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Fri, 1 Nov 2024 14:47:28 +0100 Subject: [PATCH 7/9] Move SSH tests into a single-threaded process Necessary because LibSSH is not thread-safe. --- test/distributed_exec.jl | 94 ------------------------------------ test/runtests.jl | 10 ++-- test/sshmanager.jl | 101 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 97 deletions(-) create mode 100644 test/sshmanager.jl diff --git a/test/distributed_exec.jl b/test/distributed_exec.jl index 1b438a0..6b5ae00 100644 --- a/test/distributed_exec.jl +++ b/test/distributed_exec.jl @@ -3,8 +3,6 @@ using Test, DistributedNext, Random, Serialization, Sockets import DistributedNext: launch, manage -import LibSSH as ssh -import LibSSH.Demo: DemoServer @test cluster_cookie() isa String @@ -762,98 +760,6 @@ if DoFullTest @test all([p == remotecall_fetch(myid, p) for p in all_w]) end -# LibSSH.jl currently only works on 64bit unixes -if Sys.isunix() && Sys.WORD_SIZE == 64 - function test_n_remove_pids(new_pids) - for p in new_pids - w_in_remote = sort(remotecall_fetch(workers, p)) - try - @test intersect(new_pids, w_in_remote) == new_pids - catch - print("p : $p\n") - print("newpids : $new_pids\n") - print("w_in_remote : $w_in_remote\n") - print("intersect : $(intersect(new_pids, w_in_remote))\n\n\n") - rethrow() - end - end - - remotecall_fetch(rmprocs, 1, new_pids) - end - - println("\n\nTesting SSHManager. A minimum of 4GB of RAM is recommended.") - println("Please ensure port 9300 and 2222 are not in use.") - - DemoServer(2222; auth_methods=[ssh.AuthMethod_None], allow_auth_none=true, verbose=false, timeout=3600) do - sshflags = `-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=ERROR -p 2222 ` - #Issue #9951 - hosts=[] - localhost_aliases = ["localhost", string(getipaddr()), "127.0.0.1"] - num_workers = parse(Int,(get(ENV, "JULIA_ADDPROCS_NUM", "9"))) - - for i in 1:(num_workers/length(localhost_aliases)) - append!(hosts, localhost_aliases) - end - - # CI machines sometimes don't already have a .ssh directory - ssh_dir = joinpath(homedir(), ".ssh") - if !isdir(ssh_dir) - mkdir(ssh_dir) - end - - print("\nTesting SSH addprocs with $(length(hosts)) workers...\n") - new_pids = addprocs_with_testenv(hosts; sshflags=sshflags) - @test length(new_pids) == length(hosts) - test_n_remove_pids(new_pids) - - print("\nMixed ssh addprocs with :auto\n") - new_pids = addprocs_with_testenv(["localhost", ("127.0.0.1", :auto), "localhost"]; sshflags=sshflags) - @test length(new_pids) == (2 + Sys.CPU_THREADS) - test_n_remove_pids(new_pids) - - print("\nMixed ssh addprocs with numeric counts\n") - new_pids = addprocs_with_testenv([("localhost", 2), ("127.0.0.1", 2), "localhost"]; sshflags=sshflags) - @test length(new_pids) == 5 - test_n_remove_pids(new_pids) - - print("\nssh addprocs with tunnel\n") - new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, sshflags=sshflags) - @test length(new_pids) == num_workers - test_n_remove_pids(new_pids) - - print("\nssh addprocs with tunnel (SSH multiplexing)\n") - new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, multiplex=true, sshflags=sshflags) - @test length(new_pids) == num_workers - controlpath = joinpath(ssh_dir, "julia-$(ENV["USER"])@localhost:2222") - @test issocket(controlpath) - 
test_n_remove_pids(new_pids) - @test :ok == timedwait(()->!issocket(controlpath), 10.0; pollint=0.5) - - print("\nAll supported formats for hostname\n") - h1 = "localhost" - user = ENV["USER"] - h2 = "$user@$h1" - h3 = "$h2:2222" - h4 = "$h3 $(string(getipaddr()))" - h5 = "$h4:9300" - - new_pids = addprocs_with_testenv([h1, h2, h3, h4, h5]; sshflags=sshflags) - @test length(new_pids) == 5 - test_n_remove_pids(new_pids) - - print("\nkeyword arg exename\n") - for exename in [`$(joinpath(Sys.BINDIR, Base.julia_exename()))`, "$(joinpath(Sys.BINDIR, Base.julia_exename()))"] - for addp_func in [()->addprocs_with_testenv(["localhost"]; exename=exename, exeflags=test_exeflags, sshflags=sshflags), - ()->addprocs_with_testenv(1; exename=exename, exeflags=test_exeflags)] - - local new_pids = addp_func() - @test length(new_pids) == 1 - test_n_remove_pids(new_pids) - end - end - end -end # unix-only - let t = @task 42 schedule(t, ErrorException(""), error=true) @test_throws TaskFailedException(t) Base.wait(t) diff --git a/test/runtests.jl b/test/runtests.jl index d34d07c..d4d1d86 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,11 +3,15 @@ # Run the distributed test outside of the main driver since it needs its own # set of dedicated workers. include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) -disttestfile = joinpath(@__DIR__, "distributed_exec.jl") -cmd = `$test_exename $test_exeflags $disttestfile` +cmd = `$test_exename $test_exeflags` + +# Run the SSH tests with a single thread because LibSSH.jl is not thread-safe +sshtestfile = joinpath(@__DIR__, "sshmanager.jl") +run(addenv(`$cmd $sshtestfile`, "JULIA_NUM_THREADS" => "1")) -if !success(pipeline(cmd; stdout=stdout, stderr=stderr)) && ccall(:jl_running_on_valgrind,Cint,()) == 0 +disttestfile = joinpath(@__DIR__, "distributed_exec.jl") +if !success(pipeline(`$cmd $disttestfile`; stdout=stdout, stderr=stderr)) && ccall(:jl_running_on_valgrind,Cint,()) == 0 error("Distributed test failed, cmd : $cmd") end diff --git a/test/sshmanager.jl b/test/sshmanager.jl new file mode 100644 index 0000000..9bed971 --- /dev/null +++ b/test/sshmanager.jl @@ -0,0 +1,101 @@ +using Test +using DistributedNext +import Sockets: getipaddr + +import LibSSH as ssh +import LibSSH.Demo: DemoServer + + +include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) + +# LibSSH.jl currently only works on 64bit unixes +if Sys.isunix() && Sys.WORD_SIZE == 64 + function test_n_remove_pids(new_pids) + for p in new_pids + w_in_remote = sort(remotecall_fetch(workers, p)) + try + @test intersect(new_pids, w_in_remote) == new_pids + catch + print("p : $p\n") + print("newpids : $new_pids\n") + print("w_in_remote : $w_in_remote\n") + print("intersect : $(intersect(new_pids, w_in_remote))\n\n\n") + rethrow() + end + end + + remotecall_fetch(rmprocs, 1, new_pids) + end + + println("\n\nTesting SSHManager. 
A minimum of 4GB of RAM is recommended.") + println("Please ensure port 9300 and 2222 are not in use.") + + DemoServer(2222; auth_methods=[ssh.AuthMethod_None], allow_auth_none=true, verbose=false, timeout=3600) do + sshflags = `-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=ERROR -p 2222 ` + #Issue #9951 + hosts=[] + localhost_aliases = ["localhost", string(getipaddr()), "127.0.0.1"] + num_workers = parse(Int,(get(ENV, "JULIA_ADDPROCS_NUM", "9"))) + + for i in 1:(num_workers/length(localhost_aliases)) + append!(hosts, localhost_aliases) + end + + # CI machines sometimes don't already have a .ssh directory + ssh_dir = joinpath(homedir(), ".ssh") + if !isdir(ssh_dir) + mkdir(ssh_dir) + end + + print("\nTesting SSH addprocs with $(length(hosts)) workers...\n") + new_pids = addprocs_with_testenv(hosts; sshflags=sshflags) + @test length(new_pids) == length(hosts) + test_n_remove_pids(new_pids) + + print("\nMixed ssh addprocs with :auto\n") + new_pids = addprocs_with_testenv(["localhost", ("127.0.0.1", :auto), "localhost"]; sshflags=sshflags) + @test length(new_pids) == (2 + Sys.CPU_THREADS) + test_n_remove_pids(new_pids) + + print("\nMixed ssh addprocs with numeric counts\n") + new_pids = addprocs_with_testenv([("localhost", 2), ("127.0.0.1", 2), "localhost"]; sshflags=sshflags) + @test length(new_pids) == 5 + test_n_remove_pids(new_pids) + + print("\nssh addprocs with tunnel\n") + new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, sshflags=sshflags) + @test length(new_pids) == num_workers + test_n_remove_pids(new_pids) + + print("\nssh addprocs with tunnel (SSH multiplexing)\n") + new_pids = addprocs_with_testenv([("localhost", num_workers)]; tunnel=true, multiplex=true, sshflags=sshflags) + @test length(new_pids) == num_workers + controlpath = joinpath(ssh_dir, "julia-$(ENV["USER"])@localhost:2222") + @test issocket(controlpath) + test_n_remove_pids(new_pids) + @test :ok == timedwait(()->!issocket(controlpath), 10.0; pollint=0.5) + + print("\nAll supported formats for hostname\n") + h1 = "localhost" + user = ENV["USER"] + h2 = "$user@$h1" + h3 = "$h2:2222" + h4 = "$h3 $(string(getipaddr()))" + h5 = "$h4:9300" + + new_pids = addprocs_with_testenv([h1, h2, h3, h4, h5]; sshflags=sshflags) + @test length(new_pids) == 5 + test_n_remove_pids(new_pids) + + print("\nkeyword arg exename\n") + for exename in [`$(joinpath(Sys.BINDIR, Base.julia_exename()))`, "$(joinpath(Sys.BINDIR, Base.julia_exename()))"] + for addp_func in [()->addprocs_with_testenv(["localhost"]; exename=exename, exeflags=test_exeflags, sshflags=sshflags), + ()->addprocs_with_testenv(1; exename=exename, exeflags=test_exeflags)] + + local new_pids = addp_func() + @test length(new_pids) == 1 + test_n_remove_pids(new_pids) + end + end + end +end From b78aa9af5e743e6f7788372922a6272cf2aa5df0 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Fri, 1 Nov 2024 22:20:08 +0100 Subject: [PATCH 8/9] Refactor the tests into @testsets This makes it much easier to see where errors/warnings are coming from. The tests have been preserved in the exact order they were written, with no changes other than the necessary ones to put them in `@testset`'s (e.g. creating modules in global scope). 
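
For illustration, the mechanical shape of the change, as a sketch only
(the test and testset names below are taken from the diff that follows):

    # before
    @test remote(myid)() == 1

    # after
    @testset "Remote invocations with no workers" begin
        @test remote(myid)() == 1
    end

A consequence of this is that tests defining modules (e.g. LocalFoo) now
do so at top level, because `module` expressions are only legal in global
scope and a `@testset` block introduces a local scope.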
--- test/distributed_exec.jl | 2966 +++++++++++++++++++------------------- test/managers.jl | 34 +- test/runtests.jl | 16 +- test/splitrange.jl | 48 +- test/sshmanager.jl | 32 +- test/topology.jl | 220 +-- 6 files changed, 1684 insertions(+), 1632 deletions(-) diff --git a/test/distributed_exec.jl b/test/distributed_exec.jl index 6b5ae00..0ee9e6b 100644 --- a/test/distributed_exec.jl +++ b/test/distributed_exec.jl @@ -11,108 +11,116 @@ include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) @test DistributedNext.extract_imports(:(begin; import Foo, Bar; let; using Baz; end; end)) == Any[:(import Foo, Bar), :(using Baz)] + +id_me = nothing +id_other = nothing + # Test a few "remote" invocations when no workers are present -@test remote(myid)() == 1 -@test pmap(identity, 1:100) == [1:100...] -@test 100 == @distributed (+) for i in 1:100 +@testset "Remote invocations with no workers" begin + @test remote(myid)() == 1 + @test pmap(identity, 1:100) == [1:100...] + @test 100 == @distributed (+) for i in 1:100 1 end +end -addprocs_with_testenv(4) -@test nprocs() == 5 +@testset "Distributed loading of packages" begin + addprocs_with_testenv(4) + @test nprocs() == 5 -# distributed loading of packages + global id_me = myid() + global id_other = filter(x -> x != id_me, procs())[rand(1:(nprocs()-1))] -# setup -@everywhere begin - old_act_proj = Base.ACTIVE_PROJECT[] - pushfirst!(Base.LOAD_PATH, "@") - Base.ACTIVE_PROJECT[] = joinpath(Sys.BINDIR, "..", "share", "julia", "test", "TestPkg") -end + # setup + @everywhere begin + old_act_proj = Base.ACTIVE_PROJECT[] + pushfirst!(Base.LOAD_PATH, "@") + Base.ACTIVE_PROJECT[] = joinpath(Sys.BINDIR, "..", "share", "julia", "test", "TestPkg") + end -# cause precompilation of TestPkg to avoid race condition -Base.compilecache(Base.identify_package("TestPkg")) + # cause precompilation of TestPkg to avoid race condition + Base.compilecache(Base.identify_package("TestPkg")) -@everywhere using TestPkg -@everywhere using TestPkg + @everywhere using TestPkg + @everywhere using TestPkg -@everywhere begin - Base.ACTIVE_PROJECT[] = old_act_proj - popfirst!(Base.LOAD_PATH) -end - -@everywhere using Test, Random, LinearAlgebra + @everywhere begin + Base.ACTIVE_PROJECT[] = old_act_proj + popfirst!(Base.LOAD_PATH) + end -id_me = myid() -id_other = filter(x -> x != id_me, procs())[rand(1:(nprocs()-1))] + @everywhere using Test, Random, LinearAlgebra +end -# Test role -@everywhere using DistributedNext -@test DistributedNext.myrole() === :master -for wid = workers() - wrole = remotecall_fetch(wid) do - DistributedNext.myrole() +@testset "Test role" begin + @everywhere using DistributedNext + @test DistributedNext.myrole() === :master + for wid = workers() + wrole = remotecall_fetch(wid) do + DistributedNext.myrole() + end + @test wrole === :worker end - @test wrole === :worker end -# Test remote() -let - pool = default_worker_pool() +@testset "Test remote()" begin + let + pool = default_worker_pool() - count = 0 - count_condition = Condition() + count = 0 + count_condition = Condition() - function remote_wait(c) - @async_logerr begin - count += 1 - remote(take!)(c) - count -= 1 - notify(count_condition) + function remote_wait(c) + @async_logerr begin + count += 1 + remote(take!)(c) + count -= 1 + notify(count_condition) + end + yield() end - yield() - end - testchannels = [RemoteChannel() for i in 1:nworkers()] - testcount = 0 - @test isready(pool) == true - for c in testchannels - @test count == testcount - remote_wait(c) - testcount += 1 - end - @test 
count == testcount - @test isready(pool) == false - - for c in testchannels - @test count == testcount - put!(c, "foo") - testcount -= 1 - (count == testcount) || wait(count_condition) - @test count == testcount + testchannels = [RemoteChannel() for i in 1:nworkers()] + testcount = 0 @test isready(pool) == true - end + for c in testchannels + @test count == testcount + remote_wait(c) + testcount += 1 + end + @test count == testcount + @test isready(pool) == false + + for c in testchannels + @test count == testcount + put!(c, "foo") + testcount -= 1 + (count == testcount) || wait(count_condition) + @test count == testcount + @test isready(pool) == true + end - @test count == 0 + @test count == 0 - for c in testchannels + for c in testchannels + @test count == testcount + remote_wait(c) + testcount += 1 + end @test count == testcount - remote_wait(c) - testcount += 1 - end - @test count == testcount - @test isready(pool) == false + @test isready(pool) == false + + for c in reverse(testchannels) + @test count == testcount + put!(c, "foo") + testcount -= 1 + (count == testcount) || wait(count_condition) + @test count == testcount + @test isready(pool) == true + end - for c in reverse(testchannels) - @test count == testcount - put!(c, "foo") - testcount -= 1 - (count == testcount) || wait(count_condition) - @test count == testcount - @test isready(pool) == true + @test count == 0 end - - @test count == 0 end # Test Futures @@ -130,9 +138,6 @@ function testf(id) @test fetch(f) === :OK end -testf(id_me) -testf(id_other) - function poll_while(f::Function; timeout_seconds::Integer = 120) start_time = time_ns() while f() @@ -168,567 +173,584 @@ function test_futures_dgc(id) @test poll_while(() -> remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, fid)) end -test_futures_dgc(id_me) -test_futures_dgc(id_other) +@testset "GC tests for Futures" begin + testf(id_me) + testf(id_other) -# if sent to another worker, it should not be deleted till all references are fetched. -wid1 = workers()[1] -wid2 = workers()[2] -f = remotecall(myid, wid1) -fid = remoteref_id(f) + test_futures_dgc(id_me) + test_futures_dgc(id_other) -fstore = RemoteChannel(wid2) -put!(fstore, f) + # if sent to another worker, it should not be deleted till all references are fetched. + wid1 = workers()[1] + wid2 = workers()[2] + f = remotecall(myid, wid1) + fid = remoteref_id(f) + + fstore = RemoteChannel(wid2) + put!(fstore, f) -@test fetch(f) == wid1 -@test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == true -remotecall_fetch(r->(fetch(fetch(r)); yield()), wid2, fstore) -sleep(0.5) # to ensure that wid2 gc messages have been executed on wid1 -@test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == false + @test fetch(f) == wid1 + @test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == true + remotecall_fetch(r->(fetch(fetch(r)); yield()), wid2, fstore) + sleep(0.5) # to ensure that wid2 gc messages have been executed on wid1 + @test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == false -# put! should release remote reference since it would have been cached locally -f = Future(wid1) -fid = remoteref_id(f) + # put! 
should release remote reference since it would have been cached locally + f = Future(wid1) + fid = remoteref_id(f) -# should not be created remotely till accessed -@test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == false -# create it remotely -isready(f) + # should not be created remotely till accessed + @test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == false + # create it remotely + isready(f) -@test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == true -put!(f, :OK) -@test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == false -@test fetch(f) === :OK + @test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == true + put!(f, :OK) + @test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == false + @test fetch(f) === :OK -# RemoteException should be thrown on a put! when another process has set the value -f = Future(wid1) -fid = remoteref_id(f) + # RemoteException should be thrown on a put! when another process has set the value + f = Future(wid1) + fid = remoteref_id(f) -fstore = RemoteChannel(wid2) -put!(fstore, f) # send f to wid2 -put!(f, :OK) # set value from master + fstore = RemoteChannel(wid2) + put!(fstore, f) # send f to wid2 + put!(f, :OK) # set value from master -@test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == true + @test remotecall_fetch(k->haskey(DistributedNext.PGRP.refs, k), wid1, fid) == true -testval = remotecall_fetch(wid2, fstore) do x - try - put!(fetch(x), :OK) - return 0 - catch e - if isa(e, RemoteException) - return 1 - else - return 2 + testval = remotecall_fetch(wid2, fstore) do x + try + put!(fetch(x), :OK) + return 0 + catch e + if isa(e, RemoteException) + return 1 + else + return 2 + end end end -end -@test testval == 1 + @test testval == 1 -# Issue number #25847 -@everywhere function f25847(ref) - fetch(ref) - return true -end + # Issue number #25847 + @everywhere function f25847(ref) + fetch(ref) + return true + end -f = remotecall_wait(identity, id_other, ones(10)) -rrid = DistributedNext.RRID(f.whence, f.id) -remotecall_fetch(f25847, id_other, f) -@test BitSet([id_me]) == remotecall_fetch(()->DistributedNext.PGRP.refs[rrid].clientset, id_other) - -remotecall_fetch(f25847, id_other, f) -@test BitSet([id_me]) == remotecall_fetch(()->DistributedNext.PGRP.refs[rrid].clientset, id_other) - -finalize(f) -yield() # flush gc msgs -@test poll_while(() -> remotecall_fetch(chk_rrid->(yield(); haskey(DistributedNext.PGRP.refs, chk_rrid)), id_other, rrid)) - -# DistributedNext GC tests for RemoteChannels -function test_remoteref_dgc(id) - rr = RemoteChannel(id) - put!(rr, :OK) - rrid = remoteref_id(rr) - - # remote value should be deleted after finalizing the ref - @test remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, rrid) == true - @test fetch(rr) === :OK - @test remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, rrid) == true - finalize(rr) - yield(); # flush gc msgs - @test poll_while(() -> remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, rrid)) -end -test_remoteref_dgc(id_me) -test_remoteref_dgc(id_other) - -# if sent to another worker, it should not be deleted till the other worker has also finalized. 
-let wid1 = workers()[1], - wid2 = workers()[2], - rr = RemoteChannel(wid1), - rrid = remoteref_id(rr), - fstore = RemoteChannel(wid2) + f = remotecall_wait(identity, id_other, ones(10)) + rrid = DistributedNext.RRID(f.whence, f.id) + remotecall_fetch(f25847, id_other, f) + @test BitSet([id_me]) == remotecall_fetch(()->DistributedNext.PGRP.refs[rrid].clientset, id_other) + + remotecall_fetch(f25847, id_other, f) + @test BitSet([id_me]) == remotecall_fetch(()->DistributedNext.PGRP.refs[rrid].clientset, id_other) - put!(fstore, rr) - @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true - finalize(rr) # finalize locally + finalize(f) yield() # flush gc msgs - @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true - remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely - sleep(0.5) # to ensure that wid2 messages have been executed on wid1 - @test poll_while(() -> remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid)) -end + @test poll_while(() -> remotecall_fetch(chk_rrid->(yield(); haskey(DistributedNext.PGRP.refs, chk_rrid)), id_other, rrid)) +end + +@testset "GC tests for RemoteChannels" begin + function test_remoteref_dgc(id) + rr = RemoteChannel(id) + put!(rr, :OK) + rrid = remoteref_id(rr) + + # remote value should be deleted after finalizing the ref + @test remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, rrid) == true + @test fetch(rr) === :OK + @test remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, rrid) == true + finalize(rr) + yield(); # flush gc msgs + @test poll_while(() -> remotecall_fetch(k->(yield();haskey(DistributedNext.PGRP.refs, k)), id, rrid)) + end + test_remoteref_dgc(id_me) + test_remoteref_dgc(id_other) + + # if sent to another worker, it should not be deleted till the other worker has also finalized. + let wid1 = workers()[1], + wid2 = workers()[2], + rr = RemoteChannel(wid1), + rrid = remoteref_id(rr), + fstore = RemoteChannel(wid2) + + put!(fstore, rr) + @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true + finalize(rr) # finalize locally + yield() # flush gc msgs + @test remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid) == true + remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely + sleep(0.5) # to ensure that wid2 messages have been executed on wid1 + @test poll_while(() -> remotecall_fetch(k -> haskey(DistributedNext.PGRP.refs, k), wid1, rrid)) + end +end + +@testset "issue #23109 - should not hang" begin + f = @spawnat :any rand(1, 1) + Base.Experimental.@sync begin + for _ in 1:10 + @async fetch(f) + end + end -# Tests for issue #23109 - should not hang. 
-f = @spawnat :any rand(1, 1) -Base.Experimental.@sync begin - for _ in 1:10 + wid1, wid2 = workers()[1:2] + f = @spawnat wid1 rand(1,1) + Base.Experimental.@sync begin @async fetch(f) + @async remotecall_fetch(()->fetch(f), wid2) end end -wid1, wid2 = workers()[1:2] -f = @spawnat wid1 rand(1,1) -Base.Experimental.@sync begin - @async fetch(f) - @async remotecall_fetch(()->fetch(f), wid2) -end +@testset "getindex on Futures and RemoteChannels" begin + @test fetch(@spawnat id_other myid()) == id_other + @test (@fetchfrom id_other myid()) == id_other + pids=[] + for i in 1:nworkers() + push!(pids, @fetch myid()) + end + @test sort(pids) == sort(workers()) -@test fetch(@spawnat id_other myid()) == id_other -@test (@fetchfrom id_other myid()) == id_other - -pids=[] -for i in 1:nworkers() - push!(pids, @fetch myid()) -end -@test sort(pids) == sort(workers()) + # test getindex on Futures and RemoteChannels + function test_indexing(rr) + a = rand(5,5) + put!(rr, a) + @test rr[2,3] == a[2,3] + @test rr[] == a + end -# test getindex on Futures and RemoteChannels -function test_indexing(rr) - a = rand(5,5) - put!(rr, a) - @test rr[2,3] == a[2,3] - @test rr[] == a + test_indexing(Future()) + test_indexing(Future(id_other)) + test_indexing(RemoteChannel()) + test_indexing(RemoteChannel(id_other)) end -test_indexing(Future()) -test_indexing(Future(id_other)) -test_indexing(RemoteChannel()) -test_indexing(RemoteChannel(id_other)) - -# Test ser/deser to non-ClusterSerializer objects. -function test_regular_io_ser(ref::DistributedNext.AbstractRemoteRef) - io = IOBuffer() - serialize(io, ref) - seekstart(io) - ref2 = deserialize(io) - for fld in fieldnames(typeof(ref)) - v = getfield(ref2, fld) - if isa(v, Number) - @test v === zero(typeof(v)) - elseif fld === :lock - @test v isa ReentrantLock - @test !islocked(v) - elseif v !== nothing - error(string("Add test for field ", fld)) +@testset "Ser/deser to non-ClusterSerializer objects" begin + function test_regular_io_ser(ref::DistributedNext.AbstractRemoteRef) + io = IOBuffer() + serialize(io, ref) + seekstart(io) + ref2 = deserialize(io) + for fld in fieldnames(typeof(ref)) + v = getfield(ref2, fld) + if isa(v, Number) + @test v === zero(typeof(v)) + elseif fld === :lock + @test v isa ReentrantLock + @test !islocked(v) + elseif v !== nothing + error(string("Add test for field ", fld)) + end end end + + test_regular_io_ser(Future()) + test_regular_io_ser(RemoteChannel()) end -test_regular_io_ser(Future()) -test_regular_io_ser(RemoteChannel()) - -# Test @distributed load balancing - all processors should get either M or M+1 -# iterations out of the loop range for some M. 
-ids = @distributed((a,b)->[a;b], for i=1:7; myid(); end) -workloads = Int[sum(ids .== i) for i in 2:nprocs()] -@test maximum(workloads) - minimum(workloads) <= 1 - -# @distributed reduction should work even with very short ranges -@test @distributed(+, for i=1:2; i; end) == 3 - -@test_throws ArgumentError sleep(-1) -@test_throws ArgumentError timedwait(()->false, 0.1, pollint=-0.5) - -# specify pids for pmap -@test sort(workers()[1:2]) == sort(unique(pmap(x->(sleep(0.1);myid()), WorkerPool(workers()[1:2]), 1:10))) - -# Testing buffered and unbuffered reads -# This large array should write directly to the socket -a = fill(1, 10^6) -@test a == remotecall_fetch((x)->x, id_other, a) - -# Not a bitstype, should be buffered -s = [randstring() for x in 1:10^5] -@test s == remotecall_fetch((x)->x, id_other, s) - -#large number of small requests -num_small_requests = 10000 -@test fill(id_other, num_small_requests) == [remotecall_fetch(myid, id_other) for i in 1:num_small_requests] - -# test parallel sends of large arrays from multiple tasks to the same remote worker -ntasks = 10 -rr_list = [Channel(1) for x in 1:ntasks] - -for rr in rr_list - local rr - let rr = rr - @async try - for i in 1:10 - a = rand(2*10^5) - @test a == remotecall_fetch(x->x, id_other, a) - yield() +@testset "@distributed and [un]buffered reads" begin + # Test @distributed load balancing - all processors should get either M or M+1 + # iterations out of the loop range for some M. + ids = @distributed((a,b)->[a;b], for i=1:7; myid(); end) + workloads = Int[sum(ids .== i) for i in 2:nprocs()] + @test maximum(workloads) - minimum(workloads) <= 1 + + # @distributed reduction should work even with very short ranges + @test @distributed(+, for i=1:2; i; end) == 3 + + @test_throws ArgumentError sleep(-1) + @test_throws ArgumentError timedwait(()->false, 0.1, pollint=-0.5) + + # specify pids for pmap + @test sort(workers()[1:2]) == sort(unique(pmap(x->(sleep(0.1);myid()), WorkerPool(workers()[1:2]), 1:10))) + + # Testing buffered and unbuffered reads + # This large array should write directly to the socket + a = fill(1, 10^6) + @test a == remotecall_fetch((x)->x, id_other, a) + + # Not a bitstype, should be buffered + s = [randstring() for x in 1:10^5] + @test s == remotecall_fetch((x)->x, id_other, s) + + #large number of small requests + num_small_requests = 10000 + @test fill(id_other, num_small_requests) == [remotecall_fetch(myid, id_other) for i in 1:num_small_requests] + + # test parallel sends of large arrays from multiple tasks to the same remote worker + ntasks = 10 + rr_list = [Channel(1) for x in 1:ntasks] + + for rr in rr_list + local rr + let rr = rr + @async try + for i in 1:10 + a = rand(2*10^5) + @test a == remotecall_fetch(x->x, id_other, a) + yield() + end + put!(rr, :OK) + catch + put!(rr, :ERROR) end - put!(rr, :OK) - catch - put!(rr, :ERROR) end end -end -@test [fetch(rr) for rr in rr_list] == [:OK for x in 1:ntasks] - -function test_channel(c) - @test isopen(c) == true - put!(c, 1) - put!(c, "Hello") - put!(c, 5.0) - - @test isready(c) == true - @test isopen(c) == true - @test fetch(c) == 1 - @test fetch(c) == 1 # Should not have been popped previously - @test take!(c) == 1 - @test take!(c) == "Hello" - @test fetch(c) == 5.0 - @test take!(c) == 5.0 - @test isready(c) == false - @test isopen(c) == true - close(c) - @test isopen(c) == false + @test [fetch(rr) for rr in rr_list] == [:OK for x in 1:ntasks] end -test_channel(Channel(10)) -test_channel(RemoteChannel(()->Channel(10))) +@testset "RemoteChannels" begin + 
function test_channel(c) + @test isopen(c) == true + put!(c, 1) + put!(c, "Hello") + put!(c, 5.0) -c=Channel{Int}(1) -@test_throws MethodError put!(c, "Hello") - -# test channel iterations -function test_iteration(in_c, out_c) - t=@async for v in in_c - put!(out_c, v) + @test isready(c) == true + @test isopen(c) == true + @test fetch(c) == 1 + @test fetch(c) == 1 # Should not have been popped previously + @test take!(c) == 1 + @test take!(c) == "Hello" + @test fetch(c) == 5.0 + @test take!(c) == 5.0 + @test isready(c) == false + @test isopen(c) == true + close(c) + @test isopen(c) == false end - @test isopen(in_c) == true - put!(in_c, 1) - @test take!(out_c) == 1 - put!(in_c, "Hello") - close(in_c) - @test take!(out_c) == "Hello" - @test isopen(in_c) == false - @test_throws InvalidStateException put!(in_c, :foo) - yield() - @test istaskdone(t) == true -end + test_channel(Channel(10)) + test_channel(RemoteChannel(()->Channel(10))) + + c=Channel{Int}(1) + @test_throws MethodError put!(c, "Hello") -test_iteration(Channel(10), Channel(10)) -test_iteration(RemoteChannel(() -> Channel(10)), RemoteChannel(() -> Channel(10))) + # test channel iterations + function test_iteration(in_c, out_c) + t=@async for v in in_c + put!(out_c, v) + end -@everywhere function test_iteration_take(ch) - count = 0 - for x in ch - count += 1 + @test isopen(in_c) == true + put!(in_c, 1) + @test take!(out_c) == 1 + put!(in_c, "Hello") + close(in_c) + @test take!(out_c) == "Hello" + @test isopen(in_c) == false + @test_throws InvalidStateException put!(in_c, :foo) + yield() + @test istaskdone(t) == true end - return count -end -@everywhere test_iteration_collect(ch) = length(collect(ch)) + test_iteration(Channel(10), Channel(10)) + test_iteration(RemoteChannel(() -> Channel(10)), RemoteChannel(() -> Channel(10))) -@everywhere function test_iteration_put(ch, total) - for i in 1:total - put!(ch, i) + @everywhere function test_iteration_take(ch) + count = 0 + for x in ch + count += 1 + end + return count end - close(ch) -end -let ch = RemoteChannel(() -> Channel(1)) - @async test_iteration_put(ch, 10) - @test 10 == @fetchfrom id_other test_iteration_take(ch) - ch = RemoteChannel(() -> Channel(1)) - @async test_iteration_put(ch, 10) - @test 10 == @fetchfrom id_other test_iteration_collect(ch) - # now reverse - ch = RemoteChannel(() -> Channel(1)) - @spawnat id_other test_iteration_put(ch, 10) - @test 10 == test_iteration_take(ch) - ch = RemoteChannel(() -> Channel(1)) - @spawnat id_other test_iteration_put(ch, 10) - @test 10 == test_iteration_collect(ch) -end + @everywhere test_iteration_collect(ch) = length(collect(ch)) -# Test isempty(::RemoteChannel). This should not modify the underlying -# AbstractChannel, which Base's default implementation will do. 
-let - chan = Channel(1) - push!(chan, 1) - remotechan = RemoteChannel(() -> chan) + @everywhere function test_iteration_put(ch, total) + for i in 1:total + put!(ch, i) + end + close(ch) + end - @test !isempty(remotechan) - # Calling `isempty(remotechan)` shouldn't have modified `chan` - @test !isempty(chan) -end + let ch = RemoteChannel(() -> Channel(1)) + @async test_iteration_put(ch, 10) + @test 10 == @fetchfrom id_other test_iteration_take(ch) + ch = RemoteChannel(() -> Channel(1)) + @async test_iteration_put(ch, 10) + @test 10 == @fetchfrom id_other test_iteration_collect(ch) + # now reverse + ch = RemoteChannel(() -> Channel(1)) + @spawnat id_other test_iteration_put(ch, 10) + @test 10 == test_iteration_take(ch) + ch = RemoteChannel(() -> Channel(1)) + @spawnat id_other test_iteration_put(ch, 10) + @test 10 == test_iteration_collect(ch) + end -# make sure exceptions propagate when waiting on Tasks -@test_throws CompositeException (@sync (@async error("oops"))) -try - @sync begin - for i in 1:5 - @async error(i) - end - end - error("unexpected") -catch ex - @test typeof(ex) == CompositeException - @test length(ex) == 5 - @test typeof(ex.exceptions[1]) == TaskFailedException - @test typeof(ex.exceptions[1].task.exception) == ErrorException - # test start, next, and done - for (i, i_ex) in enumerate(ex) - @test i == parse(Int, i_ex.task.exception.msg) - end - # test showerror - err_str = sprint(showerror, ex) - err_one_str = sprint(showerror, ex.exceptions[1]) - @test err_str == err_one_str * "\n\n...and 4 more exceptions.\n" + # Test isempty(::RemoteChannel). This should not modify the underlying + # AbstractChannel, which Base's default implementation will do. + let + chan = Channel(1) + push!(chan, 1) + remotechan = RemoteChannel(() -> chan) + + @test !isempty(remotechan) + # Calling `isempty(remotechan)` shouldn't have modified `chan` + @test !isempty(chan) + end end -@test sprint(showerror, CompositeException()) == "CompositeException()\n" -function test_remoteexception_thrown(expr) +@testset "Exceptions" begin + # make sure exceptions propagate when waiting on Tasks + @test_throws CompositeException (@sync (@async error("oops"))) try - expr() + @sync begin + for i in 1:5 + @async error(i) + end + end error("unexpected") catch ex - @test typeof(ex) == RemoteException - @test typeof(ex.captured) == CapturedException - @test typeof(ex.captured.ex) == ErrorException - @test ex.captured.ex.msg == "foobar" + @test typeof(ex) == CompositeException + @test length(ex) == 5 + @test typeof(ex.exceptions[1]) == TaskFailedException + @test typeof(ex.exceptions[1].task.exception) == ErrorException + # test start, next, and done + for (i, i_ex) in enumerate(ex) + @test i == parse(Int, i_ex.task.exception.msg) + end + # test showerror + err_str = sprint(showerror, ex) + err_one_str = sprint(showerror, ex.exceptions[1]) + @test err_str == err_one_str * "\n\n...and 4 more exceptions.\n" end -end + @test sprint(showerror, CompositeException()) == "CompositeException()\n" -for id in [id_other, id_me] - local id - test_remoteexception_thrown() do - remotecall_fetch(id) do - throw(ErrorException("foobar")) + function test_remoteexception_thrown(expr) + try + expr() + error("unexpected") + catch ex + @test typeof(ex) == RemoteException + @test typeof(ex.captured) == CapturedException + @test typeof(ex.captured.ex) == ErrorException + @test ex.captured.ex.msg == "foobar" end end - test_remoteexception_thrown() do - remotecall_wait(id) do - throw(ErrorException("foobar")) + + for id in [id_other, id_me] + 
local id + test_remoteexception_thrown() do + remotecall_fetch(id) do + throw(ErrorException("foobar")) + end + end + test_remoteexception_thrown() do + remotecall_wait(id) do + throw(ErrorException("foobar")) + end + end + test_remoteexception_thrown() do + wait(remotecall(id) do + throw(ErrorException("foobar")) + end) end end - test_remoteexception_thrown() do - wait(remotecall(id) do - throw(ErrorException("foobar")) - end) - end -end -# make sure the stackframe from the remote error can be serialized -let ex - try - remotecall_fetch(id_other) do - @eval module AModuleLocalToOther + # make sure the stackframe from the remote error can be serialized + let ex + try + remotecall_fetch(id_other) do + @eval module AModuleLocalToOther foo() = throw(ErrorException("A.error")) foo() + end end + catch ex end - catch ex - end - @test (ex::RemoteException).pid == id_other - @test ((ex.captured::CapturedException).ex::ErrorException).msg == "A.error" - bt = ex.captured.processed_bt::Array{Any,1} - @test length(bt) > 1 - frame, repeated = bt[1]::Tuple{Base.StackTraces.StackFrame, Int} - @test frame.func === :foo - @test frame.linfo === nothing - @test repeated == 1 -end - -# pmap tests. Needs at least 4 processors dedicated to the below tests. Which we currently have -# since the distributed tests are now spawned as a separate set. - -# Test all combinations of pmap keyword args. -pmap_args = [ - (:distributed, [:default, false]), - (:batch_size, [:default,2]), - (:on_error, [:default, e -> (e.msg == "foobar" ? true : rethrow())]), - (:retry_delays, [:default, fill(0.001, 1000)]), - (:retry_check, [:default, (s,e) -> (s,endswith(e.msg,"foobar"))]), - ] - -kwdict = Dict() -function walk_args(i) - if i > length(pmap_args) - kwargs = [] - for (k,v) in kwdict - if v !== :default - push!(kwargs, (k,v)) + @test (ex::RemoteException).pid == id_other + @test ((ex.captured::CapturedException).ex::ErrorException).msg == "A.error" + bt = ex.captured.processed_bt::Array{Any,1} + @test length(bt) > 1 + frame, repeated = bt[1]::Tuple{Base.StackTraces.StackFrame, Int} + @test frame.func === :foo + @test frame.linfo === nothing + @test repeated == 1 + end +end + +@testset "pmap()" begin + # pmap tests. Needs at least 4 processors dedicated to the below tests. Which we currently have + # since the distributed tests are now spawned as a separate set. + + # Test all combinations of pmap keyword args. + pmap_args = [ + (:distributed, [:default, false]), + (:batch_size, [:default,2]), + (:on_error, [:default, e -> (e.msg == "foobar" ? true : rethrow())]), + (:retry_delays, [:default, fill(0.001, 1000)]), + (:retry_check, [:default, (s,e) -> (s,endswith(e.msg,"foobar"))]), + ] + + kwdict = Dict() + function walk_args(i) + if i > length(pmap_args) + kwargs = [] + for (k,v) in kwdict + if v !== :default + push!(kwargs, (k,v)) + end end - end - - data = 1:100 - testw = kwdict[:distributed] === false ? [1] : workers() - - if kwdict[:retry_delays] !== :default - mapf = x -> iseven(myid()) ? error("notfoobar") : (x*2, myid()) - results_test = pmap_res -> begin - results = [x[1] for x in pmap_res] - pids = [x[2] for x in pmap_res] - @test results == [2:2:200...] - for p in testw - if isodd(p) - @test p in pids - else - @test !(p in pids) + data = 1:100 + + testw = kwdict[:distributed] === false ? [1] : workers() + + if kwdict[:retry_delays] !== :default + mapf = x -> iseven(myid()) ? 
error("notfoobar") : (x*2, myid()) + results_test = pmap_res -> begin + results = [x[1] for x in pmap_res] + pids = [x[2] for x in pmap_res] + @test results == [2:2:200...] + for p in testw + if isodd(p) + @test p in pids + else + @test !(p in pids) + end end end - end - elseif kwdict[:on_error] === :default - mapf = x -> (x*2, myid()) - results_test = pmap_res -> begin - results = [x[1] for x in pmap_res] - pids = [x[2] for x in pmap_res] - @test results == [2:2:200...] - for p in testw - @test p in pids + elseif kwdict[:on_error] === :default + mapf = x -> (x*2, myid()) + results_test = pmap_res -> begin + results = [x[1] for x in pmap_res] + pids = [x[2] for x in pmap_res] + @test results == [2:2:200...] + for p in testw + @test p in pids + end end - end - else - mapf = x -> iseven(x) ? error("foobar") : (x*2, myid()) - results_test = pmap_res -> begin - w = testw - for (idx,x) in enumerate(data) - if iseven(x) - @test pmap_res[idx] == true - else - @test pmap_res[idx][1] == x*2 - @test pmap_res[idx][2] in w + else + mapf = x -> iseven(x) ? error("foobar") : (x*2, myid()) + results_test = pmap_res -> begin + w = testw + for (idx,x) in enumerate(data) + if iseven(x) + @test pmap_res[idx] == true + else + @test pmap_res[idx][1] == x*2 + @test pmap_res[idx][2] in w + end end end end - end - try - results_test(pmap(mapf, data; kwargs...)) - catch - println("pmap executing with args : ", kwargs) - rethrow() - end + try + results_test(pmap(mapf, data; kwargs...)) + catch + println("pmap executing with args : ", kwargs) + rethrow() + end - return - end + return + end - kwdict[pmap_args[i][1]] = pmap_args[i][2][1] - walk_args(i+1) + kwdict[pmap_args[i][1]] = pmap_args[i][2][1] + walk_args(i+1) - kwdict[pmap_args[i][1]] = pmap_args[i][2][2] - walk_args(i+1) -end + kwdict[pmap_args[i][1]] = pmap_args[i][2][2] + walk_args(i+1) + end -# Start test for various kw arg combinations -walk_args(1) + # Start test for various kw arg combinations + walk_args(1) -include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "generic_map_tests.jl")) -empty_pool = WorkerPool([myid()]) -pmap_fallback = (f, c...) -> pmap(f, empty_pool, c...) -generic_map_tests(pmap_fallback) + include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "generic_map_tests.jl")) + empty_pool = WorkerPool([myid()]) + pmap_fallback = (f, c...) -> pmap(f, empty_pool, c...) + generic_map_tests(pmap_fallback) -# pmap with various types. Test for equivalence with map -run_map_equivalence_tests(pmap) -@test pmap(uppercase, "Hello World!") == map(uppercase, "Hello World!") + # pmap with various types. Test for equivalence with map + run_map_equivalence_tests(pmap) + @test pmap(uppercase, "Hello World!") == map(uppercase, "Hello World!") -# Simple test for pmap throws error -let error_thrown = false - try - pmap(x -> x == 50 ? error("foobar") : x, 1:100) - catch e - @test e.captured.ex.msg == "foobar" - error_thrown = true + # Simple test for pmap throws error + let error_thrown = false + try + pmap(x -> x == 50 ? error("foobar") : x, 1:100) + catch e + @test e.captured.ex.msg == "foobar" + error_thrown = true + end + @test error_thrown end - @test error_thrown -end -# Test pmap with a generator type iterator -@test [1:100...] 
== pmap(x->x, Base.Generator(x->(sleep(0.0001); x), 1:100)) - -# Test pgenerate -n = 10 -as = [rand(4,4) for i in 1:n] -bs = deepcopy(as) -cs = collect(DistributedNext.pgenerate(x->(sleep(rand()*0.1); svd(x)), bs)) -svdas = map(svd, as) -for i in 1:n - @test cs[i].U ≈ svdas[i].U - @test cs[i].S ≈ svdas[i].S - @test cs[i].V ≈ svdas[i].V -end + # Test pmap with a generator type iterator + @test [1:100...] == pmap(x->x, Base.Generator(x->(sleep(0.0001); x), 1:100)) -# Test that the default worker pool cycles through all workers -pmap(_->myid(), 1:nworkers()) # priming run -@test nworkers() == length(unique(pmap(_->myid(), 1:100))) - -# Test same behaviour when executed on a worker -@test nworkers() == length(unique(remotecall_fetch(()->pmap(_->myid(), 1:100), id_other))) - -# Same tests with custom worker pools. -wp = WorkerPool(workers()) -@test nworkers() == length(unique(pmap(_->myid(), wp, 1:100))) -@test nworkers() == length(unique(remotecall_fetch(wp->pmap(_->myid(), wp, 1:100), id_other, wp))) -wp = WorkerPool(2:3) -@test sort(unique(pmap(_->myid(), wp, 1:100))) == [2,3] - -# wait on worker pool -wp = WorkerPool(2:2) -w = take!(wp) - -# local call to _wait -@test !isready(wp) -t = @async wait(wp) -@test !istaskdone(t) -put!(wp, w) -status = timedwait(() -> istaskdone(t), 10) -@test status == :ok - -# remote call to _wait -take!(wp) -@test !isready(wp) -f = @spawnat w wait(wp) -@test !isready(f) -put!(wp, w) -status = timedwait(() -> isready(f), 10) -@test status == :ok - -# CachingPool tests -wp = CachingPool(workers()) -@test [1:100...] == pmap(x->x, wp, 1:100) - -clear!(wp) -@test length(wp.map_obj2ref) == 0 - -# default_worker_pool! tests -wp_default = DistributedNext.default_worker_pool() -try - local wp = CachingPool(workers()) - DistributedNext.default_worker_pool!(wp) + # Test pgenerate + n = 10 + as = [rand(4,4) for i in 1:n] + bs = deepcopy(as) + cs = collect(DistributedNext.pgenerate(x->(sleep(rand()*0.1); svd(x)), bs)) + svdas = map(svd, as) + for i in 1:n + @test cs[i].U ≈ svdas[i].U + @test cs[i].S ≈ svdas[i].S + @test cs[i].V ≈ svdas[i].V + end + + # Test that the default worker pool cycles through all workers + pmap(_->myid(), 1:nworkers()) # priming run + @test nworkers() == length(unique(pmap(_->myid(), 1:100))) + + # Test same behaviour when executed on a worker + @test nworkers() == length(unique(remotecall_fetch(()->pmap(_->myid(), 1:100), id_other))) + + # Same tests with custom worker pools. + wp = WorkerPool(workers()) + @test nworkers() == length(unique(pmap(_->myid(), wp, 1:100))) + @test nworkers() == length(unique(remotecall_fetch(wp->pmap(_->myid(), wp, 1:100), id_other, wp))) + wp = WorkerPool(2:3) + @test sort(unique(pmap(_->myid(), wp, 1:100))) == [2,3] + + # wait on worker pool + wp = WorkerPool(2:2) + w = take!(wp) + + # local call to _wait + @test !isready(wp) + t = @async wait(wp) + @test !istaskdone(t) + put!(wp, w) + status = timedwait(() -> istaskdone(t), 10) + @test status == :ok + + # remote call to _wait + take!(wp) + @test !isready(wp) + f = @spawnat w wait(wp) + @test !isready(f) + put!(wp, w) + status = timedwait(() -> isready(f), 10) + @test status == :ok + + # CachingPool tests + wp = CachingPool(workers()) @test [1:100...] == pmap(x->x, wp, 1:100) - @test !isempty(wp.map_obj2ref) + clear!(wp) - @test isempty(wp.map_obj2ref) -finally - DistributedNext.default_worker_pool!(wp_default) + @test length(wp.map_obj2ref) == 0 + + # default_worker_pool! 
tests + wp_default = DistributedNext.default_worker_pool() + try + local wp = CachingPool(workers()) + DistributedNext.default_worker_pool!(wp) + @test [1:100...] == pmap(x->x, wp, 1:100) + @test !isempty(wp.map_obj2ref) + clear!(wp) + @test isempty(wp.map_obj2ref) + finally + DistributedNext.default_worker_pool!(wp_default) + end end # The below block of tests are usually run only on local development systems, @@ -760,761 +782,779 @@ if DoFullTest @test all([p == remotecall_fetch(myid, p) for p in all_w]) end -let t = @task 42 - schedule(t, ErrorException(""), error=true) - @test_throws TaskFailedException(t) Base.wait(t) -end +@testset "Various individual issues" begin + let t = @task 42 + schedule(t, ErrorException(""), error=true) + @test_throws TaskFailedException(t) Base.wait(t) + end -# issue #8207 -let A = Any[] - @distributed (+) for i in (push!(A,1); 1:2) - i + # issue #8207 + let A = Any[] + @distributed (+) for i in (push!(A,1); 1:2) + i + end + @test length(A) == 1 end - @test length(A) == 1 -end -# issue #13168 -function f13168(n) - val = 0 - for i = 1:n - val += sum(rand(n, n)^2) + # issue #13168 + function f13168(n) + val = 0 + for i = 1:n + val += sum(rand(n, n)^2) + end + return val + end + let t = schedule(@task f13168(100)) + @test t.state === :runnable + @test t.queue !== nothing + @test_throws ErrorException schedule(t) + yield() + @test t.state === :done + @test t.queue === nothing + @test_throws ErrorException schedule(t) + @test isa(fetch(t), Float64) end - return val -end -let t = schedule(@task f13168(100)) - @test t.state === :runnable - @test t.queue !== nothing - @test_throws ErrorException schedule(t) - yield() - @test t.state === :done - @test t.queue === nothing - @test_throws ErrorException schedule(t) - @test isa(fetch(t), Float64) -end -# issue #13122 -@test remotecall_fetch(identity, workers()[1], C_NULL) === C_NULL + # issue #13122 + @test remotecall_fetch(identity, workers()[1], C_NULL) === C_NULL -# issue #11062 -function t11062() - @async v11062 = 1 - v11062 = 2 -end + # issue #11062 + function t11062() + @async v11062 = 1 + v11062 = 2 + end -@test t11062() == 2 + @test t11062() == 2 -# issue #15406 -v15406 = remotecall_wait(() -> 1, id_other) -fetch(v15406) -remotecall_wait(fetch, id_other, v15406) + # issue #15406 + v15406 = remotecall_wait(() -> 1, id_other) + fetch(v15406) + remotecall_wait(fetch, id_other, v15406) -# issue #43396 -# Covers the remote fetch where the value returned is `nothing` -# May be caused by attempting to unwrap a non-`Some` type with `something` -# `call_on_owner` ref fetches return values not wrapped in `Some` -# and have to be returned directly -@test nothing === fetch(remotecall(() -> nothing, workers()[1])) -@test 10 === fetch(remotecall(() -> 10, workers()[1])) + # issue #43396 + # Covers the remote fetch where the value returned is `nothing` + # May be caused by attempting to unwrap a non-`Some` type with `something` + # `call_on_owner` ref fetches return values not wrapped in `Some` + # and have to be returned directly + @test nothing === fetch(remotecall(() -> nothing, workers()[1])) + @test 10 === fetch(remotecall(() -> 10, workers()[1])) +end +# Helper modules for the tests +module LocalFoo +global foo=1 +end -# Test various forms of remotecall* invocations +module LocalBar +using DistributedNext +bar() = @everywhere new_bar()=myid() +end -@everywhere f_args(v1, v2=0; kw1=0, kw2=0) = v1+v2+kw1+kw2 +f16091a() = 1 +f16091b = () -> 1 -function test_f_args(result, args...; kwargs...) 
- @test fetch(remotecall(args...; kwargs...)) == result - @test fetch(remotecall_wait(args...; kwargs...)) == result - @test remotecall_fetch(args...; kwargs...) == result +@testset "remotecall*()" begin + # Test various forms of remotecall* invocations - # A visual test - remote_do should NOT print any errors - remote_do(args...; kwargs...) -end + @everywhere f_args(v1, v2=0; kw1=0, kw2=0) = v1+v2+kw1+kw2 -for tid in [id_other, id_me, default_worker_pool()] - test_f_args(1, f_args, tid, 1) - test_f_args(3, f_args, tid, 1, 2) - test_f_args(5, f_args, tid, 1; kw1=4) - test_f_args(13, f_args, tid, 1; kw1=4, kw2=8) - test_f_args(15, f_args, tid, 1, 2; kw1=4, kw2=8) -end + function test_f_args(result, args...; kwargs...) + @test fetch(remotecall(args...; kwargs...)) == result + @test fetch(remotecall_wait(args...; kwargs...)) == result + @test remotecall_fetch(args...; kwargs...) == result -# Test remote_do -f=Future(id_me) -remote_do(fut->put!(fut, myid()), id_me, f) -@test fetch(f) == id_me + # A visual test - remote_do should NOT print any errors + remote_do(args...; kwargs...) + end -f=Future(id_other) -remote_do(fut->put!(fut, myid()), id_other, f) -@test fetch(f) == id_other + for tid in [id_other, id_me, default_worker_pool()] + test_f_args(1, f_args, tid, 1) + test_f_args(3, f_args, tid, 1, 2) + test_f_args(5, f_args, tid, 1; kw1=4) + test_f_args(13, f_args, tid, 1; kw1=4, kw2=8) + test_f_args(15, f_args, tid, 1, 2; kw1=4, kw2=8) + end -# Github issue #29932 -rc_unbuffered = RemoteChannel(()->Channel{Vector{Float64}}(0)) -@test eltype(rc_unbuffered) == Vector{Float64} + # Test remote_do + f=Future(id_me) + remote_do(fut->put!(fut, myid()), id_me, f) + @test fetch(f) == id_me -@async begin - # Trigger direct write (no buffering) of largish array - array_sz = Int(Base.SZ_UNBUFFERED_IO/8) + 1 - largev = zeros(array_sz) - for i in 1:10 - largev[1] = float(i) - put!(rc_unbuffered, largev) - end -end + f=Future(id_other) + remote_do(fut->put!(fut, myid()), id_other, f) + @test fetch(f) == id_other -@test remotecall_fetch(rc -> begin - for i in 1:10 - take!(rc)[1] != float(i) && error("Failed") - end - return :OK - end, id_other, rc_unbuffered) === :OK - -# github issue 33972 -rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other) -close(rc_unbuffered_other) -try; take!(rc_unbuffered_other); catch; end -@test !remotecall_fetch(rc -> islocked(DistributedNext.lookup_ref(remoteref_id(rc)).synctake), - id_other, rc_unbuffered_other) - -# github PR #14456 -n = DoFullTest ? 
6 : 5 -for i = 1:10^n - fetch(@spawnat myid() myid()) -end + # Github issue #29932 + rc_unbuffered = RemoteChannel(()->Channel{Vector{Float64}}(0)) + @test eltype(rc_unbuffered) == Vector{Float64} -# issue #15451 -@test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7 - -# issue #16091 -mutable struct T16091 end -wid = workers()[1] -try - remotecall_fetch(()->T16091, wid) - @test "unreachable" === true -catch ex - ex = ((ex::RemoteException).captured::CapturedException).ex - @test (ex::UndefVarError).var === :T16091 -end -try - remotecall_fetch(identity, wid, T16091) - @test "unreachable" === true -catch ex - ex = ((ex::RemoteException).captured::CapturedException).ex - @test (ex::UndefVarError).var === :T16091 -end + @async begin + # Trigger direct write (no buffering) of largish array + array_sz = Int(Base.SZ_UNBUFFERED_IO/8) + 1 + largev = zeros(array_sz) + for i in 1:10 + largev[1] = float(i) + put!(rc_unbuffered, largev) + end + end -f16091a() = 1 -remotecall_fetch(()->eval(:(f16091a() = 2)), wid) -@test remotecall_fetch(f16091a, wid) === 2 -@test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1 + @test remotecall_fetch(rc -> begin + for i in 1:10 + take!(rc)[1] != float(i) && error("Failed") + end + return :OK + end, id_other, rc_unbuffered) === :OK -# these will only heisen-fail, since it depends on the gensym counter collisions: -f16091b = () -> 1 -remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid) -@test remotecall_fetch(f16091b, 2) === 1 -# Global anonymous functions are over-written... -@test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 1 - -# ...while local anonymous functions are by definition, local. -let - f16091c = () -> 1 - @test remotecall_fetch(f16091c, 2) === 1 - @test remotecall_fetch( - myid -> begin - let - f16091c = () -> 2 - remotecall_fetch(f16091c, myid) - end - end, wid, myid()) === 2 -end + # github issue 33972 + rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other) + close(rc_unbuffered_other) + try; take!(rc_unbuffered_other); catch; end + @test !remotecall_fetch(rc -> islocked(DistributedNext.lookup_ref(remoteref_id(rc)).synctake), + id_other, rc_unbuffered_other) -# issue #16451 -rng=RandomDevice() -retval = @distributed (+) for _ in 1:10 - rand(rng) -end -@test retval > 0.0 && retval < 10.0 + # github PR #14456 + n = DoFullTest ? 
6 : 5 + for i = 1:10^n + fetch(@spawnat myid() myid()) + end -rand(rng) -retval = @distributed (+) for _ in 1:10 - rand(rng) -end -@test retval > 0.0 && retval < 10.0 + # issue #15451 + @test remotecall_fetch(x->(y->2y)(x)+1, workers()[1], 3) == 7 -# serialization tests -wrkr1 = workers()[1] -wrkr2 = workers()[end] + # issue #16091 + mutable struct T16091 end + wid = workers()[1] + try + remotecall_fetch(()->T16091, wid) + @test "unreachable" === true + catch ex + ex = ((ex::RemoteException).captured::CapturedException).ex + @test (ex::UndefVarError).var === :T16091 + end + try + remotecall_fetch(identity, wid, T16091) + @test "unreachable" === true + catch ex + ex = ((ex::RemoteException).captured::CapturedException).ex + @test (ex::UndefVarError).var === :T16091 + end + + remotecall_fetch(()->eval(:(f16091a() = 2)), wid) + @test remotecall_fetch(f16091a, wid) === 2 + @test remotecall_fetch((myid)->remotecall_fetch(f16091a, myid), wid, myid()) === 1 + + # these will only heisen-fail, since it depends on the gensym counter collisions: + remotecall_fetch(()->eval(:(f16091b = () -> 2)), wid) + @test remotecall_fetch(f16091b, 2) === 1 + # Global anonymous functions are over-written... + @test remotecall_fetch((myid)->remotecall_fetch(f16091b, myid), wid, myid()) === 1 + + # ...while local anonymous functions are by definition, local. + let + f16091c = () -> 1 + @test remotecall_fetch(f16091c, 2) === 1 + @test remotecall_fetch( + myid -> begin + let + f16091c = () -> 2 + remotecall_fetch(f16091c, myid) + end + end, wid, myid()) === 2 + end -@test remotecall_fetch(p->remotecall_fetch(myid, p), wrkr1, wrkr2) == wrkr2 + # issue #16451 + rng=RandomDevice() + retval = @distributed (+) for _ in 1:10 + rand(rng) + end + @test retval > 0.0 && retval < 10.0 -# Send f to wrkr1 and wrkr2. Then try calling f on wrkr2 from wrkr1 -f_myid = ()->myid() -@test wrkr1 == remotecall_fetch(f_myid, wrkr1) -@test wrkr2 == remotecall_fetch(f_myid, wrkr2) -@test wrkr2 == remotecall_fetch((f, p)->remotecall_fetch(f, p), wrkr1, f_myid, wrkr2) + rand(rng) + retval = @distributed (+) for _ in 1:10 + rand(rng) + end + @test retval > 0.0 && retval < 10.0 -# Deserialization error recovery test -# locally defined module, but unavailable on workers -module LocalFoo - global foo=1 -end + # serialization tests + wrkr1 = workers()[1] + wrkr2 = workers()[end] -let - @test_throws RemoteException remotecall_fetch(()->LocalFoo.foo, 2) + @test remotecall_fetch(p->remotecall_fetch(myid, p), wrkr1, wrkr2) == wrkr2 - bad_thunk = ()->NonexistentModule.f() - @test_throws RemoteException remotecall_fetch(bad_thunk, 2) + # Send f to wrkr1 and wrkr2. 
Then try calling f on wrkr2 from wrkr1 + f_myid = ()->myid() + @test wrkr1 == remotecall_fetch(f_myid, wrkr1) + @test wrkr2 == remotecall_fetch(f_myid, wrkr2) + @test wrkr2 == remotecall_fetch((f, p)->remotecall_fetch(f, p), wrkr1, f_myid, wrkr2) - # Test that the stream is still usable - @test remotecall_fetch(()->:test,2) === :test - ref = remotecall(bad_thunk, 2) - @test_throws RemoteException fetch(ref) -end + # Deserialization error recovery test + # locally defined module, but unavailable on workers + let + @test_throws RemoteException remotecall_fetch(()->LocalFoo.foo, 2) -# Test calling @everywhere from a module not defined on the workers -module LocalBar - using DistributedNext - bar() = @everywhere new_bar()=myid() -end -LocalBar.bar() -for p in procs() - @test p == remotecall_fetch(new_bar, p) -end + bad_thunk = ()->NonexistentModule.f() + @test_throws RemoteException remotecall_fetch(bad_thunk, 2) -# @everywhere (remotecall_eval) behaviors (#22589) -let (p, p2) = filter!(p -> p != myid(), procs()) - @test (myid() + 1) == @everywhere myid() (myid() + 1) - @test (p * 2) == @everywhere p (myid() * 2) - @test 1 == @everywhere p defined_on_p = 1 - @test !@isdefined defined_on_p - @test !isdefined(Main, :defined_on_p) - @test remotecall_fetch(isdefined, p, Main, :defined_on_p) - @test !remotecall_fetch(isdefined, p2, Main, :defined_on_p) - @test nothing === @everywhere [p, p] defined_on_p += 1 - @test 3 === @everywhere p defined_on_p - let ref = Ref(0) - @test nothing === - @everywhere [myid(), p, myid(), myid(), p] begin - Test.@test Main === @__MODULE__ - $ref[] += 1 - end - @test ref[] == 3 + # Test that the stream is still usable + @test remotecall_fetch(()->:test,2) === :test + ref = remotecall(bad_thunk, 2) + @test_throws RemoteException fetch(ref) end - function test_throw_on(procs, msg) - try - @everywhere procs error($msg) - error("test failed to throw") - catch excpt - if procs isa Int - ex = Any[excpt] - else - ex = (excpt::CompositeException).exceptions - end - for (p, ex) in zip(procs, ex) - local p - if procs isa Int || p != myid() - @test (ex::RemoteException).pid == p - ex = ((ex::RemoteException).captured::CapturedException).ex + + # Test calling @everywhere from a module not defined on the workers + LocalBar.bar() + for p in procs() + @test p == remotecall_fetch(new_bar, p) + end + + # @everywhere (remotecall_eval) behaviors (#22589) + let (p, p2) = filter!(p -> p != myid(), procs()) + @test (myid() + 1) == @everywhere myid() (myid() + 1) + @test (p * 2) == @everywhere p (myid() * 2) + @test 1 == @everywhere p defined_on_p = 1 + @test !@isdefined defined_on_p + @test !isdefined(Main, :defined_on_p) + @test remotecall_fetch(isdefined, p, Main, :defined_on_p) + @test !remotecall_fetch(isdefined, p2, Main, :defined_on_p) + @test nothing === @everywhere [p, p] defined_on_p += 1 + @test 3 === @everywhere p defined_on_p + let ref = Ref(0) + @test nothing === + @everywhere [myid(), p, myid(), myid(), p] begin + Test.@test Main === @__MODULE__ + $ref[] += 1 + end + @test ref[] == 3 + end + function test_throw_on(procs, msg) + try + @everywhere procs error($msg) + error("test failed to throw") + catch excpt + if procs isa Int + ex = Any[excpt] else - ex = (ex::TaskFailedException).task.exception + ex = (excpt::CompositeException).exceptions + end + for (p, ex) in zip(procs, ex) + local p + if procs isa Int || p != myid() + @test (ex::RemoteException).pid == p + ex = ((ex::RemoteException).captured::CapturedException).ex + else + ex = (ex::TaskFailedException).task.exception 
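+                        # (for myid() inside a pid vector, @everywhere runs the
+                        # local part in a task, so the failure arrives as a
+                        # TaskFailedException rather than a RemoteException)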
+ end + @test (ex::ErrorException).msg == msg end - @test (ex::ErrorException).msg == msg end end + test_throw_on(p, "everywhere on p") + test_throw_on(myid(), "everywhere on myid") + test_throw_on([p, myid()], "everywhere on myid and p") + test_throw_on([p2, p], "everywhere on p and p2") end - test_throw_on(p, "everywhere on p") - test_throw_on(myid(), "everywhere on myid") - test_throw_on([p, myid()], "everywhere on myid and p") - test_throw_on([p2, p], "everywhere on p and p2") end -# Test addprocs enable_threaded_blas parameter - -function get_remote_num_threads(processes_added) - return [remotecall_fetch(BLAS.get_num_threads, proc_id) for proc_id in processes_added] -end +@testset "addprocs enable_threaded_blas parameter" begin + function get_remote_num_threads(processes_added) + return [remotecall_fetch(BLAS.get_num_threads, proc_id) for proc_id in processes_added] + end -function test_blas_config(pid, expected) - for worker in DistributedNext.PGRP.workers - if worker.id == pid - @test worker.config.enable_threaded_blas == expected - return + function test_blas_config(pid, expected) + for worker in DistributedNext.PGRP.workers + if worker.id == pid + @test worker.config.enable_threaded_blas == expected + return + end end end -end -function test_add_procs_threaded_blas() - master_blas_thread_count = BLAS.get_num_threads() - if master_blas_thread_count === nothing - @warn "Skipping blas num threads tests due to unsupported blas version" - return - end + function test_add_procs_threaded_blas() + master_blas_thread_count = BLAS.get_num_threads() + if master_blas_thread_count === nothing + @warn "Skipping blas num threads tests due to unsupported blas version" + return + end - # Test with default enable_threaded_blas false - processes_added = addprocs_with_testenv(2) - for proc_id in processes_added - test_blas_config(proc_id, false) - end + # Test with default enable_threaded_blas false + processes_added = addprocs_with_testenv(2) + for proc_id in processes_added + test_blas_config(proc_id, false) + end - # Master thread should not have changed - @test BLAS.get_num_threads() == master_blas_thread_count + # Master thread should not have changed + @test BLAS.get_num_threads() == master_blas_thread_count - # Threading disabled in children by default - thread_counts_by_process = get_remote_num_threads(processes_added) - for thread_count in thread_counts_by_process - @test thread_count == 1 - end - rmprocs(processes_added) + # Threading disabled in children by default + thread_counts_by_process = get_remote_num_threads(processes_added) + for thread_count in thread_counts_by_process + @test thread_count == 1 + end + rmprocs(processes_added) - processes_added = addprocs_with_testenv(2, enable_threaded_blas=true) - for proc_id in processes_added - test_blas_config(proc_id, true) - end + processes_added = addprocs_with_testenv(2, enable_threaded_blas=true) + for proc_id in processes_added + test_blas_config(proc_id, true) + end - @test BLAS.get_num_threads() == master_blas_thread_count + @test BLAS.get_num_threads() == master_blas_thread_count - # BLAS.set_num_threads(`num`) doesn't cause BLAS.get_num_threads to return `num` - # depending on the machine, the BLAS version, and BLAS configuration, so - # we need a very lenient test. 
- thread_counts_by_process = get_remote_num_threads(processes_added) - for thread_count in thread_counts_by_process - @test thread_count >= 1 + # BLAS.set_num_threads(`num`) doesn't cause BLAS.get_num_threads to return `num` + # depending on the machine, the BLAS version, and BLAS configuration, so + # we need a very lenient test. + thread_counts_by_process = get_remote_num_threads(processes_added) + for thread_count in thread_counts_by_process + @test thread_count >= 1 + end + rmprocs(processes_added) end - rmprocs(processes_added) -end -test_add_procs_threaded_blas() - -#19687 -if false ### TODO: The logic that is supposed to implement this is racy - Disabled for now -# ensure no race conditions between rmprocs and addprocs -for i in 1:5 - p = addprocs_with_testenv(1)[1] - @spawnat p sleep(5) - rmprocs(p; waitfor=0) + test_add_procs_threaded_blas() end -# Test if a wait has been called on rmprocs(...;waitfor=0), further remotecalls -# don't throw errors. -for i in 1:5 - p = addprocs_with_testenv(1)[1] - np = nprocs() - @spawnat p sleep(5) - Base.wait(rmprocs(p; waitfor=0)) - for pid in procs() - @test pid == remotecall_fetch(myid, pid) - end - @test nprocs() == np - 1 -end +@testset "addprocs()/rmprocs()" begin + #19687 + if false ### TODO: The logic that is supposed to implement this is racy - Disabled for now + # ensure no race conditions between rmprocs and addprocs + for i in 1:5 + p = addprocs_with_testenv(1)[1] + @spawnat p sleep(5) + rmprocs(p; waitfor=0) + end -# Test that an exception is thrown if workers are unable to be removed within requested time. -if DoFullTest - pids=addprocs_with_testenv(4); - @test_throws ErrorException rmprocs(pids; waitfor=0.001); - # wait for workers to be removed - while any(in(procs()), pids) - sleep(0.1) + # Test if a wait has been called on rmprocs(...;waitfor=0), further remotecalls + # don't throw errors. + for i in 1:5 + p = addprocs_with_testenv(1)[1] + np = nprocs() + @spawnat p sleep(5) + Base.wait(rmprocs(p; waitfor=0)) + for pid in procs() + @test pid == remotecall_fetch(myid, pid) + end + @test nprocs() == np - 1 + end + + # Test that an exception is thrown if workers are unable to be removed within requested time. 
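+    # (only exercised under DoFullTest: it launches four extra workers and then
+    # polls until they are fully removed)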
+ if DoFullTest + pids=addprocs_with_testenv(4); + @test_throws ErrorException rmprocs(pids; waitfor=0.001); + # wait for workers to be removed + while any(in(procs()), pids) + sleep(0.1) + end + end end -end -end -# Test addprocs/rmprocs from master node only -for f in [ ()->addprocs(1; exeflags=test_exeflags), ()->rmprocs(workers()) ] - local f - try - remotecall_fetch(f, id_other) - error("Unexpected") - catch ex - @test isa(ex, RemoteException) - @test ex.captured.ex.msg == "Only process 1 can add and remove workers" + # Test addprocs/rmprocs from master node only + for f in [ ()->addprocs(1; exeflags=test_exeflags), ()->rmprocs(workers()) ] + local f + try + remotecall_fetch(f, id_other) + error("Unexpected") + catch ex + @test isa(ex, RemoteException) + @test ex.captured.ex.msg == "Only process 1 can add and remove workers" + end end -end -# Test the following addprocs error conditions -# - invalid host name - github issue #20372 -# - julia exe exiting with an error -# - timeout reading host:port from worker stdout -# - host:port not found in worker stdout in the first 1000 lines + # Test the following addprocs error conditions + # - invalid host name - github issue #20372 + # - julia exe exiting with an error + # - timeout reading host:port from worker stdout + # - host:port not found in worker stdout in the first 1000 lines -struct ErrorSimulator <: ClusterManager - mode -end + struct ErrorSimulator <: ClusterManager + mode + end -function launch(manager::ErrorSimulator, params::Dict, launched::Array, c::Condition) - exename = params[:exename] - dir = params[:dir] + function DistributedNext.launch(manager::ErrorSimulator, params::Dict, launched::Array, c::Condition) + exename = params[:exename] + dir = params[:dir] - cmd = `$(Base.julia_cmd(exename)) --startup-file=no` - if manager.mode === :timeout - cmd = `$cmd -e "sleep(10)"` - elseif manager.mode === :ntries - cmd = `$cmd -e "[println(x) for x in 1:1001]"` - elseif manager.mode === :exit - cmd = `$cmd -e "exit(-1)"` - else - error("Unknown mode") - end - io = open(detach(setenv(cmd, dir=dir))) + cmd = `$(Base.julia_cmd(exename)) --startup-file=no` + if manager.mode === :timeout + cmd = `$cmd -e "sleep(10)"` + elseif manager.mode === :ntries + cmd = `$cmd -e "[println(x) for x in 1:1001]"` + elseif manager.mode === :exit + cmd = `$cmd -e "exit(-1)"` + else + error("Unknown mode") + end + io = open(detach(setenv(cmd, dir=dir))) - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - push!(launched, wconfig) - notify(c) -end + wconfig = WorkerConfig() + wconfig.process = io + wconfig.io = io.out + push!(launched, wconfig) + notify(c) + end -testruns = Any[] + testruns = Any[] -if DoFullTest - append!(testruns, [(()->addprocs_with_testenv(["errorhost20372"]), "Unable to read host:port string from worker. Launch command exited with error?", ())]) -end + if DoFullTest + append!(testruns, [(()->addprocs_with_testenv(["errorhost20372"]), "Unable to read host:port string from worker. Launch command exited with error?", ())]) + end -append!(testruns, [ - (()->addprocs_with_testenv(ErrorSimulator(:exit)), "Unable to read host:port string from worker. Launch command exited with error?", ()), - (()->addprocs_with_testenv(ErrorSimulator(:ntries)), "Unexpected output from worker launch command. 
Host:port string not found.", ()), - (()->addprocs_with_testenv(ErrorSimulator(:timeout)), "Timed out waiting to read host:port string from worker.", ("JULIA_WORKER_TIMEOUT"=>"1",)) -]) + append!(testruns, [ + (()->addprocs_with_testenv(ErrorSimulator(:exit)), "Unable to read host:port string from worker. Launch command exited with error?", ()), + (()->addprocs_with_testenv(ErrorSimulator(:ntries)), "Unexpected output from worker launch command. Host:port string not found.", ()), + (()->addprocs_with_testenv(ErrorSimulator(:timeout)), "Timed out waiting to read host:port string from worker.", ("JULIA_WORKER_TIMEOUT"=>"1",)) + ]) -for (addp_testf, expected_errstr, env) in testruns - old_stdout = stdout - stdout_out, stdout_in = redirect_stdout() - stdout_txt = @async filter!(readlines(stdout_out)) do s + for (addp_testf, expected_errstr, env) in testruns + old_stdout = stdout + stdout_out, stdout_in = redirect_stdout() + stdout_txt = @async filter!(readlines(stdout_out)) do s return !startswith(s, "\tFrom worker startup:\t") end - try - withenv(env...) do - addp_testf() + try + withenv(env...) do + addp_testf() + end + error("Unexpected") + catch ex + redirect_stdout(old_stdout) + close(stdout_in) + @test isempty(fetch(stdout_txt)) + @test isa(ex, CompositeException) + @test ex.exceptions[1].task.exception.msg == expected_errstr end - error("Unexpected") - catch ex - redirect_stdout(old_stdout) - close(stdout_in) - @test isempty(fetch(stdout_txt)) - @test isa(ex, CompositeException) - @test ex.exceptions[1].task.exception.msg == expected_errstr end end +module FooModLocal end +const c1 = fill(1., 10) -# Auto serialization of globals from Main. -# bitstypes -global v1 = 1 -@test remotecall_fetch(()->v1, id_other) == v1 -@test remotecall_fetch(()->isdefined(Main, :v1), id_other) -for i in 2:5 - global v1 = i - @test remotecall_fetch(()->v1, id_other) == i -end +@testset "Serialization/deserialization" begin + # Auto serialization of globals from Main. 
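+    # A minimal sketch of the mechanism under test, using a hypothetical
+    # binding `v0_demo` (`id_other` is defined earlier in this file): a `Main`
+    # global referenced from a shipped closure is serialized along with it and
+    # installed under `Main` on the worker.
+    global v0_demo = 42
+    @test remotecall_fetch(() -> v0_demo, id_other) == 42
+    @test remotecall_fetch(() -> isdefined(Main, :v0_demo), id_other)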
+ # bitstypes + global v1 = 1 + @test remotecall_fetch(()->v1, id_other) == v1 + @test remotecall_fetch(()->isdefined(Main, :v1), id_other) + for i in 2:5 + global v1 = i + @test remotecall_fetch(()->v1, id_other) == i + end -# non-bitstypes -global v2 = zeros(10) -for i in 1:5 - v2[i] = i - @test remotecall_fetch(()->v2, id_other) == v2 -end + # non-bitstypes + global v2 = zeros(10) + for i in 1:5 + v2[i] = i + @test remotecall_fetch(()->v2, id_other) == v2 + end -# Different global bindings to the same object -global v3 = fill(1., 10) -global v4 = v3 -@test remotecall_fetch(()->v3, id_other) == remotecall_fetch(()->v4, id_other) -@test remotecall_fetch(()->isdefined(Main, :v3), id_other) -@test remotecall_fetch(()->isdefined(Main, :v4), id_other) + # Different global bindings to the same object + global v3 = fill(1., 10) + global v4 = v3 + @test remotecall_fetch(()->v3, id_other) == remotecall_fetch(()->v4, id_other) + @test remotecall_fetch(()->isdefined(Main, :v3), id_other) + @test remotecall_fetch(()->isdefined(Main, :v4), id_other) -# Global references to Types and Modules should work if they are locally defined -global v5 = Int -global v6 = DistributedNext -@test remotecall_fetch(()->v5, id_other) === Int -@test remotecall_fetch(()->v6, id_other) === DistributedNext + # Global references to Types and Modules should work if they are locally defined + global v5 = Int + global v6 = DistributedNext + @test remotecall_fetch(()->v5, id_other) === Int + @test remotecall_fetch(()->v6, id_other) === DistributedNext -struct FooStructLocal end -module FooModLocal end -v5 = FooStructLocal -v6 = FooModLocal -@test_throws RemoteException remotecall_fetch(()->v5, id_other) -@test_throws RemoteException remotecall_fetch(()->v6, id_other) + struct FooStructLocal end + v5 = FooStructLocal + v6 = FooModLocal + @test_throws RemoteException remotecall_fetch(()->v5, id_other) + @test_throws RemoteException remotecall_fetch(()->v6, id_other) -@everywhere struct FooStructEverywhere end -@everywhere module FooModEverywhere end -v5 = FooStructEverywhere -v6 = FooModEverywhere -@test remotecall_fetch(()->v5, id_other) === FooStructEverywhere -@test remotecall_fetch(()->v6, id_other) === FooModEverywhere + @everywhere struct FooStructEverywhere end + @everywhere module FooModEverywhere end + v5 = FooStructEverywhere + v6 = FooModEverywhere + @test remotecall_fetch(()->v5, id_other) === FooStructEverywhere + @test remotecall_fetch(()->v6, id_other) === FooModEverywhere -# hash value same but different object instance -v7 = ones(10) -oid1 = objectid(v7) -hval1 = hash(v7) -@test v7 == @fetchfrom id_other v7 -remote_oid1 = @fetchfrom id_other objectid(v7) + # hash value same but different object instance + v7 = ones(10) + oid1 = objectid(v7) + hval1 = hash(v7) + @test v7 == @fetchfrom id_other v7 + remote_oid1 = @fetchfrom id_other objectid(v7) -v7 = ones(10) -@test oid1 != objectid(v7) -@test hval1 == hash(v7) -@test remote_oid1 != @fetchfrom id_other objectid(v7) + v7 = ones(10) + @test oid1 != objectid(v7) + @test hval1 == hash(v7) + @test remote_oid1 != @fetchfrom id_other objectid(v7) -# Github issue #31252 -v31252 = :a -@test :a == @fetchfrom id_other v31252 + # Github issue #31252 + v31252 = :a + @test :a == @fetchfrom id_other v31252 -v31252 = :b -@test :b == @fetchfrom id_other v31252 + v31252 = :b + @test :b == @fetchfrom id_other v31252 -v31252 = :a -@test :a == @fetchfrom id_other v31252 + v31252 = :a + @test :a == @fetchfrom id_other v31252 -# Test that a global is not being repeatedly serialized 
when -# a) referenced multiple times in the closure -# b) hash value has not changed. + # Test that a global is not being repeatedly serialized when + # a) referenced multiple times in the closure + # b) hash value has not changed. -@everywhere begin - using Serialization - global testsercnt_d = Dict() - mutable struct TestSerCnt - v - end - import Base.hash, Base.== - hash(x::TestSerCnt, h::UInt) = hash(hash(x.v), h) - ==(x1::TestSerCnt, x2::TestSerCnt) = (x1.v == x2.v) + @everywhere begin + using Serialization + global testsercnt_d = Dict() + mutable struct TestSerCnt + v + end + import Base.hash, Base.== + hash(x::TestSerCnt, h::UInt) = hash(hash(x.v), h) + ==(x1::TestSerCnt, x2::TestSerCnt) = (x1.v == x2.v) + + function Serialization.serialize(s::AbstractSerializer, t::TestSerCnt) + Serialization.serialize_type(s, TestSerCnt) + serialize(s, t.v) + global testsercnt_d + cnt = get!(testsercnt_d, objectid(t), 0) + testsercnt_d[objectid(t)] = cnt+1 + end - function Serialization.serialize(s::AbstractSerializer, t::TestSerCnt) - Serialization.serialize_type(s, TestSerCnt) - serialize(s, t.v) - global testsercnt_d - cnt = get!(testsercnt_d, objectid(t), 0) - testsercnt_d[objectid(t)] = cnt+1 + Serialization.deserialize(s::AbstractSerializer, ::Type{TestSerCnt}) = TestSerCnt(deserialize(s)) end - Serialization.deserialize(s::AbstractSerializer, ::Type{TestSerCnt}) = TestSerCnt(deserialize(s)) -end - -# hash value of tsc is not changed -global tsc = TestSerCnt(zeros(10)) -for i in 1:5 - remotecall_fetch(()->tsc, id_other) -end -# should have been serialized only once -@test testsercnt_d[objectid(tsc)] == 1 - -# hash values are changed -n=5 -testsercnt_d[objectid(tsc)] = 0 -for i in 1:n - tsc.v[i] = i - remotecall_fetch(()->tsc, id_other) -end -# should have been serialized as many times as the loop -@test testsercnt_d[objectid(tsc)] == n + # hash value of tsc is not changed + global tsc = TestSerCnt(zeros(10)) + for i in 1:5 + remotecall_fetch(()->tsc, id_other) + end + # should have been serialized only once + @test testsercnt_d[objectid(tsc)] == 1 -# Multiple references in a closure should be serialized only once. -global mrefs = TestSerCnt(fill(1.,10)) -@test remotecall_fetch(()->(mrefs.v, 2*mrefs.v, 3*mrefs.v), id_other) == (fill(1.,10), fill(2.,10), fill(3.,10)) -@test testsercnt_d[objectid(mrefs)] == 1 + # hash values are changed + n=5 + testsercnt_d[objectid(tsc)] = 0 + for i in 1:n + tsc.v[i] = i + remotecall_fetch(()->tsc, id_other) + end + # should have been serialized as many times as the loop + @test testsercnt_d[objectid(tsc)] == n + # Multiple references in a closure should be serialized only once. 
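+    # (the closure below references `mrefs` three times, but since its hash is
+    # unchanged the counting serializer above should fire exactly once)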
+ global mrefs = TestSerCnt(fill(1.,10)) + @test remotecall_fetch(()->(mrefs.v, 2*mrefs.v, 3*mrefs.v), id_other) == (fill(1.,10), fill(2.,10), fill(3.,10)) + @test testsercnt_d[objectid(mrefs)] == 1 -# nested anon functions -global f1 = x->x -global f2 = x->f1(x) -v = rand() -@test remotecall_fetch(f2, id_other, v) == v -@test remotecall_fetch(x->f2(x), id_other, v) == v -# consts -const c1 = fill(1., 10) -@test remotecall_fetch(()->c1, id_other) == c1 -@test remotecall_fetch(()->isconst(Main, :c1), id_other) + # nested anon functions + global f1 = x->x + global f2 = x->f1(x) + v = rand() + @test remotecall_fetch(f2, id_other, v) == v + @test remotecall_fetch(x->f2(x), id_other, v) == v + + # consts + @test remotecall_fetch(()->c1, id_other) == c1 + @test remotecall_fetch(()->isconst(Main, :c1), id_other) + + # Test same calls with local vars + function wrapped_var_ser_tests() + # bitstypes + local lv1 = 1 + @test remotecall_fetch(()->lv1, id_other) == lv1 + @test !remotecall_fetch(()->isdefined(Main, :lv1), id_other) + for i in 2:5 + lv1 = i + @test remotecall_fetch(()->lv1, id_other) == i + end -# Test same calls with local vars -function wrapped_var_ser_tests() - # bitstypes - local lv1 = 1 - @test remotecall_fetch(()->lv1, id_other) == lv1 - @test !remotecall_fetch(()->isdefined(Main, :lv1), id_other) - for i in 2:5 - lv1 = i - @test remotecall_fetch(()->lv1, id_other) == i - end + # non-bitstypes + local lv2 = zeros(10) + for i in 1:5 + lv2[i] = i + @test remotecall_fetch(()->lv2, id_other) == lv2 + end - # non-bitstypes - local lv2 = zeros(10) - for i in 1:5 - lv2[i] = i - @test remotecall_fetch(()->lv2, id_other) == lv2 + # nested anon functions + local lf1 = x->x + local lf2 = x->lf1(x) + v = rand() + @test remotecall_fetch(lf2, id_other, v) == v + @test remotecall_fetch(x->lf2(x), id_other, v) == v end - # nested anon functions - local lf1 = x->x - local lf2 = x->lf1(x) - v = rand() - @test remotecall_fetch(lf2, id_other, v) == v - @test remotecall_fetch(x->lf2(x), id_other, v) == v -end - -wrapped_var_ser_tests() + wrapped_var_ser_tests() -# Test internal data structures being cleaned up upon gc. -global ids_cleanup = fill(1., 6) -global ids_func = ()->ids_cleanup + # Test internal data structures being cleaned up upon gc. 
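+    # (shipping `ids_func`, which captures `ids_cleanup`, registers the global
+    # in this connection's ClusterSerializer; the TODO below notes the
+    # still-missing check that the entry is dropped once the objects are
+    # collected)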
+ global ids_cleanup = fill(1., 6) + global ids_func = ()->ids_cleanup -clust_ser = (DistributedNext.worker_from_id(id_other)).w_serializer -@test remotecall_fetch(ids_func, id_other) == ids_cleanup + clust_ser = (DistributedNext.worker_from_id(id_other)).w_serializer + @test remotecall_fetch(ids_func, id_other) == ids_cleanup -# TODO Add test for cleanup from `clust_ser.glbs_in_tnobj` + # TODO Add test for cleanup from `clust_ser.glbs_in_tnobj` +end -# reported github issues - Mostly tests with globals and various distributed macros -#2669, #5390 v2669=10 -@test fetch(@spawnat :any (1+v2669)) == 11 -#12367 -refs = [] -if true - n = 10 - for p in procs() - push!(refs, @spawnat p begin - @sync for i in 1:n - nothing - end - end) +@testset "More various individual issues" begin + # reported github issues - Mostly tests with globals and various distributed macros + #2669, #5390 + @test fetch(@spawnat :any (1+v2669)) == 11 + + #12367 + refs = [] + if true + n = 10 + for p in procs() + push!(refs, @spawnat p begin + @sync for i in 1:n + nothing + end + end) + end end -end -foreach(wait, refs) + foreach(wait, refs) -#6760 -if true - a = 2 - x = @distributed (vcat) for k=1:2 - sin(a) + #6760 + if true + a = 2 + x = @distributed (vcat) for k=1:2 + sin(a) + end end -end -@test x == map(_->sin(2), 1:2) - -let thrown = false - try - remotecall_fetch(sqrt, 2, -1) - catch e - thrown = true - local b = IOBuffer() - showerror(b, e) - @test occursin("sqrt was called with a negative real argument", String(take!(b))) - end - @test thrown -end + @test x == map(_->sin(2), 1:2) -# issue #34333 -let - @test fetch(remotecall(Float64, id_other, 1)) == Float64(1) - @test fetch(remotecall_wait(Float64, id_other, 1)) == Float64(1) - @test remotecall_fetch(Float64, id_other, 1) == Float64(1) -end - -#19463 -function foo19463() - w1 = workers()[1] - w2 = workers()[2] - w3 = workers()[3] - - b1 = () -> 1 - b2 = () -> fetch(@spawnat w1 b1()) + 1 - b3 = () -> fetch(@spawnat w2 b2()) + 1 - b4 = () -> fetch(@spawnat w3 b3()) + 1 - b4() -end -@test foo19463() == 4 - -# Testing clear! -function setup_syms(n, pids) - syms = [] - for i in 1:n - symstr = string("clrtest", randstring()) - sym = Symbol(symstr) - eval(:(global $sym = rand())) - for p in pids - eval(:(@test $sym == remotecall_fetch(()->$sym, $p))) - eval(:(@test remotecall_fetch(isdefined, $p, Main, Symbol($symstr)))) + let thrown = false + try + remotecall_fetch(sqrt, 2, -1) + catch e + thrown = true + local b = IOBuffer() + showerror(b, e) + @test occursin("sqrt was called with a negative real argument", String(take!(b))) + end + @test thrown + end + + # issue #34333 + let + @test fetch(remotecall(Float64, id_other, 1)) == Float64(1) + @test fetch(remotecall_wait(Float64, id_other, 1)) == Float64(1) + @test remotecall_fetch(Float64, id_other, 1) == Float64(1) + end + + #19463 + function foo19463() + w1 = workers()[1] + w2 = workers()[2] + w3 = workers()[3] + + b1 = () -> 1 + b2 = () -> fetch(@spawnat w1 b1()) + 1 + b3 = () -> fetch(@spawnat w2 b2()) + 1 + b4 = () -> fetch(@spawnat w3 b3()) + 1 + b4() + end + @test foo19463() == 4 +end + +@testset "clear!()" begin + # Testing clear! 
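+    # A minimal sketch of the semantics exercised by the helpers below, using a
+    # hypothetical binding `clr_demo` (`id_other` is defined earlier in this
+    # file); `getfield` is used for the final check so the closure does not
+    # re-ship the local value.
+    global clr_demo = rand()
+    @test remotecall_fetch(() -> clr_demo, id_other) == clr_demo  # ships the binding
+    clear!(:clr_demo, id_other)
+    @test remotecall_fetch(() -> getfield(Main, :clr_demo), id_other) === nothing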
+ function setup_syms(n, pids) + syms = [] + for i in 1:n + symstr = string("clrtest", randstring()) + sym = Symbol(symstr) + eval(:(global $sym = rand())) + for p in pids + eval(:(@test $sym == remotecall_fetch(()->$sym, $p))) + eval(:(@test remotecall_fetch(isdefined, $p, Main, Symbol($symstr)))) + end + push!(syms, sym) end - push!(syms, sym) + syms end - syms -end -function test_clear(syms, pids) - for p in pids - for sym in syms - remote_val = remotecall_fetch(()->getfield(Main, sym), p) - @test remote_val === nothing - @test remote_val != getfield(Main, sym) + function test_clear(syms, pids) + for p in pids + for sym in syms + remote_val = remotecall_fetch(()->getfield(Main, sym), p) + @test remote_val === nothing + @test remote_val != getfield(Main, sym) + end end end -end -syms = setup_syms(1, [id_other]) -clear!(syms[1], id_other) -test_clear(syms, [id_other]) + syms = setup_syms(1, [id_other]) + clear!(syms[1], id_other) + test_clear(syms, [id_other]) -syms = setup_syms(1, workers()) -clear!(syms[1], workers()) -test_clear(syms, workers()) + syms = setup_syms(1, workers()) + clear!(syms[1], workers()) + test_clear(syms, workers()) -syms = setup_syms(3, [id_other]) -clear!(syms, id_other) -test_clear(syms, [id_other]) + syms = setup_syms(3, [id_other]) + clear!(syms, id_other) + test_clear(syms, [id_other]) -syms = setup_syms(3, workers()) -clear!(syms, workers()) -test_clear(syms, workers()) - -# Test partial recovery from a deserialization error in CapturedException -try - expr = quote - mutable struct DontExistOn1 - x - end - throw(BoundsError(DontExistOn1(1), 1)) - end - - remotecall_fetch(()->eval(expr), id_other) - error("unexpected") -catch ex - @test isa(ex.captured.ex.exceptions[1].ex, ErrorException) - @test occursin("BoundsError", ex.captured.ex.exceptions[1].ex.msg) - ex = ex.captured.ex.exceptions[2].ex - @test (ex::UndefVarError).var === :DontExistOn1 + syms = setup_syms(3, workers()) + clear!(syms, workers()) + test_clear(syms, workers()) end -let - # creates a new worker in a different folder and tries to include file - tmp_dir = mktempdir() - tmp_dir2 = joinpath(tmp_dir, "2") - tmp_file = joinpath(tmp_dir2, "testfile") - tmp_file2 = joinpath(tmp_dir2, "testfile2") - proc = addprocs_with_testenv(1, dir=tmp_dir) +@testset "Deserialization error recovery and include()" begin + # Test partial recovery from a deserialization error in CapturedException try - mkdir(tmp_dir2) - write(tmp_file, "23.32 + 32 + myid() + include(\"testfile2\")") - write(tmp_file2, "myid() * 2") - function test_include_fails_to_open_file(fname) - try - include(fname) - catch exc - path = joinpath(@__DIR__, fname) - @test exc isa SystemError - @test exc.prefix == "opening file $(repr(path))" + expr = quote + mutable struct DontExistOn1 + x end + throw(BoundsError(DontExistOn1(1), 1)) + end + + remotecall_fetch(()->eval(expr), id_other) + error("unexpected") + catch ex + @test isa(ex.captured.ex.exceptions[1].ex, ErrorException) + @test occursin("BoundsError", ex.captured.ex.exceptions[1].ex.msg) + ex = ex.captured.ex.exceptions[2].ex + @test (ex::UndefVarError).var === :DontExistOn1 + end + + let + # creates a new worker in a different folder and tries to include file + tmp_dir = mktempdir() + tmp_dir2 = joinpath(tmp_dir, "2") + tmp_file = joinpath(tmp_dir2, "testfile") + tmp_file2 = joinpath(tmp_dir2, "testfile2") + proc = addprocs_with_testenv(1, dir=tmp_dir) + try + mkdir(tmp_dir2) + write(tmp_file, "23.32 + 32 + myid() + include(\"testfile2\")") + write(tmp_file2, "myid() * 2") + function 
test_include_fails_to_open_file(fname) + try + include(fname) + catch exc + path = joinpath(@__DIR__, fname) + @test exc isa SystemError + @test exc.prefix == "opening file $(repr(path))" + end + end + test_include_fails_to_open_file("testfile") + test_include_fails_to_open_file("testfile2") + test_include_fails_to_open_file(joinpath("2", "testfile2")) + @test include(tmp_file) == 58.32 + @test remotecall_fetch(include, proc[1], joinpath("2", "testfile")) == 55.32 + proc[1] * 3 + finally + rmprocs(proc) + rm(tmp_file, force=true) + rm(tmp_file2, force=true) + rm(tmp_dir2, force=true) + #rm(tmp_dir, force=true) end - test_include_fails_to_open_file("testfile") - test_include_fails_to_open_file("testfile2") - test_include_fails_to_open_file(joinpath("2", "testfile2")) - @test include(tmp_file) == 58.32 - @test remotecall_fetch(include, proc[1], joinpath("2", "testfile")) == 55.32 + proc[1] * 3 - finally - rmprocs(proc) - rm(tmp_file, force=true) - rm(tmp_file2, force=true) - rm(tmp_dir2, force=true) - #rm(tmp_dir, force=true) end end + # cookie and command line option `--worker` tests. remove workers, set cookie and test struct WorkerArgTester <: ClusterManager worker_opt @@ -1542,8 +1582,8 @@ manage(::WorkerArgTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing nprocs()>1 && rmprocs(workers()) -## These tests are disabled because DistributedNext has no way of supporting the -## --worker argument. +# These tests are disabled because DistributedNext has no way of supporting the +# --worker argument. # npids = addprocs_with_testenv(WorkerArgTester(`--worker`, true)) # @test remotecall_fetch(myid, npids[1]) == npids[1] # rmprocs(npids) @@ -1557,111 +1597,116 @@ nprocs()>1 && rmprocs(workers()) # npids = addprocs_with_testenv(WorkerArgTester(`--worker=foobar`, false)) # @test remotecall_fetch(myid, npids[1]) == npids[1] -# tests for start_worker options to retain stdio (issue #31035) -struct RetainStdioTester <: ClusterManager - close_stdin::Bool - stderr_to_stdout::Bool -end - -function launch(manager::RetainStdioTester, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] +@testset "start_worker options to retain stdio (issue #31035)" begin + struct RetainStdioTester <: ClusterManager + close_stdin::Bool + stderr_to_stdout::Bool + end - jlcmd = "using DistributedNext; start_worker(\"\"; close_stdin=$(manager.close_stdin), stderr_to_stdout=$(manager.stderr_to_stdout));" - cmd = detach(setenv(`$exename $exeflags --bind-to $(DistributedNext.LPROC.bind_addr) -e $jlcmd`, dir=dir)) - proc = open(cmd, "r+") + function DistributedNext.launch(manager::RetainStdioTester, params::Dict, launched::Array, c::Condition) + dir = params[:dir] + exename = params[:exename] + exeflags = params[:exeflags] - wconfig = WorkerConfig() - wconfig.process = proc - wconfig.io = proc.out - push!(launched, wconfig) + jlcmd = "using DistributedNext; start_worker(\"\"; close_stdin=$(manager.close_stdin), stderr_to_stdout=$(manager.stderr_to_stdout));" + cmd = detach(setenv(`$exename $exeflags --bind-to $(DistributedNext.LPROC.bind_addr) -e $jlcmd`, dir=dir)) + proc = open(cmd, "r+") - notify(c) -end -manage(::RetainStdioTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing + wconfig = WorkerConfig() + wconfig.process = proc + wconfig.io = proc.out + push!(launched, wconfig) + notify(c) + end + DistributedNext.manage(::RetainStdioTester, ::Integer, ::WorkerConfig, ::Symbol) = nothing -nprocs()>1 && rmprocs(workers()) -cluster_cookie("") -for 
close_stdin in (true, false), stderr_to_stdout in (true, false) - local npids = addprocs_with_testenv(RetainStdioTester(close_stdin,stderr_to_stdout)) - @test remotecall_fetch(myid, npids[1]) == npids[1] - if close_stdin - @test remotecall_fetch(()->stdin === devnull && !isreadable(stdin), npids[1]) - else - @test remotecall_fetch(()->stdin !== devnull && isopen(stdin) && isreadable(stdin), npids[1]) - end - @test stderr_to_stdout == remotecall_fetch(()->(stderr === stdout), npids[1]) - rmprocs(npids) -end + nprocs()>1 && rmprocs(workers()) + cluster_cookie("") -# Issue # 22865 -# Must be run on a new cluster, i.e., all workers must be in the same state. -@assert nprocs() == 1 -p1,p2 = addprocs_with_testenv(2) -@everywhere f22865(p) = remotecall_fetch(x->x.*2, p, fill(1.,2)) -@test fill(2.,2) == remotecall_fetch(f22865, p1, p2) -rmprocs(p1, p2) - -function reuseport_tests() - # Run the test on all processes. - results = asyncmap(procs()) do p - remotecall_fetch(p) do - ports_lower = [] # ports of pids lower than myid() - ports_higher = [] # ports of pids higher than myid() - for w in DistributedNext.PGRP.workers - w.id == myid() && continue - port = Sockets._sockname(w.r_stream, true)[2] - if (w.id == 1) - # master connects to workers - push!(ports_higher, port) - elseif w.id < myid() - push!(ports_lower, port) - elseif w.id > myid() - push!(ports_higher, port) + for close_stdin in (true, false), stderr_to_stdout in (true, false) + local npids = addprocs_with_testenv(RetainStdioTester(close_stdin,stderr_to_stdout)) + @test remotecall_fetch(myid, npids[1]) == npids[1] + if close_stdin + @test remotecall_fetch(()->stdin === devnull && !isreadable(stdin), npids[1]) + else + @test remotecall_fetch(()->stdin !== devnull && isopen(stdin) && isreadable(stdin), npids[1]) + end + @test stderr_to_stdout == remotecall_fetch(()->(stderr === stdout), npids[1]) + rmprocs(npids) + end +end + +@testset "Issue #22865" begin + # Must be run on a new cluster, i.e., all workers must be in the same state. + @assert nprocs() == 1 + p1,p2 = addprocs_with_testenv(2) + @everywhere f22865(p) = remotecall_fetch(x->x.*2, p, fill(1.,2)) + @test fill(2.,2) == remotecall_fetch(f22865, p1, p2) + rmprocs(p1, p2) +end + +@testset "SO_REUSEPORT" begin + function reuseport_tests() + # Run the test on all processes. + results = asyncmap(procs()) do p + remotecall_fetch(p) do + ports_lower = [] # ports of pids lower than myid() + ports_higher = [] # ports of pids higher than myid() + for w in DistributedNext.PGRP.workers + w.id == myid() && continue + port = Sockets._sockname(w.r_stream, true)[2] + if (w.id == 1) + # master connects to workers + push!(ports_higher, port) + elseif w.id < myid() + push!(ports_lower, port) + elseif w.id > myid() + push!(ports_higher, port) + end end - end - @assert (length(ports_lower) + length(ports_higher)) == nworkers() - for portset in [ports_lower, ports_higher] - if (length(portset) > 0) && (length(unique(portset)) != 1) - @warn "SO_REUSEPORT TESTS FAILED. UNSUPPORTED/OLDER UNIX VERSION?" - return 0 + @assert (length(ports_lower) + length(ports_higher)) == nworkers() + for portset in [ports_lower, ports_higher] + if (length(portset) > 0) && (length(unique(portset)) != 1) + @warn "SO_REUSEPORT TESTS FAILED. UNSUPPORTED/OLDER UNIX VERSION?" 
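+                        # (0 is never a live pid, so the `all(in(results), procs())`
+                        # check below will flag this path)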
+ return 0 + end end + return myid() end - return myid() end - end - # Ensure that the code has indeed been successfully executed everywhere - @test all(in(results), procs()) -end + # Ensure that the code has indeed been successfully executed everywhere + @test all(in(results), procs()) + end -# Test that the client port is reused. SO_REUSEPORT may not be supported on -# all UNIX platforms, Linux kernels prior to 3.9 and older versions of OSX -@assert nprocs() == 1 -addprocs_with_testenv(4; lazy=false) -if ccall(:jl_has_so_reuseport, Int32, ()) == 1 - reuseport_tests() -else - @info "SO_REUSEPORT is unsupported, skipping reuseport tests" + # Test that the client port is reused. SO_REUSEPORT may not be supported on + # all UNIX platforms, Linux kernels prior to 3.9 and older versions of OSX + @assert nprocs() == 1 + addprocs_with_testenv(4; lazy=false) + if ccall(:jl_has_so_reuseport, Int32, ()) == 1 + reuseport_tests() + else + @info "SO_REUSEPORT is unsupported, skipping reuseport tests" + end end -# issue #27933 -a27933 = :_not_defined_27933 -@test remotecall_fetch(()->a27933, first(workers())) === a27933 +@testset "Even more various individual issues" begin + # issue #27933 + a27933 = :_not_defined_27933 + @test remotecall_fetch(()->a27933, first(workers())) === a27933 -# PR #28651 -for T in (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64) - local n = @distributed (+) for i in Base.OneTo(T(10)) - i + # PR #28651 + for T in (UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64) + local n = @distributed (+) for i in Base.OneTo(T(10)) + i + end + @test n == 55 end - @test n == 55 -end -# issue #28966 -let code = """ + # issue #28966 + let code = """ import DistributedNext DistributedNext.addprocs(1) DistributedNext.@everywhere f() = myid() @@ -1669,204 +1714,207 @@ let code = """ @assert DistributedNext.remotecall_fetch(f, w) == w end """ - @test success(`$(Base.julia_cmd()) --startup-file=no -e $code`) -end - -# PR 32431: tests for internal DistributedNext.head_and_tail -let (h, t) = DistributedNext.head_and_tail(1:10, 3) - @test h == 1:3 - @test collect(t) == 4:10 -end -let (h, t) = DistributedNext.head_and_tail(1:10, 0) - @test h == [] - @test collect(t) == 1:10 -end -let (h, t) = DistributedNext.head_and_tail(1:3, 5) - @test h == 1:3 - @test collect(t) == [] -end -let (h, t) = DistributedNext.head_and_tail(1:3, 3) - @test h == 1:3 - @test collect(t) == [] -end -let (h, t) = DistributedNext.head_and_tail(Int[], 3) - @test h == [] - @test collect(t) == [] -end -let (h, t) = DistributedNext.head_and_tail(Int[], 0) - @test h == [] - @test collect(t) == [] -end - -# issue #35937 -let e = @test_throws RemoteException pmap(1) do _ - wait(@async error(42)) - end - # check that the inner TaskFailedException is correctly formed & can be printed - es = sprint(showerror, e.value) - @test contains(es, ":\nTaskFailedException\nStacktrace:\n") - @test contains(es, "\n\n nested task error:") - @test contains(es, "\n\n nested task error: 42\n") -end - -# issue #27429, propagate relative `include` path to workers -@everywhere include("includefile.jl") -for p in procs() - @test @fetchfrom(p, i27429) == 27429 -end - -# Propagation of package environments for local workers (#28781) -let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp - pkg_project = joinpath(Base.pkgdir(DistributedNext), "Project.toml") - project = mkdir(joinpath(tmp, "project")) - depots = [mkdir(joinpath(tmp, "depot1")), mkdir(joinpath(tmp, "depot2"))] - load_path = [mkdir(joinpath(tmp, "load_path")), "@stdlib", "@", 
pkg_project] - pathsep = Sys.iswindows() ? ";" : ":" - env = Dict( - "JULIA_DEPOT_PATH" => join(depots, pathsep), - "JULIA_LOAD_PATH" => join(load_path, pathsep), - # Explicitly propagate `TMPDIR`, in the event that we're running on a - # CI system where `TMPDIR` is special. - "TMPDIR" => dirname(tmp), - ) - - funcscode = """ - using Test - - @everywhere begin - depot_path() = DEPOT_PATH - load_path() = LOAD_PATH - active_project() = Base.ACTIVE_PROJECT[] + @test success(`$(Base.julia_cmd()) --startup-file=no -e $code`) end - """ - - setupcode = """ - using DistributedNext - addprocs(1) - """ * funcscode - testcode = setupcode * """ - for w in workers() - @test remotecall_fetch(depot_path, w) == DEPOT_PATH - @test remotecall_fetch(load_path, w) == LOAD_PATH - @test remotecall_fetch(Base.load_path, w) == Base.load_path() - @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] - @test remotecall_fetch(Base.active_project, w) == Base.active_project() + # PR 32431: tests for internal DistributedNext.head_and_tail + let (h, t) = DistributedNext.head_and_tail(1:10, 3) + @test h == 1:3 + @test collect(t) == 4:10 end - """ - - # No active project. This test is disabled because it won't work with - # DistributedNext since the package isn't a stdlib. - # extracode = """ - # for w in workers() - # @test remotecall_fetch(active_project, w) === Base.ACTIVE_PROJECT[] === nothing - # end - # """ - # cmd = setenv(`$(julia) -e $(testcode * extracode)`, env) - # @test success(cmd) - - # --project - extracode = """ - for w in workers() - @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] == - $(repr(project)) + let (h, t) = DistributedNext.head_and_tail(1:10, 0) + @test h == [] + @test collect(t) == 1:10 end - """ - cmd = setenv(`$(julia) --project=$(project) -e $(testcode * extracode)`, env) - @test success(cmd) - # JULIA_PROJECT - cmd = setenv(`$(julia) -e $(testcode * extracode)`, - (env["JULIA_PROJECT"] = project; env)) - @test success(cmd) - # Pkg.activate(...) - activateish = """ - Base.ACTIVE_PROJECT[] = $(repr(project)) - using DistributedNext - addprocs(1) - """ - cmd = setenv(`$(julia) -e $(activateish * testcode * extracode)`, env) - @test success(cmd) - # JULIA_(LOAD|DEPOT)_PATH - shufflecode = """ - d = reverse(DEPOT_PATH) - append!(empty!(DEPOT_PATH), d) - l = reverse(LOAD_PATH) - append!(empty!(LOAD_PATH), l) - """ - addcode = """ - using DistributedNext - addprocs(1) # after shuffling - """ - extracode = """ - for w in workers() - @test remotecall_fetch(load_path, w) == $(repr(reverse(load_path))) - @test remotecall_fetch(depot_path, w) == $(repr(reverse(depots))) + let (h, t) = DistributedNext.head_and_tail(1:3, 5) + @test h == 1:3 + @test collect(t) == [] end - """ - cmd = setenv(`$(julia) -e $(shufflecode * addcode * testcode * extracode)`, env) - @test success(cmd) - # Mismatch when shuffling after proc addition. Note that the use of - # `addcode` mimics the behaviour of -p1 as the first worker is started - # before `shufflecode` executes. 
- failcode = addcode * shufflecode * funcscode * """ - @show workers() - for w in workers() - @test remotecall_fetch(load_path, w) == reverse(LOAD_PATH) == $(repr(load_path)) - @test remotecall_fetch(depot_path, w) == reverse(DEPOT_PATH) == $(repr(depots)) + let (h, t) = DistributedNext.head_and_tail(1:3, 3) + @test h == 1:3 + @test collect(t) == [] end - """ - cmd = setenv(`$(julia) -e $(failcode)`, env) - @test success(cmd) - - # Hideous hack to double escape path separators on Windows so that it gets - # interpolated into the string (and then Cmd) correctly. - escaped_pkg_project = Sys.iswindows() ? replace(pkg_project, "\\" => "\\\\") : pkg_project - - # Passing env or exeflags to addprocs(...) to override defaults - envcode = """ - using DistributedNext - project = mktempdir() - env = Dict( - "JULIA_LOAD_PATH" => string(LOAD_PATH[1], $(repr(pathsep)), "@stdlib", $(repr(pathsep)), "$(escaped_pkg_project)"), - "JULIA_DEPOT_PATH" => DEPOT_PATH[1], - "TMPDIR" => ENV["TMPDIR"], - ) - addprocs(1; env = env, exeflags = `--project=\$(project)`) - env["JULIA_PROJECT"] = project - addprocs(1; env = env) - """ * funcscode * """ - for w in workers() - @test remotecall_fetch(depot_path, w) == [DEPOT_PATH[1]] - @test remotecall_fetch(load_path, w) == [LOAD_PATH[1], "@stdlib", "$(escaped_pkg_project)"] - @test remotecall_fetch(active_project, w) == project - @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml") + let (h, t) = DistributedNext.head_and_tail(Int[], 3) + @test h == [] + @test collect(t) == [] + end + let (h, t) = DistributedNext.head_and_tail(Int[], 0) + @test h == [] + @test collect(t) == [] + end + + # issue #35937 + let e = @test_throws RemoteException pmap(1) do _ + wait(@async error(42)) + end + # check that the inner TaskFailedException is correctly formed & can be printed + es = sprint(showerror, e.value) + @test contains(es, ":\nTaskFailedException\nStacktrace:\n") + @test contains(es, "\n\n nested task error:") + @test contains(es, "\n\n nested task error: 42\n") end - """ - cmd = setenv(`$(julia) -e $(envcode)`, env) - @test success(cmd) -end end + + # issue #27429, propagate relative `include` path to workers + @everywhere include("includefile.jl") + for p in procs() + @test @fetchfrom(p, i27429) == 27429 + end +end + +@testset "Propagation of package environments for local workers (#28781)" begin + let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp + pkg_project = joinpath(Base.pkgdir(DistributedNext), "Project.toml") + project = mkdir(joinpath(tmp, "project")) + depots = [mkdir(joinpath(tmp, "depot1")), mkdir(joinpath(tmp, "depot2"))] + load_path = [mkdir(joinpath(tmp, "load_path")), "@stdlib", "@", pkg_project] + pathsep = Sys.iswindows() ? ";" : ":" + env = Dict( + "JULIA_DEPOT_PATH" => join(depots, pathsep), + "JULIA_LOAD_PATH" => join(load_path, pathsep), + # Explicitly propagate `TMPDIR`, in the event that we're running on a + # CI system where `TMPDIR` is special. 
+ "TMPDIR" => dirname(tmp), + ) + + funcscode = """ + using Test + + @everywhere begin + depot_path() = DEPOT_PATH + load_path() = LOAD_PATH + active_project() = Base.ACTIVE_PROJECT[] + end + """ + + setupcode = """ + using DistributedNext + addprocs(1) + """ * funcscode + + testcode = setupcode * """ + for w in workers() + @test remotecall_fetch(depot_path, w) == DEPOT_PATH + @test remotecall_fetch(load_path, w) == LOAD_PATH + @test remotecall_fetch(Base.load_path, w) == Base.load_path() + @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] + @test remotecall_fetch(Base.active_project, w) == Base.active_project() + end + """ + + # No active project. This test is disabled because it won't work with + # DistributedNext since the package isn't a stdlib. + # extracode = """ + # for w in workers() + # @test remotecall_fetch(active_project, w) === Base.ACTIVE_PROJECT[] === nothing + # end + # """ + # cmd = setenv(`$(julia) -e $(testcode * extracode)`, env) + # @test success(cmd) + + # --project + extracode = """ + for w in workers() + @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] == + $(repr(project)) + end + """ + cmd = setenv(`$(julia) --project=$(project) -e $(testcode * extracode)`, env) + @test success(cmd) + # JULIA_PROJECT + cmd = setenv(`$(julia) -e $(testcode * extracode)`, + (env["JULIA_PROJECT"] = project; env)) + @test success(cmd) + # Pkg.activate(...) + activateish = """ + Base.ACTIVE_PROJECT[] = $(repr(project)) + using DistributedNext + addprocs(1) + """ + cmd = setenv(`$(julia) -e $(activateish * testcode * extracode)`, env) + @test success(cmd) + # JULIA_(LOAD|DEPOT)_PATH + shufflecode = """ + d = reverse(DEPOT_PATH) + append!(empty!(DEPOT_PATH), d) + l = reverse(LOAD_PATH) + append!(empty!(LOAD_PATH), l) + """ + addcode = """ + using DistributedNext + addprocs(1) # after shuffling + """ + extracode = """ + for w in workers() + @test remotecall_fetch(load_path, w) == $(repr(reverse(load_path))) + @test remotecall_fetch(depot_path, w) == $(repr(reverse(depots))) + end + """ + cmd = setenv(`$(julia) -e $(shufflecode * addcode * testcode * extracode)`, env) + @test success(cmd) + # Mismatch when shuffling after proc addition. Note that the use of + # `addcode` mimics the behaviour of -p1 as the first worker is started + # before `shufflecode` executes. + failcode = addcode * shufflecode * funcscode * """ + @show workers() + for w in workers() + @test remotecall_fetch(load_path, w) == reverse(LOAD_PATH) == $(repr(load_path)) + @test remotecall_fetch(depot_path, w) == reverse(DEPOT_PATH) == $(repr(depots)) + end + """ + cmd = setenv(`$(julia) -e $(failcode)`, env) + @test success(cmd) + + # Hideous hack to double escape path separators on Windows so that it gets + # interpolated into the string (and then Cmd) correctly. + escaped_pkg_project = Sys.iswindows() ? replace(pkg_project, "\\" => "\\\\") : pkg_project + + # Passing env or exeflags to addprocs(...) 
to override defaults + envcode = """ + using DistributedNext + project = mktempdir() + env = Dict( + "JULIA_LOAD_PATH" => string(LOAD_PATH[1], $(repr(pathsep)), "@stdlib", $(repr(pathsep)), "$(escaped_pkg_project)"), + "JULIA_DEPOT_PATH" => DEPOT_PATH[1], + "TMPDIR" => ENV["TMPDIR"], + ) + addprocs(1; env = env, exeflags = `--project=\$(project)`) + env["JULIA_PROJECT"] = project + addprocs(1; env = env) + """ * funcscode * """ + for w in workers() + @test remotecall_fetch(depot_path, w) == [DEPOT_PATH[1]] + @test remotecall_fetch(load_path, w) == [LOAD_PATH[1], "@stdlib", "$(escaped_pkg_project)"] + @test remotecall_fetch(active_project, w) == project + @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml") + end + """ + cmd = setenv(`$(julia) -e $(envcode)`, env) + @test success(cmd) + end end +end include("splitrange.jl") -# Clear all workers for timeout tests (issue #45785) -nprocs() > 1 && rmprocs(workers()) -begin - # First, assert that we get no messages when we close a cooperative worker - w = only(addprocs(1)) - @test_nowarn begin - wait(rmprocs([w])) - end - - # Next, ensure we get a log message when a worker does not cleanly exit - w = only(addprocs(1)) - @test_logs (:warn, r"sending SIGQUIT") begin - remote_do(w) do - # Cause the 'exit()' message that `rmprocs()` sends to do nothing - Core.eval(Base, :(exit() = nothing)) - # Hide the trace that `rmprocs()` will cause this worker to show - redirect_stderr(devnull) - end - wait(rmprocs([w])) +@testset "Clear all workers for timeout tests (issue #45785)" begin + nprocs() > 1 && rmprocs(workers()) + begin + # First, assert that we get no messages when we close a cooperative worker + w = only(addprocs(1)) + @test_nowarn begin + wait(rmprocs([w])) + end + + # Next, ensure we get a log message when a worker does not cleanly exit + w = only(addprocs(1)) + @test_logs (:warn, r"sending SIGQUIT") begin + remote_do(w) do + # Cause the 'exit()' message that `rmprocs()` sends to do nothing + Core.eval(Base, :(exit() = nothing)) + # Hide the trace that `rmprocs()` will cause this worker to show + redirect_stderr(devnull) + end + wait(rmprocs([w])) + end end end diff --git a/test/managers.jl b/test/managers.jl index 54ca3a5..1e0fc81 100644 --- a/test/managers.jl +++ b/test/managers.jl @@ -5,22 +5,24 @@ using DistributedNext using Sockets using DistributedNext: parse_machine, SSHManager, LocalManager -@test parse_machine("127.0.0.1") == ("127.0.0.1", nothing) -@test parse_machine("127.0.0.1:80") == ("127.0.0.1", 80) -@test parse_machine("[2001:db8::1]") == ("2001:db8::1", nothing) -@test parse_machine("[2001:db8::1]:443") == ("2001:db8::1", 443) +@testset "Managers" begin + @test parse_machine("127.0.0.1") == ("127.0.0.1", nothing) + @test parse_machine("127.0.0.1:80") == ("127.0.0.1", 80) + @test parse_machine("[2001:db8::1]") == ("2001:db8::1", nothing) + @test parse_machine("[2001:db8::1]:443") == ("2001:db8::1", 443) -@test parse_machine("127.0.0.1:90") == ("127.0.0.1", 90) -@test parse_machine("127.0.0.1:1") == ("127.0.0.1", 1) -@test parse_machine("127.0.0.1:65535") == ("127.0.0.1", 65535) + @test parse_machine("127.0.0.1:90") == ("127.0.0.1", 90) + @test parse_machine("127.0.0.1:1") == ("127.0.0.1", 1) + @test parse_machine("127.0.0.1:65535") == ("127.0.0.1", 65535) -@test_throws ArgumentError parse_machine("127.0.0.1:-1") -@test_throws ArgumentError parse_machine("127.0.0.1:0") -@test_throws ArgumentError parse_machine("127.0.0.1:65536") -@test_throws ArgumentError parse_machine("[2001:db8::1]:443:888") 
-@test_throws ArgumentError parse_machine("[2001:db8::1")
-@test_throws ArgumentError parse_machine("[2001:db8::1]:aaa")
+    @test_throws ArgumentError parse_machine("127.0.0.1:-1")
+    @test_throws ArgumentError parse_machine("127.0.0.1:0")
+    @test_throws ArgumentError parse_machine("127.0.0.1:65536")
+    @test_throws ArgumentError parse_machine("[2001:db8::1]:443:888")
+    @test_throws ArgumentError parse_machine("[2001:db8::1")
+    @test_throws ArgumentError parse_machine("[2001:db8::1]:aaa")
 
-@test occursin(r"^SSHManager\(machines=.*\)$",
-               sprint((t,x) -> show(t, "text/plain", x), SSHManager("127.0.0.1")))
-@test sprint((t,x) -> show(t, "text/plain", x), LocalManager(1, true)) == "LocalManager()"
+    @test occursin(r"^SSHManager\(machines=.*\)$",
+                   sprint((t,x) -> show(t, "text/plain", x), SSHManager("127.0.0.1")))
+    @test sprint((t,x) -> show(t, "text/plain", x), LocalManager(1, true)) == "LocalManager()"
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index d4d1d86..f5f56c7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -6,13 +6,15 @@ include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
 
 cmd = `$test_exename $test_exeflags`
 
-# Run the SSH tests with a single thread because LibSSH.jl is not thread-safe
-sshtestfile = joinpath(@__DIR__, "sshmanager.jl")
-run(addenv(`$cmd $sshtestfile`, "JULIA_NUM_THREADS" => "1"))
-
-disttestfile = joinpath(@__DIR__, "distributed_exec.jl")
-if !success(pipeline(`$cmd $disttestfile`; stdout=stdout, stderr=stderr)) && ccall(:jl_running_on_valgrind,Cint,()) == 0
-    error("Distributed test failed, cmd : $cmd")
+# LibSSH.jl currently only works on 64-bit Unix platforms
+if Sys.isunix() && Sys.WORD_SIZE == 64
+    # Run the SSH tests with a single thread because LibSSH.jl is not thread-safe
+    sshtestfile = joinpath(@__DIR__, "sshmanager.jl")
+    run(addenv(`$cmd $sshtestfile`, "JULIA_NUM_THREADS" => "1"))
+else
+    @warn "Skipping the SSH tests because LibSSH.jl does not support this platform"
 end
 
+include("distributed_exec.jl")
+
 include("managers.jl")
diff --git a/test/splitrange.jl b/test/splitrange.jl
index bbb8284..511e9db 100644
--- a/test/splitrange.jl
+++ b/test/splitrange.jl
@@ -1,35 +1,35 @@
 # This file is a part of Julia.
License is MIT: https://julialang.org/license -using Test -using DistributedNext using DistributedNext: splitrange -@test splitrange(1, 11, 1) == Array{UnitRange{Int64},1}([1:11]) -@test splitrange(0, 10, 1) == Array{UnitRange{Int64},1}([0:10]) -@test splitrange(-1, 9, 1) == Array{UnitRange{Int64},1}([-1:9]) +const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") +isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) +using .Main.OffsetArrays -@test splitrange(1, 11, 2) == Array{UnitRange{Int64},1}([1:6,7:11]) -@test splitrange(0, 10, 2) == Array{UnitRange{Int64},1}([0:5,6:10]) -@test splitrange(-1, 9, 2) == Array{UnitRange{Int64},1}([-1:4,5:9]) +@testset "splitrange()" begin + @test splitrange(1, 11, 1) == Array{UnitRange{Int64},1}([1:11]) + @test splitrange(0, 10, 1) == Array{UnitRange{Int64},1}([0:10]) + @test splitrange(-1, 9, 1) == Array{UnitRange{Int64},1}([-1:9]) -@test splitrange(1, 11, 3) == Array{UnitRange{Int64},1}([1:4,5:8,9:11]) -@test splitrange(0, 10, 3) == Array{UnitRange{Int64},1}([0:3,4:7,8:10]) -@test splitrange(-1, 9, 3) == Array{UnitRange{Int64},1}([-1:2,3:6,7:9]) + @test splitrange(1, 11, 2) == Array{UnitRange{Int64},1}([1:6,7:11]) + @test splitrange(0, 10, 2) == Array{UnitRange{Int64},1}([0:5,6:10]) + @test splitrange(-1, 9, 2) == Array{UnitRange{Int64},1}([-1:4,5:9]) -@test splitrange(1, 3, 3) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) -@test splitrange(1, 3, 4) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) -@test splitrange(0, 2, 3) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) -@test splitrange(0, 2, 4) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) -@test splitrange(-1, 1, 3) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) -@test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) + @test splitrange(1, 11, 3) == Array{UnitRange{Int64},1}([1:4,5:8,9:11]) + @test splitrange(0, 10, 3) == Array{UnitRange{Int64},1}([0:3,4:7,8:10]) + @test splitrange(-1, 9, 3) == Array{UnitRange{Int64},1}([-1:2,3:6,7:9]) -const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test") -isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl")) -using .Main.OffsetArrays + @test splitrange(1, 3, 3) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) + @test splitrange(1, 3, 4) == Array{UnitRange{Int64},1}([1:1,2:2,3:3]) + @test splitrange(0, 2, 3) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) + @test splitrange(0, 2, 4) == Array{UnitRange{Int64},1}([0:0,1:1,2:2]) + @test splitrange(-1, 1, 3) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) + @test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1]) -oa = OffsetArray([123, -345], (-2,)) + oa = OffsetArray([123, -345], (-2,)) -@everywhere using Test -@sync @distributed for i in eachindex(oa) - @test i ∈ (-1, 0) + @everywhere using Test + @sync @distributed for i in eachindex(oa) + @test i ∈ (-1, 0) + end end diff --git a/test/sshmanager.jl b/test/sshmanager.jl index 9bed971..2f82637 100644 --- a/test/sshmanager.jl +++ b/test/sshmanager.jl @@ -8,28 +8,24 @@ import LibSSH.Demo: DemoServer include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl")) -# LibSSH.jl currently only works on 64bit unixes -if Sys.isunix() && Sys.WORD_SIZE == 64 - function test_n_remove_pids(new_pids) - for p in new_pids - w_in_remote = sort(remotecall_fetch(workers, p)) - try - @test intersect(new_pids, w_in_remote) == new_pids - catch - print("p : $p\n") - 
print("newpids : $new_pids\n") - print("w_in_remote : $w_in_remote\n") - print("intersect : $(intersect(new_pids, w_in_remote))\n\n\n") - rethrow() - end +function test_n_remove_pids(new_pids) + for p in new_pids + w_in_remote = sort(remotecall_fetch(workers, p)) + try + @test intersect(new_pids, w_in_remote) == new_pids + catch + print("p : $p\n") + print("newpids : $new_pids\n") + print("w_in_remote : $w_in_remote\n") + print("intersect : $(intersect(new_pids, w_in_remote))\n\n\n") + rethrow() end - - remotecall_fetch(rmprocs, 1, new_pids) end - println("\n\nTesting SSHManager. A minimum of 4GB of RAM is recommended.") - println("Please ensure port 9300 and 2222 are not in use.") + remotecall_fetch(rmprocs, 1, new_pids) +end +@testset "SSHManager" begin DemoServer(2222; auth_methods=[ssh.AuthMethod_None], allow_auth_none=true, verbose=false, timeout=3600) do sshflags = `-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o LogLevel=ERROR -p 2222 ` #Issue #9951 diff --git a/test/topology.jl b/test/topology.jl index 66cc78d..5426dcc 100644 --- a/test/topology.jl +++ b/test/topology.jl @@ -2,142 +2,146 @@ using Random -pids = addprocs_with_testenv(4; topology="master_worker") +@testset "Topology" begin + pids = addprocs_with_testenv(4; topology="master_worker") -let p1 = pids[1], p2 = pids[2] - @test_throws RemoteException remotecall_fetch(()->remotecall_fetch(myid, p2), p1) -end + let p1 = pids[1], p2 = pids[2] + @test_throws RemoteException remotecall_fetch(()->remotecall_fetch(myid, p2), p1) + end -function test_worker_counts() - # check if the nprocs/nworkers/workers are the same on the remaining workers - np=nprocs() - nw=nworkers() - ws=sort(workers()) + function test_worker_counts() + # check if the nprocs/nworkers/workers are the same on the remaining workers + np=nprocs() + nw=nworkers() + ws=sort(workers()) - for p in workers() - @test (true, true, true) == remotecall_fetch(p, np, nw, ws) do x,y,z - (x==nprocs(), y==nworkers(), z==sort(workers())) + for p in workers() + @test (true, true, true) == remotecall_fetch(p, np, nw, ws) do x,y,z + (x==nprocs(), y==nworkers(), z==sort(workers())) + end end end -end -function remove_workers_and_test() - while nworkers() > 0 - rmprocs(workers()[1]) - test_worker_counts() - if nworkers() == nprocs() - break + function remove_workers_and_test() + while nworkers() > 0 + rmprocs(workers()[1]) + test_worker_counts() + if nworkers() == nprocs() + break + end end end -end - -remove_workers_and_test() -# connect even pids to other even pids, odd to odd. -mutable struct TopoTestManager <: ClusterManager - np::Integer -end + remove_workers_and_test() -function launch(manager::TopoTestManager, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - - cmd = `$exename $exeflags --bind-to $(DistributedNext.LPROC.bind_addr) $(DistributedNext.get_worker_arg())` - cmd = pipeline(detach(setenv(cmd, dir=dir))) - for i in 1:manager.np - io = open(cmd, "r+") - DistributedNext.write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - wconfig.ident = i - wconfig.connect_idents = Vector(i+2:2:manager.np) - push!(launched, wconfig) + # connect even pids to other even pids, odd to odd. 
+ mutable struct TopoTestManager <: ClusterManager + np::Integer end - notify(c) -end + function DistributedNext.launch(manager::TopoTestManager, params::Dict, launched::Array, c::Condition) + dir = params[:dir] + exename = params[:exename] + exeflags = params[:exeflags] + + cmd = `$exename $exeflags --bind-to $(DistributedNext.LPROC.bind_addr) $(DistributedNext.get_worker_arg())` + cmd = pipeline(detach(setenv(cmd, dir=dir))) + for i in 1:manager.np + io = open(cmd, "r+") + DistributedNext.write_cookie(io) + + wconfig = WorkerConfig() + wconfig.process = io + wconfig.io = io.out + wconfig.ident = i + wconfig.connect_idents = Vector(i+2:2:manager.np) + push!(launched, wconfig) + end -const map_pid_ident=Dict() -function manage(manager::TopoTestManager, id::Integer, config::WorkerConfig, op::Symbol) - if op === :register - map_pid_ident[id] = config.ident - elseif op === :interrupt - kill(config.process, 2) + notify(c) end -end -addprocs_with_testenv(TopoTestManager(8); topology="custom") - -while true - if any(x->get(map_pid_ident, x, 0)==0, workers()) - yield() - else - break + map_pid_ident=Dict() + function DistributedNext.manage(manager::TopoTestManager, id::Integer, config::WorkerConfig, op::Symbol) + if op === :register + map_pid_ident[id] = config.ident + elseif op === :interrupt + kill(config.process, 2) + end end -end -let p1, p2 -for p1 in workers() - for p2 in workers() - i1 = map_pid_ident[p1] - i2 = map_pid_ident[p2] - if (iseven(i1) && iseven(i2)) || (isodd(i1) && isodd(i2)) - @test p2 == remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) + addprocs_with_testenv(TopoTestManager(8); topology="custom") + + while true + if any(x->get(map_pid_ident, x, 0)==0, workers()) + yield() else - @test_throws RemoteException remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) + break end end -end -end - -remove_workers_and_test() -# test `lazy` connection setup -function def_count_conn() - @everywhere function count_connected_workers() - count(x -> isa(x, DistributedNext.Worker) && isdefined(x, :r_stream) && isopen(x.r_stream), - DistributedNext.PGRP.workers) + let p1, p2 + for p1 in workers() + for p2 in workers() + i1 = map_pid_ident[p1] + i2 = map_pid_ident[p2] + if (iseven(i1) && iseven(i2)) || (isodd(i1) && isodd(i2)) + @test p2 == remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) + else + @test_throws RemoteException remotecall_fetch(p->remotecall_fetch(myid, p), p1, p2) + end + end + end end -end -addprocs_with_testenv(8) -def_count_conn() - -# Test for 10 random combinations -wl = workers() -combinations = [] -while length(combinations) < 10 - from = rand(wl) - to = rand(wl) - if from == to || ((from,to) in combinations) || ((to,from) in combinations) - continue - else - push!(combinations, (from,to)) + remove_workers_and_test() + + # test `lazy` connection setup + function def_count_conn() + @everywhere if !isdefined(Main, :count_connected_workers) + function count_connected_workers() + count(x -> isa(x, DistributedNext.Worker) && isdefined(x, :r_stream) && isopen(x.r_stream), + DistributedNext.PGRP.workers) + end + end end -end -# Initially only master-worker connections ought to be setup -expected_num_conns = 8 -let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) - @test num_conns == expected_num_conns -end + addprocs_with_testenv(8) + def_count_conn() + + # Test for 10 random combinations + wl = workers() + combinations = [] + while length(combinations) < 10 + from = rand(wl) + to = rand(wl) + if from == to || ((from,to) in 
combinations) || ((to,from) in combinations) + continue + else + push!(combinations, (from,to)) + end + end -for (i, (from,to)) in enumerate(combinations) - remotecall_wait(topid->remotecall_fetch(myid, topid), from, to) - global expected_num_conns += 2 # one connection endpoint on both from and to + # Initially only master-worker connections ought to be setup + expected_num_conns = 8 let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) @test num_conns == expected_num_conns end -end -# With lazy=false, all connections ought to be setup during `addprocs` -nprocs() > 1 && rmprocs(workers()) -addprocs_with_testenv(8; lazy=false) -def_count_conn() -@test sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) == 64 + for (i, (from,to)) in enumerate(combinations) + remotecall_wait(topid->remotecall_fetch(myid, topid), from, to) + expected_num_conns += 2 # one connection endpoint on both from and to + let num_conns = sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) + @test num_conns == expected_num_conns + end + end -# Cannot add more workers with a different `lazy` value -@test_throws ArgumentError addprocs_with_testenv(1; lazy=true) + # With lazy=false, all connections ought to be setup during `addprocs` + nprocs() > 1 && rmprocs(workers()) + addprocs_with_testenv(8; lazy=false) + def_count_conn() + @test sum(asyncmap(p->remotecall_fetch(count_connected_workers,p), workers())) == 64 + + # Cannot add more workers with a different `lazy` value + @test_throws ArgumentError addprocs_with_testenv(1; lazy=true) +end From c1a3be84b14662ee9e78628fade0fba04cbdea51 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Fri, 1 Nov 2024 22:44:20 +0100 Subject: [PATCH 9/9] Increase threads test wait time to avoid timeouts --- test/threads.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/threads.jl b/test/threads.jl index 9d1d6d4..19444ae 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -48,8 +48,8 @@ isfailed(rr) = fetch_from_owner(istaskfailed, rr) # timedwait() instead of @sync to avoid deadlocks. t1 = Threads.@spawn fetch_from_owner(wait, recv) t2 = Threads.@spawn fetch_from_owner(wait, send) - @test timedwait(() -> istaskdone(t1), 5) == :ok - @test timedwait(() -> istaskdone(t2), 5) == :ok + @test timedwait(() -> istaskdone(t1), 60) == :ok + @test timedwait(() -> istaskdone(t2), 60) == :ok # Check the tasks @test isdone(send)
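
For reference, the wait-with-timeout pattern this last patch tunes can be
sketched in isolation. This is a minimal, hypothetical example, not part of
the patch: `blocking_op` stands in for the test's `fetch_from_owner` calls,
and the 60-second limit matches the patch's new timeout.

    # Run the potentially-blocking operation on another thread and poll for
    # completion with timedwait() instead of wrapping it in @sync, so a hung
    # task surfaces as a :timed_out failure rather than deadlocking the run.
    blocking_op() = sleep(0.1)   # hypothetical stand-in
    t = Threads.@spawn blocking_op()
    @assert timedwait(() -> istaskdone(t), 60) == :ok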