Skip to content

Commit f39af64

Browse files
authored
buildkite: get CUDA working again (#607)
* Use latest Open MPI
* Use --with-cuda when building Open MPI
* Add CUDA group to Buildkite
* Disable ROCm for now
* Disable soft-fail
* Update badge
* Move common test config to test/common.jl
* Add synchronize() function
1 parent 04cf5c4 commit f39af64

25 files changed

+202
-363
lines changed

.buildkite/pipeline.yml

Lines changed: 127 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -1,163 +1,137 @@
1-
steps:
2-
- label: "Build OpenMPI -- CUDA"
3-
agents:
4-
queue: "juliagpu"
5-
cuda: "11.0"
6-
env:
7-
OPENMPI_VER: "4.0"
8-
OPENMPI_VER_FULL: "4.0.3"
9-
UCX_VER: "1.12.1"
10-
CCACHE_DIR: "/root/ccache"
11-
commands: |
12-
echo "--- Install packages"
13-
apt-get install --yes --no-install-recommends curl ccache
14-
export PATH="/usr/lib/ccache/:$$PATH"
1+
- group: "CUDA"
2+
key: "cuda"
3+
steps:
4+
- label: "Build OpenMPI"
5+
key: "cuda-build-openmpi"
6+
agents:
7+
queue: "juliagpu"
8+
cuda: "11.0"
9+
env:
10+
OPENMPI_VER: "4.0"
11+
OPENMPI_VER_FULL: "4.0.3"
12+
UCX_VER: "1.12.1"
13+
CCACHE_DIR: "/root/ccache"
14+
commands: |
15+
echo "--- Install packages"
16+
apt-get install --yes --no-install-recommends curl ccache
17+
export PATH="/usr/lib/ccache/:$$PATH"
1518
16-
echo "--- Build UCX"
17-
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
18-
tar -zxf ucx.tar.gz
19-
pushd ucx-*
20-
./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix)
21-
make -j
22-
make install
23-
popd
19+
echo "--- Build UCX"
20+
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
21+
tar -zxf ucx.tar.gz
22+
pushd ucx-*
23+
./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix)
24+
make -j
25+
make install
26+
popd
2427
25-
echo "--- Build OpenMPI"
26-
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
27-
tar -zxf openmpi.tar.gz
28-
pushd openmpi-$${OPENMPI_VER_FULL}
29-
./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix)
30-
make -j
31-
make install
32-
popd
28+
echo "--- Build OpenMPI"
29+
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
30+
tar -zxf openmpi.tar.gz
31+
pushd openmpi-$${OPENMPI_VER_FULL}
32+
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-cuda=/usr/local/cuda --prefix=$$(realpath ../mpi-prefix)
33+
make -j
34+
make install
35+
popd
3336
34-
echo "--- Package prefix"
35-
tar -zcf mpi-prefix.tar.gz mpi-prefix/
37+
echo "--- Package prefix"
38+
tar -zcf mpi-prefix.tar.gz mpi-prefix/
3639
37-
echo "--- ccache stats"
38-
ccache -s
39-
artifact_paths:
40-
- "mpi-prefix.tar.gz"
41-
- label: "Build OpenMPI -- ROCM"
42-
agents:
43-
queue: "juliagpu"
44-
rocm: "*" # todo fix ROCM version
45-
env:
46-
OPENMPI_VER: "4.0"
47-
OPENMPI_VER_FULL: "4.0.3"
48-
UCX_VER: "1.12.1"
49-
CCACHE_DIR: "/root/ccache"
50-
commands: |
51-
echo "--- Install packages"
52-
apt-get install --yes --no-install-recommends curl ccache
53-
export PATH="/usr/lib/ccache/:$$PATH"
40+
echo "--- ccache stats"
41+
ccache -s
42+
artifact_paths:
43+
- "mpi-prefix.tar.gz"
5444

55-
echo "--- Build UCX"
56-
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
57-
tar -zxf ucx.tar.gz
58-
pushd ucx-*
59-
./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix)
60-
make -j
61-
make install
62-
popd
45+
- wait
46+
47+
- label: "Tests -- Julia 1.6"
48+
plugins:
49+
- JuliaCI/julia#v1:
50+
version: "1.6"
51+
persist_depot_dirs: packages,artifacts,compiled
52+
agents:
53+
queue: "juliagpu"
54+
cuda: "11.0"
55+
if: build.message !~ /\[skip tests\]/
56+
timeout_in_minutes: 60
57+
env:
58+
JULIA_MPI_TEST_ARRAYTYPE: CuArray
59+
JULIA_MPI_TEST_NPROCS: 2
60+
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
61+
OMPI_ALLOW_RUN_AS_ROOT: 1
62+
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
63+
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
64+
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
65+
JULIA_CUDA_MEMORY_POOL: "none"
66+
commands: |
67+
echo "--- Configure MPI"
68+
buildkite-agent artifact download --step "cuda-build-openmpi" mpi-prefix.tar.gz .
69+
mkdir -p $${JULIA_MPI_PATH}
70+
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
71+
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
72+
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
6373
64-
echo "--- Build OpenMPI"
65-
curl -L https://download.open-mpi.org/release/open-mpi/v$${OPENMPI_VER}/openmpi-$${OPENMPI_VER_FULL}.tar.gz --output openmpi.tar.gz
66-
tar -zxf openmpi.tar.gz
67-
pushd openmpi-*
68-
./configure --with-ucx=$$(realpath ../mpi-prefix) --prefix=$$(realpath ../mpi-prefix)
69-
make -j
70-
make install
71-
popd
74+
echo "--- Setup Julia packages"
75+
julia --color=yes --project=. -e '
76+
import Pkg
77+
Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))
78+
'
79+
julia --color=yes --project=test -e '
80+
using Pkg
81+
Pkg.develop(path="lib/MPIPreferences")
82+
using MPIPreferences
83+
MPIPreferences.use_system_binary(export_prefs=true)
84+
rm("test/Manifest.toml")
85+
'
7286
73-
echo "--- Package prefix"
74-
tar -zcf mpi-prefix.tar.gz mpi-prefix/
87+
echo "+++ Run tests"
88+
julia --color=yes --project=. -e '
89+
import Pkg
90+
Pkg.test("MPI")
91+
'
7592
76-
echo "--- ccache stats"
77-
ccache -s
78-
artifact_paths:
79-
- "mpi-prefix.tar.gz"
80-
- wait: ~
81-
- label: "CUDA -- 1.6"
82-
plugins:
83-
- JuliaCI/julia#v1:
84-
version: "1.6"
85-
persist_depot_dirs: packages,artifacts,compiled
86-
agents:
87-
queue: "juliagpu"
88-
cuda: "11.0"
89-
if: build.message !~ /\[skip tests\]/
90-
timeout_in_minutes: 60
91-
env:
92-
JULIA_MPI_TEST_ARRAYTYPE: CuArray
93-
JULIA_MPI_TEST_NPROCS: 2
94-
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
95-
OMPI_ALLOW_RUN_AS_ROOT: 1
96-
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
97-
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
98-
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
99-
JULIA_CUDA_MEMORY_POOL: "none"
100-
soft_fail: true
101-
commands: |
102-
echo "--- Configure MPI"
103-
buildkite-agent artifact download --step "Build OpenMPI -- CUDA" mpi-prefix.tar.gz .
104-
mkdir -p $${JULIA_MPI_PATH}
105-
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
106-
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
107-
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
93+
- label: "Tests -- Julia 1.7"
94+
plugins:
95+
- JuliaCI/julia#v1:
96+
version: "1.7"
97+
persist_depot_dirs: packages,artifacts,compiled
98+
agents:
99+
queue: "juliagpu"
100+
cuda: "11.0"
101+
if: build.message !~ /\[skip tests\]/
102+
timeout_in_minutes: 60
103+
env:
104+
JULIA_MPI_TEST_ARRAYTYPE: CuArray
105+
JULIA_MPI_TEST_NPROCS: 2
106+
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
107+
OMPI_ALLOW_RUN_AS_ROOT: 1
108+
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
109+
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
110+
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
111+
JULIA_CUDA_MEMORY_POOL: "none"
112+
commands: |
113+
echo "--- Configure MPI"
114+
buildkite-agent artifact download --step "cuda-build-openmpi" mpi-prefix.tar.gz .
115+
mkdir -p $${JULIA_MPI_PATH}
116+
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
117+
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
118+
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
108119
109-
echo "--- Setup Julia packages"
110-
julia --color=yes --project=test -e '
111-
using Pkg
112-
Pkg.develop(path="lib/MPIPreferences")
113-
using MPIPreferences
114-
MPIPreferences.use_system_binary(export_prefs=true)
115-
rm("test/Manifest.toml")'
116-
julia -e 'import Pkg; Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))'
117-
julia -e 'import Pkg; Pkg.develop(; path = pwd())'
118-
julia -e 'import Pkg; Pkg.precompile()'
120+
echo "--- Setup Julia packages"
121+
julia --color=yes --project=. -e '
122+
import Pkg
123+
Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))
124+
'
125+
julia --color=yes --project=test -e '
126+
using Pkg
127+
Pkg.develop(path="lib/MPIPreferences")
128+
using MPIPreferences
129+
MPIPreferences.use_system_binary(export_prefs=true)
130+
rm("test/Manifest.toml")
131+
'
119132
120-
echo "+++ Run tests"
121-
julia -e 'import Pkg; Pkg.test("MPI")'
122-
123-
- label: "CUDA -- 1.7"
124-
plugins:
125-
- JuliaCI/julia#v1:
126-
version: "1.7"
127-
persist_depot_dirs: packages,artifacts,compiled
128-
agents:
129-
queue: "juliagpu"
130-
cuda: "11.0"
131-
if: build.message !~ /\[skip tests\]/
132-
timeout_in_minutes: 60
133-
env:
134-
JULIA_MPI_TEST_ARRAYTYPE: CuArray
135-
JULIA_MPI_TEST_NPROCS: 2
136-
JULIA_MPI_PATH: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi"
137-
OMPI_ALLOW_RUN_AS_ROOT: 1
138-
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
139-
OMPI_MCA_btl_vader_single_copy_mechanism: 'none' # https://github.com/open-mpi/ompi/issues/4948
140-
OPAL_PREFIX: "${BUILDKITE_BUILD_CHECKOUT_PATH}/openmpi" # Should we set this for the user?
141-
JULIA_CUDA_MEMORY_POOL: "none"
142-
soft_fail: true
143-
commands: |
144-
echo "--- Configure MPI"
145-
buildkite-agent artifact download --step "Build OpenMPI -- CUDA" mpi-prefix.tar.gz .
146-
mkdir -p $${JULIA_MPI_PATH}
147-
tar -zxf mpi-prefix.tar.gz --strip-components 1 -C $${JULIA_MPI_PATH}
148-
export PATH=$${JULIA_MPI_PATH}/bin:$${PATH}
149-
export LD_LIBRARY_PATH=$${JULIA_MPI_PATH}/lib:$${LD_LIBRARY_PATH}
150-
151-
echo "--- Setup Julia packages"
152-
julia --color=yes --project=test -e '
153-
using Pkg
154-
Pkg.develop(path="lib/MPIPreferences")
155-
using MPIPreferences
156-
MPIPreferences.use_system_binary(export_prefs=true)
157-
rm("test/Manifest.toml")'
158-
julia -e 'import Pkg; Pkg.develop(; path = joinpath(pwd(), "lib", "MPIPreferences"))'
159-
julia -e 'import Pkg; Pkg.develop(; path = pwd())'
160-
julia -e 'import Pkg; Pkg.precompile()'
161-
162-
echo "+++ Run tests"
163-
julia -e 'import Pkg; Pkg.test("MPI")'
133+
echo "+++ Run tests"
134+
julia --color=yes --project=. -e '
135+
import Pkg
136+
Pkg.test("MPI")
137+
'

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
[![Docs latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://juliaparallel.github.io/MPI.jl/latest/)
44
[![Docs stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliaparallel.github.io/MPI.jl/stable/)
55
[![Unit tests](https://github.com/JuliaParallel/MPI.jl/workflows/Unit%20Tests/badge.svg?branch=master)](https://github.com/JuliaParallel/MPI.jl/actions?query=workflow%3A%22Unit+Tests%22+branch%3Amaster)
6-
[![GPU tests](https://gitlab.com/JuliaGPU/MPI.jl/badges/master/pipeline.svg)](https://gitlab.com/JuliaGPU/MPI.jl/pipelines)
6+
[![GPU tests](https://badge.buildkite.com/ed813bc4d79f557adbdb821b1c8c8de98999686e697df4a373.svg?branch=master)](https://buildkite.com/julialang/mpi-dot-jl)
77
[![codecov.io](https://codecov.io/github/JuliaParallel/MPI.jl/coverage.svg?branch=master)](https://codecov.io/github/JuliaParallel/MPI.jl?branch=master)
88
[![Coverage Status](https://coveralls.io/repos/JuliaParallel/MPI.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaParallel/MPI.jl?branch=master)
99

test/common.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
using Test
2+
using MPI
3+
4+
if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
5+
import CUDA
6+
ArrayType = CUDA.CuArray
7+
synchronize() = CUDA.synchronize()
8+
else
9+
ArrayType = Array
10+
synchronize() = nothing
11+
end

test/test_allgather.jl

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,4 @@
1-
using Test
2-
using MPI
3-
4-
if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
5-
import CUDA
6-
ArrayType = CUDA.CuArray
7-
else
8-
ArrayType = Array
9-
end
10-
1+
include("common.jl")
112

123
MPI.Init()
134

@@ -18,6 +9,7 @@ rank = MPI.Comm_rank(comm)
189
for T in Base.uniontypes(MPI.MPIDatatype)
1910
# test vector input
2011
A = ArrayType{T}([rank + 1])
12+
synchronize()
2113
C = MPI.Allgather(A, comm)
2214
@test C isa ArrayType{T,1}
2315
@test C == ArrayType{T,1}(1:size)
@@ -32,11 +24,12 @@ for T in Base.uniontypes(MPI.MPIDatatype)
3224

3325
# Test passing output buffer with set size
3426
A = ArrayType(T[val])
35-
27+
synchronize()
28+
3629
C = ArrayType{T}(undef, size)
3730
MPI.Allgather!(A, C, comm) # implied size
3831
@test C == ArrayType{T}(1:size)
39-
32+
4033
C = ArrayType{T}(undef, size)
4134
MPI.Allgather!(A, UBuffer(C,1), comm)
4235
@test C == ArrayType{T}(1:size)
@@ -47,12 +40,14 @@ for T in Base.uniontypes(MPI.MPIDatatype)
4740

4841
# Test explicit IN_PLACE
4942
C = ArrayType{T}([i == rank ? i : size + 1 for i = 0:size-1])
43+
synchronize()
5044
MPI.Allgather!(MPI.IN_PLACE, UBuffer(C, 1), comm)
5145
@test C isa ArrayType{T,1}
5246
@test C == ArrayType{T}(0:size-1)
5347

5448
# Test IN_PLACE
5549
C = ArrayType{T}([i == rank ? i : size + 1 for i = 0:size-1])
50+
synchronize()
5651
MPI.Allgather!(UBuffer(C, 1), comm)
5752
@test C isa ArrayType{T,1}
5853
@test C == ArrayType{T}(0:size-1)

test/test_allgatherv.jl

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,4 @@
1-
using Test
2-
using MPI
3-
4-
if get(ENV,"JULIA_MPI_TEST_ARRAYTYPE","") == "CuArray"
5-
import CUDA
6-
ArrayType = CUDA.CuArray
7-
else
8-
ArrayType = Array
9-
end
1+
include("common.jl")
102

113
MPI.Init()
124

@@ -19,6 +11,7 @@ check = collect(Iterators.flatten([fill(r, counts[r+1]) for r = 0:size-1]))
1911

2012
for T in Base.uniontypes(MPI.MPIDatatype)
2113
A = ArrayType{T}(fill(T(rank), counts[rank+1]))
14+
synchronize()
2215

2316
# Test passing the output buffer
2417
B = ArrayType{T}(undef, sum(counts))
@@ -31,6 +24,7 @@ for T in Base.uniontypes(MPI.MPIDatatype)
3124

3225
# Test explicit MPI_IN_PLACE
3326
B = ArrayType(fill(T(rank), sum(counts)))
27+
synchronize()
3428
MPI.Allgatherv!(MPI.IN_PLACE, VBuffer(B, counts), comm)
3529
@test B == ArrayType{T}(check)
3630
end

0 commit comments

Comments
 (0)