Skip to content

Commit 2a2d8c6

Browse files
authored
CI: Get the Slurm tests passing in CI again (#229)
1 parent 59ee855 commit 2a2d8c6

File tree

11 files changed

+324
-122
lines changed

11 files changed

+324
-122
lines changed

.github/workflows/UnitTests.yml

Lines changed: 0 additions & 56 deletions
This file was deleted.

.github/workflows/ci.yml

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
name: CI

# Run for every pull request, and for pushes that land on `master`.
on:
  pull_request:
  push:
    branches: [master]

concurrency:
  # Only pull-request refs have an in-progress run cancelled by a newer one.
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
  # Every ref other than `master` shares one group per workflow + ref, so
  # intermediate builds are skipped. On `master` the run number is appended,
  # which gives each build its own group.
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref != 'refs/heads/master' || github.run_number }}

# The workflow token only gets read access to the repository contents.
permissions:
  contents: read
14+
jobs:
15+
finalize:
  # Aggregates the results of the test jobs: prints each result and fails
  # unless every one of them is `success`.
  runs-on: ubuntu-latest
  timeout-minutes: 10
  needs: [unit-tests, test-slurm]
  # Important: the next line MUST be `if: always()`.
  # Do not change that line.
  # Without it this job would not run when the tests fail.
  if: always()
  steps:
    - run: |
        echo unit-tests: ${{ needs.unit-tests.result }}
        echo test-slurm: ${{ needs.test-slurm.result }}
    # In the condition below, every line except the last MUST end with ||
    # and the last line must NOT end with ||
    - if: |
        (needs.unit-tests.result != 'success') ||
        (needs.test-slurm.result != 'success')
      run: exit 1
35+
unit-tests:
  # Runs the package test suite once per Julia version in the matrix.
  timeout-minutes: 20
  runs-on: ubuntu-latest
  strategy:
    # Keep the remaining matrix entries running when one of them fails.
    fail-fast: false
    matrix:
      version:
        - '1.2'  # minimum Julia version supported in Project.toml
        - '1.6'  # previous LTS
        - '1.10'  # current LTS
        - '1'  # automatically expands to the latest stable 1.x release of Julia
  steps:
    - uses: actions/checkout@v4
      with:
        persist-credentials: false
    - uses: julia-actions/setup-julia@v2
      with:
        version: ${{ matrix.version }}
    - uses: julia-actions/julia-runtest@v1
54+
test-slurm:
  # Builds the Slurm-in-Docker image for each Julia version, starts the
  # compose cluster, and runs the "slurm" tests three ways: directly, inside
  # `salloc`, and through `sbatch`.
  timeout-minutes: 20
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      version:
        # Each entry must be a full major.minor.patch Julia version number,
        # because the value is interpolated directly into a download URL.
        # - '1.2.0'  # minimum Julia version supported in Project.toml
        - '1.6.7'  # previous LTS
        - '1.10.7'  # current LTS
        - '1.11.2'  # currently the latest stable release
  steps:
    - uses: actions/checkout@v4
      with:
        persist-credentials: false
    - name: Print Docker version
      run: |
        docker --version
        docker version
    # The Slurm setup step below is taken from:
    # https://github.com/kleinhenz/SlurmClusterManager.jl
    # Original author: Joseph Kleinhenz
    # License: MIT
    - name: Setup Slurm inside Docker
      env:
        MATRIX_JULIA_VERSION: ${{matrix.version}}
      run: |
        docker version
        docker compose version
        docker build --build-arg "JULIA_VERSION=${MATRIX_JULIA_VERSION:?}" -t slurm-cluster-julia -f ci/Dockerfile .
        docker compose -f ci/docker-compose.yml up -d
        docker ps
    - name: Print some information for debugging purposes
      run: |
        docker exec -t slurmctld pwd
        docker exec -t slurmctld ls -la
        docker exec -t slurmctld ls -la ClusterManagers
    - name: Instantiate package
      run: docker exec -t slurmctld julia --project=ClusterManagers -e 'import Pkg; @show Base.active_project(); Pkg.instantiate(); Pkg.status()'
    - name: Run tests without a Slurm allocation
      run: docker exec -t slurmctld julia --project=ClusterManagers -e 'import Pkg; Pkg.test(; test_args=["slurm"])'
    - name: Run tests inside salloc
      run: docker exec -t slurmctld salloc -t 00:10:00 -n 2 julia --project=ClusterManagers -e 'import Pkg; Pkg.test(test_args=["slurm"])'
    - name: Run tests inside sbatch
      run: docker exec -t slurmctld ClusterManagers/ci/run_my_sbatch.sh

ci/Dockerfile

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# This file is taken from:
2+
# https://github.com/kleinhenz/SlurmClusterManager.jl
3+
# Original author: Joseph Kleinhenz
4+
# License: MIT
5+
6+
# Base image is pinned by digest so every CI run builds on the same layers.
FROM jkleinh/slurm-cluster@sha256:afd20dafc831b0fa781460dc871232579ccf1b54955e434531394c331ce388e4 AS base
# `MAINTAINER` is deprecated; the equivalent metadata is carried by a label.
LABEL maintainer="Joseph Kleinhenz <jkleinh@umich.edu>"

# Full Julia version (major.minor.patch). It is interpolated into the
# download URL below, so a partial version such as "1.6" will not resolve.
ARG JULIA_VERSION=1.6.0

# Download and unpack Julia into /home/docker/.local/opt/julia.
# - `folder` is the major.minor prefix of JULIA_VERSION used in the URL path.
# - `curl -f` makes an HTTP error (e.g. 404 for a bad version) fail the build
#   at the download step instead of feeding an error page to `tar`.
# - The archive is saved to a file rather than piped, so a failed download
#   cannot be masked by the exit status of the last command in a pipeline.
# - The final `julia --version` verifies that the unpacked binary runs.
RUN mkdir -p /home/docker/.local/opt/julia \
    && cd /home/docker/.local/opt/julia \
    && folder="$(echo "${JULIA_VERSION}" | cut -d. -f1-2)" \
    && curl -fsSL -o julia.tar.gz "https://julialang-s3.julialang.org/bin/linux/x64/${folder}/julia-${JULIA_VERSION}-linux-x86_64.tar.gz" \
    && tar xzf julia.tar.gz --strip-components=1 \
    && rm julia.tar.gz \
    && /home/docker/.local/opt/julia/bin/julia --version

# Put the freshly installed `julia` on the PATH.
ENV PATH="/home/docker/.local/opt/julia/bin:${PATH}"

# Copy the build context (the package checkout) into the image as
# `ClusterManagers`, owned by the `docker` user.
COPY --chown=docker . ClusterManagers

# Exec form: bash is started directly, without an extra `/bin/sh -c` wrapper.
CMD ["/bin/bash", "-l"]

ci/docker-compose.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# This file is taken from:
2+
# https://github.com/kleinhenz/SlurmClusterManager.jl
3+
# Original author: Joseph Kleinhenz
4+
# License: MIT
5+
6+
# No top-level `version:` key: `docker compose` (v2) treats it as obsolete,
# ignores it, and prints a warning when it is present.

services:
  # Container started with the `slurmctld` command.
  slurmctld:
    image: slurm-cluster-julia
    container_name: slurmctld
    hostname: slurmctld
    command: ["slurmctld"]
    volumes:
      - slurm_jobdir:/home/docker
      - var_log_slurm:/var/log/slurm
    expose:
      - "6817"

  # First container started with the `slurmd` command.
  c1:
    image: slurm-cluster-julia
    container_name: c1
    hostname: c1
    command: ["slurmd"]
    volumes:
      - slurm_jobdir:/home/docker
      - var_log_slurm:/var/log/slurm
    expose:
      - "6818"
    depends_on:
      - "slurmctld"

  # Second container started with the `slurmd` command; same setup as c1.
  c2:
    image: slurm-cluster-julia
    container_name: c2
    hostname: c2
    command: ["slurmd"]
    volumes:
      - slurm_jobdir:/home/docker
      - var_log_slurm:/var/log/slurm
    expose:
      - "6818"
    depends_on:
      - "slurmctld"

# Named volumes mounted by all three services, so /home/docker and
# /var/log/slurm are common to every container.
volumes:
  slurm_jobdir:
  var_log_slurm:

ci/my_sbatch.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash

# Batch script that runs the package's "slurm" tests.

# Slurm options:
#SBATCH --ntasks=2
#SBATCH --time=00:10:00

# Important note:
# Keep everything above this line limited to comments and blank lines.

# Abort on errors, unset variables and failed pipeline stages; disable globbing.
set -o errexit -o nounset -o noglob -o pipefail

# Echo each command before it runs.
set -o xtrace

julia --project=ClusterManagers -e 'import Pkg; Pkg.test(; test_args=["slurm"])'

ci/run_my_sbatch.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash

# Submit ci/my_sbatch.sh through `sbatch`, wait for it to finish, print the
# job's captured stdout/stderr, and exit with the status `sbatch` returned.

set -euf -o pipefail

set -x

stdout_file="${HOME:?}/my_stdout.txt"
stderr_file="${HOME:?}/my_stderr.txt"

# Remove output left over from a previous run.
rm -fv "${stdout_file}"
rm -fv "${stderr_file}"

# `sbatch --wait` blocks until the job terminates and exits with the job's
# exit code. The status is captured instead of letting `set -e` abort here;
# otherwise a failing job would end the script before its output is printed,
# hiding the test log exactly when it is needed.
job_status=0
sbatch --wait --output="${stdout_file}" --error="${stderr_file}" ./ClusterManagers/ci/my_sbatch.sh || job_status=$?

# NOTE(review): presumably gives the output files time to become visible
# in this container - confirm whether this delay is still required.
sleep 5

# Print whatever the job wrote. A file can be missing if the submission
# itself failed, so that case is reported rather than treated as an error.
for log_file in "${stdout_file}" "${stderr_file}"; do
  if [[ -f "${log_file}" ]]; then
    cat "${log_file}"
  else
    echo "No such file: ${log_file}" >&2
  fi
done

# Propagate the job result so the CI step fails when the tests fail.
exit "${job_status}"

test/elastic.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
@testset "ElasticManager" begin
    # Maximum number of seconds to wait for the worker to connect.
    TIMEOUT = 10.

    em = ElasticManager(addr=:auto, port=0)

    # Launch a worker in the background using the manager's connect command.
    connect_cmd = ClusterManagers.get_connect_cmd(em)
    run(`sh -c $connect_cmd`; wait=false)

    # Poll until exactly one worker is active, or until TIMEOUT expires.
    one_worker_active() = length(em.active) == 1
    @test timedwait(one_worker_active, TIMEOUT) == :ok

    # Remove all workers and block until the removal has completed.
    wait(rmprocs(workers()))
end

test/lsf.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
@testset "LSFManager" begin
    # Start one worker through LSF.
    # NOTE(review): `-P scicompsoft` looks site-specific - confirm it is valid
    # wherever these tests run.
    pids = addprocs_lsf(1; bsub_flags=`-P scicompsoft`)
    worker_id = pids[1]

    # The master plus the new worker should now exist.
    @test nprocs() == 2
    @test workers() == pids

    # Work sent to any worker, or to the worker explicitly, runs on it.
    @test fetch(@spawnat :any myid()) == worker_id
    @test remotecall_fetch(+, worker_id, 1, 1) == 2

    # After removal only the master process (id 1) is left.
    rmprocs(pids)
    @test nprocs() == 1
    @test workers() == [1]
end

0 commit comments

Comments
 (0)