@@ -44,7 +44,7 @@ if do_help
44
44
--thorough Don't allow skipping tests that are not supported.
45
45
--quickfail Fail the entire run as soon as a single test errored.
46
46
--jobs=N Launch `N` processes to perform tests (default: Sys.CPU_THREADS).
47
- --gpus=N Expose `N` GPUs to test processes ( default: 1).
47
+ --gpu=1,2,... Which GPUs to use (comma-separated list of indices, default: all).
48
48
--sanitize[=tool] Run the tests under `compute-sanitizer`.
49
49
50
50
Remaining arguments filter the tests that will be executed.""" )
@@ -54,14 +54,15 @@ set_jobs, jobs = extract_flag!(ARGS, "--jobs"; typ=Int)
54
54
do_sanitize, sanitize_tool = extract_flag! (ARGS , " --sanitize" , " memcheck" )
55
55
do_thorough, _ = extract_flag! (ARGS , " --thorough" )
56
56
do_quickfail, _ = extract_flag! (ARGS , " --quickfail" )
57
+ do_gpu_list, gpu_list = extract_flag! (ARGS , " --gpu" )
58
+ do_list, _ = extract_flag! (ARGS , " --list" )
59
+ # # no options should remain
60
+ optlike_args = filter (startswith (" -" ), ARGS )
61
+ if ! isempty (optlike_args)
62
+ error (" Unknown test options `$(join (optlike_args, " " )) ` (try `--help` for usage instructions)" )
63
+ end
57
64
58
65
include (" setup.jl" ) # make sure everything is precompiled
59
- _, gpus = extract_flag! (ARGS , " --gpus" , ndevices ())
60
- if ! set_jobs
61
- cpu_jobs = Sys. CPU_THREADS
62
- memory_jobs = Int (Sys. free_memory ()) ÷ (2 * 2 ^ 30 )
63
- jobs = min (cpu_jobs, memory_jobs)
64
- end
65
66
66
67
# choose tests
67
68
const tests = [" core/initialization" ] # needs to run first
@@ -100,22 +101,16 @@ for (rootpath, dirs, files) in walkdir(@__DIR__)
100
101
end
101
102
unique! (tests)
102
103
103
- # parse some more command-line arguments
104
- # # --list to list all available tests
105
- do_list, _ = extract_flag! (ARGS , " --list" )
104
+ # list tests, if requested
106
105
if do_list
107
106
println (" Available tests:" )
108
107
for test in sort (tests)
109
108
println (" - $test " )
110
109
end
111
110
exit (0 )
112
111
end
113
- # # no options should remain
114
- optlike_args = filter (startswith (" -" ), ARGS )
115
- if ! isempty (optlike_args)
116
- error (" Unknown test options `$(join (optlike_args, " " )) ` (try `--help` for usage instructions)" )
117
- end
118
- # # the remaining args filter tests
112
+
113
+ # filter tests
119
114
if ! isempty (ARGS )
120
115
filter! (tests) do test
121
116
any (arg-> startswith (test, arg), ARGS )
@@ -128,46 +123,30 @@ label_match = match(r"^CUDA ([\d.]+)$", get(ENV, "BUILDKITE_LABEL", ""))
128
123
if label_match != = nothing
129
124
@test toolkit_release == VersionNumber (label_match. captures[1 ])
130
125
end
131
-
132
- # find suitable devices
133
126
@info " System information:\n " * sprint (io-> CUDA. versioninfo (io))
134
- candidates = []
135
- for (index,dev) in enumerate ( devices ())
136
- # fetch info that doesn't require a context
127
+
128
+ # select devices
129
+ function gpu_entry (dev)
137
130
id = deviceid (dev)
138
- mig = CUDA. uuid (dev) != CUDA. parent_uuid (dev)
139
- uuid = CUDA. uuid (dev)
140
131
name = CUDA. name (dev)
141
- cap = capability (dev)
142
-
143
- mem = try
144
- device! (dev)
145
- mem = CUDA. available_memory ()
146
- # immediately reset the device. this helps to reduce memory usage,
147
- # and is needed for systems that only provide exclusive access to the GPUs
148
- CUDA. device_reset! ()
149
- mem
150
- catch err
151
- if isa (err, OutOfGPUMemoryError)
152
- # the device doesn't even have enough memory left to instantiate a context...
153
- 0
154
- else
155
- rethrow ()
156
- end
132
+ uuid = CUDA. uuid (dev)
133
+ cap = CUDA. capability (dev)
134
+ mig = uuid != CUDA. parent_uuid (dev)
135
+ (; id, name, cap, uuid= " $(mig ? " MIG" : " GPU" ) -$uuid " )
136
+ end
137
+ gpus = if do_gpu_list
138
+ # parse the comma-separated list of GPU indices (`--gpu=1,2,...`)
+ # NOTE(review): assumes `extract_flag!` hands back the raw flag value as a string, so it is split here; mapping over the string itself would visit single characters — confirm
139
+ map (split (gpu_list, ',')) do str
140
+ id = parse (Int, str)
141
+ gpu_entry (CuDevice (id))
157
142
end
158
-
159
- push! (candidates, (; id, uuid, mig, name, cap, mem))
160
-
161
- # NOTE: we don't use NVML here because it doesn't respect CUDA_VISIBLE_DEVICES
143
+ else
144
+ # no explicit selection: use every device reported by `CUDA.devices()`
145
+ map (gpu_entry, CUDA. devices ())
162
146
end
163
- # # order by available memory, but also by capability if testing needs to be thorough
164
- sort! (candidates, by= x-> x. mem)
165
- # # apply
166
- picks = reverse (candidates[end - gpus+ 1 : end ]) # best GPU first
167
- ENV [" CUDA_VISIBLE_DEVICES" ] = join (map (pick-> " $(pick. mig ? " MIG" : " GPU" ) -$(pick. uuid) " , picks), " ," )
168
- @info " Testing using $(length (picks)) device(s): " * join (map (pick-> " $(pick. id) . $(pick. name) (UUID $(pick. uuid) )" , picks), " , " )
169
-
170
- @info " Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable."
147
+ # report the selected devices; the hint names the flag exactly as it is parsed above (`--gpu`)
+ @info (" Testing using device(s) " * join (map (gpu-> " $(gpu. id) ($(gpu. name) )" , gpus), " , " , " and " ) *
148
+ " . To change this, specify the `--gpu` argument to the test, or set the `CUDA_VISIBLE_DEVICES` environment variable." )
149
+ ENV [" CUDA_VISIBLE_DEVICES" ] = join (map (gpu-> gpu. uuid, gpus), " ," )
171
150
172
151
# determine tests to skip
173
152
skip_tests = []
@@ -181,7 +160,7 @@ if do_sanitize
181
160
# XXX : these hang for some reason
182
161
append! (skip_tests, [" base/sorting" ])
183
162
end
184
- if first (picks ). cap < v " 7.0"
163
+ if first (gpus ). cap < v " 7.0"
185
164
push! (skip_tests, " core/device/intrinsics/wmma" )
186
165
end
187
166
if Sys. ARCH == :aarch64
@@ -212,6 +191,14 @@ else
212
191
all_tests = copy (tests)
213
192
end
214
193
194
+ # determine parallelism
195
+ if ! set_jobs
196
+ cpu_jobs = Sys. CPU_THREADS
197
+ memory_jobs = Int (Sys. free_memory ()) ÷ (2 * 2 ^ 30 ) # one job per 2 GiB of free host memory
198
+ jobs = max (1 , min (cpu_jobs, memory_jobs)) # at least one job, even when less than 2 GiB is free
199
+ end
200
+ @info " Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable."
201
+
215
202
# add workers
216
203
const test_exeflags = Base. julia_cmd ()
217
204
filter! (test_exeflags. exec) do c
0 commit comments