@@ -167,42 +167,31 @@ __configure_fbgemm_gpu_build_docs () {
167
167
__configure_fbgemm_gpu_build_rocm () {
168
168
local fbgemm_variant_targets=" $1 "
169
169
170
- # Fetch available ROCm architectures on the machine
170
+ # By default, we build for a limited number of target architectures to save on
171
+ # build time. This list needs to be updated if the CI ROCm machines have
172
+ # different hardware.
173
+ #
174
+ # Target architecture mapping and ROCm compatibility table can be found at:
175
+ # https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html
176
+ # https://rocm.docs.amd.com/en/latest/compatibility/compatibility-matrix.html
177
+
171
178
if [ " $fbgemm_variant_targets " != " " ]; then
172
- echo " [BUILD] ROCm targets have been manually provided: ${fbgemm_variant_targets} "
179
+ # If targets are manually supplied, override
180
+ echo " [BUILD] Using the user-supplied ROCm targets ..."
173
181
local arch_list=" ${fbgemm_variant_targets} "
174
182
183
+ elif [ -n " ${BUILD_FROM_NOVA+x} " ]; then
184
+ # If BUILD_FROM_NOVA is set (regardless of 0 or 1 - some steps in Nova have
185
+ # the value set to 0), we are building in Nova. Nova machines take much
186
+ # longer time to build FBGEMM_GPU for ROCm, so we have to limit to just the
187
+ # latest model.
188
+ echo " [BUILD] Building in Nova environment, ignoring the provided PYTORCH_ROCM_ARCH list and limiting ROCm targets ..."
189
+ local arch_list=" gfx942"
190
+
175
191
else
176
- if which rocminfo; then
177
- # shellcheck disable=SC2155
178
- local arch_list=$( rocminfo | grep -o -m 1 ' gfx.*' )
179
- echo " [BUILD] Architectures list from rocminfo: ${arch_list} "
180
-
181
- if [ " $arch_list " == " " ]; then
182
- # It is possible to build FBGEMM_GPU-ROCm on a machine without AMD
183
- # cards, in which case the arch_list will be empty.
184
- echo " [BUILD] rocminfo did not return anything valid!"
185
-
186
- # By default, we build for a limited number of architectures to save on
187
- # build time. This list needs to be updated if the CI ROCm machines
188
- # have different hardware.
189
- #
190
- # Architecture mapping can be found at:
191
- # https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html
192
- if [ -z " ${BUILD_FROM_NOVA+x} " ]; then
193
- # If BUILD_FROM_NOVA is unset, then we are building from AMD host with
194
- # sufficient resources, so we can build for more architectures.
195
- local arch_list=" gfx908,gfx90a,gfx942"
196
- else
197
- # If BUILD_FROM_NOVA is set (regardless of 0 or 1), we are building in
198
- # Nova. Nova machines take a longer time to build FBGEMM_GPU for
199
- # ROCm, so we limit to one architecture.
200
- local arch_list=" gfx942"
201
- fi
202
- fi
203
- else
204
- echo " [BUILD] rocminfo not found in PATH!"
205
- fi
192
+ # If BUILD_FROM_NOVA is unset, then we are building from a compute host with
193
+ # sufficient resources, so we can build for more AMD Instinct architectures.
194
+ local arch_list=" gfx908,gfx90a,gfx942"
206
195
fi
207
196
208
197
echo " [BUILD] Setting the following ROCm targets: ${arch_list} "
@@ -286,8 +275,8 @@ __configure_fbgemm_gpu_build_cuda () {
286
275
echo " [BUILD] Unknown NVCC version $cuda_version_nvcc - setting TORCH_CUDA_ARCH_LIST to: ${arch_list} "
287
276
fi
288
277
fi
289
- echo " [BUILD] Setting the following CUDA targets: ${arch_list} "
290
278
279
+ echo " [BUILD] Setting the following CUDA targets: ${arch_list} "
291
280
# Unset the environment-supplied TORCH_CUDA_ARCH_LIST because it will take
292
281
# precedence over cmake -DTORCH_CUDA_ARCH_LIST
293
282
unset TORCH_CUDA_ARCH_LIST
0 commit comments