Skip to content

Commit d04b7fd

Browse files
q10 authored and facebook-github-bot committed
Fix CUDA 12.9 OSS compilation for HSTU (#4360)
Summary:
X-link: facebookresearch/FBGEMM#1429

- Fix CUDA 12.9 OSS compilation for HSTU

Pull Request resolved: #4360

Reviewed By: spcyppt

Differential Revision: D76792311

Pulled By: q10

fbshipit-source-id: 82e0ab01f72e4e40cf5f0d4652c0c5aa3f9d2b61
1 parent 9db5e09 commit d04b7fd

File tree

4 files changed

+5
-7
lines changed

4 files changed

+5
-7
lines changed

.github/scripts/fbgemm_gpu_build.bash

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,8 @@ __configure_fbgemm_gpu_build_cuda () {
272272
local arch_list="7.0"
273273
fi
274274

275-
if [[ $cuda_version_nvcc == *"V12.8"* ]]; then
275+
if [[ $cuda_version_nvcc == *"V12.9"* ]] ||
276+
[[ $cuda_version_nvcc == *"V12.8"* ]]; then
276277
local arch_list="${arch_list};8.0;9.0a;10.0a;12.0a"
277278

278279
elif [[ $cuda_version_nvcc == *"V12.6"* ]] ||
@@ -282,6 +283,7 @@ __configure_fbgemm_gpu_build_cuda () {
282283

283284
else
284285
local arch_list="${arch_list};8.0;9.0"
286+
echo "[BUILD] Unknown NVCC version $cuda_version_nvcc - setting TORCH_CUDA_ARCH_LIST to: ${arch_list}"
285287
fi
286288
fi
287289
echo "[BUILD] Setting the following CUDA targets: ${arch_list}"

cmake/modules/GpuCppLibrary.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ function(gpu_cpp_library)
308308
target_compile_options(${lib_name} PRIVATE
309309
${args_CC_FLAGS}
310310
# Silence compiler warnings (in asmjit)
311-
-Wno-deprecated-anon-enum-enum-conversion
311+
-Wno-deprecated-enum-enum-conversion
312312
-Wno-deprecated-declarations)
313313

314314
############################################################################

fbgemm_gpu/codegen/training/optimizer/embedding_optimizer_split_device_kernel_template.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ DEVICE_INLINE void {{ mdesc }}_{{ optimizer }}_table_update_kernel(
117117
}
118118

119119
{%- if not ssd %}
120-
constexpr auto enable_optimizer_offloading = false;
120+
[[maybe_unused]] constexpr auto enable_optimizer_offloading = false;
121121
{%- endif %}
122122

123123
{{ split_precomputation }}

fbgemm_gpu/src/tbe/eeg/eeg_models.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,10 +64,6 @@ struct IndicesDistributionParameters {
6464
// Number of indices to generate
6565
int64_t numIndices;
6666

67-
// NOTE: Compiler-generated aggregate initialization constructors (P0960R3,
68-
// P1975R0) did not exist prior to C++20, but FBGEMM_GPU OSS still uses C++17,
69-
// namely when building against CUDA 11.8. Remove this constructor once CUDA
70-
// 11.8 is deprecated from FBGEMM_GPU support.
7167
IndicesDistributionParameters(
7268
const std::vector<double>& _1,
7369
const ZipfParameters& _2,

0 commit comments

Comments
 (0)