Skip to content

Commit 64833b5

Browse files
q10 authored and facebook-github-bot committed
Add support for building FBGEMM_GPU against Python 3.11 in OSS (#1646)
Summary:
- Parallelize the FBGEMM CI builds to build and test static and shared libraries independently instead of in serial
- Move the FBGEMM CI builds to run inside Docker containers
- Add support for building FBGEMM_GPU against Python 3.11 in OSS
- Move all FBGEMM_GPU nightly and release build jobs to run inside an `amazonlinux:2023` Docker container
- Assuming no build errors or resource starvation, the full OSS build process now runs in under 30 minutes.

Pull Request resolved: #1646

Reviewed By: shintaro-iwasaki

Differential Revision: D44157228

Pulled By: q10

fbshipit-source-id: 6403ea9955856157785c50837b0b8e4c0cd26d53
1 parent c7cddec commit 64833b5

9 files changed

+257
-235
lines changed

.github/scripts/setup_env.bash

Lines changed: 79 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -264,22 +264,13 @@ print_gpu_info () {
264264
if which nvidia-smi; then
265265
# If nvidia-smi is installed on a machine without GPUs, this will return error
266266
(print_exec nvidia-smi) || true
267+
else
268+
echo "[CHECK] nvidia-smi not found"
267269
fi
268270
fi
269271
}
270272

271-
print_system_info () {
272-
echo "################################################################################"
273-
echo "# Print System Info"
274-
echo "#"
275-
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
276-
echo "################################################################################"
277-
echo ""
278-
279-
echo "################################################################################"
280-
echo "[INFO] Printing environment variables ..."
281-
print_exec printenv
282-
273+
__print_system_info_linux () {
283274
echo "################################################################################"
284275
echo "[INFO] Check ldd version ..."
285276
print_exec ldd --version
@@ -296,6 +287,36 @@ print_system_info () {
296287
print_exec cat /etc/os-release
297288
}
298289

290+
# Print CPU and OS version details for a macOS host.
#
# Output (stdout): a banner line followed by the `machdep.cpu` entries reported
# by `sysctl -a`, then a second banner followed by the output of `uname -a` and
# `sw_vers`, both run through the `print_exec` helper.
# Returns: the exit status of the final `print_exec sw_vers` call.
__print_system_info_macos () {
  local banner="################################################################################"

  # CPU section: filter the full sysctl dump down to the machdep.cpu keys
  printf '%s\n' "${banner}"
  printf '%s\n' "[INFO] Check CPU info ..."
  sysctl -a | grep machdep.cpu

  # OS section: kernel identification first, then the macOS product version
  printf '%s\n' "${banner}"
  printf '%s\n' "[INFO] Check MacOS version info ..."
  print_exec uname -a
  print_exec sw_vers
}
300+
301+
# Print a timestamped banner, the current environment variables, and
# OS-specific system information.
#
# Dispatches on $OSTYPE: values starting with `darwin` call
# `__print_system_info_macos`; everything else calls
# `__print_system_info_linux`.
#
# Output (stdout): banner, environment dump (through `print_exec printenv`),
# then the OS-specific report.
# Returns: the exit status of the OS-specific helper that was called.
print_system_info () {
  local banner="################################################################################"
  local timestamp

  # BSD date (macOS) accepts neither the GNU-only `--utc` long option nor the
  # `%N` nanoseconds specifier, so use the portable `-u` form with second
  # resolution there and keep millisecond resolution on GNU systems.
  if [[ $OSTYPE == 'darwin'* ]]; then
    timestamp="$(date -u +%FT%TZ)"
  else
    timestamp="$(date --utc +%FT%T.%3NZ)"
  fi

  echo "${banner}"
  echo "# Print System Info"
  echo "#"
  echo "# [TIMESTAMP] ${timestamp}"
  echo "${banner}"
  echo ""

  echo "${banner}"
  echo "[INFO] Printing environment variables ..."
  print_exec printenv

  if [[ $OSTYPE == 'darwin'* ]]; then
    __print_system_info_macos
  else
    __print_system_info_linux
  fi
}
319+
299320
print_ec2_info () {
300321
echo "################################################################################"
301322
echo "# Print EC2 Instance Info"
@@ -316,6 +337,30 @@ print_ec2_info () {
316337
echo "instance-type: $(get_ec2_metadata instance-type)"
317338
}
318339

340+
# List the GLIBC and GLIBCXX symbol versions referenced by a shared library.
#
# Arguments:
#   $1 - path to the library file to inspect
# Output (stdout): for each of GLIBC and GLIBCXX, a header line followed by the
#   unique referenced versions in ascending version order and a blank line.
# Returns:
#   0 after printing both listings
#   1 if no path was given (usage is printed) or no regular file exists there
print_glibc_info () {
  # Default to empty so a missing argument reaches the usage message instead of
  # aborting the calling shell when `set -u` (nounset) is active.
  local library_path="${1:-}"
  if [ "$library_path" == "" ]; then
    echo "Usage: ${FUNCNAME[0]} LIBRARY_PATH"
    echo "Example(s):"
    echo "    ${FUNCNAME[0]} /usr/lib/x86_64-linux-gnu/libstdc++.so.6"
    return 1
  fi

  if [ ! -f "${library_path}" ]; then
    echo "[CHECK] No file at path: ${library_path}"
    return 1
  fi

  local prefix
  for prefix in GLIBC GLIBCXX; do
    echo "[CHECK] Listing out the ${prefix} versions referenced by: ${library_path}"
    # The trailing `cat` makes the pipeline's status that of `cat`, so a library
    # with no matching symbols (grep exits 1) is not reported as a failure.
    objdump -TC "${library_path}" \
      | grep "${prefix}_" \
      | sed "s/.*${prefix}_\([.0-9]*\).*/${prefix}_\1/g" \
      | sort -Vu \
      | cat
    echo ""
  done
}
363+
319364

320365
################################################################################
321366
# Miniconda Setup Functions
@@ -342,7 +387,7 @@ setup_miniconda () {
342387
print_exec mkdir -p "$miniconda_prefix"
343388

344389
echo "[SETUP] Downloading the Miniconda installer ..."
345-
print_exec wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
390+
(exec_with_retries wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh) || return 1
346391

347392
echo "[SETUP] Installing Miniconda ..."
348393
print_exec bash miniconda.sh -b -p "$miniconda_prefix" -u
@@ -360,9 +405,16 @@ setup_miniconda () {
360405
print_exec conda info
361406

362407
# These variables will be exported outside
408+
echo "[SETUP] Exporting Miniconda variables ..."
363409
export PATH="${miniconda_prefix}/bin:${PATH}"
364410
export CONDA="${miniconda_prefix}"
365411

412+
if [ -f "${GITHUB_PATH}" ]; then
413+
echo "[SETUP] Saving Miniconda variables to ${GITHUB_PATH} ..."
414+
echo "${miniconda_prefix}/bin" >> "${GITHUB_PATH}"
415+
echo "CONDA=${miniconda_prefix}" >> "${GITHUB_PATH}"
416+
fi
417+
366418
echo "[SETUP] Successfully set up Miniconda at ${miniconda_prefix}"
367419
}
368420

@@ -448,9 +500,11 @@ install_pytorch_conda () {
448500
fi
449501

450502
# Install PyTorch packages
503+
# NOTE: Installation of large package might fail due to corrupt package download
504+
# Use --force-reinstall to address this on retries - https://datascience.stackexchange.com/questions/41732/conda-verification-failed
451505
echo "[INSTALL] Attempting to install '${pytorch_package}' (${pytorch_version}, CPU=${pytorch_cpu:-0}) through Conda using channel '${pytorch_channel}' ..."
452506
# shellcheck disable=SC2086
453-
(exec_with_retries conda install -n "${env_name}" -y ${pytorch_package} -c "${pytorch_channel}") || return 1
507+
(exec_with_retries conda install --force-reinstall -n "${env_name}" -y ${pytorch_package} -c "${pytorch_channel}") || return 1
454508

455509
# Run check for GPU variant
456510
if [ "$pytorch_cpu" == "" ]; then
@@ -612,7 +666,7 @@ install_cuda () {
612666

613667
# Install CUDA packages
614668
echo "[INSTALL] Installing CUDA ${cuda_version} ..."
615-
(exec_with_retries conda install -n "${env_name}" -y cuda -c "nvidia/label/cuda-${cuda_version}") || return 1
669+
(exec_with_retries conda install --force-reinstall -n "${env_name}" -y cuda -c "nvidia/label/cuda-${cuda_version}") || return 1
616670

617671
# Ensure that nvcc is properly installed
618672
(test_binpath "${env_name}" nvcc) || return 1
@@ -806,15 +860,19 @@ install_cxx_compiler () {
806860
install_system_packages gcc gcc-c++
807861

808862
else
809-
# Install gxx_linux-64 from main instead of cxx-compiler from conda-forge, as
810-
# the latter breaks builds:
863+
# Install gxx_linux-64 from conda-forge instead of from anaconda channel.
864+
# sysroot_linux-64 needs to be installed alongside this:
865+
#
811866
# https://root-forum.cern.ch/t/error-timespec-get-has-not-been-declared-with-conda-root-package/45712/6
867+
# https://github.com/conda-forge/conda-forge.github.io/issues/1625
868+
# https://conda-forge.org/docs/maintainer/knowledge_base.html#using-centos-7
869+
# https://github.com/conda/conda-build/issues/4371
812870
#
813-
# NOTE: Install g++ 9.x instead of 11.x becaue 11.x builds libraries with
814-
# references to GLIBCXX_3.4.29, which is not available on systems with older
871+
# NOTE: We install g++ 10.x instead of 11.x because 11.x builds binaries that
872+
# reference GLIBCXX_3.4.29, which may not be available on systems with older
815873
# versions of libstdc++.so.6 such as CentOS Stream 8 and Ubuntu 20.04
816874
echo "[INSTALL] Installing C/C++ compilers through Conda ..."
817-
(exec_with_retries conda install -n "${env_name}" -y gxx_linux-64=9.3.0) || return 1
875+
(exec_with_retries conda install -n "${env_name}" -y gxx_linux-64=10.4.0 sysroot_linux-64=2.17 -c conda-forge) || return 1
818876

819877
# The compilers are visible in the PATH as `x86_64-conda-linux-gnu-cc` and
820878
# `x86_64-conda-linux-gnu-c++`, so symlinks will need to be created
@@ -1055,7 +1113,7 @@ check_fbgemm_gpu_build () {
10551113

10561114
for library in "${fbgemm_gpu_so_files[@]}"; do
10571115
echo "[CHECK] Listing out the GLIBCXX versions referenced by the library: ${library}"
1058-
objdump -TC "${library}" | grep GLIBCXX | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
1116+
print_glibc_info "${library}"
10591117

10601118
echo "[CHECK] Verifying sample subset of symbols in the library ..."
10611119
for symbol in "${lib_symbols_to_check[@]}"; do

0 commit comments

Comments
 (0)