
Commit 01dc5b1

q10 authored and facebook-github-bot committed
Re-enable compilation of merge_pooled_embeddings operator in OSS (#1621)
Summary: Pull Request resolved: #1621

- Re-enable building of `merge_pooled_embeddings` operators in OSS
- Add `nm` checks for a few operators after OSS compilation to ensure that they are actually compiled into the shared library
- Downgrade the GCC version used for OSS builds to 9.3, since later versions of GCC build libraries that have a dependency on `GLIBCXX_3.4.29`, which is not available on systems with older versions of `libstdc++.so.6`, such as CentOS Stream 8 and Ubuntu 20.04
- Print the SHA of the built wheels after build and before installation to verify that the packages are correctly uploaded to and downloaded from GHA

Reviewed By: brad-mengchi, shintaro-iwasaki

Differential Revision: D43764547

Pulled By: q10

fbshipit-source-id: 905d51feebe8d73c08689abc605f1160cc4b4600
1 parent 8fb51c1 commit 01dc5b1
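
The `nm` check mentioned in the summary boils down to grepping the demangled dynamic symbol table of the built library. A minimal sketch of the idea (the .so filename follows the examples in the diff below):

    nm -gDC fbgemm_gpu_py.so | grep " fbgemm_gpu::merge_pooled_embeddings("

Here -g limits the listing to external symbols, -D reads the dynamic symbol table of the shared object, and -C demangles C++ names so the fully qualified fbgemm_gpu::... path can be matched.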

File tree

5 files changed: +158, -48 lines

.github/scripts/setup_env.bash

Lines changed: 152 additions & 45 deletions
@@ -127,6 +127,26 @@ test_env_var () {
   fi
 }
 
+test_library_symbol () {
+  local lib_path="$1"
+  local lib_symbol="$2"
+  if [ "$lib_symbol" == "" ]; then
+    echo "Usage: ${FUNCNAME[0]} LIB_PATH FULL_NAMESPACE_PATH_LIB_SYMBOL"
+    echo "Example(s):"
+    echo "    ${FUNCNAME[0]} fbgemm_gpu_py.so fbgemm_gpu::merge_pooled_embeddings"
+    return 1
+  fi
+
+  # Add space and '(' to the grep string to get the full method path
+  symbol_entries=$(nm -gDC "${lib_path}" | grep " ${lib_symbol}(")
+  if [ "${symbol_entries}" != "" ]; then
+    echo "[CHECK] Found symbol in ${lib_path}: ${lib_symbol}"
+  else
+    echo "[CHECK] Symbol NOT found in ${lib_path}: ${lib_symbol}"
+    return 1
+  fi
+}
+
 
 ################################################################################
 # System Functions
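
A note on the grep pattern in test_library_symbol above: anchoring with a leading space and a trailing '(' makes the match exact rather than a substring hit. A small illustration (the address and signature here are made up for the example):

    line="00000000012ab T fbgemm_gpu::merge_pooled_embeddings_cpu(at::Tensor)"
    echo "$line" | grep "fbgemm_gpu::merge_pooled_embeddings"      # matches: false positive on the _cpu variant
    echo "$line" | grep " fbgemm_gpu::merge_pooled_embeddings("    # no match, as intended
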
@@ -141,8 +161,8 @@ install_system_packages () {
   fi
 
   if which sudo; then
-    local update_cmd=("sudo")
-    local install_cmd=("sudo")
+    local update_cmd=(sudo)
+    local install_cmd=(sudo)
   else
     local update_cmd=()
     local install_cmd=()
@@ -184,7 +204,7 @@ run_python_test () {
     echo "################################################################################"
   fi
 
-  if conda run -n "${env_name}" python -m pytest -v -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then
+  if conda run -n "${env_name}" python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then
     echo "[TEST] Python test suite PASSED: ${python_test_file}"
   else
     echo "[TEST] Python test suite FAILED: ${python_test_file}"
@@ -425,7 +445,7 @@ install_pytorch_conda () {
     fi
 
     # Ensure that the PyTorch-CUDA headers are properly installed
-    test_filepath "${env_name}" cuda_cmake_macros.h || return 1
+    (test_filepath "${env_name}" cuda_cmake_macros.h) || return 1
   fi
 
   # Check that PyTorch is importable
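
Several call sites in this diff change from `test_x ... || return 1` to `(test_x ...) || return 1`. Running the helper in a subshell means that even a bare `exit` inside it only terminates the subshell; the failure then surfaces as a nonzero exit status that `|| return 1` can handle, which is presumably why the checks are wrapped. A minimal demonstration of the difference:

    fails_hard () { exit 1; }

    # Subshell: the exit ends only the subshell, so the caller can react
    (fails_hard) || echo "caught the failure"

    # Direct call: the exit would terminate the whole script before || runs
    # fails_hard || echo "never reached"
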
@@ -496,7 +516,7 @@ install_pytorch_pip () {
   if [ "$pytorch_variant_type" != "cpu" ]; then
     if [ "$pytorch_variant_type" == "cuda" ]; then
       # Ensure that the PyTorch-CUDA headers are properly installed
-      test_filepath "${env_name}" cuda_cmake_macros.h || return 1
+      (test_filepath "${env_name}" cuda_cmake_macros.h) || return 1
     fi
 
     # Ensure that the PyTorch build is of the correct variant
@@ -549,13 +569,21 @@ install_cuda () {
   (exec_with_retries conda install -n "${env_name}" -y cuda -c "nvidia/label/cuda-${cuda_version}") || return 1
 
   # Ensure that nvcc is properly installed
-  test_binpath "${env_name}" nvcc || return 1
+  (test_binpath "${env_name}" nvcc) || return 1
 
   # Ensure that the CUDA headers are properly installed
-  test_filepath "${env_name}" cuda_runtime.h || return 1
+  (test_filepath "${env_name}" cuda_runtime.h) || return 1
 
   # Ensure that the libraries are properly installed
-  test_filepath "${env_name}" libnvToolsExt.so || return 1
+  (test_filepath "${env_name}" libnvToolsExt.so) || return 1
+  (test_filepath "${env_name}" libnvidia-ml.so) || return 1
+
+  echo "[INSTALL] Set environment variable NVML_LIB_PATH ..."
+  # shellcheck disable=SC2155
+  local conda_prefix=$(conda run -n "${env_name}" printenv CONDA_PREFIX)
+  # shellcheck disable=SC2155
+  local nvml_lib_path=$(find "${conda_prefix}" -name libnvidia-ml.so)
+  print_exec conda env config vars set -n "${env_name}" NVML_LIB_PATH="${nvml_lib_path}"
 
   # Print nvcc version
   print_exec conda run -n "${env_name}" nvcc --version
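
`conda env config vars set` persists NVML_LIB_PATH in the environment itself, so it is exported on every later activation or `conda run` against that environment. To confirm the value took effect (environment name build_env, as in the other examples in this script):

    conda env config vars list -n build_env
    conda run -n build_env printenv NVML_LIB_PATH
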
@@ -621,12 +649,12 @@ install_rocm_ubuntu () {
 
 install_cxx_compiler () {
   local env_name="$1"
-  local use_yum="$2"
+  local use_system_package_manager="$2"
   if [ "$env_name" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} ENV_NAME [USE_YUM]"
     echo "Example(s):"
     echo "    ${FUNCNAME[0]} build_env     # Install C/C++ compilers through Conda"
-    echo "    ${FUNCNAME[0]} build_env 1   # Install C/C++ compilers through yum"
+    echo "    ${FUNCNAME[0]} build_env 1   # Install C/C++ compilers through the system package manager"
     return 1
   else
     echo "################################################################################"
@@ -637,15 +665,20 @@ install_cxx_compiler () {
     echo ""
   fi
 
-  if [ "$use_yum" != "" ]; then
-    echo "[INSTALL] Installing C/C++ compilers through yum ..."
+  if [ "$use_system_package_manager" != "" ]; then
+    echo "[INSTALL] Installing C/C++ compilers through the system package manager ..."
     install_system_packages gcc gcc-c++
+
   else
     # Install gxx_linux-64 from main instead of cxx-compiler from conda-forge, as
     # the latter breaks builds:
     #   https://root-forum.cern.ch/t/error-timespec-get-has-not-been-declared-with-conda-root-package/45712/6
+    #
+    # NOTE: Install g++ 9.x instead of 11.x because 11.x builds libraries with
+    # references to GLIBCXX_3.4.29, which is not available on systems with older
+    # versions of libstdc++.so.6 such as CentOS Stream 8 and Ubuntu 20.04
     echo "[INSTALL] Installing C/C++ compilers through Conda ..."
-    (exec_with_retries conda install -n "${env_name}" -y gxx_linux-64) || return 1
+    (exec_with_retries conda install -n "${env_name}" -y gxx_linux-64=9.3.0) || return 1
 
     # The compilers are visible in the PATH as `x86_64-conda-linux-gnu-cc` and
     # `x86_64-conda-linux-gnu-c++`, so symlinks will need to be created
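
GLIBCXX_3.4.29 is the libstdc++ symbol version introduced with GCC 11, which is why the pin to gxx_linux-64=9.3.0 above keeps the built wheels loadable on CentOS Stream 8 and Ubuntu 20.04. One quick way to see which versions a target system's libstdc++ actually provides (the library path varies by distro and is only illustrative):

    strings /usr/lib64/libstdc++.so.6 | grep '^GLIBCXX_' | sort -V | tail -n 3
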
@@ -662,10 +695,10 @@ install_cxx_compiler () {
   fi
 
   # Check C/C++ compilers are visible
-  test_binpath "${env_name}" cc || return 1
-  test_binpath "${env_name}" gcc || return 1
-  test_binpath "${env_name}" c++ || return 1
-  test_binpath "${env_name}" g++ || return 1
+  (test_binpath "${env_name}" cc) || return 1
+  (test_binpath "${env_name}" gcc) || return 1
+  (test_binpath "${env_name}" c++) || return 1
+  (test_binpath "${env_name}" g++) || return 1
 
   # Print out the C++ version
   print_exec conda run -n "${env_name}" c++ --version
@@ -700,8 +733,8 @@ install_build_tools () {
     wheel) || return 1
 
   # Check binaries are visible in the PATH
-  test_binpath "${env_name}" cmake || return 1
-  test_binpath "${env_name}" ninja || return 1
+  (test_binpath "${env_name}" cmake) || return 1
+  (test_binpath "${env_name}" ninja) || return 1
 
   # Check Python packages are importable
   local import_tests=( click hypothesis jinja2 numpy skbuild wheel )
@@ -865,28 +898,44 @@ __build_fbgemm_gpu_common_pre_steps () {
   # Private function that uses variables instantiated by its caller
 
   # Check C/C++ compilers are visible (the build scripts look specifically for `gcc`)
-  test_binpath "${env_name}" cc || return 1
-  test_binpath "${env_name}" gcc || return 1
-  test_binpath "${env_name}" c++ || return 1
-  test_binpath "${env_name}" g++ || return 1
+  (test_binpath "${env_name}" cc) || return 1
+  (test_binpath "${env_name}" gcc) || return 1
+  (test_binpath "${env_name}" c++) || return 1
+  (test_binpath "${env_name}" g++) || return 1
 
-  if [ "$cpu_only" != "" ]; then
+  if [ "$fbgemm_variant" == "cpu" ]; then
     # Update the package name and build args depending on if CUDA is specified
     echo "[BUILD] Applying CPU-only build args ..."
-    cpu_only=1
-    build_args="--cpu_only"
+    build_args=(--cpu_only)
     package_name="${package_name}-cpu"
+
+  elif [ "$fbgemm_variant" == "rocm" ]; then
+    (test_env_var "${env_name}" PYTORCH_ROCM_ARCH) || return 1
+
+    echo "[BUILD] Applying ROCm build args ..."
+    build_args=()
+    package_name="${package_name}-rocm"
+
   else
+    # Set to the default variant
+    fbgemm_variant="gpu"
+
     # Check nvcc is visible
-    test_binpath "${env_name}" nvcc || return 1
+    (test_binpath "${env_name}" nvcc) || return 1
 
     # Check that cuDNN environment variables are available
-    test_env_var "${env_name}" CUDNN_INCLUDE_DIR || return 1
-    test_env_var "${env_name}" CUDNN_LIBRARY || return 1
+    (test_env_var "${env_name}" CUDNN_INCLUDE_DIR) || return 1
+    (test_env_var "${env_name}" CUDNN_LIBRARY) || return 1
+    (test_env_var "${env_name}" NVML_LIB_PATH) || return 1
 
     # Build only CUDA 7.0 and 8.0 (i.e. V100 and A100) because of 100 MB binary size limits from PyPI.
     echo "[BUILD] Applying GPU build args ..."
-    build_args="-DTORCH_CUDA_ARCH_LIST='7.0;8.0'"
+    # shellcheck disable=SC2155
+    local nvml_lib_path=$(conda run -n "${env_name}" printenv NVML_LIB_PATH)
+    build_args=(
+      --nvml_lib_path="${nvml_lib_path}"
+      -DTORCH_CUDA_ARCH_LIST='7.0;8.0'
+    )
   fi
 
   # Extract the Python tag
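
Note that build_args changes here from a flat string to a bash array, expanded later as "${build_args[@]}". The array form passes each argument as its own word even if a value (such as the nvml_lib_path) contains spaces, whereas the quoted string form would arrive as a single word. A minimal sketch of the difference:

    args_str="--foo --bar=hello world"
    args_arr=(--foo --bar="hello world")

    printf '[%s] ' "${args_str}"; echo      # one word:  [--foo --bar=hello world]
    printf '[%s] ' "${args_arr[@]}"; echo   # two words: [--foo] [--bar=hello world]
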
@@ -902,15 +951,61 @@ __build_fbgemm_gpu_common_pre_steps () {
   print_exec conda run -n "${env_name}" python setup.py clean
 }
 
+check_fbgemm_gpu_build () {
+  local fbgemm_variant="$1"
+  if [ "$fbgemm_variant" == "" ]; then
+    echo "Usage: ${FUNCNAME[0]} FBGEMM_VARIANT"
+    echo "Example(s):"
+    echo "    ${FUNCNAME[0]} cpu"
+    return 1
+  fi
+
+  # Find the .SO file
+  # shellcheck disable=SC2155
+  local fbgemm_gpu_so_files=$(find . -name fbgemm_gpu_py.so)
+  readarray -t fbgemm_gpu_so_files <<<"$fbgemm_gpu_so_files"
+  if [ "${#fbgemm_gpu_so_files[@]}" -le 0 ]; then
+    echo "[CHECK] .SO library fbgemm_gpu_py.so is missing from the build path!"
+    return 1
+  fi
+
+  # Prepare a sample set of symbols whose existence in the built library should be checked
+  # This is by no means an exhaustive set, and should be updated accordingly
+  local lib_symbols_to_check=(
+    fbgemm_gpu::asynchronous_inclusive_cumsum_cpu
+    fbgemm_gpu::jagged_2d_to_dense
+  )
+
+  # Add more symbols to check for if it's a non-CPU variant
+  if [ "${fbgemm_variant}" != "cpu" ]; then
+    lib_symbols_to_check+=(
+      fbgemm_gpu::asynchronous_inclusive_cumsum_gpu
+      fbgemm_gpu::merge_pooled_embeddings
+    )
+  fi
+
+  for library in "${fbgemm_gpu_so_files[@]}"; do
+    echo "[CHECK] Listing out the GLIBCXX versions referenced by the library: ${library}"
+    objdump -TC "${library}" | grep GLIBCXX | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
+
+    echo "[CHECK] Verifying sample subset of symbols in the library ..."
+    for symbol in "${lib_symbols_to_check[@]}"; do
+      (test_library_symbol "${library}" "${symbol}") || return 1
+    done
+
+    echo ""
+  done
+}
+
 build_fbgemm_gpu_package () {
   env_name="$1"
   package_name="$2"
-  cpu_only="$3"
+  fbgemm_variant="$3"
   if [ "$package_name" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} ENV_NAME PACKAGE_NAME [CPU_ONLY]"
     echo "Example(s):"
-    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu_nightly      # Build the full wheel package"
-    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu_nightly 1    # Build the CPU-only variant of the wheel package"
+    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu_nightly        # Build the full wheel package"
+    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu_nightly cpu    # Build the CPU-only variant of the wheel package"
     return 1
   else
     echo "################################################################################"
@@ -922,32 +1017,38 @@ build_fbgemm_gpu_package () {
   fi
 
   # Run all the common FBGEMM-GPU build pre-steps (set up variables)
-  __build_fbgemm_gpu_common_pre_steps
+  __build_fbgemm_gpu_common_pre_steps || return 1
 
   # manylinux1_x86_64 is specified for PyPI upload
   # Distribute Python extensions as wheels on Linux
-  echo "[BUILD] Building FBGEMM-GPU (CPU=${cpu_only:-0}) wheel ..."
+  echo "[BUILD] Building FBGEMM-GPU (VARIANT=${fbgemm_variant}) wheel ..."
   print_exec conda run -n "${env_name}" \
     python setup.py bdist_wheel \
       --package_name="${package_name}" \
      --python-tag="${python_tag}" \
       --plat-name=manylinux1_x86_64 \
-      "${build_args}"
+      "${build_args[@]}"
+
+  # Run checks on the built libraries
+  (check_fbgemm_gpu_build "${fbgemm_variant}") || return 1
 
   echo "[BUILD] Enumerating the built wheels ..."
   print_exec ls -lth dist/*.whl
 
+  echo "[BUILD] Enumerating the wheel SHAs ..."
+  print_exec sha1sum dist/*.whl
+
   echo "[BUILD] FBGEMM-GPU build wheel completed"
 }
 
 build_fbgemm_gpu_install () {
   env_name="$1"
-  cpu_only="$2"
+  fbgemm_variant="$2"
   if [ "$env_name" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} ENV_NAME [CPU_ONLY]"
     echo "Example(s):"
     echo "    ${FUNCNAME[0]} build_env        # Build + install the package"
-    echo "    ${FUNCNAME[0]} build_env 1      # Build + Install the CPU-only variant of the package"
+    echo "    ${FUNCNAME[0]} build_env cpu    # Build + Install the CPU-only variant of the package"
     return 1
   else
     echo "################################################################################"
@@ -963,9 +1064,12 @@ build_fbgemm_gpu_install () {
 
   # Parallelism may need to be limited to prevent the build from being
   # canceled for going over ulimits
-  echo "[BUILD] Building and installing FBGEMM-GPU (CPU=${cpu_only:-0}) ..."
+  echo "[BUILD] Building and installing FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..."
   print_exec conda run -n "${env_name}" \
-    python setup.py install "${build_args}"
+    python setup.py install "${build_args[@]}"
+
+  # Run checks on the built libraries
+  (check_fbgemm_gpu_build "${fbgemm_variant}") || return 1
 
   echo "[BUILD] FBGEMM-GPU build + install completed"
 }
@@ -987,14 +1091,17 @@ install_fbgemm_gpu_package () {
     echo ""
   fi
 
-  echo "[BUILD] Installing FBGEMM-GPU wheel: ${package_name} ..."
+  echo "[INSTALL] Printing out FBGEMM-GPU wheel SHA: ${package_name}"
+  print_exec sha1sum "${package_name}"
+
+  echo "[INSTALL] Installing FBGEMM-GPU wheel: ${package_name} ..."
   conda run -n "${env_name}" python -m pip install "${package_name}"
 
-  echo "[BUILD] Checking imports ..."
+  echo "[INSTALL] Checking imports ..."
   (test_python_import "${env_name}" fbgemm_gpu) || return 1
   (test_python_import "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
 
-  echo "[BUILD] Wheel installation completed ..."
+  echo "[INSTALL] Wheel installation completed ..."
 }
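
With the sha1sum calls above, the CI logs now carry one wheel digest from the build job (right after bdist_wheel) and one from the install job (right after the GHA artifact download); if the upload/download mangled or swapped the wheel, the two digests will not match. Compared by eye, for example:

    sha1sum dist/*.whl          # printed in build_fbgemm_gpu_package
    sha1sum "${package_name}"   # printed in install_fbgemm_gpu_package
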
@@ -1055,8 +1162,8 @@ run_fbgemm_gpu_tests () {
   echo "[TEST] Enumerating test files ..."
   print_exec ls -lth ./*.py
 
-  # NOTE: These tests running on single CPU core with a less powerful testing
-  # GPU in GHA can take up to 5 hours.
+  # NOTE: Tests running on single CPU core with a less powerful testing GPU in
+  # GHA can take up to 5 hours.
   for test_file in *.py; do
     if echo "${files_to_skip[@]}" | grep "${test_file}"; then
       echo "[TEST] Skipping test file known to be broken: ${test_file}"

.github/workflows/fbgemm_gpu_ci.yml

Lines changed: 1 addition & 1 deletion
@@ -169,7 +169,7 @@ jobs:
         run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
       - name: Build and Install FBGEMM_GPU (CPU version)
-        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV cpuonly
+        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV cpu
 
       - name: Test with PyTest
         timeout-minutes: 10

.github/workflows/fbgemm_nightly_build_cpu.yml

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@ jobs:
         run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
      - name: Build FBGEMM_GPU Nightly (CPU version)
-        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV fbgemm_gpu_nightly cpuonly
+        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV fbgemm_gpu_nightly cpu
 
       - name: Upload Built Wheel as GHA Artifact
         uses: actions/upload-artifact@v3

.github/workflows/fbgemm_release_build_cpu.yml

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ jobs:
         run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
       - name: Build FBGEMM_GPU (CPU version)
-        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV fbgemm_gpu cpuonly
+        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV fbgemm_gpu cpu
 
       - name: Upload Built Wheel as GHA Artifact
         uses: actions/upload-artifact@v3
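
All three workflows switch the positional argument from the old truthy flag `cpuonly` to the variant string `cpu`, matching the new fbgemm_variant parameter of the build functions (which also accept `rocm` and default to `gpu`). Typical invocations, following the usage strings above:

    build_fbgemm_gpu_package build_env fbgemm_gpu_nightly cpu   # CPU-only wheel
    build_fbgemm_gpu_install build_env cpu                      # CPU-only build + install
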
