@@ -127,6 +127,26 @@ test_env_var () {
127
127
fi
128
128
}
129
129
130
################################################################################
# Check that a shared library exports a given (demangled) symbol.
#
# Arguments:
#   $1 - LIB_PATH: path to the shared library to inspect with `nm`
#   $2 - FULL_NAMESPACE_PATH_LIB_SYMBOL: fully-qualified symbol name, e.g.
#        fbgemm_gpu::merge_pooled_embeddings
# Outputs:
#   Writes a "[CHECK] ..." status line (or the usage text) to stdout
# Returns:
#   0 if the symbol is found; 1 if it is not found or arguments are missing
################################################################################
test_library_symbol () {
  local lib_path="$1"
  local lib_symbol="$2"
  if [[ -z "${lib_symbol}" ]]; then
    echo "Usage: ${FUNCNAME[0]} LIB_PATH FULL_NAMESPACE_PATH_LIB_SYMBOL"
    echo "Example(s):"
    echo "    ${FUNCNAME[0]} fbgemm_gpu_py.so fbgemm_gpu::merge_pooled_embeddings"
    return 1
  fi

  # Add space and '(' to the grep string to get the full method path, so that
  # a symbol whose name merely starts or ends with lib_symbol does not match.
  # Use a fixed-string match (-F) so that characters such as '.', '*' or '['
  # in the symbol name are compared literally instead of as a regex.
  # Declared local separately so the variable does not leak into the caller.
  local symbol_entries
  symbol_entries=$(nm -gDC "${lib_path}" | grep -F -- " ${lib_symbol}(")

  if [[ -n "${symbol_entries}" ]]; then
    echo "[CHECK] Found symbol in ${lib_path}: ${lib_symbol}"
  else
    echo "[CHECK] Symbol NOT found in ${lib_path}: ${lib_symbol}"
    return 1
  fi
}
149
+
130
150
131
151
# ###############################################################################
132
152
# System Functions
@@ -141,8 +161,8 @@ install_system_packages () {
141
161
fi
142
162
143
163
if which sudo; then
144
- local update_cmd=(" sudo" )
145
- local install_cmd=(" sudo" )
164
+ local update_cmd=(sudo)
165
+ local install_cmd=(sudo)
146
166
else
147
167
local update_cmd=()
148
168
local install_cmd=()
@@ -184,7 +204,7 @@ run_python_test () {
184
204
echo " ################################################################################"
185
205
fi
186
206
187
- if conda run -n " ${env_name} " python -m pytest -v -s -W ignore::pytest.PytestCollectionWarning " ${python_test_file} " ; then
207
+ if conda run -n " ${env_name} " python -m pytest -v -rsx - s -W ignore::pytest.PytestCollectionWarning " ${python_test_file} " ; then
188
208
echo " [TEST] Python test suite PASSED: ${python_test_file} "
189
209
else
190
210
echo " [TEST] Python test suite FAILED: ${python_test_file} "
@@ -425,7 +445,7 @@ install_pytorch_conda () {
425
445
fi
426
446
427
447
# Ensure that the PyTorch-CUDA headers are properly installed
428
- test_filepath " ${env_name} " cuda_cmake_macros.h || return 1
448
+ ( test_filepath " ${env_name} " cuda_cmake_macros.h) || return 1
429
449
fi
430
450
431
451
# Check that PyTorch is importable
@@ -496,7 +516,7 @@ install_pytorch_pip () {
496
516
if [ " $pytorch_variant_type " != " cpu" ]; then
497
517
if [ " $pytorch_variant_type " == " cuda" ]; then
498
518
# Ensure that the PyTorch-CUDA headers are properly installed
499
- test_filepath " ${env_name} " cuda_cmake_macros.h || return 1
519
+ ( test_filepath " ${env_name} " cuda_cmake_macros.h) || return 1
500
520
fi
501
521
502
522
# Ensure that the PyTorch build is of the correct variant
@@ -549,13 +569,21 @@ install_cuda () {
549
569
(exec_with_retries conda install -n " ${env_name} " -y cuda -c " nvidia/label/cuda-${cuda_version} " ) || return 1
550
570
551
571
# Ensure that nvcc is properly installed
552
- test_binpath " ${env_name} " nvcc || return 1
572
+ ( test_binpath " ${env_name} " nvcc) || return 1
553
573
554
574
# Ensure that the CUDA headers are properly installed
555
- test_filepath " ${env_name} " cuda_runtime.h || return 1
575
+ ( test_filepath " ${env_name} " cuda_runtime.h) || return 1
556
576
557
577
# Ensure that the libraries are properly installed
558
- test_filepath " ${env_name} " libnvToolsExt.so || return 1
578
+ (test_filepath " ${env_name} " libnvToolsExt.so) || return 1
579
+ (test_filepath " ${env_name} " libnvidia-ml.so) || return 1
580
+
581
+ echo " [INSTALL] Set environment variable NVML_LIB_PATH ..."
582
+ # shellcheck disable=SC2155
583
+ local conda_prefix=$( conda run -n " ${env_name} " printenv CONDA_PREFIX)
584
+ # shellcheck disable=SC2155
585
+ local nvml_lib_path=$( find " ${conda_prefix} " -name libnvidia-ml.so)
586
+ print_exec conda env config vars set -n " ${env_name} " NVML_LIB_PATH=" ${nvml_lib_path} "
559
587
560
588
# Print nvcc version
561
589
print_exec conda run -n " ${env_name} " nvcc --version
@@ -621,12 +649,12 @@ install_rocm_ubuntu () {
621
649
622
650
install_cxx_compiler () {
623
651
local env_name=" $1 "
624
- local use_yum =" $2 "
652
+ local use_system_package_manager =" $2 "
625
653
if [ " $env_name " == " " ]; then
626
654
echo " Usage: ${FUNCNAME[0]} ENV_NAME [USE_YUM]"
627
655
echo " Example(s):"
628
656
echo " ${FUNCNAME[0]} build_env # Install C/C++ compilers through Conda"
629
- echo " ${FUNCNAME[0]} build_env 1 # Install C/C++ compilers through yum "
657
+ echo " ${FUNCNAME[0]} build_env 1 # Install C/C++ compilers through the system package manager "
630
658
return 1
631
659
else
632
660
echo " ################################################################################"
@@ -637,15 +665,20 @@ install_cxx_compiler () {
637
665
echo " "
638
666
fi
639
667
640
- if [ " $use_yum " != " " ]; then
641
- echo " [INSTALL] Installing C/C++ compilers through yum ..."
668
+ if [ " $use_system_package_manager " != " " ]; then
669
+ echo " [INSTALL] Installing C/C++ compilers through the system package manager ..."
642
670
install_system_packages gcc gcc-c++
671
+
643
672
else
644
673
# Install gxx_linux-64 from main instead of cxx-compiler from conda-forge, as
645
674
# the latter breaks builds:
646
675
# https://root-forum.cern.ch/t/error-timespec-get-has-not-been-declared-with-conda-root-package/45712/6
676
+ #
677
+ # NOTE: Install g++ 9.x instead of 11.x because 11.x builds libraries with
678
+ # references to GLIBCXX_3.4.29, which is not available on systems with older
679
+ # versions of libstdc++.so.6 such as CentOS Stream 8 and Ubuntu 20.04
647
680
echo " [INSTALL] Installing C/C++ compilers through Conda ..."
648
- (exec_with_retries conda install -n " ${env_name} " -y gxx_linux-64) || return 1
681
+ (exec_with_retries conda install -n " ${env_name} " -y gxx_linux-64=9.3.0 ) || return 1
649
682
650
683
# The compilers are visible in the PATH as `x86_64-conda-linux-gnu-cc` and
651
684
# `x86_64-conda-linux-gnu-c++`, so symlinks will need to be created
@@ -662,10 +695,10 @@ install_cxx_compiler () {
662
695
fi
663
696
664
697
# Check C/C++ compilers are visible
665
- test_binpath " ${env_name} " cc || return 1
666
- test_binpath " ${env_name} " gcc || return 1
667
- test_binpath " ${env_name} " c++ || return 1
668
- test_binpath " ${env_name} " g++ || return 1
698
+ ( test_binpath " ${env_name} " cc) || return 1
699
+ ( test_binpath " ${env_name} " gcc) || return 1
700
+ ( test_binpath " ${env_name} " c++) || return 1
701
+ ( test_binpath " ${env_name} " g++) || return 1
669
702
670
703
# Print out the C++ version
671
704
print_exec conda run -n " ${env_name} " c++ --version
@@ -700,8 +733,8 @@ install_build_tools () {
700
733
wheel) || return 1
701
734
702
735
# Check binaries are visible in the PATH
703
- test_binpath " ${env_name} " cmake || return 1
704
- test_binpath " ${env_name} " ninja || return 1
736
+ ( test_binpath " ${env_name} " cmake) || return 1
737
+ ( test_binpath " ${env_name} " ninja) || return 1
705
738
706
739
# Check Python packages are importable
707
740
local import_tests=( click hypothesis jinja2 numpy skbuild wheel )
@@ -865,28 +898,44 @@ __build_fbgemm_gpu_common_pre_steps () {
865
898
# Private function that uses variables instantiated by its caller
866
899
867
900
# Check C/C++ compilers are visible (the build scripts look specifically for `gcc`)
868
- test_binpath " ${env_name} " cc || return 1
869
- test_binpath " ${env_name} " gcc || return 1
870
- test_binpath " ${env_name} " c++ || return 1
871
- test_binpath " ${env_name} " g++ || return 1
901
+ ( test_binpath " ${env_name} " cc) || return 1
902
+ ( test_binpath " ${env_name} " gcc) || return 1
903
+ ( test_binpath " ${env_name} " c++) || return 1
904
+ ( test_binpath " ${env_name} " g++) || return 1
872
905
873
- if [ " $cpu_only " != " " ]; then
906
+ if [ " $fbgemm_variant " == " cpu " ]; then
874
907
# Update the package name and build args depending on if CUDA is specified
875
908
echo " [BUILD] Applying CPU-only build args ..."
876
- cpu_only=1
877
- build_args=" --cpu_only"
909
+ build_args=(--cpu_only)
878
910
package_name=" ${package_name} -cpu"
911
+
912
+ elif [ " $fbgemm_variant " == " rocm" ]; then
913
+ (test_env_var " ${env_name} " PYTORCH_ROCM_ARCH) || return 1
914
+
915
+ echo " [BUILD] Applying ROCm build args ..."
916
+ build_args=()
917
+ package_name=" ${package_name} -rocm"
918
+
879
919
else
920
+ # Set to the default variant
921
+ fbgemm_variant=" gpu"
922
+
880
923
# Check nvcc is visible
881
- test_binpath " ${env_name} " nvcc || return 1
924
+ ( test_binpath " ${env_name} " nvcc) || return 1
882
925
883
926
# Check that cuDNN environment variables are available
884
- test_env_var " ${env_name} " CUDNN_INCLUDE_DIR || return 1
885
- test_env_var " ${env_name} " CUDNN_LIBRARY || return 1
927
+ (test_env_var " ${env_name} " CUDNN_INCLUDE_DIR) || return 1
928
+ (test_env_var " ${env_name} " CUDNN_LIBRARY) || return 1
929
+ (test_env_var " ${env_name} " NVML_LIB_PATH) || return 1
886
930
887
931
# Build only CUDA 7.0 and 8.0 (i.e. V100 and A100) because of 100 MB binary size limits from PyPI.
888
932
echo " [BUILD] Applying GPU build args ..."
889
- build_args=" -DTORCH_CUDA_ARCH_LIST='7.0;8.0'"
933
+ # shellcheck disable=SC2155
934
+ local nvml_lib_path=$( conda run -n " ${env_name} " printenv NVML_LIB_PATH)
935
+ build_args=(
936
+ --nvml_lib_path=" ${nvml_lib_path} "
937
+ -DTORCH_CUDA_ARCH_LIST=' 7.0;8.0'
938
+ )
890
939
fi
891
940
892
941
# Extract the Python tag
@@ -902,15 +951,61 @@ __build_fbgemm_gpu_common_pre_steps () {
902
951
print_exec conda run -n " ${env_name} " python setup.py clean
903
952
}
904
953
954
################################################################################
# Run sanity checks on the built fbgemm_gpu_py.so libraries found under the
# current working directory.
#
# For each library found, this prints the GLIBCXX versions it references and
# verifies that a sample set of symbols is present via test_library_symbol.
#
# Arguments:
#   $1 - FBGEMM_VARIANT: build variant; any value other than "cpu" enables the
#        additional GPU symbol checks
# Outputs:
#   Writes "[CHECK] ..." progress lines (or the usage text) to stdout
# Returns:
#   0 if all checks pass; 1 if the argument is missing, no library is found,
#   or any expected symbol is missing
################################################################################
check_fbgemm_gpu_build () {
  local fbgemm_variant="$1"
  if [ "$fbgemm_variant" == "" ]; then
    echo "Usage: ${FUNCNAME[0]} FBGEMM_VARIANT"
    echo "Example(s):"
    echo "    ${FUNCNAME[0]} cpu"
    return 1
  fi

  # Find the .SO file(s)
  #
  # Read directly from find through process substitution: feeding an empty
  # string through a here-string (<<<) would still produce one (empty) array
  # element, which would make the "missing library" check below unreachable.
  local fbgemm_gpu_so_files=()
  readarray -t fbgemm_gpu_so_files < <(find . -name fbgemm_gpu_py.so)
  if [ "${#fbgemm_gpu_so_files[@]}" -le 0 ]; then
    echo "[CHECK] .SO library fbgemm_gpu_py.so is missing from the build path!"
    return 1
  fi

  # Prepare a sample set of symbols whose existence in the built library should be checked
  # This is by no means an exhaustive set, and should be updated accordingly
  local lib_symbols_to_check=(
    fbgemm_gpu::asynchronous_inclusive_cumsum_cpu
    fbgemm_gpu::jagged_2d_to_dense
  )

  # Add more symbols to check for if it's a non-CPU variant
  if [ "${fbgemm_variant}" != "cpu" ]; then
    lib_symbols_to_check+=(
      fbgemm_gpu::asynchronous_inclusive_cumsum_gpu
      fbgemm_gpu::merge_pooled_embeddings
    )
  fi

  # Keep the loop variables out of the caller's scope
  local library
  local symbol
  for library in "${fbgemm_gpu_so_files[@]}"; do
    echo "[CHECK] Listing out the GLIBCXX versions referenced by the library: ${library}"
    # NOTE(review): the trailing `| cat` presumably keeps this informational
    # listing from reporting a failure when grep finds no match - confirm.
    objdump -TC "${library}" | grep GLIBCXX | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat

    echo "[CHECK] Verifying sample subset of symbols in the library ..."
    for symbol in "${lib_symbols_to_check[@]}"; do
      (test_library_symbol "${library}" "${symbol}") || return 1
    done

    echo ""
  done
}
999
+
905
1000
build_fbgemm_gpu_package () {
906
1001
env_name=" $1 "
907
1002
package_name=" $2 "
908
- cpu_only =" $3 "
1003
+ fbgemm_variant =" $3 "
909
1004
if [ " $package_name " == " " ]; then
910
1005
echo " Usage: ${FUNCNAME[0]} ENV_NAME PACKAGE_NAME [CPU_ONLY]"
911
1006
echo " Example(s):"
912
- echo " ${FUNCNAME[0]} build_env fbgemm_gpu_nightly # Build the full wheel package"
913
- echo " ${FUNCNAME[0]} build_env fbgemm_gpu_nightly 1 # Build the CPU-only variant of the wheel package"
1007
+ echo " ${FUNCNAME[0]} build_env fbgemm_gpu_nightly # Build the full wheel package"
1008
+ echo " ${FUNCNAME[0]} build_env fbgemm_gpu_nightly cpu # Build the CPU-only variant of the wheel package"
914
1009
return 1
915
1010
else
916
1011
echo " ################################################################################"
@@ -922,32 +1017,38 @@ build_fbgemm_gpu_package () {
922
1017
fi
923
1018
924
1019
# Run all the common FBGEMM-GPU build pre-steps (set up variables)
925
- __build_fbgemm_gpu_common_pre_steps
1020
+ __build_fbgemm_gpu_common_pre_steps || return 1
926
1021
927
1022
# manylinux1_x86_64 is specified for PyPI upload
928
1023
# Distribute Python extensions as wheels on Linux
929
- echo " [BUILD] Building FBGEMM-GPU (CPU =${cpu_only :- 0 } ) wheel ..."
1024
+ echo " [BUILD] Building FBGEMM-GPU (VARIANT =${fbgemm_variant } ) wheel ..."
930
1025
print_exec conda run -n " ${env_name} " \
931
1026
python setup.py bdist_wheel \
932
1027
--package_name=" ${package_name} " \
933
1028
--python-tag=" ${python_tag} " \
934
1029
--plat-name=manylinux1_x86_64 \
935
- " ${build_args} "
1030
+ " ${build_args[@]} "
1031
+
1032
+ # Run checks on the built libraries
1033
+ (check_fbgemm_gpu_build " ${fbgemm_variant} " ) || return 1
936
1034
937
1035
echo " [BUILD] Enumerating the built wheels ..."
938
1036
print_exec ls -lth dist/* .whl
939
1037
1038
+ echo " [BUILD] Enumerating the wheel SHAs ..."
1039
+ print_exec sha1sum dist/* .whl
1040
+
940
1041
echo " [BUILD] FBGEMM-GPU build wheel completed"
941
1042
}
942
1043
943
1044
build_fbgemm_gpu_install () {
944
1045
env_name=" $1 "
945
- cpu_only =" $2 "
1046
+ fbgemm_variant =" $2 "
946
1047
if [ " $env_name " == " " ]; then
947
1048
echo " Usage: ${FUNCNAME[0]} ENV_NAME [CPU_ONLY]"
948
1049
echo " Example(s):"
949
1050
echo " ${FUNCNAME[0]} build_env # Build + install the package"
950
- echo " ${FUNCNAME[0]} build_env 1 # Build + Install the CPU-only variant of the package"
1051
+ echo " ${FUNCNAME[0]} build_env cpu # Build + Install the CPU-only variant of the package"
951
1052
return 1
952
1053
else
953
1054
echo " ################################################################################"
@@ -963,9 +1064,12 @@ build_fbgemm_gpu_install () {
963
1064
964
1065
# Parallelism may need to be limited to prevent the build from being
965
1066
# canceled for going over ulimits
966
- echo " [BUILD] Building and installing FBGEMM-GPU (CPU =${cpu_only :- 0 } ) ..."
1067
+ echo " [BUILD] Building and installing FBGEMM-GPU (VARIANT =${fbgemm_variant } ) ..."
967
1068
print_exec conda run -n " ${env_name} " \
968
- python setup.py install " ${build_args} "
1069
+ python setup.py install " ${build_args[@]} "
1070
+
1071
+ # Run checks on the built libraries
1072
+ (check_fbgemm_gpu_build " ${fbgemm_variant} " ) || return 1
969
1073
970
1074
echo " [BUILD] FBGEMM-GPU build + install completed"
971
1075
}
@@ -987,14 +1091,17 @@ install_fbgemm_gpu_package () {
987
1091
echo " "
988
1092
fi
989
1093
990
- echo " [BUILD] Installing FBGEMM-GPU wheel: ${package_name} ..."
1094
+ echo " [INSTALL] Printing out FBGEMM-GPU wheel SHA: ${package_name} "
1095
+ print_exec sha1sum " ${package_name} "
1096
+
1097
+ echo " [INSTALL] Installing FBGEMM-GPU wheel: ${package_name} ..."
991
1098
conda run -n " ${env_name} " python -m pip install " ${package_name} "
992
1099
993
- echo " [BUILD ] Checking imports ..."
1100
+ echo " [INSTALL ] Checking imports ..."
994
1101
(test_python_import " ${env_name} " fbgemm_gpu) || return 1
995
1102
(test_python_import " ${env_name} " fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
996
1103
997
- echo " [BUILD ] Wheel installation completed ..."
1104
+ echo " [INSTALL ] Wheel installation completed ..."
998
1105
}
999
1106
1000
1107
@@ -1055,8 +1162,8 @@ run_fbgemm_gpu_tests () {
1055
1162
echo " [TEST] Enumerating test files ..."
1056
1163
print_exec ls -lth ./* .py
1057
1164
1058
- # NOTE: These tests running on single CPU core with a less powerful testing
1059
- # GPU in GHA can take up to 5 hours.
1165
+ # NOTE: Tests running on single CPU core with a less powerful testing GPU in
1166
+ # GHA can take up to 5 hours.
1060
1167
for test_file in * .py; do
1061
1168
if echo " ${files_to_skip[@]} " | grep " ${test_file} " ; then
1062
1169
echo " [TEST] Skipping test file known to be broken: ${test_file} "
0 commit comments