@@ -264,22 +264,13 @@ print_gpu_info () {
264
264
if which nvidia-smi; then
265
265
# If nvidia-smi is installed on a machine without GPUs, this will return error
266
266
(print_exec nvidia-smi) || true
267
+ else
268
+ echo " [CHECK] nvidia-smi not found"
267
269
fi
268
270
fi
269
271
}
270
272
271
- print_system_info () {
272
- echo " ################################################################################"
273
- echo " # Print System Info"
274
- echo " #"
275
- echo " # [TIMESTAMP] $( date --utc +%FT%T.%3NZ) "
276
- echo " ################################################################################"
277
- echo " "
278
-
279
- echo " ################################################################################"
280
- echo " [INFO] Printing environment variables ..."
281
- print_exec printenv
282
-
273
+ __print_system_info_linux () {
283
274
echo " ################################################################################"
284
275
echo " [INFO] Check ldd version ..."
285
276
print_exec ldd --version
@@ -296,6 +287,36 @@ print_system_info () {
296
287
print_exec cat /etc/os-release
297
288
}
298
289
290
# Print macOS-specific system information.
#
# Emits the `machdep.cpu` entries reported by `sysctl -a`, then hands
# `uname -a` and `sw_vers` to the print_exec helper (defined elsewhere in this
# file; presumably it logs and runs the given command -- confirm there).
#
# Arguments: none
# Outputs:   writes the report to stdout
__print_system_info_macos () {
  echo " ################################################################################"
  echo " [INFO] Check CPU info ..."
  # -F makes the match literal; as a regex, the '.' in 'machdep.cpu' would
  # match any character and could pull in unrelated sysctl keys.
  sysctl -a | grep -F 'machdep.cpu'

  echo " ################################################################################"
  echo " [INFO] Check MacOS version info ..."
  print_exec uname -a
  print_exec sw_vers
}
300
+
301
# Print a system information report for the current host.
#
# Prints a banner with a UTC timestamp, dumps the environment through the
# print_exec helper, and then dispatches to the platform-specific reporter:
# __print_system_info_macos when OSTYPE starts with "darwin", otherwise
# __print_system_info_linux.
#
# Globals:   OSTYPE (read)
# Arguments: none
# Outputs:   writes the report to stdout
print_system_info () {
  # GNU date understands --utc and %N (sub-second precision); the BSD date
  # shipped with macOS rejects --utc, so fall back to a portable
  # second-resolution timestamp there.
  local timestamp
  timestamp=$(date --utc +%FT%T.%3NZ 2>/dev/null) || timestamp=$(date -u +%FT%TZ)

  echo " ################################################################################"
  echo " # Print System Info"
  echo " #"
  echo " # [TIMESTAMP] ${timestamp} "
  echo " ################################################################################"
  echo " "

  echo " ################################################################################"
  echo " [INFO] Printing environment variables ..."
  print_exec printenv

  # The glob must stay unquoted and attached to the prefix so that [[ ]]
  # treats it as a pattern (e.g. matches "darwin21.6.0").
  if [[ "${OSTYPE}" == darwin* ]]; then
    __print_system_info_macos
  else
    __print_system_info_linux
  fi
}
319
+
299
320
print_ec2_info () {
300
321
echo " ################################################################################"
301
322
echo " # Print EC2 Instance Info"
@@ -316,6 +337,30 @@ print_ec2_info () {
316
337
echo " instance-type: $( get_ec2_metadata instance-type) "
317
338
}
318
339
340
# List the GLIBC and GLIBCXX symbol versions referenced by a shared library.
#
# Runs `objdump -TC` on the library and, for each of the GLIBC_ and GLIBCXX_
# prefixes, prints the unique version tags in version-sorted order.
#
# Arguments: $1 - path to the library file to inspect
# Outputs:   writes the version lists (or a usage / error message) to stdout
# Returns:   1 if no path was given or no regular file exists at the path;
#            0 otherwise
print_glibc_info () {
  local library_path="$1"
  if [[ -z "${library_path}" ]]; then
    echo " Usage: ${FUNCNAME[0]} LIBRARY_PATH"
    echo " Example(s):"
    echo " ${FUNCNAME[0]} /usr/lib/x86_64-linux-gnu/libstdc++.so.6"
    return 1
  fi

  if [[ ! -f "${library_path}" ]]; then
    echo " [CHECK] No file at path: ${library_path} "
    return 1
  fi

  local prefix
  for prefix in GLIBC GLIBCXX; do
    echo " [CHECK] Listing out the ${prefix} versions referenced by: ${library_path} "
    # The trailing underscore keeps "GLIBC_" from also matching "GLIBCXX_".
    # The final `| cat` is kept from the original pipeline.
    objdump -TC "${library_path}" \
      | grep "${prefix}_" \
      | sed "s/.*${prefix}_\([.0-9]*\).*/${prefix}_\1/g" \
      | sort -Vu \
      | cat
    echo " "
  done
}
363
+
319
364
320
365
# ###############################################################################
321
366
# Miniconda Setup Functions
@@ -342,7 +387,7 @@ setup_miniconda () {
342
387
print_exec mkdir -p " $miniconda_prefix "
343
388
344
389
echo " [SETUP] Downloading the Miniconda installer ..."
345
- print_exec wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
390
+ (exec_with_retries wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh) || return 1
346
391
347
392
echo " [SETUP] Installing Miniconda ..."
348
393
print_exec bash miniconda.sh -b -p " $miniconda_prefix " -u
@@ -360,9 +405,16 @@ setup_miniconda () {
360
405
print_exec conda info
361
406
362
407
# These variables will be exported outside
408
+ echo " [SETUP] Exporting Miniconda variables ..."
363
409
export PATH=" ${miniconda_prefix} /bin:${PATH} "
364
410
export CONDA=" ${miniconda_prefix} "
365
411
412
+ if [ -f " ${GITHUB_PATH} " ]; then
413
+ echo " [SETUP] Saving Miniconda variables to ${GITHUB_PATH} ..."
414
+ echo " ${miniconda_prefix} /bin" >> " ${GITHUB_PATH} "
415
+ echo " CONDA=${miniconda_prefix} " >> " ${GITHUB_PATH} "
416
+ fi
417
+
366
418
echo " [SETUP] Successfully set up Miniconda at ${miniconda_prefix} "
367
419
}
368
420
@@ -448,9 +500,11 @@ install_pytorch_conda () {
448
500
fi
449
501
450
502
# Install PyTorch packages
503
+ # NOTE: Installation of large package might fail due to corrupt package download
504
+ # Use --force-reinstall to address this on retries - https://datascience.stackexchange.com/questions/41732/conda-verification-failed
451
505
echo " [INSTALL] Attempting to install '${pytorch_package} ' (${pytorch_version} , CPU=${pytorch_cpu:- 0} ) through Conda using channel '${pytorch_channel} ' ..."
452
506
# shellcheck disable=SC2086
453
- (exec_with_retries conda install -n " ${env_name} " -y ${pytorch_package} -c " ${pytorch_channel} " ) || return 1
507
+ (exec_with_retries conda install --force-reinstall - n " ${env_name} " -y ${pytorch_package} -c " ${pytorch_channel} " ) || return 1
454
508
455
509
# Run check for GPU variant
456
510
if [ " $pytorch_cpu " == " " ]; then
@@ -612,7 +666,7 @@ install_cuda () {
612
666
613
667
# Install CUDA packages
614
668
echo " [INSTALL] Installing CUDA ${cuda_version} ..."
615
- (exec_with_retries conda install -n " ${env_name} " -y cuda -c " nvidia/label/cuda-${cuda_version} " ) || return 1
669
+ (exec_with_retries conda install --force-reinstall - n " ${env_name} " -y cuda -c " nvidia/label/cuda-${cuda_version} " ) || return 1
616
670
617
671
# Ensure that nvcc is properly installed
618
672
(test_binpath " ${env_name} " nvcc) || return 1
@@ -806,15 +860,19 @@ install_cxx_compiler () {
806
860
install_system_packages gcc gcc-c++
807
861
808
862
else
809
- # Install gxx_linux-64 from main instead of cxx-compiler from conda-forge, as
810
- # the latter breaks builds:
863
+ # Install gxx_linux-64 from conda-forge instead of from anaconda channel.
864
+ # sysroot_linux-64 needs to be installed alongside this:
865
+ #
811
866
# https://root-forum.cern.ch/t/error-timespec-get-has-not-been-declared-with-conda-root-package/45712/6
867
+ # https://github.com/conda-forge/conda-forge.github.io/issues/1625
868
+ # https://conda-forge.org/docs/maintainer/knowledge_base.html#using-centos-7
869
+ # https://github.com/conda/conda-build/issues/4371
812
870
#
813
- # NOTE: Install g++ 9 .x instead of 11.x becaue 11.x builds libraries with
814
- # references to GLIBCXX_3.4.29, which is not available on systems with older
871
+ # NOTE: We install g++ 10.x instead of 11.x because 11.x builds binaries that
872
+ # reference GLIBCXX_3.4.29, which may not be available on systems with older
815
873
# versions of libstdc++.so.6 such as CentOS Stream 8 and Ubuntu 20.04
816
874
echo " [INSTALL] Installing C/C++ compilers through Conda ..."
817
- (exec_with_retries conda install -n " ${env_name} " -y gxx_linux-64=9.3.0 ) || return 1
875
+ (exec_with_retries conda install -n " ${env_name} " -y gxx_linux-64=10.4.0 sysroot_linux-64=2.17 -c conda-forge ) || return 1
818
876
819
877
# The compilers are visible in the PATH as `x86_64-conda-linux-gnu-cc` and
820
878
# `x86_64-conda-linux-gnu-c++`, so symlinks will need to be created
@@ -1055,7 +1113,7 @@ check_fbgemm_gpu_build () {
1055
1113
1056
1114
for library in " ${fbgemm_gpu_so_files[@]} " ; do
1057
1115
echo " [CHECK] Listing out the GLIBCXX versions referenced by the library: ${library} "
1058
- objdump -TC " ${library} " | grep GLIBCXX | sed ' s/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g ' | sort -Vu | cat
1116
+ print_glibc_info " ${library} "
1059
1117
1060
1118
echo " [CHECK] Verifying sample subset of symbols in the library ..."
1061
1119
for symbol in " ${lib_symbols_to_check[@]} " ; do
0 commit comments