Skip to content

Commit 6a63116

Browse files
q10 authored and facebook-github-bot committed
Improvements to OSS builds and the Release Process (#1627)
Summary: - Add support for CUDA 11.8 in the OSS builds - Annotate the package wheels with Python 3.9 and 3.10 support tags - Update `setup.py` to auto-derive the package version from the git information (namely tags) to allow for fast tag-and-release - Check for the actual presence of NVIDIA drivers in OSS builds, and error out with a friendly message instead of cryptic `RuntimeError: No such operator fbgemm::jagged_2d_to_dense` errors when `fbgemm_gpu` is installed and loaded on a system with a GPU but without GPU drivers installed Pull Request resolved: #1627 Reviewed By: brad-mengchi, shintaro-iwasaki Differential Revision: D43868995 Pulled By: q10 fbshipit-source-id: 41da34bd5a82032d20daa4972cc9c848a6ed09e1
1 parent 936ec59 commit 6a63116

File tree

10 files changed

+105
-75
lines changed

10 files changed

+105
-75
lines changed

.github/scripts/setup_env.bash

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ print_exec () {
1414
echo "+ $*"
1515
echo ""
1616
"$@"
17+
echo ""
1718
}
1819

1920
exec_with_retries () {
@@ -238,6 +239,30 @@ free_disk_space () {
238239
# Info Functions
239240
################################################################################
240241

242+
print_gpu_info () {
  # Print display-adapter and NVIDIA GPU diagnostics into the job log.
  #
  # Environment:
  #   ENFORCE_NVIDIA_GPU  When set to any non-empty value, a failing
  #                       `nvidia-smi` invocation is treated as an error.
  #                       When unset or empty, the NVIDIA check is best-effort.
  #
  # Returns:
  #   1 when enforcement is enabled and `nvidia-smi` fails.

  echo "################################################################################"
  echo "[INFO] Check GPU info ..."
  install_system_packages lshw
  print_exec sudo lshw -C display

  echo "################################################################################"
  echo "[INFO] Check NVIDIA GPU info ..."

  # The `:-` default keeps this expansion safe when the variable is unset and
  # the calling shell runs with `set -u` (nounset).
  if [[ -n "${ENFORCE_NVIDIA_GPU:-}" ]]; then
    # Ensure that nvidia-smi is available and returns GPU entries
    if ! nvidia-smi; then
      echo "[CHECK] NVIDIA driver is required, but does not appear to have been installed. This will cause FBGEMM_GPU installation to fail!"
      return 1
    fi

  else
    # `command -v` is a shell builtin; like `which`, it prints the resolved
    # path on success, so the log output is unchanged.
    if command -v nvidia-smi; then
      # If nvidia-smi is installed on a machine without GPUs, this will return error
      (print_exec nvidia-smi) || true
    fi
  fi
}
265+
241266
print_system_info () {
242267
echo "################################################################################"
243268
echo "# Print System Info"
@@ -264,17 +289,6 @@ print_system_info () {
264289
print_exec uname -a
265290
print_exec cat /proc/version
266291
print_exec cat /etc/os-release
267-
268-
echo "################################################################################"
269-
echo "[INFO] Check GPU info ..."
270-
install_system_packages lshw
271-
print_exec sudo lshw -C display
272-
273-
if which nvidia-smi; then
274-
echo "################################################################################"
275-
echo "[INFO] Check NVIDIA GPU info ..."
276-
print_exec nvidia-smi
277-
fi
278292
}
279293

280294
print_ec2_info () {
@@ -335,7 +349,7 @@ setup_miniconda () {
335349
print_exec . ~/.bashrc
336350

337351
echo "[SETUP] Updating Miniconda base packages ..."
338-
print_exec conda update -n base -c defaults -y conda
352+
(exec_with_retries conda update -n base -c defaults -y conda) || return 1
339353

340354
# Print Conda info
341355
print_exec conda info
@@ -369,12 +383,12 @@ create_conda_environment () {
369383
(exec_with_retries conda create -y --name "${env_name}" python="${python_version}") || return 1
370384

371385
echo "[SETUP] Upgrading PIP to latest ..."
372-
print_exec conda run -n "${env_name}" pip install --upgrade pip
386+
(exec_with_retries conda run -n "${env_name}" pip install --upgrade pip) || return 1
373387

374388
# The pyOpenSSL and cryptography packages versions need to line up for PyPI publishing to work
375389
# https://stackoverflow.com/questions/74981558/error-updating-python3-pip-attributeerror-module-lib-has-no-attribute-openss
376390
echo "[SETUP] Upgrading pyOpenSSL ..."
377-
print_exec conda run -n "${env_name}" python -m pip install "pyOpenSSL>22.1.0"
391+
(exec_with_retries conda run -n "${env_name}" python -m pip install "pyOpenSSL>22.1.0") || return 1
378392

379393
# This test fails with load errors if the pyOpenSSL and cryptography package versions don't align
380394
echo "[SETUP] Testing pyOpenSSL import ..."
@@ -886,7 +900,7 @@ prepare_fbgemm_gpu_build () {
886900
git submodule update --init --recursive
887901

888902
echo "[BUILD] Installing other build dependencies ..."
889-
print_exec conda run -n "${env_name}" python -m pip install -r requirements.txt
903+
(exec_with_retries conda run -n "${env_name}" python -m pip install -r requirements.txt) || return 1
890904

891905
(test_python_import "${env_name}" numpy) || return 1
892906
(test_python_import "${env_name}" skbuild) || return 1
@@ -1095,7 +1109,7 @@ install_fbgemm_gpu_package () {
10951109
print_exec sha1sum "${package_name}"
10961110

10971111
echo "[INSTALL] Installing FBGEMM-GPU wheel: ${package_name} ..."
1098-
conda run -n "${env_name}" python -m pip install "${package_name}"
1112+
(exec_with_retries conda run -n "${env_name}" python -m pip install "${package_name}") || return 1
10991113

11001114
echo "[INSTALL] Checking imports ..."
11011115
(test_python_import "${env_name}" fbgemm_gpu) || return 1
@@ -1217,4 +1231,5 @@ publish_to_pypi () {
12171231
"${package_name}"
12181232

12191233
echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_name}"
1234+
echo "[PUBLISH] NOTE: The publish command is a successful no-op if the wheel version already existed in PyPI; please double check!"
12201235
}

.github/workflows/fbgemm_gpu_ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ jobs:
3838
- name: Display System Info
3939
run: . $PRELUDE; print_system_info
4040

41+
- name: Display GPU Info
42+
run: . $PRELUDE; print_gpu_info
43+
4144
- name: Free Disk Space
4245
run: . $PRELUDE; free_disk_space
4346

@@ -150,6 +153,9 @@ jobs:
150153
- name: Display System Info
151154
run: . $PRELUDE; print_system_info
152155

156+
- name: Display GPU Info
157+
run: . $PRELUDE; print_gpu_info
158+
153159
- name: Setup Miniconda
154160
run: |
155161
. $PRELUDE; setup_miniconda $HOME/miniconda

.github/workflows/fbgemm_nightly_build.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
matrix:
4747
os: [ linux.12xlarge ]
4848
python-version: [ "3.8", "3.9", "3.10" ]
49-
cuda-version: [ "11.7.1" ]
49+
cuda-version: [ "11.7.1", "11.8.0" ]
5050

5151
steps:
5252
- name: Checkout the Repository
@@ -57,6 +57,9 @@ jobs:
5757
- name: Display System Info
5858
run: . $PRELUDE; print_system_info
5959

60+
- name: Display GPU Info
61+
run: . $PRELUDE; print_gpu_info
62+
6063
- name: Setup Miniconda
6164
run: |
6265
. $PRELUDE; setup_miniconda $HOME/miniconda
@@ -103,12 +106,15 @@ jobs:
103106
env:
104107
PRELUDE: .github/scripts/setup_env.bash
105108
BUILD_ENV: build_binary
109+
ENFORCE_NVIDIA_GPU: 1
106110
strategy:
107111
fail-fast: false
108112
matrix:
109113
os: [ linux.g5.4xlarge.nvidia.gpu ]
110114
python-version: [ "3.8", "3.9", "3.10" ]
111-
cuda-version: [ "11.7.1" ]
115+
cuda-version: [ "11.7.1", "11.8.0" ]
116+
# Specify exactly ONE CUDA version for artifact publish
117+
cuda-version-publish: [ "11.7.1" ]
112118
needs: build_artifact
113119

114120
steps:
@@ -118,10 +124,10 @@ jobs:
118124
submodules: true
119125

120126
- name: Display System Info
121-
run: . $PRELUDE; print_system_info
127+
run: . $PRELUDE; print_system_info; print_ec2_info
122128

123-
- name: Display EC2 Info
124-
run: . $PRELUDE; print_ec2_info
129+
- name: Display GPU Info
130+
run: . $PRELUDE; print_gpu_info
125131

126132
- name: Setup Miniconda
127133
run: |
@@ -157,7 +163,7 @@ jobs:
157163
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV
158164

159165
- name: Push FBGEMM_GPU Nightly Binary to PYPI
160-
if: ${{ github.event_name != 'pull_request' && github.event_name != 'push' }}
166+
if: ${{ github.event_name != 'pull_request' && github.event_name != 'push' && matrix.cuda-version == matrix.cuda-version-publish }}
161167
env:
162168
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
163169
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu_nightly-*.whl "$PYPI_TOKEN"

.github/workflows/fbgemm_nightly_build_cpu.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ jobs:
5656
- name: Display System Info
5757
run: . $PRELUDE; print_system_info
5858

59+
- name: Display GPU Info
60+
run: . $PRELUDE; print_gpu_info
61+
5962
- name: Setup Miniconda
6063
run: |
6164
. $PRELUDE; setup_miniconda $HOME/miniconda
@@ -110,10 +113,10 @@ jobs:
110113
submodules: true
111114

112115
- name: Display System Info
113-
run: . $PRELUDE; print_system_info
116+
run: . $PRELUDE; print_system_info; print_ec2_info
114117

115-
- name: Display EC2 Info
116-
run: . $PRELUDE; print_ec2_info
118+
- name: Display GPU Info
119+
run: . $PRELUDE; print_gpu_info
117120

118121
- name: Setup Miniconda
119122
run: |

.github/workflows/fbgemm_release_build.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
matrix:
3939
os: [ linux.12xlarge ]
4040
python-version: [ "3.8", "3.9", "3.10" ]
41-
cuda-version: [ "11.7.1" ]
41+
cuda-version: [ "11.7.1", "11.8.0" ]
4242

4343
steps:
4444
- name: Checkout the Repository
@@ -49,6 +49,9 @@ jobs:
4949
- name: Display System Info
5050
run: . $PRELUDE; print_system_info
5151

52+
- name: Display GPU Info
53+
run: . $PRELUDE; print_gpu_info
54+
5255
- name: Setup Miniconda
5356
run: |
5457
. $PRELUDE; setup_miniconda $HOME/miniconda
@@ -95,12 +98,15 @@ jobs:
9598
env:
9699
PRELUDE: .github/scripts/setup_env.bash
97100
BUILD_ENV: build_binary
101+
ENFORCE_NVIDIA_GPU: 1
98102
strategy:
99103
fail-fast: false
100104
matrix:
101105
os: [ linux.g5.4xlarge.nvidia.gpu ]
102106
python-version: [ "3.8", "3.9", "3.10" ]
103-
cuda-version: [ "11.7.1" ]
107+
cuda-version: [ "11.7.1", "11.8.0" ]
108+
# Specify exactly ONE CUDA version for artifact publish
109+
cuda-version-publish: [ "11.7.1" ]
104110
needs: build_artifact
105111
steps:
106112
- name: Checkout the Repository
@@ -109,10 +115,10 @@ jobs:
109115
submodules: true
110116

111117
- name: Display System Info
112-
run: . $PRELUDE; print_system_info
118+
run: . $PRELUDE; print_system_info; print_ec2_info
113119

114-
- name: Display EC2 Info
115-
run: . $PRELUDE; print_ec2_info
120+
- name: Display GPU Info
121+
run: . $PRELUDE; print_gpu_info
116122

117123
- name: Setup Miniconda
118124
run: |
@@ -148,7 +154,7 @@ jobs:
148154
run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV
149155

150156
- name: Push FBGEMM_GPU Binary to PYPI
151-
if: ${{ github.event_name != 'pull_request' && github.event_name != 'push' }}
157+
if: ${{ github.event_name != 'pull_request' && github.event_name != 'push' && matrix.cuda-version == matrix.cuda-version-publish }}
152158
env:
153159
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
154160
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu-*.whl "$PYPI_TOKEN"

.github/workflows/fbgemm_release_build_cpu.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ jobs:
4848
- name: Display System Info
4949
run: . $PRELUDE; print_system_info
5050

51+
- name: Display GPU Info
52+
run: . $PRELUDE; print_gpu_info
53+
5154
- name: Setup Miniconda
5255
run: |
5356
. $PRELUDE; setup_miniconda $HOME/miniconda
@@ -102,10 +105,10 @@ jobs:
102105
submodules: true
103106

104107
- name: Display System Info
105-
run: . $PRELUDE; print_system_info
108+
run: . $PRELUDE; print_system_info; print_ec2_info
106109

107-
- name: Display EC2 Info
108-
run: . $PRELUDE; print_ec2_info
110+
- name: Display GPU Info
111+
run: . $PRELUDE; print_gpu_info
109112

110113
- name: Setup Miniconda
111114
run: |

fbgemm_gpu/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,8 @@ if(NOT FBGEMM_CPU_ONLY)
328328
src/merge_pooled_embeddings_gpu.cpp
329329
src/topology_utils.cpp)
330330
else()
331-
message(STATUS "Could not find NVML_LIB_PATH; will NOT include certain sources into the build!")
331+
message(STATUS
332+
"Could not find NVML_LIB_PATH; skipping certain sources into the build")
332333
endif()
333334
endif()
334335

fbgemm_gpu/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ jinja2
44
ninja
55
numpy
66
scikit-build
7+
setuptools_git_versioning

fbgemm_gpu/setup.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,41 @@
77
import argparse
88
import os
99
import random
10-
import re
1110
import subprocess
1211
import sys
12+
1313
from datetime import date
1414
from typing import List, Optional
1515

16+
import setuptools_git_versioning as gitversion
1617
import torch
1718
from skbuild import setup
1819

1920

20-
def get_version():
21-
# get version string from version.py
22-
# TODO: ideally the version.py should be generated when setup is run
23-
version_file = os.path.join(os.path.dirname(__file__), "version.py")
24-
version_regex = r"__version__ = ['\"]([^'\"]*)['\"]"
25-
with open(version_file, "r") as f:
26-
version = re.search(version_regex, f.read(), re.M).group(1)
27-
return version
21+
def generate_package_version(package_name: str) -> str:
    """Generate the version string to publish for ``package_name``.

    The versioning scheme is selected from substrings of the package name,
    checked in this order:

    * contains ``"nightly"``: today's date as ``YEAR.MONTH.DAY``
    * contains ``"test"``: ``0.0.N`` with ``N`` a random integer in [0, 1000]
    * otherwise (release): the version derived from git information by
      ``setuptools_git_versioning``

    Args:
        package_name: Name of the package being built.

    Returns:
        The version as a string.
    """
    print("[SETUP.PY] Generating the package version ...")

    if "nightly" in package_name:
        # Use date stamp for nightly versions
        print("[SETUP.PY] Package is for NIGHTLY; using timestamp for the versioning")
        today = date.today()
        version = f"{today.year}.{today.month}.{today.day}"

    elif "test" in package_name:
        # Use a random patch number for test versions
        print("[SETUP.PY] Package is for TEST: using random number for the versioning")
        # This must be a plain string; a stray trailing comma previously turned
        # the value into a 1-tuple.
        version = f"0.0.{random.randint(0, 1000)}"

    else:
        # Use git tag / branch / commit info to generate a PEP-440-compliant version string
        print("[SETUP.PY] Package is for RELEASE: using git info for the versioning")
        print(
            f"[SETUP.PY] TAG: {gitversion.get_tag()}, BRANCH: {gitversion.get_branch()}, SHA: {gitversion.get_sha()}"
        )
        # str() is a no-op on a string and guarantees that every branch of this
        # function hands a string back to the caller.
        version = str(gitversion.version_from_git())

    print(f"[SETUP.PY] Setting the package version: {version}")
    return version
3345

3446

3547
def get_cxx11_abi():
@@ -170,23 +182,15 @@ def main(argv: List[str]) -> None:
170182
if args.nvml_lib_path:
171183
cmake_args.append(f"-DNVML_LIB_PATH={args.nvml_lib_path}")
172184

173-
name = args.package_name
174-
print("name: ", name)
175-
is_nightly = "nightly" in name
176-
is_test = "test" in name
177-
178-
version = get_nightly_version() if is_nightly else get_version()
179-
if is_test:
180-
version = (f"0.0.{random.randint(0, 1000)}",)
181-
print(f"-- {name} building version: {version}")
185+
package_version = generate_package_version(args.package_name)
182186

183187
# Repair command line args for setup.
184188
sys.argv = [sys.argv[0]] + unknown
185189

186190
setup(
187191
# Metadata
188-
name=name,
189-
version=version,
192+
name=args.package_name,
193+
version=package_version,
190194
author="FBGEMM Team",
191195
author_email="packages@pytorch.org",
192196
long_description=long_description,
@@ -210,6 +214,8 @@ def main(argv: List[str]) -> None:
210214
"License :: OSI Approved :: BSD License",
211215
"Programming Language :: Python :: 3",
212216
"Programming Language :: Python :: 3.8",
217+
"Programming Language :: Python :: 3.9",
218+
"Programming Language :: Python :: 3.10",
213219
"Topic :: Scientific/Engineering :: Artificial Intelligence",
214220
],
215221
)

0 commit comments

Comments
 (0)