
Commit 4fb52dc

Merge pull request #1615 from bitsandbytes-foundation/sensible-error-on-failed-lib-loading
C lib loading: add fallback with sensible error msg
2 parents 544c203 + ebfda25
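For context, the user-visible difference this commit makes, as a hypothetical session in an environment where the native library fails to load (the symbol name is just an example of a native entry point):

# Hypothetical session; the exact symbol is illustrative.
import bitsandbytes.cextension as cext

# Before this commit: a failed load left lib = None, so the first native
# access died opaquely:
#   cext.lib.cadam32bit_grad_fp32
#   AttributeError: 'NoneType' object has no attribute 'cadam32bit_grad_fp32'

# After this commit: lib is an ErrorHandlerMockBNBNativeLibrary; attribute
# access succeeds, and calling raises a RuntimeError with troubleshooting
# guidance instead:
cext.lib.cadam32bit_grad_fp32()
# RuntimeError: ... Run this command for detailed diagnostics: python -m bitsandbytes ...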

3 files changed: +318 -140 lines changed


bitsandbytes/cextension.py

Lines changed: 214 additions & 23 deletions
@@ -3,11 +3,12 @@
 import os
 from pathlib import Path
 import re
+from typing import Optional

 import torch

 from bitsandbytes.consts import DYNAMIC_LIBRARY_SUFFIX, PACKAGE_DIR
-from bitsandbytes.cuda_specs import CUDASpecs, get_cuda_specs
+from bitsandbytes.cuda_specs import CUDASpecs, get_cuda_specs, get_cuda_version_tuple

 logger = logging.getLogger(__name__)

@@ -44,11 +45,19 @@ class BNBNativeLibrary:
     def __init__(self, lib: ct.CDLL):
         self._lib = lib

-    def __getattr__(self, item):
-        return getattr(self._lib, item)
+    def __getattr__(self, name):
+        def throw_on_call(*args, **kwargs):
+            if hasattr(self._lib, name):
+                return getattr(self._lib, name)(*args, **kwargs)
+            raise RuntimeError(
+                f"Method '{name}' not available in CPU-only version of bitsandbytes.\n"
+                "Reinstall with GPU support or use CUDA-enabled hardware."
+            )
+
+        return throw_on_call

     def __getitem__(self, item):
-        return getattr(self._lib, item)
+        return self.__getattr__(item)


 class CudaBNBNativeLibrary(BNBNativeLibrary):
@@ -61,42 +70,224 @@ def __init__(self, lib: ct.CDLL):
         lib.cget_managed_ptr.restype = ct.c_void_p


+def get_available_cuda_binary_versions() -> list[str]:
+    """Get formatted CUDA versions from existing library files using cuda_specs logic"""
+    lib_pattern = f"libbitsandbytes_cuda*{DYNAMIC_LIBRARY_SUFFIX}"
+    versions = []
+    for lib in Path(__file__).parent.glob(lib_pattern):
+        match = re.search(r"cuda(\d{3})", lib.name)
+        if match:
+            ver_code = int(match.group(1))
+            major = ver_code // 10
+            minor = ver_code % 10
+            versions.append(f"{major}.{minor}")
+    return sorted(versions)
+
+
+def parse_cuda_version(version_str: str) -> str:
+    """Convert a raw version string (e.g. '118' from the env var) to a formatted version (e.g. '11.8')"""
+    if version_str.isdigit() and len(version_str) == 3:
+        return f"{version_str[:2]}.{version_str[2]}"
+    return version_str  # fallback as safety net
+
+
+class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
+    """
+    Mock library handler that defers errors until native methods are called.
+
+    This class serves as a fallback when the native bitsandbytes library fails to load.
+    It captures the original error and generates detailed troubleshooting guidance.
+
+    Key behaviors:
+    - Allows attribute access and method assignment without immediate errors
+    - Throws a RuntimeError with diagnostic information only when a native method is
+      called; raising on import would break backward compatibility
+    - Handles both missing CUDA dependencies and version mismatch scenarios
+
+    Error scenarios covered:
+    1. Missing shared library dependencies (e.g. libcudart.so not found via LD_LIBRARY_PATH or the PyTorch CUDA installation)
+    2. CUDA version mismatch between PyTorch and the available pre-compiled binaries
+    3. Completely missing pre-compiled binaries when CUDA is detected
+    4. Custom BNB_CUDA_VERSION override that doesn't match any available binary
+    5. CPU-only installation attempts when GPU functionality is requested
+
+    """
+
+    def __init__(self, error_msg: str):
+        self.error_msg = error_msg
+        self.user_cuda_version = get_cuda_version_tuple()
+        self.available_versions = get_available_cuda_binary_versions()
+        self.override_value = os.environ.get("BNB_CUDA_VERSION")
+        self.requested_version = (
+            parse_cuda_version(self.override_value)
+            if self.override_value
+            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+            if self.user_cuda_version
+            else "unknown"
+        )
+
+        # Pre-generate the error message based on error type
+        if "cannot open shared object file" in error_msg:
+            self.formatted_error = self._format_dependency_error()
+        else:  # lib loading errors
+            self.formatted_error = self._format_lib_error_message(
+                available_versions=self.available_versions,
+                user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+                if self.user_cuda_version
+                else "unknown",
+                original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
+                requested_version=self.requested_version,
+            )
+
+    def _format_lib_error_message(
+        self,
+        available_versions: list[str],
+        user_cuda_version: str,
+        original_error: str = "",
+        requested_version: Optional[str] = None,
+    ) -> str:
+        """Format a detailed error message for library loading failures"""
+        analysis = ""
+        no_cpu_lib_found = "libbitsandbytes_cpu.so: cannot open" in original_error
+        no_cuda_lib_found = "CUDA binary not found" in original_error
+
+        if no_cpu_lib_found:
+            analysis = "\n🚨 Failed to load CPU-only bitsandbytes library 🚨\n\n"
+
+        elif no_cuda_lib_found:
+            version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
+            analysis = (
+                (
+                    f"\n🚨 CUDA VERSION MISMATCH 🚨\n"
+                    f"Requested CUDA version: {requested_version}\n"
+                    f"Detected PyTorch CUDA version: {user_cuda_version}\n"
+                    f"Available pre-compiled versions: {version_list_str}\n\n"
+                    "This means:\n"
+                    "The version you're trying to use is NOT distributed with this package\n\n"
+                )
+                if available_versions
+                else "\n🚨 Forgot to compile the bitsandbytes library? 🚨\n"
+                "1. You're not using the package but checked-out the source code\n"
+                "2. You MUST compile from source\n\n"
+            )

+        base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+
+        troubleshooting = (
+            (
+                "This typically happens when:\n"
+                "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+                "2. The library wasn't compiled properly during installation from source\n\n"
+            )
+            if no_cuda_lib_found
+            else "This typically happens when you checked the code out from source and your torch installation doesn't detect CUDA on your machine.\n\n"
+        )
+
+        note = (
+            (
+                "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+                "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+            )
+            if no_cuda_lib_found
+            else ""
+        )
+
+        compile_instructions = (
+            (
+                "You have two options:\n"
+                "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+                "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+                "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, one that is installed on your machine and matches an available pre-compiled version listed above\n\n"
+            )
+            if no_cuda_lib_found
+            else "COMPILE FROM SOURCE for CPU-only:\n  `cmake -DCOMPUTE_BACKEND=cpu -S . && make`\n\n"
+        )
+
+        diagnostics = (
+            "🔍 Run this command for detailed diagnostics:\n"
+            "python -m bitsandbytes\n\n"
+            "If you've tried everything and still have issues:\n"
+            "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            "2. Describe what you've tried in detail\n"
+            "3. Open an issue with this information:\n"
+            "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+
+        return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
+
+    def _format_dependency_error(self) -> str:
+        """Format an error message for missing shared libraries"""
+        # Extract missing library name from error
+        error_parts = self.error_msg.split(":")
+        missing_lib = error_parts[0].strip() if len(error_parts) > 0 else "unknown library"
+        cuda_major_version = (
+            self.requested_version.split(".")[0] if "." in self.requested_version else self.requested_version
+        )
+
+        return (
+            f"\n🚨 CUDA SETUP ERROR: Missing dependency: {missing_lib} 🚨\n\n"
+            f"CUDA {cuda_major_version}.x runtime libraries were not found in the LD_LIBRARY_PATH.\n\n"
+            f"To fix this, make sure that:\n"
+            f"1. You have installed the CUDA {cuda_major_version}.x toolkit on your system\n"
+            f"2. The CUDA runtime libraries are in your LD_LIBRARY_PATH\n\n"
+            f"You can add them with (and persist the change by adding the line to your .bashrc):\n"
+            f"  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/cuda-{cuda_major_version}.x/lib64\n\n"
+            f"Original error: {self.error_msg}\n\n"
+            f"🔍 Run this command for detailed diagnostics:\n"
+            f"python -m bitsandbytes\n\n"
+            f"If you've tried everything and still have issues:\n"
+            f"1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            f"2. Describe what you've tried in detail\n"
+            f"3. Open an issue with this information:\n"
+            f"   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+
+    def __getattr__(self, name):
+        """Return a dummy function that throws when called, rather than erroring on attribute access"""
+
+        def throw_on_call(*args, **kwargs):
+            raise RuntimeError(f"{self.formatted_error}Native code method attempted to call: lib.{name}()")
+
+        return throw_on_call
+
+    def __getitem__(self, name):
+        return self.__getattr__(name)
+
+
 def get_native_library() -> BNBNativeLibrary:
-    binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
+    """
+    Load the CUDA library XOR the CPU one, as the latter contains a subset of the former's symbols.
+    """
     cuda_specs = get_cuda_specs()
+    binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
+
     if cuda_specs:
         cuda_binary_path = get_cuda_bnb_library_path(cuda_specs)
-        if cuda_binary_path.exists():
-            binary_path = cuda_binary_path
-        else:
-            logger.warning("Could not find the bitsandbytes CUDA binary at %r", cuda_binary_path)
+
+        if not cuda_binary_path.exists():
+            raise RuntimeError(f"Configured CUDA binary not found at {cuda_binary_path}")
+
+        binary_path = cuda_binary_path
+
     logger.debug(f"Loading bitsandbytes native library from: {binary_path}")
+
+    # Try to load the library - any errors will propagate up
     dll = ct.cdll.LoadLibrary(str(binary_path))

     if hasattr(dll, "get_context"):  # only a CUDA-built library exposes this
         return CudaBNBNativeLibrary(dll)

     logger.warning(
         "The installed version of bitsandbytes was compiled without GPU support. "
-        "8-bit optimizers and GPU quantization are unavailable.",
+        "8-bit optimizers and GPU quantization are unavailable."
     )
     return BNBNativeLibrary(dll)


 try:
     lib = get_native_library()
 except Exception as e:
-    lib = None
-    logger.error(f"Could not load bitsandbytes native library: {e}", exc_info=True)
-    if torch.cuda.is_available():
-        logger.warning(
-            """
-CUDA Setup failed despite CUDA being available. Please run the following command to get more information:
-
-python -m bitsandbytes
+    error_msg = str(e)
+    logger.error(f"bitsandbytes library load error: {error_msg}\n", exc_info=True)

-Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
-to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
-and open an issue at: https://github.com/bitsandbytes-foundation/bitsandbytes/issues
-""",
-        )
+    # create a mock with error messaging as fallback
+    lib = ErrorHandlerMockBNBNativeLibrary(error_msg)
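The core trick shared by `BNBNativeLibrary.__getattr__` and the mock above is to return a shim function instead of raising, so imports stay safe and the error surfaces only at call time. A minimal standalone sketch of that pattern (the `NativeLibMock` class and its message are illustrative, not part of the bitsandbytes API):

# Minimal sketch of the deferred-error pattern (illustrative only).
class NativeLibMock:
    def __init__(self, error_msg: str):
        self.error_msg = error_msg

    def __getattr__(self, name):
        # Attribute access always succeeds, so importing the package stays safe...
        def throw_on_call(*args, **kwargs):
            # ...and the captured load error surfaces only when native code is invoked.
            raise RuntimeError(f"{self.error_msg}\nAttempted to call: lib.{name}()")

        return throw_on_call


lib = NativeLibMock("native library failed to load")
fn = lib.cquantize_blockwise_fp16  # fine: returns the shim, no error yet
try:
    fn()  # the error is raised only here, at call time
except RuntimeError as e:
    print(e)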

bitsandbytes/diagnostics/main.py

Lines changed: 9 additions & 21 deletions
@@ -13,21 +13,6 @@


 def sanity_check():
-    from bitsandbytes.cextension import lib
-
-    if lib is None:
-        print_dedented(
-            """
-            Couldn't load the bitsandbytes library, likely due to missing binaries.
-            Please ensure bitsandbytes is properly installed.
-
-            For source installations, compile the binaries with `cmake -DCOMPUTE_BACKEND=cuda -S .`.
-            See the documentation for more details if needed.
-
-            Trying a simple check anyway, but this will likely fail...
-            """,
-        )
-
     from bitsandbytes.optim import Adam

     p = torch.nn.Parameter(torch.rand(10, 10).cuda())
@@ -67,12 +52,15 @@ def main():
         print("SUCCESS!")
         print("Installation was successful!")
         return
-    except ImportError:
-        print(
-            f"WARNING: {__package__} is currently running as CPU-only!\n"
-            "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
-            f"If you think that this is so erroneously,\nplease report an issue!",
-        )
+    except RuntimeError as e:
+        if "not available in CPU-only" in str(e):
+            print(
+                f"WARNING: {__package__} is currently running as CPU-only!\n"
+                "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
+                f"If you think this is in error,\nplease report an issue!",
+            )
+        else:
+            raise e
     except Exception:
         traceback.print_exc()
         print_dedented(
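Since `lib` is no longer `None` after a failed load, the diagnostics entry point can't test for a missing library object; it instead pattern-matches the RuntimeError text raised by the CPU-only shim. A minimal sketch of that control flow (the `run_checks` wrapper and `fake_sanity_check` are illustrative, not the module's actual API):

import traceback

def run_checks(sanity_check) -> None:
    # Illustrative wrapper mirroring the new main() control flow above.
    try:
        sanity_check()
        print("SUCCESS!")
    except RuntimeError as e:
        # The CPU-only shim raises RuntimeError containing this marker text.
        if "not available in CPU-only" in str(e):
            print("WARNING: running as CPU-only; 8-bit optimizers and GPU quantization are unavailable.")
        else:
            raise  # any other RuntimeError indicates a real setup problem
    except Exception:
        traceback.print_exc()


def fake_sanity_check():
    raise RuntimeError("Method 'cadam32bit_grad_fp32' not available in CPU-only version of bitsandbytes.")


run_checks(fake_sanity_check)  # prints the CPU-only warning instead of failing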
