
Commit 0d1b3a3

Last minute pre-release changes
1 parent 1d4ea6a commit 0d1b3a3

6 files changed: +111 additions, -83 deletions


bitsandbytes/backends/cuda/ops.py

Lines changed: 16 additions & 14 deletions
@@ -445,20 +445,22 @@ def _gemv_4bit_impl(
     out: torch.Tensor,
 ) -> None:
     torch._check_is_size(blocksize)
-    torch._check(
-        A.numel() == A.size(-1),
-        lambda: f"A must be a vector with leading dimensions of 1, got {A.shape}",
-    )
-    torch._check(
-        A.dtype in [torch.float16, torch.bfloat16, torch.float32],
-        lambda: f"A must be float16, bfloat16, or float32, got {A.dtype}",
-    )
-    torch._check(
-        B.dtype in [torch.uint8, torch.bfloat16, torch.float16, torch.float32],
-        lambda: f"B must be backed by storage of type uint8, bfloat16, float16, or float32, got {B.dtype}",
-    )
-    torch._check(absmax.dtype == torch.float32, lambda: f"absmax must be float32, got {absmax.dtype}")
-    torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
+
+    # Note: these checks are not strictly necessary, and cost more than they are worth, so they are commented out for now.
+    # torch._check(
+    #     A.numel() == A.size(-1),
+    #     lambda: f"A must be a vector with leading dimensions of 1, got {A.shape}",
+    # )
+    # torch._check(
+    #     A.dtype in [torch.float16, torch.bfloat16, torch.float32],
+    #     lambda: f"A must be float16, bfloat16, or float32, got {A.dtype}",
+    # )
+    # torch._check(
+    #     B.dtype in [torch.uint8, torch.bfloat16, torch.float16, torch.float32],
+    #     lambda: f"B must be backed by storage of type uint8, bfloat16, float16, or float32, got {B.dtype}",
+    # )
+    # torch._check(absmax.dtype == torch.float32, lambda: f"absmax must be float32, got {absmax.dtype}")
+    # torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
 
     m = ct.c_int32(shapeB[0])
     n = ct.c_int32(1)
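The validation disabled here ran on every call of a latency-sensitive 4-bit GEMV, so even cheap Python-level checks add measurable per-call overhead. A rough way to quantify that cost in isolation; the shapes, dtypes, and iteration count below are illustrative assumptions, not the library's benchmark:

import time

import torch

A = torch.randn(1, 4096, dtype=torch.float16)

def run_checks() -> None:
    # Same style of validation as the checks commented out above.
    torch._check(A.numel() == A.size(-1), lambda: f"A must be a vector, got {A.shape}")
    torch._check(
        A.dtype in [torch.float16, torch.bfloat16, torch.float32],
        lambda: f"A must be float16, bfloat16, or float32, got {A.dtype}",
    )

iterations = 100_000
start = time.perf_counter()
for _ in range(iterations):
    run_checks()
elapsed = time.perf_counter() - start
print(f"~{elapsed / iterations * 1e6:.2f} us of check overhead per call")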

bitsandbytes/cextension.py

Lines changed: 8 additions & 5 deletions
@@ -1,4 +1,5 @@
 import ctypes as ct
+import functools
 import logging
 import os
 from pathlib import Path
@@ -29,10 +30,8 @@ def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path:
         library_name = re.sub(r"cuda\d+", f"cuda{override_value}", library_name, count=1)
         logger.warning(
             f"WARNING: BNB_CUDA_VERSION={override_value} environment variable detected; loading {library_name}.\n"
-            "This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.\n"
+            "This can be used to load a bitsandbytes version built with a CUDA version that is different from the PyTorch CUDA version.\n"
             "If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=\n"
-            "If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH\n"
-            "For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64\n",
         )
 
     return PACKAGE_DIR / library_name
@@ -45,10 +44,14 @@ class BNBNativeLibrary:
     def __init__(self, lib: ct.CDLL):
         self._lib = lib
 
+    @functools.cache  # noqa: B019
     def __getattr__(self, name):
+        fn = getattr(self._lib, name, None)
+
+        if fn is not None:
+            return fn
+
         def throw_on_call(*args, **kwargs):
-            if hasattr(self._lib, name):
-                return getattr(self._lib, name)(*args, **kwargs)
             raise RuntimeError(
                 f"Method '{name}' not available in CPU-only version of bitsandbytes.\n"
                 "Reinstall with GPU support or use CUDA-enabled hardware."

bitsandbytes/diagnostics/cuda.py

Lines changed: 1 addition & 22 deletions
@@ -6,7 +6,6 @@
 import torch
 
 from bitsandbytes.cextension import get_cuda_bnb_library_path
-from bitsandbytes.consts import NONPYTORCH_DOC_URL
 from bitsandbytes.cuda_specs import CUDASpecs
 from bitsandbytes.diagnostics.utils import print_dedented
 
@@ -114,26 +113,10 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
     if not binary_path.exists():
         print_dedented(
             f"""
-            Library not found: {binary_path}. Maybe you need to compile it from source?
-            If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION`,
-            for example, `make CUDA_VERSION=113`.
-
-            The CUDA version for the compile might depend on your conda install, if using conda.
-            Inspect CUDA version via `conda list | grep cuda`.
-            """,
-        )
-
-    cuda_major, cuda_minor = cuda_specs.cuda_version_tuple
-    if cuda_major < 11:
-        print_dedented(
-            """
-            WARNING: CUDA versions lower than 11 are currently not supported for LLM.int8().
-            You will be only to use 8-bit optimizers and quantization routines!
+            Library not found: {binary_path}. Maybe you need to compile it from source?
             """,
         )
 
-    print(f"To manually override the PyTorch CUDA version please see: {NONPYTORCH_DOC_URL}")
-
     # 7.5 is the minimum CC for int8 tensor cores
     if not cuda_specs.has_imma:
         print_dedented(
@@ -144,10 +127,6 @@ def print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
             """,
         )
 
-    # TODO:
-    # (1) CUDA missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible)
-    # (2) Multiple CUDA versions installed
-
 
 def print_cuda_runtime_diagnostics() -> None:
    cudart_paths = list(find_cudart_libraries())
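The surviving capability check relies on cuda_specs.has_imma, matching the comment that 7.5 is the minimum compute capability for int8 tensor cores. A hedged sketch of how such a flag can be derived with plain PyTorch; has_imma_sketch is an illustrative helper, not the library's implementation:

import torch

def has_imma_sketch() -> bool:
    # IMMA (int8 tensor core) instructions require compute capability >= 7.5 (Turing).
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability()
    return (major, minor) >= (7, 5)

print(f"int8 tensor cores available: {has_imma_sketch()}")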

bitsandbytes/diagnostics/main.py

Lines changed: 83 additions & 39 deletions
@@ -1,16 +1,30 @@
+import importlib
+import platform
 import sys
 import traceback
 
 import torch
 
+from bitsandbytes import __version__ as bnb_version
 from bitsandbytes.consts import PACKAGE_GITHUB_URL
 from bitsandbytes.cuda_specs import get_cuda_specs
 from bitsandbytes.diagnostics.cuda import (
     print_cuda_diagnostics,
-    print_cuda_runtime_diagnostics,
 )
 from bitsandbytes.diagnostics.utils import print_dedented, print_header
 
+_RELATED_PACKAGES = [
+    "accelerate",
+    "diffusers",
+    "numpy",
+    "pip",
+    "peft",
+    "safetensors",
+    "transformers",
+    "triton",
+    "trl",
+]
+
 
 def sanity_check():
     from bitsandbytes.optim import Adam
@@ -27,47 +41,77 @@ def sanity_check():
     assert p1 != p2
 
 
+def get_package_version(name: str) -> str:
+    try:
+        version = importlib.metadata.version(name)
+    except importlib.metadata.PackageNotFoundError:
+        version = "not found"
+    return version
+
+
+def show_environment():
+    """Simple utility to print out environment information."""
+
+    print(f"Platform: {platform.platform()}")
+    if platform.system() == "Linux":
+        print(f"  libc: {'-'.join(platform.libc_ver())}")
+
+    print(f"Python: {platform.python_version()}")
+
+    print(f"PyTorch: {torch.__version__}")
+    print(f"  CUDA: {torch.version.cuda or 'N/A'}")
+    print(f"  HIP: {torch.version.hip or 'N/A'}")
+    print(f"  XPU: {getattr(torch.version, 'xpu', 'N/A') or 'N/A'}")
+
+    print("Related packages:")
+    for pkg in _RELATED_PACKAGES:
+        version = get_package_version(pkg)
+        print(f"  {pkg}: {version}")
+
+
 def main():
-    print_header("")
-    print_header("BUG REPORT INFORMATION")
+    print_header(f"bitsandbytes v{bnb_version}")
+    show_environment()
     print_header("")
 
-    print_header("OTHER")
     cuda_specs = get_cuda_specs()
-    print("CUDA specs:", cuda_specs)
-    if not torch.cuda.is_available():
-        print("Torch says CUDA is not available. Possible reasons:")
-        print("1. CUDA driver not installed")
-        print("2. CUDA not installed")
-        print("3. You have multiple conflicting CUDA libraries")
+
     if cuda_specs:
         print_cuda_diagnostics(cuda_specs)
-    print_cuda_runtime_diagnostics()
-    print_header("")
-    print_header("DEBUG INFO END")
-    print_header("")
-    print("Checking that the library is importable and CUDA is callable...")
-    try:
-        sanity_check()
-        print("SUCCESS!")
-        print("Installation was successful!")
-        return
-    except RuntimeError as e:
-        if "not available in CPU-only" in str(e):
-            print(
-                f"WARNING: {__package__} is currently running as CPU-only!\n"
-                "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
-                f"If you think that this is so erroneously,\nplease report an issue!",
-            )
-        else:
-            raise e
-    except Exception:
-        traceback.print_exc()
-    print_dedented(
-        f"""
-        Above we output some debug information.
-        Please provide this info when creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose
-        WARNING: Please be sure to sanitize sensitive info from the output before posting it.
-        """,
-    )
-    sys.exit(1)
+
+    # TODO: There's a lot of noise in this; needs improvement.
+    # print_cuda_runtime_diagnostics()
+
+    if not torch.cuda.is_available():
+        print("PyTorch says CUDA is not available. Possible reasons:")
+        print("1. CUDA driver not installed")
+        print("2. Using a CPU-only PyTorch build")
+        print("3. No GPU detected")
+
+    else:
+        print("Checking that the library is importable and CUDA is callable...")
+
+        try:
+            sanity_check()
+            print("SUCCESS!")
+            return
+        except RuntimeError as e:
+            if "not available in CPU-only" in str(e):
+                print(
+                    f"WARNING: {__package__} is currently running as CPU-only!\n"
+                    "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
+                    f"If you think that this is so erroneously,\nplease report an issue!",
+                )
            else:
                raise e
        except Exception:
            traceback.print_exc()

        print_dedented(
            f"""
            Above we output some debug information.
            Please provide this info when creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose
            WARNING: Please be sure to sanitize sensitive info from the output before posting it.
            """,
        )
        sys.exit(1)
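Both get_package_version and show_environment are plain helpers, so the new flow can be exercised end to end without a GPU. A minimal way to run it, assuming the package's __main__ wiring routes python -m bitsandbytes to this main() as recent releases do:

# Hedged sketch: invoke the new diagnostics programmatically.
from bitsandbytes.diagnostics.main import main

main()  # prints the version header and environment info; may call sys.exit(1) on failure

This output is what users are asked to paste into bug reports, which is why the related-package versions were added.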

bitsandbytes/diagnostics/utils.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 HEADER_WIDTH = 60
 
 
-def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "+") -> None:
+def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "=") -> None:
     txt = f" {txt} " if txt else ""
     print(txt.center(width, filler))
 
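print_header centers its text with str.center, so this change only swaps the padding character from + to =. For illustration; the width and version string here are arbitrary:

txt = " bitsandbytes v0.0.0 "
print(txt.center(30, "="))  # prints: ==== bitsandbytes v0.0.0 =====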

bitsandbytes/functional.py

Lines changed: 2 additions & 2 deletions
@@ -851,8 +851,8 @@ def dequantize_blockwise(
     torch.ops.bitsandbytes.dequantize_blockwise.out(
         A,
         absmax,
-        code.to(A.device),
-        blocksize,
+        quant_state.code.to(A.device),
+        quant_state.blocksize,
         quant_state.dtype,
         out=out,
     )
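This is the substantive bug fix of the commit: when a caller supplies a quant_state, the function-level code and blocksize parameters can still hold their defaults, so the old dispatch could dequantize against the wrong codebook and block size. Reading both from the QuantState keeps the op consistent with how the tensor was quantized. A round-trip sketch of the affected API; the device and sizes are illustrative:

import torch

import bitsandbytes.functional as F

# With the fix, dequantize_blockwise honors the codebook and blocksize recorded
# in quant_state even though neither is passed explicitly here.
A = torch.randn(4096, device="cuda")
qA, quant_state = F.quantize_blockwise(A, blocksize=256)
A_restored = F.dequantize_blockwise(qA, quant_state)
print((A - A_restored).abs().max())  # small quantization error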
