Skip to content

Commit c22814f

Browse files
jeejeelee authored and sfeng33 committed
[Quantization] Bump to use latest bitsandbytes (vllm-project#20424)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
1 parent c7df9bf commit c22814f

File tree

8 files changed

+14
-14
lines changed

8 files changed

+14
-14
lines changed

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -498,7 +498,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
498498
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
499499
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
500500
else \
501-
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
501+
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.46.1' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
502502
fi
503503

504504
ENV VLLM_USAGE_SOURCE production-docker-image

docs/features/quantization/bnb.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ Compared to other quantization methods, BitsAndBytes eliminates the need for cal
1010
Below are the steps to utilize BitsAndBytes with vLLM.
1111

1212
```bash
13-
pip install bitsandbytes>=0.45.3
13+
pip install bitsandbytes>=0.46.1
1414
```
1515

1616
vLLM reads the model's config file and supports both in-flight quantization and pre-quantized checkpoint.

requirements/nightly_torch_test.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ tokenizers==0.21.1
3434
huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
3535
schemathesis>=3.39.15 # Required for openai schema test.
3636
# quantization
37-
bitsandbytes>=0.45.3
37+
bitsandbytes>=0.46.1
3838
buildkite-test-collector==0.1.9
3939

4040

requirements/test.in

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ tokenizers==0.21.1
3939
huggingface-hub[hf_xet]>=0.33.0 # Required for Xet downloads.
4040
schemathesis>=3.39.15 # Required for openai schema test.
4141
# quantization
42-
bitsandbytes>=0.45.3
42+
bitsandbytes==0.46.1
4343
buildkite-test-collector==0.1.9
4444

4545

requirements/test.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ backoff==2.2.1
4545
# via
4646
# -r requirements/test.in
4747
# schemathesis
48-
bitsandbytes==0.45.3
48+
bitsandbytes==0.46.1
4949
# via -r requirements/test.in
5050
black==24.10.0
5151
# via datamodel-code-generator

vllm/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -969,7 +969,7 @@ def _verify_cuda_graph(self) -> None:
969969

970970
def _verify_bnb_config(self) -> None:
971971
"""
972-
The current version of bitsandbytes (0.45.3) with 8-bit models does not
972+
The current version of bitsandbytes (0.46.1) with 8-bit models does not
973973
yet support CUDA graph.
974974
# TODO Remove this when bitsandbytes supports.
975975
"""

vllm/model_executor/layers/quantization/bitsandbytes.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -156,12 +156,12 @@ class BitsAndBytesLinearMethod(LinearMethodBase):
156156
def __init__(self, quant_config: BitsAndBytesConfig):
157157
try:
158158
import bitsandbytes
159-
if bitsandbytes.__version__ < "0.45.3":
159+
if bitsandbytes.__version__ < "0.46.1":
160160
raise ImportError("bitsandbytes version is wrong. Please "
161-
"install bitsandbytes>=0.45.3.")
161+
"install bitsandbytes>=0.46.1.")
162162
except ImportError as err:
163-
raise ImportError("Please install bitsandbytes>=0.45.3 via "
164-
"`pip install bitsandbytes>=0.45.3` to use "
163+
raise ImportError("Please install bitsandbytes>=0.46.1 via "
164+
"`pip install bitsandbytes>=0.46.1` to use "
165165
"bitsandbytes quantizer.") from err
166166

167167
self.quant_config = quant_config

vllm/model_executor/model_loader/bitsandbytes_loader.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -183,12 +183,12 @@ def _get_quantized_weights_iterator(
183183
try:
184184
import bitsandbytes
185185

186-
if bitsandbytes.__version__ < "0.45.3":
186+
if bitsandbytes.__version__ < "0.46.1":
187187
raise ImportError("bitsandbytes version is wrong. Please "
188-
"install bitsandbytes>=0.45.3.")
188+
"install bitsandbytes>=0.46.1.")
189189
except ImportError as err:
190-
raise ImportError("Please install bitsandbytes>=0.45.3 via "
191-
"`pip install bitsandbytes>=0.45.3` to use "
190+
raise ImportError("Please install bitsandbytes>=0.46.1 via "
191+
"`pip install bitsandbytes>=0.46.1` to use "
192192
"bitsandbytes quantizer.") from err
193193

194194
hf_weights_files, use_safetensors = self._prepare_weights(

0 commit comments

Comments (0)