
Commit 83ca9ae

Mark invariant normalizer in Gemma as non-persistent (#19788)

Signed-off-by: Yu-Hang Tang <Tang.Maxin@gmail.com>

1 parent: e2148dc

4 files changed: +29 −3 lines changed
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import numpy as np
+import pytest
+
+MODELS = ["google/gemma-2b", "google/gemma-2-2b", "google/gemma-3-4b-it"]
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_dummy_loader(vllm_runner, model: str) -> None:
+    with vllm_runner(
+            model,
+            load_format="dummy",
+    ) as llm:
+        normalizers = llm.collective_rpc(lambda self: self.worker.model_runner.
+                                         model.model.normalizer.cpu().item())
+        assert np.allclose(
+            normalizers,
+            llm.llm_engine.model_config.hf_config.hidden_size**0.5,
+            rtol=1e-3)
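The new test loads each Gemma variant with load_format="dummy", which in vLLM initializes the model's state_dict entries with random values instead of reading a real checkpoint, then uses collective_rpc to read the normalizer buffer from every worker and asserts it still equals hidden_size**0.5. With the buffer marked non-persistent it is excluded from the state_dict, so the dummy loader can no longer overwrite the computed value.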

vllm/model_executor/models/gemma.py

Lines changed: 3 additions & 1 deletion
@@ -281,7 +281,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer("normalizer",
+                             torch.tensor(normalizer),
+                             persistent=False)
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))

vllm/model_executor/models/gemma2.py

Lines changed: 3 additions & 1 deletion
@@ -267,7 +267,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer("normalizer",
+                             torch.tensor(normalizer),
+                             persistent=False)
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))

vllm/model_executor/models/gemma3.py

Lines changed: 3 additions & 1 deletion
@@ -371,7 +371,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer("normalizer",
+                             torch.tensor(normalizer),
+                             persistent=False)
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))
