Commit 931e4e9

fix initial device, update import
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent e4487e2 commit 931e4e9

4 files changed: +5 -9 lines changed

src/llmcompressor/pipelines/basic/pipeline.py

Lines changed: 1 addition & 2 deletions

@@ -9,8 +9,7 @@
 from llmcompressor.modifiers.utils.pytorch_helpers import apply_pad_mask_to_batch
 from llmcompressor.pipelines.registry import CalibrationPipeline
 from llmcompressor.pytorch.utils.helpers import tensors_to_device
-from llmcompressor.utils.dev import dispatch_for_generation
-from llmcompressor.utils.helpers import calibration_forward_context
+from llmcompressor.utils import calibration_forward_context, dispatch_for_generation
 
 if TYPE_CHECKING:
     from llmcompressor.args.dataset_arguments import DatasetArguments
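
The two helpers are now imported once from the package namespace instead of from the `dev` and `helpers` submodules. A quick sanity check, assuming `llmcompressor.utils` re-exports both names at the package level:

    import llmcompressor.utils

    # assumption: both helpers are re-exported from the package namespace
    assert hasattr(llmcompressor.utils, "calibration_forward_context")
    assert hasattr(llmcompressor.utils, "dispatch_for_generation")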

src/llmcompressor/pipelines/cache.py

Lines changed: 1 addition & 1 deletion

@@ -61,7 +61,7 @@ def empty(cls, num_batches: int, offload_device: torch.device):
     def from_dataloader(
         cls,
         dataloader: torch.utils.data.DataLoader,
-        model_device: torch.device,
+        model_device: torch.device = torch.device("cpu"),
         mask_padding: bool = True,
         offload_device: Optional[torch.device] = torch.device("cpu"),
     ):
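
With `model_device` defaulting to CPU, callers can build the cache without first querying the model for a device, as the sequential pipeline below now does. A minimal sketch of the new call shape (the toy dataset is illustrative, not from this commit):

    import torch
    from torch.utils.data import DataLoader, Dataset

    from llmcompressor.pipelines.cache import IntermediatesCache

    class ToyCalibrationSet(Dataset):
        # hypothetical stand-in for a tokenized calibration dataset
        def __len__(self):
            return 4

        def __getitem__(self, idx):
            return {
                "input_ids": torch.ones(8, dtype=torch.long),
                "attention_mask": torch.ones(8, dtype=torch.long),
            }

    dataloader = DataLoader(ToyCalibrationSet(), batch_size=2)

    # the model_device argument can now be omitted; batches stay on CPU
    activations = IntermediatesCache.from_dataloader(dataloader)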

src/llmcompressor/pipelines/layer_sequential/helpers.py

Lines changed: 1 addition & 3 deletions

@@ -6,7 +6,6 @@
 import torch
 import tqdm
 from compressed_tensors.quantization import find_name_or_class_matches
-from compressed_tensors.utils import get_execution_device
 from torch.nn import Module
 from torch.utils.data.dataloader import DataLoader
 
@@ -62,15 +61,14 @@ def capture_first_layer_intermediates(
     :param mask_padding: zero out padding tokens if True. This affects modifiers such as
         GPTQ and SparseGPT
     """
-    model_device = get_execution_device(model)
     intermediates = IntermediatesCache.empty(len(dataloader), torch.device("cpu"))
     signature = inspect.signature(first_layer.forward)
 
     with calibration_forward_context(model), early_stop_hook(first_layer):
         desc = "Preparing intermediates cache"
         for batch_index, batch in enumerate(tqdm.tqdm(dataloader, desc=desc)):
             batch = apply_pad_mask_to_batch(batch) if mask_padding else batch
-            batch = tensors_to_device(batch, model_device)
+            batch = tensors_to_device(batch, torch.device("cpu"))
 
             try:
                 model(**batch)
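
Calibration batches are now staged on CPU rather than on a probed model device; presumably, for offloaded or dispatched models, hooks move the tensors to the right device during the forward pass. A minimal sketch of the recursive move that `tensors_to_device` is assumed to perform (`_to_device_sketch` is a hypothetical stand-in, not the library helper):

    import torch

    def _to_device_sketch(batch, device):
        # hypothetical stand-in for tensors_to_device: recursively move
        # every tensor in a nested batch structure onto the given device
        if isinstance(batch, torch.Tensor):
            return batch.to(device)
        if isinstance(batch, dict):
            return {key: _to_device_sketch(val, device) for key, val in batch.items()}
        if isinstance(batch, (list, tuple)):
            return type(batch)(_to_device_sketch(val, device) for val in batch)
        return batch

    batch = {"input_ids": torch.ones(2, 8, dtype=torch.long)}
    assert _to_device_sketch(batch, torch.device("cpu"))["input_ids"].device.type == "cpu"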

src/llmcompressor/pipelines/sequential/pipeline.py

Lines changed: 2 additions & 3 deletions

@@ -1,7 +1,7 @@
 from typing import TYPE_CHECKING
 
 import torch
-from compressed_tensors.utils import disable_offloading, get_execution_device
+from compressed_tensors.utils import disable_offloading
 from torch.utils.data.dataloader import DataLoader
 from tqdm import tqdm
 
@@ -69,8 +69,7 @@ def __call__(
 
         with calibration_forward_context(model), DisableQuantization(model):
             # prepare intermediates cache
-            model_device = get_execution_device(model)
-            activations = IntermediatesCache.from_dataloader(dataloader, model_device)
+            activations = IntermediatesCache.from_dataloader(dataloader)
 
             for subgraph_index, subgraph in enumerate(subgraphs):
                 # prepare tqdm description texts
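
With the cache defaulting to CPU, the pipeline no longer needs to probe the model for an execution device before calibration. Presumably this is the "initial device" fix: querying the device up front can report the wrong placement for a model that has not been fully dispatched yet, whereas CPU-resident inputs can be moved by the model's own dispatch hooks at forward time. A hypothetical stand-in (`_execution_device_sketch` is illustrative, not the compressed_tensors helper) for the kind of probe that was removed:

    import torch
    from torch.nn import Module

    def _execution_device_sketch(model: Module) -> torch.device:
        # illustrative: report the device of the first parameter; a probe
        # like this is only meaningful after the model has been dispatched
        try:
            return next(model.parameters()).device
        except StopIteration:
            return torch.device("cpu")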
