
Commit bf2539b

Use torch.compile to speed up GPTQ algo
Signed-off-by: aladerran <aladerran@gmail.com>
1 parent 1c4f639

File tree

1 file changed: +4 -0 lines changed

src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py

Lines changed: 4 additions & 0 deletions
@@ -3,6 +3,7 @@
 from typing import Dict, Optional, Tuple, Union

 import torch
+import torch._dynamo.config
 import transformers
 from compressed_tensors.quantization import (
     ActivationOrdering,
@@ -16,6 +17,8 @@
 from llmcompressor.observers.base import Observer
 from llmcompressor.pytorch.utils.helpers import tensor_sparsity

+torch._dynamo.config.capture_scalar_outputs = True
+
 GPTQ_PRECISION = torch.float32

 __all__ = ["make_empty_hessian", "accumulate_hessian", "quantize_weight"]
@@ -68,6 +71,7 @@ def accumulate_hessian(
     return H, num_samples


+@torch.compile
 def quantize_weight(
     module: torch.nn.Module,
     quant_args: QuantizationArgs,
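
For context (not part of the commit itself): torch.compile JIT-compiles quantize_weight with TorchDynamo, and capture_scalar_outputs = True lets the compiler trace through scalar-producing calls such as Tensor.item() instead of falling back to eager with a graph break. Below is a minimal standalone sketch of the same pattern; the function name scale_by_max is hypothetical and stands in for the real quantize_weight.

# Minimal sketch (not from this commit) of the pattern the change enables.
import torch
import torch._dynamo.config

# Without this flag, Tensor.item() inside a compiled region forces a
# graph break; with it, Dynamo captures the scalar symbolically.
torch._dynamo.config.capture_scalar_outputs = True

@torch.compile
def scale_by_max(x: torch.Tensor) -> torch.Tensor:
    # .item() yields a Python scalar, which capture_scalar_outputs
    # allows torch.compile to trace without dropping out of the graph.
    max_val = x.abs().max().item()
    return x / max_val

print(scale_by_max(torch.randn(4, 4)))

The first call pays a one-time compilation cost; repeated calls with the same tensor shapes reuse the compiled graph, which is where the speedup for the per-layer GPTQ loop comes from.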
