We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1c4f639 · commit bf2539b (Copy full SHA for bf2539b)
src/llmcompressor/modifiers/quantization/gptq/gptq_quantize.py
@@ -3,6 +3,7 @@
3
from typing import Dict, Optional, Tuple, Union
4
5
import torch
6
+import torch._dynamo.config
7
import transformers
8
from compressed_tensors.quantization import (
9
ActivationOrdering,
@@ -16,6 +17,8 @@
16
17
from llmcompressor.observers.base import Observer
18
from llmcompressor.pytorch.utils.helpers import tensor_sparsity
19
20
+torch._dynamo.config.capture_scalar_outputs = True
21
+
22
GPTQ_PRECISION = torch.float32
23
24
__all__ = ["make_empty_hessian", "accumulate_hessian", "quantize_weight"]
@@ -68,6 +71,7 @@ def accumulate_hessian(
68
71
return H, num_samples
69
72
70
73
74
+@torch.compile
75
def quantize_weight(
76
module: torch.nn.Module,
77
quant_args: QuantizationArgs,
0 commit comments