Commit 96476fe

remove unused util
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 96631d1 commit 96476fe

File tree

1 file changed: +0 −27 lines
  • src/llmcompressor/transformers/compression/helpers.py

src/llmcompressor/transformers/compression/helpers.py

Lines changed: 0 additions & 27 deletions
@@ -104,33 +104,6 @@ def infer_sparsity_structure_from_model(model: torch.nn.Module) -> Optional[str]
     return None
 
 
-def quantization_memory_requirement(model: torch.nn.Module) -> int:
-    """
-    Determines the max number of bytes needed to store quantization scale and zp data
-
-    :param model: model to calculate requirements for
-    :return: number of bytes required to reserve for quantization
-    """
-
-    total_elements = 0
-    for _, module in model.named_modules():
-        if isinstance(module, Linear):
-            for param in module.parameters():
-                # assume the max of group 128 and static scale/zp
-                # TODO: base this on the recipe instead of assuming max
-
-                # potentially just bias term
-                max_quant_shape = param.shape[0] // 128
-
-                if len(param.size()) > 1:  # weights
-                    max_quant_shape *= param.shape[1]
-
-                total_elements += max_quant_shape * 4
-
-    bytes_ratio = 32 // 16  # assuming float16
-    return total_elements * bytes_ratio
-
-
 def infer_sparse_targets_and_ignores(
     model: torch.nn.Module,
     sparsity_structure: str,
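For reference, here is a minimal sketch of the estimate the removed helper computed (the function name estimate_quant_overhead_bytes and the 4096x4096 example layer are hypothetical, for illustration only): each parameter of a torch.nn.Linear module is assumed to need one group of scale/zero-point data per 128 elements along its first dimension, multiplied across the second dimension for weight matrices, with 4 elements per group and a byte ratio of 2 for float16 storage.

import torch
from torch.nn import Linear

def estimate_quant_overhead_bytes(model: torch.nn.Module) -> int:
    # Sketch of the removed helper's logic: assume group-128 quantization,
    # 4 elements of scale/zero-point overhead per group, stored at a
    # 2x byte ratio (32 // 16, i.e. float16 relative to a 32-bit baseline).
    total_elements = 0
    for module in model.modules():
        if isinstance(module, Linear):
            for param in module.parameters():
                groups = param.shape[0] // 128  # may cover just a bias term
                if param.dim() > 1:  # weight matrix rather than bias
                    groups *= param.shape[1]
                total_elements += groups * 4
    return total_elements * (32 // 16)

# Worked example (hypothetical layer size): a single 4096x4096 Linear
layer = Linear(4096, 4096, bias=False)
print(estimate_quant_overhead_bytes(layer))  # (4096 // 128) * 4096 * 4 * 2 = 1048576

As the commit message notes, the helper had no remaining callers, so the commit deletes it rather than correcting the TODO about deriving the group size from the recipe.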