File tree Expand file tree Collapse file tree 1 file changed +0
-27
lines changed
src/llmcompressor/transformers/compression Expand file tree Collapse file tree 1 file changed +0
-27
lines changed Original file line number Diff line number Diff line change @@ -104,33 +104,6 @@ def infer_sparsity_structure_from_model(model: torch.nn.Module) -> Optional[str]
104
104
return None
105
105
106
106
107
- def quantization_memory_requirement (model : torch .nn .Module ) -> int :
108
- """
109
- Determines the max number of bytes needed to store quantization scale and zp data
110
-
111
- :param model: model to calculate requirements for
112
- :return: number of bytes required to reserve for quantization
113
- """
114
-
115
- total_elements = 0
116
- for _ , module in model .named_modules ():
117
- if isinstance (module , Linear ):
118
- for param in module .parameters ():
119
- # assume the max of group 128 and static scale/zp
120
- # TODO: base this on the recipe instead of assuming max
121
-
122
- # potentially just bias term
123
- max_quant_shape = param .shape [0 ] // 128
124
-
125
- if len (param .size ()) > 1 : # weights
126
- max_quant_shape *= param .shape [1 ]
127
-
128
- total_elements += max_quant_shape * 4
129
-
130
- bytes_ratio = 32 // 16 # assuming float16
131
- return total_elements * bytes_ratio
132
-
133
-
134
107
def infer_sparse_targets_and_ignores (
135
108
model : torch .nn .Module ,
136
109
sparsity_structure : str ,
You can’t perform that action at this time.
0 commit comments