Skip to content

Commit e554fba

Browse files
authored
[Decompression] Keep unused parameters when decompressing from memory (#340)
* keep unused during decompression

  Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

* update docstring and typehint

  Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>

---------

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 56cf39c commit e554fba

File tree

2 files changed

+36
-14
lines changed

2 files changed

+36
-14
lines changed

src/compressed_tensors/compressors/model_compressors/model_compressor.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -462,18 +462,13 @@ def decompress_model(self, model: Module):
462462

463463
# quantization second
464464
if prefix in module_to_scheme:
465-
generator = self.quantization_compressor.decompress_from_state_dict(
466-
state_dict,
467-
names_to_scheme=module_to_scheme,
465+
state_dict = (
466+
self.quantization_compressor.decompress_module_from_state_dict(
467+
prefix,
468+
state_dict,
469+
scheme=module_to_scheme[prefix],
470+
)
468471
)
469-
# generates (mod_path, {param_name, param_val})
470-
# of compressed params and used params, but not unused params
471-
# some used params are removed by get_unexpected_file_keys
472-
state_dict = {
473-
merge_names(module_path, param_name): param_value
474-
for module_path, compressed_data in generator
475-
for param_name, param_value in compressed_data.items()
476-
}
477472

478473
# remove any existing parameters
479474
exec_device = get_execution_device(module)

src/compressed_tensors/compressors/quantized_compressors/base.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
get_nested_weight_mappings,
2525
merge_names,
2626
)
27+
from compressed_tensors.utils.safetensors_load import match_param_name
2728
from safetensors import safe_open
2829
from torch import Tensor
2930
from tqdm import tqdm
@@ -223,9 +224,7 @@ def decompress_from_state_dict(
223224
state_dict, self.compression_param_names
224225
)
225226
for module_path in weight_mappings.keys():
226-
weight_data = {}
227-
for param_name, param_value in weight_mappings[module_path].items():
228-
weight_data[param_name] = param_value
227+
weight_data = weight_mappings[module_path].copy()
229228

230229
if "weight_scale" in weight_data:
231230
quant_args = names_to_scheme[module_path].weights
@@ -234,3 +233,31 @@ def decompress_from_state_dict(
234233
)
235234
weight_data["weight"] = decompressed
236235
yield module_path, weight_data
236+
237+
def decompress_module_from_state_dict(
    self,
    prefix: str,
    state_dict: Dict[str, torch.Tensor],
    scheme: QuantizationScheme,
) -> Dict[str, torch.Tensor]:
    """
    Decompress the parameters of a single module for in-memory decompression
    pathways.

    Parameters that are not part of the compressed representation are carried
    through untouched, so unused params survive decompression.

    :param prefix: prefix of state_dict, typically the path to the module
    :param state_dict: state dict containing module parameter values
    :param scheme: quantization scheme of module to decompress
    :return: state dict with weight decompressed if applicable
    """
    dot_prefix = f"{prefix}."

    # work on module-local names while decompressing
    local_params = {}
    for name, tensor in state_dict.items():
        local_params[name.removeprefix(dot_prefix)] = tensor

    # a weight_scale entry marks the weight as compressed/quantized
    if "weight_scale" in local_params:
        local_params["weight"] = self.decompress_weight(
            compressed_data=local_params, quantization_args=scheme.weights
        )

    # restore the fully-qualified parameter names
    return {dot_prefix + name: tensor for name, tensor in local_params.items()}

0 commit comments

Comments (0)