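update: dedent super().load_state_dict(state_dict) by one level in linear.py; pass weights without .clone() in load_time_quantization_loader.py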

tangbinhan · tangbinhan · commit b9c5dee0d0bb · 2025-07-04T18:01:50.000+08:00
diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
@@ -391,7 +391,7 @@ def load_state_dict(self, state_dict: dict):
 
             state_dict[self.weight_key] = weight_tensor
 
-            super().load_state_dict(state_dict)
+        super().load_state_dict(state_dict)
 
 
 class QKVParallelLinear(ColumnParallelLinear):
diff --git a/fastdeploy/model_executor/model_loader/load_time_quantization_loader.py b/fastdeploy/model_executor/model_loader/load_time_quantization_loader.py
@@ -76,12 +76,12 @@ def _get_quantized_weights(
                 apply_quant_action(
                     quant_filtered_map,
                     key,
-                    weight.clone(),
+                    weight,
                     state_dict,
                     quant_layer_instance_map,
                 )
             else:
-                state_dict[key] = weight.clone()
+                state_dict[key] = weight
         deal_state_dict(state_dict)
         paddle.device.cuda.empty_cache()
         paddle.device.cuda.synchronize()