16 | 16 | import warnings
17 | 17 | from typing import Any, List, Optional
18 | 18 |
19 |    | -from mindspore import Tensor, get_grad
   | 19 | +from mindspore import Tensor
20 | 20 |
21 |    | -from mindnlp.core import nn, ops
   | 21 | +from mindnlp.core import nn, ops, no_grad
22 | 22 | from mindnlp.core.nn import Parameter
23 | 23 | from mindnlp.core.nn import ParameterDict, ModuleDict
24 | 24 | from mindnlp.peft.utils import transpose
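The import swap above reflects the core of this change: rather than threading a separate gradient object through the allocator and extracting per-parameter gradients with `get_grad`, the updated code reads each parameter's `.grad` attribute directly, inside a `no_grad()` context so the bookkeeping is not itself traced by autograd. A minimal sketch of that access pattern, shown here in plain PyTorch (mindnlp.core exposes a similar torch-style surface); the tensor and loss are toy stand-ins:

```python
import torch

w = torch.ones(3, requires_grad=True)
loss = (w * 2.0).sum()
loss.backward()                 # populates w.grad with dloss/dw = 2

with torch.no_grad():           # bookkeeping only: build no autograd graph
    score = (w * w.grad).abs()  # per-weight importance |w * dL/dw|

print(score)                    # tensor([2., 2., 2.])
```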
@@ -440,36 +440,16 @@ def budget_schedule(self, step: int):
440 | 440 |         mask_ind = step % self.peft_config.deltaT == 0
441 | 441 |         return budget, mask_ind
442 | 442 |
443 |     | -    def update_ipt(self, model,gradient):
444 |     | -        r"""
445 |     | -        This method updates the importance parameter table (ipt) for the given model using the provided gradient.
446 |     | -
447 |     | -        Args:
448 |     | -            self: The instance of the RankAllocator class.
449 |     | -            model: The model for which the importance parameter table is being updated.
450 |     | -                Type: model object
451 |     | -                Purpose: To access the parameters and names of the model for updating the ipt.
452 |     | -                Restrictions: None
453 |     | -            gradient: The gradient to be used for updating the ipt.
454 |     | -                Type: gradient object
455 |     | -                Purpose: To calculate the importance parameter table based on the gradient.
456 |     | -                Restrictions: None
457 |     | -
458 |     | -        Returns:
459 |     | -            None. The method does not return any value.
460 |     | -
461 |     | -        Raises:
462 |     | -            None
463 |     | -        """
    | 443 | +    def update_ipt(self, model):
464 | 444 |         # Update the sensitivity and uncertainty for every weight
465 |     | -        for n, p in model.parameters_and_names():
    | 445 | +        for n, p in model.named_parameters():
466 | 446 |             if "lora_" in n and self.adapter_name in n:
467 | 447 |                 if n not in self.ipt:
468 |     | -                    grad = get_grad(gradient, p)
469 | 448 |                     self.ipt[n] = ops.zeros_like(p)
470 | 449 |                     self.exp_avg_ipt[n] = ops.zeros_like(p)
471 | 450 |                     self.exp_avg_unc[n] = ops.zeros_like(p)
472 |     | -                self.ipt[n] = (p * grad).abs()
    | 451 | +                with no_grad():
    | 452 | +                    self.ipt[n] = (p * p.grad).abs()
473 | 453 |                 # Sensitivity smoothing
474 | 454 |                 self.exp_avg_ipt[n] = self.beta1 * self.exp_avg_ipt[n] + (1 - self.beta1) * self.ipt[n]
475 | 455 |                 # Uncertainty quantification
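One consequence of `update_ipt(model)` reading `p.grad` directly: it must be called after `backward()` has populated the gradients and before the optimizer zeroes them. Below is a self-contained PyTorch sketch of the same sensitivity/uncertainty bookkeeping; the toy model, loss, and `beta1`/`beta2` values are stand-ins, and the real method additionally filters parameter names for `"lora_"` and the adapter name, but the EMA formulas mirror the hunk above:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)          # toy stand-in for a model with LoRA params
ipt, exp_avg_ipt, exp_avg_unc = {}, {}, {}
beta1, beta2 = 0.85, 0.85        # smoothing factors (illustrative values)

loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()                  # p.grad must be populated before the update

with torch.no_grad():
    for n, p in model.named_parameters():
        if n not in ipt:         # lazy init, as in the hunk above
            ipt[n] = torch.zeros_like(p)
            exp_avg_ipt[n] = torch.zeros_like(p)
            exp_avg_unc[n] = torch.zeros_like(p)
        # Per-weight importance: |w * dL/dw|
        ipt[n] = (p * p.grad).abs()
        # Sensitivity smoothing (exponential moving average)
        exp_avg_ipt[n] = beta1 * exp_avg_ipt[n] + (1 - beta1) * ipt[n]
        # Uncertainty: EMA of the gap between raw and smoothed importance
        exp_avg_unc[n] = beta2 * exp_avg_unc[n] + (1 - beta2) * (ipt[n] - exp_avg_ipt[n]).abs()
```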
@@ -586,27 +566,10 @@ def mask_to_budget(self, model, budget):
586 | 566 |             rank_pattern[n] = (~(triplet_ipt[n] <= mask_threshold)).view(-1).asnumpy().tolist()
587 | 567 |         return rank_pattern
588 | 568 |
589 |     | -    def update_and_allocate(self, model, global_step, gradient, force_mask=False):
590 |     | -        r"""
591 |     | -        This method updates the model and allocates budget based on the global step and gradient information.
592 |     | -
593 |     | -        Args:
594 |     | -            - self: Reference to the current instance of the class.
595 |     | -            - model: The model to be updated and allocated the budget.
596 |     | -            - global_step: The current global step of the training process.
597 |     | -            - gradient: The gradient information used for updating the model.
598 |     | -            - force_mask: A boolean flag indicating whether to force the masking operation. Default is False.
599 |     | -
600 |     | -        Returns:
601 |     | -            - budget: The allocated budget for the current step.
602 |     | -            - rank_pattern: The rank pattern based on the budget allocation, or None if no masking is needed.
603 |     | -
604 |     | -        Raises:
605 |     | -            - No specific exceptions are raised by this method.
606 |     | -        """
    | 569 | +    def update_and_allocate(self, model, global_step, force_mask=False):
607 | 570 |         # # Update the importance score and allocate the budget
608 | 571 |         if global_step < self.peft_config.total_step - self.peft_config.tfinal:
609 |     | -            self.update_ipt(model,gradient)
    | 572 | +            self.update_ipt(model)
610 | 573 |         budget, mask_ind = self.budget_schedule(global_step)
611 | 574 |         # Allocate the budget according to importance scores
612 | 575 |         if mask_ind or force_mask:
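For context on the unchanged `budget_schedule(global_step)` call: AdaLoRA holds the full rank budget during warmup, decays it cubically, then pins the target budget for the last `tfinal` steps, which is also why `update_ipt` stops running once `global_step` reaches `total_step - tfinal`. A standalone sketch of that schedule, with illustrative hyperparameter values rather than mindnlp defaults:

```python
def budget_schedule(step, init_bgt=12, target_bgt=4, tinit=200,
                    tfinal=500, total_step=3000, deltaT=10):
    if step <= tinit:                  # warmup: keep the full budget
        return init_bgt, False
    if step > total_step - tfinal:     # final phase: hold the target budget
        return target_bgt, True
    # In between, decay the budget cubically toward the target
    mul = 1 - (step - tinit) / (total_step - tfinal - tinit)
    budget = int((init_bgt - target_bgt) * mul**3 + target_bgt)
    return budget, step % deltaT == 0  # only mask every deltaT steps

print(budget_schedule(1000))  # (6, True) with these illustrative values
```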