Skip to content

Commit 5ce1a83

Browse files
authored
fix update_and_allocate (#1834)
1 parent a128138 commit 5ce1a83

File tree

4 files changed

+16
-53
lines changed

4 files changed

+16
-53
lines changed

mindnlp/common/modules/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,15 @@
1313
# limitations under the License.
1414
# ============================================================================
1515
"""modules init"""
16-
from . import loss, attentions, crf, accumulator
16+
from . import loss, attentions, crf
1717
from .attentions import ScaledDotProductAttention, DotProductAttention, \
1818
BilinearAttention, AdditiveAttention, CosineAttention, \
1919
LinearAttention
2020
from .crf import CRF
2121
from .loss import RDropLoss, CMRC2018Loss
22-
from .accumulator import *
2322

2423
__all__ = []
2524

2625
__all__.extend(attentions.__all__)
2726
__all__.extend(crf.__all__)
2827
__all__.extend(loss.__all__)
29-
__all__.extend(accumulator.__all__)

mindnlp/core/autograd/function.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
grad_ = GradOperation(False, True, False)
1111

12-
def value_and_grad(fn, params_or_argnums, has_aux=False, attach_grads=False):
12+
def value_and_grad(fn, params_or_argnums, has_aux=False, attach_grads=True):
1313
use_argnums = False
1414
if isinstance(params_or_argnums, Generator):
1515
params_or_argnums = tuple(params_or_argnums)

mindnlp/peft/tuners/adalora/layer.py

Lines changed: 8 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
import warnings
1717
from typing import Any, List, Optional
1818

19-
from mindspore import Tensor, get_grad
19+
from mindspore import Tensor
2020

21-
from mindnlp.core import nn, ops
21+
from mindnlp.core import nn, ops, no_grad
2222
from mindnlp.core.nn import Parameter
2323
from mindnlp.core.nn import ParameterDict, ModuleDict
2424
from mindnlp.peft.utils import transpose
@@ -440,36 +440,16 @@ def budget_schedule(self, step: int):
440440
mask_ind = step % self.peft_config.deltaT == 0
441441
return budget, mask_ind
442442

443-
def update_ipt(self, model,gradient):
444-
r"""
445-
This method updates the importance parameter table (ipt) for the given model using the provided gradient.
446-
447-
Args:
448-
self: The instance of the RankAllocator class.
449-
model: The model for which the importance parameter table is being updated.
450-
Type: model object
451-
Purpose: To access the parameters and names of the model for updating the ipt.
452-
Restrictions: None
453-
gradient: The gradient to be used for updating the ipt.
454-
Type: gradient object
455-
Purpose: To calculate the importance parameter table based on the gradient.
456-
Restrictions: None
457-
458-
Returns:
459-
None. The method does not return any value.
460-
461-
Raises:
462-
None
463-
"""
443+
def update_ipt(self, model):
464444
# Update the sensitivity and uncertainty for every weight
465-
for n, p in model.parameters_and_names():
445+
for n, p in model.named_parameters():
466446
if "lora_" in n and self.adapter_name in n:
467447
if n not in self.ipt:
468-
grad = get_grad(gradient, p)
469448
self.ipt[n] = ops.zeros_like(p)
470449
self.exp_avg_ipt[n] = ops.zeros_like(p)
471450
self.exp_avg_unc[n] = ops.zeros_like(p)
472-
self.ipt[n] = (p * grad).abs()
451+
with no_grad():
452+
self.ipt[n] = (p * p.grad).abs()
473453
# Sensitivity smoothing
474454
self.exp_avg_ipt[n] = self.beta1 * self.exp_avg_ipt[n] + (1 - self.beta1) * self.ipt[n]
475455
# Uncertainty quantification
@@ -586,27 +566,10 @@ def mask_to_budget(self, model, budget):
586566
rank_pattern[n] = (~(triplet_ipt[n] <= mask_threshold)).view(-1).asnumpy().tolist()
587567
return rank_pattern
588568

589-
def update_and_allocate(self, model, global_step, gradient, force_mask=False):
590-
r"""
591-
This method updates the model and allocates budget based on the global step and gradient information.
592-
593-
Args:
594-
- self: Reference to the current instance of the class.
595-
- model: The model to be updated and allocated the budget.
596-
- global_step: The current global step of the training process.
597-
- gradient: The gradient information used for updating the model.
598-
- force_mask: A boolean flag indicating whether to force the masking operation. Default is False.
599-
600-
Returns:
601-
- budget: The allocated budget for the current step.
602-
- rank_pattern: The rank pattern based on the budget allocation, or None if no masking is needed.
603-
604-
Raises:
605-
- No specific exceptions are raised by this method.
606-
"""
569+
def update_and_allocate(self, model, global_step, force_mask=False):
607570
# # Update the importance score and allocate the budget
608571
if global_step < self.peft_config.total_step - self.peft_config.tfinal:
609-
self.update_ipt(model,gradient)
572+
self.update_ipt(model)
610573
budget, mask_ind = self.budget_schedule(global_step)
611574
# Allocate the budget according to importance scores
612575
if mask_ind or force_mask:

mindnlp/peft/tuners/adalora/model.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def resize_state_dict_by_rank_pattern(self, rank_pattern, state_dict, adapter_na
444444
state_dict[key][0] = dims
445445
return state_dict
446446

447-
def update_and_allocate(self, global_step, gradient):
447+
def update_and_allocate(self, global_step):
448448
"""
449449
This method updates Adalora budget and mask.
450450
@@ -468,18 +468,20 @@ def update_and_allocate(self, global_step, gradient):
468468
lora_config = self.peft_config[self.trainable_adapter_name]
469469
# Update the importance score and allocate the budget
470470
if global_step < lora_config.total_step - lora_config.tfinal:
471-
_, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step, gradient)
471+
_, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step)
472472
if rank_pattern:
473473
lora_config.rank_pattern = rank_pattern
474474
# Finalize the budget allocation
475475
elif global_step == lora_config.total_step - lora_config.tfinal:
476-
_, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step, gradient,force_mask=True)
476+
_, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step, force_mask=True)
477477
# for some reason, this freezes the trainable parameters and nothing gets updated
478-
# self.resize_cells_by_rank_pattern(rank_pattern, self.trainable_adapter_name)
478+
# self.resize_modules_by_rank_pattern(rank_pattern, self.trainable_adapter_name)
479479
lora_config.rank_pattern = rank_pattern
480480
self.rankallocator.reset_ipt()
481481
# Currently using inefficient way to mask the unimportant weights using the rank pattern
482482
# due to problem mentioned above
483483
elif global_step > lora_config.total_step - lora_config.tfinal:
484484
self.rankallocator.mask_using_rank_pattern(self.model, lora_config.rank_pattern)
485485
# Pass the function and do forward propagation
486+
else:
487+
return None

0 commit comments

Comments
 (0)