16 | 16 | import warnings
17 | 17 | from typing import Any, List, Optional
18 | 18 |
19 |    | -from mindspore import Tensor, get_grad
   | 19 | +from mindspore import Tensor
20 | 20 |
21 |    | -from mindnlp.core import nn, ops
   | 21 | +from mindnlp.core import nn, ops, no_grad
22 | 22 | from mindnlp.core.nn import Parameter
23 | 23 | from mindnlp.core.nn import ParameterDict, ModuleDict
24 | 24 | from mindnlp.peft.utils import transpose
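The import swap above reflects the core of this change: rather than threading a separate gradient object through the allocator and extracting per-parameter gradients with `get_grad`, the updated code reads each parameter's `.grad` attribute directly, inside a `no_grad()` context so the bookkeeping is not itself traced by autograd. A minimal sketch of that access pattern, shown here in plain PyTorch (mindnlp.core exposes a similar torch-style surface); the tensor and loss are toy stand-ins:

```python
import torch

w = torch.ones(3, requires_grad=True)
loss = (w * 2.0).sum()
loss.backward()                 # populates w.grad with dloss/dw = 2

with torch.no_grad():           # bookkeeping only: build no autograd graph
    score = (w * w.grad).abs()  # per-weight importance |w * dL/dw|

print(score)                    # tensor([2., 2., 2.])
```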
@@ -440,36 +440,16 @@ def budget_schedule(self, step: int):
440 | 440 |         mask_ind = step % self.peft_config.deltaT == 0
441 | 441 |         return budget, mask_ind
442 | 442 |
443 |     | -    def update_ipt(self, model,gradient):
444 |     | -        r"""
445 |     | -        This method updates the importance parameter table (ipt) for the given model using the provided gradient.
446 |     | -
447 |     | -        Args:
448 |     | -            self: The instance of the RankAllocator class.
449 |     | -            model: The model for which the importance parameter table is being updated.
450 |     | -                Type: model object
451 |     | -                Purpose: To access the parameters and names of the model for updating the ipt.
452 |     | -                Restrictions: None
453 |     | -            gradient: The gradient to be used for updating the ipt.
454 |     | -                Type: gradient object
455 |     | -                Purpose: To calculate the importance parameter table based on the gradient.
456 |     | -                Restrictions: None
457 |     | -
458 |     | -        Returns:
459 |     | -            None. The method does not return any value.
460 |     | -
461 |     | -        Raises:
462 |     | -            None
463 |     | -        """
    | 443 | +    def update_ipt(self, model):
464 | 444 |         # Update the sensitivity and uncertainty for every weight
465 |     | -        for n, p in model.parameters_and_names():
    | 445 | +        for n, p in model.named_parameters():
466 | 446 |             if "lora_" in n and self.adapter_name in n:
467 | 447 |                 if n not in self.ipt:
468 |     | -                    grad = get_grad(gradient, p)
469 | 448 |                     self.ipt[n] = ops.zeros_like(p)
470 | 449 |                     self.exp_avg_ipt[n] = ops.zeros_like(p)
471 | 450 |                     self.exp_avg_unc[n] = ops.zeros_like(p)
472 |     | -                self.ipt[n] = (p * grad).abs()
    | 451 | +                with no_grad():
    | 452 | +                    self.ipt[n] = (p * p.grad).abs()
473 | 453 |                 # Sensitivity smoothing
474 | 454 |                 self.exp_avg_ipt[n] = self.beta1 * self.exp_avg_ipt[n] + (1 - self.beta1) * self.ipt[n]
475 | 455 |                 # Uncertainty quantification
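One consequence of `update_ipt(model)` reading `p.grad` directly: it must be called after `backward()` has populated the gradients and before the optimizer zeroes them. Below is a self-contained PyTorch sketch of the same sensitivity/uncertainty bookkeeping; the toy model, loss, and `beta1`/`beta2` values are stand-ins, and the real method additionally filters parameter names for `"lora_"` and the adapter name, but the EMA formulas mirror the hunk above:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)          # toy stand-in for a model with LoRA params
ipt, exp_avg_ipt, exp_avg_unc = {}, {}, {}
beta1, beta2 = 0.85, 0.85        # smoothing factors (illustrative values)

loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()                  # p.grad must be populated before the update

with torch.no_grad():
    for n, p in model.named_parameters():
        if n not in ipt:         # lazy init, as in the hunk above
            ipt[n] = torch.zeros_like(p)
            exp_avg_ipt[n] = torch.zeros_like(p)
            exp_avg_unc[n] = torch.zeros_like(p)
        # Per-weight importance: |w * dL/dw|
        ipt[n] = (p * p.grad).abs()
        # Sensitivity smoothing (exponential moving average)
        exp_avg_ipt[n] = beta1 * exp_avg_ipt[n] + (1 - beta1) * ipt[n]
        # Uncertainty: EMA of the gap between raw and smoothed importance
        exp_avg_unc[n] = beta2 * exp_avg_unc[n] + (1 - beta2) * (ipt[n] - exp_avg_ipt[n]).abs()
```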
@@ -586,27 +566,10 @@ def mask_to_budget(self, model, budget):
586 | 566 |             rank_pattern[n] = (~(triplet_ipt[n] <= mask_threshold)).view(-1).asnumpy().tolist()
587 | 567 |         return rank_pattern
588 | 568 |
589 |     | -    def update_and_allocate(self, model, global_step, gradient, force_mask=False):
590 |     | -        r"""
591 |     | -        This method updates the model and allocates budget based on the global step and gradient information.
592 |     | -
593 |     | -        Args:
594 |     | -            - self: Reference to the current instance of the class.
595 |     | -            - model: The model to be updated and allocated the budget.
596 |     | -            - global_step: The current global step of the training process.
597 |     | -            - gradient: The gradient information used for updating the model.
598 |     | -            - force_mask: A boolean flag indicating whether to force the masking operation. Default is False.
599 |     | -
600 |     | -        Returns:
601 |     | -            - budget: The allocated budget for the current step.
602 |     | -            - rank_pattern: The rank pattern based on the budget allocation, or None if no masking is needed.
603 |     | -
604 |     | -        Raises:
605 |     | -            - No specific exceptions are raised by this method.
606 |     | -        """
    | 569 | +    def update_and_allocate(self, model, global_step, force_mask=False):
607 | 570 |         # # Update the importance score and allocate the budget
608 | 571 |         if global_step < self.peft_config.total_step - self.peft_config.tfinal:
609 |     | -            self.update_ipt(model,gradient)
    | 572 | +            self.update_ipt(model)
610 | 573 |         budget, mask_ind = self.budget_schedule(global_step)
611 | 574 |         # Allocate the budget according to importance scores
612 | 575 |         if mask_ind or force_mask:
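For context on the unchanged `budget_schedule(global_step)` call: AdaLoRA holds the full rank budget during warmup, decays it cubically, then pins the target budget for the last `tfinal` steps, which is also why `update_ipt` stops running once `global_step` reaches `total_step - tfinal`. A standalone sketch of that schedule, with illustrative hyperparameter values rather than mindnlp defaults:

```python
def budget_schedule(step, init_bgt=12, target_bgt=4, tinit=200,
                    tfinal=500, total_step=3000, deltaT=10):
    if step <= tinit:                  # warmup: keep the full budget
        return init_bgt, False
    if step > total_step - tfinal:     # final phase: hold the target budget
        return target_bgt, True
    # In between, decay the budget cubically toward the target
    mul = 1 - (step - tinit) / (total_step - tfinal - tinit)
    budget = int((init_bgt - target_bgt) * mul**3 + target_bgt)
    return budget, step % deltaT == 0  # only mask every deltaT steps

print(budget_schedule(1000))  # (6, True) with these illustrative values
```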