@@ -21,12 +21,12 @@
 import math
 import warnings
 from typing import List, Optional, Tuple, Union, Dict
-import numpy as np
 
 import mindspore
-from mindspore import Tensor
 from mindspore.common.initializer import initializer, Normal
 
+from mindnlp.utils import logging
+
 from mindnlp.core import nn, ops
 from mindnlp.core.nn import functional as F
 from mindnlp.core.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
@@ -41,9 +41,8 @@
 from ...modeling_utils import PreTrainedModel
 from ...ms_utils import ALL_LAYERNORM_LAYERS
 
-from mindnlp.utils import logging
 from .configuration_minicpm3 import MiniCPM3Config
-import re
+
 
 
 logger = logging.get_logger(__name__)
@@ -205,7 +204,7 @@ def _set_cos_sin_cache(self, seq_len, dtype):
             ext_factors = mindspore.Tensor(self.long_factor, dtype=mindspore.float32)
         else:
             ext_factors = mindspore.Tensor(self.short_factor, dtype=mindspore.float32)
-
+
         freqs = ops.mul(
             ops.outer(t, 1.0 / ext_factors),
             self.inv_freq.to(dtype)
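For context on the hunk above: `_set_cos_sin_cache` rescales each rotary-frequency dimension by `long_factor` or `short_factor` before building the cos/sin cache. A minimal NumPy sketch of that computation follows; the dimension, factor values, and the final concatenation layout are illustrative assumptions, not values from this diff.

import numpy as np

# Illustrative sizes and factors (assumptions); real values come from MiniCPM3Config.
dim, seq_len = 8, 4
inv_freq = 1.0 / (10000.0 ** (np.arange(0, dim, 2) / dim))  # (dim/2,) base frequencies
ext_factors = np.array([1.0, 1.2, 1.5, 2.0])                # (dim/2,) per-dim scaling

t = np.arange(seq_len, dtype=np.float32)                    # token positions
# Mirrors the hunk: ops.mul(ops.outer(t, 1.0 / ext_factors), inv_freq)
freqs = np.outer(t, 1.0 / ext_factors) * inv_freq           # (seq_len, dim/2)
emb = np.concatenate([freqs, freqs], axis=-1)               # usual RoPE cache layout (assumed)
cos_cached, sin_cached = np.cos(emb), np.sin(emb)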
@@ -568,7 +567,7 @@ def forward(
             use_cache=use_cache,
             **kwargs,
         )
-
+
         hidden_states = residual + hidden_states * (self.scale_depth / math.sqrt(self.num_hidden_layers))
 
         # Fully Connected
@@ -766,7 +765,7 @@ def forward(
                 all_self_attns += (layer_outputs[1],)
 
         hidden_states = self.norm(hidden_states)
-
+
         # add hidden states from the last decoder layer
         if output_hidden_states:
             all_hidden_states += (hidden_states,)
@@ -884,7 +883,6 @@ def forward(
             shift_logits = shift_logits.view(-1, self.config.vocab_size)
             shift_labels = shift_labels.view(-1)
             # Enable model parallelism
-            shift_labels = shift_labels
             loss = loss_fct(shift_logits, shift_labels)
 
         if not return_dict:
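Aside from the hunk above, which drops the no-op `shift_labels = shift_labels` left over from upstream model-parallelism code: the surrounding lines compute the standard causal-LM loss, where positions 0..T-2 predict tokens 1..T-1. A minimal sketch, with shapes assumed as logits (B, T, V) and labels (B, T):

# Minimal sketch of the shifted cross-entropy around this hunk (shapes assumed).
from mindnlp.core.nn import CrossEntropyLoss

def causal_lm_loss(logits, labels, vocab_size):
    shift_logits = logits[..., :-1, :]   # positions 0..T-2 ...
    shift_labels = labels[..., 1:]       # ... predict tokens 1..T-1
    loss_fct = CrossEntropyLoss()
    # Flatten to (B*(T-1), V) and (B*(T-1),) as in the model code above.
    return loss_fct(shift_logits.reshape(-1, vocab_size), shift_labels.reshape(-1))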
@@ -963,7 +961,7 @@ def _reorder_cache(past_key_values, beam_idx):
                 tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),
             )
         return reordered_past
-
+
     def chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "user",
              max_length: int = 4096, num_beams=1, do_sample=True, top_p=0.8, temperature=0.3, logits_processor=None,
              **kwargs):
@@ -975,7 +973,7 @@ def chat(self, tokenizer, query: str, history: List[Dict] = None, role: str = "u
         else:
             gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                           "temperature": temperature, "logits_processor": logits_processor, **kwargs}
-
+
         history.append({"role": role, "content": query})
         history_str = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer(history_str, return_tensors='ms')
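A possible invocation of the `chat` helper shown above, for reference. The checkpoint id and the `(response, history)` return shape are assumptions based on similar MiniCPM chat APIs; this diff does not show the method's return statement.

# Hypothetical usage sketch; checkpoint id and return shape are assumptions.
from mindnlp.transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM3-4B")
model = AutoModelForCausalLM.from_pretrained("openbmb/MiniCPM3-4B")

response, history = model.chat(tokenizer, "Hello!", history=[], role="user",
                               max_length=4096, do_sample=True,
                               top_p=0.8, temperature=0.3)
print(response)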
@@ -1057,7 +1055,6 @@ def forward(
 
         loss = None
         if labels is not None:
-            labels = labels
             if self.config.problem_type is None:
                 if self.num_labels == 1:
                     self.config.problem_type = "regression"
@@ -1089,10 +1086,10 @@ def forward(
             hidden_states=transformer_outputs.hidden_states,
             attentions=transformer_outputs.attentions,
         )
-
+
 __all__ = [
     "MiniCPM3Model",
     "MiniCPM3ForCausalLM",
     "MiniCPM3ForSequenceClassification",
     "MiniCPM3PreTrainedModel",
-]
+]