@@ -280,7 +280,7 @@ def _make_causal_mask(
     """
     bsz, tgt_len = input_ids_shape
     mask = ops.full(
-        (tgt_len, tgt_len), float(ops.finfo(dtype).min), dtype)
+        (tgt_len, tgt_len), float(ops.finfo(dtype).min), dtype=dtype)
     mask_cond = ops.arange(mask.shape[-1])
     mask = ops.masked_fill(mask, mask_cond < (mask_cond + 1).view(mask.shape[-1], 1), 0.)
     mask = mask.to(dtype)
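
Note on this hunk: the only change is passing dtype to ops.full as a keyword argument rather than positionally, so the fill value and the target dtype can no longer be confused. A minimal sketch of the fixed call, reusing the names from the hunk with illustrative values:

    import mindspore
    # illustrative values; `ops` is the module already imported by this file (used throughout the diff)
    tgt_len, dtype = 4, mindspore.float16
    # fill a (tgt_len, tgt_len) matrix with the smallest value representable in `dtype`
    mask = ops.full((tgt_len, tgt_len), float(ops.finfo(dtype).min), dtype=dtype)
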
@@ -309,7 +309,7 @@ def _expand_mask(mask: Tensor, dtype: mstype, tgt_len: Optional[int] = None):
 
     return inverted_mask.masked_fill(
         inverted_mask.to(mindspore.bool_),
-        ops.finfo(dtype).min)
+        float(ops.finfo(dtype).min))
 
 def _get_interleave(n):
     """
@@ -688,8 +688,7 @@ def forward(
                     f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.shape}"
                 )
             attn_weights = attn_weights + attention_mask
-            attn_weights = ops.maximum(attn_weights,
-                                       Tensor(np.finfo(mindspore.dtype_to_nptype(attn_weights.dtype)).min))
+            attn_weights = ops.maximum(attn_weights, float(ops.finfo(attn_weights.dtype).min))
 
         # upcast attention to fp32
         attn_weights = F.softmax(attn_weights, dim=-1).astype(query_states.dtype)
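
Note on this hunk: both versions clamp the masked attention scores from below at the smallest value representable in the scores' dtype; the new line just obtains that scalar directly from ops.finfo instead of round-tripping through NumPy (np.finfo plus mindspore.dtype_to_nptype) and wrapping it in a Tensor. A rough sketch of the equivalence, assuming ops.finfo reports the same limits as np.finfo for the dtype in question:

    # old path: NumPy limit, wrapped in a Tensor
    old_min = Tensor(np.finfo(mindspore.dtype_to_nptype(attn_weights.dtype)).min)
    # new path: the same limit as a plain Python float
    new_min = float(ops.finfo(attn_weights.dtype).min)
    attn_weights = ops.maximum(attn_weights, new_min)
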
@@ -882,7 +881,7 @@ def forward(
                 else:
                     attention_mask = attention_mask[:, -1:, :]
                 attn_weights = attn_weights + attention_mask.astype(attn_weights.dtype)
-                attn_weights = ops.maximum(attn_weights, mindspore.tensor(np.finfo(mindspore.dtype_to_nptype(attn_weights.dtype)).min))
+                attn_weights = ops.maximum(attn_weights, float(ops.finfo(attn_weights.dtype).min))
 
             attn_weights = F.softmax(attn_weights, dim=-1)
 
@@ -1561,7 +1560,7 @@ def forward(
            src_len, tgt_len = alibi_mask.shape[-2:]
            expanded_mask = expanded_mask.unsqueeze(1).broadcast_to((bsz, 1, src_len, tgt_len)).to(alibi_mask.dtype)
            inverted_mask = 1.0 - expanded_mask
-            inverted_mask = inverted_mask.masked_fill(inverted_mask.to(mindspore.bool_), np.finfo(mindspore.dtype_to_nptype(alibi_mask.dtype)).min)
+            inverted_mask = inverted_mask.masked_fill(inverted_mask.to(mindspore.bool_), float(ops.finfo(alibi_mask.dtype).min))
            attention_mask = inverted_mask + alibi_mask.unsqueeze(0)
        else:
            attention_mask = alibi_mask
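
Note on this hunk: the same substitution, applied to the additive mask that is combined with the ALiBi bias. Positions to be ignored are filled with the dtype's minimum, so after softmax their attention weight collapses to (approximately) zero. A tiny illustration with made-up scores, reusing the Tensor and F.softmax names already used in this diff:

    scores = Tensor([0.5, float(ops.finfo(mindspore.float32).min)], mindspore.float32)
    probs = F.softmax(scores, dim=-1)  # roughly [1.0, 0.0]: the masked slot contributes nothing
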
@@ -1854,7 +1853,7 @@ def prepare_inputs_for_generation(
         position_ids = kwargs.get("position_ids", None)
         if attention_mask is not None and position_ids is None:
             # create position_ids on the fly for batch generation
-            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids = attention_mask.int().cumsum(-1) - 1
             position_ids = position_ids.masked_fill(attention_mask == 0, 1)
             if past_key_values:
                 position_ids = position_ids[:, -1].unsqueeze(-1)
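
Note on this hunk: the dtype of the running position index changes from long to int, but the recipe is unchanged: a cumulative sum over the 0/1 attention mask yields each real token's position, and padded slots are then overwritten with a harmless value. A worked example with an illustrative left-padded mask:

    # attention_mask           = [[0, 0, 1, 1, 1]]
    # cumsum(-1) - 1           = [[-1, -1, 0, 1, 2]]
    # masked_fill(mask == 0, 1) = [[1, 1, 0, 1, 2]]
    position_ids = attention_mask.int().cumsum(-1) - 1
    position_ids = position_ids.masked_fill(attention_mask == 0, 1)
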