Skip to content

Commit fd5fa73

Browse files
authored
fix error caused by MindSpore 2.5–2.6 (#1985)
1 parent 93c0d75 commit fd5fa73

File tree

4 files changed

+12
-9
lines changed

4 files changed

+12
-9
lines changed

llm/inference/llama2/simple_inference_with_static_cache.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,14 @@ def decode_one_tokens(model, cur_token, input_pos, cache_position, past_key_valu
4545
generated_ids[:, seq_length] = next_token[:, 0]
4646

4747
cache_position = mindspore.tensor([seq_length + 1])
48+
s = time.time()
4849
for _ in range(1, NUM_TOKENS_TO_GENERATE):
49-
s = time.time()
5050
next_token = decode_one_tokens(model, next_token, None, cache_position, past_key_values)
51-
t = time.time()
52-
print(t - s)
5351
generated_ids[:, cache_position] = next_token.int()
5452
cache_position += 1
53+
mindspore.hal.synchronize()
54+
t = time.time()
55+
print((t - s) / (NUM_TOKENS_TO_GENERATE - 1))
5556

5657
text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
5758
print(text)

llm/inference/llama3/run_llama3.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import mindspore
22
from mindnlp.transformers import AutoTokenizer, AutoModelForCausalLM
3+
import faulthandler
4+
5+
faulthandler.enable()
36

47
model_id = "LLM-Research/Meta-Llama-3-8B-Instruct"
58

mindnlp/core/ops/other.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from mindspore.common.initializer import initializer
77
from mindspore.ops._primitive_cache import _get_cache_prim
88

9-
from mindnlp.configs import use_pyboost, ON_ORANGE_PI
9+
from mindnlp.configs import use_pyboost, ON_ORANGE_PI, SUPPORT_BF16
1010
from .reduction import any
1111
from .comparison import eq
1212

@@ -623,12 +623,12 @@ def meshgrid(*tensors, indexing=None):
623623
# repeat_interleave
624624
has_repeat_interleave = hasattr(mindspore.mint, 'repeat_interleave')
625625
def repeat_interleave(input, repeats, dim=None):
626-
if use_pyboost() and has_repeat_interleave:
626+
if use_pyboost() and has_repeat_interleave and SUPPORT_BF16:
627627
return mindspore.mint.repeat_interleave(input, repeats, dim=dim)
628628
if input.dtype == mindspore.bool_:
629629
input = input.int()
630-
return input.repeat(repeats, dim).bool()
631-
return input.repeat(repeats, dim)
630+
return input.repeat_interleave(repeats, dim).bool()
631+
return input.repeat_interleave(repeats, dim)
632632

633633
# roll
634634
DEVICE_TARGET = mindspore.get_context('device_target')

mindnlp/core/serialization.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,8 +1412,7 @@ def convert(info: dict[str, Any]):
14121412

14131413
try:
14141414
if info['dtype'] == 'BF16' and not SUPPORT_BF16:
1415-
logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
1416-
ms_dtype = mindspore.float16
1415+
raise ValueError('not support bfloat16.')
14171416
out = Tensor.convert_bytes_to_tensor(buf, tuple(shape), ms_dtype)
14181417
except:
14191418
array = np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)

0 commit comments

Comments (0)