mindnlp/__init__.py (1 change: 1 addition & 0 deletions)
@@ -30,3 +30,4 @@
from . import transformers
from . import diffusers

__version__ = '0.5.0rc2'
mindtorch/_apis/npu.py (12 changes: 12 additions & 0 deletions)
@@ -1566,6 +1566,12 @@ def flash_attention_score(query, key, value, real_shift, drop_mask, padding_mask
        return pyboost.flash_attention_score_impl(query, key, value, real_shift, drop_mask, padding_mask, attn_mask, prefix, actual_seq_qlen, actual_seq_kvlen, head_num, keep_prob, scale_value, pre_tokens, next_tokens, inner_precise, input_layout, sparse_mode)
    return legacy.flash_attention_score(query, key, value, real_shift, drop_mask, padding_mask, attn_mask, prefix, actual_seq_qlen, actual_seq_kvlen, head_num, keep_prob, scale_value, pre_tokens, next_tokens, inner_precise, input_layout, sparse_mode)

def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1, quant_scale1, deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value, pre_tokens, next_tokens, input_layout, num_key_value_heads, sparse_mode, inner_precise):
    return pyboost.prompt_flash_attention_impl(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1, quant_scale1, deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value, pre_tokens, next_tokens, input_layout, num_key_value_heads, sparse_mode, inner_precise)

def incre_flash_attention(query, key, value, attn_mask, actual_seq_lengths, pse_shift, dequant_scale1, quant_scale1, dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table, kv_padding_size, num_heads, input_layout, scale_value, num_key_value_heads, block_size, inner_precise):
    return pyboost.incre_flash_attention_impl(query, key, value, attn_mask, actual_seq_lengths, pse_shift, dequant_scale1, quant_scale1, dequant_scale2, quant_scale2, quant_offset2, antiquant_scale, antiquant_offset, block_table, kv_padding_size, num_heads, input_layout, scale_value, num_key_value_heads, block_size, inner_precise)

def randperm(n, generator, dtype):
    seed, offset = generator._step(12)  # pylint: disable=protected-access
    if use_pyboost():
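The two new wrappers above are thin pass-throughs to the corresponding pyboost kernels: prompt_flash_attention appears to cover full-sequence (prefill) attention and incre_flash_attention single-step incremental (decode) attention against cached key/value states, mirroring the Ascend PromptFlashAttention and IncreFlashAttention operators. A minimal usage sketch follows, assuming an Ascend NPU build of mindtorch where mindtorch._apis.npu is importable; the attend helper, the 'BSH' layout, and every placeholder value (None for the optional quantization inputs, the pre_tokens/next_tokens/sparse_mode/inner_precise choices) are illustrative assumptions, not part of this PR.

# Hypothetical sketch (not part of the PR): routing between the two new
# wrappers in a KV-cache decoder, assuming an Ascend build of mindtorch.
from mindtorch._apis import npu

def attend(query, key, value, attn_mask, num_heads, scale, is_prefill):
    if is_prefill:
        # Full-prompt (prefill) attention over the whole input sequence.
        return npu.prompt_flash_attention(
            query, key, value, attn_mask,
            None, None,           # actual_seq_lengths, actual_seq_lengths_kv (assumed optional)
            None,                 # pse_shift
            None, None, None, None, None,  # deq/quant scales and offset (unquantized path)
            num_heads, scale,
            2147483647, 0,        # pre_tokens, next_tokens (illustrative)
            'BSH', num_heads,     # input_layout, num_key_value_heads
            0, 1)                 # sparse_mode, inner_precise (illustrative)
    # Single-step (incremental) attention against the cached K/V.
    return npu.incre_flash_attention(
        query, key, value, attn_mask,
        None,                     # actual_seq_lengths
        None,                     # pse_shift
        None, None, None, None, None,  # dequant/quant scales and offset
        None, None,               # antiquant_scale, antiquant_offset
        None, 0,                  # block_table, kv_padding_size
        num_heads, 'BSH', scale, num_heads,
        0, 1)                     # block_size, inner_precise (illustrative)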
@@ -1617,3 +1623,9 @@ def new_ones(input, size, dtype):

def kl_div(input, target, reduction, log_target):
    return pyboost.kl_div_op(input, target, reduction, log_target)

def repeat_interleave_int(input, repeats, dim, output_size):
    return pyboost.repeat_interleave_int_op(input, repeats, dim, output_size)

def repeat_interleave_tensor(input, repeats, dim, output_size):
    return pyboost.repeat_interleave_tensor_op(input, repeats, dim, output_size)
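The two repeat_interleave entry points split on the type of repeats, following the torch-style convention where repeats may be a scalar or a per-element tensor. A minimal dispatch sketch, under the same Ascend mindtorch assumption; the repeat_interleave wrapper below is hypothetical and only the two npu functions come from this diff.

# Hypothetical sketch (not part of the PR): a frontend routing torch-style
# repeat_interleave calls to the two new NPU primitives.
from mindtorch._apis import npu

def repeat_interleave(input, repeats, dim=None, output_size=None):
    if isinstance(repeats, int):
        # Scalar repeats: every element along `dim` is repeated `repeats` times.
        return npu.repeat_interleave_int(input, repeats, dim, output_size)
    # Tensor repeats: element i along `dim` is repeated repeats[i] times.
    return npu.repeat_interleave_tensor(input, repeats, dim, output_size)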