Skip to content

Commit 08ded8d

Browse files
authored
Merge branch 'mindspore-lab:master' into master
2 parents 68ce4d9 + 1a66f9f commit 08ded8d

File tree

12 files changed

+5044
-157
lines changed

12 files changed

+5044
-157
lines changed

mindnlp/peft_lora_mindnlp.ipynb

Lines changed: 2572 additions & 0 deletions
Large diffs are not rendered by default.

mindnlp/peft_lora_pytorch.ipynb

Lines changed: 657 additions & 0 deletions
Large diffs are not rendered by default.

mindnlp/transformers/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,7 @@
152152
mobilenet_v1,
153153
mobilenet_v2,
154154
mobilevit,
155+
mobilevitv2,
155156
mpnet,
156157
mpt,
157158
mllama,
@@ -395,6 +396,7 @@
395396
from .mobilenet_v1 import *
396397
from .mobilenet_v2 import *
397398
from .mobilevit import *
399+
from .mobilevitv2 import *
398400
from .mpnet import *
399401
from .mllama import *
400402
from .mluke import *
@@ -640,6 +642,7 @@
640642
__all__.extend(mobilenet_v1.__all__)
641643
__all__.extend(mobilenet_v2.__all__)
642644
__all__.extend(mobilevit.__all__)
645+
__all__.extend(mobilevitv2.__all__)
643646
__all__.extend(mpnet.__all__)
644647
__all__.extend(mpt.__all__)
645648
__all__.extend(mt5.__all__)

mindnlp/transformers/models/auto/modeling_auto.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -468,6 +468,7 @@
468468
("ctrl", "CTRLLMHeadModel"),
469469
("data2vec-text", "Data2VecTextForCausalLM"),
470470
("dbrx", "DbrxForCausalLM"),
471+
("deepseek_v2", "DeepseekV2ForCausalLM"),
471472
("electra", "ElectraForCausalLM"),
472473
("ernie", "ErnieForCausalLM"),
473474
("falcon", "FalconForCausalLM"),

mindnlp/transformers/models/big_bird/modeling_big_bird.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -470,13 +470,13 @@ def bigbird_block_sparse_attention(
470470
[
471471
to_mask[:, :, :, : 3 * to_block_size],
472472
to_mask[:, :, :, -to_block_size:],
473-
ops.ones([bsz, 1, 1, n_rand_blocks * to_block_size], dtype=to_mask.dtype),
473+
ops.ones((bsz, 1, 1, n_rand_blocks * to_block_size), dtype=to_mask.dtype),
474474
],
475475
dim=3,
476476
)
477477
second_rand_pad = ops.cat(
478478
[
479-
ops.ones([bsz, n_heads, from_block_size, 4 * to_block_size], dtype=rand_mask.dtype),
479+
ops.ones((bsz, n_heads, from_block_size, 4 * to_block_size), dtype=rand_mask.dtype),
480480
rand_mask[:, :, 0],
481481
],
482482
dim=3,
@@ -604,13 +604,13 @@ def bigbird_block_sparse_attention(
604604
[
605605
to_mask[:, :, :, :to_block_size],
606606
to_mask[:, :, :, -3 * to_block_size :],
607-
ops.ones([bsz, 1, 1, n_rand_blocks * to_block_size], dtype=to_mask.dtype),
607+
ops.ones((bsz, 1, 1, n_rand_blocks * to_block_size), dtype=to_mask.dtype),
608608
],
609609
dim=3,
610610
)
611611
second_last_rand_pad = ops.cat(
612612
[
613-
ops.ones([bsz, n_heads, from_block_size, 4 * to_block_size], dtype=rand_mask.dtype),
613+
ops.ones((bsz, n_heads, from_block_size, 4 * to_block_size), dtype=rand_mask.dtype),
614614
rand_mask[:, :, -1],
615615
],
616616
dim=3,

mindnlp/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -450,13 +450,13 @@ def bigbird_block_sparse_attention(
450450
[
451451
to_mask[:, :, :, : 3 * to_block_size],
452452
to_mask[:, :, :, -to_block_size:],
453-
ops.ones([bsz, 1, 1, n_rand_blocks * to_block_size], dtype=to_mask.dtype),
453+
ops.ones((bsz, 1, 1, n_rand_blocks * to_block_size), dtype=to_mask.dtype),
454454
],
455455
dim=3,
456456
)
457457
second_rand_pad = ops.cat(
458458
[
459-
ops.ones([bsz, n_heads, from_block_size, 4 * to_block_size], dtype=rand_mask.dtype),
459+
ops.ones((bsz, n_heads, from_block_size, 4 * to_block_size), dtype=rand_mask.dtype),
460460
rand_mask[:, :, 0],
461461
],
462462
dim=3,
@@ -584,13 +584,13 @@ def bigbird_block_sparse_attention(
584584
[
585585
to_mask[:, :, :, :to_block_size],
586586
to_mask[:, :, :, -3 * to_block_size :],
587-
ops.ones([bsz, 1, 1, n_rand_blocks * to_block_size], dtype=to_mask.dtype),
587+
ops.ones((bsz, 1, 1, n_rand_blocks * to_block_size), dtype=to_mask.dtype),
588588
],
589589
dim=3,
590590
)
591591
second_last_rand_pad = ops.cat(
592592
[
593-
ops.ones([bsz, n_heads, from_block_size, 4 * to_block_size], dtype=rand_mask.dtype),
593+
ops.ones((bsz, n_heads, from_block_size, 4 * to_block_size), dtype=rand_mask.dtype),
594594
rand_mask[:, :, -1],
595595
],
596596
dim=3,

0 commit comments

Comments
 (0)