Skip to content

Commit 22e77d8

Browse files
authored
Merge branch 'mindspore-lab:master' into master
2 parents 67327f3 + 95f0253 commit 22e77d8

File tree

8 files changed

+4745
-0
lines changed

8 files changed

+4745
-0
lines changed

mindnlp/peft_lora_mindnlp.ipynb

Lines changed: 2572 additions & 0 deletions
Large diffs are not rendered by default.

mindnlp/peft_lora_pytorch.ipynb

Lines changed: 657 additions & 0 deletions
Large diffs are not rendered by default.

mindnlp/transformers/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@
152152
mobilenet_v1,
153153
mobilenet_v2,
154154
mobilevit,
155+
mobilevitv2,
155156
mpnet,
156157
mpt,
157158
mllama,
@@ -395,6 +396,7 @@
395396
from .mobilenet_v1 import *
396397
from .mobilenet_v2 import *
397398
from .mobilevit import *
399+
from .mobilevitv2 import *
398400
from .mpnet import *
399401
from .mllama import *
400402
from .mluke import *
@@ -640,6 +642,7 @@
640642
__all__.extend(mobilenet_v1.__all__)
641643
__all__.extend(mobilenet_v2.__all__)
642644
__all__.extend(mobilevit.__all__)
645+
__all__.extend(mobilevitv2.__all__)
643646
__all__.extend(mpnet.__all__)
644647
__all__.extend(mpt.__all__)
645648
__all__.extend(mt5.__all__)
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2024 Huawei Technologies Co., Ltd
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================
15+
"""
16+
MobileViTV2 Model init
17+
"""
18+
from . import configuration_mobilevitv2, modeling_mobilevitv2
19+
from ..mobilevit import feature_extraction_mobilevit, image_processing_mobilevit
20+
21+
from .configuration_mobilevitv2 import *
22+
from ..mobilevit.feature_extraction_mobilevit import *
23+
from ..mobilevit.image_processing_mobilevit import *
24+
from .modeling_mobilevitv2 import *
25+
26+
# Public API of the package: the union of everything exported by the
# MobileViTV2 configuration/modeling modules and by the MobileViT
# feature-extraction/image-processing modules imported above.
__all__ = [
    *configuration_mobilevitv2.__all__,
    *feature_extraction_mobilevit.__all__,
    *image_processing_mobilevit.__all__,
    *modeling_mobilevitv2.__all__,
]
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# coding=utf-8
2+
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""MobileViTV2 model configuration"""
16+
17+
18+
from ...configuration_utils import PretrainedConfig
19+
20+
from ....utils import logging
21+
22+
23+
logger = logging.get_logger(__name__)
24+
25+
26+
class MobileViTV2Config(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`MobileViTV2Model`]. It is used to instantiate a
    MobileViTV2 model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the MobileViTV2
    [apple/mobilevitv2-1.0](https://huggingface.co/apple/mobilevitv2-1.0) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        image_size (`int`, *optional*, defaults to 256):
            The size (resolution) of each image.
        patch_size (`int`, *optional*, defaults to 2):
            The size (resolution) of each patch.
        expand_ratio (`float`, *optional*, defaults to 2.0):
            Expansion factor for the MobileNetv2 layers.
        hidden_act (`str` or `function`, *optional*, defaults to `"swish"`):
            The non-linear activation function (function or string) in the Transformer encoder and convolution layers.
        conv_kernel_size (`int`, *optional*, defaults to 3):
            The size of the convolutional kernel in the MobileViTV2 layer.
        output_stride (`int`, *optional*, defaults to 32):
            The ratio of the spatial resolution of the output to the resolution of the input image.
        classifier_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for attached classifiers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the layer normalization layers.
        aspp_out_channels (`int`, *optional*, defaults to 512):
            Number of output channels used in the ASPP layer for semantic segmentation.
        atrous_rates (`List[int]`, *optional*, defaults to `[6, 12, 18]`):
            Dilation (atrous) factors used in the ASPP layer for semantic segmentation. Passing `None` selects the
            default.
        aspp_dropout_prob (`float`, *optional*, defaults to 0.1):
            The dropout ratio for the ASPP layer for semantic segmentation.
        semantic_loss_ignore_index (`int`, *optional*, defaults to 255):
            The index that is ignored by the loss function of the semantic segmentation model.
        n_attn_blocks (`List[int]`, *optional*, defaults to `[2, 4, 3]`):
            The number of attention blocks in each MobileViTV2Layer. Passing `None` selects the default.
        base_attn_unit_dims (`List[int]`, *optional*, defaults to `[128, 192, 256]`):
            The base multiplier for dimensions of attention blocks in each MobileViTV2Layer. Passing `None` selects
            the default.
        width_multiplier (`float`, *optional*, defaults to 1.0):
            The width multiplier for MobileViTV2.
        ffn_multiplier (`int`, *optional*, defaults to 2):
            The FFN multiplier for MobileViTV2.
        attn_dropout (`float`, *optional*, defaults to 0.0):
            The dropout in the attention layer.
        ffn_dropout (`float`, *optional*, defaults to 0.0):
            The dropout between FFN layers.

    Example:

    ```python
    >>> from transformers import MobileViTV2Config, MobileViTV2Model

    >>> # Initializing a mobilevitv2-small style configuration
    >>> configuration = MobileViTV2Config()

    >>> # Initializing a model from the mobilevitv2-small style configuration
    >>> model = MobileViTV2Model(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "mobilevitv2"

    def __init__(
        self,
        num_channels=3,
        image_size=256,
        patch_size=2,
        expand_ratio=2.0,
        hidden_act="swish",
        conv_kernel_size=3,
        output_stride=32,
        classifier_dropout_prob=0.1,
        initializer_range=0.02,
        layer_norm_eps=1e-5,
        aspp_out_channels=512,
        atrous_rates=None,
        aspp_dropout_prob=0.1,
        semantic_loss_ignore_index=255,
        n_attn_blocks=None,
        base_attn_unit_dims=None,
        width_multiplier=1.0,
        ffn_multiplier=2,
        attn_dropout=0.0,
        ffn_dropout=0.0,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # List-valued defaults are created here, per instance, instead of in
        # the signature: a list literal used as a default is evaluated once at
        # definition time and would be shared (and mutable) across every
        # configuration built with the defaults.
        if atrous_rates is None:
            atrous_rates = [6, 12, 18]
        if n_attn_blocks is None:
            n_attn_blocks = [2, 4, 3]
        if base_attn_unit_dims is None:
            base_attn_unit_dims = [128, 192, 256]

        self.num_channels = num_channels
        self.image_size = image_size
        self.patch_size = patch_size
        self.expand_ratio = expand_ratio
        self.hidden_act = hidden_act
        self.conv_kernel_size = conv_kernel_size
        self.output_stride = output_stride
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.n_attn_blocks = n_attn_blocks
        self.base_attn_unit_dims = base_attn_unit_dims
        self.width_multiplier = width_multiplier
        self.ffn_multiplier = ffn_multiplier
        self.ffn_dropout = ffn_dropout
        self.attn_dropout = attn_dropout
        self.classifier_dropout_prob = classifier_dropout_prob

        # decode head attributes for semantic segmentation
        self.aspp_out_channels = aspp_out_channels
        self.atrous_rates = atrous_rates
        self.aspp_dropout_prob = aspp_dropout_prob
        self.semantic_loss_ignore_index = semantic_loss_ignore_index
143+
144+
145+
# Names exported by `from .configuration_mobilevitv2 import *`.
__all__ = ["MobileViTV2Config"]

0 commit comments

Comments
 (0)