Skip to content

Commit 0387034

Browse files
committed
feat(transformers): supplement glm4v processor
1 parent 1e0c5b3 commit 0387034

File tree

12 files changed

+838
-20
lines changed

12 files changed

+838
-20
lines changed

examples/transformers/glm4v/generate.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import argparse
22

33
import numpy as np
4-
from transformers import AutoProcessor
54

65
import mindspore as ms
76

8-
from mindone.transformers import Glm4vForConditionalGeneration
7+
from mindone.transformers import AutoProcessor, Glm4vForConditionalGeneration
98

109

1110
def generate(args):

mindone/transformers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1531,8 +1531,10 @@
15311531
if version.parse(transformers.__version__) >= version.parse("4.53.0"):
15321532
from .models.glm4v import (
15331533
Glm4vForConditionalGeneration,
1534+
Glm4vImageProcessor,
15341535
Glm4vModel,
15351536
Glm4vPreTrainedModel,
1537+
Glm4vProcessor,
15361538
Glm4vTextModel,
15371539
Glm4vVisionModel,
15381540
)

mindone/transformers/integrations/flash_attention.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ def flash_attention_forward(
6969
if kwargs.get("is_causal", None) is not None:
7070
kwargs.pop("is_causal")
7171

72+
if not hasattr(module, "is_causal"):
73+
module.is_causal = False
74+
7275
attn_output = _flash_attention_forward(
7376
query,
7477
key,

mindone/transformers/models/auto/image_processing_auto.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,20 @@
6262
("dpt", ("DPTImageProcessor",)),
6363
("efficientnet", ("EfficientNetImageProcessor",)),
6464
("flava", ("FlavaImageProcessor",)),
65-
("oneformer", ("OneFormerImageProcessor",)),
65+
("glm4v", ("Glm4vImageProcessor",)),
6666
("llava_next", ("LlavaNextImageProcessor",)),
6767
("llava_next_video", ("LlavaNextVideoImageProcessor",)),
6868
("llava_onevision", ("LlavaOnevisionImageProcessor",)),
6969
("maskformer", ("MaskFormerImageProcessor",)),
7070
("mllama", ("MllamaImageProcessor",)),
71-
("qwen2_5_vl", ("Qwen2VLImageProcessor",)),
71+
("oneformer", ("OneFormerImageProcessor",)),
7272
("owlv2", ("Owlv2ImageProcessor",)),
7373
("owlvit", ("OwlViTImageProcessor",)),
74-
("videomae", ("VideoMAEImageProcessor",)),
74+
("qwen2_5_vl", ("Qwen2VLImageProcessor",)),
7575
("sam", ("SamImageProcessor",)),
7676
("segformer", ("SegformerImageProcessor",)),
7777
("siglip", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
78+
("videomae", ("VideoMAEImageProcessor",)),
7879
("yolos", ("YolosImageProcessor",)),
7980
]
8081
)

mindone/transformers/models/auto/processing_auto.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,19 @@
5454
("chinese_clip", "ChineseCLIPProcessor"),
5555
("colpali", "ColPaliProcessor"),
5656
("flava", "FlavaProcessor"),
57+
("glm4v", "Glm4vProcessor"),
5758
("idefics", "IdeficsProcessor"),
58-
("layoutlmv3", "LayoutMv3Processor"),
5959
("instructblip", "InstructBlipProcessor"),
60+
("layoutlmv3", "LayoutMv3Processor"),
6061
("llava_next", "LlavaNextProcessor"),
6162
("llava_next_video", "LlavaNextVideoProcessor"),
6263
("llava_onevision", "LlavaOnevisionProcessor"),
63-
("pop2piano", "Pop2PianoProcessor"),
64-
("qwen2_5_vl", "Qwen2_5_VLProcessor"),
64+
("oneformer", "OneFormerProcessor"),
6565
("owlv2", "Owlv2Processor"),
6666
("owlvit", "OwlViTProcessor"),
67+
("pop2piano", "Pop2PianoProcessor"),
68+
("qwen2_5_vl", "Qwen2_5_VLProcessor"),
6769
("sam", "SamProcessor"),
68-
("oneformer", "OneFormerProcessor"),
6970
("seamless_m4t", "SeamlessM4TProcessor"),
7071
("siglip", "SiglipProcessor"),
7172
]

mindone/transformers/models/glm4v/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
# See the License for the specific language governing permissions and
1616
# limitations under the License.
1717

18+
from .image_processing_glm4v import Glm4vImageProcessor
1819
from .modeling_glm4v import (
1920
Glm4vForConditionalGeneration,
2021
Glm4vModel,
2122
Glm4vPreTrainedModel,
2223
Glm4vTextModel,
2324
Glm4vVisionModel,
2425
)
26+
from .processing_glm4v import Glm4vProcessor

0 commit comments

Comments
 (0)