Skip to content

Commit 1ec8c2e

Browse files
committed
add hunyuanimage
1 parent 8f6e9c9 commit 1ec8c2e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+6342
-414
lines changed

docs/diffusers/_toctree.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@
269269
title: FluxTransformer2DModel
270270
- local: api/models/hunyuan_transformer2d
271271
title: HunyuanDiT2DModel
272+
- local: api/models/hunyuanimage_transformer_2d
273+
title: HunyuanImageTransformer2DModel
272274
- local: api/models/hunyuan_video_transformer_3d
273275
title: HunyuanVideoTransformer3DModel
274276
- local: api/models/latte_transformer3d
@@ -333,6 +335,10 @@
333335
title: AutoencoderKLCogVideoX
334336
- local: api/models/autoencoderkl_cosmos
335337
title: AutoencoderKLCosmos
338+
- local: api/models/autoencoder_kl_hunyuanimage
339+
title: AutoencoderKLHunyuanImage
340+
- local: api/models/autoencoder_kl_hunyuanimage_refiner
341+
title: AutoencoderKLHunyuanImageRefiner
336342
- local: api/models/autoencoder_kl_hunyuan_video
337343
title: AutoencoderKLHunyuanVideo
338344
- local: api/models/autoencoderkl_ltx_video
@@ -429,6 +435,8 @@
429435
title: HiDream-I1
430436
- local: api/pipelines/framepack
431437
title: Framepack
438+
- local: api/pipelines/hunyuanimage21
439+
title: HunyuanImage2.1
432440
- local: api/pipelines/hunyuandit
433441
title: Hunyuan-DiT
434442
- local: api/pipelines/hunyuan_video
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License. -->
11+
12+
# AutoencoderKLHunyuanImage
13+
14+
The 2D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
15+
16+
The model can be loaded with the following code snippet.
17+
18+
```python
19+
import mindspore as ms

from mindone.diffusers import AutoencoderKLHunyuanImage
20+
21+
vae = AutoencoderKLHunyuanImage.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="vae", mindspore_dtype=ms.bfloat16)
22+
```
23+
24+
::: mindone.diffusers.AutoencoderKLHunyuanImage
25+
26+
::: mindone.diffusers.models.autoencoders.vae.DecoderOutput
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License. -->
11+
12+
# AutoencoderKLHunyuanImageRefiner
13+
14+
The 3D variational autoencoder (VAE) model with KL loss used in [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1) for its refiner pipeline.
15+
16+
The model can be loaded with the following code snippet.
17+
18+
```python
19+
import mindspore as ms

from mindone.diffusers import AutoencoderKLHunyuanImageRefiner
20+
21+
vae = AutoencoderKLHunyuanImageRefiner.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Refiner-Diffusers", subfolder="vae", mindspore_dtype=ms.bfloat16)
22+
```
23+
24+
::: mindone.diffusers.AutoencoderKLHunyuanImageRefiner
25+
26+
::: mindone.diffusers.models.autoencoders.vae.DecoderOutput
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License. -->
11+
12+
# HunyuanImageTransformer2DModel
13+
14+
A Diffusion Transformer model for [HunyuanImage2.1](https://github.com/Tencent-Hunyuan/HunyuanImage-2.1).
15+
16+
The model can be loaded with the following code snippet.
17+
18+
```python
19+
import mindspore as ms

from mindone.diffusers import HunyuanImageTransformer2DModel
20+
21+
transformer = HunyuanImageTransformer2DModel.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Diffusers", subfolder="transformer", mindspore_dtype=ms.bfloat16)
22+
```
23+
24+
::: mindone.diffusers.HunyuanImageTransformer2DModel
25+
26+
::: mindone.diffusers.models.modeling_outputs.Transformer2DModelOutput
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
<!-- Copyright 2025 The HuggingFace Team. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License. -->
14+
15+
# HunyuanImage2.1
16+
17+
18+
HunyuanImage-2.1 is a 17B text-to-image model that is capable of generating 2K (2048 x 2048) resolution images.
19+
20+
HunyuanImage-2.1 comes in the following variants:
21+
22+
| model type | model id |
23+
|:----------:|:--------:|
24+
| HunyuanImage-2.1 | [hunyuanvideo-community/HunyuanImage-2.1-Diffusers](https://huggingface.co/hunyuanvideo-community/HunyuanImage-2.1-Diffusers) |
25+
| HunyuanImage-2.1-Distilled | [hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers](https://huggingface.co/hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers) |
26+
| HunyuanImage-2.1-Refiner | [hunyuanvideo-community/HunyuanImage-2.1-Refiner-Diffusers](https://huggingface.co/hunyuanvideo-community/HunyuanImage-2.1-Refiner-Diffusers) |
27+
28+
> [!TIP]
29+
> [Caching](../../optimization/cache) may also speed up inference by storing and reusing intermediate outputs.
30+
31+
## HunyuanImage-2.1
32+
33+
HunyuanImage-2.1 applies [Adaptive Projected Guidance (APG)](https://huggingface.co/papers/2410.02416) combined with Classifier-Free Guidance (CFG) in the denoising loop. `HunyuanImagePipeline` has a `guider` component (read more about [Guider](../modular_diffusers/guiders.md)) and does not take a `guidance_scale` parameter at runtime. To change guider-related parameters, e.g., `guidance_scale`, you can update the `guider` configuration instead.
34+
35+
```python
36+
import mindspore as ms
37+
from mindone.diffusers import HunyuanImagePipeline
38+
39+
pipe = HunyuanImagePipeline.from_pretrained(
40+
"hunyuanvideo-community/HunyuanImage-2.1-Diffusers",
41+
mindspore_dtype=ms.bfloat16
42+
)
43+
```
44+
45+
You can inspect the `guider` object:
46+
47+
```py
48+
>>> pipe.guider
49+
AdaptiveProjectedMixGuidance {
50+
"_class_name": "AdaptiveProjectedMixGuidance",
51+
"_diffusers_version": "0.36.0.dev0",
52+
"adaptive_projected_guidance_momentum": -0.5,
53+
"adaptive_projected_guidance_rescale": 10.0,
54+
"adaptive_projected_guidance_scale": 10.0,
55+
"adaptive_projected_guidance_start_step": 5,
56+
"enabled": true,
57+
"eta": 0.0,
58+
"guidance_rescale": 0.0,
59+
"guidance_scale": 3.5,
60+
"start": 0.0,
61+
"stop": 1.0,
62+
"use_original_formulation": false
63+
}
64+
65+
State:
66+
step: None
67+
num_inference_steps: None
68+
timestep: None
69+
count_prepared: 0
70+
enabled: True
71+
num_conditions: 2
72+
momentum_buffer: None
73+
is_apg_enabled: False
74+
is_cfg_enabled: True
75+
```
76+
77+
To update the guider with a different configuration, use the `new()` method. For example, to generate an image with `guidance_scale=5.0` while keeping all other default guidance parameters:
78+
79+
```py
80+
import mindspore as ms
81+
from mindone.diffusers import HunyuanImagePipeline
82+
83+
pipe = HunyuanImagePipeline.from_pretrained(
84+
"hunyuanvideo-community/HunyuanImage-2.1-Diffusers",
85+
mindspore_dtype=ms.bfloat16
86+
)
87+
88+
# Update the guider configuration
89+
pipe.guider = pipe.guider.new(guidance_scale=5.0)
90+
91+
prompt = (
92+
"A cute, cartoon-style anthropomorphic penguin plush toy with fluffy fur, standing in a painting studio, "
93+
"wearing a red knitted scarf and a red beret with the word 'Tencent' on it, holding a paintbrush with a "
94+
"focused expression as it paints an oil painting of the Mona Lisa, rendered in a photorealistic photographic style."
95+
)
96+
97+
image = pipe(
98+
prompt=prompt,
99+
num_inference_steps=50,
100+
height=2048,
101+
width=2048,
102+
).images[0]
103+
image.save("image.png")
104+
```
105+
106+
107+
## HunyuanImage-2.1-Distilled
108+
109+
Use `distilled_guidance_scale` with the guidance-distilled checkpoint:
110+
111+
```py
112+
import mindspore as ms
113+
from mindone.diffusers import HunyuanImagePipeline
114+
pipe = HunyuanImagePipeline.from_pretrained("hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers", mindspore_dtype=ms.bfloat16)
115+
116+
prompt = (
117+
"A cute, cartoon-style anthropomorphic penguin plush toy with fluffy fur, standing in a painting studio, "
118+
"wearing a red knitted scarf and a red beret with the word 'Tencent' on it, holding a paintbrush with a "
119+
"focused expression as it paints an oil painting of the Mona Lisa, rendered in a photorealistic photographic style."
120+
)
121+
122+
out = pipe(
123+
prompt,
124+
num_inference_steps=8,
125+
distilled_guidance_scale=3.25,
126+
height=2048,
127+
width=2048,
128+
).images[0]
130+
131+
```
132+
133+
134+
::: mindone.diffusers.HunyuanImagePipeline
135+
136+
::: mindone.diffusers.HunyuanImageRefinerPipeline
137+
138+
::: mindone.diffusers.pipelines.hunyuan_image.pipeline_output.HunyuanImagePipelineOutput

mindone/diffusers/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
"configuration_utils": ["ConfigMixin"],
1818
"guiders": [
1919
"AdaptiveProjectedGuidance",
20+
"AdaptiveProjectedMixGuidance",
2021
"AutoGuidance",
22+
"BaseGuidance",
2123
"ClassifierFreeGuidance",
2224
"ClassifierFreeZeroStarGuidance",
2325
"FrequencyDecoupledGuidance",
@@ -61,6 +63,8 @@
6163
"AutoencoderKLAllegro",
6264
"AutoencoderKLCogVideoX",
6365
"AutoencoderKLCosmos",
66+
"AutoencoderKLHunyuanImage",
67+
"AutoencoderKLHunyuanImageRefiner",
6468
"AutoencoderKLHunyuanVideo",
6569
"AutoencoderKLLTXVideo",
6670
"AutoencoderKLMagvit",
@@ -90,6 +94,7 @@
9094
"HunyuanDiT2DControlNetModel",
9195
"HunyuanDiT2DModel",
9296
"HunyuanDiT2DMultiControlNetModel",
97+
"HunyuanImageTransformer2DModel",
9398
"HunyuanVideoFramepackTransformer3DModel",
9499
"HunyuanVideoTransformer3DModel",
95100
"I2VGenXLUNet",
@@ -204,6 +209,8 @@
204209
"HunyuanDiTControlNetPipeline",
205210
"HunyuanDiTPAGPipeline",
206211
"HunyuanDiTPipeline",
212+
"HunyuanImagePipeline",
213+
"HunyuanImageRefinerPipeline",
207214
"HunyuanSkyreelsImageToVideoPipeline",
208215
"HunyuanVideoFramepackPipeline",
209216
"HunyuanVideoImageToVideoPipeline",
@@ -411,7 +418,9 @@
411418
from .configuration_utils import ConfigMixin
412419
from .guiders import (
413420
AdaptiveProjectedGuidance,
421+
AdaptiveProjectedMixGuidance,
414422
AutoGuidance,
423+
BaseGuidance,
415424
ClassifierFreeGuidance,
416425
ClassifierFreeZeroStarGuidance,
417426
FrequencyDecoupledGuidance,
@@ -442,6 +451,8 @@
442451
AutoencoderKLAllegro,
443452
AutoencoderKLCogVideoX,
444453
AutoencoderKLCosmos,
454+
AutoencoderKLHunyuanImage,
455+
AutoencoderKLHunyuanImageRefiner,
445456
AutoencoderKLHunyuanVideo,
446457
AutoencoderKLLTXVideo,
447458
AutoencoderKLMagvit,
@@ -471,6 +482,7 @@
471482
HunyuanDiT2DControlNetModel,
472483
HunyuanDiT2DModel,
473484
HunyuanDiT2DMultiControlNetModel,
485+
HunyuanImageTransformer2DModel,
474486
HunyuanVideoFramepackTransformer3DModel,
475487
HunyuanVideoTransformer3DModel,
476488
I2VGenXLUNet,
@@ -596,6 +608,8 @@
596608
HunyuanDiTControlNetPipeline,
597609
HunyuanDiTPAGPipeline,
598610
HunyuanDiTPipeline,
611+
HunyuanImagePipeline,
612+
HunyuanImageRefinerPipeline,
599613
HunyuanSkyreelsImageToVideoPipeline,
600614
HunyuanVideoFramepackPipeline,
601615
HunyuanVideoImageToVideoPipeline,

mindone/diffusers/guiders/__init__.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,15 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Union
16-
15+
from ..utils import logging
1716
from .adaptive_projected_guidance import AdaptiveProjectedGuidance
17+
from .adaptive_projected_guidance_mix import AdaptiveProjectedMixGuidance
1818
from .auto_guidance import AutoGuidance
1919
from .classifier_free_guidance import ClassifierFreeGuidance
2020
from .classifier_free_zero_star_guidance import ClassifierFreeZeroStarGuidance
2121
from .frequency_decoupled_guidance import FrequencyDecoupledGuidance
22+
from .guider_utils import BaseGuidance
2223
from .perturbed_attention_guidance import PerturbedAttentionGuidance
2324
from .skip_layer_guidance import SkipLayerGuidance
2425
from .smoothed_energy_guidance import SmoothedEnergyGuidance
2526
from .tangential_classifier_free_guidance import TangentialClassifierFreeGuidance
26-
27-
GuiderType = Union[
28-
AdaptiveProjectedGuidance,
29-
AutoGuidance,
30-
ClassifierFreeGuidance,
31-
ClassifierFreeZeroStarGuidance,
32-
FrequencyDecoupledGuidance,
33-
PerturbedAttentionGuidance,
34-
SkipLayerGuidance,
35-
SmoothedEnergyGuidance,
36-
TangentialClassifierFreeGuidance,
37-
]

0 commit comments

Comments
 (0)