Skip to content

Commit 7f897a9

Browse files
committed
fix
1 parent 0966663 commit 7f897a9

File tree

2 files changed

+39
-27
lines changed

2 files changed

+39
-27
lines changed

src/diffusers/pipelines/modular_pipeline.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ def to_dict(self) -> Dict[str, Any]:
8282
def __repr__(self):
8383
def format_value(v):
8484
if hasattr(v, "shape") and hasattr(v, "dtype"):
85-
return f"Tensor(\n dtype={v.dtype}, shape={v.shape}\n {v})"
85+
return f"Tensor(dtype={v.dtype}, shape={v.shape})"
8686
elif isinstance(v, list) and len(v) > 0 and hasattr(v[0], "shape") and hasattr(v[0], "dtype"):
87-
return f"[Tensor(\n dtype={v[0].dtype}, shape={v[0].shape}\n {v[0]}), ...]"
87+
return f"[Tensor(dtype={v[0].dtype}, shape={v[0].shape}), ...]"
8888
else:
8989
return repr(v)
9090

@@ -238,6 +238,10 @@ def __init__(self):
238238
if not (len(self.block_classes) == len(self.block_names) == len(self.block_trigger_inputs)):
239239
raise ValueError(f"In {self.__class__.__name__}, the number of block_classes, block_names, and block_trigger_inputs must be the same.")
240240
default_blocks = [t for t in self.block_trigger_inputs if t is None]
241+
# can only have 1 or 0 default blocks, and it has to be placed last
242+
# the order of blocks matters here because the first block with a matching trigger will be dispatched
243+
# e.g. blocks = [inpaint, img2img] and block_trigger_inputs = ["mask", "image"]
244+
# if both mask and image are provided, it is inpaint; if only image is provided, it is img2img
241245
if len(default_blocks) > 1 or (
242246
len(default_blocks) == 1 and self.block_trigger_inputs[-1] is not None
243247
):
@@ -248,6 +252,7 @@ def __init__(self):
248252

249253
# Map trigger inputs to block objects
250254
self.trigger_to_block_map = dict(zip(self.block_trigger_inputs, self.blocks.values()))
255+
self.trigger_to_block_name_map = dict(zip(self.block_trigger_inputs, self.blocks.keys()))
251256
self.block_to_trigger_map = dict(zip(self.blocks.keys(), self.block_trigger_inputs))
252257

253258
@property
@@ -324,6 +329,9 @@ def __call__(self, pipeline, state: PipelineState) -> PipelineState:
324329
if input_name is not None and state.get_input(input_name) is not None:
325330
block = self.trigger_to_block_map[input_name]
326331
break
332+
elif input_name is not None and state.get_intermediate(input_name) is not None:
333+
block = self.trigger_to_block_map[input_name]
334+
break
327335

328336
if block is None:
329337
logger.warning(f"skipping auto block: {self.__class__.__name__}")

src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_modular.py

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,7 @@ def intermediates_inputs(self) -> List[str]:
11121112
"mask", # inpainting
11131113
"masked_image_latents", # inpainting
11141114
"noise", # inpainting
1115+
"image_latents", # inpainting
11151116
]
11161117

11171118
@property
@@ -2028,25 +2029,24 @@ class StableDiffusionXLAutoVaeEncoderStep(AutoPipelineBlocks):
20282029
block_trigger_inputs = ["mask_image", "image"]
20292030

20302031

2031-
class StableDiffusionXLAutoSetTimestepsStep(AutoPipelineBlocks):
2032-
block_classes = [StableDiffusionXLImg2ImgSetTimestepsStep, StableDiffusionXLSetTimestepsStep]
2033-
block_names = ["img2img", "text2img"]
2034-
block_trigger_inputs = ["image", None]
2032+
class StableDiffusionXLBeforeDenoiseStep(SequentialPipelineBlocks):
2033+
block_classes = [StableDiffusionXLInputStep, StableDiffusionXLSetTimestepsStep, StableDiffusionXLPrepareLatentsStep, StableDiffusionXLPrepareAdditionalConditioningStep]
2034+
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
20352035

2036+
class StableDiffusionXLImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
2037+
block_classes = [StableDiffusionXLInputStep, StableDiffusionXLImg2ImgSetTimestepsStep, StableDiffusionXLImg2ImgPrepareLatentsStep, StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep]
2038+
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
20362039

2037-
class StableDiffusionXLAutoPrepareLatentsStep(AutoPipelineBlocks):
2038-
block_classes = [StableDiffusionXLInpaintPrepareLatentsStep, StableDiffusionXLImg2ImgPrepareLatentsStep, StableDiffusionXLPrepareLatentsStep]
2039-
block_names = ["inpaint","img2img", "text2img"]
2040-
block_trigger_inputs = ["mask_image", "image", None]
2040+
class StableDiffusionXLInpaintBeforeDenoiseStep(SequentialPipelineBlocks):
2041+
block_classes = [StableDiffusionXLInputStep, StableDiffusionXLImg2ImgSetTimestepsStep, StableDiffusionXLInpaintPrepareLatentsStep, StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep]
2042+
block_names = ["input", "set_timesteps", "prepare_latents", "prepare_add_cond"]
20412043

20422044

2043-
class StableDiffusionXLAutoPrepareAdditionalConditioningStep(AutoPipelineBlocks):
2044-
block_classes = [
2045-
StableDiffusionXLImg2ImgPrepareAdditionalConditioningStep,
2046-
StableDiffusionXLPrepareAdditionalConditioningStep,
2047-
]
2048-
block_names = ["img2img", "text2img"]
2049-
block_trigger_inputs = ["image", None]
2045+
class StableDiffusionXLAutoBeforeDenoiseStep(AutoPipelineBlocks):
2046+
block_classes = [StableDiffusionXLInpaintBeforeDenoiseStep, StableDiffusionXLImg2ImgBeforeDenoiseStep, StableDiffusionXLBeforeDenoiseStep]
2047+
block_names = ["inpaint", "img2img", "text2img"]
2048+
block_trigger_inputs = ["mask", "image_latents", None]
2049+
20502050

20512051

20522052
class StableDiffusionXLAutoDenoiseStep(AutoPipelineBlocks):
@@ -2064,10 +2064,10 @@ class StableDiffusionXLAutoDecodeStep(AutoPipelineBlocks):
20642064
TEXT2IMAGE_BLOCKS = OrderedDict([
20652065
("text_encoder", StableDiffusionXLTextEncoderStep),
20662066
("input", StableDiffusionXLInputStep),
2067-
("set_timesteps", StableDiffusionXLAutoSetTimestepsStep),
2068-
("prepare_latents", StableDiffusionXLAutoPrepareLatentsStep),
2069-
("prepare_add_cond", StableDiffusionXLAutoPrepareAdditionalConditioningStep),
2070-
("denoise", StableDiffusionXLAutoDenoiseStep),
2067+
("set_timesteps", StableDiffusionXLSetTimestepsStep),
2068+
("prepare_latents", StableDiffusionXLPrepareLatentsStep),
2069+
("prepare_add_cond", StableDiffusionXLPrepareAdditionalConditioningStep),
2070+
("denoise", StableDiffusionXLDenoiseStep),
20712071
("decode", StableDiffusionXLDecodeStep)
20722072
])
20732073

@@ -2099,11 +2099,8 @@ class StableDiffusionXLAutoDecodeStep(AutoPipelineBlocks):
20992099

21002100
AUTO_BLOCKS = OrderedDict([
21012101
("text_encoder", StableDiffusionXLTextEncoderStep),
2102-
("input", StableDiffusionXLInputStep),
21032102
("image_encoder", StableDiffusionXLAutoVaeEncoderStep),
2104-
("set_timesteps", StableDiffusionXLAutoSetTimestepsStep),
2105-
("prepare_latents", StableDiffusionXLAutoPrepareLatentsStep),
2106-
("prepare_add_cond", StableDiffusionXLAutoPrepareAdditionalConditioningStep),
2103+
("before_denoise", StableDiffusionXLAutoBeforeDenoiseStep),
21072104
("denoise", StableDiffusionXLAutoDenoiseStep),
21082105
("decode", StableDiffusionXLAutoDecodeStep)
21092106
])
@@ -2138,11 +2135,18 @@ def vae_scale_factor(self):
21382135
vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
21392136
return vae_scale_factor
21402137

2138+
@property
2139+
def num_channels_unet(self):
2140+
num_channels_unet = 4
2141+
if hasattr(self, "unet") and self.unet is not None:
2142+
num_channels_unet = self.unet.config.in_channels
2143+
return num_channels_unet
2144+
21412145
@property
21422146
def num_channels_latents(self):
21432147
num_channels_latents = 4
2144-
if hasattr(self, "unet") and self.unet is not None:
2145-
num_channels_latents = self.unet.config.in_channels
2148+
if hasattr(self, "vae") and self.vae is not None:
2149+
num_channels_latents = self.vae.config.latent_channels
21462150
return num_channels_latents
21472151

21482152
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline._get_add_time_ids

0 commit comments

Comments
 (0)