Skip to content

Commit 4c75017

Browse files
authored
Merge branch 'main' into fastercache
2 parents 7ad7cc8 + 8ae8008 commit 4c75017

27 files changed

+851
-222
lines changed

docs/source/en/using-diffusers/img2img.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -461,12 +461,12 @@ Chain it to an upscaler pipeline to increase the image resolution:
461461
from diffusers import StableDiffusionLatentUpscalePipeline
462462

463463
upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(
464-
"stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
464+
"stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True
465465
)
466466
upscaler.enable_model_cpu_offload()
467467
upscaler.enable_xformers_memory_efficient_attention()
468468

469-
image_2 = upscaler(prompt, image=image_1, output_type="latent").images[0]
469+
image_2 = upscaler(prompt, image=image_1).images[0]
470470
```
471471

472472
Finally, chain it to a super-resolution pipeline to further enhance the resolution:

docs/source/en/using-diffusers/write_own_pipeline.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ Let's try it out!
106106

107107
## Deconstruct the Stable Diffusion pipeline
108108

109-
Stable Diffusion is a text-to-image *latent diffusion* model. It is called a latent diffusion model because it works with a lower-dimensional representation of the image instead of the actual pixel space, which makes it more memory efficient. The encoder compresses the image into a smaller representation, and a decoder to convert the compressed representation back into an image. For text-to-image models, you'll need a tokenizer and an encoder to generate text embeddings. From the previous example, you already know you need a UNet model and a scheduler.
109+
Stable Diffusion is a text-to-image *latent diffusion* model. It is called a latent diffusion model because it works with a lower-dimensional representation of the image instead of the actual pixel space, which makes it more memory efficient. The encoder compresses the image into a smaller representation, and a decoder converts the compressed representation back into an image. For text-to-image models, you'll need a tokenizer and an encoder to generate text embeddings. From the previous example, you already know you need a UNet model and a scheduler.
110110

111111
As you can see, this is already more complex than the DDPM pipeline which only contains a UNet model. The Stable Diffusion model has three separate pretrained models.
112112

examples/community/README.md

Lines changed: 101 additions & 28 deletions
Large diffs are not rendered by default.

examples/dreambooth/train_dreambooth_lora_sana.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -995,7 +995,8 @@ def main(args):
995995
if args.enable_npu_flash_attention:
996996
if is_torch_npu_available():
997997
logger.info("npu flash attention enabled.")
998-
transformer.enable_npu_flash_attention()
998+
for block in transformer.transformer_blocks:
999+
block.attn2.set_use_npu_flash_attention(True)
9991000
else:
10001001
raise ValueError("npu flash attention requires torch_npu extensions and is supported only on npu device ")
10011002

examples/instruct_pix2pix/train_instruct_pix2pix.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -695,7 +695,7 @@ def preprocess_images(examples):
695695
)
696696
# We need to ensure that the original and the edited images undergo the same
697697
# augmentation transforms.
698-
images = np.concatenate([original_images, edited_images])
698+
images = np.stack([original_images, edited_images])
699699
images = torch.tensor(images)
700700
images = 2 * (images / 255) - 1
701701
return train_transforms(images)
@@ -706,7 +706,7 @@ def preprocess_train(examples):
706706
# Since the original and edited images were stacked before
707707
# applying the transformations, we need to separate them and reshape
708708
# them accordingly.
709-
original_images, edited_images = preprocessed_images.chunk(2)
709+
original_images, edited_images = preprocessed_images
710710
original_images = original_images.reshape(-1, 3, args.resolution, args.resolution)
711711
edited_images = edited_images.reshape(-1, 3, args.resolution, args.resolution)
712712

examples/instruct_pix2pix/train_instruct_pix2pix_sdxl.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -766,7 +766,7 @@ def preprocess_images(examples):
766766
)
767767
# We need to ensure that the original and the edited images undergo the same
768768
# augmentation transforms.
769-
images = np.concatenate([original_images, edited_images])
769+
images = np.stack([original_images, edited_images])
770770
images = torch.tensor(images)
771771
images = 2 * (images / 255) - 1
772772
return train_transforms(images)
@@ -906,7 +906,7 @@ def preprocess_train(examples):
906906
# Since the original and edited images were stacked before
907907
# applying the transformations, we need to separate them and reshape
908908
# them accordingly.
909-
original_images, edited_images = preprocessed_images.chunk(2)
909+
original_images, edited_images = preprocessed_images
910910
original_images = original_images.reshape(-1, 3, args.resolution, args.resolution)
911911
edited_images = edited_images.reshape(-1, 3, args.resolution, args.resolution)
912912

examples/model_search/README.md

Lines changed: 2 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -82,31 +82,11 @@ pipeline = EasyPipelineForInpainting.from_huggingface(
8282
## Search Civitai and Huggingface
8383

8484
```python
85-
from pipeline_easy import (
86-
search_huggingface,
87-
search_civitai,
88-
)
89-
90-
# Search Lora
91-
Lora = search_civitai(
92-
"Keyword_to_search_Lora",
93-
model_type="LORA",
94-
base_model = "SD 1.5",
95-
download=True,
96-
)
9785
# Load Lora into the pipeline.
98-
pipeline.load_lora_weights(Lora)
99-
86+
pipeline.auto_load_lora_weights("Detail Tweaker")
10087

101-
# Search TextualInversion
102-
TextualInversion = search_civitai(
103-
"EasyNegative",
104-
model_type="TextualInversion",
105-
base_model = "SD 1.5",
106-
download=True
107-
)
10888
# Load TextualInversion into the pipeline.
109-
pipeline.load_textual_inversion(TextualInversion, token="EasyNegative")
89+
pipeline.auto_load_textual_inversion("EasyNegative", token="EasyNegative")
11090
```
11191

11292
### Search Civitai

0 commit comments

Comments
 (0)