
Commit 5a9173f

Merge branch 'main' into stalker-modular_lora
2 parents: 0bb7ed4 + 94d64b8

File tree

148 files changed: +4488 −2743 lines


docker/Dockerfile

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
 ENV PATH="$PNPM_HOME:$PATH"
+RUN corepack use pnpm@8.x
 RUN corepack enable

 WORKDIR /build

invokeai/app/api/routers/model_manager.py

Lines changed: 14 additions & 15 deletions
@@ -6,7 +6,7 @@
 import traceback
 from copy import deepcopy
 from tempfile import TemporaryDirectory
-from typing import Any, Dict, List, Optional, Type
+from typing import List, Optional, Type

 from fastapi import Body, Path, Query, Response, UploadFile
 from fastapi.responses import FileResponse, HTMLResponse
@@ -430,13 +430,11 @@ async def delete_model_image(
 async def install_model(
     source: str = Query(description="Model source to install, can be a local path, repo_id, or remote URL"),
     inplace: Optional[bool] = Query(description="Whether or not to install a local model in place", default=False),
-    # TODO(MM2): Can we type this?
-    config: Optional[Dict[str, Any]] = Body(
-        description="Dict of fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
-        default=None,
+    access_token: Optional[str] = Query(description="access token for the remote resource", default=None),
+    config: ModelRecordChanges = Body(
+        description="Object containing fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
         example={"name": "string", "description": "string"},
     ),
-    access_token: Optional[str] = None,
 ) -> ModelInstallJob:
     """Install a model using a string identifier.

@@ -451,8 +449,9 @@ async def install_model(
     - model/name:fp16:path/to/model.safetensors
     - model/name::path/to/model.safetensors

-    `config` is an optional dict containing model configuration values that will override
-    the ones that are probed automatically.
+    `config` is a ModelRecordChanges object. Fields in this object will override
+    the ones that are probed automatically. Pass an empty object to accept
+    all the defaults.

     `access_token` is an optional access token for use with Urls that require
     authentication.
@@ -737,7 +736,7 @@ async def convert_model(
     # write the converted file to the convert path
     raw_model = converted_model.model
     assert hasattr(raw_model, "save_pretrained")
-    raw_model.save_pretrained(convert_path)
+    raw_model.save_pretrained(convert_path)  # type: ignore
     assert convert_path.exists()

     # temporarily rename the original safetensors file so that there is no naming conflict
@@ -750,12 +749,12 @@ async def convert_model(
     try:
         new_key = installer.install_path(
             convert_path,
-            config={
-                "name": original_name,
-                "description": model_config.description,
-                "hash": model_config.hash,
-                "source": model_config.source,
-            },
+            config=ModelRecordChanges(
+                name=original_name,
+                description=model_config.description,
+                hash=model_config.hash,
+                source=model_config.source,
+            ),
         )
     except Exception as e:
         logger.error(str(e))
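
For orientation, here is a minimal client-side sketch of the new `config` contract (the URL, port, and use of the `requests` library are illustrative assumptions, not part of this commit): `config` is now a JSON body shaped like `ModelRecordChanges`, sent alongside the `source`, `inplace`, and `access_token` query parameters, and an empty object accepts all auto-probed defaults.

import requests

# Hypothetical client call; the route below is an assumption -- check the router prefix in your deployment.
response = requests.post(
    "http://localhost:9090/api/v2/models/install",  # assumed path
    params={
        "source": "stabilityai/sd-turbo",  # local path, repo_id, or remote URL
        "inplace": False,
        # "access_token": "...",  # optional, for URLs that require authentication
    },
    json={},  # empty ModelRecordChanges: accept all auto-probed values
)
response.raise_for_status()
install_job = response.json()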

invokeai/app/invocations/create_gradient_mask.py

Lines changed: 2 additions & 1 deletion
@@ -39,7 +39,7 @@ class GradientMaskOutput(BaseInvocationOutput):
     title="Create Gradient Mask",
     tags=["mask", "denoise"],
     category="latents",
-    version="1.1.0",
+    version="1.2.0",
 )
 class CreateGradientMaskInvocation(BaseInvocation):
     """Creates mask for denoising model run."""
@@ -93,6 +93,7 @@ def invoke(self, context: InvocationContext) -> GradientMaskOutput:

         # redistribute blur so that the original edges are 0 and blur outwards to 1
         blur_tensor = (blur_tensor - 0.5) * 2
+        blur_tensor[blur_tensor < 0] = 0.0

         threshold = 1 - self.minimum_denoise

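
A tiny numeric sketch of the added clamp (made-up values, not code from the commit): after `(blur_tensor - 0.5) * 2`, anything that was below the 0.5 midpoint becomes negative, and the new line pins those values back to 0 so the original edges stay fully preserved.

import torch

blur_tensor = torch.tensor([0.00, 0.25, 0.50, 0.75, 1.00])
blur_tensor = (blur_tensor - 0.5) * 2  # -> [-1.0, -0.5, 0.0, 0.5, 1.0]
blur_tensor[blur_tensor < 0] = 0.0     # the added line: clamp negatives to 0
print(blur_tensor)                     # tensor([0.0000, 0.0000, 0.0000, 0.5000, 1.0000])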

invokeai/app/invocations/denoise_latents.py

Lines changed: 73 additions & 23 deletions
@@ -37,9 +37,9 @@
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
-from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.model_manager import BaseModelType, ModelVariantType
 from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion import PipelineIntermediateState
 from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, DenoiseInputs
 from invokeai.backend.stable_diffusion.diffusers_pipeline import (
     ControlNetData,
@@ -60,9 +60,13 @@
 from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
 from invokeai.backend.stable_diffusion.extensions.controlnet import ControlNetExt
 from invokeai.backend.stable_diffusion.extensions.freeu import FreeUExt
+from invokeai.backend.stable_diffusion.extensions.inpaint import InpaintExt
+from invokeai.backend.stable_diffusion.extensions.inpaint_model import InpaintModelExt
 from invokeai.backend.stable_diffusion.extensions.lora import LoRAExt
 from invokeai.backend.stable_diffusion.extensions.preview import PreviewExt
 from invokeai.backend.stable_diffusion.extensions.rescale_cfg import RescaleCFGExt
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
+from invokeai.backend.stable_diffusion.extensions.t2i_adapter import T2IAdapterExt
 from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
 from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -499,6 +503,33 @@ def parse_controlnet_field(
                 )
             )

+    @staticmethod
+    def parse_t2i_adapter_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        t2i_adapters: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        if t2i_adapters is None:
+            return
+
+        # Handle the possibility that t2i_adapters could be a list or a single T2IAdapterField.
+        if isinstance(t2i_adapters, T2IAdapterField):
+            t2i_adapters = [t2i_adapters]
+
+        for t2i_adapter_field in t2i_adapters:
+            ext_manager.add_extension(
+                T2IAdapterExt(
+                    node_context=context,
+                    model_id=t2i_adapter_field.t2i_adapter_model,
+                    image=context.images.get_pil(t2i_adapter_field.image.image_name),
+                    weight=t2i_adapter_field.weight,
+                    begin_step_percent=t2i_adapter_field.begin_step_percent,
+                    end_step_percent=t2i_adapter_field.end_step_percent,
+                    resize_mode=t2i_adapter_field.resize_mode,
+                )
+            )
+
     def prep_ip_adapter_image_prompts(
         self,
         context: InvocationContext,
@@ -708,7 +739,7 @@ def prep_inpaint_mask(
         else:
             masked_latents = torch.where(mask < 0.5, 0.0, latents)

-        return 1 - mask, masked_latents, self.denoise_mask.gradient
+        return mask, masked_latents, self.denoise_mask.gradient

     @staticmethod
     def prepare_noise_and_latents(
@@ -766,10 +797,6 @@ def _new_invoke(self, context: InvocationContext) -> LatentsOutput:
         dtype = TorchDevice.choose_torch_dtype()

         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
-        latents = latents.to(device=device, dtype=dtype)
-        if noise is not None:
-            noise = noise.to(device=device, dtype=dtype)
-
         _, _, latent_height, latent_width = latents.shape

         conditioning_data = self.get_conditioning_data(
@@ -802,21 +829,6 @@ def _new_invoke(self, context: InvocationContext) -> LatentsOutput:
             denoising_end=self.denoising_end,
         )

-        denoise_ctx = DenoiseContext(
-            inputs=DenoiseInputs(
-                orig_latents=latents,
-                timesteps=timesteps,
-                init_timestep=init_timestep,
-                noise=noise,
-                seed=seed,
-                scheduler_step_kwargs=scheduler_step_kwargs,
-                conditioning_data=conditioning_data,
-                attention_processor_cls=CustomAttnProcessor2_0,
-            ),
-            unet=None,
-            scheduler=scheduler,
-        )
-
         # get the unet's config so that we can pass the base to sd_step_callback()
         unet_config = context.models.get_config(self.unet.unet.key)

@@ -844,6 +856,39 @@ def step_callback(state: PipelineIntermediateState) -> None:
                         weight=lora_field.weight,
                     )
                 )
+        ### seamless
+        if self.unet.seamless_axes:
+            ext_manager.add_extension(SeamlessExt(self.unet.seamless_axes))
+
+        ### inpaint
+        mask, masked_latents, is_gradient_mask = self.prep_inpaint_mask(context, latents)
+        # NOTE: We used to identify inpainting models by inpecting the shape of the loaded UNet model weights. Now we
+        # use the ModelVariantType config. During testing, there was a report of a user with models that had an
+        # incorrect ModelVariantType value. Re-installing the model fixed the issue. If this issue turns out to be
+        # prevalent, we will have to revisit how we initialize the inpainting extensions.
+        if unet_config.variant == ModelVariantType.Inpaint:
+            ext_manager.add_extension(InpaintModelExt(mask, masked_latents, is_gradient_mask))
+        elif mask is not None:
+            ext_manager.add_extension(InpaintExt(mask, is_gradient_mask))
+
+        # Initialize context for modular denoise
+        latents = latents.to(device=device, dtype=dtype)
+        if noise is not None:
+            noise = noise.to(device=device, dtype=dtype)
+        denoise_ctx = DenoiseContext(
+            inputs=DenoiseInputs(
+                orig_latents=latents,
+                timesteps=timesteps,
+                init_timestep=init_timestep,
+                noise=noise,
+                seed=seed,
+                scheduler_step_kwargs=scheduler_step_kwargs,
+                conditioning_data=conditioning_data,
+                attention_processor_cls=CustomAttnProcessor2_0,
+            ),
+            unet=None,
+            scheduler=scheduler,
+        )

         # context for loading additional models
         with ExitStack() as exit_stack:
@@ -852,6 +897,7 @@ def step_callback(state: PipelineIntermediateState) -> None:
             # ext = extension_field.to_extension(exit_stack, context, ext_manager)
             # ext_manager.add_extension(ext)
             self.parse_controlnet_field(exit_stack, context, self.control, ext_manager)
+            self.parse_t2i_adapter_field(exit_stack, context, self.t2i_adapter, ext_manager)

             # ext: t2i/ip adapter
             ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)
@@ -883,6 +929,10 @@ def _old_invoke(self, context: InvocationContext) -> LatentsOutput:
         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)

         mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+        # At this point, the mask ranges from 0 (leave unchanged) to 1 (inpaint).
+        # We invert the mask here for compatibility with the old backend implementation.
+        if mask is not None:
+            mask = 1 - mask

         # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
         # below. Investigate whether this is appropriate.
@@ -927,7 +977,7 @@ def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
             ExitStack() as exit_stack,
             unet_info.model_on_device() as (cached_weights, unet),
             ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
-            set_seamless(unet, self.unet.seamless_axes),  # FIXME
+            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
             # Apply the LoRA after unet has been moved to its target device for faster patching.
             ModelPatcher.apply_lora_unet(
                 unet,
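
To summarize the mask-convention change above (a sketch with made-up values, not code from the commit): `prep_inpaint_mask` now returns the mask uninverted, so 0 means leave unchanged and 1 means inpaint. The new modular extensions (`InpaintExt`, `InpaintModelExt`) consume it directly, while `_old_invoke` flips it back for the old backend.

import torch

# New convention returned by prep_inpaint_mask: 0 = leave unchanged, 1 = inpaint.
mask = torch.tensor([[0.0, 1.0],
                     [1.0, 0.0]])

# The modular path uses `mask` as-is; the old backend expects the inverted
# convention, so _old_invoke now applies:
old_backend_mask = 1 - mask if mask is not None else None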

invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
 from invokeai.backend.util.devices import TorchDevice

@@ -59,7 +59,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput:

         vae_info = context.models.load(self.vae.vae)
         assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
-        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
+        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
             assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
             latents = latents.to(vae.device)
             if self.fp32:
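
For context, `set_seamless` has been superseded by `SeamlessExt.static_patch_model`, used here the same way as a context manager around the VAE. Below is a rough, hedged sketch of what a seamless patch of this kind typically does; the real implementation lives in `invokeai.backend.stable_diffusion.extensions.seamless` and is not shown in this commit, so the helper is illustrative only.

from contextlib import contextmanager

import torch

@contextmanager
def patch_seamless(model: torch.nn.Module, seamless_axes: list[str]):
    # Illustrative only: temporarily make conv layers wrap around, then restore them.
    patched: list[tuple[torch.nn.Conv2d, str]] = []
    try:
        if seamless_axes:  # e.g. ["x"], ["y"], or ["x", "y"]
            for module in model.modules():
                if isinstance(module, torch.nn.Conv2d):
                    patched.append((module, module.padding_mode))
                    module.padding_mode = "circular"  # simplified: real code wraps per axis
        yield model
    finally:
        for module, original_mode in patched:
            module.padding_mode = original_mode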

invokeai/app/invocations/spandrel_image_to_image.py

Lines changed: 42 additions & 11 deletions
@@ -23,7 +23,7 @@
 from invokeai.backend.tiles.utils import TBLR, Tile


-@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.2.0")
+@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.3.0")
 class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
     """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel)."""

@@ -36,16 +36,6 @@ class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
     tile_size: int = InputField(
         default=512, description="The tile size for tiled image-to-image. Set to 0 to disable tiling."
     )
-    scale: float = InputField(
-        default=4.0,
-        gt=0.0,
-        le=16.0,
-        description="The final scale of the output image. If the model does not upscale the image, this will be ignored.",
-    )
-    fit_to_multiple_of_8: bool = InputField(
-        default=False,
-        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
-    )

     @classmethod
     def scale_tile(cls, tile: Tile, scale: int) -> Tile:
@@ -152,6 +142,47 @@ def upscale_image(

         return pil_image

+    @torch.inference_mode()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
+        # revisit this.
+        image = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        # Load the model.
+        spandrel_model_info = context.models.load(self.image_to_image_model)
+
+        # Do the upscaling.
+        with spandrel_model_info as spandrel_model:
+            assert isinstance(spandrel_model, SpandrelImageToImageModel)
+
+            # Upscale the image
+            pil_image = self.upscale_image(image, self.tile_size, spandrel_model, context.util.is_canceled)
+
+        image_dto = context.images.save(image=pil_image)
+        return ImageOutput.build(image_dto)
+
+
+@invocation(
+    "spandrel_image_to_image_autoscale",
+    title="Image-to-Image (Autoscale)",
+    tags=["upscale"],
+    category="upscale",
+    version="1.0.0",
+)
+class SpandrelImageToImageAutoscaleInvocation(SpandrelImageToImageInvocation):
+    """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel) until the target scale is reached."""
+
+    scale: float = InputField(
+        default=4.0,
+        gt=0.0,
+        le=16.0,
+        description="The final scale of the output image. If the model does not upscale the image, this will be ignored.",
+    )
+    fit_to_multiple_of_8: bool = InputField(
+        default=False,
+        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
+    )
+
     @torch.inference_mode()
     def invoke(self, context: InvocationContext) -> ImageOutput:
         # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to