
Commit 5a9173f

Merge branch 'main' into stalker-modular_lora
2 parents: 0bb7ed4 + 94d64b8

File tree

148 files changed: +4488 −2743 lines


docker/Dockerfile

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
 ENV PATH="$PNPM_HOME:$PATH"
+RUN corepack use pnpm@8.x
 RUN corepack enable

 WORKDIR /build

invokeai/app/api/routers/model_manager.py

Lines changed: 14 additions & 15 deletions
@@ -6,7 +6,7 @@
 import traceback
 from copy import deepcopy
 from tempfile import TemporaryDirectory
-from typing import Any, Dict, List, Optional, Type
+from typing import List, Optional, Type

 from fastapi import Body, Path, Query, Response, UploadFile
 from fastapi.responses import FileResponse, HTMLResponse
@@ -430,13 +430,11 @@ async def delete_model_image(
 async def install_model(
     source: str = Query(description="Model source to install, can be a local path, repo_id, or remote URL"),
     inplace: Optional[bool] = Query(description="Whether or not to install a local model in place", default=False),
-    # TODO(MM2): Can we type this?
-    config: Optional[Dict[str, Any]] = Body(
-        description="Dict of fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
-        default=None,
+    access_token: Optional[str] = Query(description="access token for the remote resource", default=None),
+    config: ModelRecordChanges = Body(
+        description="Object containing fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
         example={"name": "string", "description": "string"},
     ),
-    access_token: Optional[str] = None,
 ) -> ModelInstallJob:
     """Install a model using a string identifier.

@@ -451,8 +449,9 @@ async def install_model(
     - model/name:fp16:path/to/model.safetensors
     - model/name::path/to/model.safetensors

-    `config` is an optional dict containing model configuration values that will override
-    the ones that are probed automatically.
+    `config` is a ModelRecordChanges object. Fields in this object will override
+    the ones that are probed automatically. Pass an empty object to accept
+    all the defaults.

     `access_token` is an optional access token for use with Urls that require
     authentication.
@@ -737,7 +736,7 @@ async def convert_model(
     # write the converted file to the convert path
     raw_model = converted_model.model
     assert hasattr(raw_model, "save_pretrained")
-    raw_model.save_pretrained(convert_path)
+    raw_model.save_pretrained(convert_path)  # type: ignore
     assert convert_path.exists()

     # temporarily rename the original safetensors file so that there is no naming conflict
@@ -750,12 +749,12 @@ async def convert_model(
     try:
         new_key = installer.install_path(
             convert_path,
-            config={
-                "name": original_name,
-                "description": model_config.description,
-                "hash": model_config.hash,
-                "source": model_config.source,
-            },
+            config=ModelRecordChanges(
+                name=original_name,
+                description=model_config.description,
+                hash=model_config.hash,
+                source=model_config.source,
+            ),
         )
     except Exception as e:
         logger.error(str(e))
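
For orientation, here is a minimal client-side sketch of the new `config` contract (the URL, port, and use of the `requests` library are illustrative assumptions, not part of this commit): `config` is now a JSON body shaped like `ModelRecordChanges`, sent alongside the `source`, `inplace`, and `access_token` query parameters, and an empty object accepts all auto-probed defaults.

import requests

# Hypothetical client call; the route below is an assumption -- check the router prefix in your deployment.
response = requests.post(
    "http://localhost:9090/api/v2/models/install",  # assumed path
    params={
        "source": "stabilityai/sd-turbo",  # local path, repo_id, or remote URL
        "inplace": False,
        # "access_token": "...",  # optional, for URLs that require authentication
    },
    json={},  # empty ModelRecordChanges: accept all auto-probed values
)
response.raise_for_status()
install_job = response.json()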

invokeai/app/invocations/create_gradient_mask.py

Lines changed: 2 additions & 1 deletion
@@ -39,7 +39,7 @@ class GradientMaskOutput(BaseInvocationOutput):
     title="Create Gradient Mask",
     tags=["mask", "denoise"],
     category="latents",
-    version="1.1.0",
+    version="1.2.0",
 )
 class CreateGradientMaskInvocation(BaseInvocation):
     """Creates mask for denoising model run."""
@@ -93,6 +93,7 @@ def invoke(self, context: InvocationContext) -> GradientMaskOutput:

         # redistribute blur so that the original edges are 0 and blur outwards to 1
         blur_tensor = (blur_tensor - 0.5) * 2
+        blur_tensor[blur_tensor < 0] = 0.0

         threshold = 1 - self.minimum_denoise

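
A tiny numeric sketch of the added clamp (made-up values, not code from the commit): after `(blur_tensor - 0.5) * 2`, anything that was below the 0.5 midpoint becomes negative, and the new line pins those values back to 0 so the original edges stay fully preserved.

import torch

blur_tensor = torch.tensor([0.00, 0.25, 0.50, 0.75, 1.00])
blur_tensor = (blur_tensor - 0.5) * 2  # -> [-1.0, -0.5, 0.0, 0.5, 1.0]
blur_tensor[blur_tensor < 0] = 0.0     # the added line: clamp negatives to 0
print(blur_tensor)                     # tensor([0.0000, 0.0000, 0.0000, 0.5000, 1.0000])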

invokeai/app/invocations/denoise_latents.py

Lines changed: 73 additions & 23 deletions
@@ -37,9 +37,9 @@
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
-from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.model_manager import BaseModelType, ModelVariantType
 from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion import PipelineIntermediateState
 from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, DenoiseInputs
 from invokeai.backend.stable_diffusion.diffusers_pipeline import (
     ControlNetData,
@@ -60,9 +60,13 @@
 from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
 from invokeai.backend.stable_diffusion.extensions.controlnet import ControlNetExt
 from invokeai.backend.stable_diffusion.extensions.freeu import FreeUExt
+from invokeai.backend.stable_diffusion.extensions.inpaint import InpaintExt
+from invokeai.backend.stable_diffusion.extensions.inpaint_model import InpaintModelExt
 from invokeai.backend.stable_diffusion.extensions.lora import LoRAExt
 from invokeai.backend.stable_diffusion.extensions.preview import PreviewExt
 from invokeai.backend.stable_diffusion.extensions.rescale_cfg import RescaleCFGExt
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
+from invokeai.backend.stable_diffusion.extensions.t2i_adapter import T2IAdapterExt
 from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
 from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -499,6 +503,33 @@ def parse_controlnet_field(
                 )
             )

+    @staticmethod
+    def parse_t2i_adapter_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        t2i_adapters: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        if t2i_adapters is None:
+            return
+
+        # Handle the possibility that t2i_adapters could be a list or a single T2IAdapterField.
+        if isinstance(t2i_adapters, T2IAdapterField):
+            t2i_adapters = [t2i_adapters]
+
+        for t2i_adapter_field in t2i_adapters:
+            ext_manager.add_extension(
+                T2IAdapterExt(
+                    node_context=context,
+                    model_id=t2i_adapter_field.t2i_adapter_model,
+                    image=context.images.get_pil(t2i_adapter_field.image.image_name),
+                    weight=t2i_adapter_field.weight,
+                    begin_step_percent=t2i_adapter_field.begin_step_percent,
+                    end_step_percent=t2i_adapter_field.end_step_percent,
+                    resize_mode=t2i_adapter_field.resize_mode,
+                )
+            )
+
     def prep_ip_adapter_image_prompts(
         self,
         context: InvocationContext,
@@ -708,7 +739,7 @@ def prep_inpaint_mask(
         else:
             masked_latents = torch.where(mask < 0.5, 0.0, latents)

-        return 1 - mask, masked_latents, self.denoise_mask.gradient
+        return mask, masked_latents, self.denoise_mask.gradient

     @staticmethod
     def prepare_noise_and_latents(
@@ -766,10 +797,6 @@ def _new_invoke(self, context: InvocationContext) -> LatentsOutput:
         dtype = TorchDevice.choose_torch_dtype()

         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
-        latents = latents.to(device=device, dtype=dtype)
-        if noise is not None:
-            noise = noise.to(device=device, dtype=dtype)
-
         _, _, latent_height, latent_width = latents.shape

         conditioning_data = self.get_conditioning_data(
@@ -802,21 +829,6 @@ def _new_invoke(self, context: InvocationContext) -> LatentsOutput:
             denoising_end=self.denoising_end,
         )

-        denoise_ctx = DenoiseContext(
-            inputs=DenoiseInputs(
-                orig_latents=latents,
-                timesteps=timesteps,
-                init_timestep=init_timestep,
-                noise=noise,
-                seed=seed,
-                scheduler_step_kwargs=scheduler_step_kwargs,
-                conditioning_data=conditioning_data,
-                attention_processor_cls=CustomAttnProcessor2_0,
-            ),
-            unet=None,
-            scheduler=scheduler,
-        )
-
         # get the unet's config so that we can pass the base to sd_step_callback()
         unet_config = context.models.get_config(self.unet.unet.key)

@@ -844,6 +856,39 @@ def step_callback(state: PipelineIntermediateState) -> None:
                         weight=lora_field.weight,
                     )
                 )
+        ### seamless
+        if self.unet.seamless_axes:
+            ext_manager.add_extension(SeamlessExt(self.unet.seamless_axes))
+
+        ### inpaint
+        mask, masked_latents, is_gradient_mask = self.prep_inpaint_mask(context, latents)
+        # NOTE: We used to identify inpainting models by inpecting the shape of the loaded UNet model weights. Now we
+        # use the ModelVariantType config. During testing, there was a report of a user with models that had an
+        # incorrect ModelVariantType value. Re-installing the model fixed the issue. If this issue turns out to be
+        # prevalent, we will have to revisit how we initialize the inpainting extensions.
+        if unet_config.variant == ModelVariantType.Inpaint:
+            ext_manager.add_extension(InpaintModelExt(mask, masked_latents, is_gradient_mask))
+        elif mask is not None:
+            ext_manager.add_extension(InpaintExt(mask, is_gradient_mask))
+
+        # Initialize context for modular denoise
+        latents = latents.to(device=device, dtype=dtype)
+        if noise is not None:
+            noise = noise.to(device=device, dtype=dtype)
+        denoise_ctx = DenoiseContext(
+            inputs=DenoiseInputs(
+                orig_latents=latents,
+                timesteps=timesteps,
+                init_timestep=init_timestep,
+                noise=noise,
+                seed=seed,
+                scheduler_step_kwargs=scheduler_step_kwargs,
+                conditioning_data=conditioning_data,
+                attention_processor_cls=CustomAttnProcessor2_0,
+            ),
+            unet=None,
+            scheduler=scheduler,
+        )

         # context for loading additional models
         with ExitStack() as exit_stack:
@@ -852,6 +897,7 @@ def step_callback(state: PipelineIntermediateState) -> None:
             # ext = extension_field.to_extension(exit_stack, context, ext_manager)
             # ext_manager.add_extension(ext)
             self.parse_controlnet_field(exit_stack, context, self.control, ext_manager)
+            self.parse_t2i_adapter_field(exit_stack, context, self.t2i_adapter, ext_manager)

             # ext: t2i/ip adapter
             ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)
@@ -883,6 +929,10 @@ def _old_invoke(self, context: InvocationContext) -> LatentsOutput:
         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)

         mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+        # At this point, the mask ranges from 0 (leave unchanged) to 1 (inpaint).
+        # We invert the mask here for compatibility with the old backend implementation.
+        if mask is not None:
+            mask = 1 - mask

         # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
         # below. Investigate whether this is appropriate.
@@ -927,7 +977,7 @@ def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
             ExitStack() as exit_stack,
             unet_info.model_on_device() as (cached_weights, unet),
             ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
-            set_seamless(unet, self.unet.seamless_axes),  # FIXME
+            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
             # Apply the LoRA after unet has been moved to its target device for faster patching.
             ModelPatcher.apply_lora_unet(
                 unet,
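
To summarize the mask-convention change above (a sketch with made-up values, not code from the commit): `prep_inpaint_mask` now returns the mask uninverted, so 0 means leave unchanged and 1 means inpaint. The new modular extensions (`InpaintExt`, `InpaintModelExt`) consume it directly, while `_old_invoke` flips it back for the old backend.

import torch

# New convention returned by prep_inpaint_mask: 0 = leave unchanged, 1 = inpaint.
mask = torch.tensor([[0.0, 1.0],
                     [1.0, 0.0]])

# The modular path uses `mask` as-is; the old backend expects the inverted
# convention, so _old_invoke now applies:
old_backend_mask = 1 - mask if mask is not None else None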

invokeai/app/invocations/latents_to_image.py

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
 from invokeai.backend.util.devices import TorchDevice

@@ -59,7 +59,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput:

         vae_info = context.models.load(self.vae.vae)
         assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
-        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
+        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
             assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
             latents = latents.to(vae.device)
             if self.fp32:
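
For context, `set_seamless` has been superseded by `SeamlessExt.static_patch_model`, used here the same way as a context manager around the VAE. Below is a rough, hedged sketch of what a seamless patch of this kind typically does; the real implementation lives in `invokeai.backend.stable_diffusion.extensions.seamless` and is not shown in this commit, so the helper is illustrative only.

from contextlib import contextmanager

import torch

@contextmanager
def patch_seamless(model: torch.nn.Module, seamless_axes: list[str]):
    # Illustrative only: temporarily make conv layers wrap around, then restore them.
    patched: list[tuple[torch.nn.Conv2d, str]] = []
    try:
        if seamless_axes:  # e.g. ["x"], ["y"], or ["x", "y"]
            for module in model.modules():
                if isinstance(module, torch.nn.Conv2d):
                    patched.append((module, module.padding_mode))
                    module.padding_mode = "circular"  # simplified: real code wraps per axis
        yield model
    finally:
        for module, original_mode in patched:
            module.padding_mode = original_mode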

invokeai/app/invocations/spandrel_image_to_image.py

Lines changed: 42 additions & 11 deletions
@@ -23,7 +23,7 @@
 from invokeai.backend.tiles.utils import TBLR, Tile


-@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.2.0")
+@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.3.0")
 class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
     """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel)."""

@@ -36,16 +36,6 @@ class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
     tile_size: int = InputField(
         default=512, description="The tile size for tiled image-to-image. Set to 0 to disable tiling."
     )
-    scale: float = InputField(
-        default=4.0,
-        gt=0.0,
-        le=16.0,
-        description="The final scale of the output image. If the model does not upscale the image, this will be ignored.",
-    )
-    fit_to_multiple_of_8: bool = InputField(
-        default=False,
-        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
-    )

     @classmethod
     def scale_tile(cls, tile: Tile, scale: int) -> Tile:
@@ -152,6 +142,47 @@ def upscale_image(

         return pil_image

+    @torch.inference_mode()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
+        # revisit this.
+        image = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        # Load the model.
+        spandrel_model_info = context.models.load(self.image_to_image_model)
+
+        # Do the upscaling.
+        with spandrel_model_info as spandrel_model:
+            assert isinstance(spandrel_model, SpandrelImageToImageModel)
+
+            # Upscale the image
+            pil_image = self.upscale_image(image, self.tile_size, spandrel_model, context.util.is_canceled)
+
+        image_dto = context.images.save(image=pil_image)
+        return ImageOutput.build(image_dto)
+
+
+@invocation(
+    "spandrel_image_to_image_autoscale",
+    title="Image-to-Image (Autoscale)",
+    tags=["upscale"],
+    category="upscale",
+    version="1.0.0",
+)
+class SpandrelImageToImageAutoscaleInvocation(SpandrelImageToImageInvocation):
+    """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel) until the target scale is reached."""
+
+    scale: float = InputField(
+        default=4.0,
+        gt=0.0,
+        le=16.0,
+        description="The final scale of the output image. If the model does not upscale the image, this will be ignored.",
+    )
+    fit_to_multiple_of_8: bool = InputField(
+        default=False,
+        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
+    )
+
     @torch.inference_mode()
     def invoke(self, context: InvocationContext) -> ImageOutput:
         # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to