16 | 16 | from torchvision.transforms.functional import resize as tv_resize
17 | 17 | from transformers import CLIPVisionModelWithProjection
18 | 18 |
   | 19 | +from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
19 | 20 | from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
   | 21 | +from invokeai.app.invocations.controlnet_image_processors import ControlField
20 | 22 | from invokeai.app.invocations.fields import (
21 | 23 |     ConditioningField,
22 | 24 |     DenoiseMaskField,
27 | 29 |     UIType,
28 | 30 | )
29 | 31 | from invokeai.app.invocations.ip_adapter import IPAdapterField
   | 32 | +from invokeai.app.invocations.model import ModelIdentifierField, UNetField
30 | 33 | from invokeai.app.invocations.primitives import LatentsOutput
31 | 34 | from invokeai.app.invocations.t2i_adapter import T2IAdapterField
32 | 35 | from invokeai.app.services.shared.invocation_context import InvocationContext
36 | 39 | from invokeai.backend.model_manager import BaseModelType
37 | 40 | from invokeai.backend.model_patcher import ModelPatcher
38 | 41 | from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
   | 42 | +from invokeai.backend.stable_diffusion.diffusers_pipeline import (
   | 43 | +    ControlNetData,
   | 44 | +    StableDiffusionGeneratorPipeline,
   | 45 | +    T2IAdapterData,
   | 46 | +)
39 | 47 | from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
40 | 48 |     BasicConditioningInfo,
41 | 49 |     IPAdapterConditioningInfo,
45 | 53 |     TextConditioningData,
46 | 54 |     TextConditioningRegions,
47 | 55 | )
   | 56 | +from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
   | 57 | +from invokeai.backend.util.devices import TorchDevice
48 | 58 | from invokeai.backend.util.mask import to_standard_float_mask
49 | 59 | from invokeai.backend.util.silence_warnings import SilenceWarnings
50 | 60 |
51 |    | -from ...backend.stable_diffusion.diffusers_pipeline import (
52 |    | -    ControlNetData,
53 |    | -    StableDiffusionGeneratorPipeline,
54 |    | -    T2IAdapterData,
55 |    | -)
56 |    | -from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
57 |    | -from ...backend.util.devices import TorchDevice
58 |    | -from .baseinvocation import BaseInvocation, invocation
59 |    | -from .controlnet_image_processors import ControlField
60 |    | -from .model import ModelIdentifierField, UNetField
61 |    | -
62 | 61 |
63 | 62 | def get_scheduler(
64 | 63 |     context: InvocationContext,
@@ -658,155 +657,155 @@ def prep_inpaint_mask(
658 | 657 |         return 1 - mask, masked_latents, self.denoise_mask.gradient
659 | 658 |
660 | 659 |     @torch.no_grad()
    | 660 | +    @SilenceWarnings()  # This quenches the NSFW nag from diffusers.
661 | 661 |     def invoke(self, context: InvocationContext) -> LatentsOutput:
662 |     | -        with SilenceWarnings():  # this quenches NSFW nag from diffusers
663 |     | -            seed = None
664 |     | -            noise = None
665 |     | -            if self.noise is not None:
666 |     | -                noise = context.tensors.load(self.noise.latents_name)
667 |     | -                seed = self.noise.seed
668 |     | -
669 |     | -            if self.latents is not None:
670 |     | -                latents = context.tensors.load(self.latents.latents_name)
671 |     | -                if seed is None:
672 |     | -                    seed = self.latents.seed
673 |     | -
674 |     | -                if noise is not None and noise.shape[1:] != latents.shape[1:]:
675 |     | -                    raise Exception(f"Incompatable 'noise' and 'latents' shapes: {latents.shape=} {noise.shape=}")
676 |     | -
677 |     | -            elif noise is not None:
678 |     | -                latents = torch.zeros_like(noise)
679 |     | -            else:
680 |     | -                raise Exception("'latents' or 'noise' must be provided!")
681 |     | -
    | 662 | +        seed = None
    | 663 | +        noise = None
    | 664 | +        if self.noise is not None:
    | 665 | +            noise = context.tensors.load(self.noise.latents_name)
    | 666 | +            seed = self.noise.seed
    | 667 | +
    | 668 | +        if self.latents is not None:
    | 669 | +            latents = context.tensors.load(self.latents.latents_name)
682 | 670 |             if seed is None:
683 |     | -                seed = 0
    | 671 | +                seed = self.latents.seed
684 | 672 |
685 |     | -            mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
    | 673 | +            if noise is not None and noise.shape[1:] != latents.shape[1:]:
    | 674 | +                raise Exception(f"Incompatable 'noise' and 'latents' shapes: {latents.shape=} {noise.shape=}")
686 | 675 |
687 |     | -            # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
688 |     | -            # below. Investigate whether this is appropriate.
689 |     | -            t2i_adapter_data = self.run_t2i_adapters(
690 |     | -                context,
691 |     | -                self.t2i_adapter,
692 |     | -                latents.shape,
693 |     | -                do_classifier_free_guidance=True,
694 |     | -            )
    | 676 | +        elif noise is not None:
    | 677 | +            latents = torch.zeros_like(noise)
    | 678 | +        else:
    | 679 | +            raise Exception("'latents' or 'noise' must be provided!")
695 | 680 |
696 |     | -            ip_adapters: List[IPAdapterField] = []
697 |     | -            if self.ip_adapter is not None:
698 |     | -                # ip_adapter could be a list or a single IPAdapterField. Normalize to a list here.
699 |     | -                if isinstance(self.ip_adapter, list):
700 |     | -                    ip_adapters = self.ip_adapter
701 |     | -                else:
702 |     | -                    ip_adapters = [self.ip_adapter]
703 |     | -
704 |     | -            # If there are IP adapters, the following line runs the adapters' CLIPVision image encoders to return
705 |     | -            # a series of image conditioning embeddings. This is being done here rather than in the
706 |     | -            # big model context below in order to use less VRAM on low-VRAM systems.
707 |     | -            # The image prompts are then passed to prep_ip_adapter_data().
708 |     | -            image_prompts = self.prep_ip_adapter_image_prompts(context=context, ip_adapters=ip_adapters)
709 |     | -
710 |     | -            # get the unet's config so that we can pass the base to dispatch_progress()
711 |     | -            unet_config = context.models.get_config(self.unet.unet.key)
712 |     | -
713 |     | -            def step_callback(state: PipelineIntermediateState) -> None:
714 |     | -                context.util.sd_step_callback(state, unet_config.base)
715 |     | -
716 |     | -            def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
717 |     | -                for lora in self.unet.loras:
718 |     | -                    lora_info = context.models.load(lora.lora)
719 |     | -                    assert isinstance(lora_info.model, LoRAModelRaw)
720 |     | -                    yield (lora_info.model, lora.weight)
721 |     | -                    del lora_info
722 |     | -                return
723 |     | -
724 |     | -            unet_info = context.models.load(self.unet.unet)
725 |     | -            assert isinstance(unet_info.model, UNet2DConditionModel)
726 |     | -            with (
727 |     | -                ExitStack() as exit_stack,
728 |     | -                unet_info.model_on_device() as (model_state_dict, unet),
729 |     | -                ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
730 |     | -                set_seamless(unet, self.unet.seamless_axes),  # FIXME
731 |     | -                # Apply the LoRA after unet has been moved to its target device for faster patching.
732 |     | -                ModelPatcher.apply_lora_unet(
733 |     | -                    unet,
734 |     | -                    loras=_lora_loader(),
735 |     | -                    model_state_dict=model_state_dict,
736 |     | -                ),
737 |     | -            ):
738 |     | -                assert isinstance(unet, UNet2DConditionModel)
739 |     | -                latents = latents.to(device=unet.device, dtype=unet.dtype)
740 |     | -                if noise is not None:
741 |     | -                    noise = noise.to(device=unet.device, dtype=unet.dtype)
742 |     | -                if mask is not None:
743 |     | -                    mask = mask.to(device=unet.device, dtype=unet.dtype)
744 |     | -                if masked_latents is not None:
745 |     | -                    masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)
746 |     | -
747 |     | -                scheduler = get_scheduler(
748 |     | -                    context=context,
749 |     | -                    scheduler_info=self.unet.scheduler,
750 |     | -                    scheduler_name=self.scheduler,
751 |     | -                    seed=seed,
752 |     | -                )
    | 681 | +        if seed is None:
    | 682 | +            seed = 0
753 | 683 |
754 |     | -                pipeline = self.create_pipeline(unet, scheduler)
    | 684 | +        mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
755 | 685 |
756 |     | -                _, _, latent_height, latent_width = latents.shape
757 |     | -                conditioning_data = self.get_conditioning_data(
758 |     | -                    context=context, unet=unet, latent_height=latent_height, latent_width=latent_width
759 |     | -                )
    | 686 | +        # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
    | 687 | +        # below. Investigate whether this is appropriate.
    | 688 | +        t2i_adapter_data = self.run_t2i_adapters(
    | 689 | +            context,
    | 690 | +            self.t2i_adapter,
    | 691 | +            latents.shape,
    | 692 | +            do_classifier_free_guidance=True,
    | 693 | +        )
760 | 694 |
761 |     | -                controlnet_data = self.prep_control_data(
762 |     | -                    context=context,
763 |     | -                    control_input=self.control,
764 |     | -                    latents_shape=latents.shape,
765 |     | -                    # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
766 |     | -                    do_classifier_free_guidance=True,
767 |     | -                    exit_stack=exit_stack,
768 |     | -                )
    | 695 | +        ip_adapters: List[IPAdapterField] = []
    | 696 | +        if self.ip_adapter is not None:
    | 697 | +            # ip_adapter could be a list or a single IPAdapterField. Normalize to a list here.
    | 698 | +            if isinstance(self.ip_adapter, list):
    | 699 | +                ip_adapters = self.ip_adapter
    | 700 | +            else:
    | 701 | +                ip_adapters = [self.ip_adapter]
    | 702 | +
    | 703 | +        # If there are IP adapters, the following line runs the adapters' CLIPVision image encoders to return
    | 704 | +        # a series of image conditioning embeddings. This is being done here rather than in the
    | 705 | +        # big model context below in order to use less VRAM on low-VRAM systems.
    | 706 | +        # The image prompts are then passed to prep_ip_adapter_data().
    | 707 | +        image_prompts = self.prep_ip_adapter_image_prompts(context=context, ip_adapters=ip_adapters)
    | 708 | +
    | 709 | +        # get the unet's config so that we can pass the base to dispatch_progress()
    | 710 | +        unet_config = context.models.get_config(self.unet.unet.key)
    | 711 | +
    | 712 | +        def step_callback(state: PipelineIntermediateState) -> None:
    | 713 | +            context.util.sd_step_callback(state, unet_config.base)
    | 714 | +
    | 715 | +        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
    | 716 | +            for lora in self.unet.loras:
    | 717 | +                lora_info = context.models.load(lora.lora)
    | 718 | +                assert isinstance(lora_info.model, LoRAModelRaw)
    | 719 | +                yield (lora_info.model, lora.weight)
    | 720 | +                del lora_info
    | 721 | +            return
    | 722 | +
    | 723 | +        unet_info = context.models.load(self.unet.unet)
    | 724 | +        assert isinstance(unet_info.model, UNet2DConditionModel)
    | 725 | +        with (
    | 726 | +            ExitStack() as exit_stack,
    | 727 | +            unet_info.model_on_device() as (model_state_dict, unet),
    | 728 | +            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
    | 729 | +            set_seamless(unet, self.unet.seamless_axes),  # FIXME
    | 730 | +            # Apply the LoRA after unet has been moved to its target device for faster patching.
    | 731 | +            ModelPatcher.apply_lora_unet(
    | 732 | +                unet,
    | 733 | +                loras=_lora_loader(),
    | 734 | +                model_state_dict=model_state_dict,
    | 735 | +            ),
    | 736 | +        ):
    | 737 | +            assert isinstance(unet, UNet2DConditionModel)
    | 738 | +            latents = latents.to(device=unet.device, dtype=unet.dtype)
    | 739 | +            if noise is not None:
    | 740 | +                noise = noise.to(device=unet.device, dtype=unet.dtype)
    | 741 | +            if mask is not None:
    | 742 | +                mask = mask.to(device=unet.device, dtype=unet.dtype)
    | 743 | +            if masked_latents is not None:
    | 744 | +                masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)
    | 745 | +
    | 746 | +            scheduler = get_scheduler(
    | 747 | +                context=context,
    | 748 | +                scheduler_info=self.unet.scheduler,
    | 749 | +                scheduler_name=self.scheduler,
    | 750 | +                seed=seed,
    | 751 | +            )
769 | 752 |
770 |     | -                ip_adapter_data = self.prep_ip_adapter_data(
771 |     | -                    context=context,
772 |     | -                    ip_adapters=ip_adapters,
773 |     | -                    image_prompts=image_prompts,
774 |     | -                    exit_stack=exit_stack,
775 |     | -                    latent_height=latent_height,
776 |     | -                    latent_width=latent_width,
777 |     | -                    dtype=unet.dtype,
778 |     | -                )
    | 753 | +            pipeline = self.create_pipeline(unet, scheduler)
779 | 754 |
780 |     | -                num_inference_steps, timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
781 |     | -                    scheduler,
782 |     | -                    device=unet.device,
783 |     | -                    steps=self.steps,
784 |     | -                    denoising_start=self.denoising_start,
785 |     | -                    denoising_end=self.denoising_end,
786 |     | -                    seed=seed,
787 |     | -                )
    | 755 | +            _, _, latent_height, latent_width = latents.shape
    | 756 | +            conditioning_data = self.get_conditioning_data(
    | 757 | +                context=context, unet=unet, latent_height=latent_height, latent_width=latent_width
    | 758 | +            )
788 | 759 |
789 |     | -                result_latents = pipeline.latents_from_embeddings(
790 |     | -                    latents=latents,
791 |     | -                    timesteps=timesteps,
792 |     | -                    init_timestep=init_timestep,
793 |     | -                    noise=noise,
794 |     | -                    seed=seed,
795 |     | -                    mask=mask,
796 |     | -                    masked_latents=masked_latents,
797 |     | -                    gradient_mask=gradient_mask,
798 |     | -                    num_inference_steps=num_inference_steps,
799 |     | -                    scheduler_step_kwargs=scheduler_step_kwargs,
800 |     | -                    conditioning_data=conditioning_data,
801 |     | -                    control_data=controlnet_data,
802 |     | -                    ip_adapter_data=ip_adapter_data,
803 |     | -                    t2i_adapter_data=t2i_adapter_data,
804 |     | -                    callback=step_callback,
805 |     | -                )
    | 760 | +            controlnet_data = self.prep_control_data(
    | 761 | +                context=context,
    | 762 | +                control_input=self.control,
    | 763 | +                latents_shape=latents.shape,
    | 764 | +                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
    | 765 | +                do_classifier_free_guidance=True,
    | 766 | +                exit_stack=exit_stack,
    | 767 | +            )
    | 768 | +
    | 769 | +            ip_adapter_data = self.prep_ip_adapter_data(
    | 770 | +                context=context,
    | 771 | +                ip_adapters=ip_adapters,
    | 772 | +                image_prompts=image_prompts,
    | 773 | +                exit_stack=exit_stack,
    | 774 | +                latent_height=latent_height,
    | 775 | +                latent_width=latent_width,
    | 776 | +                dtype=unet.dtype,
    | 777 | +            )
    | 778 | +
    | 779 | +            num_inference_steps, timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
    | 780 | +                scheduler,
    | 781 | +                device=unet.device,
    | 782 | +                steps=self.steps,
    | 783 | +                denoising_start=self.denoising_start,
    | 784 | +                denoising_end=self.denoising_end,
    | 785 | +                seed=seed,
    | 786 | +            )
    | 787 | +
    | 788 | +            result_latents = pipeline.latents_from_embeddings(
    | 789 | +                latents=latents,
    | 790 | +                timesteps=timesteps,
    | 791 | +                init_timestep=init_timestep,
    | 792 | +                noise=noise,
    | 793 | +                seed=seed,
    | 794 | +                mask=mask,
    | 795 | +                masked_latents=masked_latents,
    | 796 | +                gradient_mask=gradient_mask,
    | 797 | +                num_inference_steps=num_inference_steps,
    | 798 | +                scheduler_step_kwargs=scheduler_step_kwargs,
    | 799 | +                conditioning_data=conditioning_data,
    | 800 | +                control_data=controlnet_data,
    | 801 | +                ip_adapter_data=ip_adapter_data,
    | 802 | +                t2i_adapter_data=t2i_adapter_data,
    | 803 | +                callback=step_callback,
    | 804 | +            )
806 | 805 |
807 |     | -            # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
808 |     | -            result_latents = result_latents.to("cpu")
809 |     | -            TorchDevice.empty_cache()
    | 806 | +        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
    | 807 | +        result_latents = result_latents.to("cpu")
    | 808 | +        TorchDevice.empty_cache()
810 | 809 |
811 |     | -            name = context.tensors.save(tensor=result_latents)
    | 810 | +        name = context.tensors.save(tensor=result_latents)
812 | 811 |         return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
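
The diff above swaps the `with SilenceWarnings():` wrapper for a `@SilenceWarnings()` decorator on `invoke()`, which is why most of the method body is shown as removed and re-added one indentation level shallower. That change assumes the helper can act both as a context manager and as a decorator. The sketch below illustrates one common way to get that dual behaviour via `contextlib.ContextDecorator`; it is an assumption for illustration only, the class name `QuietWarnings` is hypothetical, and it is not the project's actual `SilenceWarnings` implementation (which may also quiet library loggers, not just the `warnings` module).

```python
import warnings
from contextlib import ContextDecorator


class QuietWarnings(ContextDecorator):
    """Hypothetical sketch: suppress Python warnings while active.

    Subclassing ContextDecorator lets the same class be used either as
    `with QuietWarnings():` (the old call-site style) or as
    `@QuietWarnings()` (the decorator form shown in the diff).
    """

    def __enter__(self) -> "QuietWarnings":
        # Delegate to warnings.catch_warnings() so the previous filter
        # state is restored automatically on exit.
        self._catcher = warnings.catch_warnings()
        self._catcher.__enter__()
        warnings.simplefilter("ignore")
        return self

    def __exit__(self, *exc_info) -> bool:
        self._catcher.__exit__(*exc_info)
        return False  # never swallow exceptions


@QuietWarnings()  # decorator form, mirroring `@SilenceWarnings()` above
def _noisy() -> None:
    warnings.warn("suppressed while the decorator is active")


_noisy()
```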