8    | 8    |  import torchvision
9    | 9    |  import torchvision.transforms as T
10   | 10   |  from diffusers.configuration_utils import ConfigMixin
11   |      | -from diffusers.image_processor import VaeImageProcessor
12   | 11   |  from diffusers.models.adapter import T2IAdapter
13   |      | -from diffusers.models.attention_processor import (
14   |      | -    AttnProcessor2_0,
15   |      | -    LoRAAttnProcessor2_0,
16   |      | -    LoRAXFormersAttnProcessor,
17   |      | -    XFormersAttnProcessor,
18   |      | -)
19   |      | -from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
20   |      | -from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
21   | 12   |  from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
22   | 13   |  from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
23   | 14   |  from diffusers.schedulers.scheduling_tcd import TCDScheduler
|
38   | 29   |      LatentsField,
39   | 30   |      OutputField,
40   | 31   |      UIType,
41   |      | -    WithBoard,
42   |      | -    WithMetadata,
43   | 32   |  )
44   | 33   |  from invokeai.app.invocations.ip_adapter import IPAdapterField
45   |      | -from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput
     | 34   | +from invokeai.app.invocations.primitives import DenoiseMaskOutput, LatentsOutput
46   | 35   |  from invokeai.app.invocations.t2i_adapter import T2IAdapterField
47   | 36   |  from invokeai.app.services.shared.invocation_context import InvocationContext
48   | 37   |  from invokeai.app.util.controlnet_utils import prepare_control_image

@@ -1033,83 +1022,3 @@ def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
1033 | 1022 | 
1034 | 1023 |          name = context.tensors.save(tensor=result_latents)
1035 | 1024 |          return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
1036 |      | -
1037 |      | -
1038 |      | -@invocation(
1039 |      | -    "l2i",
1040 |      | -    title="Latents to Image",
1041 |      | -    tags=["latents", "image", "vae", "l2i"],
1042 |      | -    category="latents",
1043 |      | -    version="1.2.2",
1044 |      | -)
1045 |      | -class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
1046 |      | -    """Generates an image from latents."""
1047 |      | -
1048 |      | -    latents: LatentsField = InputField(
1049 |      | -        description=FieldDescriptions.latents,
1050 |      | -        input=Input.Connection,
1051 |      | -    )
1052 |      | -    vae: VAEField = InputField(
1053 |      | -        description=FieldDescriptions.vae,
1054 |      | -        input=Input.Connection,
1055 |      | -    )
1056 |      | -    tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
1057 |      | -    fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
1058 |      | -
1059 |      | -    @torch.no_grad()
1060 |      | -    def invoke(self, context: InvocationContext) -> ImageOutput:
1061 |      | -        latents = context.tensors.load(self.latents.latents_name)
1062 |      | -
1063 |      | -        vae_info = context.models.load(self.vae.vae)
1064 |      | -        assert isinstance(vae_info.model, (UNet2DConditionModel, AutoencoderKL, AutoencoderTiny))
1065 |      | -        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
1066 |      | -            assert isinstance(vae, torch.nn.Module)
1067 |      | -            latents = latents.to(vae.device)
1068 |      | -            if self.fp32:
1069 |      | -                vae.to(dtype=torch.float32)
1070 |      | -
1071 |      | -                use_torch_2_0_or_xformers = hasattr(vae.decoder, "mid_block") and isinstance(
1072 |      | -                    vae.decoder.mid_block.attentions[0].processor,
1073 |      | -                    (
1074 |      | -                        AttnProcessor2_0,
1075 |      | -                        XFormersAttnProcessor,
1076 |      | -                        LoRAXFormersAttnProcessor,
1077 |      | -                        LoRAAttnProcessor2_0,
1078 |      | -                    ),
1079 |      | -                )
1080 |      | -                # if xformers or torch_2_0 is used attention block does not need
1081 |      | -                # to be in float32 which can save lots of memory
1082 |      | -                if use_torch_2_0_or_xformers:
1083 |      | -                    vae.post_quant_conv.to(latents.dtype)
1084 |      | -                    vae.decoder.conv_in.to(latents.dtype)
1085 |      | -                    vae.decoder.mid_block.to(latents.dtype)
1086 |      | -                else:
1087 |      | -                    latents = latents.float()
1088 |      | -
1089 |      | -            else:
1090 |      | -                vae.to(dtype=torch.float16)
1091 |      | -                latents = latents.half()
1092 |      | -
1093 |      | -            if self.tiled or context.config.get().force_tiled_decode:
1094 |      | -                vae.enable_tiling()
1095 |      | -            else:
1096 |      | -                vae.disable_tiling()
1097 |      | -
1098 |      | -            # clear memory as vae decode can request a lot
1099 |      | -            TorchDevice.empty_cache()
1100 |      | -
1101 |      | -            with torch.inference_mode():
1102 |      | -                # copied from diffusers pipeline
1103 |      | -                latents = latents / vae.config.scaling_factor
1104 |      | -                image = vae.decode(latents, return_dict=False)[0]
1105 |      | -                image = (image / 2 + 0.5).clamp(0, 1)  # denormalize
1106 |      | -                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
1107 |      | -                np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()
1108 |      | -
1109 |      | -                image = VaeImageProcessor.numpy_to_pil(np_image)[0]
1110 |      | -
1111 |      | -        TorchDevice.empty_cache()
1112 |      | -
1113 |      | -        image_dto = context.images.save(image=image)
1114 |      | -
1115 |      | -        return ImageOutput.build(image_dto)
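For reference, the decode path removed above is the standard diffusers latents-to-image conversion: rescale the latents by the VAE's scaling factor, decode, denormalize to [0, 1], and convert to PIL. Below is a minimal standalone sketch of that logic; the `stabilityai/sd-vae-ft-mse` checkpoint, default device/dtype, and the `latents_to_pil` helper name are illustrative assumptions and not part of this diff.

```python
import torch
from diffusers.image_processor import VaeImageProcessor
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL

# Illustrative checkpoint choice (assumption): any Stable Diffusion AutoencoderKL works here.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
vae.eval()


@torch.no_grad()
def latents_to_pil(latents: torch.Tensor):
    """Decode SD latents of shape (B, 4, H/8, W/8) into a PIL image, mirroring the removed invoke()."""
    latents = latents.to(device=vae.device, dtype=vae.dtype)
    latents = latents / vae.config.scaling_factor               # undo the SD latent scaling
    image = vae.decode(latents, return_dict=False)[0]           # decode to pixel space, roughly [-1, 1]
    image = (image / 2 + 0.5).clamp(0, 1)                       # denormalize to [0, 1]
    np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()  # NCHW -> NHWC, float32 for PIL conversion
    return VaeImageProcessor.numpy_to_pil(np_image)[0]
```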