# Debugging notes - for sd1.5, the original apply_model method is wrapped in modules.sd_hijack_utils and ultimately calls ldm.models.diffusion.ddpm.LatentDiffusion
# For sdxl - OpenAIWrapper will be called, which will call the underlying diffusion_model
-
+ # When controlnet is enabled, the underlying model is not available to use, therefore we skip it
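+ # Illustration of the two dispatch paths (both call forms appear verbatim in noise_from_model below):
+ #   sd1.5: eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)
+ #   sdxl:  eps = shared.sd_model.model(x_in * c_in, t, {"crossattn": cond_in["c_crossattn"][0]})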


def find_noise_for_image(p, cond, uncond, cfg_scale, steps):
    x = p.init_latent
@@ -78,11 +78,11 @@ def find_noise_for_image(p, cond, uncond, cfg_scale, steps):
    return x / x.std()


- Cached = namedtuple("Cached", ["noise", "cfg_scale", "steps", "latent", "original_prompt", "original_negative_prompt", "sigma_adjustment"])
+ Cached = namedtuple("Cached", ["noise", "cfg_scale", "steps", "latent", "original_prompt", "original_negative_prompt", "sigma_adjustment", "second_order_correction", "noise_sigma_intensity"])


# Based on changes suggested by briansemrau in https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/736
- def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps):
+ def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps, correction_factor, sigma_intensity):
    x = p.init_latent

    s_in = x.new_ones([x.shape[0]])
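The two new namedtuple fields become part of the cached state compared in run() below, so moving either new slider invalidates the previously reconstructed noise. A minimal standalone sketch of that invalidation (illustrative values, not webui code):

    from collections import namedtuple

    Cached = namedtuple("Cached", ["noise", "cfg_scale", "steps", "latent", "original_prompt",
                                   "original_negative_prompt", "sigma_adjustment",
                                   "second_order_correction", "noise_sigma_intensity"])

    cache = Cached(None, 1.0, 50, None, "", "", True, 0.5, 0.5)
    # a changed slider value breaks the comparison, forcing recomputation
    same_params = cache.second_order_correction == 0.25  # False -> recompute noise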
@@ -98,11 +98,7 @@ def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps):
    for i in trange(1, len(sigmas)):
        shared.state.sampling_step += 1

-
-         x_in = torch.cat([x] * 2)
        sigma_in = torch.cat([sigmas[i - 1] * s_in] * 2)
-
-
        if shared.sd_model.is_sdxl:
            cond_tensor = cond['crossattn']
            uncond_tensor = uncond['crossattn']
@@ -113,46 +109,73 @@ def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps):
        image_conditioning = torch.cat([p.image_conditioning] * 2)
        cond_in = {"c_concat": [image_conditioning], "c_crossattn": [cond_in]}

-         c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)[skip:]]
-
        if i == 1:
            t = dnw.sigma_to_t(torch.cat([sigmas[i] * s_in] * 2))
+             dt = (sigmas[i] - sigmas[i - 1]) / (2 * sigmas[i])
        else:
            t = dnw.sigma_to_t(sigma_in)
+             dt = (sigmas[i] - sigmas[i - 1]) / sigmas[i - 1]
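+         # dt is the sigma step in relative form: the removed code stepped x by
+         # d * (sigmas[i] - sigmas[i-1]) with d = (x - denoised) / sigma; x cancels
+         # out of (x - denoised), so the same update becomes -eps * c_out * dt
+         # (see noise_from_model below).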
+
+         noise = noise_from_model(x, t, dt, sigma_in, cond_in, cfg_scale, dnw, skip)
+
+         if correction_factor > 0:
+             recalculated_noise = noise_from_model(x + noise, t, dt, sigma_in, cond_in, cfg_scale, dnw, skip)
+             noise = recalculated_noise * correction_factor + noise * (1 - correction_factor)
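+             # Second-order correction: the model is evaluated again at the Euler
+             # prediction x + noise and the two estimates are blended; a factor of
+             # 0.5 averages them, roughly a Heun / trapezoidal step.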
+
+         x += noise
+
+         sd_samplers_common.store_latent(x)
+
+         # This shouldn't be necessary, but solved some VRAM issues
+         #del x_in, sigma_in, cond_in, c_out, c_in, t
+         #del eps, denoised_uncond, denoised_cond, denoised, dt
+
+         shared.state.nextjob()
+
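+     # sigma_intensity blends the two normalizations used in this file: 0 reproduces
+     # x / x.std() (as in find_noise_for_image), 1 reproduces the previous x / sigmas[-1].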
+     return x / (x.std() * (1 - sigma_intensity) + sigmas[-1] * sigma_intensity)


+ def noise_from_model(x, t, dt, sigma_in, cond_in, cfg_scale, dnw, skip):
+     if cfg_scale == 1:  # Case where denoised_uncond should not be calculated - 50% speedup, also good for sdxl in experiments
+         x_in = x
+         sigma_in = sigma_in[1:2]
+         c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)[skip:]]
+         cond_in = {"c_concat": [cond_in["c_concat"][0][1:2]], "c_crossattn": [cond_in["c_crossattn"][0][1:2]]}
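+         # Without CFG the uncond/cond pair is unnecessary, so only the conditioned
+         # half of each batched tensor (index 1) is kept; this assumes batch size 1.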
        if shared.sd_model.is_sdxl:
            num_classes_hack = shared.sd_model.model.diffusion_model.num_classes
            shared.sd_model.model.diffusion_model.num_classes = None
+             print("\nDIMS")
+             print(x_in.shape, c_in.shape, t[1:2].shape, cond_in["c_crossattn"][0].shape)
            try:
-                 eps = shared.sd_model.model(x_in * c_in, t, {"crossattn": cond_in["c_crossattn"][0]})
+                 eps = shared.sd_model.model(x_in * c_in, t[1:2], {"crossattn": cond_in["c_crossattn"][0]})
            finally:
                shared.sd_model.model.diffusion_model.num_classes = num_classes_hack
        else:
-             eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)
+             eps = shared.sd_model.apply_model(x_in * c_in, t[1:2], cond=cond_in)

-         denoised_uncond, denoised_cond = (x_in + eps * c_out).chunk(2)
+         return -eps * c_out * dt
+     else:
+         x_in = torch.cat([x] * 2)

-         denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale
+         c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)[skip:]]

-         if i == 1:
-             d = (x - denoised) / (2 * sigmas[i])
+         if shared.sd_model.is_sdxl:
+             num_classes_hack = shared.sd_model.model.diffusion_model.num_classes
+             shared.sd_model.model.diffusion_model.num_classes = None
+             print("\nDIMS")
+             print(x_in.shape, c_in.shape, t.shape, cond_in["c_crossattn"][0].shape)
+             try:
+                 eps = shared.sd_model.model(x_in * c_in, t, {"crossattn": cond_in["c_crossattn"][0]})
+             finally:
+                 shared.sd_model.model.diffusion_model.num_classes = num_classes_hack
        else:
-             d = (x - denoised) / sigmas[i - 1]
-
-         dt = sigmas[i] - sigmas[i - 1]
-         x = x + d * dt
-
-         sd_samplers_common.store_latent(x)
-
-         # This shouldn't be necessary, but solved some VRAM issues
-         del x_in, sigma_in, cond_in, c_out, c_in, t
-         del eps, denoised_uncond, denoised_cond, denoised, d, dt
+             eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)

-     shared.state.nextjob()
+         denoised_uncond, denoised_cond = (eps * c_out).chunk(2)

-     return x / sigmas[-1]
+         denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale

+         return -denoised * dt


class Script(scripts.Script):
    def __init__(self):
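Read together, find_noise_for_image_sigma_adjustment now runs an inverted Euler integration whose per-step noise estimate can be refined by a second model evaluation. A minimal standalone sketch of the scheme with a toy noise predictor (toy_eps, invert and all values here are illustrative, not webui APIs):

    import torch

    def toy_eps(x, sigma):
        # stand-in for the model's noise prediction at noise level sigma
        return -x / (1.0 + sigma)

    def invert(x, sigmas, correction_factor=0.5):
        for i in range(1, len(sigmas)):
            if i == 1:
                dt = (sigmas[i] - sigmas[i - 1]) / (2 * sigmas[i])
            else:
                dt = (sigmas[i] - sigmas[i - 1]) / sigmas[i - 1]
            noise = -toy_eps(x, sigmas[i - 1]) * dt
            if correction_factor > 0:
                # re-evaluate at the Euler prediction and blend the two estimates
                noise2 = -toy_eps(x + noise, sigmas[i - 1]) * dt
                noise = noise2 * correction_factor + noise * (1 - correction_factor)
            x = x + noise
        return x

    x0 = torch.randn(1, 4, 8, 8)            # latent to invert
    sigmas = torch.linspace(0.1, 10.0, 20)  # increasing noise schedule
    approx_noise = invert(x0, sigmas)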
@@ -183,17 +206,20 @@ def ui(self, is_img2img):
        cfg = gr.Slider(label="Decode CFG scale", minimum=0.0, maximum=15.0, step=0.1, value=1.0, elem_id=self.elem_id("cfg"))
        randomness = gr.Slider(label="Randomness", minimum=0.0, maximum=1.0, step=0.01, value=0.0, elem_id=self.elem_id("randomness"))
        sigma_adjustment = gr.Checkbox(label="Sigma adjustment for finding noise for image", value=False, elem_id=self.elem_id("sigma_adjustment"))
+         second_order_correction = gr.Slider(label="Correct noise by running model again", minimum=0.0, maximum=1.0, step=0.01, value=0.5, elem_id=self.elem_id("second_order_correction"))
+         noise_sigma_intensity = gr.Slider(label="Weight scaling std vs sigma based", minimum=-1.0, maximum=2.0, step=0.01, value=0.5, elem_id=self.elem_id("noise_sigma_intensity"))
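+         # second_order_correction feeds correction_factor (blend weight for the re-evaluated
+         # noise); noise_sigma_intensity feeds sigma_intensity (std- vs sigma-based scaling).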

        return [
            info,
            override_sampler,
            override_prompt, original_prompt, original_negative_prompt,
            override_steps, st,
            override_strength,
-             cfg, randomness, sigma_adjustment,
+             cfg, randomness, sigma_adjustment, second_order_correction,
+             noise_sigma_intensity
        ]

-     def run(self, p, _, override_sampler, override_prompt, original_prompt, original_negative_prompt, override_steps, st, override_strength, cfg, randomness, sigma_adjustment):
+     def run(self, p, _, override_sampler, override_prompt, original_prompt, original_negative_prompt, override_steps, st, override_strength, cfg, randomness, sigma_adjustment, second_order_correction, noise_sigma_intensity):
        # Override
        if override_sampler:
            p.sampler_name = "Euler"
@@ -211,7 +237,9 @@ def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subs
            same_params = self.cache is not None and self.cache.cfg_scale == cfg and self.cache.steps == st \
                and self.cache.original_prompt == original_prompt \
                and self.cache.original_negative_prompt == original_negative_prompt \
-                 and self.cache.sigma_adjustment == sigma_adjustment
+                 and self.cache.sigma_adjustment == sigma_adjustment \
+                 and self.cache.second_order_correction == second_order_correction \
+                 and self.cache.noise_sigma_intensity == noise_sigma_intensity
            same_everything = same_params and self.cache.latent.shape == lat.shape and np.abs(self.cache.latent - lat).sum() < 100

            rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p)
@@ -231,10 +259,10 @@ def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subs
                cond = p.sd_model.get_learned_conditioning(p.batch_size * [original_prompt])
                uncond = p.sd_model.get_learned_conditioning(p.batch_size * [original_negative_prompt])
                if sigma_adjustment:
-                     rec_noise = find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg, st)
+                     rec_noise = find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg, st, second_order_correction, noise_sigma_intensity)
                else:
                    rec_noise = find_noise_for_image(p, cond, uncond, cfg, st)
-                 self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment)
+                 self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment, second_order_correction, noise_sigma_intensity)

            combined_noise = ((1 - randomness) * rec_noise + randomness * rand_noise) / ((randomness**2 + (1 - randomness)**2) ** 0.5)
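The denominator in combined_noise keeps the variance of the mix constant: for independent unit-variance noise tensors, Var((1 - r)·a + r·b) = (1 - r)² + r², so dividing by the square root restores unit variance. A quick numerical check (illustrative only):

    import torch

    r = 0.3
    a, b = torch.randn(10**6), torch.randn(10**6)  # independent, unit variance
    mixed = ((1 - r) * a + r * b) / ((r**2 + (1 - r)**2) ** 0.5)
    print(mixed.std())  # ~1.0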