@@ -568,6 +568,8 @@ def __init__(
         objective = 'v',
         schedule_kwargs: dict = dict(),
         time_difference = 0.,
+        min_snr_loss_weight = True,
+        min_snr_gamma = 5,
         train_prob_self_cond = 0.9,
         scale = 1.  # this will be set to < 1. for better convergence when training on higher resolution images
     ):
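The two new constructor arguments gate the min-SNR loss weighting from "Efficient Diffusion Training via Min-SNR Weighting Strategy" (Hang et al., 2023); min_snr_gamma = 5 matches the clamp value recommended in the paper. A hypothetical construction after this change (the class name and the remaining arguments are assumptions; only the two min-SNR kwargs come from this diff):

    diffusion = GaussianDiffusion(   # hypothetical class name, not shown in this diff
        model,                       # assumed denoiser network
        objective = 'v',
        min_snr_loss_weight = True,  # enable min-SNR weighting (new)
        min_snr_gamma = 5            # clamp SNR at 5 (new)
    )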
@@ -611,6 +613,11 @@ def __init__(
 
         self.train_prob_self_cond = train_prob_self_cond
 
+        # min snr loss weight
+
+        self.min_snr_loss_weight = min_snr_loss_weight
+        self.min_snr_gamma = min_snr_gamma
+
     @property
     def device(self):
         return next(self.model.parameters()).device
@@ -811,16 +818,36 @@ def forward(self, img, *args, **kwargs):
 
         pred = self.model(noised_img, times, self_cond, self_latents)
 
-        if self.objective == 'x0':
-            target = img
-
-        elif self.objective == 'eps':
+        if self.objective == 'eps':
             target = noise
 
+        elif self.objective == 'x0':
+            target = img
+
         elif self.objective == 'v':
             target = alpha * noise - sigma * img
 
-        return F.mse_loss(pred, target)
+        loss = F.mse_loss(pred, target, reduction = 'none')
+        loss = reduce(loss, 'b ... -> b', 'mean')
+
+        # min snr loss weight
+
+        snr = (alpha * alpha) / (sigma * sigma)
+        maybe_clipped_snr = snr.clone()
+
+        if self.min_snr_loss_weight:
+            maybe_clipped_snr.clamp_(max = self.min_snr_gamma)
+
+        if self.objective == 'eps':
+            loss_weight = maybe_clipped_snr / snr
+
+        elif self.objective == 'x0':
+            loss_weight = maybe_clipped_snr
+
+        elif self.objective == 'v':
+            loss_weight = maybe_clipped_snr / (snr + 1)
+
+        return (loss * loss_weight).mean()
 
 # dataset classes
 
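Written in x0 space, the min-SNR objective weights every timestep by min(SNR, gamma), where SNR = alpha^2 / sigma^2. Because the eps-prediction loss equals SNR times the x0 loss, and the v-prediction loss equals (SNR + 1) times it (for a variance-preserving schedule with alpha^2 + sigma^2 = 1), dividing the clipped SNR by those factors makes the three branches above equivalent. A minimal standalone sketch of that weighting, assuming alpha and sigma are per-sample tensors from a variance-preserving schedule:

    import torch

    def min_snr_loss_weight(alpha, sigma, objective = 'v', gamma = 5.):
        snr = (alpha * alpha) / (sigma * sigma)
        clipped_snr = snr.clamp(max = gamma)    # min(SNR, gamma)

        if objective == 'eps':
            return clipped_snr / snr            # undo the SNR factor of the eps loss
        elif objective == 'x0':
            return clipped_snr
        elif objective == 'v':
            return clipped_snr / (snr + 1)      # undo the (SNR + 1) factor of the v loss

        raise ValueError(f'unknown objective {objective}')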
@@ -872,7 +899,7 @@ def __init__(
         train_num_steps = 100000,
         ema_update_every = 10,
         ema_decay = 0.995,
-        adam_betas = (0.9, 0.99),
+        betas = (0.9, 0.99),
         save_and_sample_every = 1000,
         num_samples = 25,
         results_folder = './results',
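Renaming adam_betas to betas is a breaking change to the trainer signature, so existing calls that pass adam_betas = ... need updating. A hypothetical call after this change (the positional arguments and the train_lr value are assumptions; only the betas keyword comes from this diff):

    trainer = Trainer(
        diffusion,              # assumed diffusion model instance
        './data',               # assumed dataset folder
        train_lr = 1e-4,        # assumed value
        betas = (0.9, 0.99)     # formerly adam_betas
    )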
@@ -912,7 +939,7 @@ def __init__(
 
         # optimizer
 
-        self.opt = Adam(diffusion_model.parameters(), lr = train_lr, betas = adam_betas)
+        self.opt = Adam(diffusion_model.parameters(), lr = train_lr, betas = betas)
 
         # for logging results in a folder periodically
 