ashawkey · elliottzheng · Feb 14, 2023
diff --git a/gradio_app.py b/gradio_app.py
@@ -20,6 +20,7 @@
 parser.add_argument('--workspace', type=str, default='trial_gradio')
 parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
 parser.add_argument('--seed', type=int, default=0)
+parser.add_argument('--deterministic', action='store_true', help="make the training deterministic, but slower")
 
 parser.add_argument('--save_mesh', action='store_true', help="export an obj mesh with texture")
 parser.add_argument('--mcubes_resolution', type=int, default=256, help="mcubes resolution for extracting mesh")
@@ -39,7 +40,7 @@
 parser.add_argument('--uniform_sphere_rate', type=float, default=0.5, help="likelihood of sampling camera location uniformly on the sphere surface area")
 # model options
 parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
-parser.add_argument('--density_activation', type=str, default='softplus', choices=['softplus', 'exp'] help="density activation function")
+parser.add_argument('--density_activation', type=str, default='softplus', choices=['softplus', 'exp'], help="density activation function")
 parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
 parser.add_argument('--blob_density', type=float, default=10, help="max (center) density for the density blob")
 parser.add_argument('--blob_radius', type=float, default=0.3, help="control the radius for the density blob")
@@ -106,7 +107,7 @@
 
 if opt.guidance == 'stable-diffusion':
     from nerf.sd import StableDiffusion
-    guidance = StableDiffusion(device, opt.sd_version, opt.hf_key)
+    guidance = StableDiffusion(opt, device, opt.sd_version, opt.hf_key)
 elif opt.guidance == 'clip':
     from nerf.clip import CLIP
     guidance = CLIP(device)

diff --git a/main.py b/main.py
@@ -21,6 +21,7 @@
     parser.add_argument('--workspace', type=str, default='workspace')
     parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
     parser.add_argument('--seed', type=int, default=0)
+    parser.add_argument('--deterministic', action='store_true', help="make the training more deterministic, but slower")
 
     parser.add_argument('--save_mesh', action='store_true', help="export an obj mesh with texture")
     parser.add_argument('--mcubes_resolution', type=int, default=256, help="mcubes resolution for extracting mesh")
@@ -111,7 +112,7 @@
 
     print(opt)
 
-    seed_everything(opt.seed)
+    seed_everything(opt.seed, deterministic = opt.deterministic)
 
     model = NeRFNetwork(opt)
 
@@ -160,7 +161,7 @@
 
         if opt.guidance == 'stable-diffusion':
             from nerf.sd import StableDiffusion
-            guidance = StableDiffusion(device, opt.sd_version, opt.hf_key)
+            guidance = StableDiffusion(opt, device, opt.sd_version, opt.hf_key)
         elif opt.guidance == 'clip':
             from nerf.clip import CLIP
             guidance = CLIP(device)

diff --git a/nerf/sd.py b/nerf/sd.py
@@ -1,6 +1,7 @@
 from transformers import CLIPTextModel, CLIPTokenizer, logging
 from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler, DDIMScheduler
 from diffusers.utils.import_utils import is_xformers_available
+import functools
 
 # suppress partial model loading warning
 logging.set_verbosity_error()
@@ -25,16 +26,40 @@ def backward(ctx, grad):
         batch_size = len(gt_grad)
         return gt_grad / batch_size, None
 
+class DeterministicInterpolate(nn.Module): 
+    # Deterministic substitute for F.interpolate(mode='bilinear'); background: https://github.com/open-mmlab/mmsegmentation/issues/255
+    # Upsamples with mode='nearest', then smooths the result with a fixed (non-trainable) depthwise box-filter convolution.
+    def __init__(self, channel: int, scale_factor: int):
+        super().__init__()
+        # scale_factor must be an even integer > 1: kernel_size is then odd and padding=scale_factor // 2 keeps the upsampled size.
+        assert isinstance(scale_factor, int) and scale_factor > 1 and scale_factor % 2 == 0
+        self.scale_factor = scale_factor
+        kernel_size = scale_factor + 1  # odd, so the averaging window is centered on each output pixel
+        self.weight = nn.Parameter(
+            torch.empty((1, 1, kernel_size, kernel_size), dtype=torch.float32).expand(channel, -1, -1, -1)  # one kernel viewed as `channel` filters
+        )
+        self.conv = functools.partial(
+            F.conv2d, weight=self.weight, bias=None, padding=scale_factor // 2, groups=channel  # groups=channel: each channel is filtered independently
+        )
+        with torch.no_grad():
+            self.weight.fill_(1 / (kernel_size * kernel_size))  # uniform averaging (box) kernel
+            self.weight.requires_grad_(False)  # the kernel is a constant, never trained
+
+    def forward(self, t):
+        if t is None:  # pass None through unchanged
+            return t
+        return self.conv(F.interpolate(t, scale_factor=self.scale_factor, mode='nearest'))  # nearest upsample, then box blur
+
 def seed_everything(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
     #torch.backends.cudnn.deterministic = True
     #torch.backends.cudnn.benchmark = True
 
 class StableDiffusion(nn.Module):
-    def __init__(self, device, sd_version='2.1', hf_key=None):
+    def __init__(self, opt, device, sd_version='2.1', hf_key=None):
         super().__init__()
-
+        self.opt = opt
         self.device = device
         self.sd_version = sd_version
 
@@ -64,6 +89,12 @@ def __init__(self, device, sd_version='2.1', hf_key=None):
         self.scheduler = DDIMScheduler.from_pretrained(model_key, subfolder="scheduler")
         # self.scheduler = PNDMScheduler.from_pretrained(model_key, subfolder="scheduler")
 
+        if opt.deterministic:
+            assert opt.h == opt.w
+            self.interpolate = DeterministicInterpolate(channel=3, scale_factor=512//opt.w).to(self.device)
+        else:
+            self.interpolate = functools.partial(F.interpolate, size=(512, 512), mode='bilinear', align_corners=False)
+
         self.num_train_timesteps = self.scheduler.config.num_train_timesteps
         self.min_step = int(self.num_train_timesteps * 0.02)
         self.max_step = int(self.num_train_timesteps * 0.98)
@@ -95,7 +126,7 @@ def train_step(self, text_embeddings, pred_rgb, guidance_scale=100):
 
         # interp to 512x512 to be fed into vae.
 
-        pred_rgb_512 = F.interpolate(pred_rgb, (512, 512), mode='bilinear', align_corners=False)
+        pred_rgb_512 = self.interpolate(pred_rgb)
 
         # timestep ~ U(0.02, 0.98) to avoid very high/low noise level
         t = torch.randint(self.min_step, self.max_step + 1, [1], dtype=torch.long, device=self.device)
@@ -220,7 +251,7 @@ def prompt_to_img(self, prompts, negative_prompts='', height=512, width=512, num
 
     device = torch.device('cuda')
 
-    sd = StableDiffusion(device, opt.sd_version, opt.hf_key)
+    sd = StableDiffusion(opt, device, opt.sd_version, opt.hf_key)
 
     imgs = sd.prompt_to_img(opt.prompt, opt.negative, opt.H, opt.W, opt.steps)
 

diff --git a/nerf/utils.py b/nerf/utils.py
@@ -105,14 +105,17 @@ def get_rays(poses, intrinsics, H, W, N=-1, error_map=None):
     return results
 
 
-def seed_everything(seed):
+def seed_everything(seed, deterministic=False):
     random.seed(seed)
     os.environ['PYTHONHASHSEED'] = str(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
-    #torch.backends.cudnn.deterministic = True
-    #torch.backends.cudnn.benchmark = True
+    if deterministic:  # opt-in via --deterministic; the flag's help text notes this is slower
+        os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8" # cuBLAS reproducibility setting: https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility (see also https://discuss.pytorch.org/t/random-seed-with-external-gpu/102260/3)
+        torch.backends.cudnn.deterministic = True  # ask cuDNN for deterministic algorithms
+        torch.backends.cudnn.benchmark = False # turn off cuDNN benchmarking: https://pytorch.org/docs/stable/notes/randomness.html#cuda-convolution-benchmarking
+        torch.use_deterministic_algorithms(True) # raises an error when an operation without a deterministic implementation is used
 
 
 def torch_vis_2d(x, renormalize=False):