Merge pull request #128 from Sarasra/main

jonbarron · web-flow · commit 31d857bc6687 · 2023-08-18T14:39:47.000-07:00
Add robustnerf mask to mipnerf360
diff --git a/internal/configs.py b/internal/configs.py
@@ -91,6 +91,13 @@ class Config:
   interlevel_loss_mult: float = 1.0  # Mult. for the loss on the proposal MLP.
   orientation_loss_mult: float = 0.0  # Multiplier on the orientation loss.
   orientation_coarse_loss_mult: float = 0.0  # Coarser orientation loss weights.
+  # RobustNerf loss hyperparameters (read by internal/robustnerf.py).
+  robustnerf_inlier_quantile: float = 0.5  # Error quantile -> next threshold.
+  enable_robustnerf_loss: bool = False  # If False, the mask is all ones.
+  robustnerf_inner_patch_size: int = 8  # Must be <= patch_size.
+  robustnerf_smoothed_filter_size: int = 3  # Side of the box filter.
+  robustnerf_smoothed_inlier_quantile: float = 0.5  # Keep if > 1 - this.
+  robustnerf_inner_patch_inlier_quantile: float = 0.5  # Keep if > 1 - this.
   # What that loss is imposed on, options are 'normals' or 'normals_pred'.
   orientation_loss_target: str = 'normals_pred'
   predicted_normal_loss_mult: float = 0.0  # Mult. on the predicted normal loss.
diff --git a/internal/robustnerf.py b/internal/robustnerf.py
@@ -0,0 +1,117 @@
+"""Computes RobustNeRF mask."""
+from typing import Mapping, Tuple
+
+from jax import lax
+import jax.numpy as jnp
+
+
+def robustnerf_mask(
+    errors: jnp.ndarray, loss_threshold: float, config
+) -> Tuple[jnp.ndarray, Mapping[str, jnp.ndarray]]:
+  """Computes RobustNeRF mask.
+
+  Args:
+    errors: f32[n,h,w,c]. Per-subpixel errors in a batch of patches.
+    loss_threshold: f32[]. Upper bound on per-pixel loss to use to determine
+      if a pixel is an inlier or not.
+    config: Hyperparameter object read by attribute: patch_size,
+      enable_robustnerf_loss and the robustnerf_* fields.
+
+  Returns:
+    mask: [n,h,w,1], in errors.dtype. Binary mask that broadcasts to shape
+      [n,h,w,c]; all ones when config.enable_robustnerf_loss is False.
+    stats: { str: f32[] }. Statistics to pass on. 'loss_threshold' is the
+      config.robustnerf_inlier_quantile quantile of this batch's errors.
+  """
+  epsilon = 1e-3
+  error_dtype = errors.dtype
+  error_per_pixel = jnp.mean(errors, axis=-1, keepdims=True)  # f32[n,h,w,1]
+  next_loss_threshold = jnp.quantile(
+      error_per_pixel, config.robustnerf_inlier_quantile
+  )
+  mask = jnp.ones_like(error_per_pixel, dtype=error_dtype)
+  stats = {'loss_threshold': next_loss_threshold}
+  if config.enable_robustnerf_loss:
+    assert (
+        config.robustnerf_inner_patch_size <= config.patch_size
+    ), 'patch_size must be at least robustnerf_inner_patch_size.'
+
+    # Inlier pixels have a value of 1.0 in the mask.
+    is_inlier_pixel = (error_per_pixel < loss_threshold).astype(error_dtype)
+    stats['is_inlier_loss'] = jnp.mean(is_inlier_pixel)
+
+    # Apply fxf (3x3 by default) box filter 'window' for smoothing (diffusion).
+    # lax.conv does not promote dtypes, so the window is built in error_dtype
+    # to keep non-float32 errors (e.g. bfloat16) from raising a TypeError.
+    f = config.robustnerf_smoothed_filter_size
+    window = jnp.ones((1, 1, f, f), dtype=error_dtype) / (f * f)
+    has_inlier_neighbors = lax.conv(
+        jnp.transpose(is_inlier_pixel, [0, 3, 1, 2]), window, (1, 1), 'SAME'
+    )
+    has_inlier_neighbors = jnp.transpose(has_inlier_neighbors, [0, 2, 3, 1])
+
+    # Binarize after smoothing.
+    # config.robustnerf_smoothed_inlier_quantile default is 0.5 which means
+    # more than 50% of neighbouring pixels are inliers.
+    has_inlier_neighbors = (
+        has_inlier_neighbors > 1 - config.robustnerf_smoothed_inlier_quantile
+    ).astype(error_dtype)
+    stats['has_inlier_neighbors'] = jnp.mean(has_inlier_neighbors)
+    is_inlier_pixel = (
+        has_inlier_neighbors + is_inlier_pixel > epsilon
+    ).astype(error_dtype)
+    # Construct binary mask for inner pixels. The entire inner patch is either
+    # active or inactive.
+    # patch_size is the input patch (h,w), inner patch size can be any value
+    # no larger than patch_size. Default is for the inner patch size to be
+    # half the input patch size (i.e. 16x16 -> 8x8).
+    inner_patch_mask = _robustnerf_inner_patch_mask(
+        config.robustnerf_inner_patch_size, config.patch_size
+    )
+    # Fraction of inlier pixels in each patch, f32[n,1,1,1].
+    is_inlier_patch = jnp.mean(is_inlier_pixel, axis=(1, 2), keepdims=True)
+    # robustnerf_inner_patch_inlier_quantile what percentage of the patch
+    # should be inliers so that the patch is counted as an inlier patch.
+    is_inlier_patch = (
+        is_inlier_patch > 1 - config.robustnerf_inner_patch_inlier_quantile
+    ).astype(error_dtype)
+    is_inlier_patch = is_inlier_patch * inner_patch_mask
+    stats['is_inlier_patch'] = jnp.mean(is_inlier_patch)
+
+    # A pixel is an inlier if it is an inlier according to any of the above
+    # criteria.
+    mask = (is_inlier_patch + is_inlier_pixel > epsilon).astype(error_dtype)
+
+  stats['mask'] = jnp.mean(mask)
+  return mask, stats
+
+
+def _robustnerf_inner_patch_mask(
+    inner_patch_size, outer_patch_size, *, dtype=jnp.float32
+):
+  """Builds the centered square indicator mask of the inner patch.
+
+  Args:
+    inner_patch_size: Side length of the (square) inner patch.
+    outer_patch_size: Side length of the (square) outer patch.
+    dtype: dtype of the returned mask.
+
+  Returns:
+    Array of shape (1, outer_patch_size, outer_patch_size, 1) that is 1.0 on
+      the centered (inner_patch_size, inner_patch_size) square and 0.0
+      everywhere else.
+  """
+  # When the size difference is odd, the extra row/column of zeros is placed
+  # after (below / right of) the inner square.
+  slack = outer_patch_size - inner_patch_size
+  margin_before = slack // 2
+  margin_after = slack - margin_before
+  spatial_pad = (margin_before, margin_after)
+
+  inner_ones = jnp.ones(
+      (1, inner_patch_size, inner_patch_size, 1), dtype=dtype
+  )
+  # Zero-pad height and width only; batch and channel axes stay at size 1.
+  return jnp.pad(inner_ones, ((0, 0), spatial_pad, spatial_pad, (0, 0)))
+
+
diff --git a/internal/train_utils.py b/internal/train_utils.py
@@ -27,6 +27,7 @@
 from internal import math
 from internal import models
 from internal import ref_utils
+from internal import robustnerf 
 from internal import stepfun
 from internal import utils
 import jax
@@ -68,7 +69,7 @@ def summarize_tree(tree, fn, ancestry=(), max_depth=3):
   return stats
 
 
-def compute_data_loss(batch, renderings, rays, config):
+def compute_data_loss(batch, renderings, rays, loss_threshold, config):
   """Computes data loss terms for RGB, normal, and depth outputs."""
   data_losses = []
   stats = collections.defaultdict(lambda: [])
@@ -100,6 +101,11 @@ def compute_data_loss(batch, renderings, rays, config):
       scaling_grad = 1. / (1e-3 + jax.lax.stop_gradient(rgb_render_clip))
       # Reweighted L2 loss.
       data_loss = resid_sq_clip * scaling_grad**2
+    elif config.data_loss_type == 'robustnerf':
+      mask, robust_stats = robustnerf.robustnerf_mask(
+          resid_sq, loss_threshold, config)
+      data_loss = resid_sq * mask  # Outlier pixels contribute zero loss.
+      stats.update(robust_stats)
     else:
       assert False
     data_losses.append((lossmult * data_loss).sum() / denom)
@@ -236,6 +242,7 @@ def train_step(
       batch,
       cameras,
       train_frac,
+      loss_threshold,
   ):
     """One optimization step.
 
@@ -245,6 +252,7 @@ def train_step(
       batch: dict, a mini-batch of data for training.
       cameras: module containing camera poses.
       train_frac: float, the fraction of training that is complete.
+      loss_threshold: float, the loss threshold for inliers (for robustness).
 
     Returns:
       A tuple (new_state, stats, rng) with
@@ -273,7 +281,8 @@ def loss_fn(variables):
 
       losses = {}
 
-      data_loss, stats = compute_data_loss(batch, renderings, rays, config)
+      data_loss, stats = compute_data_loss(batch, renderings, rays,
+              loss_threshold, config)
       losses['data'] = data_loss
 
       if config.interlevel_loss_mult > 0:
@@ -332,7 +341,7 @@ def loss_fn(variables):
   train_pstep = jax.pmap(
       train_step,
       axis_name='batch',
-      in_axes=(0, 0, 0, None, None),
+      in_axes=(0, 0, 0, None, None, None),
       donate_argnums=(0, 1))
   return train_pstep
 
@@ -394,7 +403,8 @@ def setup_model(
 ) -> Tuple[models.Model, TrainState, Callable[
     [FrozenVariableDict, jnp.array, utils.Rays],
     MutableMapping[Text, Any]], Callable[
-        [jnp.array, TrainState, utils.Batch, Optional[Tuple[Any, ...]], float],
+        [jnp.array, TrainState, utils.Batch,
+            Optional[Tuple[Any, ...]], float, float],
         Tuple[TrainState, Dict[Text, Any], jnp.array]], Callable[[int], float]]:
   """Creates NeRF model, optimizer, and pmap-ed train/render functions."""
 
diff --git a/train.py b/train.py
@@ -106,6 +106,7 @@ def main(unused_argv):
     num_steps = config.early_exit_steps
   else:
     num_steps = config.max_steps
+  loss_threshold = 1.0
   for step, batch in zip(range(init_step, num_steps + 1), pdataset):
 
     if reset_stats and (jax.host_id() == 0):
@@ -122,7 +123,9 @@ def main(unused_argv):
         batch,
         cameras,
         train_frac,
+        loss_threshold,
     )
+    loss_threshold = jnp.mean(stats['loss_threshold'])
 
     if step % config.gc_every == 0:
       gc.collect()  # Disable automatic garbage collection for efficiency.