@@ -77,8 +77,10 @@ def _jacobian_wrt_input_mult_left_vec(self, x: Tensor, val: Tensor, jac_in: Tens
     def _jacobian_wrt_input_transpose_mult_left_vec(self, x: Tensor, val: Tensor, jac_in: Tensor) -> Tensor:
         return F.linear(jac_in.movedim(1, -1), self.weight.T, bias=None).movedim(-1, 1)
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
-
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
+
         b, c = x.shape
         diag_elements = torch.diagonal(tmp, dim1=1, dim2=2)
         feat_k2 = (x ** 2).unsqueeze(1)
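A hedged sketch (not part of the commit) of what the method above computes for `nn.Linear`: since `out = x @ W.T`, the Jacobian w.r.t. `W` is block-diagonal in the output index, so the diagonal of `J_W^T @ tmp @ J_W` factors into `diag(tmp)` and `x ** 2`. Shapes below are illustrative only:

```python
import torch

b, c_in, c_out = 2, 3, 4
x = torch.randn(b, c_in)                 # layer input
tmp = torch.randn(b, c_out, c_out)       # output-space matrix being "sandwiched"

diag_elements = torch.diagonal(tmp, dim1=1, dim2=2)   # (b, c_out)
feat_k2 = (x ** 2).unsqueeze(1)                       # (b, 1, c_in)
# diagonal of J_W^T @ tmp @ J_W, flattened over the (c_out, c_in) weight entries
sandwich_diag = (diag_elements.unsqueeze(-1) * feat_k2).reshape(b, c_out * c_in)
```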
@@ -125,7 +127,9 @@ def _jacobian_wrt_input_mult_left_vec(self, x: Tensor, val: Tensor, jac_in: Tens
             .movedim(dims2, dims1)
         )
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
         # non parametric, so return empty
         return None
@@ -137,17 +141,16 @@ def _jacobian_wrt_input_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag
         weight = torch.ones(c2, c1, int(self.scale_factor), int(self.scale_factor), device=x.device)
 
         tmp = F.conv2d(
-            tmp.reshape(-1, c2, h2, w2),
-            weight=weight,
-            bias=None,
-            stride=int(self.scale_factor),
-            padding=0,
-            dilation=1,
-            groups=1,
-        )
-
-        return tmp.reshape(b, c1 * h1 * w1)
+            tmp.reshape(-1, c2, h2, w2),
+            weight=weight,
+            bias=None,
+            stride=int(self.scale_factor),
+            padding=0,
+            dilation=1,
+            groups=1,
+        )
 
+        return tmp.reshape(b, c1 * h1 * w1)
 
 
 class Conv1d(AbstractJacobian, nn.Conv1d):
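The upsampling sandwich above relies on a block-sum identity: each input pixel of `nn.Upsample(scale_factor=s)` fans out to an `s x s` output block, so multiplying by `J^T` on both sides sums `tmp` over those blocks, and a stride-`s` convolution with an all-ones kernel computes exactly that. A minimal single-channel sketch with illustrative shapes (not from the commit):

```python
import torch
import torch.nn.functional as F

s, h1, w1 = 2, 4, 4
tmp = torch.randn(1, 1, h1 * s, w1 * s)   # per-pixel values on the upsampled grid
ones = torch.ones(1, 1, s, s)             # all-ones kernel, as in the diff

block_sum = F.conv2d(tmp, ones, bias=None, stride=s)       # (1, 1, h1, w1)
manual = tmp.reshape(1, 1, h1, s, w1, s).sum(dim=(3, 5))   # explicit block sum
assert torch.allclose(block_sum, manual)
```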
@@ -416,36 +419,36 @@ def _jacobian_wrt_weight_T_mult_right(
         return Jt_tmp
 
     def _jacobian_wrt_weight_mult_left(
-        self, x: Tensor, val: Tensor, tmp: Tensor, use_less_memory: bool = True
-    ) -> Tensor:
+        self, x: Tensor, val: Tensor, tmp: Tensor, use_less_memory: bool = True
+    ) -> Tensor:
         b, c1, h1, w1 = x.shape
         c2, h2, w2 = val.shape[1:]
         kernel_h, kernel_w = self.kernel_size
         num_of_rows = tmp.shape[-2]
 
         # expand rows as cubes [(output channel)x(output height)x(output width)]
-        tmp_rows = tmp.movedim(-1,-2).reshape(b, c2, h2, w2, num_of_rows)
+        tmp_rows = tmp.movedim(-1, -2).reshape(b, c2, h2, w2, num_of_rows)
         # see rows as columns of the transposed matrix
         tmpt_cols = tmp_rows
         # transpose the images in (output height)x(output width)
         tmpt_cols = torch.flip(tmpt_cols, [-3, -2])
         # switch batch size and output channel
-        tmpt_cols = tmpt_cols.movedim(0,1)
+        tmpt_cols = tmpt_cols.movedim(0, 1)
 
         if use_less_memory:
 
-            tmp_J = torch.zeros(b, c2 * c1 * kernel_h * kernel_w, num_of_rows, device=x.device)
+            tmp_J = torch.zeros(b, c2 * c1 * kernel_h * kernel_w, num_of_rows, device=x.device)
             for i in range(b):
                 # set the weight to the convolution
-                input_single_batch = x[i : i + 1,:,:, :]
-                reversed_input_single_batch = torch.flip(input_single_batch, [-2,-1]).movedim(0,1)
-
-                tmp_single_batch = tmpt_cols[:,i : i + 1,:,:, :]
+                input_single_batch = x[i : i + 1, :, :, :]
+                reversed_input_single_batch = torch.flip(input_single_batch, [-2, -1]).movedim(0, 1)
+
+                tmp_single_batch = tmpt_cols[:, i : i + 1, :, :, :]
 
                 # convolve each column
                 tmp_J_single_batch = (
                     F.conv2d(
-                        tmpt_cols.movedim((1, 2, 3), (-3, -2, -1)).reshape(-1, 1, h2, w2),
+                        tmp_single_batch.movedim((1, 2, 3), (-3, -2, -1)).reshape(-1, 1, h2, w2),
                         weight=reversed_input_single_batch,
                         bias=None,
                         stride=self.stride,
@@ -458,14 +461,14 @@ def _jacobian_wrt_weight_mult_left(
                 )
 
                 # reshape as a (num of weights)x(num of column) matrix
-                tmp_J_single_batch = tmp_J_single_batch.reshape(c2 * c1 * kernel_h * kernel_w, num_of_rows)
+                tmp_J_single_batch = tmp_J_single_batch.reshape(c2 * c1 * kernel_h * kernel_w, num_of_rows)
                 tmp_J[i, :, :] = tmp_J_single_batch
 
             # transpose
-            tmp_J = tmp_J.movedim(-1,-2)
-        else:
+            tmp_J = tmp_J.movedim(-1, -2)
+        else:
             # set the weight to the convolution
-            reversed_inputs = torch.flip(x, [-2,-1]).movedim(0,1)
+            reversed_inputs = torch.flip(x, [-2, -1]).movedim(0, 1)
 
             # convolve each column
             Jt_tmptt_cols = (
@@ -483,9 +486,9 @@ def _jacobian_wrt_weight_mult_left(
             )
 
             # reshape as a (num of input)x(num of output) matrix, one for each batch size
-            Jt_tmptt_cols = Jt_tmptt_cols.reshape(c2 * c1 * kernel_h * kernel_w,num_of_rows)
+            Jt_tmptt_cols = Jt_tmptt_cols.reshape(c2 * c1 * kernel_h * kernel_w, num_of_rows)
             # transpose
-            tmp_J = Jt_tmptt_cols.movedim(0,1)
+            tmp_J = Jt_tmptt_cols.movedim(0, 1)
 
         return tmp
 
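The per-batch loop in this method (note the real fix it carries: convolving `tmp_single_batch` rather than the whole `tmpt_cols`) leans on the classic identity that the gradient of a convolution w.r.t. its weight is itself a cross-correlation of the input with the output-space factor, obtained by swapping batch and channel dimensions, which is what the `movedim(0, 1)` calls do. A hedged sketch of that base identity with illustrative shapes:

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8)                      # input
W = torch.randn(5, 3, 3, 3, requires_grad=True)  # conv weight
g = torch.randn(1, 5, 6, 6)                      # one "column" in output space

(F.conv2d(x, W) * g).sum().backward()            # autograd reference: d<g, conv(x, W)>/dW

# same quantity as a conv: batch and channel dims swapped, g acting as the kernel
manual = F.conv2d(x.movedim(0, 1), g.movedim(0, 1), bias=None)  # (3, 5, 3, 3)
assert torch.allclose(W.grad, manual.movedim(0, 1), atol=1e-4)
```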
@@ -495,7 +498,9 @@ def _jacobian_wrt_input_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag
         else:
             return self._jacobian_wrt_input_full_sandwich(x, val, tmp)
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
         if diag:
             return self._jacobian_wrt_weight_diag_sandwich(x, val, tmp)
         else:
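The `diag` flag in these dispatchers trades exactness for memory: given only the diagonal `h` of the output-space matrix, `diag(J^T @ diag(h) @ J)` reduces to `(J ** 2).T @ h`, so nothing quadratic in the layer width is ever materialized. A small sketch with an illustrative dense Jacobian `J` (the layers above never form `J` explicitly):

```python
import torch

J = torch.randn(4, 6)        # illustrative Jacobian (out x in)
h = torch.randn(4)           # diagonal of the output-space matrix

diag_out = (J ** 2).T @ h    # diag(J^T @ diag(h) @ J) without forming it
full = J.T @ torch.diag(h) @ J
assert torch.allclose(diag_out, torch.diagonal(full), atol=1e-6)
```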
@@ -652,7 +657,9 @@ def _jacobian_wrt_input_mult_left_vec(self, x: Tensor, val: Tensor, jac_in: Tens
     def _jacobian_wrt_input_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
         return tmp
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
         return None
 
@@ -675,7 +682,9 @@ def _jacobian_wrt_input_mult_left_vec(self, x: Tensor, val: Tensor, jac_in: Tens
     def _jacobian_wrt_input_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
         return tmp
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
         return None
 
@@ -785,7 +794,9 @@ def _jacobian_wrt_input_mult_left_vec(self, x: Tensor, val: Tensor, jac_in: Tens
         jac_in = jac_in[arange_repeated, idx, :, :, :].reshape(*val.shape, *jac_in_orig_shape[4:])
         return jac_in
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
         # non parametric, so return empty
         return None
@@ -800,9 +811,9 @@ def _jacobian_wrt_input_diag_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor)
         new_tmp = new_tmp.reshape(b * c1, h1 * w1)
         idx = self.idx.reshape(b * c2, h2 * w2)
         arange_repeated = torch.repeat_interleave(torch.arange(b * c1), h2 * w2).long()
-        arange_repeated = arange_repeated.reshape(b * c2, h2 * w2)
-
-        new_tmp[arange_repeated, idx] = tmp.reshape(b * c2, h2 * w2)
+        arange_repeated = arange_repeated.reshape(b * c2, h2 * w2)
+
+        new_tmp[arange_repeated, idx] = tmp.reshape(b * c2, h2 * w2)
 
         return new_tmp.reshape(b, c1 * h1 * w1)
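The fancy-indexing scatter above routes output-space diagonal entries back to the input positions selected by max pooling, using the flat per-channel indices that `return_indices=True` provides. A hedged, self-contained sketch of the same pattern (pool and tensor sizes are illustrative):

```python
import torch

pool = torch.nn.MaxPool2d(2, return_indices=True)
x = torch.randn(2, 3, 4, 4)
val, idx = pool(x)                    # idx: flat per-channel input index of each max

b, c, h2, w2 = val.shape
h1 = w1 = 4
tmp = torch.randn(b, c, h2 * w2)      # output-space diagonal

new_tmp = torch.zeros(b * c, h1 * w1)
rows = torch.repeat_interleave(torch.arange(b * c), h2 * w2).reshape(b * c, h2 * w2)
new_tmp[rows, idx.reshape(b * c, h2 * w2)] = tmp.reshape(b * c, h2 * w2)
new_tmp = new_tmp.reshape(b, c * h1 * w1)   # back to an input-space diagonal
```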
@@ -892,7 +903,9 @@ def _jacobian(self, x: Tensor, val: Tensor) -> Tensor:
         jac = 1.0 - val ** 2
         return jac
 
-    def _jacobian_wrt_weight_sandwich(self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False) -> Tensor:
+    def _jacobian_wrt_weight_sandwich(
+        self, x: Tensor, val: Tensor, tmp: Tensor, diag: bool = False
+    ) -> Tensor:
         # non parametric, so return empty
         return None
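The `_jacobian` in this hunk is the standard elementwise identity d tanh(x)/dx = 1 - tanh(x)^2, reusing the already-computed activation `val`; a quick check against autograd:

```python
import torch

x = torch.randn(5, requires_grad=True)
val = torch.tanh(x)
val.sum().backward()
assert torch.allclose(x.grad, 1.0 - val.detach() ** 2)
```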