Commit 0b602c5

mtthss authored and jax authors committed
Add sparse_sigmoid to jax.nn
PiperOrigin-RevId: 623108517
1 parent 4d4151d commit 0b602c5

3 files changed: +49 −0 lines


jax/_src/nn/functions.py

Lines changed: 32 additions & 0 deletions
@@ -173,6 +173,38 @@ def sigmoid(x: ArrayLike) -> Array:
   """
   return lax.logistic(x)

+@jax.jit
+def sparse_sigmoid(x: ArrayLike) -> Array:
+  r"""Sparse sigmoid activation function.
+
+  Computes the function:
+
+  .. math::
+
+    \mathrm{sparse\_sigmoid}(x) = \begin{cases}
+      0, & x \leq -1\\
+      \frac{1}{2}(x+1), & -1 < x < 1 \\
+      1, & 1 \leq x
+    \end{cases}
+
+  This is the twin function of the ``sigmoid`` activation ensuring a zero output
+  for inputs less than -1, a 1 output for inputs greater than 1, and a linear
+  output for inputs between -1 and 1. It is the derivative of ``sparse_plus``.
+
+  For more information, see `Learning with Fenchel-Young Losses (section 6.2)
+  <https://arxiv.org/abs/1901.02324>`_.
+
+  Args:
+    x : input array
+
+  Returns:
+    An array.
+
+  See also:
+    :func:`sigmoid`
+  """
+  return 0.5 * jnp.clip(x + 1.0, 0.0, 2.0)
+
 @jax.jit
 def silu(x: ArrayLike) -> Array:
   r"""SiLU (aka swish) activation function.

jax/nn/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@
   softmax as softmax,
   softplus as softplus,
   sparse_plus as sparse_plus,
+  sparse_sigmoid as sparse_sigmoid,
   silu as silu,
   swish as swish,
   squareplus as squareplus,
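
With this one-line re-export in place (assuming the usual from-import of jax._src.nn.functions in jax/nn/__init__.py), the public name and the private definition refer to the same function object. A minimal check, not part of the commit:

import jax
from jax._src.nn import functions as _functions  # private module, used here for illustration only

# The public alias points at the implementation in jax/_src/nn/functions.py.
assert jax.nn.sparse_sigmoid is _functions.sparse_sigmoid
print(jax.nn.sparse_sigmoid(0.0))  # 0.5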

tests/nn_test.py

Lines changed: 16 additions & 0 deletions
@@ -71,6 +71,17 @@ def testSparseplusGrad(self):
     check_grads(nn.sparse_plus, (0.,), order=1,
                 rtol=1e-2 if jtu.test_device_matches(["tpu"]) else None)

+  def testSparseplusAndSparseSigmoid(self):
+    self.assertAllClose(
+        jax.grad(nn.sparse_plus)(0.), nn.sparse_sigmoid(0.),
+        check_dtypes=False)
+    self.assertAllClose(
+        jax.grad(nn.sparse_plus)(2.), nn.sparse_sigmoid(2.),
+        check_dtypes=False)
+    self.assertAllClose(
+        jax.grad(nn.sparse_plus)(-2.), nn.sparse_sigmoid(-2.),
+        check_dtypes=False)
+
   def testSquareplusGrad(self):
     check_grads(nn.squareplus, (1e-8,), order=4,
                 rtol=1e-2 if jtu.test_device_matches(["tpu"]) else None)
@@ -133,6 +144,11 @@ def testSparseplusValue(self):
     val = nn.sparse_plus(89.)
     self.assertAllClose(val, 89., check_dtypes=False)

+  def testSparsesigmoidValue(self):
+    self.assertAllClose(nn.sparse_sigmoid(-2.), 0., check_dtypes=False)
+    self.assertAllClose(nn.sparse_sigmoid(2.), 1., check_dtypes=False)
+    self.assertAllClose(nn.sparse_sigmoid(0.), .5, check_dtypes=False)
+
   def testSquareplusValue(self):
     val = nn.squareplus(1e3)
     self.assertAllClose(val, 1e3, check_dtypes=False, atol=1e-3)
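
The committed tests spot-check the sparse_plus / sparse_sigmoid derivative relationship at three points. A denser check along the same lines, sketched here as a hypothetical addition rather than part of the commit, could vectorize the gradient over a grid:

import jax
import jax.numpy as jnp
from jax import nn

# Mirror testSparseplusAndSparseSigmoid at many points: the elementwise
# gradient of sparse_plus should coincide with sparse_sigmoid everywhere
# away from the kinks at x = -1 and x = 1.
xs = jnp.linspace(-3.0, 3.0, 101)  # grid chosen so no point lands exactly on a kink
grads = jax.vmap(jax.grad(nn.sparse_plus))(xs)
assert jnp.allclose(grads, nn.sparse_sigmoid(xs))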
