Added a lowering rule for lax.sign_p and improved test coverage for binary ops

superbobry · jax authors · commit ae1cfc21c35a · 2024-03-12T07:48:43.000-07:00
Closes #17317 PiperOrigin-RevId: 615038353
diff --git a/jax/_src/pallas/triton/lowering.py b/jax/_src/pallas/triton/lowering.py
@@ -837,7 +837,10 @@ def _mul(x: ir.Value, y: ir.Value) -> ir.Value:
 
 def _floordiv(x: ir.Value, y: ir.Value, *, signed: bool) -> ir.Value:
   assert x.type == y.type, (str(x.type), str(y.type))
-  if not isinstance(_element_type(x.type), ir.IntegerType):
+  x_element_type = _element_type(x.type)
+  if isinstance(x_element_type, ir.FloatType):
+    return arith_dialect.divf(x, y)
+  if not isinstance(x_element_type, ir.IntegerType):
     raise NotImplementedError(f"unsupported types: {x.type} and {y.type}")
   if signed:
     return arith_dialect.divsi(x, y)
@@ -859,7 +862,10 @@ def _truediv(x: ir.Value, y: ir.Value, *, signed: bool) -> ir.Value:
 
 def _mod(x: ir.Value, y: ir.Value, *, signed: bool) -> ir.Value:
   assert x.type == y.type, (str(x.type), str(y.type))
-  if not isinstance(_element_type(x.type), ir.IntegerType):
+  x_element_type = _element_type(x.type)
+  if isinstance(x_element_type, ir.FloatType):
+    return arith_dialect.remf(x, y)
+  if not isinstance(x_element_type, ir.IntegerType):
     raise NotImplementedError(f"unsupported types: {x.type} and {y.type}")
   if signed:
     return arith_dialect.remsi(x, y)
@@ -1108,6 +1114,19 @@ def _div_lowering_rule(ctx: LoweringRuleContext, x, y):
 triton_lowering_rules[lax.div_p] = _div_lowering_rule
 
 
+def _sign_lowering_rule(ctx: LoweringRuleContext, x):
+  [x_aval] = ctx.avals_in
+  signed = np.issubdtype(x_aval.dtype, jnp.signedinteger)
+  zero = _full(x.type, 0)
+  return _sub(
+      _cast(_greater_than(x, zero, signed=signed), x.type, signed=signed),
+      _cast(_less_than(x, zero, signed=signed), x.type, signed=signed),
+  )
+
+
+triton_lowering_rules[lax.sign_p] = _sign_lowering_rule
+
+
 def _iota_lowering_rule(ctx: LoweringRuleContext, *, dtype, shape, dimension):
   if dimension != 0:
     raise NotImplementedError
diff --git a/tests/pallas/pallas_test.py b/tests/pallas/pallas_test.py
@@ -1517,6 +1517,7 @@ def add_one(x_ref, o_ref):
 class PallasCallInterpreterVmapTest(PallasCallVmapTest):
   INTERPRET = True
 
+
 class PallasOpsTest(PallasTest):
 
   def test_pow_weak_dtype(self):
@@ -1528,17 +1529,31 @@ def square(x_ref, o_ref):
     x = jnp.array(42.0)
     np.testing.assert_allclose(square(x), x*x)
 
-  def test_ne(self):
+  COMPARISON_OPS = [
+      jnp.equal,
+      jnp.not_equal,
+      jnp.less,
+      jnp.less_equal,
+      jnp.greater,
+      jnp.greater_equal,
+  ]
+
+  @parameterized.named_parameters(
+      (f"{fn.__name__}_{dtype}", fn, dtype)
+      for fn, dtype in itertools.product(
+          COMPARISON_OPS, ["int32", "uint32", "float32"]
+      )
+  )
+  def test_comparison(self, fn, dtype):
     @functools.partial(
         self.pallas_call, out_shape=jax.ShapeDtypeStruct((8,), jnp.bool_),
         grid=1)
-    def ne(x_ref, y_ref, o_ref):
-      o_ref[:] = x_ref[...] != y_ref[...]
+    def kernel(x_ref, y_ref, o_ref):
+      o_ref[:] = fn(x_ref[...], y_ref[...])
 
-    x = jnp.ones(8, dtype=jnp.int32)
-    y = jnp.arange(8, dtype=jnp.int32)
-    not_equal = ne(x, y)
-    np.testing.assert_allclose(not_equal, x != y)
+    x = jnp.array([1, 3, -4, -6, 2, 5, 4, -7]).astype(dtype)
+    y = jnp.array([3, 1, -4, -5, 2, -2, 2, 4]).astype(dtype)
+    np.testing.assert_allclose(kernel(x, y), fn(x, y))
 
   def test_isnan(self):
     @functools.partial(
@@ -1551,33 +1566,52 @@ def isnan(x_ref, o_ref):
     x = x.at[3].set(jnp.nan)
     np.testing.assert_allclose(isnan(x), jnp.isnan(x))
 
-  @parameterized.named_parameters(*(
-      (fn.__name__, fn, out_dtype)
-      for fn, out_dtype in [
-          (jnp.add, jnp.int32),
-          (jnp.subtract, jnp.int32),
-          (jnp.multiply, jnp.int32),
-          (jnp.true_divide, jnp.float32),
-          (jnp.remainder, jnp.int32),
-          (jnp.less, jnp.bool_),
-          (jnp.less_equal, jnp.bool_),
-          (jnp.greater, jnp.bool_),
-          (jnp.greater_equal, jnp.bool_),
-          (jnp.equal, jnp.bool_),
-          (jnp.not_equal, jnp.bool_),
-      ]
-  ))
-  def test_signed_int_ops(self, f, out_dtype):
+  def test_true_divide(self):
     @functools.partial(
         self.pallas_call,
-        out_shape=jax.ShapeDtypeStruct((8,), out_dtype),
-        grid=1)
-    def f_i32(x_ref, y_ref, o_ref):
+        out_shape=jax.ShapeDtypeStruct((8,), jnp.float32),
+        grid=1,
+    )
+    def kernel(x_ref, y_ref, o_ref):
+      o_ref[...] = jnp.true_divide(x_ref[...], y_ref[...])
+
+    x = jnp.array([1, 3, -4, -6, 2, 5, 4, -7], dtype=jnp.int32)
+    y = jnp.array([3, 1, -4, -5, 2, -2, 2, 4], dtype=jnp.int32)
+    np.testing.assert_allclose(jnp.true_divide(x, y), kernel(x, y))
+
+  BINARY_OPS = [
+      ([jnp.floor_divide], ["int32", "uint32"]),
+      (
+          [jnp.add, jnp.subtract, jnp.multiply, jnp.remainder],
+          ["int32", "uint32", "float32"],
+      ),
+      (
+          [
+              jnp.bitwise_and,
+              jnp.bitwise_or,
+              jnp.bitwise_xor,
+              jnp.bitwise_left_shift,
+              jnp.bitwise_right_shift,
+          ],
+          ["int32", "uint32"],
+      ),
+  ]
+
+  @parameterized.named_parameters(
+      (f"{fn.__name__}_{dtype}", fn, dtype)
+      for args in BINARY_OPS
+      for fn, dtype in itertools.product(*args)
+  )
+  def test_binary(self, f, dtype):
+    @functools.partial(
+        self.pallas_call, out_shape=jax.ShapeDtypeStruct((8,), dtype), grid=1
+    )
+    def kernel(x_ref, y_ref, o_ref):
       o_ref[...] = f(x_ref[...], y_ref[...])
 
-    x = jnp.int32([1, 3, -4, -6, 2, 5, 4, -7])
-    y = jnp.int32([3, 1, -4, -5, 2, -2, 0, 4])
-    np.testing.assert_allclose(f(x, y), f_i32(x, y))
+    x = jnp.array([1, 3, -4, -6, 2, 5, 4, -7]).astype(dtype)
+    y = jnp.array([3, 1, -4, -5, 2, -2, 2, 4]).astype(dtype)
+    np.testing.assert_allclose(f(x, y), kernel(x, y))
 
 
 class PallasOpsInterpretTest(PallasOpsTest):