merge main into amd-staging (llvm#2975)

z1_cciauto · web-flow · commit f63674cbf6d2 · 2025-07-06T09:19:37.000-05:00
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
@@ -47,7 +47,7 @@ Implemented Papers
 - P1222R4: A Standard ``flat_set`` (`Github <https://github.com/llvm/llvm-project/issues/105193>`__)
 - P2897R7: ``aligned_accessor``: An mdspan accessor expressing pointer over-alignment (`Github <https://github.com/llvm/llvm-project/issues/118372>`__)
 - P3247R2: Deprecate the notion of trivial types (`Github <https://github.com/llvm/llvm-project/issues/118387>`__)
-- P3372R3: ``constexpr`` containers and adaptors (`Github <https://github.com/llvm/llvm-project/issues/128673>`__) (Only ``constexpr flat_map`` is implemented)
+- P3372R3: ``constexpr`` containers and adaptors (`Github <https://github.com/llvm/llvm-project/issues/127876>`__) (``forward_list``, ``list``, ``priority_queue``, ``flat_map``, and ``flat_set`` are implemented)
 - P2441R2: ``views::join_with`` (`Github <https://github.com/llvm/llvm-project/issues/105185>`__)
 - P2711R1: Making multi-param constructors of ``views`` ``explicit`` (`Github <https://github.com/llvm/llvm-project/issues/105252>`__)
 - P2770R0: Stashing stashing ``iterators`` for proper flattening (`Github <https://github.com/llvm/llvm-project/issues/105250>`__)
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
@@ -151,7 +151,10 @@ __libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__con
 static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
                                      __cxx_atomic_contention_t const volatile* __platform_state,
                                      __cxx_contention_t __old_value) {
-  __cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_seq_cst);
+  __cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_relaxed);
+  // https://github.com/llvm/llvm-project/issues/109290
+  // There are no platform guarantees of a memory barrier in the platform wait implementation
+  __cxx_atomic_thread_fence(memory_order_seq_cst);
   // We sleep as long as the monitored value hasn't changed.
   __libcpp_platform_wait_on_address(__platform_state, __old_value);
   __cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
@@ -163,7 +166,7 @@ static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __conte
 static void __libcpp_atomic_notify(void const volatile* __location) {
   auto const __entry = __libcpp_contention_state(__location);
   // The value sequence laundering happens on the next line below.
-  __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_release);
+  __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_seq_cst);
   __libcpp_contention_notify(
       &__entry->__contention_state,
       &__entry->__platform_state,
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11402,18 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
   SDValue AbsOp0 = N->getOperand(0);
   unsigned Opc0 = Op0.getOpcode();
 
-  // Check if the operands of the sub are (zero|sign)-extended.
-  // TODO: Should we use ValueTracking instead?
+  // Check if the operands of the sub are (zero|sign)-extended, otherwise
+  // fallback to ValueTracking.
   if (Opc0 != Op1.getOpcode() ||
       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
        Opc0 != ISD::SIGN_EXTEND_INREG)) {
     // fold (abs (sub nsw x, y)) -> abds(x, y)
     // Don't fold this for unsupported types as we lose the NSW handling.
-    if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
-        TLI.preferABDSToABSWithNSW(VT)) {
+    if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
+        (AbsOp0->getFlags().hasNoSignedWrap() ||
+         DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
     }
+    // fold (abs (sub x, y)) -> abdu(x, y)
+    if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
+        DAG.SignBitIsZero(Op1)) {
+      SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
+      return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+    }
     return SDValue();
   }
 
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const_lhs:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uabd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
 define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
 ; CHECK-LABEL: abdu_const_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
-; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
-; CHECK-NEXT:    abs v1.4s, v1.4s
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sabd v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
   %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const_lhs:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    abs v0.4s, v0.4s
-; CHECK-NEXT:    abs v1.4s, v2.4s
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sabd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
 define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_const_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
-; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
-; CHECK-NEXT:    neg v1.4s, v2.4s
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
 ; CHECK-NEXT:    abs v0.4s, v0.4s
 ; CHECK-NEXT:    abs v1.4s, v1.4s
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    and z0.s, z0.s, #0xff
 ; CHECK-NEXT:    sxtb z1.s, p0/m, z1.s
-; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    sabd z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
   %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/PowerPC/fp-branch.ll b/llvm/test/CodeGen/PowerPC/fp-branch.ll
@@ -1,8 +1,21 @@
-; RUN: llc -verify-machineinstrs < %s -mattr=-vsx -mtriple=ppc32-- | grep fcmp | count 1
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s -mattr=-vsx -mtriple=ppc32-- | FileCheck %s
 
 declare i1 @llvm.isunordered.f64(double, double)
 
 define i1 @intcoord_cond_next55(double %tmp48.reload) {
+; CHECK-LABEL: intcoord_cond_next55:
+; CHECK:       # %bb.0: # %newFuncRoot
+; CHECK-NEXT:    lis 3, .LCPI0_0@ha
+; CHECK-NEXT:    lfs 0, .LCPI0_0@l(3)
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    blt 0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %bb72.exitStub
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2: # %cond_next62.exitStub
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    blr
 newFuncRoot:
         br label %cond_next55
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll
@@ -316,12 +316,10 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vzext.vf4 v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vsext.vf2 v8, v9
-; CHECK-NEXT:    vwsub.wv v10, v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT:    vrsub.vi v8, v10, 0
-; CHECK-NEXT:    vmax.vv v8, v10, v8
+; CHECK-NEXT:    vsext.vf4 v12, v9
+; CHECK-NEXT:    vmin.vv v8, v10, v12
+; CHECK-NEXT:    vmax.vv v10, v10, v12
+; CHECK-NEXT:    vsub.vv v8, v10, v8
 ; CHECK-NEXT:    ret
   %a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
   %b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
diff --git a/mlir/include/mlir/IR/ODSSupport.h b/mlir/include/mlir/IR/ODSSupport.h
@@ -118,6 +118,9 @@ convertFromAttribute(SmallVectorImpl<int32_t> &storage, Attribute attr,
 /// Convert the provided ArrayRef<int64_t> to a DenseI64ArrayAttr attribute.
 Attribute convertToAttribute(MLIRContext *ctx, ArrayRef<int64_t> storage);
 
+/// Convert the provided ArrayRef<int32_t> to a DenseI32ArrayAttr attribute.
+Attribute convertToAttribute(MLIRContext *ctx, ArrayRef<int32_t> storage);
+
 } // namespace mlir
 
 #endif // MLIR_IR_ODSSUPPORT_H
diff --git a/mlir/lib/IR/ODSSupport.cpp b/mlir/lib/IR/ODSSupport.cpp
@@ -173,3 +173,8 @@ Attribute mlir::convertToAttribute(MLIRContext *ctx,
                                    ArrayRef<int64_t> storage) {
   return DenseI64ArrayAttr::get(ctx, storage);
 }
+
+Attribute mlir::convertToAttribute(MLIRContext *ctx,
+                                   ArrayRef<int32_t> storage) {
+  return DenseI32ArrayAttr::get(ctx, storage);
+}
diff --git a/mlir/test/IR/properties.mlir b/mlir/test/IR/properties.mlir
@@ -2,10 +2,10 @@
 // # RUN: mlir-opt %s -mlir-print-op-generic -split-input-file  | mlir-opt -mlir-print-op-generic | FileCheck %s --check-prefix=GENERIC
 
 // CHECK:   test.with_properties
-// CHECK-SAME: a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4]{{$}}
+// CHECK-SAME: a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4], array32 = [5, 6]{{$}}
 // GENERIC:   "test.with_properties"()
-// GENERIC-SAME: <{a = 32 : i64, array = array<i64: 1, 2, 3, 4>, b = "foo", c = "bar", flag = true}> : () -> ()
-test.with_properties a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4]
+// GENERIC-SAME: <{a = 32 : i64, array = array<i64: 1, 2, 3, 4>, array32 = array<i32: 5, 6>, b = "foo", c = "bar", flag = true}> : () -> ()
+test.with_properties a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4], array32 = [5, 6]
 
 // CHECK:   test.with_nice_properties
 // CHECK-SAME:    "foo bar" is -3{{$}}
diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir
@@ -442,7 +442,7 @@ func.func @test_properties_rollback() {
   // CHECK: test.with_properties a = 32,
   // expected-remark @below{{op 'test.with_properties' is not legalizable}}
   test.with_properties
-      a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4]
+      a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4], array32 = [5, 6]
       {modify_inplace}
   "test.return"() : () -> ()
 }
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -3179,13 +3179,15 @@ def TestOpWithProperties : TEST_Op<"with_properties"> {
     `b` `=` $b `,`
     `c` `=` $c `,`
     `flag` `=` $flag `,`
-    `array` `=` $array attr-dict}];
+    `array` `=` $array `,`
+    `array32` `=` $array32 attr-dict}];
   let arguments = (ins
     I64Prop:$a,
     StrAttr:$b, // Attributes can directly be used here.
     StringProp:$c,
     BoolProp:$flag,
-    IntArrayProp<I64Prop>:$array // example of an array
+    IntArrayProp<I64Prop>:$array, // Example of an array.
+    IntArrayProp<I32Prop>:$array32 // Example of an array.
   );
 }
 

Original file line number	Diff line number	Diff line change
`@@ -173,3 +173,8 @@ Attribute mlir::convertToAttribute(MLIRContext *ctx,`
`173`	`173`	`ArrayRef<int64_t> storage) {`
`174`	`174`	`return DenseI64ArrayAttr::get(ctx, storage);`
`175`	`175`	`}`
	`176`	`+`
	`177`	`+Attribute mlir::convertToAttribute(MLIRContext *ctx,`
	`178`	`+ ArrayRef<int32_t> storage) {`
	`179`	`+ return DenseI32ArrayAttr::get(ctx, storage);`
	`180`	`+}`