Skip to content

Commit f63674c

Browse files
author
z1_cciauto
authored
merge main into amd-staging (llvm#2975)
2 parents 7ef0f9a + fc75dda commit f63674c

File tree

12 files changed

+72
-55
lines changed

12 files changed

+72
-55
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Implemented Papers
4747
- P1222R4: A Standard ``flat_set`` (`Github <https://github.com/llvm/llvm-project/issues/105193>`__)
4848
- P2897R7: ``aligned_accessor``: An mdspan accessor expressing pointer over-alignment (`Github <https://github.com/llvm/llvm-project/issues/118372>`__)
4949
- P3247R2: Deprecate the notion of trivial types (`Github <https://github.com/llvm/llvm-project/issues/118387>`__)
50-
- P3372R3: ``constexpr`` containers and adaptors (`Github <https://github.com/llvm/llvm-project/issues/128673>`__) (Only ``constexpr flat_map`` is implemented)
50+
- P3372R3: ``constexpr`` containers and adaptors (`Github <https://github.com/llvm/llvm-project/issues/127876>`__) (``forward_list``, ``list``, ``priority_queue``, ``flat_map``, and ``flat_set`` are implemented)
5151
- P2441R2: ``views::join_with`` (`Github <https://github.com/llvm/llvm-project/issues/105185>`__)
5252
- P2711R1: Making multi-param constructors of ``views`` ``explicit`` (`Github <https://github.com/llvm/llvm-project/issues/105252>`__)
5353
- P2770R0: Stashing stashing ``iterators`` for proper flattening (`Github <https://github.com/llvm/llvm-project/issues/105250>`__)

libcxx/src/atomic.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,10 @@ __libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__con
151151
static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
152152
__cxx_atomic_contention_t const volatile* __platform_state,
153153
__cxx_contention_t __old_value) {
154-
__cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_seq_cst);
154+
__cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_relaxed);
155+
// https://github.com/llvm/llvm-project/issues/109290
156+
// There are no platform guarantees of a memory barrier in the platform wait implementation
157+
__cxx_atomic_thread_fence(memory_order_seq_cst);
155158
// We sleep as long as the monitored value hasn't changed.
156159
__libcpp_platform_wait_on_address(__platform_state, __old_value);
157160
__cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
@@ -163,7 +166,7 @@ static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __conte
163166
static void __libcpp_atomic_notify(void const volatile* __location) {
164167
auto const __entry = __libcpp_contention_state(__location);
165168
// The value sequence laundering happens on the next line below.
166-
__cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_release);
169+
__cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_seq_cst);
167170
__libcpp_contention_notify(
168171
&__entry->__contention_state,
169172
&__entry->__platform_state,

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11402,18 +11402,25 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
1140211402
SDValue AbsOp0 = N->getOperand(0);
1140311403
unsigned Opc0 = Op0.getOpcode();
1140411404

11405-
// Check if the operands of the sub are (zero|sign)-extended.
11406-
// TODO: Should we use ValueTracking instead?
11405+
// Check if the operands of the sub are (zero|sign)-extended, otherwise
11406+
// fall back to ValueTracking.
1140711407
if (Opc0 != Op1.getOpcode() ||
1140811408
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
1140911409
Opc0 != ISD::SIGN_EXTEND_INREG)) {
1141011410
// fold (abs (sub nsw x, y)) -> abds(x, y)
1141111411
// Don't fold this for unsupported types as we lose the NSW handling.
11412-
if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
11413-
TLI.preferABDSToABSWithNSW(VT)) {
11412+
if (hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT) &&
11413+
(AbsOp0->getFlags().hasNoSignedWrap() ||
11414+
DAG.willNotOverflowSub(/*IsSigned=*/true, Op0, Op1))) {
1141411415
SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
1141511416
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
1141611417
}
11418+
// fold (abs (sub x, y)) -> abdu(x, y) when the sign bits of x and y are known zero
11419+
if (hasOperation(ISD::ABDU, VT) && DAG.SignBitIsZero(Op0) &&
11420+
DAG.SignBitIsZero(Op1)) {
11421+
SDValue ABD = DAG.getNode(ISD::ABDU, DL, VT, Op0, Op1);
11422+
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11423+
}
1141711424
return SDValue();
1141811425
}
1141911426

llvm/test/CodeGen/AArch64/abd-combine.ll

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@ define <8 x i16> @abdu_const(<8 x i16> %src1) {
1818
; CHECK-LABEL: abdu_const:
1919
; CHECK: // %bb.0:
2020
; CHECK-NEXT: movi v1.4s, #1
21-
; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
22-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
23-
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
24-
; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
25-
; CHECK-NEXT: abs v1.4s, v1.4s
26-
; CHECK-NEXT: abs v0.4s, v0.4s
27-
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
21+
; CHECK-NEXT: ushll v2.4s, v0.4h, #0
22+
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
23+
; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
24+
; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
25+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
2826
; CHECK-NEXT: ret
2927
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
3028
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -37,10 +35,10 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
3735
; CHECK-LABEL: abdu_const_lhs:
3836
; CHECK: // %bb.0:
3937
; CHECK-NEXT: movi v1.4s, #1
40-
; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
41-
; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
42-
; CHECK-NEXT: abs v0.4s, v0.4s
43-
; CHECK-NEXT: abs v1.4s, v2.4s
38+
; CHECK-NEXT: ushll v2.4s, v0.4h, #0
39+
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
40+
; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
41+
; CHECK-NEXT: uabd v1.4s, v2.4s, v1.4s
4442
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
4543
; CHECK-NEXT: ret
4644
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
@@ -53,13 +51,6 @@ define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
5351
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
5452
; CHECK-LABEL: abdu_const_zero:
5553
; CHECK: // %bb.0:
56-
; CHECK-NEXT: movi v1.2d, #0000000000000000
57-
; CHECK-NEXT: ushll v2.4s, v0.4h, #0
58-
; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
59-
; CHECK-NEXT: neg v1.4s, v2.4s
60-
; CHECK-NEXT: abs v0.4s, v0.4s
61-
; CHECK-NEXT: abs v1.4s, v1.4s
62-
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
6354
; CHECK-NEXT: ret
6455
%zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
6556
%sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
@@ -328,13 +319,11 @@ define <8 x i16> @abds_const(<8 x i16> %src1) {
328319
; CHECK-LABEL: abds_const:
329320
; CHECK: // %bb.0:
330321
; CHECK-NEXT: movi v1.4s, #1
331-
; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
332-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
333-
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
334-
; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
335-
; CHECK-NEXT: abs v1.4s, v1.4s
336-
; CHECK-NEXT: abs v0.4s, v0.4s
337-
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
322+
; CHECK-NEXT: sshll v2.4s, v0.4h, #0
323+
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
324+
; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
325+
; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
326+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
338327
; CHECK-NEXT: ret
339328
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
340329
%sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -347,10 +336,10 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
347336
; CHECK-LABEL: abds_const_lhs:
348337
; CHECK: // %bb.0:
349338
; CHECK-NEXT: movi v1.4s, #1
350-
; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
351-
; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
352-
; CHECK-NEXT: abs v0.4s, v0.4s
353-
; CHECK-NEXT: abs v1.4s, v2.4s
339+
; CHECK-NEXT: sshll v2.4s, v0.4h, #0
340+
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
341+
; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
342+
; CHECK-NEXT: sabd v1.4s, v2.4s, v1.4s
354343
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
355344
; CHECK-NEXT: ret
356345
%zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
@@ -363,10 +352,8 @@ define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
363352
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
364353
; CHECK-LABEL: abds_const_zero:
365354
; CHECK: // %bb.0:
366-
; CHECK-NEXT: movi v1.2d, #0000000000000000
367-
; CHECK-NEXT: sshll v2.4s, v0.4h, #0
368-
; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
369-
; CHECK-NEXT: neg v1.4s, v2.4s
355+
; CHECK-NEXT: sshll v1.4s, v0.4h, #0
356+
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
370357
; CHECK-NEXT: abs v0.4s, v0.4s
371358
; CHECK-NEXT: abs v1.4s, v1.4s
372359
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h

llvm/test/CodeGen/AArch64/sve-abd.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,7 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
283283
; CHECK-NEXT: ptrue p0.s
284284
; CHECK-NEXT: and z0.s, z0.s, #0xff
285285
; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
286-
; CHECK-NEXT: sub z0.s, z0.s, z1.s
287-
; CHECK-NEXT: abs z0.s, p0/m, z0.s
286+
; CHECK-NEXT: sabd z0.s, p0/m, z0.s, z1.s
288287
; CHECK-NEXT: ret
289288
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
290289
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>

llvm/test/CodeGen/PowerPC/fp-branch.ll

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
1-
; RUN: llc -verify-machineinstrs < %s -mattr=-vsx -mtriple=ppc32-- | grep fcmp | count 1
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -verify-machineinstrs < %s -mattr=-vsx -mtriple=ppc32-- | FileCheck %s
23

34
declare i1 @llvm.isunordered.f64(double, double)
45

56
define i1 @intcoord_cond_next55(double %tmp48.reload) {
7+
; CHECK-LABEL: intcoord_cond_next55:
8+
; CHECK: # %bb.0: # %newFuncRoot
9+
; CHECK-NEXT: lis 3, .LCPI0_0@ha
10+
; CHECK-NEXT: lfs 0, .LCPI0_0@l(3)
11+
; CHECK-NEXT: fcmpu 0, 1, 0
12+
; CHECK-NEXT: blt 0, .LBB0_2
13+
; CHECK-NEXT: # %bb.1: # %bb72.exitStub
14+
; CHECK-NEXT: li 3, 1
15+
; CHECK-NEXT: blr
16+
; CHECK-NEXT: .LBB0_2: # %cond_next62.exitStub
17+
; CHECK-NEXT: li 3, 0
18+
; CHECK-NEXT: blr
619
newFuncRoot:
720
br label %cond_next55
821

llvm/test/CodeGen/RISCV/rvv/abd.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,10 @@ define <vscale x 4 x i32> @uabd_non_matching_promotion(<vscale x 4 x i8> %a, <vs
316316
; CHECK: # %bb.0:
317317
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
318318
; CHECK-NEXT: vzext.vf4 v10, v8
319-
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
320-
; CHECK-NEXT: vsext.vf2 v8, v9
321-
; CHECK-NEXT: vwsub.wv v10, v10, v8
322-
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
323-
; CHECK-NEXT: vrsub.vi v8, v10, 0
324-
; CHECK-NEXT: vmax.vv v8, v10, v8
319+
; CHECK-NEXT: vsext.vf4 v12, v9
320+
; CHECK-NEXT: vmin.vv v8, v10, v12
321+
; CHECK-NEXT: vmax.vv v10, v10, v12
322+
; CHECK-NEXT: vsub.vv v8, v10, v8
325323
; CHECK-NEXT: ret
326324
%a.zext = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
327325
%b.zext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>

mlir/include/mlir/IR/ODSSupport.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ convertFromAttribute(SmallVectorImpl<int32_t> &storage, Attribute attr,
118118
/// Convert the provided ArrayRef<int64_t> to a DenseI64ArrayAttr attribute.
119119
Attribute convertToAttribute(MLIRContext *ctx, ArrayRef<int64_t> storage);
120120

121+
/// Convert the provided ArrayRef<int32_t> to a DenseI32ArrayAttr attribute.
122+
Attribute convertToAttribute(MLIRContext *ctx, ArrayRef<int32_t> storage);
123+
121124
} // namespace mlir
122125

123126
#endif // MLIR_IR_ODSSUPPORT_H

mlir/lib/IR/ODSSupport.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,8 @@ Attribute mlir::convertToAttribute(MLIRContext *ctx,
173173
ArrayRef<int64_t> storage) {
174174
return DenseI64ArrayAttr::get(ctx, storage);
175175
}
176+
177+
Attribute mlir::convertToAttribute(MLIRContext *ctx,
178+
ArrayRef<int32_t> storage) {
179+
return DenseI32ArrayAttr::get(ctx, storage);
180+
}

mlir/test/IR/properties.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
// # RUN: mlir-opt %s -mlir-print-op-generic -split-input-file | mlir-opt -mlir-print-op-generic | FileCheck %s --check-prefix=GENERIC
33

44
// CHECK: test.with_properties
5-
// CHECK-SAME: a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4]{{$}}
5+
// CHECK-SAME: a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4], array32 = [5, 6]{{$}}
66
// GENERIC: "test.with_properties"()
7-
// GENERIC-SAME: <{a = 32 : i64, array = array<i64: 1, 2, 3, 4>, b = "foo", c = "bar", flag = true}> : () -> ()
8-
test.with_properties a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4]
7+
// GENERIC-SAME: <{a = 32 : i64, array = array<i64: 1, 2, 3, 4>, array32 = array<i32: 5, 6>, b = "foo", c = "bar", flag = true}> : () -> ()
8+
test.with_properties a = 32, b = "foo", c = "bar", flag = true, array = [1, 2, 3, 4], array32 = [5, 6]
99

1010
// CHECK: test.with_nice_properties
1111
// CHECK-SAME: "foo bar" is -3{{$}}

0 commit comments

Comments
 (0)