llvm
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Lines changed: 7 additions & 3 deletions
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/maximum.ll b/llvm/test/Analysis/CostModel/AMDGPU/maximum.ll
Lines changed: 12 additions & 12 deletions
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/minimum.ll b/llvm/test/Analysis/CostModel/AMDGPU/minimum.ll
Lines changed: 12 additions & 12 deletions
@@ -861,9 +861,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   if (Subtarget->hasIEEEMinMax()) {
     setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
                        {MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
-    setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
-                       {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
-                       Custom);
   } else {
     // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
     if (Subtarget->hasMinimum3Maximum3F32())
@@ -878,6 +875,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     }
   }
 
+  if (Subtarget->hasVOP3PInsts()) {
+    // We want to break these into v2f16 pieces, not scalarize.
+    setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
+                       {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
+                       Custom);
+  }
+
   setOperationAction(ISD::INTRINSIC_WO_CHAIN,
                      {MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
                       MVT::bf16, MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::i128,
 
@@ -11,19 +11,19 @@ define void @maximum_f16() {
 ; GFX950-FASTF64-LABEL: 'maximum_f16'
 ; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
 ; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
 ; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-LABEL: 'maximum_f16'
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 175 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'maximum_f16'
@@ -38,10 +38,10 @@ define void @maximum_f16() {
 ; GFX9-SIZE-LABEL: 'maximum_f16'
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'maximum_f16'
 
@@ -11,19 +11,19 @@ define void @minimum_f16() {
 ; GFX950-FASTF64-LABEL: 'minimum_f16'
 ; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
 ; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
 ; GFX950-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-LABEL: 'minimum_f16'
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 175 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'minimum_f16'
@@ -38,10 +38,10 @@ define void @minimum_f16() {
 ; GFX9-SIZE-LABEL: 'minimum_f16'
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'minimum_f16'