Skip to content

Commit d2c5fbe

Browse files
rampitecarsenm
andauthored
[AMDGPU] Legalize vector fminimum and fmaximum with VOP3P (#138971)
Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
1 parent 3b9b377 commit d2c5fbe

File tree

7 files changed

+921
-967
lines changed

7 files changed

+921
-967
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -861,9 +861,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
861861
if (Subtarget->hasIEEEMinMax()) {
862862
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM},
863863
{MVT::f16, MVT::f32, MVT::f64, MVT::v2f16}, Legal);
864-
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
865-
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
866-
Custom);
867864
} else {
868865
// FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum
869866
if (Subtarget->hasMinimum3Maximum3F32())
@@ -878,6 +875,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
878875
}
879876
}
880877

878+
if (Subtarget->hasVOP3PInsts()) {
879+
// We want to break these into v2f16 pieces, not scalarize.
880+
setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM},
881+
{MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16},
882+
Custom);
883+
}
884+
881885
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
882886
{MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
883887
MVT::bf16, MVT::v2i16, MVT::v2f16, MVT::v2bf16, MVT::i128,

llvm/test/Analysis/CostModel/AMDGPU/maximum.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ define void @maximum_f16() {
1111
; GFX950-FASTF64-LABEL: 'maximum_f16'
1212
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
1313
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
14-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
15-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
16-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
17-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
14+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
15+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
16+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
17+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
1818
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
1919
;
2020
; GFX9-LABEL: 'maximum_f16'
2121
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
2222
; GFX9-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
23-
; GFX9-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
24-
; GFX9-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
25-
; GFX9-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
26-
; GFX9-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
23+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
24+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
25+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
26+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
2727
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
2828
;
2929
; SLOWF64-LABEL: 'maximum_f16'
@@ -38,10 +38,10 @@ define void @maximum_f16() {
3838
; GFX9-SIZE-LABEL: 'maximum_f16'
3939
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.maximum.f16(half undef, half undef)
4040
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.maximum.v2f16(<2 x half> undef, <2 x half> undef)
41-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
42-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
43-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
44-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
41+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.maximum.v3f16(<3 x half> undef, <3 x half> undef)
42+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.maximum.v4f16(<4 x half> undef, <4 x half> undef)
43+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.maximum.v8f16(<8 x half> undef, <8 x half> undef)
44+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.maximum.v16f16(<16 x half> undef, <16 x half> undef)
4545
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
4646
;
4747
; SLOW-SIZE-LABEL: 'maximum_f16'

llvm/test/Analysis/CostModel/AMDGPU/minimum.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,19 @@ define void @minimum_f16() {
1111
; GFX950-FASTF64-LABEL: 'minimum_f16'
1212
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
1313
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
14-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
15-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
16-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
17-
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
14+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
15+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
16+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
17+
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
1818
; GFX950-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
1919
;
2020
; GFX9-LABEL: 'minimum_f16'
2121
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
2222
; GFX9-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
23-
; GFX9-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
24-
; GFX9-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
25-
; GFX9-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
26-
; GFX9-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
23+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
24+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
25+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
26+
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
2727
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
2828
;
2929
; SLOWF64-LABEL: 'minimum_f16'
@@ -38,10 +38,10 @@ define void @minimum_f16() {
3838
; GFX9-SIZE-LABEL: 'minimum_f16'
3939
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call half @llvm.minimum.f16(half undef, half undef)
4040
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16 = call <2 x half> @llvm.minimum.v2f16(<2 x half> undef, <2 x half> undef)
41-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
42-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
43-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
44-
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
41+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = call <3 x half> @llvm.minimum.v3f16(<3 x half> undef, <3 x half> undef)
42+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = call <4 x half> @llvm.minimum.v4f16(<4 x half> undef, <4 x half> undef)
43+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = call <8 x half> @llvm.minimum.v8f16(<8 x half> undef, <8 x half> undef)
44+
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16 = call <16 x half> @llvm.minimum.v16f16(<16 x half> undef, <16 x half> undef)
4545
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
4646
;
4747
; SLOW-SIZE-LABEL: 'minimum_f16'

0 commit comments

Comments
 (0)