Skip to content

Commit bfe9fc9

Browse files
author
Simon Moll
committed
Merge commit 'b2cea573c9a175688a68eec84dbb37864933f60f' into merge/ve-fpbinops-vvp
2 parents a364251 + b2cea57 commit bfe9fc9

File tree

11 files changed

+394
-32
lines changed

11 files changed

+394
-32
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,8 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
708708
// is all undef or zero, we know what it loads.
709709
if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
710710
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
711-
if (GV->getInitializer()->isNullValue())
711+
if (GV->getInitializer()->isNullValue() && !Ty->isX86_MMXTy() &&
712+
!Ty->isX86_AMXTy())
712713
return Constant::getNullValue(Ty);
713714
if (isa<UndefValue>(GV->getInitializer()))
714715
return UndefValue::get(Ty);

llvm/lib/Target/VE/VVPInstrInfo.td

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,14 @@
2121
///// V(E) - VP internal nodes
2222
// fp node types
2323

24-
def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
25-
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>, SDTCisSameNumEltsAs<0, 3>, IsVLVT<4>
24+
// BinaryFPOp(x,y,mask,vl)
25+
def SDTFPBinOpVVP : SDTypeProfile<1, 4, [ // vvp_fadd, etc.
26+
SDTCisSameAs<0, 1>,
27+
SDTCisSameAs<0, 2>,
28+
SDTCisFP<0>,
29+
SDTCisInt<3>,
30+
SDTCisSameNumEltsAs<0, 3>,
31+
IsVLVT<4>
2632
]>;
2733

2834
def SDTFPTernaryOpVVP : SDTypeProfile<1, 5, [ // vvp_ffma
@@ -207,10 +213,12 @@ def vvp_reduce_umax : SDNode<"VEISD::VVP_REDUCE_UMAX", SDTReduceVVP>;
207213
// math funcs
208214
def vvp_fsqrt : SDNode<"VEISD::VVP_FSQRT", SDTFPUnaryOpVVP>;
209215

216+
// Binary operator commutative pattern.
210217
class vvp_commutative<SDNode RootOp> :
211-
PatFrags<(ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
212-
[(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
213-
(RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
218+
PatFrags<
219+
(ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
220+
[(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
221+
(RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
214222

215223
class vvp_fma_commutative<SDNode RootOp> :
216224
PatFrags<(ops node:$X, node:$Y, node:$Z, node:$mask, node:$vlen),

llvm/lib/Target/VE/VVPInstrPatternsVec.td

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -371,19 +371,36 @@ defm : Binary_vr_vv_ShortLong<vvp_srl,
371371
i32, v256i32, "PVSRLLO">;
372372

373373
// Floating-point arithmetic (256 elements)
374-
defm : Unary_ShortLong<vvp_frcp, f64, v256f64, "VRCPD", f32, v256f32, "VRCPS">;
375-
defm : Unary_ShortLong<vvp_fsqrt, f64, v256f64, "VFSQRTD", f32, v256f32, "VFSQRTS">;
376-
defm : Binary_rv_vv_ShortLong<c_vvp_fadd, f64, v256f64, "VFADDD", f32, v256f32, "PVFADDUP">;
377-
defm : Binary_rv_vv_ShortLong<vvp_fsub, f64, v256f64, "VFSUBD", f32, v256f32, "PVFSUBUP">;
378-
defm : Binary_rv_vv_ShortLong<c_vvp_fmul, f64, v256f64, "VFMULD", f32, v256f32, "PVFMULUP">;
379-
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv, f64, v256f64, "VFDIVD", f32, v256f32, "VFDIVS">;
380-
381-
defm : Binary_rv_vv_ShortLong<c_vvp_fminnum, f64, v256f64, "VFMIND", f32, v256f32, "VFMINS">;
382-
defm : Binary_rv_vv_ShortLong<c_vvp_fmaxnum, f64, v256f64, "VFMAXD", f32, v256f32, "VFMAXS">;
383-
384-
defm : Ternary_ShortLong<c_vvp_ffma, f64, v256f64, "VFMADD", f32, v256f32, "VFMADS">;
385-
defm : Ternary_ShortLong<c_vvp_ffms, f64, v256f64, "VFMSBD", f32, v256f32, "VFMSBS">;
386-
defm : Ternary_ShortLong<c_vvp_ffmsn, f64, v256f64, "VFNMSBD", f32, v256f32, "VFNMSBS">;
374+
defm : Unary_ShortLong<vvp_frcp,
375+
f64, v256f64, "VRCPD", f32, v256f32, "VRCPS">;
376+
defm : Unary_ShortLong<vvp_fsqrt,
377+
f64, v256f64, "VFSQRTD", f32, v256f32, "VFSQRTS">;
378+
defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
379+
f64, v256f64, "VFADDD",
380+
f32, v256f32, "PVFADDUP">;
381+
defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
382+
f64, v256f64, "VFMULD",
383+
f32, v256f32, "PVFMULUP">;
384+
defm : Binary_rv_vv_ShortLong<vvp_fsub,
385+
f64, v256f64, "VFSUBD",
386+
f32, v256f32, "PVFSUBUP">;
387+
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
388+
f64, v256f64, "VFDIVD",
389+
f32, v256f32, "VFDIVS">;
390+
391+
defm : Binary_rv_vv_ShortLong<c_vvp_fminnum,
392+
f64, v256f64, "VFMIND",
393+
f32, v256f32, "VFMINS">;
394+
defm : Binary_rv_vv_ShortLong<c_vvp_fmaxnum,
395+
f64, v256f64, "VFMAXD",
396+
f32, v256f32, "VFMAXS">;
397+
398+
defm : Ternary_ShortLong<c_vvp_ffma,
399+
f64, v256f64, "VFMADD", f32, v256f32, "VFMADS">;
400+
defm : Ternary_ShortLong<c_vvp_ffms,
401+
f64, v256f64, "VFMSBD", f32, v256f32, "VFMSBS">;
402+
defm : Ternary_ShortLong<c_vvp_ffmsn,
403+
f64, v256f64, "VFNMSBD", f32, v256f32, "VFNMSBS">;
387404
// TODO: vvp_ffman
388405

389406
///// Selection /////

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,9 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
305305
else if (auto *LI = dyn_cast<LoadInst>(U)) {
306306
// A load from zeroinitializer is always zeroinitializer, regardless of
307307
// any applied offset (loads of x86_mmx / x86_amx type are excluded below).
308-
if (Init->isNullValue()) {
309-
LI->replaceAllUsesWith(Constant::getNullValue(LI->getType()));
308+
Type *Ty = LI->getType();
309+
if (Init->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy()) {
310+
LI->replaceAllUsesWith(Constant::getNullValue(Ty));
310311
EraseFromParent(LI);
311312
continue;
312313
}
@@ -316,8 +317,7 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
316317
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
317318
DL, Offset, /* AllowNonInbounds */ true);
318319
if (PtrOp == GV) {
319-
if (auto *Value = ConstantFoldLoadFromConst(Init, LI->getType(),
320-
Offset, DL)) {
320+
if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
321321
LI->replaceAllUsesWith(Value);
322322
EraseFromParent(LI);
323323
}

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -664,10 +664,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
664664
return nullptr;
665665

666666
// When processing loads, we need to propagate two bits of information to the
667-
// sunk load: whether it is volatile, and what its alignment is. We currently
668-
// don't sink loads when some have their alignment specified and some don't.
669-
// visitLoadInst will propagate an alignment onto the load when TD is around,
670-
// and if TD isn't around, we can't handle the mixed case.
667+
// sunk load: whether it is volatile, and what its alignment is.
671668
bool isVolatile = FirstLI->isVolatile();
672669
Align LoadAlignment = FirstLI->getAlign();
673670
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
@@ -699,7 +696,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
699696
!isSafeAndProfitableToSinkLoad(LI))
700697
return nullptr;
701698

702-
LoadAlignment = std::min(LoadAlignment, Align(LI->getAlign()));
699+
LoadAlignment = std::min(LoadAlignment, LI->getAlign());
703700

704701
// If the PHI is of volatile loads and the load block has multiple
705702
// successors, sinking it would remove a load of the volatile value from
Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
33

4-
define fastcc <256 x float> @test_vp_fadd_256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
5-
; CHECK-LABEL: test_vp_fadd_256f32:
4+
declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fadd_v256f32_vv:
68
; CHECK: # %bb.0:
79
; CHECK-NEXT: and %s0, %s0, (32)0
810
; CHECK-NEXT: lvl %s0
@@ -12,5 +14,68 @@ define fastcc <256 x float> @test_vp_fadd_256f32(<256 x float> %i0, <256 x float
1214
ret <256 x float> %r0
1315
}
1416

15-
; integer arith
16-
declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
17+
define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fadd_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: pvfadd.up %v0, %s0, %v0
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fadd_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: pvfadd.up %v0, %s0, %v0
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x float> undef, float %s1, i32 0
38+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x float> %r0
41+
}
42+
43+
44+
declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
45+
46+
define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_fadd_v256f64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vfadd.d %v0, %v0, %v1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x double> %r0
55+
}
56+
57+
define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_fadd_v256f64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vfadd.d %v0, %s0, %v0
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x double> undef, double %s0, i32 0
65+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x double> %r0
68+
}
69+
70+
define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_fadd_v256f64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vfadd.d %v0, %s0, %v0
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x double> undef, double %s1, i32 0
78+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x double> %r0
81+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fdiv_v256f32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
13+
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
14+
ret <256 x float> %r0
15+
}
16+
17+
define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fdiv_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fdiv_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x float> undef, float %s1, i32 0
38+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x float> %r0
41+
}
42+
43+
44+
declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
45+
46+
define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_fdiv_v256f64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x double> %r0
55+
}
56+
57+
define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_fdiv_v256f64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x double> undef, double %s0, i32 0
65+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x double> %r0
68+
}
69+
70+
define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_fdiv_v256f64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x double> undef, double %s1, i32 0
78+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x double> %r0
81+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
3+
4+
declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32)
5+
6+
define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_fmul_v256f32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: pvfmul.up %v0, %v0, %v1
12+
; CHECK-NEXT: b.l.t (, %s10)
13+
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
14+
ret <256 x float> %r0
15+
}
16+
17+
define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_fmul_v256f32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: pvfmul.up %v0, %s0, %v0
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x float> undef, float %s0, i32 0
25+
%i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x float> %r0
28+
}
29+
30+
define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_fmul_v256f32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: pvfmul.up %v0, %s0, %v0
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x float> undef, float %s1, i32 0
38+
%i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x float> %r0
41+
}
42+
43+
44+
declare <256 x double> @llvm.vp.fmul.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32)
45+
46+
define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_fmul_v256f64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vfmul.d %v0, %v0, %v1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x double> %r0
55+
}
56+
57+
define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_fmul_v256f64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vfmul.d %v0, %s0, %v0
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x double> undef, double %s0, i32 0
65+
%i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x double> %r0
68+
}
69+
70+
define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_fmul_v256f64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vfmul.d %v0, %s0, %v0
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x double> undef, double %s1, i32 0
78+
%i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x double> %r0
81+
}

0 commit comments

Comments
 (0)