-
Notifications
You must be signed in to change notification settings - Fork 14.4k
LoongArch: Add test for sincos intrinsic #147471
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-loongarch — Author: Matt Arsenault (arsenm). Changes: the patch is 35.05 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/147471.diff — 1 file affected:
diff --git a/llvm/test/CodeGen/LoongArch/llvm.sincos.ll b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
new file mode 100644
index 0000000000000..ffedd7f9e9438
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/llvm.sincos.ll
@@ -0,0 +1,866 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+define { half, half } @test_sincos_f16(half %a) #0 {
+; LA32-LABEL: test_sincos_f16:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: movfr2gr.s $fp, $fs1
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: lu12i.w $a1, -16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: movgr2fr.w $fa0, $a0
+; LA32-NEXT: or $a0, $fp, $a1
+; LA32-NEXT: movgr2fr.w $fa1, $a0
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f16:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $fp, $fs1
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: lu12i.w $a1, -16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: movgr2fr.w $fa0, $a0
+; LA64-NEXT: or $a0, $fp, $a1
+; LA64-NEXT: movgr2fr.w $fa1, $a0
+; LA64-NEXT: fld.d $fs1, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) #0 {
+; LA32-LABEL: test_sincos_f16_only_use_sin:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: bl sinf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: lu12i.w $a1, -16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: movgr2fr.w $fa0, $a0
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f16_only_use_sin:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: lu12i.w $a1, -16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: movgr2fr.w $fa0, $a0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) #0 {
+; LA32-LABEL: test_sincos_f16_only_use_cos:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: bl cosf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: lu12i.w $a1, -16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: movgr2fr.w $fa0, $a0
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f16_only_use_cos:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: lu12i.w $a1, -16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: movgr2fr.w $fa0, $a0
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 {
+; LA32-LABEL: test_sincos_v2f16:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -64
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 52 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 48 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 32 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 24 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs2, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs3, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: movgr2fr.w $fs0, $a2
+; LA32-NEXT: movgr2fr.w $fa0, $a1
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl __extendhfsf2
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs3, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl sinf
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: movfr2gr.s $s0, $fs1
+; LA32-NEXT: movfr2gr.s $s1, $fs3
+; LA32-NEXT: movfr2gr.s $s2, $fs2
+; LA32-NEXT: bl __truncsfhf2
+; LA32-NEXT: movfr2gr.s $a0, $fa0
+; LA32-NEXT: st.h $a0, $fp, 6
+; LA32-NEXT: st.h $s2, $fp, 4
+; LA32-NEXT: st.h $s1, $fp, 2
+; LA32-NEXT: st.h $s0, $fp, 0
+; LA32-NEXT: fld.d $fs3, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs2, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs1, $sp, 24 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 32 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $s2, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s1, $sp, 48 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $s0, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_v2f16:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -80
+; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
+; LA64-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs2, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs3, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $s0, $a2
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: movgr2fr.w $fa0, $a1
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: movgr2fr.w $fa0, $s0
+; LA64-NEXT: pcaddu18i $ra, %call36(__extendhfsf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs2, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs3, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs2
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $s0, $fs0
+; LA64-NEXT: movfr2gr.s $s1, $fs3
+; LA64-NEXT: movfr2gr.s $s2, $fs1
+; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfhf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: movfr2gr.s $a0, $fa0
+; LA64-NEXT: st.h $a0, $fp, 6
+; LA64-NEXT: st.h $s2, $fp, 4
+; LA64-NEXT: st.h $s1, $fp, 2
+; LA64-NEXT: st.h $s0, $fp, 0
+; LA64-NEXT: fld.d $fs3, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs2, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs1, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 80
+; LA64-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincos_f32(float %a) #0 {
+; LA32-LABEL: test_sincos_f32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fa1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_f32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -32
+; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: fmov.s $fs0, $fa0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fs1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fmov.s $fa1, $fa0
+; LA64-NEXT: fmov.s $fa0, $fs1
+; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 32
+; LA64-NEXT: ret
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) #0 {
+; LA32-LABEL: test_sincos_v2f32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -48
+; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 32 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 24 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs2, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs3, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa1
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs3, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fst.s $fa0, $fp, 12
+; LA32-NEXT: fst.s $fs1, $fp, 8
+; LA32-NEXT: fst.s $fs3, $fp, 4
+; LA32-NEXT: fst.s $fs2, $fp, 0
+; LA32-NEXT: fld.d $fs3, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs2, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs1, $sp, 24 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 32 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 48
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_v2f32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -64
+; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vpackev.w $vr0, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vpackev.w $vr1, $vr0, $vr1
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 64
+; LA64-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) #0 {
+; LA32-LABEL: test_sincos_v3f32:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -64
+; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 48 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs1, $sp, 40 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs2, $sp, 32 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs3, $sp, 24 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs4, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT: fst.d $fs5, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: fmov.s $fs0, $fa2
+; LA32-NEXT: fmov.s $fs1, $fa1
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs3, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs4, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl sinf
+; LA32-NEXT: fmov.s $fs5, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs2
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fs2, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs1
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fmov.s $fs1, $fa0
+; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: bl cosf
+; LA32-NEXT: fst.s $fa0, $fp, 24
+; LA32-NEXT: fst.s $fs1, $fp, 20
+; LA32-NEXT: fst.s $fs2, $fp, 16
+; LA32-NEXT: fst.s $fs5, $fp, 8
+; LA32-NEXT: fst.s $fs4, $fp, 4
+; LA32-NEXT: fst.s $fs3, $fp, 0
+; LA32-NEXT: fld.d $fs5, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs4, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs3, $sp, 24 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs2, $sp, 32 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs1, $sp, 40 # 8-byte Folded Reload
+; LA32-NEXT: fld.d $fs0, $sp, 48 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_sincos_v3f32:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -96
+; LA64-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 2
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 72
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 68
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(sinf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 64
+; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 56
+; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 52
+; LA64-NEXT: vld $vr0, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0
+; LA64-NEXT: pcaddu18i $ra, %call36(cosf)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: fst.s $fa0, $sp, 48
+; LA64-NEXT: vld $vr0, $sp, 64
+; LA64-NEXT: vld $vr1, $sp, 48
+; LA64-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 96
+; LA64-NEXT: ret
+ %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)
+ ret { <...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
5f21740
to
8ee5b85
Compare
65011f9
to
248eb92
Compare
Merge activity
|
248eb92
to
aceabe9
Compare
3baa828
to
5e1d92b
Compare
5e1d92b
to
281f2ca
Compare
No description provided.