diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9bbb89e37865d..bb397c1c35bf8 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8720,6 +8720,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
     if (isa<Constant>(CI->getOperand(0)))
       return AnyChange;
 
+    // Remove noop bitcasts
+    if (isa<BitCastInst>(I) && I->getType() == I->getOperand(0)->getType()) {
+      replaceAllUsesWith(I, I->getOperand(0), FreshBBs, IsHugeFunc);
+      I->eraseFromParent();
+      return true;
+    }
+
     if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
       return true;
 
diff --git a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
index 7dde168024278..f77d300adb2c7 100644
--- a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
@@ -9,8 +9,8 @@
 ; MIR-DAG: ![[SET1:[0-9]+]] = !{![[SCOPE1]]}
 
 ; MIR-LABEL: name: test_memcpy
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: test_memcpy:
 ; CHECK: // %bb.0:
@@ -32,8 +32,8 @@ define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
 }
 
 ; MIR-LABEL: name: test_memcpy_inline
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: test_memcpy_inline:
 ; CHECK: // %bb.0:
@@ -55,8 +55,8 @@ define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
 }
 
 ; MIR-LABEL: name: test_memmove
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: test_memmove:
 ; CHECK: // %bb.0:
@@ -79,8 +79,8 @@ define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
 
 ; MIR-LABEL: name: test_memset
 ; MIR: %2:gpr64 = MOVi64imm -6148914691236517206
-; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p0, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRXui %2, %0, 1 :: (store (s64) into %ir.p + 8, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRXui %2, %0, 0 :: (store (s64) into %ir.p, align 4, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: test_memset:
 ; CHECK: // %bb.0:
@@ -100,8 +100,8 @@ define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
 }
 
 ; MIR-LABEL: name: test_mempcpy
-; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.p1, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
-; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p0, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR: %2:fpr128 = LDRQui %0, 1 :: (load (s128) from %ir.add.ptr, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
+; MIR-NEXT: STRQui killed %2, %0, 0 :: (store (s128) into %ir.p, align 1, !alias.scope ![[SET0]], !noalias ![[SET1]])
 define i32 @test_mempcpy(ptr nocapture %p, ptr nocapture readonly %q) {
 ; CHECK-LABEL: test_mempcpy:
 ; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll b/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
index 6b2a8c5aaf8b8..42fda4b4309af 100644
--- a/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-scoped-aa-store.ll
@@ -21,7 +21,7 @@ define void @blam0(ptr %g0, ptr %g1) {
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
   ; MIR-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (s64) from %ir.g0, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.tmp41, align 4, !alias.scope !3, !noalias !0)
+  ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.g1, align 4, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: RET_ReallyLR
   %tmp4 = getelementptr inbounds <3 x float>, ptr %g1, i64 0, i64 0
   %tmp5 = load <3 x float>, ptr %g0, align 4, !alias.scope !0, !noalias !1
@@ -43,7 +43,7 @@ define void @blam1(ptr %g0, ptr %g1) {
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
   ; MIR-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY1]], 0 :: (load (s64) from %ir.g0, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.tmp41, align 4, !alias.scope !9, !noalias !10)
+  ; MIR-NEXT: STRDui killed [[LDRDui]], [[COPY]], 0 :: (store (s64) into %ir.g1, align 4, !alias.scope !9, !noalias !10)
   ; MIR-NEXT: RET_ReallyLR
   %tmp4 = getelementptr inbounds <3 x float>, ptr %g1, i64 0, i64 0
   %tmp5 = load <3 x float>, ptr %g0, align 4, !alias.scope !0, !noalias !1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 96ee15f2eb78b..c7977c339880d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
   ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4)
+  ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.test_call_external_i32_func_i32_imm.kernarg.segment, align 16, addrspace 4)
   ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32
   ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -2957,7 +2957,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
   ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
+  ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.test_call_external_v33i32_func_v33i32_i32.kernarg.segment, align 16, addrspace 4)
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; GCN-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
   ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %18(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 92106d7e1d60d..862093ebf9416 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1350,7 +1350,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
   ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.test_call_external_void_func_p0_imm.kernarg.segment, align 16, addrspace 4)
   ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
   ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
@@ -4654,7 +4654,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
   ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.stack_passed_arg_alignment_v32i32_f64.kernarg.segment, align 16, addrspace 4)
   ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
   ; CHECK-NEXT: %18:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
   ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %18(p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index ac0d5ee78666e..bdf89f785ef99 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4)
+  ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.test_indirect_call_sgpr_ptr.kernarg.segment, align 16, addrspace 4)
   ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
   ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index ca580d8f29c84..a3f5d927d4b80 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -140,7 +140,7 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
   ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
-  ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.a.kernarg.offset1, align 16, addrspace 4)
+  ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.kernel_call_i32_fastcc_i32_i32_unused_result.kernarg.segment, align 16, addrspace 4)
   ; GCN-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32)
   ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32)
   ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index ae90cfb631e8d..412e3569c0d05 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -16,8 +16,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
   ; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
   ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
-  ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
-  ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
+  ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.f1.kernarg.segment, addrspace 4)
+  ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.f1.kernarg.segment + 16, align 16, addrspace 4)
   ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc
   ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
   ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 663fd98b46bf7..03d3724c232fa 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %7
   ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %8
   ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
-  ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+  ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
   ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
   ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -40,7 +40,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
   ; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
   ; PEI-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
-  ; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+  ; PEI-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
   ; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
   ; PEI-GFX908-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
   ; PEI-GFX908-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
@@ -60,7 +60,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %7
   ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %8
   ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
-  ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+  ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
   ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
   ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -83,7 +83,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
   ; PEI-GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
   ; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
-  ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+  ; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.partial_copy.kernarg.segment, addrspace 4)
   ; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
   ; PEI-GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
   ; PEI-GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index bd255e88b9512..5f1444be050b6 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -7,7 +7,7 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
   ; GCN: bb.0 (%ir-block.0):
   ; GCN-NEXT: liveins: $sgpr8_sgpr9
   ; GCN-NEXT: {{ $}}
-  ; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
+  ; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.test_spill_av_class.kernarg.segment, addrspace 4)
   ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
   ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
   ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
diff --git a/llvm/test/CodeGen/SystemZ/isel-debug.ll b/llvm/test/CodeGen/SystemZ/isel-debug.ll
index 120a0e08ae9fa..753e29cbfaf54 100644
--- a/llvm/test/CodeGen/SystemZ/isel-debug.ll
+++ b/llvm/test/CodeGen/SystemZ/isel-debug.ll
@@ -5,7 +5,7 @@
 ;
 ; Check that some debug output is printed without problems.
 ; CHECK: SystemZAddressingMode
-; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.0)>
+; CHECK: Base t5: i64,ch = load<(load (s64) from %ir.ptr)>
 ; CHECK: Index
 ; CHECK: Disp
diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index bcd92f81911b2..61837714fc82c 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -299,15 +299,14 @@ define void @test_width2(ptr nocapture readnone %x, ptr nocapture %y, i8 zeroext
 ; CHECK-NEXT: movs r0, #0
 ; CHECK-NEXT: rsbs r3, r3, #0
 ; CHECK-NEXT: bfi r0, r12, #0, #1
-; CHECK-NEXT: sub.w r12, r1, #8
 ; CHECK-NEXT: bfi r0, r3, #1, #1
 ; CHECK-NEXT: lsls r3, r0, #31
 ; CHECK-NEXT: itt ne
-; CHECK-NEXT: ldrne.w r3, [r12]
+; CHECK-NEXT: ldrne r3, [r1, #-8]
 ; CHECK-NEXT: vmovne.32 q0[0], r3
 ; CHECK-NEXT: lsls r0, r0, #30
 ; CHECK-NEXT: itt mi
-; CHECK-NEXT: ldrmi.w r0, [r12, #4]
+; CHECK-NEXT: ldrmi r0, [r1, #-4]
 ; CHECK-NEXT: vmovmi.32 q0[2], r0
 ; CHECK-NEXT: vmrs r3, p0
 ; CHECK-NEXT: and r0, r3, #1
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
index 171ccb287f2b9..49977b488d84a 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -18,9 +18,9 @@ define float @test_return_f1(float %f.coerce) {
   ; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f
-  ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
-  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13)
+  ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.f)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.f, align 4)
+  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.retval)
   ; ALL-NEXT: $xmm0 = COPY [[LOAD]](s32)
   ; ALL-NEXT: RET 0, implicit $xmm0
 entry:
@@ -47,9 +47,9 @@ define double @test_return_d1(double %d.coerce) {
   ; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
-  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.coerce.dive2)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 8), (load (s8) from %ir.1, align 8)
-  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.coerce.dive13)
+  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.d)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 8), (load (s8) from %ir.d, align 8)
+  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval)
   ; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64)
   ; ALL-NEXT: RET 0, implicit $xmm0
 entry:
@@ -75,14 +75,14 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1)
   ; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
   ; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
-  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1)
+  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.d)
   ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
-  ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8)
-  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5)
+  ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.0)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 8), (load (s8) from %ir.d, align 8)
+  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval)
   ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
-  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8)
+  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.retval + 8)
   ; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64)
   ; ALL-NEXT: $xmm1 = COPY [[LOAD1]](s64)
   ; ALL-NEXT: RET 0, implicit $xmm0, implicit $xmm1
@@ -111,9 +111,9 @@ define i32 @test_return_i1(i32 %i.coerce) {
   ; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
-  ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.coerce.dive2)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
-  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.coerce.dive13)
+  ; ALL-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.i)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.retval)
   ; ALL-NEXT: $eax = COPY [[LOAD]](s32)
   ; ALL-NEXT: RET 0, implicit $eax
 entry:
@@ -138,9 +138,9 @@ define i64 @test_return_i2(i64 %i.coerce) {
   ; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
-  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.0, align 4)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.1, align 4), (load (s8) from %ir.2, align 4)
-  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.3, align 4)
+  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.i, align 4)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval, align 4)
   ; ALL-NEXT: $rax = COPY [[LOAD]](s64)
   ; ALL-NEXT: RET 0, implicit $rax
 entry:
@@ -168,13 +168,13 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) {
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
   ; ALL-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.2.coerce
   ; ALL-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp
-  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4)
+  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.coerce, align 4)
   ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL-NEXT: %7:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
-  ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.1)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4)
+  ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.0)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.i, align 4), (load (s8) from %ir.coerce, align 4)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.tmp, align 8), (load (s8) from %ir.retval, align 4)
   ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp)
   ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
   ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8)
@@ -213,14 +213,14 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) {
   ; ALL-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
   ; ALL-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
   ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
-  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4)
+  ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.i, align 4)
   ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
-  ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2, align 4)
-  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4)
-  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4)
+  ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.0, align 4)
+  ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.retval, align 4), (load (s8) from %ir.i, align 4)
+  ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.retval, align 4)
   ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
-  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4)
+  ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.retval + 8, align 4)
   ; ALL-NEXT: $rax = COPY [[LOAD]](s64)
   ; ALL-NEXT: $rdx = COPY [[LOAD1]](s64)
   ; ALL-NEXT: RET 0, implicit $rax, implicit $rdx
diff --git a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
index d3b86786a630c..2df4fedeb6770 100644
--- a/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/X86/memcpy-scoped-aa.ll
@@ -18,10 +18,10 @@ define i32 @test_memcpy(ptr nocapture %p, ptr nocapture readonly %q) {
   ; MIR-NEXT: {{ $}}
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
   ; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -44,10 +44,10 @@ define i32 @test_memcpy_inline(ptr nocapture %p, ptr nocapture readonly %q) {
   ; MIR-NEXT: {{ $}}
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
   ; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -70,10 +70,10 @@ define i32 @test_memmove(ptr nocapture %p, ptr nocapture readonly %q) {
   ; MIR-NEXT: {{ $}}
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
   ; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -97,8 +97,8 @@ define i32 @test_memset(ptr nocapture %p, ptr nocapture readonly %q) {
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
   ; MIR-NEXT: [[MOV64ri:%[0-9]+]]:gr64 = MOV64ri -6148914691236517206
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p0 + 8, align 4, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p0, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p + 8, align 4, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV64ri]] :: (store (s64) into %ir.p, align 4, !alias.scope !0, !noalias !3)
   ; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: $eax = COPY [[ADD32rm]]
@@ -119,10 +119,10 @@ define i32 @test_mempcpy(ptr nocapture %p, ptr nocapture readonly %q) {
   ; MIR-NEXT: {{ $}}
   ; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
   ; MIR-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
-  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.p1, align 1, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.p1 + 8, align 1, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p0 + 8, align 1, !alias.scope !0, !noalias !3)
-  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p0, align 1, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load (s64) from %ir.add.ptr, align 1, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load (s64) from %ir.add.ptr + 8, align 1, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store (s64) into %ir.p + 8, align 1, !alias.scope !0, !noalias !3)
+  ; MIR-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store (s64) into %ir.p, align 1, !alias.scope !0, !noalias !3)
   ; MIR-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.q, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.q1, !alias.scope !3, !noalias !0)
   ; MIR-NEXT: $eax = COPY [[ADD32rm]]
diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
index c1fdd71c04948..fc2737b3d20b1 100644
--- a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
+++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll
@@ -18,7 +18,7 @@
 ; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]],
 ; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add {{(nuw )?}}[[BASEPTR]], Constant:i64<2>
 
-; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<(load (s16) from %ir.tmp81, align 1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
+; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<(load (s16) from %ir.tmp, align 1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
 ; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<(load (s8) from %ir.tmp12)> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64
 
 ; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<(store (s8) into %ir.tmp14)> [[ENTRYTOKEN]], [[LD1]], t{{[0-9]+}}, undef:i64
diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll
index 02525d73a786d..82dc0f7b853a4 100644
--- a/llvm/test/CodeGen/X86/pr44140.ll
+++ b/llvm/test/CodeGen/X86/pr44140.ll
@@ -22,22 +22,22 @@ define i32 @main() {
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3
 ; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
 ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm4
+; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm5, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vmovups %ymm4, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 2e2e78a6da51e..632205c8a3bc9 100644
--- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -934,8 +934,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
 ; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
 ; X86-SSE-NEXT: # imm = 0x1F80
@@ -945,8 +944,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
 ; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
 ; X86-AVX-NEXT: # imm = 0x1F80
@@ -955,8 +953,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
 ;
 ; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
 ; X64-SSE-NEXT: # imm = 0x1F80
@@ -964,8 +961,7 @@ define i32 @test_MM_GET_EXCEPTION_MASK() nounwind {
 ;
 ; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00]
 ; X64-AVX-NEXT: # imm = 0x1F80
@@ -983,8 +979,7 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
 ; X86-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
@@ -993,8 +988,7 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
 ; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
 ; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
@@ -1002,16 +996,14 @@ define i32 @test_MM_GET_EXCEPTION_STATE() nounwind {
 ;
 ; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
@@ -1027,8 +1019,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
 ; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
 ; X86-SSE-NEXT: # imm = 0x8000
@@ -1038,8 +1029,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
 ; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
 ; X86-AVX-NEXT: # imm = 0x8000
@@ -1048,8 +1038,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
 ;
 ; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
 ; X64-SSE-NEXT: # imm = 0x8000
@@ -1057,8 +1046,7 @@ define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind {
 ;
 ; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
 ; X64-AVX-NEXT: # imm = 0x8000
@@ -1075,8 +1063,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
 ; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
 ; X86-SSE-NEXT: # imm = 0x6000
@@ -1086,8 +1073,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
 ; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
 ; X86-AVX-NEXT: # imm = 0x6000
@@ -1096,8 +1082,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
 ;
 ; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
 ; X64-SSE-NEXT: # imm = 0x6000
@@ -1105,8 +1090,7 @@ define i32 @test_MM_GET_ROUNDING_MODE() nounwind {
 ;
 ; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00]
 ; X64-AVX-NEXT: # imm = 0x6000
@@ -1123,8 +1107,7 @@ define i32 @test_mm_getcsr() nounwind {
 ; X86-SSE-LABEL: test_mm_getcsr:
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
-; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
 ; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-SSE-NEXT: popl %ecx # encoding: [0x59]
 ; X86-SSE-NEXT: retl # encoding: [0xc3]
@@ -1132,23 +1115,20 @@ define i32 @test_mm_getcsr() nounwind {
 ; X86-AVX-LABEL: test_mm_getcsr:
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
-; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0]
-; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
 ; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24]
 ; X86-AVX-NEXT: popl %ecx # encoding: [0x59]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_mm_getcsr:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_mm_getcsr:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
 ; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 %1 = alloca i32, align 4
@@ -1813,14 +1793,13 @@ define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
 ; X86-SSE-NEXT: # imm = 0xE07F
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
 ; X86-SSE-NEXT: popl %eax # encoding: [0x58]
 ; X86-SSE-NEXT: retl # encoding: [0xc3]
 ;
@@ -1828,39 +1807,36 @@ define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
 ; X86-AVX-NEXT: # imm = 0xE07F
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
 ; X86-AVX-NEXT: popl %eax # encoding: [0x58]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-8065, %eax # encoding: [0x25,0x7f,0xe0,0xff,0xff]
 ; X64-SSE-NEXT: # imm = 0xE07F
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-8065, %eax # encoding: [0x25,0x7f,0xe0,0xff,0xff]
 ; X64-AVX-NEXT: # imm = 0xE07F
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 %1 = alloca i32, align 4
 %2 = bitcast ptr %1 to ptr
@@ -1879,13 +1855,12 @@ define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
 ; X86-SSE-NEXT: popl %eax # encoding: [0x58]
 ; X86-SSE-NEXT: retl # encoding: [0xc3]
 ;
@@ -1893,36 +1868,33 @@ define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0]
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
 ; X86-AVX-NEXT: popl %eax # encoding: [0x58]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-64, %eax # encoding: [0x83,0xe0,0xc0]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0]
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-64, %eax # encoding: [0x83,0xe0,0xc0]
+; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 %1 = alloca i32, align 4
 %2 = bitcast ptr %1 to ptr
@@ -1940,14 +1912,13 @@ define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
 ; X86-SSE: # %bb.0:
 ; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19]
-; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
+; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24]
+; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
 ; X86-SSE-NEXT: # imm = 0xFFFF7FFF
-; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11]
+; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24]
 ; X86-SSE-NEXT: popl %eax # encoding: [0x58]
 ; X86-SSE-NEXT: retl # encoding: [0xc3]
 ;
@@ -1955,39 +1926,36 @@ define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
 ; X86-AVX: # %bb.0:
 ; X86-AVX-NEXT: pushl %eax # encoding: [0x50]
 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1]
-; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19]
-; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24]
-; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff]
+; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24]
+; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24]
+; X86-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
 ; X86-AVX-NEXT: # imm = 0xFFFF7FFF
-; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2]
-; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24]
-; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11]
+; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1]
+; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24]
+; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24]
 ; X86-AVX-NEXT: popl %eax # encoding: [0x58]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
 ; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18]
-; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
+; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc]
+; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-SSE-NEXT: andl $-32769, %eax # encoding: [0x25,0xff,0x7f,0xff,0xff]
 ; X64-SSE-NEXT: # imm = 0xFFFF7FFF
-; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc]
-; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10]
+; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8]
+; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
+; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc]
 ; X64-SSE-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc]
-; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18]
-; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc]
-; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff]
+; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc]
+; X64-AVX-NEXT: andl $-32769, %eax # encoding: [0x25,0xff,0x7f,0xff,0xff]
 ; X64-AVX-NEXT: # imm = 0xFFFF7FFF
-; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9]
-; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding:
[0x89,0x4c,0x24,0xfc] -; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] +; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8] +; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] +; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc] ; X64-AVX-NEXT: retq # encoding: [0xc3] %1 = alloca i32, align 4 %2 = bitcast ptr %1 to ptr @@ -2144,14 +2112,13 @@ define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: pushl %eax # encoding: [0x50] ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] -; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] -; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] -; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] -; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] +; X86-SSE-NEXT: stmxcsr (%esp) # encoding: [0x0f,0xae,0x1c,0x24] +; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] +; X86-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] ; X86-SSE-NEXT: # imm = 0x9FFF -; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] -; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] -; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] +; X86-SSE-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1] +; X86-SSE-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24] +; X86-SSE-NEXT: ldmxcsr (%esp) # encoding: [0x0f,0xae,0x14,0x24] ; X86-SSE-NEXT: popl %eax # encoding: [0x58] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; @@ -2159,39 +2126,36 @@ define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { ; X86-AVX: # %bb.0: ; X86-AVX-NEXT: pushl %eax # encoding: [0x50] ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] -; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] -; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] -; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] -; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] +; X86-AVX-NEXT: vstmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x1c,0x24] +; X86-AVX-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] +; X86-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] ; X86-AVX-NEXT: # imm = 0x9FFF -; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] -; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] -; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] +; X86-AVX-NEXT: orl %eax, %ecx # encoding: [0x09,0xc1] +; X86-AVX-NEXT: movl %ecx, (%esp) # encoding: [0x89,0x0c,0x24] +; X86-AVX-NEXT: vldmxcsr (%esp) # encoding: [0xc5,0xf8,0xae,0x14,0x24] ; X86-AVX-NEXT: popl %eax # encoding: [0x58] ; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] -; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] -; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] -; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] +; X64-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x5c,0x24,0xfc] +; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] +; X64-SSE-NEXT: andl $-24577, %eax # encoding: [0x25,0xff,0x9f,0xff,0xff] ; X64-SSE-NEXT: # imm = 0x9FFF -; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] -; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: 
[0x89,0x4c,0x24,0xfc] -; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] +; X64-SSE-NEXT: orl %edi, %eax # encoding: [0x09,0xf8] +; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] +; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] -; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] -; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] -; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] +; X64-AVX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc] +; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] +; X64-AVX-NEXT: andl $-24577, %eax # encoding: [0x25,0xff,0x9f,0xff,0xff] ; X64-AVX-NEXT: # imm = 0x9FFF -; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] -; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] -; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] +; X64-AVX-NEXT: orl %edi, %eax # encoding: [0x09,0xf8] +; X64-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] +; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc] ; X64-AVX-NEXT: retq # encoding: [0xc3] %1 = alloca i32, align 4 %2 = bitcast ptr %1 to ptr @@ -2303,28 +2267,24 @@ define <4 x float> @test_mm_set1_ps(float %a0) nounwind { define void @test_mm_setcsr(i32 %a0) nounwind { ; X86-SSE-LABEL: test_mm_setcsr: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] -; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10] +; X86-SSE-NEXT: ldmxcsr {{[0-9]+}}(%esp) # encoding: [0x0f,0xae,0x54,0x24,0x04] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX-LABEL: test_mm_setcsr: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] -; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10] +; X86-AVX-NEXT: vldmxcsr {{[0-9]+}}(%esp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0x04] ; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_setcsr: ; X64-SSE: # %bb.0: ; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] -; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] -; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] +; X64-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0x0f,0xae,0x54,0x24,0xfc] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX-LABEL: test_mm_setcsr: ; X64-AVX: # %bb.0: ; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] -; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] -; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] +; X64-AVX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # encoding: [0xc5,0xf8,0xae,0x54,0x24,0xfc] ; X64-AVX-NEXT: retq # encoding: [0xc3] %st = alloca i32, align 4 store i32 %a0, ptr %st, align 4 diff --git a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll index 8d589c519eff2..0fcf433131745 100644 --- a/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll +++ b/llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll @@ -99,12 +99,12 @@ define void @test2(ptr addrspace(1) %this, i32 %0, ptr 
addrspace(1) %p0, ptr add ; CHECK-LV-NEXT: successors: %bb.3(0x7ffff800), %bb.7(0x00000800) ; CHECK-LV-NEXT: {{ $}} ; CHECK-LV-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-LV-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %11:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-LV-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %10:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax ; CHECK-LV-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-LV-NEXT: EH_LABEL ; CHECK-LV-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-LV-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY [[COPY4]] - ; CHECK-LV-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %13:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-LV-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %12:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax ; CHECK-LV-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-LV-NEXT: EH_LABEL ; CHECK-LV-NEXT: JMP_1 %bb.3 @@ -165,12 +165,12 @@ define void @test2(ptr addrspace(1) %this, i32 %0, ptr addrspace(1) %p0, ptr add ; CHECK-LIS-NEXT: successors: %bb.3(0x7ffff800), %bb.7(0x00000800) ; CHECK-LIS-NEXT: {{ $}} ; CHECK-LIS-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-LIS-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %11:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-LIS-NEXT: [[COPY4:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %10:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY4]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax ; CHECK-LIS-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-LIS-NEXT: EH_LABEL ; CHECK-LIS-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-LIS-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY [[COPY4]] - ; CHECK-LIS-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %13:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax + ; CHECK-LIS-NEXT: [[COPY6:%[0-9]+]]:gr64 = STATEPOINT 2882400000, 0, 0, undef %12:gr64, 2, 0, 2, 0, 2, 0, 2, 1, [[COPY6]](tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax ; CHECK-LIS-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-LIS-NEXT: EH_LABEL ; CHECK-LIS-NEXT: JMP_1 %bb.3 
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll index 953548cc807a8..9d9b4688c2473 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll @@ -20,9 +20,8 @@ next: ; Address calcs should be duplicated into this block. One dbg.value should be ; updated, and the other should not. ; CHECK-LABEL: next: -; CHECK: %[[CASTVAR:[0-9a-zA-Z]+]] = bitcast ptr %p to ptr ; CHECK-NEXT: #dbg_assign(ptr %arith, ![[DIVAR:[0-9]+]], -; CHECK-NEXT: %[[GEPVAR:[0-9a-zA-Z]+]] = getelementptr i8, ptr %[[CASTVAR]], i64 3 +; CHECK-NEXT: %[[GEPVAR:[0-9a-zA-Z]+]] = getelementptr i8, ptr %p, i64 3 ; CHECK-NEXT: %loaded = load i8, ptr %[[GEPVAR]] ; CHECK-NEXT: #dbg_assign(ptr %[[GEPVAR]], ![[DIVAR]], call void @llvm.dbg.assign(metadata ptr %arith, metadata !12, metadata !DIExpression(), metadata !21, metadata ptr undef, metadata !DIExpression()), !dbg !14 diff --git a/llvm/test/DebugInfo/X86/bitcast-di.ll b/llvm/test/DebugInfo/X86/bitcast-di.ll index f599cc9ca7db3..a1b5ea431a759 100644 --- a/llvm/test/DebugInfo/X86/bitcast-di.ll +++ b/llvm/test/DebugInfo/X86/bitcast-di.ll @@ -5,44 +5,13 @@ target triple = "x86_64-unknown-linux-gnu" @x = external global [1 x [2 x <4 x float>]] -declare void @foo(i32) - -declare void @slowpath(i32, ptr) - -; Is DI maintained after sinking bitcast? -define void @test(i1 %cond, ptr %base) { -; CHECK-LABEL: @test -entry: - %addr = getelementptr inbounds i64, ptr %base, i64 5 - %casted = bitcast ptr %addr to ptr - br i1 %cond, label %if.then, label %fallthrough - -if.then: -; CHECK-LABEL: if.then: -; CHECK: bitcast ptr %addr to ptr, !dbg ![[castLoc:[0-9]+]] - %v1 = load i32, ptr %casted, align 4 - call void @foo(i32 %v1) - %cmp = icmp eq i32 %v1, 0 - br i1 %cmp, label %rare.1, label %fallthrough - -fallthrough: - ret void - -rare.1: -; CHECK-LABEL: rare.1: -; CHECK: bitcast ptr %addr to ptr, !dbg ![[castLoc]] - call void @slowpath(i32 %v1, ptr %casted) ;; NOT COLD - br label %fallthrough -} - ; Is DI maintained when a GEP with all zero indices gets converted to bitcast?
define void @test2() { ; CHECK-LABEL: @test2 load.i145: -; CHECK: bitcast ptr @x to ptr, !dbg ![[castLoc2:[0-9]+]] +; CHECK: bitcast ptr @x to ptr, !dbg ![[castLoc:[0-9]+]] %x_offset = getelementptr [1 x [2 x <4 x float>]], ptr @x, i32 0, i64 0 ret void } -; CHECK: ![[castLoc]] = !DILocation(line: 2 -; CHECK: ![[castLoc2]] = !DILocation(line: 11 \ No newline at end of file +; CHECK: ![[castLoc]] = !DILocation(line: 1 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll index dbd5e87f2c28d..bdd298a9ba786 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll @@ -10,9 +10,8 @@ declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32 immarg, <4 x i define <4 x float> @foo(ptr %p) { ; CHECK-LABEL: define <4 x float> @foo ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[P]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP2]], i32 0, <4 x i1> zeroinitializer, <4 x float> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP1]], i32 0, <4 x i1> zeroinitializer, <4 x float> zeroinitializer) ; CHECK-NEXT: ret <4 x float> [[GATHER]] ; %base.splatinsert = insertelement <4 x ptr> poison, ptr %p, i32 0 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll index d0d87b38e0589..73f6aeeb9ad20 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-recreate.ll @@ -18,11 +18,9 @@ define void @addr_from_invoke() personality ptr null { ; CHECK-NEXT: [[PAD:%.*]] = cleanuppad within none [] ; CHECK-NEXT: cleanupret from [[PAD]] unwind to caller ; CHECK: [[BODY_1]]: -; CHECK-NEXT: [[GEP1:%.*]] = bitcast ptr [[PTR]] to ptr -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[PTR]] to ptr -; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP0]], align 4 +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4 +; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[PTR]], align 4 +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -51,8 +49,7 @@ define void @addr_from_arg(ptr %ptr, i1 %p) { ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; CHECK: [[BODY_1]]: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[PTR]] to ptr -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP0]], align 4 +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4 ; CHECK-NEXT: [[UNUSED:%.*]] = load <4 x i32>, ptr [[PTR]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[PTR]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll index 63fd1845594c6..20ce7e7704aed 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll @@ -898,30 +898,45 @@ fallthrough: ; 
Different types but null is the first? define i32 @test19(i1 %cond1, i1 %cond2, ptr %b2, ptr %b1) { -; CHECK-LABEL: define i32 @test19( -; CHECK-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[B2:%.*]], ptr [[B1:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5 -; CHECK-NEXT: br i1 [[COND1]], label %[[IF_THEN1:.*]], label %[[IF_THEN2:.*]] -; CHECK: [[IF_THEN1]]: -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i8, ptr [[B1]], i64 40 -; CHECK-NEXT: [[BC2:%.*]] = bitcast ptr [[G2]] to ptr -; CHECK-NEXT: br label %[[FALLTHROUGH:.*]] -; CHECK: [[IF_THEN2]]: -; CHECK-NEXT: [[BC1_1:%.*]] = bitcast ptr [[G1]] to ptr -; CHECK-NEXT: br i1 [[COND2]], label %[[FALLTHROUGH]], label %[[IF_THEN3:.*]] -; CHECK: [[IF_THEN3]]: -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i64, ptr null, i64 5 -; CHECK-NEXT: [[BC1_2:%.*]] = bitcast ptr [[G3]] to ptr -; CHECK-NEXT: br label %[[FALLTHROUGH]] -; CHECK: [[FALLTHROUGH]]: -; CHECK-NEXT: [[C:%.*]] = phi ptr [ [[BC2]], %[[IF_THEN1]] ], [ [[BC1_1]], %[[IF_THEN2]] ], [ [[BC1_2]], %[[IF_THEN3]] ] -; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[C]], align 4 -; CHECK-NEXT: [[G1_1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5 -; CHECK-NEXT: [[BC1_1_1:%.*]] = bitcast ptr [[G1_1]] to ptr -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[BC1_1_1]], align 4 -; CHECK-NEXT: [[V:%.*]] = add i32 [[V1]], [[V2]] -; CHECK-NEXT: ret i32 [[V]] +; CHECK-YES-LABEL: define i32 @test19( +; CHECK-YES-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[B2:%.*]], ptr [[B1:%.*]]) { +; CHECK-YES-NEXT: [[ENTRY:.*:]] +; CHECK-YES-NEXT: br i1 [[COND1]], label %[[IF_THEN1:.*]], label %[[IF_THEN2:.*]] +; CHECK-YES: [[IF_THEN1]]: +; CHECK-YES-NEXT: br label %[[FALLTHROUGH:.*]] +; CHECK-YES: [[IF_THEN2]]: +; CHECK-YES-NEXT: br i1 [[COND2]], label %[[FALLTHROUGH]], label %[[IF_THEN3:.*]] +; CHECK-YES: [[IF_THEN3]]: +; CHECK-YES-NEXT: br label %[[FALLTHROUGH]] +; CHECK-YES: [[FALLTHROUGH]]: +; CHECK-YES-NEXT: [[SUNK_PHI:%.*]] = phi ptr [ null, %[[IF_THEN3]] ], [ [[B2]], %[[IF_THEN2]] ], [ [[B1]], %[[IF_THEN1]] ] +; CHECK-YES-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[SUNK_PHI]], i64 40 +; CHECK-YES-NEXT: [[V1:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-YES-NEXT: [[G1_1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5 +; CHECK-YES-NEXT: [[V2:%.*]] = load i32, ptr [[G1_1]], align 4 +; CHECK-YES-NEXT: [[V:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-YES-NEXT: ret i32 [[V]] +; +; CHECK-NO-LABEL: define i32 @test19( +; CHECK-NO-SAME: i1 [[COND1:%.*]], i1 [[COND2:%.*]], ptr [[B2:%.*]], ptr [[B1:%.*]]) { +; CHECK-NO-NEXT: [[ENTRY:.*:]] +; CHECK-NO-NEXT: [[G1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5 +; CHECK-NO-NEXT: br i1 [[COND1]], label %[[IF_THEN1:.*]], label %[[IF_THEN2:.*]] +; CHECK-NO: [[IF_THEN1]]: +; CHECK-NO-NEXT: [[G2:%.*]] = getelementptr inbounds i8, ptr [[B1]], i64 40 +; CHECK-NO-NEXT: br label %[[FALLTHROUGH:.*]] +; CHECK-NO: [[IF_THEN2]]: +; CHECK-NO-NEXT: br i1 [[COND2]], label %[[FALLTHROUGH]], label %[[IF_THEN3:.*]] +; CHECK-NO: [[IF_THEN3]]: +; CHECK-NO-NEXT: [[G3:%.*]] = getelementptr inbounds i64, ptr null, i64 5 +; CHECK-NO-NEXT: br label %[[FALLTHROUGH]] +; CHECK-NO: [[FALLTHROUGH]]: +; CHECK-NO-NEXT: [[C:%.*]] = phi ptr [ [[G2]], %[[IF_THEN1]] ], [ [[G1]], %[[IF_THEN2]] ], [ [[G3]], %[[IF_THEN3]] ] +; CHECK-NO-NEXT: [[V1:%.*]] = load i32, ptr [[C]], align 4 +; CHECK-NO-NEXT: [[G1_1:%.*]] = getelementptr inbounds i64, ptr [[B2]], i64 5 +; CHECK-NO-NEXT: [[V2:%.*]] = load i32, ptr 
[[G1_1]], align 4 +; CHECK-NO-NEXT: [[V:%.*]] = add i32 [[V1]], [[V2]] +; CHECK-NO-NEXT: ret i32 [[V]] ; entry: %g1 = getelementptr inbounds i64, ptr %b2, i64 5 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll index 2945a007bf578..819b7c571efcb 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-two-phi.ll @@ -11,8 +11,7 @@ define void @test() { ; CHECK-NEXT: br label %[[START:.*]] ; CHECK: [[START]]: ; CHECK-NEXT: [[VAL1:%.*]] = phi ptr [ [[TMP0]], %[[ENTRY]] ], [ [[VAL1]], %[[START]] ] -; CHECK-NEXT: [[VAL2:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[VAL2]], %[[START]] ] -; CHECK-NEXT: [[SUNKADDR2:%.*]] = bitcast ptr [[VAL2]] to ptr +; CHECK-NEXT: [[SUNKADDR2:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[SUNKADDR2]], %[[START]] ] ; CHECK-NEXT: [[LOADX:%.*]] = load i64, ptr [[SUNKADDR2]], align 8 ; CHECK-NEXT: br label %[[START]] ;