diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index f1811c47e5ad4..e144111059cc2 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -553,24 +553,14 @@ void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs, continue; } - if (MO.isImplicit()) { - for (MCRegUnit Unit : TRI->regunits(Reg)) - RUClobbers.set(Unit); - if (!MO.isDead()) - // Non-dead implicit def? This cannot be hoisted. + // FIXME: For now, avoid instructions with more than one non-dead def. + if (!MO.isDead()) { + if (Def) RuledOut = true; - // No need to check if a dead implicit def is also defined by - // another instruction. - continue; + else + Def = Reg; } - // FIXME: For now, avoid instructions with multiple defs, unless - // it's a dead implicit def. - if (Def) - RuledOut = true; - else - Def = Reg; - // If we have already seen another instruction that defines the same // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. 
diff --git a/llvm/test/CodeGen/AArch64/mlicm-implicit-defs.mir b/llvm/test/CodeGen/AArch64/mlicm-implicit-defs.mir new file mode 100644 index 0000000000000..2c5a70288cfad --- /dev/null +++ b/llvm/test/CodeGen/AArch64/mlicm-implicit-defs.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -run-pass machinelicm -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64 -passes machinelicm -o - %s | FileCheck %s + +--- +name: unsafe_to_move +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: unsafe_to_move + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x16 = COPY killed $x0 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x1 = COPY killed $x16 + ; CHECK-NEXT: $x2 = MOVi64imm 1024, implicit-def dead $x16 + ; CHECK-NEXT: $x16 = LDRXroX killed $x1, killed $x2, 0, 0 + ; CHECK-NEXT: $xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x0 = COPY killed $x1 + ; CHECK-NEXT: RET_ReallyLR + bb.0: + liveins: $x0 + $x16 = COPY killed $x0 + B %bb.1 + + bb.1: + liveins: $x16 + $x1 = COPY killed $x16 + /* MOVi64imm below mimics a pseudo instruction that doesn't have any */ + /* unmodelled side effects, but uses x16 as a scratch register. */ + $x2 = MOVi64imm 1024, implicit-def dead $x16 + $x16 = LDRXroX killed $x1, killed $x2, 0, 0 + $xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + liveins: $x1 + $x0 = COPY killed $x1 + RET_ReallyLR +... 
+ +--- +name: dead_implicit_def +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: dead_implicit_def + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12 = COPY killed $x0 + ; CHECK-NEXT: $x2 = MOVi64imm 1024, implicit-def dead $x16 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $x12, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x1 = COPY killed $x12 + ; CHECK-NEXT: $x16 = LDRXroX killed $x1, $x2, 0, 0 + ; CHECK-NEXT: $xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x0 = COPY killed $x1 + ; CHECK-NEXT: RET_ReallyLR + bb.0: + liveins: $x0 + $x12 = COPY killed $x0 + B %bb.1 + + bb.1: + liveins: $x12 + $x1 = COPY killed $x12 + /* MOVi64imm below mimics a pseudo instruction that doesn't have any */ + /* unmodelled side effects, but uses x16 as a scratch register. */ + $x2 = MOVi64imm 1024, implicit-def dead $x16 + $x16 = LDRXroX killed $x1, killed $x2, 0, 0 + $xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv + Bcc 1, %bb.1, implicit $nzcv + B %bb.2 + + bb.2: + liveins: $x1 + $x0 = COPY killed $x1 + RET_ReallyLR +... 
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll index b5616501900dd..a13f3513c660e 100644 --- a/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll @@ -4,10 +4,10 @@ define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: copy_to_reg_frameindex: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_cmp_lt_u32 0, 16 ; CHECK-NEXT: ; implicit-def: $vgpr0 ; CHECK-NEXT: .LBB0_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: s_cmp_lt_u32 0, 16 ; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST) ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_set_gpr_idx_off diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll index 4b8ef2c9613a5..4c2967a52fe93 100644 --- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll @@ -16,27 +16,28 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________ ; CHECK-NEXT: v_lshl_add_u32 v0, v0, 1, v0 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc -; CHECK-NEXT: s_cbranch_execz .LBB0_12 +; CHECK-NEXT: s_cbranch_execz .LBB0_13 ; CHECK-NEXT: ; %bb.1: ; %if.end15 ; CHECK-NEXT: s_load_dword s4, s[8:9], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_bitcmp1_b32 s4, 0 ; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0 ; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5] -; CHECK-NEXT: s_cbranch_vccnz .LBB0_12 -; CHECK-NEXT: .LBB0_2: ; %while.cond.i -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_13 +; CHECK-NEXT: ; %bb.2: ; %lor.lhs.false17 ; CHECK-NEXT: s_cmp_eq_u32 s4, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 -; CHECK-NEXT: ; %bb.3: ; %if.end60 -; CHECK-NEXT: s_cbranch_execz .LBB0_11 -; CHECK-NEXT: ; %bb.4: ; %if.end5.i -; CHECK-NEXT: s_cbranch_scc0 .LBB0_11 
-; CHECK-NEXT: ; %bb.5: ; %if.end5.i314 -; CHECK-NEXT: s_cbranch_scc0 .LBB0_11 -; CHECK-NEXT: ; %bb.6: ; %if.end5.i338 -; CHECK-NEXT: s_cbranch_scc0 .LBB0_11 -; CHECK-NEXT: ; %bb.7: ; %if.end5.i362 +; CHECK-NEXT: .LBB0_3: ; %while.cond.i +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_3 +; CHECK-NEXT: ; %bb.4: ; %if.end60 +; CHECK-NEXT: s_cbranch_execz .LBB0_12 +; CHECK-NEXT: ; %bb.5: ; %if.end5.i +; CHECK-NEXT: s_cbranch_scc0 .LBB0_12 +; CHECK-NEXT: ; %bb.6: ; %if.end5.i314 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_12 +; CHECK-NEXT: ; %bb.7: ; %if.end5.i338 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_12 +; CHECK-NEXT: ; %bb.8: ; %if.end5.i362 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _RSENC_gDcd_______________________________@rel32@lo+1157 @@ -46,23 +47,23 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________ ; CHECK-NEXT: buffer_store_byte v0, v0, s[0:3], 0 offen ; CHECK-NEXT: s_waitcnt vmcnt(1) ; CHECK-NEXT: buffer_store_byte v1, off, s[0:3], 0 offset:257 -; CHECK-NEXT: s_cbranch_scc0 .LBB0_11 -; CHECK-NEXT: ; %bb.8: ; %if.end5.i400 +; CHECK-NEXT: s_cbranch_scc0 .LBB0_12 +; CHECK-NEXT: ; %bb.9: ; %if.end5.i400 ; CHECK-NEXT: flat_load_ubyte v0, v[0:1] ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 ; CHECK-NEXT: s_and_b64 exec, exec, vcc -; CHECK-NEXT: s_cbranch_execz .LBB0_11 -; CHECK-NEXT: ; %bb.9: ; %if.then404 +; CHECK-NEXT: s_cbranch_execz .LBB0_12 +; CHECK-NEXT: ; %bb.10: ; %if.then404 ; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: .LBB0_10: ; %for.body564 +; CHECK-NEXT: .LBB0_11: ; %for.body564 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_sub_i32 s4, s4, 32 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 -; CHECK-NEXT: s_cbranch_scc1 .LBB0_10 -; CHECK-NEXT: .LBB0_11: ; %UnifiedUnreachableBlock +; CHECK-NEXT: s_cbranch_scc1 .LBB0_11 +; CHECK-NEXT: .LBB0_12: ; %UnifiedUnreachableBlock ; CHECK-NEXT: ; 
divergent unreachable -; CHECK-NEXT: .LBB0_12: ; %UnifiedReturnBlock +; CHECK-NEXT: .LBB0_13: ; %UnifiedReturnBlock ; CHECK-NEXT: s_endpgm entry: %runtimeVersionCopy = alloca [128 x i8], align 16, addrspace(5) diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index 08cab7cd359b9..261a1f8fd2c6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -71,6 +71,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: or t1, t1, t3 ; RV32-NEXT: andi t1, t1, 1 ; RV32-NEXT: slli t2, t2, 1 +; RV32-NEXT: csrwi vxrm, 0 ; RV32-NEXT: j .LBB0_10 ; RV32-NEXT: .LBB0_9: # %for.cond1.for.cond.cleanup3_crit_edge.us ; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1 @@ -93,7 +94,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: li t3, 0 ; RV32-NEXT: neg t4, t2 ; RV32-NEXT: and t4, t4, a6 -; RV32-NEXT: csrwi vxrm, 0 ; RV32-NEXT: li t6, 0 ; RV32-NEXT: li t5, 0 ; RV32-NEXT: vsetvli s0, zero, e8, m2, ta, ma @@ -471,6 +471,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: or t4, t4, t5 ; RV64-NEXT: andi t4, t4, 1 ; RV64-NEXT: mv t5, a0 +; RV64-NEXT: csrwi vxrm, 0 ; RV64-NEXT: j .LBB0_6 ; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us ; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1 @@ -493,7 +494,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: slli t6, t0, 28 ; RV64-NEXT: sub t6, t6, t1 ; RV64-NEXT: and t6, t6, a6 -; RV64-NEXT: csrwi vxrm, 0 ; RV64-NEXT: mv s0, a2 ; RV64-NEXT: mv s1, a4 ; RV64-NEXT: mv s2, t5 diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll index 296694ca3c72d..3ac0fd7746a30 100644 --- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll +++ 
b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll @@ -22,7 +22,7 @@ define void @FontChange(i1 %foo) nounwind { ; CHECK-LABEL: FontChange: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je .LBB0_9 +; CHECK-NEXT: je .LBB0_10 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb366 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -30,32 +30,33 @@ define void @FontChange(i1 %foo) nounwind { ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %bb428 ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: je .LBB0_9 +; CHECK-NEXT: je .LBB0_10 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: cmpb $0, 0 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB0_3: # %bb650 +; CHECK-NEXT: .LBB0_4: # %bb650 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpb $0, 0 -; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.4: # %bb662 +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: # %bb.5: # %bb662 ; CHECK-NEXT: movl 0, %eax ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: andl $57344, %ecx # imm = 0xE000 ; CHECK-NEXT: cmpl $8192, %ecx # imm = 0x2000 -; CHECK-NEXT: jne .LBB0_9 -; CHECK-NEXT: # %bb.5: # %bb4884 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.6: # %bb4884 ; CHECK-NEXT: andl $7168, %eax # imm = 0x1C00 ; CHECK-NEXT: cmpl $1024, %eax # imm = 0x400 -; CHECK-NEXT: jne .LBB0_9 -; CHECK-NEXT: # %bb.6: # %bb4932 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.7: # %bb4932 ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne .LBB0_9 -; CHECK-NEXT: # %bb.7: # %bb4940 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.8: # %bb4940 ; CHECK-NEXT: movl 0, %eax ; CHECK-NEXT: cmpl $160, %eax -; CHECK-NEXT: je .LBB0_9 -; CHECK-NEXT: # %bb.8: # %bb4940 +; CHECK-NEXT: je .LBB0_10 +; CHECK-NEXT: # %bb.9: # %bb4940 ; CHECK-NEXT: cmpl $159, %eax -; CHECK-NEXT: .LBB0_9: # %bb4897 +; CHECK-NEXT: .LBB0_10: # %bb4897 ; CHECK-NEXT: retq entry: br i1 %foo, label %bb298, label %bb49