Skip to content

MachineLICM: Merge logic for implicit and explicit definitions. #147624

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 5 additions & 15 deletions llvm/lib/CodeGen/MachineLICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,24 +553,14 @@ void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs,
continue;
}

if (MO.isImplicit()) {
for (MCRegUnit Unit : TRI->regunits(Reg))
RUClobbers.set(Unit);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
// FIXME: For now, avoid instructions with multiple defs, unless it's dead.
if (!MO.isDead()) {
if (Def)
RuledOut = true;
// No need to check if a dead implicit def is also defined by
// another instruction.
continue;
else
Def = Reg;
}

// FIXME: For now, avoid instructions with multiple defs, unless
// it's a dead implicit def.
if (Def)
RuledOut = true;
else
Def = Reg;

// If we have already seen another instruction that defines the same
// register, then this is not safe. Two defs is indicated by setting a
// PhysRegClobbers bit.
Expand Down
103 changes: 103 additions & 0 deletions llvm/test/CodeGen/AArch64/mlicm-implicit-defs.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64 -run-pass machinelicm -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64 -passes machinelicm -o - %s | FileCheck %s

---
name: unsafe_to_move
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: unsafe_to_move
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x16 = COPY killed $x0
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x1 = COPY killed $x16
; CHECK-NEXT: $x2 = MOVi64imm 1024, implicit-def dead $x16
; CHECK-NEXT: $x16 = LDRXroX killed $x1, killed $x2, 0, 0
; CHECK-NEXT: $xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv
; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x0 = COPY killed $x1
; CHECK-NEXT: RET_ReallyLR
bb.0:
liveins: $x0
$x16 = COPY killed $x0
B %bb.1
bb.1:
liveins: $x16
$x1 = COPY killed $x16
/* MOVi64imm below mimics a pseudo instruction that doesn't have any */
/* unmodelled side effects, but uses x16 as a scratch register. */
$x2 = MOVi64imm 1024, implicit-def dead $x16
$x16 = LDRXroX killed $x1, killed $x2, 0, 0
$xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv
Bcc 1, %bb.1, implicit $nzcv
B %bb.2
bb.2:
liveins: $x1
$x0 = COPY killed $x1
RET_ReallyLR
...

---
name: dead_implicit_def
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: dead_implicit_def
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x12 = COPY killed $x0
; CHECK-NEXT: $x2 = MOVi64imm 1024, implicit-def dead $x16
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $x12, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x1 = COPY killed $x12
; CHECK-NEXT: $x16 = LDRXroX killed $x1, $x2, 0, 0
; CHECK-NEXT: $xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv
; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x0 = COPY killed $x1
; CHECK-NEXT: RET_ReallyLR
bb.0:
liveins: $x0
$x12 = COPY killed $x0
B %bb.1
bb.1:
liveins: $x12
$x1 = COPY killed $x12
/* MOVi64imm below mimics a pseudo instruction that doesn't have any */
/* unmodelled side effects, but uses x16 as a scratch register. */
$x2 = MOVi64imm 1024, implicit-def dead $x16
$x16 = LDRXroX killed $x1, killed $x2, 0, 0
$xzr = SUBSXri $x16, 0, 0, implicit-def $nzcv
Bcc 1, %bb.1, implicit $nzcv
B %bb.2
bb.2:
liveins: $x1
$x0 = COPY killed $x1
RET_ReallyLR
...
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/copy-to-reg-frameindex.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
define amdgpu_kernel void @copy_to_reg_frameindex(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: copy_to_reg_frameindex:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_cmp_lt_u32 0, 16
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cmp_lt_u32 0, 16
; CHECK-NEXT: s_set_gpr_idx_on 0, gpr_idx(DST)
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_set_gpr_idx_off
Expand Down
45 changes: 23 additions & 22 deletions llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,28 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: v_lshl_add_u32 v0, v0, 1, v0
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT: s_cbranch_execz .LBB0_12
; CHECK-NEXT: s_cbranch_execz .LBB0_13
; CHECK-NEXT: ; %bb.1: ; %if.end15
; CHECK-NEXT: s_load_dword s4, s[8:9], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_bitcmp1_b32 s4, 0
; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5]
; CHECK-NEXT: s_cbranch_vccnz .LBB0_12
; CHECK-NEXT: .LBB0_2: ; %while.cond.i
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cbranch_vccnz .LBB0_13
; CHECK-NEXT: ; %bb.2: ; %lor.lhs.false17
; CHECK-NEXT: s_cmp_eq_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.3: ; %if.end60
; CHECK-NEXT: s_cbranch_execz .LBB0_11
; CHECK-NEXT: ; %bb.4: ; %if.end5.i
; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.5: ; %if.end5.i314
; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.6: ; %if.end5.i338
; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.7: ; %if.end5.i362
; CHECK-NEXT: .LBB0_3: ; %while.cond.i
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
; CHECK-NEXT: ; %bb.4: ; %if.end60
; CHECK-NEXT: s_cbranch_execz .LBB0_12
; CHECK-NEXT: ; %bb.5: ; %if.end5.i
; CHECK-NEXT: s_cbranch_scc0 .LBB0_12
; CHECK-NEXT: ; %bb.6: ; %if.end5.i314
; CHECK-NEXT: s_cbranch_scc0 .LBB0_12
; CHECK-NEXT: ; %bb.7: ; %if.end5.i338
; CHECK-NEXT: s_cbranch_scc0 .LBB0_12
; CHECK-NEXT: ; %bb.8: ; %if.end5.i362
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, _RSENC_gDcd_______________________________@rel32@lo+1157
Expand All @@ -46,23 +47,23 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: buffer_store_byte v0, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v1, off, s[0:3], 0 offset:257
; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.8: ; %if.end5.i400
; CHECK-NEXT: s_cbranch_scc0 .LBB0_12
; CHECK-NEXT: ; %bb.9: ; %if.end5.i400
; CHECK-NEXT: flat_load_ubyte v0, v[0:1]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
; CHECK-NEXT: s_and_b64 exec, exec, vcc
; CHECK-NEXT: s_cbranch_execz .LBB0_11
; CHECK-NEXT: ; %bb.9: ; %if.then404
; CHECK-NEXT: s_cbranch_execz .LBB0_12
; CHECK-NEXT: ; %bb.10: ; %if.then404
; CHECK-NEXT: s_movk_i32 s4, 0x1000
; CHECK-NEXT: .LBB0_10: ; %for.body564
; CHECK-NEXT: .LBB0_11: ; %for.body564
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_sub_i32 s4, s4, 32
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_10
; CHECK-NEXT: .LBB0_11: ; %UnifiedUnreachableBlock
; CHECK-NEXT: s_cbranch_scc1 .LBB0_11
; CHECK-NEXT: .LBB0_12: ; %UnifiedUnreachableBlock
; CHECK-NEXT: ; divergent unreachable
; CHECK-NEXT: .LBB0_12: ; %UnifiedReturnBlock
; CHECK-NEXT: .LBB0_13: ; %UnifiedReturnBlock
; CHECK-NEXT: s_endpgm
entry:
%runtimeVersionCopy = alloca [128 x i8], align 16, addrspace(5)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: or t1, t1, t3
; RV32-NEXT: andi t1, t1, 1
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: j .LBB0_10
; RV32-NEXT: .LBB0_9: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1
Expand All @@ -93,7 +94,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV32-NEXT: li t3, 0
; RV32-NEXT: neg t4, t2
; RV32-NEXT: and t4, t4, a6
; RV32-NEXT: csrwi vxrm, 0
; RV32-NEXT: li t6, 0
; RV32-NEXT: li t5, 0
; RV32-NEXT: vsetvli s0, zero, e8, m2, ta, ma
Expand Down Expand Up @@ -471,6 +471,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: or t4, t4, t5
; RV64-NEXT: andi t4, t4, 1
; RV64-NEXT: mv t5, a0
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: j .LBB0_6
; RV64-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64-NEXT: # in Loop: Header=BB0_6 Depth=1
Expand All @@ -493,7 +494,6 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64-NEXT: slli t6, t0, 28
; RV64-NEXT: sub t6, t6, t1
; RV64-NEXT: and t6, t6, a6
; RV64-NEXT: csrwi vxrm, 0
; RV64-NEXT: mv s0, a2
; RV64-NEXT: mv s1, a4
; RV64-NEXT: mv s2, t5
Expand Down
31 changes: 16 additions & 15 deletions llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,40 +22,41 @@ define void @FontChange(i1 %foo) nounwind {
; CHECK-LABEL: FontChange:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: je .LBB0_9
; CHECK-NEXT: je .LBB0_10
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb366
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %bb428
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: je .LBB0_9
; CHECK-NEXT: je .LBB0_10
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: cmpb $0, 0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_3: # %bb650
; CHECK-NEXT: .LBB0_4: # %bb650
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpb $0, 0
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.4: # %bb662
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.5: # %bb662
; CHECK-NEXT: movl 0, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl $57344, %ecx # imm = 0xE000
; CHECK-NEXT: cmpl $8192, %ecx # imm = 0x2000
; CHECK-NEXT: jne .LBB0_9
; CHECK-NEXT: # %bb.5: # %bb4884
; CHECK-NEXT: jne .LBB0_10
; CHECK-NEXT: # %bb.6: # %bb4884
; CHECK-NEXT: andl $7168, %eax # imm = 0x1C00
; CHECK-NEXT: cmpl $1024, %eax # imm = 0x400
; CHECK-NEXT: jne .LBB0_9
; CHECK-NEXT: # %bb.6: # %bb4932
; CHECK-NEXT: jne .LBB0_10
; CHECK-NEXT: # %bb.7: # %bb4932
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne .LBB0_9
; CHECK-NEXT: # %bb.7: # %bb4940
; CHECK-NEXT: jne .LBB0_10
; CHECK-NEXT: # %bb.8: # %bb4940
; CHECK-NEXT: movl 0, %eax
; CHECK-NEXT: cmpl $160, %eax
; CHECK-NEXT: je .LBB0_9
; CHECK-NEXT: # %bb.8: # %bb4940
; CHECK-NEXT: je .LBB0_10
; CHECK-NEXT: # %bb.9: # %bb4940
; CHECK-NEXT: cmpl $159, %eax
; CHECK-NEXT: .LBB0_9: # %bb4897
; CHECK-NEXT: .LBB0_10: # %bb4897
; CHECK-NEXT: retq
entry:
br i1 %foo, label %bb298, label %bb49
Expand Down