Skip to content

Commit c0e9084

Browse files
shiltian and jrbyrnes
authored
[AMDGPU] Add a debug option -amdgpu-snop-padding for GCNHazardRecognizer (#146587)
This can help identify whether there are potential hazards. Co-authored-by: Byrnes, Jeffrey <Jeffrey.Byrnes@amd.com>
1 parent 9eac5f7 commit c0e9084

File tree

2 files changed

+138
-1
lines changed

2 files changed

+138
-1
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ static cl::opt<unsigned, false, MFMAPaddingRatioParser>
4444
cl::desc("Fill a percentage of the latency between "
4545
"neighboring MFMA with s_nops."));
4646

47+
// This is intended for debugging purposes only.
48+
static cl::opt<unsigned>
49+
NopPadding("amdgpu-snop-padding", cl::init(0), cl::Hidden,
50+
cl::desc("Insert a s_nop x before every instruction"));
51+
4752
//===----------------------------------------------------------------------===//
4853
// Hazard Recognizer Implementation
4954
//===----------------------------------------------------------------------===//
@@ -300,7 +305,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
300305
unsigned W = PreEmitNoopsCommon(MI);
301306
fixHazards(MI);
302307
CurrCycleInstr = nullptr;
303-
return W;
308+
return std::max(W, NopPadding.getValue());
304309
}
305310

306311
unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=8 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN8 %s
3+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=16 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN16 %s
4+
5+
---
6+
name: test_snop_padding
7+
tracksRegLiveness: true
8+
frameInfo:
9+
maxAlignment: 4
10+
stack:
11+
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
12+
machineFunctionInfo:
13+
isEntryFunction: false
14+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
15+
stackPtrOffsetReg: '$sgpr32'
16+
frameOffsetReg: '$sgpr33'
17+
hasSpilledSGPRs: true
18+
body: |
19+
; GCN8-LABEL: name: test_snop_padding
20+
; GCN8: bb.0:
21+
; GCN8-NEXT: successors: %bb.1(0x80000000)
22+
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
23+
; GCN8-NEXT: {{ $}}
24+
; GCN8-NEXT: S_NOP 7
25+
; GCN8-NEXT: S_BRANCH %bb.1
26+
; GCN8-NEXT: {{ $}}
27+
; GCN8-NEXT: bb.1:
28+
; GCN8-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
29+
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
30+
; GCN8-NEXT: {{ $}}
31+
; GCN8-NEXT: S_NOP 7
32+
; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
33+
; GCN8-NEXT: S_NOP 7
34+
; GCN8-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
35+
; GCN8-NEXT: {{ $}}
36+
; GCN8-NEXT: bb.2:
37+
; GCN8-NEXT: successors: %bb.3(0x80000000)
38+
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
39+
; GCN8-NEXT: {{ $}}
40+
; GCN8-NEXT: S_NOP 7
41+
; GCN8-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
42+
; GCN8-NEXT: S_NOP 7
43+
; GCN8-NEXT: S_NOP 0
44+
; GCN8-NEXT: S_NOP 7
45+
; GCN8-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
46+
; GCN8-NEXT: S_NOP 7
47+
; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
48+
; GCN8-NEXT: S_NOP 7
49+
; GCN8-NEXT: S_BRANCH %bb.3
50+
; GCN8-NEXT: {{ $}}
51+
; GCN8-NEXT: bb.3:
52+
; GCN8-NEXT: liveins: $sgpr10_sgpr11
53+
; GCN8-NEXT: {{ $}}
54+
; GCN8-NEXT: S_NOP 7
55+
; GCN8-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
56+
; GCN8-NEXT: S_NOP 7
57+
; GCN8-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
58+
; GCN8-NEXT: S_NOP 7
59+
; GCN8-NEXT: SI_RETURN
60+
;
61+
; GCN16-LABEL: name: test_snop_padding
62+
; GCN16: bb.0:
63+
; GCN16-NEXT: successors: %bb.1(0x80000000)
64+
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
65+
; GCN16-NEXT: {{ $}}
66+
; GCN16-NEXT: S_NOP 7
67+
; GCN16-NEXT: S_NOP 7
68+
; GCN16-NEXT: S_BRANCH %bb.1
69+
; GCN16-NEXT: {{ $}}
70+
; GCN16-NEXT: bb.1:
71+
; GCN16-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
72+
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
73+
; GCN16-NEXT: {{ $}}
74+
; GCN16-NEXT: S_NOP 7
75+
; GCN16-NEXT: S_NOP 7
76+
; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
77+
; GCN16-NEXT: S_NOP 7
78+
; GCN16-NEXT: S_NOP 7
79+
; GCN16-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
80+
; GCN16-NEXT: {{ $}}
81+
; GCN16-NEXT: bb.2:
82+
; GCN16-NEXT: successors: %bb.3(0x80000000)
83+
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
84+
; GCN16-NEXT: {{ $}}
85+
; GCN16-NEXT: S_NOP 7
86+
; GCN16-NEXT: S_NOP 7
87+
; GCN16-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
88+
; GCN16-NEXT: S_NOP 7
89+
; GCN16-NEXT: S_NOP 7
90+
; GCN16-NEXT: S_NOP 0
91+
; GCN16-NEXT: S_NOP 7
92+
; GCN16-NEXT: S_NOP 7
93+
; GCN16-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
94+
; GCN16-NEXT: S_NOP 7
95+
; GCN16-NEXT: S_NOP 7
96+
; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
97+
; GCN16-NEXT: S_NOP 7
98+
; GCN16-NEXT: S_NOP 7
99+
; GCN16-NEXT: S_BRANCH %bb.3
100+
; GCN16-NEXT: {{ $}}
101+
; GCN16-NEXT: bb.3:
102+
; GCN16-NEXT: liveins: $sgpr10_sgpr11
103+
; GCN16-NEXT: {{ $}}
104+
; GCN16-NEXT: S_NOP 7
105+
; GCN16-NEXT: S_NOP 7
106+
; GCN16-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
107+
; GCN16-NEXT: S_NOP 7
108+
; GCN16-NEXT: S_NOP 7
109+
; GCN16-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
110+
; GCN16-NEXT: S_NOP 7
111+
; GCN16-NEXT: S_NOP 7
112+
; GCN16-NEXT: SI_RETURN
113+
bb.0:
114+
liveins: $sgpr6, $sgpr10_sgpr11
115+
S_BRANCH %bb.1
116+
bb.1:
117+
liveins: $sgpr6, $sgpr10_sgpr11
118+
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
119+
S_CBRANCH_EXECZ %bb.3, implicit $exec
120+
bb.2:
121+
liveins: $sgpr6, $sgpr10_sgpr11
122+
SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
123+
S_NOP 0
124+
renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
125+
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
126+
S_BRANCH %bb.3
127+
bb.3:
128+
liveins: $sgpr10_sgpr11
129+
$sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
130+
S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
131+
SI_RETURN
132+
...

0 commit comments

Comments
 (0)