Skip to content

Commit 9927a43

Browse files
committed
comments
1 parent 1fadb4e commit 9927a43

File tree

2 files changed

+111
-3
lines changed

2 files changed

+111
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,10 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
416416
? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
417417
: (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
418418

419+
// Pack the offset and width of a BFE into
420+
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
421+
// source, bits [5:0] contain the offset and bits [22:16] the width.
422+
419423
// Ensure the high bits are clear to insert the offset.
420424
auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
421425
auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);
@@ -424,9 +428,6 @@ bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const {
424428
auto ShiftAmt = B.buildConstant(S32, 16);
425429
auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt);
426430

427-
// Transformation function, pack the offset and width of a BFE into
428-
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
429-
// source, bits [5:0] contain the offset and bits [22:16] the width.
430431
auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);
431432

432433
MRI.setRegBank(OffsetMask.getReg(0), *RB);
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: test_s_bfe_i32__constants
6+
legalized: true
7+
regBankSelected: true
8+
tracksRegLiveness: true
9+
body: |
10+
bb.1:
11+
liveins: $sgpr0
12+
13+
; CHECK-LABEL: name: test_s_bfe_i32__constants
14+
; CHECK: liveins: $sgpr0
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: %reg:sreg_32(s32) = COPY $sgpr0
17+
; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
18+
; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
19+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
20+
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
21+
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
22+
; CHECK-NEXT: %bfx:sreg_32(s32) = S_BFE_I32 %reg(s32), [[OR]](s32), implicit-def $scc
23+
; CHECK-NEXT: $sgpr0 = COPY %bfx(s32)
24+
%reg:sgpr(s32) = COPY $sgpr0
25+
%width:sgpr(s32) = G_CONSTANT i32 5
26+
%offset:sgpr(s32) = G_CONSTANT i32 7
27+
%bfx:sgpr(s32) = G_SBFX %reg, %offset, %width
28+
$sgpr0 = COPY %bfx
29+
...
30+
---
31+
name: test_s_bfe_u32__constants
32+
legalized: true
33+
regBankSelected: true
34+
tracksRegLiveness: true
35+
body: |
36+
bb.1:
37+
liveins: $sgpr0
38+
39+
; CHECK-LABEL: name: test_s_bfe_u32__constants
40+
; CHECK: liveins: $sgpr0
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: %reg:sreg_32(s32) = COPY $sgpr0
43+
; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
44+
; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
45+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
46+
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
47+
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
48+
; CHECK-NEXT: %bfx:sreg_32(s32) = S_BFE_U32 %reg(s32), [[OR]](s32), implicit-def $scc
49+
; CHECK-NEXT: $sgpr0 = COPY %bfx(s32)
50+
%reg:sgpr(s32) = COPY $sgpr0
51+
%width:sgpr(s32) = G_CONSTANT i32 5
52+
%offset:sgpr(s32) = G_CONSTANT i32 7
53+
%bfx:sgpr(s32) = G_UBFX %reg, %offset, %width
54+
$sgpr0 = COPY %bfx
55+
...
56+
---
57+
name: test_s_bfe_i64__constants
58+
legalized: true
59+
regBankSelected: true
60+
tracksRegLiveness: true
61+
body: |
62+
bb.1:
63+
liveins: $sgpr0_sgpr1
64+
65+
; CHECK-LABEL: name: test_s_bfe_i64__constants
66+
; CHECK: liveins: $sgpr0_sgpr1
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: %reg:sreg_64(s64) = COPY $sgpr0_sgpr1
69+
; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
70+
; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
71+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
72+
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
73+
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
74+
; CHECK-NEXT: %bfx:sreg_64(s64) = S_BFE_I64 %reg(s64), [[OR]](s32), implicit-def $scc
75+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY %bfx(s64)
76+
%reg:sgpr(s64) = COPY $sgpr0_sgpr1
77+
%width:sgpr(s32) = G_CONSTANT i32 5
78+
%offset:sgpr(s32) = G_CONSTANT i32 7
79+
%bfx:sgpr(s64) = G_SBFX %reg, %offset, %width
80+
$sgpr0_sgpr1 = COPY %bfx
81+
...
82+
---
83+
name: test_s_bfe_u64__constants
84+
legalized: true
85+
regBankSelected: true
86+
tracksRegLiveness: true
87+
body: |
88+
bb.1:
89+
liveins: $sgpr0_sgpr1
90+
91+
; CHECK-LABEL: name: test_s_bfe_u64__constants
92+
; CHECK: liveins: $sgpr0_sgpr1
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: %reg:sreg_64(s64) = COPY $sgpr0_sgpr1
95+
; CHECK-NEXT: %width:sgpr(s32) = G_CONSTANT i32 5
96+
; CHECK-NEXT: %offset:sgpr(s32) = G_CONSTANT i32 7
97+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
98+
; CHECK-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL %width, [[C]](s32)
99+
; CHECK-NEXT: [[OR:%[0-9]+]]:sreg_32(s32) = G_OR %offset, [[SHL]]
100+
; CHECK-NEXT: %bfx:sreg_64(s64) = S_BFE_U64 %reg(s64), [[OR]](s32), implicit-def $scc
101+
; CHECK-NEXT: $sgpr0_sgpr1 = COPY %bfx(s64)
102+
%reg:sgpr(s64) = COPY $sgpr0_sgpr1
103+
%width:sgpr(s32) = G_CONSTANT i32 5
104+
%offset:sgpr(s32) = G_CONSTANT i32 7
105+
%bfx:sgpr(s64) = G_UBFX %reg, %offset, %width
106+
$sgpr0_sgpr1 = COPY %bfx
107+
...

0 commit comments

Comments
 (0)