Skip to content

Commit d34cad3

Browse files
committed
[AMDGPU] add tests for alternate form of usubsat; NFC
1 parent b33c211 commit d34cad3

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

llvm/test/CodeGen/AMDGPU/usubsat.ll

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,86 @@ define i16 @usubsat_as_bithack_i16(i16 %x) {
109109
ret i16 %result
110110
}
111111

112+
; Unsigned saturating subtract of 0x8000 from an i16, spelled as a bit hack
; that flips the sign bit with an add:
;   signsplat = x >>s 15        (all-ones when bit 15 of x is set, else zero)
;   flipsign  = x + 32768       (adding 0x8000 modulo 2^16 toggles only bit 15)
;   result    = signsplat & flipsign
; When bit 15 of x is set the result is x with that bit cleared, i.e.
; x - 0x8000; otherwise the result is 0. That is usubsat(x, 0x8000).
; The assertions below record the current output, which keeps a separate
; shift, add and bitwise-and under every check prefix.
define i16 @usubsat_as_bithack2_i16(i16 %x) {
113+
; GFX6-LABEL: usubsat_as_bithack2_i16:
114+
; GFX6: ; %bb.0:
115+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116+
; GFX6-NEXT: v_bfe_i32 v1, v0, 0, 16
117+
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
118+
; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0xffff8000, v0
119+
; GFX6-NEXT: v_and_b32_e32 v0, v1, v0
120+
; GFX6-NEXT: s_setpc_b64 s[30:31]
121+
;
122+
; GFX8-LABEL: usubsat_as_bithack2_i16:
123+
; GFX8: ; %bb.0:
124+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125+
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0
126+
; GFX8-NEXT: v_add_u16_e32 v0, 0x8000, v0
127+
; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
128+
; GFX8-NEXT: s_setpc_b64 s[30:31]
129+
;
130+
; GFX9-LABEL: usubsat_as_bithack2_i16:
131+
; GFX9: ; %bb.0:
132+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133+
; GFX9-NEXT: v_ashrrev_i16_e32 v1, 15, v0
134+
; GFX9-NEXT: v_add_u16_e32 v0, 0x8000, v0
135+
; GFX9-NEXT: v_and_b32_e32 v0, v1, v0
136+
; GFX9-NEXT: s_setpc_b64 s[30:31]
137+
;
138+
; GFX10-LABEL: usubsat_as_bithack2_i16:
139+
; GFX10: ; %bb.0:
140+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
142+
; GFX10-NEXT: v_ashrrev_i16 v1, 15, v0
143+
; GFX10-NEXT: v_add_nc_u16 v0, 0x8000, v0
144+
; GFX10-NEXT: v_and_b32_e32 v0, v1, v0
145+
; GFX10-NEXT: s_setpc_b64 s[30:31]
146+
%signsplat = ashr i16 %x, 15
147+
%flipsign = add i16 %x, 32768
148+
%result = and i16 %signsplat, %flipsign
149+
ret i16 %result
150+
}
151+
152+
; Commuted-operand variant of the add-based usubsat(x, 0x8000) bit hack:
;   signsplat = x >>s 15        (all-ones when bit 15 of x is set, else zero)
;   flipsign  = x + 32768       (adding 0x8000 modulo 2^16 toggles only bit 15)
;   result    = flipsign & signsplat
; Here %flipsign is the first operand of the `and` and %signsplat the second,
; so the pattern is exercised with the mask on the right-hand side. The value
; computed is x - 0x8000 when bit 15 of x is set and 0 otherwise.
; The assertions below record the current output, which keeps a separate
; shift, add and bitwise-and under every check prefix.
define i16 @usubsat_as_bithack_commute_i16(i16 %x) {
153+
; GFX6-LABEL: usubsat_as_bithack_commute_i16:
154+
; GFX6: ; %bb.0:
155+
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156+
; GFX6-NEXT: v_bfe_i32 v1, v0, 0, 16
157+
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
158+
; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0xffff8000, v0
159+
; GFX6-NEXT: v_and_b32_e32 v0, v0, v1
160+
; GFX6-NEXT: s_setpc_b64 s[30:31]
161+
;
162+
; GFX8-LABEL: usubsat_as_bithack_commute_i16:
163+
; GFX8: ; %bb.0:
164+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165+
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0
166+
; GFX8-NEXT: v_add_u16_e32 v0, 0x8000, v0
167+
; GFX8-NEXT: v_and_b32_e32 v0, v0, v1
168+
; GFX8-NEXT: s_setpc_b64 s[30:31]
169+
;
170+
; GFX9-LABEL: usubsat_as_bithack_commute_i16:
171+
; GFX9: ; %bb.0:
172+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173+
; GFX9-NEXT: v_ashrrev_i16_e32 v1, 15, v0
174+
; GFX9-NEXT: v_add_u16_e32 v0, 0x8000, v0
175+
; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
176+
; GFX9-NEXT: s_setpc_b64 s[30:31]
177+
;
178+
; GFX10-LABEL: usubsat_as_bithack_commute_i16:
179+
; GFX10: ; %bb.0:
180+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
182+
; GFX10-NEXT: v_ashrrev_i16 v1, 15, v0
183+
; GFX10-NEXT: v_add_nc_u16 v0, 0x8000, v0
184+
; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
185+
; GFX10-NEXT: s_setpc_b64 s[30:31]
186+
%signsplat = ashr i16 %x, 15
187+
%flipsign = add i16 %x, 32768
188+
%result = and i16 %flipsign, %signsplat
189+
ret i16 %result
190+
}
191+
112192
define i32 @v_usubsat_i32(i32 %lhs, i32 %rhs) {
113193
; GFX6-LABEL: v_usubsat_i32:
114194
; GFX6: ; %bb.0:

0 commit comments

Comments
 (0)