@@ -109,6 +109,86 @@ define i16 @usubsat_as_bithack_i16(i16 %x) {
109
109
ret i16 %result
110
110
}
111
111
112
; Bit-hack spelling of an unsigned saturating subtract of 0x8000 from %x:
;   (%x ashr 15) & (%x + 0x8000)
; When the sign bit of %x is set the mask is all-ones and the add clears that
; bit (i.e. %x - 0x8000); otherwise the mask is zero and the result is 0.
; The CHECK lines below expect the shift, the add and the and to be emitted as
; three separate instructions for every prefix: GFX6 uses 32-bit ops after a
; v_bfe_i32, while GFX8/GFX9/GFX10 use 16-bit shift/add forms.
; NOTE(review): the CHECK lines look autogenerated - presumably they should be
; regenerated by the test-update script rather than edited by hand; confirm
; against the file header.
+ define i16 @usubsat_as_bithack2_i16 (i16 %x ) {
113
+ ; GFX6-LABEL: usubsat_as_bithack2_i16:
114
+ ; GFX6: ; %bb.0:
115
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116
+ ; GFX6-NEXT: v_bfe_i32 v1, v0, 0, 16
117
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
118
+ ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0xffff8000, v0
119
+ ; GFX6-NEXT: v_and_b32_e32 v0, v1, v0
120
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
121
+ ;
122
+ ; GFX8-LABEL: usubsat_as_bithack2_i16:
123
+ ; GFX8: ; %bb.0:
124
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125
+ ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0
126
+ ; GFX8-NEXT: v_add_u16_e32 v0, 0x8000, v0
127
+ ; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
128
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
129
+ ;
130
+ ; GFX9-LABEL: usubsat_as_bithack2_i16:
131
+ ; GFX9: ; %bb.0:
132
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v1, 15, v0
134
+ ; GFX9-NEXT: v_add_u16_e32 v0, 0x8000, v0
135
+ ; GFX9-NEXT: v_and_b32_e32 v0, v1, v0
136
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
137
+ ;
138
+ ; GFX10-LABEL: usubsat_as_bithack2_i16:
139
+ ; GFX10: ; %bb.0:
140
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141
+ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
142
+ ; GFX10-NEXT: v_ashrrev_i16 v1, 15, v0
143
+ ; GFX10-NEXT: v_add_nc_u16 v0, 0x8000, v0
144
+ ; GFX10-NEXT: v_and_b32_e32 v0, v1, v0
145
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
146
+ %signsplat = ashr i16 %x , 15 ; all-ones if the sign bit of %x is set, else zero
147
+ %flipsign = add i16 %x , 32768 ; adding 0x8000 in i16 toggles the top bit (wraps mod 2^16)
148
+ %result = and i16 %signsplat , %flipsign ; mask is the first operand here
149
+ ret i16 %result
150
+ }
151
+
152
; Commuted variant of the bit-hack unsigned saturating subtract of 0x8000:
;   (%x + 0x8000) & (%x ashr 15)
; The sign-splat mask is the SECOND operand of the final `and` here, so the
; test covers the swapped operand order of the pattern.
; The CHECK lines below expect shift, add and and as three separate
; instructions for every prefix, with the v_and_b32 operands in "v0, v1"
; order to match the swapped IR operands.
; NOTE(review): the CHECK lines look autogenerated - presumably they should be
; regenerated by the test-update script rather than edited by hand; confirm
; against the file header.
+ define i16 @usubsat_as_bithack_commute_i16 (i16 %x ) {
153
+ ; GFX6-LABEL: usubsat_as_bithack_commute_i16:
154
+ ; GFX6: ; %bb.0:
155
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156
+ ; GFX6-NEXT: v_bfe_i32 v1, v0, 0, 16
157
+ ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1
158
+ ; GFX6-NEXT: v_add_i32_e32 v0, vcc, 0xffff8000, v0
159
+ ; GFX6-NEXT: v_and_b32_e32 v0, v0, v1
160
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
161
+ ;
162
+ ; GFX8-LABEL: usubsat_as_bithack_commute_i16:
163
+ ; GFX8: ; %bb.0:
164
+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165
+ ; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0
166
+ ; GFX8-NEXT: v_add_u16_e32 v0, 0x8000, v0
167
+ ; GFX8-NEXT: v_and_b32_e32 v0, v0, v1
168
+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
169
+ ;
170
+ ; GFX9-LABEL: usubsat_as_bithack_commute_i16:
171
+ ; GFX9: ; %bb.0:
172
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v1, 15, v0
174
+ ; GFX9-NEXT: v_add_u16_e32 v0, 0x8000, v0
175
+ ; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
176
+ ; GFX9-NEXT: s_setpc_b64 s[30:31]
177
+ ;
178
+ ; GFX10-LABEL: usubsat_as_bithack_commute_i16:
179
+ ; GFX10: ; %bb.0:
180
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181
+ ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
182
+ ; GFX10-NEXT: v_ashrrev_i16 v1, 15, v0
183
+ ; GFX10-NEXT: v_add_nc_u16 v0, 0x8000, v0
184
+ ; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
185
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
186
+ %signsplat = ashr i16 %x , 15 ; all-ones if the sign bit of %x is set, else zero
187
+ %flipsign = add i16 %x , 32768 ; adding 0x8000 in i16 toggles the top bit (wraps mod 2^16)
188
+ %result = and i16 %flipsign , %signsplat ; operands swapped: mask is the second operand
189
+ ret i16 %result
190
+ }
191
+
112
192
define i32 @v_usubsat_i32 (i32 %lhs , i32 %rhs ) {
113
193
; GFX6-LABEL: v_usubsat_i32:
114
194
; GFX6: ; %bb.0:
0 commit comments