Skip to content

Commit db49437

Browse files
authored
Merge pull request rust-lang#382 from sadlerap/impl-generic-arithmetic-pass
simd: implement missing intrinsics from simd/generic-arithmetic-pass.rs
2 parents 0a67e9c + 17b2c46 commit db49437

File tree

5 files changed

+214
-5
lines changed

5 files changed

+214
-5
lines changed

failing-ui-tests.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ tests/ui/sepcomp/sepcomp-extern.rs
1313
tests/ui/sepcomp/sepcomp-fns-backwards.rs
1414
tests/ui/sepcomp/sepcomp-fns.rs
1515
tests/ui/sepcomp/sepcomp-statics.rs
16-
tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
1716
tests/ui/asm/x86_64/may_unwind.rs
1817
tests/ui/backtrace.rs
1918
tests/ui/catch-unwind-bang.rs
@@ -57,7 +56,6 @@ tests/ui/coroutine/panic-safe.rs
5756
tests/ui/issues/issue-14875.rs
5857
tests/ui/issues/issue-29948.rs
5958
tests/ui/panics/nested_panic_caught.rs
60-
tests/ui/simd/intrinsic/generic-bswap-byte.rs
6159
tests/ui/const_prop/ice-issue-111353.rs
6260
tests/ui/process/println-with-broken-pipe.rs
6361
tests/ui/panic-runtime/lto-abort.rs

failing-ui-tests12.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ tests/ui/packed/packed-struct-vec.rs
99
tests/ui/packed/packed-tuple-struct-layout.rs
1010
tests/ui/simd/array-type.rs
1111
tests/ui/simd/intrinsic/float-minmax-pass.rs
12+
tests/ui/simd/intrinsic/generic-arithmetic-pass.rs
1213
tests/ui/simd/intrinsic/generic-arithmetic-saturating-pass.rs
1314
tests/ui/simd/intrinsic/generic-as.rs
1415
tests/ui/simd/intrinsic/generic-cast-pass.rs

src/builder.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -606,12 +606,29 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
606606
// ../../../gcc/gcc/cfgexpand.cc:6069
607607
// 0x7f0101bf9194 execute
608608
// ../../../gcc/gcc/cfgexpand.cc:6795
609-
if a.get_type().is_compatible_with(self.cx.float_type) {
609+
let a_type = a.get_type();
610+
let a_type_unqualified = a_type.unqualified();
611+
if a_type.is_compatible_with(self.cx.float_type) {
610612
let fmodf = self.context.get_builtin_function("fmodf");
611613
// FIXME(antoyo): this seems to produce the wrong result.
612614
return self.context.new_call(None, fmodf, &[a, b]);
613615
}
614-
assert_eq!(a.get_type().unqualified(), self.cx.double_type);
616+
else if let Some(vector_type) = a_type_unqualified.dyncast_vector() {
617+
assert_eq!(a_type_unqualified, b.get_type().unqualified());
618+
619+
let num_units = vector_type.get_num_units();
620+
let new_elements: Vec<_> = (0..num_units)
621+
.map(|i| {
622+
let index = self.context.new_rvalue_from_long(self.cx.type_u32(), i as _);
623+
let x = self.extract_element(a, index).to_rvalue();
624+
let y = self.extract_element(b, index).to_rvalue();
625+
self.frem(x, y)
626+
})
627+
.collect();
628+
629+
return self.context.new_rvalue_from_vector(None, a_type, &new_elements)
630+
}
631+
assert_eq!(a_type_unqualified, self.cx.double_type);
615632

616633
let fmod = self.context.get_builtin_function("fmod");
617634
return self.context.new_call(None, fmod, &[a, b]);

src/int.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
4848

4949
pub fn gcc_neg(&self, a: RValue<'gcc>) -> RValue<'gcc> {
5050
let a_type = a.get_type();
51-
if self.is_native_int_type(a_type) {
51+
if self.is_native_int_type(a_type) || a_type.is_vector() {
5252
self.cx.context.new_unary_op(None, UnaryOp::Minus, a.get_type(), a)
5353
}
5454
else {

src/intrinsic/simd.rs

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::iter::FromIterator;
2+
13
use gccjit::ToRValue;
24
use gccjit::{BinaryOp, RValue, Type};
35
#[cfg(feature = "master")]
@@ -21,6 +23,8 @@ use rustc_target::abi::Align;
2123
use crate::builder::Builder;
2224
#[cfg(feature = "master")]
2325
use crate::context::CodegenCx;
26+
#[cfg(not(feature = "master"))]
27+
use crate::common::SignType;
2428

2529
pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
2630
bx: &mut Builder<'a, 'gcc, 'tcx>,
@@ -156,6 +160,195 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
156160
return Ok(compare_simd_types(bx, arg1, arg2, in_elem, llret_ty, cmp_op));
157161
}
158162

163+
let simd_bswap = |bx: &mut Builder<'a, 'gcc, 'tcx>, vector: RValue<'gcc>| -> RValue<'gcc> {
164+
let v_type = vector.get_type();
165+
let vector_type = v_type.unqualified().dyncast_vector().expect("vector type");
166+
let elem_type = vector_type.get_element_type();
167+
let elem_size_bytes = elem_type.get_size();
168+
if elem_size_bytes == 1 {
169+
return vector;
170+
}
171+
172+
let type_size_bytes = elem_size_bytes as u64 * in_len;
173+
let shuffle_indices = Vec::from_iter(0..type_size_bytes);
174+
let byte_vector_type = bx.context.new_vector_type(bx.type_u8(), type_size_bytes);
175+
let byte_vector = bx.context.new_bitcast(None, args[0].immediate(), byte_vector_type);
176+
177+
#[cfg(not(feature = "master"))]
178+
let shuffled = {
179+
let new_elements: Vec<_> = shuffle_indices.chunks_exact(elem_size_bytes as _)
180+
.flat_map(|x| x.iter().rev())
181+
.map(|&i| {
182+
let index = bx.context.new_rvalue_from_long(bx.u64_type, i as _);
183+
bx.extract_element(byte_vector, index)
184+
})
185+
.collect();
186+
187+
bx.context.new_rvalue_from_vector(None, byte_vector_type, &new_elements)
188+
};
189+
#[cfg(feature = "master")]
190+
let shuffled = {
191+
let indices: Vec<_> = shuffle_indices.chunks_exact(elem_size_bytes as _)
192+
.flat_map(|x| x.iter().rev())
193+
.map(|&i| bx.context.new_rvalue_from_int(bx.u8_type, i as _))
194+
.collect();
195+
196+
let mask = bx.context.new_rvalue_from_vector(None, byte_vector_type, &indices);
197+
bx.context.new_rvalue_vector_perm(None, byte_vector, byte_vector, mask)
198+
};
199+
bx.context.new_bitcast(None, shuffled, v_type)
200+
};
201+
202+
if name == sym::simd_bswap || name == sym::simd_bitreverse {
203+
require!(
204+
bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
205+
InvalidMonomorphization::UnsupportedOperation {
206+
span,
207+
name,
208+
in_ty,
209+
in_elem,
210+
}
211+
);
212+
}
213+
214+
if name == sym::simd_bswap {
215+
return Ok(simd_bswap(bx, args[0].immediate()));
216+
}
217+
218+
// We use a different algorithm from non-vector bitreverse to take advantage of most
219+
// processors' vector shuffle units. It works like this:
220+
// 1. Generate pre-reversed low and high nibbles as a vector.
221+
// 2. Byte-swap the input.
222+
// 3. Mask off the low and high nibbles of each byte in the byte-swapped input.
223+
// 4. Shuffle the pre-reversed low and high nibbles using the masked nibbles as a shuffle mask.
224+
// 5. Combine the results of the shuffle back together and cast back to the original type.
225+
#[cfg(feature = "master")]
226+
if name == sym::simd_bitreverse {
227+
let vector = args[0].immediate();
228+
let v_type = vector.get_type();
229+
let vector_type = v_type.unqualified().dyncast_vector().expect("vector type");
230+
let elem_type = vector_type.get_element_type();
231+
let elem_size_bytes = elem_type.get_size();
232+
233+
let type_size_bytes = elem_size_bytes as u64 * in_len;
234+
// We need to ensure at least 16 entries in our vector type, since the pre-reversed vectors
235+
// we generate below have 16 entries in them. `new_rvalue_vector_perm` requires the mask
236+
// vector to be of the same length as the source vectors.
237+
let byte_vector_type_size = type_size_bytes.max(16);
238+
239+
let byte_vector_type = bx.context.new_vector_type(bx.u8_type, type_size_bytes);
240+
let long_byte_vector_type = bx.context.new_vector_type(bx.u8_type, byte_vector_type_size);
241+
242+
// Step 1: Generate pre-reversed low and high nibbles as a vector.
243+
let zero_byte = bx.context.new_rvalue_zero(bx.u8_type);
244+
let hi_nibble_elements: Vec<_> = (0u8..16)
245+
.map(|x| bx.context.new_rvalue_from_int(bx.u8_type, x.reverse_bits() as _))
246+
.chain((16..byte_vector_type_size).map(|_| zero_byte))
247+
.collect();
248+
let hi_nibble = bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &hi_nibble_elements);
249+
250+
let lo_nibble_elements: Vec<_> = (0u8..16)
251+
.map(|x| bx.context.new_rvalue_from_int(bx.u8_type, (x.reverse_bits() >> 4) as _))
252+
.chain((16..byte_vector_type_size).map(|_| zero_byte))
253+
.collect();
254+
let lo_nibble = bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &lo_nibble_elements);
255+
256+
let mask = bx.context.new_rvalue_from_vector(
257+
None,
258+
long_byte_vector_type,
259+
&vec![bx.context.new_rvalue_from_int(bx.u8_type, 0x0f); byte_vector_type_size as _]);
260+
261+
let four_vec = bx.context.new_rvalue_from_vector(
262+
None,
263+
long_byte_vector_type,
264+
&vec![bx.context.new_rvalue_from_int(bx.u8_type, 4); byte_vector_type_size as _]);
265+
266+
// Step 2: Byte-swap the input.
267+
let swapped = simd_bswap(bx, args[0].immediate());
268+
let byte_vector = bx.context.new_bitcast(None, swapped, byte_vector_type);
269+
270+
// We're going to need to extend the vector with zeros to make sure that the types are the
271+
// same, since that's what new_rvalue_vector_perm expects.
272+
let byte_vector = if byte_vector_type_size > type_size_bytes {
273+
let mut byte_vector_elements = Vec::with_capacity(byte_vector_type_size as _);
274+
for i in 0..type_size_bytes {
275+
let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _);
276+
let val = bx.extract_element(byte_vector, idx);
277+
byte_vector_elements.push(val);
278+
}
279+
for _ in type_size_bytes..byte_vector_type_size {
280+
byte_vector_elements.push(zero_byte);
281+
}
282+
bx.context.new_rvalue_from_vector(None, long_byte_vector_type, &byte_vector_elements)
283+
} else {
284+
bx.context.new_bitcast(None, byte_vector, long_byte_vector_type)
285+
};
286+
287+
// Step 3: Mask off the low and high nibbles of each byte in the byte-swapped input.
288+
let masked_hi = (byte_vector >> four_vec) & mask;
289+
let masked_lo = byte_vector & mask;
290+
291+
// Step 4: Shuffle the pre-reversed low and high nibbles using the masked nibbles as a shuffle mask.
292+
let hi = bx.context.new_rvalue_vector_perm(None, hi_nibble, hi_nibble, masked_lo);
293+
let lo = bx.context.new_rvalue_vector_perm(None, lo_nibble, lo_nibble, masked_hi);
294+
295+
// Step 5: Combine the results of the shuffle back together and cast back to the original type.
296+
let result = hi | lo;
297+
let cast_ty = bx.context.new_vector_type(elem_type, byte_vector_type_size / (elem_size_bytes as u64));
298+
299+
// We might need to truncate if sizeof(v_type) < sizeof(cast_ty).
300+
if type_size_bytes < byte_vector_type_size {
301+
let cast_result = bx.context.new_bitcast(None, result, cast_ty);
302+
let elems: Vec<_> = (0..in_len)
303+
.map(|i| {
304+
let idx = bx.context.new_rvalue_from_int(bx.u32_type, i as _);
305+
bx.extract_element(cast_result, idx)
306+
})
307+
.collect();
308+
return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems))
309+
} else {
310+
// avoid the unnecessary truncation as an optimization.
311+
return Ok(bx.context.new_bitcast(None, result, v_type));
312+
}
313+
}
314+
// Since gcc doesn't have vector shuffle methods available in non-patched builds, fall back to
315+
// component-wise bitreverses if they're not available.
316+
#[cfg(not(feature = "master"))]
317+
if name == sym::simd_bitreverse {
318+
let vector = args[0].immediate();
319+
let vector_ty = vector.get_type();
320+
let vector_type = vector_ty.unqualified().dyncast_vector().expect("vector type");
321+
let num_elements = vector_type.get_num_units();
322+
323+
let elem_type = vector_type.get_element_type();
324+
let elem_size_bytes = elem_type.get_size();
325+
let num_type = elem_type.to_unsigned(bx.cx);
326+
let new_elements: Vec<_> = (0..num_elements)
327+
.map(|idx| {
328+
let index = bx.context.new_rvalue_from_long(num_type, idx as _);
329+
let extracted_value = bx.extract_element(vector, index).to_rvalue();
330+
bx.bit_reverse(elem_size_bytes as u64 * 8, extracted_value)
331+
})
332+
.collect();
333+
return Ok(bx.context.new_rvalue_from_vector(None, vector_ty, &new_elements));
334+
}
335+
336+
if name == sym::simd_ctlz || name == sym::simd_cttz {
337+
let vector = args[0].immediate();
338+
let elements: Vec<_> = (0..in_len)
339+
.map(|i| {
340+
let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64);
341+
let value = bx.extract_element(vector, index).to_rvalue();
342+
if name == sym::simd_ctlz {
343+
bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value)
344+
} else {
345+
bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value)
346+
}
347+
})
348+
.collect();
349+
return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements));
350+
}
351+
159352
if name == sym::simd_shuffle {
160353
// Make sure this is actually an array, since typeck only checks the length-suffixed
161354
// version of this intrinsic.

0 commit comments

Comments
 (0)