Skip to content

Commit 4636c59

Browse files
committed
Add more SIMD
1 parent 5088fb3 commit 4636c59

File tree

5 files changed

+102
-30
lines changed

5 files changed

+102
-30
lines changed

src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1343,7 +1343,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
13431343
}
13441344
// TODO(antoyo): switch to using new_vector_access.
13451345
let array = self.context.new_bitcast(None, v2, array_type);
1346-
for i in 0..vec_num_units {
1346+
for i in 0..(mask_num_units - vec_num_units) {
13471347
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
13481348
}
13491349
let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements);

src/consts.rs

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
2727
}
2828
// NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
2929
// SIMD builtins require a constant value.
30-
if value.get_type() != typ {
31-
self.context.new_bitcast(None, value, typ)
32-
}
33-
else {
34-
value
35-
}
30+
self.bitcast_if_needed(value, typ)
3631
}
3732
}
3833

@@ -86,13 +81,7 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
8681

8782
// TODO(antoyo): set alignment.
8883

89-
let value =
90-
if value.get_type() != gcc_type {
91-
self.context.new_bitcast(None, value, gcc_type)
92-
}
93-
else {
94-
value
95-
};
84+
let value = self.bitcast_if_needed(value, gcc_type);
9685
global.global_set_initializer_rvalue(value);
9786

9887
// As an optimization, all shared statics which do not have interior

src/context.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
279279
pub fn sess(&self) -> &Session {
280280
&self.tcx.sess
281281
}
282+
283+
/// Returns `value` unchanged when its type already equals `expected_type`;
/// otherwise returns a bitcast of `value` to `expected_type`.
///
/// The bitcast is skipped whenever possible because a bitcast makes a value
/// non-constant, and some SIMD builtins require a constant argument.
pub fn bitcast_if_needed(&self, value: RValue<'gcc>, expected_type: Type<'gcc>) -> RValue<'gcc> {
    // Guard clause: nothing to do when the types already agree.
    if value.get_type() == expected_type {
        return value;
    }
    self.context.new_bitcast(None, value, expected_type)
}
282291
}
283292

284293
impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {

src/intrinsic/llvm.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,25 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
2121
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
2222
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
2323
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
24+
25+
// The above doc points to unknown builtins for the following, so override them:
26+
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
27+
"llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si",
28+
"llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf",
29+
"llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf",
30+
"llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di",
31+
"llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di",
32+
"llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df",
33+
"llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df",
34+
"llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si",
35+
"llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256",
36+
"llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf",
37+
"llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256",
38+
"llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di",
39+
"llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
40+
"llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
41+
"llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
42+
"" => "",
2443
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
2544
_ => include!("archs.rs"),
2645
};

src/intrinsic/simd.rs

Lines changed: 71 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
202202
};
203203
let builtin = bx.context.get_target_builtin_function(func_name);
204204
let param1_type = builtin.get_param(0).to_rvalue().get_type();
205-
let vector =
206-
if vector.get_type() != param1_type {
207-
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
208-
bx.context.new_bitcast(None, vector, param1_type)
209-
}
210-
else {
211-
vector
212-
};
205+
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
206+
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
213207
let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]);
214208
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
215209
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
@@ -539,18 +533,79 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
539533
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
540534

541535
let func = bx.context.get_target_builtin_function(builtin_name);
542-
let param1_type = func.get_parameter(0).get_type();
543-
let lhs =
544-
if lhs.get_type() != param1_type {
545-
bx.context.new_bitcast(None, lhs, param1_type)
546-
}
547-
else {
548-
lhs
549-
};
536+
let param1_type = func.get_param(0).to_rvalue().get_type();
537+
let param2_type = func.get_param(1).to_rvalue().get_type();
538+
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
539+
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
550540
let result = bx.context.new_call(None, func, &[lhs, rhs]);
551541
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
552542
return Ok(bx.context.new_bitcast(None, result, vec_ty));
553543
}
554544

545+
// Expands to the handler for one arithmetic SIMD reduction intrinsic.
//
// Macro arguments:
// - `$name`: the `sym::` identifier compared against the current intrinsic `name`;
//   the expansion is a no-op when they differ.
// - `$integer_reduce`: builder method applied to the input vector for
//   `ty::Int` / `ty::Uint` elements.
// - `$float_reduce`: builder method called as `(accumulator, vector)` for
//   `ty::Float` elements.
// - `$ordered`: when true, `args[1]` is used as the accumulator; when false,
//   floats use `$identity` and integers return the bare reduction.
// - `$op`: builder method that combines the accumulator with the integer
//   reduction in the ordered case.
// - `$identity`: accumulator value for unordered float reductions (only 32-bit
//   and 64-bit floats are supported; other widths report an error).
//
// The expansion relies on `name`, `ret_ty`, `in_elem`, `in_ty`, `args` and `bx`
// from the enclosing function and `return`s from it when `$name` matches.
macro_rules! arith_red {
    ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident,
     $identity:expr) => {
        if name == sym::$name {
            // A reduction yields a single element of the input vector.
            require!(
                ret_ty == in_elem,
                "expected return type `{}` (element of input `{}`), found `{}`",
                in_elem,
                in_ty,
                ret_ty
            );
            return match in_elem.kind() {
                ty::Int(_) | ty::Uint(_) => {
                    // Build the reduction once and reuse it on both paths instead of
                    // calling the reducer a second time for the unordered case.
                    let r = bx.$integer_reduce(args[0].immediate());
                    if $ordered {
                        // if overflow occurs, the result is the
                        // mathematical result modulo 2^n:
                        Ok(bx.$op(args[1].immediate(), r))
                    } else {
                        Ok(r)
                    }
                }
                ty::Float(f) => {
                    let acc = if $ordered {
                        // ordered arithmetic reductions take an accumulator
                        args[1].immediate()
                    } else {
                        // unordered arithmetic reductions use the identity accumulator
                        match f.bit_width() {
                            32 => bx.const_real(bx.type_f32(), $identity),
                            64 => bx.const_real(bx.type_f64(), $identity),
                            v => return_error!(
                                r#"
unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
                                sym::$name,
                                in_ty,
                                in_elem,
                                v,
                                ret_ty
                            ),
                        }
                    };
                    Ok(bx.$float_reduce(acc, args[0].immediate()))
                }
                _ => return_error!(
                    "unsupported {} from `{}` with element `{}` to `{}`",
                    sym::$name,
                    in_ty,
                    in_elem,
                    ret_ty
                ),
            };
        }
    };
}
600+
601+
// TODO: use a recursive algorithm a-la Hacker's Delight.
602+
arith_red!(
603+
simd_reduce_add_unordered: vector_reduce_add,
604+
vector_reduce_fadd_fast,
605+
false,
606+
add,
607+
0.0
608+
);
609+
555610
unimplemented!("simd {}", name);
556611
}

0 commit comments

Comments
 (0)