Skip to content

Commit d3c9cc5

Browse files
committed
Add support for missing SIMD intrinsics
1 parent 4dd288c commit d3c9cc5

File tree

2 files changed

+61
-2
lines changed

2 files changed

+61
-2
lines changed

src/intrinsic/llvm.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,10 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
182182
| "__builtin_ia32_vplzcntd_128_mask"
183183
| "__builtin_ia32_vplzcntq_512_mask"
184184
| "__builtin_ia32_vplzcntq_256_mask"
185-
| "__builtin_ia32_vplzcntq_128_mask" => {
185+
| "__builtin_ia32_vplzcntq_128_mask"
186+
| "__builtin_ia32_cvtqq2pd128_mask"
187+
| "__builtin_ia32_cvtqq2pd256_mask"
188+
| "__builtin_ia32_cvtqq2ps256_mask" => {
186189
let mut new_args = args.to_vec();
187190
// Remove last arg as it doesn't seem to be used in GCC and is always false.
188191
new_args.pop();
@@ -378,6 +381,23 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
378381
);
379382
args = vec![arg.get_address(None)].into();
380383
}
384+
"__builtin_ia32_cvtqq2pd512_mask" | "__builtin_ia32_cvtqq2ps512_mask" => {
385+
let mut old_args = args.to_vec();
386+
let mut new_args = vec![];
387+
new_args.push(old_args.swap_remove(0));
388+
let arg2_type = gcc_func.get_param_type(1);
389+
let vector_type = arg2_type.dyncast_vector().expect("vector type");
390+
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
391+
let num_units = vector_type.get_num_units();
392+
let first_arg =
393+
builder.context.new_rvalue_from_vector(None, arg2_type, &vec![zero; num_units]);
394+
new_args.push(first_arg);
395+
let arg3_type = gcc_func.get_param_type(2);
396+
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
397+
new_args.push(minus_one);
398+
new_args.push(old_args.swap_remove(0));
399+
args = new_args.into();
400+
}
381401
_ => (),
382402
}
383403
} else {
@@ -987,6 +1007,29 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
9871007
"llvm.x86.avx512.vpdpbusds.128" => "__builtin_ia32_vpdpbusds_v4si",
9881008
"llvm.x86.xsave" => "__builtin_ia32_xsave",
9891009
"llvm.x86.xsaveopt" => "__builtin_ia32_xsaveopt",
1010+
"llvm.x86.avx512.mask.loadu.w.512" => "__builtin_ia32_loaddquhi512_mask",
1011+
"llvm.x86.avx512.mask.loadu.b.512" => "__builtin_ia32_loaddquqi512_mask",
1012+
"llvm.x86.avx512.mask.loadu.w.256" => "__builtin_ia32_loaddquhi256_mask",
1013+
"llvm.x86.avx512.mask.loadu.b.256" => "__builtin_ia32_loaddquqi256_mask",
1014+
"llvm.x86.avx512.mask.loadu.w.128" => "__builtin_ia32_loaddquhi128_mask",
1015+
"llvm.x86.avx512.mask.loadu.b.128" => "__builtin_ia32_loaddquqi128_mask",
1016+
"llvm.x86.avx512.mask.storeu.w.512" => "__builtin_ia32_storedquhi512_mask",
1017+
"llvm.x86.avx512.mask.storeu.b.512" => "__builtin_ia32_storedquqi512_mask",
1018+
"llvm.x86.avx512.mask.storeu.w.256" => "__builtin_ia32_storedquhi256_mask",
1019+
"llvm.x86.avx512.mask.storeu.b.256" => "__builtin_ia32_storedquqi256_mask",
1020+
"llvm.x86.avx512.mask.storeu.w.128" => "__builtin_ia32_storedquhi128_mask",
1021+
"llvm.x86.avx512.mask.storeu.b.128" => "__builtin_ia32_storedquqi128_mask",
1022+
"llvm.x86.avx512.mask.expand.load.w.512" => "__builtin_ia32_expandloadhi512_mask",
1023+
"llvm.x86.avx512.mask.expand.load.w.256" => "__builtin_ia32_expandloadhi256_mask",
1024+
"llvm.x86.avx512.mask.expand.load.w.128" => "__builtin_ia32_expandloadhi128_mask",
1025+
"llvm.x86.avx512.mask.expand.load.b.512" => "__builtin_ia32_expandloadqi512_mask",
1026+
"llvm.x86.avx512.mask.expand.load.b.256" => "__builtin_ia32_expandloadqi256_mask",
1027+
"llvm.x86.avx512.mask.expand.load.b.128" => "__builtin_ia32_expandloadqi128_mask",
1028+
"llvm.x86.avx512.sitofp.round.v8f64.v8i64" => "__builtin_ia32_cvtqq2pd512_mask",
1029+
"llvm.x86.avx512.sitofp.round.v2f64.v2i64" => "__builtin_ia32_cvtqq2pd128_mask",
1030+
"llvm.x86.avx512.sitofp.round.v4f64.v4i64" => "__builtin_ia32_cvtqq2pd256_mask",
1031+
"llvm.x86.avx512.sitofp.round.v8f32.v8i64" => "__builtin_ia32_cvtqq2ps512_mask",
1032+
"llvm.x86.avx512.sitofp.round.v4f32.v4i64" => "__builtin_ia32_cvtqq2ps256_mask",
9901033

9911034
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
9921035
_ => include!("archs.rs"),

src/intrinsic/simd.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
201201
bx.context.new_bitcast(None, shuffled, v_type)
202202
};
203203

204-
if name == sym::simd_bswap || name == sym::simd_bitreverse {
204+
if matches!(name, sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctpop) {
205205
require!(
206206
bx.type_kind(bx.element_type(llret_ty)) == TypeKind::Integer,
207207
InvalidMonomorphization::UnsupportedOperation { span, name, in_ty, in_elem }
@@ -212,6 +212,22 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
212212
return Ok(simd_bswap(bx, args[0].immediate()));
213213
}
214214

215+
// Lane-by-lane population count for `simd_ctpop`: walks indices `0..in_len`,
// extracts each element of `vector`, runs `bx.pop_count` on it, casts the count
// to the element type of `llret_ty`, and assembles the results into a new
// vector rvalue of type `llret_ty`. `in_len` and `llret_ty` are captured from
// the enclosing function.
let simd_ctpop = |bx: &mut Builder<'a, 'gcc, 'tcx>, vector: RValue<'gcc>| -> RValue<'gcc> {
216+
let mut vector_elements = vec![];
217+
// Element type of the return vector; every per-lane count is cast to it below.
let elem_ty = bx.element_type(llret_ty);
218+
for i in 0..in_len {
219+
// Lane index is materialised as an rvalue of `bx.ulong_type` for `extract_element`.
let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
220+
let element = bx.extract_element(vector, index).to_rvalue();
221+
// NOTE(review): the cast suggests `pop_count` may return a type other than
// `elem_ty` — confirm against `Builder::pop_count`.
let result = bx.context.new_cast(None, bx.pop_count(element), elem_ty);
222+
vector_elements.push(result);
223+
}
224+
// Pack the per-lane counts, in lane order, into the result vector.
bx.context.new_rvalue_from_vector(None, llret_ty, &vector_elements)
225+
};
226+
227+
if name == sym::simd_ctpop {
228+
return Ok(simd_ctpop(bx, args[0].immediate()));
229+
}
230+
215231
// We use a different algorithm from non-vector bitreverse to take advantage of most
216232
// processors' vector shuffle units. It works like this:
217233
// 1. Generate pre-reversed low and high nibbles as a vector.

0 commit comments

Comments
 (0)