Commit 1367983

Begin implementing LLVM SIMD intrinsics for AArch64
1 parent: ef6400d
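
For context: the `core::arch::aarch64` NEON intrinsics in the standard library are declared as extern functions whose link names are LLVM intrinsics, and since Cranelift has no LLVM to hand them to, cg_clif must recognize those names and emulate them itself. A minimal sketch of a call this commit should now handle, assuming the usual stdarch lowering of `vabsq_s32` to `llvm.aarch64.neon.abs.v4i32`:

```rust
// Illustrative only: on AArch64, `vabsq_s32` is assumed to reach the
// backend as the `llvm.aarch64.neon.abs.v4i32` intrinsic, which the new
// llvm_aarch64 module emulates lane-by-lane with Cranelift's `iabs`.
#[cfg(target_arch = "aarch64")]
fn abs_lanes(v: std::arch::aarch64::int32x4_t) -> std::arch::aarch64::int32x4_t {
    unsafe { std::arch::aarch64::vabsq_s32(v) }
}
```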

File tree

3 files changed: +137 −1 lines changed

src/intrinsics/llvm.rs

Lines changed: 21 additions & 1 deletion
```diff
@@ -13,11 +13,32 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
     ret: CPlace<'tcx>,
     target: Option<BasicBlock>,
 ) {
+    if intrinsic.starts_with("llvm.aarch64") {
+        return llvm_aarch64::codegen_aarch64_llvm_intrinsic_call(
+            fx, intrinsic, substs, args, ret, target,
+        );
+    }
     if intrinsic.starts_with("llvm.x86") {
         return llvm_x86::codegen_x86_llvm_intrinsic_call(fx, intrinsic, substs, args, ret, target);
     }
 
     match intrinsic {
+        _ if intrinsic.starts_with("llvm.ctlz.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().clz(lane)
+            });
+        }
+
+        _ if intrinsic.starts_with("llvm.ctpop.v") => {
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().popcnt(lane)
+            });
+        }
+
         _ => {
             fx.tcx
                 .sess
@@ -31,4 +52,3 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>(
     let ret_block = fx.get_block(dest);
     fx.bcx.ins().jump(ret_block, &[]);
 }
-
```
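
The two new prefix arms above cover the target-independent `llvm.ctlz.v*` and `llvm.ctpop.v*` families, so a single arm serves every lane count and width (`llvm.ctlz.v4i32`, `llvm.ctpop.v16i8`, and so on), with `simd_for_each_lane` applying Cranelift's scalar `clz`/`popcnt` to each lane. A hedged usage sketch, assuming stdarch lowers the NEON count-leading-zeros and population-count vector intrinsics to these generic names:

```rust
// Illustrative only: `vclzq_u32` and `vcntq_u8` are assumed to lower to
// `llvm.ctlz.v4i32` and `llvm.ctpop.v16i8` respectively, so they should
// hit the new clz/popcnt arms rather than the "unsupported" fallback.
#[cfg(target_arch = "aarch64")]
fn counts(
    v: std::arch::aarch64::uint32x4_t,
    b: std::arch::aarch64::uint8x16_t,
) -> (std::arch::aarch64::uint32x4_t, std::arch::aarch64::uint8x16_t) {
    unsafe {
        (
            std::arch::aarch64::vclzq_u32(v), // leading zeros per 32-bit lane
            std::arch::aarch64::vcntq_u8(b),  // set bits per 8-bit lane
        )
    }
}
```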

src/intrinsics/llvm_aarch64.rs

Lines changed: 115 additions & 0 deletions
(new file)

```rust
//! Emulate AArch64 LLVM intrinsics

use crate::intrinsics::*;
use crate::prelude::*;

use rustc_middle::ty::subst::SubstsRef;

pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
    fx: &mut FunctionCx<'_, '_, 'tcx>,
    intrinsic: &str,
    _substs: SubstsRef<'tcx>,
    args: &[mir::Operand<'tcx>],
    ret: CPlace<'tcx>,
    target: Option<BasicBlock>,
) {
    // llvm.aarch64.neon.sqshl.v*i*

    match intrinsic {
        _ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => {
            intrinsic_args!(fx, args => (a); intrinsic);

            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
                fx.bcx.ins().iabs(lane)
            });
        }

        _ if intrinsic.starts_with("llvm.aarch64.neon.cls.v") => {
            intrinsic_args!(fx, args => (a); intrinsic);

            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
                fx.bcx.ins().cls(lane)
            });
        }

        _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
                crate::num::codegen_saturating_int_binop(fx, BinOp::Add, x_lane, y_lane)
            });
        }

        _ if intrinsic.starts_with("llvm.aarch64.neon.sqsub.v") => {
            intrinsic_args!(fx, args => (x, y); intrinsic);

            simd_pair_for_each_lane_typed(fx, x, y, ret, &|fx, x_lane, y_lane| {
                crate::num::codegen_saturating_int_binop(fx, BinOp::Sub, x_lane, y_lane)
            });
        }

        /*
        _ if intrinsic.starts_with("llvm.aarch64.neon.sshl.v")
            || intrinsic.starts_with("llvm.aarch64.neon.sqshl.v")
            // FIXME split this one out once saturating is implemented
            || intrinsic.starts_with("llvm.aarch64.neon.sqshlu.v") =>
        {
            intrinsic_args!(fx, args => (a, b); intrinsic);

            simd_pair_for_each_lane(fx, a, b, ret, &|fx, _lane_ty, _res_lane_ty, a, b| {
                // FIXME saturate?
                fx.bcx.ins().ishl(a, b)
            });
        }

        _ if intrinsic.starts_with("llvm.aarch64.neon.sqshrn.v") => {
            let (a, imm32) = match args {
                [a, imm32] => (a, imm32),
                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
            };
            let a = codegen_operand(fx, a);
            let imm32 = crate::constant::mir_operand_get_const_val(fx, imm32)
                .expect("llvm.aarch64.neon.sqshrn.v* imm32 not const");

            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm32
                .try_to_bits(Size::from_bytes(4))
                .unwrap_or_else(|| panic!("imm32 not scalar: {:?}", imm32))
            {
                imm32 if imm32 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm32 as u8)),
                _ => fx.bcx.ins().iconst(types::I32, 0),
            });
        }

        _ if intrinsic.starts_with("llvm.aarch64.neon.sqshrun.v") => {
            let (a, imm32) = match args {
                [a, imm32] => (a, imm32),
                _ => bug!("wrong number of args for intrinsic {intrinsic}"),
            };
            let a = codegen_operand(fx, a);
            let imm32 = crate::constant::mir_operand_get_const_val(fx, imm32)
                .expect("llvm.aarch64.neon.sqshrn.v* imm32 not const");

            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm32
                .try_to_bits(Size::from_bytes(4))
                .unwrap_or_else(|| panic!("imm32 not scalar: {:?}", imm32))
            {
                imm32 if imm32 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm32 as u8)),
                _ => fx.bcx.ins().iconst(types::I32, 0),
            });
        }
        */

        _ => {
            fx.tcx.sess.warn(&format!(
                "unsupported AArch64 llvm intrinsic {}; replacing with trap",
                intrinsic
            ));
            crate::trap::trap_unimplemented(fx, intrinsic);
            return;
        }
    }

    let dest = target.expect("all llvm intrinsics used by stdlib should return");
    let ret_block = fx.get_block(dest);
    fx.bcx.ins().jump(ret_block, &[]);
}
```
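
Note that the `sqadd`/`sqsub` arms reuse `crate::num::codegen_saturating_int_binop`, so each lane should behave exactly like Rust's scalar saturating arithmetic. A sketch of a call assumed to reach this path (the exact LLVM name stdarch emits, taken here to be `llvm.aarch64.neon.sqadd.v8i16`, may depend on the stdarch version):

```rust
// Illustrative only: `vqaddq_s16` is assumed to reach the backend as
// `llvm.aarch64.neon.sqadd.v8i16`; each lane then clamps at i16::MIN/MAX.
#[cfg(target_arch = "aarch64")]
fn sat_add_lanes(
    a: std::arch::aarch64::int16x8_t,
    b: std::arch::aarch64::int16x8_t,
) -> std::arch::aarch64::int16x8_t {
    unsafe { std::arch::aarch64::vqaddq_s16(a, b) }
}

// Per-lane reference semantics:
fn sqadd_lane(x: i16, y: i16) -> i16 {
    x.saturating_add(y) // e.g. 30_000 + 10_000 saturates to 32_767
}
```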

src/intrinsics/mod.rs

Lines changed: 1 addition & 0 deletions
```diff
@@ -14,6 +14,7 @@ macro_rules! intrinsic_args {
 
 mod cpuid;
 mod llvm;
+mod llvm_aarch64;
 mod llvm_x86;
 mod simd;
 
```