Skip to content

Commit 0a24f2b

Browse files
surechenAmanieu
authored andcommitted
add neon instruction abs for floating-point
1 parent b7503bf commit 0a24f2b

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ extern "C" {
7272
fn vabs_s64_(a: int64x1_t) -> int64x1_t;
7373
#[link_name = "llvm.aarch64.neon.abs.v2i64"]
7474
fn vabsq_s64_(a: int64x2_t) -> int64x2_t;
75+
#[link_name = "llvm.fabs.v1f64"]
76+
fn vabs_f64_(a: float64x1_t) -> float64x1_t;
77+
#[link_name = "llvm.fabs.v2f64"]
78+
fn vabsq_f64_(a: float64x2_t) -> float64x2_t;
7579

7680
#[link_name = "llvm.aarch64.neon.suqadd.v8i8"]
7781
fn vuqadd_s8_(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
@@ -688,6 +692,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
688692
pub unsafe fn vabsd_s64(a: i64) -> i64 {
689693
vabsd_s64_(a)
690694
}
695+
691696
/// Absolute Value (wrapping).
692697
#[inline]
693698
#[target_feature(enable = "neon")]
@@ -703,6 +708,21 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
703708
vabsq_s64_(a)
704709
}
705710

711+
/// Floating-point absolute value.
712+
#[inline]
713+
#[target_feature(enable = "neon")]
714+
#[cfg_attr(test, assert_instr(fabs))]
715+
pub unsafe fn vabs_f64(a: float64x1_t) -> float64x1_t {
716+
vabs_f64_(a)
717+
}
718+
/// Floating-point absolute value.
719+
#[inline]
720+
#[target_feature(enable = "neon")]
721+
#[cfg_attr(test, assert_instr(fabs))]
722+
pub unsafe fn vabsq_f64(a: float64x2_t) -> float64x2_t {
723+
vabsq_f64_(a)
724+
}
725+
706726
/// Signed saturating Accumulate of Unsigned value.
707727
#[inline]
708728
#[target_feature(enable = "neon")]
@@ -3912,6 +3932,20 @@ mod tests {
39123932
let e = i64x2::new(i64::MIN, i64::MAX);
39133933
assert_eq!(r, e);
39143934
}
3935+
#[simd_test(enable = "neon")]
3936+
unsafe fn test_vabs_f64() {
3937+
let a = f64x1::new(f64::MIN);
3938+
let r: f64x1 = transmute(vabs_f64(transmute(a)));
3939+
let e = f64x1::new(f64::MAX);
3940+
assert_eq!(r, e);
3941+
}
3942+
#[simd_test(enable = "neon")]
3943+
unsafe fn test_vabsq_f64() {
3944+
let a = f64x2::new(f64::MIN, -4.2);
3945+
let r: f64x2 = transmute(vabsq_f64(transmute(a)));
3946+
let e = f64x2::new(f64::MAX, 4.2);
3947+
assert_eq!(r, e);
3948+
}
39153949

39163950
#[simd_test(enable = "neon")]
39173951
unsafe fn test_vaddv_s16() {

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,12 @@ extern "C" {
135135
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
136136
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v4i32")]
137137
fn vabsq_s32_(a: int32x4_t) -> int32x4_t;
138+
#[cfg_attr(target_arch = "arm", link_name = "llvm.fabs.v2f32")]
139+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fabs.v2f32")]
140+
fn vabs_f32_(a: float32x2_t) -> float32x2_t;
141+
#[cfg_attr(target_arch = "arm", link_name = "llvm.fabs.v4f32")]
142+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fabs.v4f32")]
143+
fn vabsq_f32_(a: float32x4_t) -> float32x4_t;
138144

139145
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
140146
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
@@ -1147,6 +1153,24 @@ pub unsafe fn vabsq_s16(a: int16x8_t) -> int16x8_t {
11471153
pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t {
11481154
vabsq_s32_(a)
11491155
}
1156+
/// Floating-point absolute value.
1157+
#[inline]
1158+
#[target_feature(enable = "neon")]
1159+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1160+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(fabs))]
1161+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabs))]
1162+
pub unsafe fn vabs_f32(a: float32x2_t) -> float32x2_t {
1163+
vabs_f32_(a)
1164+
}
1165+
/// Floating-point absolute value.
1166+
#[inline]
1167+
#[target_feature(enable = "neon")]
1168+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1169+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(fabs))]
1170+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabs))]
1171+
pub unsafe fn vabsq_f32(a: float32x4_t) -> float32x4_t {
1172+
vabsq_f32_(a)
1173+
}
11501174

11511175
/// Add pairwise.
11521176
#[inline]
@@ -8565,6 +8589,20 @@ mod tests {
85658589
assert_eq!(r, e);
85668590
}
85678591
#[simd_test(enable = "neon")]
8592+
unsafe fn test_vabs_f32() {
8593+
let a = f32x2::new(f32::MIN, -1.0);
8594+
let r: f32x2 = transmute(vabs_f32(transmute(a)));
8595+
let e = f32x2::new(f32::MAX, 1.0);
8596+
assert_eq!(r, e);
8597+
}
8598+
#[simd_test(enable = "neon")]
8599+
unsafe fn test_vabsq_f32() {
8600+
let a = f32x4::new(f32::MIN, -1.32, -4.3, -6.8);
8601+
let r: f32x4 = transmute(vabsq_f32(transmute(a)));
8602+
let e = f32x4::new(f32::MAX, 1.32, 4.3, 6.8);
8603+
assert_eq!(r, e);
8604+
}
8605+
#[simd_test(enable = "neon")]
85688606
unsafe fn test_vpadd_s16() {
85698607
let a = i16x4::new(1, 2, 3, 4);
85708608
let b = i16x4::new(0, -1, -2, -3);

0 commit comments

Comments
 (0)