Skip to content

Commit b876ff7

Browse files
committed
feat: get code closer to simdjson (still broken)
1 parent 1e3a741 commit b876ff7

File tree

5 files changed

+239
-168
lines changed

5 files changed

+239
-168
lines changed

src/neon/intrinsics.rs

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,20 @@ pub type poly64_t = i64;
2828

2929
// Raw bindings to LLVM intrinsics. Each `link_name` has to be the exact LLVM
// intrinsic name. LLVM spells overloaded vector types with signless integer
// elements (`v16i8`, `v2i32`, ...); there is no `u8`/`u32` spelling, even for
// the unsigned operations, so the unsigned NEON ops below still use `i`.
#[allow(improper_ctypes)]
3030
extern "C" {
31+
// `addp` on sixteen 8-bit lanes.
#[link_name = "llvm.aarch64.neon.addp.v16i8"]
32+
fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
3133
#[link_name = "llvm.aarch64.neon.pmull64"]
3234
fn vmull_p64_(a: i64, b: i64) -> int8x16_t;
3335
#[link_name = "llvm.ctpop.i64"]
3436
fn ctpop_s64_(a: i64) -> i64;
3537
// NOTE(review): LLVM's `llvm.cttz.*` intrinsics take a second `i1` argument
// (zero-is-poison flag); this one-argument declaration may not match that
// signature - confirm before relying on it. Left unchanged because callers
// are outside this view.
#[link_name = "llvm.cttz.i64"]
3638
fn cttz_u64_(a: i64) -> i64;
39+
// `uqxtn` producing two 32-bit lanes.
#[link_name = "llvm.aarch64.neon.uqxtn.v2i32"]
40+
fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t;
41+
// `uqsub` on sixteen 8-bit lanes. The second operand was previously also
// named `a`; it is the right-hand operand and is now named `b`.
#[link_name = "llvm.aarch64.neon.uqsub.v16i8"]
42+
fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
3743
}
3844

39-
//unsafe fn vpaddq_u8_(_a: poly128_t, _b: poly128_t) -> poly128_t { mem::transmute(vdupq_n_u8(0)) }
40-
4145
unsafe fn vaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) }
4246
unsafe fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) }
4347
unsafe fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) }
@@ -153,6 +157,10 @@ pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t {
153157
mem::transmute(vmull_p64_(mem::transmute(a), mem::transmute(b)))
154158
}
155159

160+
/// Public wrapper that forwards both operands unchanged to the `vpaddq_u8_`
/// extern binding and returns its result.
///
/// NOTE(review): the binding is linked against LLVM's
/// `llvm.aarch64.neon.addp` family, presumably the NEON pairwise add
/// (ADDP) - confirm against the Arm intrinsics reference.
///
/// # Safety
/// This is a direct call into an `extern` intrinsic declaration.
/// NOTE(review): presumably the caller must ensure the target supports
/// NEON - confirm.
#[inline]
161+
pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
162+
vpaddq_u8_(a, b)
163+
}
156164

157165
#[inline]
158166
pub unsafe fn vshrq_n_u8(a: uint8x16_t, n: u8) -> uint8x16_t {
@@ -294,6 +302,7 @@ macro_rules! aarch64_simd_ceq {
294302
}
295303

296304
aarch64_simd_ceq!(vceqq_u8, uint8x16_t);
305+
297306
aarch64_simd_ceq!(vceq_s64, int64x1_t);
298307
aarch64_simd_ceq!(vceqq_s64, int64x2_t);
299308
aarch64_simd_ceq!(vceq_u64, uint64x1_t);
@@ -405,11 +414,6 @@ pub fn zerou8x16() -> uint8x16_t {
405414
uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)
406415
}
407416

408-
#[inline]
409-
pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
410-
mem::transmute(vaddq_u8_(mem::transmute(a), mem::transmute(b)))
411-
}
412-
413417
#[inline]
414418
pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
415419
mem::transmute(vaddq_u8_(mem::transmute(a), mem::transmute(b)))
@@ -441,8 +445,6 @@ macro_rules! arm_reinterpret {
441445
($name:ident, $from:ty, $to:ty) => {
442446
// Vector reinterpret cast operation
443447
#[inline]
444-
#[target_feature(enable = "neon")]
445-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
446448
pub unsafe fn $name(a: $from) -> $to {
447449
mem::transmute(a)
448450
}
@@ -462,7 +464,6 @@ macro_rules! arm_vget_lane {
462464
($name:ident, $to:ty, $from:ty, $lanes:literal) => {
463465
#[inline]
464466
pub unsafe fn $name(v: $from, lane: u32) -> $to {
465-
if lane > $lanes { unreachable_unchecked() }
466467
simd_llvm::simd_extract(v, lane)
467468
}
468469
};
@@ -487,8 +488,8 @@ pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t {
487488
}
488489

489490
/// Converts a `uint64x2_t` into a `uint32x2_t`.
///
/// This span shows both sides of the diff: the removed (`-`) version was a
/// safe fn that built the result from `a.0 as u32` and `a.1 as u32`; the added
/// (`+`) version is `unsafe` and forwards to the `vqmovn_u64_` extern binding.
///
/// NOTE(review): that binding is linked against an LLVM `uqxtn` intrinsic,
/// presumably the unsigned saturating narrow - if so, out-of-range lanes now
/// clamp instead of wrapping as the `as u32` casts did. Confirm.
#[inline]
490-
pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
491-
uint32x2_t(a.0 as u32, a.1 as u32)
491+
pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
492+
vqmovn_u64_(a)
492493
}
493494

494495
#[inline]
@@ -503,8 +504,7 @@ pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t {
503504

504505
/// Subtracts `b` from `a` for two `uint8x16_t` vectors.
///
/// This span shows both sides of the diff: the removed (`-`) body used the
/// generic `simd_sub` intrinsic (flagged with `FIXME?`); the added (`+`) body
/// forwards both operands to the `vqsubq_u8_` extern binding instead.
///
/// NOTE(review): that binding is linked against an LLVM `uqsub` intrinsic,
/// presumably the unsigned saturating subtract - confirm against the Arm
/// intrinsics reference.
#[inline]
505506
pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
506-
// FIXME?
507-
simd_llvm::simd_sub(mem::transmute(a), mem::transmute(b))
507+
vqsubq_u8_(a, b)
508508
}
509509

510510
#[inline]
@@ -583,3 +583,45 @@ pub fn trailingzeroes(a: u64) -> u32 {
583583
/// Stores the vector `val` into the memory starting at `addr`.
///
/// `addr` arrives as a plain byte pointer, so nothing in the signature
/// guarantees it meets the alignment of `uint32x4_t`. `std::ptr::write`
/// requires a properly aligned destination, so the store is done with
/// `write_unaligned`, which has no alignment requirement.
///
/// # Safety
/// `addr` must be valid for writes of `size_of::<uint32x4_t>()` bytes.
pub unsafe fn vst1q_u32(addr: *mut u8, val: uint32x4_t) {
584584
std::ptr::write_unaligned(addr as *mut uint32x4_t, val)
585585
}
586+
587+
588+
// Turns a runtime value into an integer literal for another macro.
//
// `$imm8` is masked down to its low five bits (`& 0b1_1111`) and matched
// against every possible result; each arm invokes `$expand!` with the
// matching literal, so `$expand` always receives a constant in 0..=31.
// Despite the parameter's name, only five bits of `$imm8` are used.
//
// NOTE(review): presumably intended for intrinsics whose lane/shift operand
// must be a compile-time immediate - confirm against the call sites.
#[allow(unused)]
589+
macro_rules! constify_imm5 {
590+
($imm8:expr, $expand:ident) => {
591+
#[allow(overflowing_literals)]
592+
// Keep only the low five bits so the match below covers every case.
match ($imm8) & 0b1_1111 {
593+
0 => $expand!(0),
594+
1 => $expand!(1),
595+
2 => $expand!(2),
596+
3 => $expand!(3),
597+
4 => $expand!(4),
598+
5 => $expand!(5),
599+
6 => $expand!(6),
600+
7 => $expand!(7),
601+
8 => $expand!(8),
602+
9 => $expand!(9),
603+
10 => $expand!(10),
604+
11 => $expand!(11),
605+
12 => $expand!(12),
606+
13 => $expand!(13),
607+
14 => $expand!(14),
608+
15 => $expand!(15),
609+
16 => $expand!(16),
610+
17 => $expand!(17),
611+
18 => $expand!(18),
612+
19 => $expand!(19),
613+
20 => $expand!(20),
614+
21 => $expand!(21),
615+
22 => $expand!(22),
616+
23 => $expand!(23),
617+
24 => $expand!(24),
618+
25 => $expand!(25),
619+
26 => $expand!(26),
620+
27 => $expand!(27),
621+
28 => $expand!(28),
622+
29 => $expand!(29),
623+
30 => $expand!(30),
624+
// After the mask the only remaining value is 31.
_ => $expand!(31),
625+
}
626+
};
627+
}

src/neon/simd.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,4 +467,4 @@ macro_rules! constify_imm8 {
467467
_ => $expand!(255),
468468
}
469469
};
470-
}
470+
}

src/neon/simd_llvm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ extern "platform-intrinsic" {
2020
// pub fn simd_cast<T, U>(x: T) -> U;
2121
//
2222
pub fn simd_add<T>(x: T, y: T) -> T;
23-
pub fn simd_sub<T>(x: T, y: T) -> T;
23+
// pub fn simd_sub<T>(x: T, y: T) -> T;
2424
// pub fn simd_mul<T>(x: T, y: T) -> T;
2525
// pub fn simd_div<T>(x: T, y: T) -> T;
2626
// pub fn simd_shl<T>(x: T, y: T) -> T;

0 commit comments

Comments
 (0)