@@ -28,16 +28,20 @@ pub type poly64_t = i64;
28
28
29
29
#[ allow( improper_ctypes) ]
30
30
extern "C" {
31
+ #[ link_name = "llvm.aarch64.neon.addp.v16u8" ]
32
+ fn vpaddq_u8_ ( a : uint8x16_t , b : uint8x16_t ) -> uint8x16_t ;
31
33
#[ link_name = "llvm.aarch64.neon.pmull64" ]
32
34
fn vmull_p64_ ( a : i64 , b : i64 ) -> int8x16_t ;
33
35
#[ link_name = "llvm.ctpop.i64" ]
34
36
fn ctpop_s64_ ( a : i64 ) -> i64 ;
35
37
#[ link_name = "llvm.cttz.i64" ]
36
38
fn cttz_u64_ ( a : i64 ) -> i64 ;
39
+ #[ link_name = "llvm.aarch64.neon.uqxtn.v2u32" ]
40
+ fn vqmovn_u64_ ( a : uint64x2_t ) -> uint32x2_t ;
41
+ #[ link_name = "llvm.aarch64.neon.uqsub.v16u8" ]
42
+ fn vqsubq_u8_ ( a : uint8x16_t , a : uint8x16_t ) -> uint8x16_t ;
37
43
}
38
44
39
- //unsafe fn vpaddq_u8_(_a: poly128_t, _b: poly128_t) -> poly128_t { mem::transmute(vdupq_n_u8(0)) }
40
-
41
45
/// Adds `a` and `b` by handing both operands to `simd_llvm::simd_add`.
///
/// Each operand goes through `mem::transmute` first; the target type is whatever
/// `simd_add` needs in order to produce the declared return type.
unsafe fn vaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    simd_llvm::simd_add(lhs, rhs)
}
42
46
/// Adds `a` and `b` by handing both operands to `simd_llvm::simd_add`.
///
/// Each operand goes through `mem::transmute` first; the target type is whatever
/// `simd_add` needs in order to produce the declared return type.
unsafe fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t {
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    simd_llvm::simd_add(lhs, rhs)
}
43
47
/// Adds `a` and `b` by handing both operands to `simd_llvm::simd_add`.
///
/// Each operand goes through `mem::transmute` first; the target type is whatever
/// `simd_add` needs in order to produce the declared return type.
unsafe fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t {
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    simd_llvm::simd_add(lhs, rhs)
}
@@ -153,6 +157,10 @@ pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t {
153
157
mem:: transmute ( vmull_p64_ ( mem:: transmute ( a) , mem:: transmute ( b) ) )
154
158
}
155
159
160
+ #[ inline]
161
+ pub unsafe fn vpaddq_u8 ( a : uint8x16_t , b : uint8x16_t ) -> uint8x16_t {
162
+ vpaddq_u8_ ( a, b)
163
+ }
156
164
157
165
#[ inline]
158
166
pub unsafe fn vshrq_n_u8 ( a : uint8x16_t , n : u8 ) -> uint8x16_t {
@@ -294,6 +302,7 @@ macro_rules! aarch64_simd_ceq {
294
302
}
295
303
296
304
aarch64_simd_ceq ! ( vceqq_u8, uint8x16_t) ;
305
+
297
306
aarch64_simd_ceq ! ( vceq_s64, int64x1_t) ;
298
307
aarch64_simd_ceq ! ( vceqq_s64, int64x2_t) ;
299
308
aarch64_simd_ceq ! ( vceq_u64, uint64x1_t) ;
@@ -405,11 +414,6 @@ pub fn zerou8x16() -> uint8x16_t {
405
414
uint8x16_t ( 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 )
406
415
}
407
416
408
- #[ inline]
409
- pub unsafe fn vpaddq_u8 ( a : uint8x16_t , b : uint8x16_t ) -> uint8x16_t {
410
- mem:: transmute ( vaddq_u8_ ( mem:: transmute ( a) , mem:: transmute ( b) ) )
411
- }
412
-
413
417
#[ inline]
414
418
pub unsafe fn vaddq_u8 ( a : uint8x16_t , b : uint8x16_t ) -> uint8x16_t {
415
419
mem:: transmute ( vaddq_u8_ ( mem:: transmute ( a) , mem:: transmute ( b) ) )
@@ -441,8 +445,6 @@ macro_rules! arm_reinterpret {
441
445
( $name: ident, $from: ty, $to: ty) => {
442
446
// Vector reinterpret cast operation
443
447
#[ inline]
444
- #[ target_feature( enable = "neon" ) ]
445
- #[ cfg_attr( target_arch = "arm" , target_feature( enable = "v7" ) ) ]
446
448
pub unsafe fn $name( a: $from) -> $to {
447
449
mem:: transmute( a)
448
450
}
@@ -462,7 +464,6 @@ macro_rules! arm_vget_lane {
462
464
( $name: ident, $to: ty, $from: ty, $lanes: literal) => {
463
465
#[ inline]
464
466
pub unsafe fn $name( v: $from, lane: u32 ) -> $to {
465
- if lane > $lanes { unreachable_unchecked( ) }
466
467
simd_llvm:: simd_extract( v, lane)
467
468
}
468
469
} ;
@@ -487,8 +488,8 @@ pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t {
487
488
}
488
489
489
490
#[ inline]
490
- pub fn vqmovn_u64 ( a : uint64x2_t ) -> uint32x2_t {
491
- uint32x2_t ( a . 0 as u32 , a . 1 as u32 )
491
+ pub unsafe fn vqmovn_u64 ( a : uint64x2_t ) -> uint32x2_t {
492
+ vqmovn_u64_ ( a )
492
493
}
493
494
494
495
#[ inline]
@@ -503,8 +504,7 @@ pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t {
503
504
504
505
#[ inline]
505
506
pub unsafe fn vqsubq_u8 ( a : uint8x16_t , b : uint8x16_t ) -> uint8x16_t {
506
- // FIXME?
507
- simd_llvm:: simd_sub ( mem:: transmute ( a) , mem:: transmute ( b) )
507
+ vqsubq_u8_ ( a, b)
508
508
}
509
509
510
510
#[ inline]
@@ -583,3 +583,45 @@ pub fn trailingzeroes(a: u64) -> u32 {
583
583
pub unsafe fn vst1q_u32 ( addr : * mut u8 , val : uint32x4_t ) {
584
584
std:: ptr:: write ( addr as * mut uint32x4_t , val)
585
585
}
586
+
587
+
588
/// Turns a runtime value into a literal argument for another macro.
///
/// The first argument is masked down to its low five bits and matched against every
/// value from 0 to 31; the selected arm invokes the macro named by the second argument
/// with that value written as an integer literal. The final arm is a wildcard that
/// passes `31`, which is the only value left after masking.
#[allow(unused)]
macro_rules! constify_imm5 {
    ($imm5:expr, $call:ident) => {
        #[allow(overflowing_literals)]
        match ($imm5) & 0x1f {
            0 => $call!(0),
            1 => $call!(1),
            2 => $call!(2),
            3 => $call!(3),
            4 => $call!(4),
            5 => $call!(5),
            6 => $call!(6),
            7 => $call!(7),
            8 => $call!(8),
            9 => $call!(9),
            10 => $call!(10),
            11 => $call!(11),
            12 => $call!(12),
            13 => $call!(13),
            14 => $call!(14),
            15 => $call!(15),
            16 => $call!(16),
            17 => $call!(17),
            18 => $call!(18),
            19 => $call!(19),
            20 => $call!(20),
            21 => $call!(21),
            22 => $call!(22),
            23 => $call!(23),
            24 => $call!(24),
            25 => $call!(25),
            26 => $call!(26),
            27 => $call!(27),
            28 => $call!(28),
            29 => $call!(29),
            30 => $call!(30),
            _ => $call!(31),
        }
    };
}
0 commit comments