From fa26882132e42154357b5d1450e3a3074b820788 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Wed, 7 Aug 2019 22:47:11 -0400 Subject: [PATCH 01/34] feat: neon support (mostly broken) --- Cargo.toml | 1 + src/lib.rs | 2187 ++++++++++++++++++++-------------------- src/neon/deser.rs | 204 ++++ src/neon/intrinsics.rs | 287 ++++++ src/neon/mod.rs | 6 + src/neon/simd.rs | 469 +++++++++ src/neon/simd_llvm.rs | 54 + src/neon/stage1.rs | 560 ++++++++++ src/neon/utf8check.rs | 247 +++++ src/numberparse.rs | 30 +- src/portability.rs | 30 + src/stage2.rs | 6 +- src/value/generator.rs | 156 +-- 13 files changed, 3049 insertions(+), 1188 deletions(-) create mode 100644 src/neon/deser.rs create mode 100644 src/neon/intrinsics.rs create mode 100644 src/neon/mod.rs create mode 100644 src/neon/simd.rs create mode 100644 src/neon/simd_llvm.rs create mode 100644 src/neon/stage1.rs create mode 100644 src/neon/utf8check.rs create mode 100644 src/portability.rs diff --git a/Cargo.toml b/Cargo.toml index 399f7c9d..cae31f52 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ halfbrown = "0.1" page_size = "0.4" itoa = "0.4" ryu = "1" +core_arch = "0.1.5" # serde compatibilty serde = { version = "1", features = ["derive"], optional = true} diff --git a/src/lib.rs b/src/lib.rs index 02038553..3973b7a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,14 @@ #![deny(warnings)] +#![feature(asm)] +#![feature(stdsimd)] +#![feature(repr_simd)] +#![feature(custom_inner_attributes)] +#![feature(aarch64_target_feature)] +#![feature(platform_intrinsics)] +#![feature(stmt_expr_attributes)] +#![feature(simd_ffi)] +#![feature(link_llvm_intrinsics)] + #![cfg_attr(feature = "hints", feature(core_intrinsics))] //! simdjson-rs is a rust port of the simejson c++ library. It follows //! most of the design closely with a few exceptions to make it better @@ -69,16 +79,16 @@ //! let v: Value = simd_json::serde::from_slice(&mut d).unwrap(); //! 
``` -#[cfg(feature = "serde_impl")] -extern crate serde as serde_ext; -#[cfg(feature = "serde_impl")] -pub mod serde; +//#[cfg(feature = "serde_impl")] +//extern crate serde as serde_ext; +//#[cfg(feature = "serde_impl")] +//pub mod serde; mod charutils; #[macro_use] mod macros; mod error; -mod numberparse; +//mod numberparse; mod parsedjson; mod stringparse; @@ -89,22 +99,29 @@ pub use crate::avx2::deser::*; #[cfg(target_feature = "avx2")] use crate::avx2::stage1::SIMDJSON_PADDING; -#[cfg(not(target_feature = "avx2"))] +#[cfg(target_feature = "sse42")] mod sse42; -#[cfg(not(target_feature = "avx2"))] +#[cfg(target_feature = "sse42")] pub use crate::sse42::deser::*; -#[cfg(not(target_feature = "avx2"))] +#[cfg(target_feature = "sse42")] use crate::sse42::stage1::SIMDJSON_PADDING; -mod stage2; -pub mod value; +//#[cfg(target_feature = "neon")] +mod neon; +//#[cfg(target_feature = "neon")] +//pub use crate::neon::deser::*; +//#[cfg(target_feature = "neon")] +//use crate::neon::stage1::SIMDJSON_PADDING; + +//mod stage2; +//pub mod value; -use crate::numberparse::Number; -use std::mem; -use std::str; +//use crate::numberparse::Number; +//use std::mem; +//use std::str; pub use crate::error::{Error, ErrorType}; -pub use crate::value::*; +//pub use crate::value::*; pub type Result = std::result::Result; @@ -120,1074 +137,1074 @@ pub struct Deserializer<'de> { str_offset: usize, iidx: usize, } - -impl<'de> Deserializer<'de> { - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn error(&self, error: ErrorType) -> Error { - Error::new(self.idx, self.iidx, self.c() as char, error) - } - // By convention, `Deserializer` constructors are named like `from_xyz`. - // That way basic use cases are satisfied by something like - // `serde_json::from_str(...)` while advanced use cases that require a - // deserializer can make one with `serde_json::Deserializer::from_str(...)`. 
- pub fn from_slice(input: &'de mut [u8]) -> Result { - // We have to pick an initial size of the structural indexes. - // 6 is a heuristic that seems to work well for the benchmark - // data and limit re-allocation frequency. - - let len = input.len(); - - let buf_start: usize = input.as_ptr() as *const () as usize; - let needs_relocation = (buf_start + input.len()) % page_size::get() < SIMDJSON_PADDING; - - let s1_result: std::result::Result, ErrorType> = if needs_relocation { - let mut data: Vec = Vec::with_capacity(len + SIMDJSON_PADDING); - unsafe { - data.set_len(len + 1); - data.as_mut_slice() - .get_unchecked_mut(0..len) - .clone_from_slice(input); - *(data.get_unchecked_mut(len)) = 0; - data.set_len(len); - Deserializer::find_structural_bits(&data) - } - } else { - unsafe { Deserializer::find_structural_bits(input) } - }; - let structural_indexes = match s1_result { - Ok(i) => i, - Err(t) => { - return Err(Error::generic(t)); - } - }; - - let counts = Deserializer::validate(input, &structural_indexes)?; - - let strings = Vec::with_capacity(len + SIMDJSON_PADDING); - - Ok(Deserializer { - counts, - structural_indexes, - input, - idx: 0, - strings, - str_offset: 0, - iidx: 0, - }) - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn skip(&mut self) { - self.idx += 1; - self.iidx = unsafe { *self.structural_indexes.get_unchecked(self.idx) as usize }; - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn c(&self) -> u8 { - unsafe { - *self - .input - .get_unchecked(*self.structural_indexes.get_unchecked(self.idx) as usize) - } - } - - // pull out the check so we don't need to - // stry every time - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn next_(&mut self) -> u8 { - unsafe { - self.idx += 1; - self.iidx = *self.structural_indexes.get_unchecked(self.idx) as usize; - *self.input.get_unchecked(self.iidx) - } - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn count_elements(&self) -> usize { - 
unsafe { *self.counts.get_unchecked(self.idx) } - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn parse_number_root(&mut self, minus: bool) -> Result { - let input = unsafe { &self.input.get_unchecked(self.iidx..) }; - let len = input.len(); - let mut copy = vec![0u8; len + SIMDJSON_PADDING]; - copy[len] = 0; - unsafe { - copy.as_mut_ptr().copy_from(input.as_ptr(), len); - }; - self.parse_number_int(©, minus) - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn parse_number(&mut self, minus: bool) -> Result { - let input = unsafe { &self.input.get_unchecked(self.iidx..) }; - let len = input.len(); - if len < SIMDJSON_PADDING { - let mut copy = vec![0u8; len + SIMDJSON_PADDING]; - unsafe { - copy.as_mut_ptr().copy_from(input.as_ptr(), len); - }; - self.parse_number_int(©, minus) - } else { - self.parse_number_int(input, minus) - } - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - fn parse_number_(&mut self, minus: bool) -> Result { - let input = unsafe { &self.input.get_unchecked(self.iidx..) 
}; - self.parse_number_int(input, minus) - } -} - -#[cfg(test)] -mod tests { - use super::serde::from_slice; - use super::{ - owned::to_value, owned::Map, owned::Value, to_borrowed_value, to_owned_value, Deserializer, - }; - use halfbrown::HashMap; - use proptest::prelude::*; - use serde::Deserialize; - use serde_json; - - #[test] - fn count1() { - let mut d = String::from("[]"); - let mut d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(&mut d).expect(""); - assert_eq!(simd.counts[1], 0); - } - - #[test] - fn count2() { - let mut d = String::from("[1]"); - let mut d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(&mut d).expect(""); - assert_eq!(simd.counts[1], 1); - } - - #[test] - fn count3() { - let mut d = String::from("[1,2]"); - let mut d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(&mut d).expect(""); - assert_eq!(simd.counts[1], 2); - } - - #[test] - fn count4() { - let mut d = String::from(" [ 1 , [ 3 ] , 2 ]"); - let mut d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(&mut d).expect(""); - assert_eq!(simd.counts[1], 3); - assert_eq!(simd.counts[4], 1); - } - - #[test] - fn count5() { - let mut d = String::from("[[],null,null]"); - let mut d = unsafe { d.as_bytes_mut() }; - let simd = Deserializer::from_slice(&mut d).expect(""); - assert_eq!(simd.counts[1], 3); - assert_eq!(simd.counts[2], 0); - } - - #[test] - fn empty() { - let mut d = String::from(""); - let mut d = unsafe { d.as_bytes_mut() }; - let v_simd = from_slice::(&mut d); - let v_serde = serde_json::from_slice::(d); - assert!(v_simd.is_err()); - assert!(v_serde.is_err()); - } - - #[test] - fn bool_true() { - let mut d = String::from("true"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - 
assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(true))); - } - - #[test] - fn bool_false() { - let mut d = String::from("false"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(false))); - //assert!(false) - } - - #[test] - fn union() { - let mut d = String::from("null"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::Null)); - } - - #[test] - fn int() { - let mut d = String::from("42"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(42))); - } - - #[test] - fn zero() { - let mut d = String::from("0"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(0))); - } - - #[test] - fn one() { - let mut d = String::from("1"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = 
from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(1))); - } - - #[test] - fn minus_one() { - let mut d = String::from("-1"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(-1))); - } - - #[test] - fn float() { - let mut d = String::from("23.0"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!(to_value(&mut d1), Ok(Value::from(23.0))); - } - - #[test] - fn string() { - let mut d = String::from(r#""snot""#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(to_value(&mut d1), Ok(Value::from("snot"))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn lonely_quote() { - let mut d = String::from(r#"""#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(&mut d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - - #[test] - fn lonely_quote1() { - let mut d = String::from(r#"["]"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(&mut d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - #[test] - fn lonely_quote2() { - let mut d = String::from(r#"[1, "]"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde = 
serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(&mut d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - - #[test] - fn lonely_quote3() { - let mut d = String::from(r#"{": 1}"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde = serde_json::from_slice::(d).is_err(); - let v_simd = from_slice::(&mut d).is_err(); - assert!(v_simd); - assert!(v_serde); - } - - #[test] - fn empty_string() { - let mut d = String::from(r#""""#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(to_value(&mut d1), Ok(Value::from(""))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn empty_array() { - let mut d = String::from(r#"[]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); - let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); - assert_eq!(to_value(&mut d1), Ok(Value::Array(vec![]))); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn malformed_array() { - let mut d = String::from(r#"[["#); - let mut d1 = d.clone(); - let mut d2 = d.clone(); - let mut d = unsafe { d.as_bytes_mut() }; - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d2 = unsafe { d2.as_bytes_mut() }; - let v_serde: Result = serde_json::from_slice(d); - let v_simd_ov = to_owned_value(&mut d); - let v_simd_bv = to_borrowed_value(&mut d1); - let v_simd: Result = from_slice(&mut d2); - assert!(v_simd_ov.is_err()); - assert!(v_simd_bv.is_err()); - assert!(v_simd.is_err()); - assert!(v_serde.is_err()); - } - - #[test] - fn double_array() { - let mut d = String::from(r#"[[]]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let 
v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); - let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![Value::Array(vec![])])) - ); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn null_null_array() { - let mut d = String::from(r#"[[],null,null]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); - let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![ - Value::Array(vec![]), - Value::Null, - Value::Null, - ])) - ); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn one_element_array() { - let mut d = String::from(r#"["snot"]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![Value::from("snot")])) - ); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn two_element_array() { - let mut d = String::from(r#"["snot", "badger"]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![ - Value::from("snot"), - Value::from("badger") - ])) - ); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn list() { - let mut d = String::from(r#"[42, 23.0, "snot badger"]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = 
serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![ - Value::from(42), - Value::from(23.0), - Value::from("snot badger") - ])) - ); - } - - #[test] - fn nested_list1() { - let mut d = String::from(r#"[42, [23.0, "snot"], "bad", "ger"]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![ - Value::from(42), - Value::Array(vec![Value::from(23.0), Value::from("snot")]), - Value::from("bad"), - Value::from("ger") - ])) - ); - - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn nested_list2() { - let mut d = String::from(r#"[42, [23.0, "snot"], {"bad": "ger"}]"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn utf8() { - let mut d = String::from(r#""\u000e""#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, "\u{e}"); - // NOTE: serde is broken for this - //assert_eq!(v_serde, "\u{e}"); - //assert_eq!(v_simd, v_serde) - } - - #[test] - fn unicode() { - let mut d = String::from(r#""¡\"""#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn odd_array() { - let mut d = String::from("[{},null]"); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: 
serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![Value::Object(Map::new()), Value::Null])) - ); - } - - #[test] - fn map2() { - let mut d = String::from(r#"[{"\u0000":null}]"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn null() { - let mut d = String::from(r#"null"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - assert_eq!(to_value(&mut d1), Ok(Value::Null)); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - #[test] - fn null_null() { - let mut d = String::from(r#"[null, null]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![Value::Null, Value::Null,])) - ); - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn nested_null() { - let mut d = String::from(r#"[[null, null]]"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![Value::Array(vec![ - Value::Null, - Value::Null, - ])])) - ); - - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - } - - #[test] - fn nestednested_null() { - let mut d = String::from(r#"[[[null, null]]]"#); - let 
mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - assert_eq!( - to_value(&mut d1), - Ok(Value::Array(vec![Value::Array(vec![Value::Array(vec![ - Value::Null, - Value::Null, - ])])])) - ); - } - - #[test] - fn odd_array2() { - let mut d = String::from("[[\"\\u0000\\\"\"]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn odd_array3() { - let mut d = String::from("[{\"\\u0000\\u0000\":null}]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn odd_array4() { - let mut d = String::from("[{\"\\u0000𐀀a\":null}]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn float1() { - let mut d = String::from("2.3250706903316115e307"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - // We ignore this since serde is less percise on this test - #[ignore] - #[test] - fn float2() { - let mut d = String::from("-4.5512678569607477e306"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); - 
assert_eq!(v_simd, v_serde) - } - - #[test] - fn map0() { - let mut d = String::from(r#"{"snot": "badger"}"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - let mut h = Map::new(); - h.insert("snot".into(), Value::from("badger")); - assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); - } - - #[test] - fn map1() { - let mut d = String::from(r#"{"snot": "badger", "badger": "snot"}"#); - let mut d1 = d.clone(); - let mut d1 = unsafe { d1.as_bytes_mut() }; - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); - let v_simd: serde_json::Value = from_slice(&mut d).expect(""); - assert_eq!(v_simd, v_serde); - let mut h = Map::new(); - h.insert("snot".into(), Value::from("badger")); - h.insert("badger".into(), Value::from("snot")); - assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); - } - - #[test] - fn tpl1() { - let mut d = String::from("[-65.613616999999977, 43.420273000000009]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: (f32, f32) = serde_json::from_slice(d).expect("serde_json"); - let v_simd: (f32, f32) = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn tpl2() { - let mut d = String::from("[[-65.613616999999977, 43.420273000000009]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn tpl3() { - let mut d = String::from( - "[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]", - ); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec<(f32, f32)> = 
serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - #[test] - fn tpl4() { - let mut d = String::from("[[[-65.613616999999977,43.420273000000009]]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - #[test] - fn tpl5() { - let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn tpl6() { - let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn tpl7() { - let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[derive(Deserialize, PartialEq, Debug)] - struct Obj { - a: u64, - b: u64, - } - - #[derive(Deserialize, PartialEq, Debug)] - struct Obj1 { - a: Obj, - } - - #[test] - fn obj() { - let mut d = String::from(r#"{"a": 1, "b":1}"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Obj = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Obj = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn obj2() { - let mut 
d = - String::from(r#"{"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); - let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn obj3() { - let mut d = String::from( - r#"{"c": {"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}}"#, - ); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: HashMap> = - serde_json::from_slice(d).expect("serde_json"); - let v_simd: HashMap> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn obj4() { - let mut d = String::from(r#"{"c": {"a": {"a": 1, "b":1}}}"#); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); - let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn vecvec() { - let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]], [[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); - let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn crazy_string() { - // there is unicode in here! 
- let d = "\"𐀀𐀀 𐀀𐀀0 𐀀A\\u00000A0 A \\u000b\""; - let mut d = String::from(d); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); - let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - #[test] - fn event() { - #[derive(Deserialize, Debug, PartialEq)] - #[serde(deny_unknown_fields, rename_all = "camelCase")] - pub struct CitmCatalog { - pub area_names: HashMap, - pub audience_sub_category_names: HashMap, - pub block_names: HashMap, - pub events: HashMap, - } - pub type Id = u32; - #[derive(Deserialize, Debug, PartialEq)] - #[serde(deny_unknown_fields, rename_all = "camelCase")] - pub struct Event { - pub description: (), - pub id: Id, - pub logo: Option, - pub name: String, - pub sub_topic_ids: Vec, - pub subject_code: (), - pub subtitle: (), - pub topic_ids: Vec, - } - - let mut d = String::from( - r#" -{ - "areaNames": { - "205705993": "Arrière-scène central", - "205705994": "1er balcon central", - "205705995": "2ème balcon bergerie cour", - "205705996": "2ème balcon bergerie jardin", - "205705998": "1er balcon bergerie jardin", - "205705999": "1er balcon bergerie cour", - "205706000": "Arrière-scène jardin", - "205706001": "Arrière-scène cour", - "205706002": "2ème balcon jardin", - "205706003": "2ème balcon cour", - "205706004": "2ème Balcon central", - "205706005": "1er balcon jardin", - "205706006": "1er balcon cour", - "205706007": "Orchestre central", - "205706008": "Orchestre jardin", - "205706009": "Orchestre cour", - "342752287": "Zone physique secrète" - }, - "audienceSubCategoryNames": { - "337100890": "Abonné" - }, - "blockNames": {}, - "events": { - "138586341": { - "description": null, - "id": 138586341, - "logo": null, - "name": "30th Anniversary Tour", - "subTopicIds": [ - 337184269, - 337184283 - ], - "subjectCode": null, - "subtitle": null, - "topicIds": [ - 324846099, - 107888604 - ] - }, - "138586345": { - 
"description": null, - "id": 138586345, - "logo": "/images/UE0AAAAACEKo6QAAAAZDSVRN", - "name": "Berliner Philharmoniker", - "subTopicIds": [ - 337184268, - 337184283, - 337184275 - ], - "subjectCode": null, - "subtitle": null, - "topicIds": [ - 324846099, - 107888604, - 324846100 - ] - } - } -} -"#, - ); - let mut d = unsafe { d.as_bytes_mut() }; - let v_serde: CitmCatalog = serde_json::from_slice(d).expect("serde_json"); - let v_simd: CitmCatalog = from_slice(&mut d).expect("simd_json"); - assert_eq!(v_simd, v_serde) - } - - // How much do we care about this, it's within the same range and - // based on floating point math inprecisions during parsing. - // Is this a real issue worth improving? - #[test] - fn silly_float1() { - let v = Value::from(3.0901448042322017e305); - let s = v.to_string(); - dbg!(&s); - let mut bytes = s.as_bytes().to_vec(); - let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); - assert_eq!(v, parsed); - } - - #[test] - #[ignore] - fn silly_float2() { - let v = Value::from(-6.990585694841803e305); - let s = v.to_string(); - dbg!(&s); - let mut bytes = s.as_bytes().to_vec(); - let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); - assert_eq!(v, parsed); - } - - //6.576692109929364e305 - fn arb_json() -> BoxedStrategy { - let leaf = prop_oneof![ - Just(Value::Null), - any::().prop_map(Value::Bool), - // (-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // The float parsing of simd and serde are too different - any::().prop_map(|i| json!(i)), - ".*".prop_map(Value::from), - ]; - leaf.prop_recursive( - 8, // 8 levels deep - 256, // Shoot for maximum size of 256 nodes - 10, // We put up to 10 items per collection - |inner| { - prop_oneof![ - // Take the inner strategy and make the two recursive cases. 
- prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), - prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), - ] - }, - ) - .prop_map(|v| serde_json::to_string(&v).expect("").to_string()) - .boxed() - } - - fn arb_json_value() -> BoxedStrategy { - let leaf = prop_oneof![ - Just(Value::Null), - any::().prop_map(Value::Bool), - //(-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // damn you float! - any::().prop_map(|i| json!(i)), - ".*".prop_map(Value::from), - ]; - leaf.prop_recursive( - 8, // 8 levels deep - 256, // Shoot for maximum size of 256 nodes - 10, // We put up to 10 items per collection - |inner| { - prop_oneof![ - // Take the inner strategy and make the two recursive cases. - prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), - prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), - ] - }, - ) - .boxed() - } - - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - fork: true, - .. ProptestConfig::default() - })] - - #[test] - fn prop_json_encode_decode(val in arb_json_value()) { - let mut encoded: Vec = Vec::new(); - let _ = val.write(&mut encoded); - println!("{}", String::from_utf8(encoded.clone()).unwrap()); - let res = to_owned_value(&mut encoded).unwrap(); - assert_eq!(val, res); - } - - } - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - fork: true, - .. ProptestConfig::default() - })] - - #[test] - fn prop_json(d in arb_json()) { - if let Ok(v_serde) = serde_json::from_slice::(&d.as_bytes()) { - let mut d1 = d.clone(); - let d1 = unsafe{ d1.as_bytes_mut()}; - let v_simd_serde: serde_json::Value = from_slice(d1).expect(""); - // We add our own encoder in here. 
- let mut d2 = v_simd_serde.to_string(); - let d2 = unsafe{ d2.as_bytes_mut()}; - let mut d3 = d.clone(); - let d3 = unsafe{ d3.as_bytes_mut()}; - assert_eq!(v_simd_serde, v_serde); - let v_simd_owned = to_owned_value(d2); - assert!(v_simd_owned.is_ok()); - let v_simd_borrowed = to_borrowed_value(d3); - dbg!(&v_simd_borrowed); - assert!(v_simd_borrowed.is_ok()); - assert_eq!(v_simd_owned.unwrap(), super::OwnedValue::from(v_simd_borrowed.unwrap())); - } - - } - - } - - fn arb_junk() -> BoxedStrategy> { - prop::collection::vec(any::(), 0..(1024 * 8)).boxed() - } - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - fork: true, - .. ProptestConfig::default() - })] - #[test] - fn prop_junk(d in arb_junk()) { - let mut d1 = d.clone(); - let mut d2 = d.clone(); - let mut d3 = d.clone(); - - let _ = from_slice::(&mut d1); - let _ = to_borrowed_value(&mut d2); - let _ = to_owned_value(&mut d3); - - } - } - - proptest! { - #![proptest_config(ProptestConfig { - // Setting both fork and timeout is redundant since timeout implies - // fork, but both are shown for clarity. - fork: true, - .. ProptestConfig::default() - })] - - #[test] - fn prop_string(d in "\\PC*") { - let mut d1 = d.clone(); - let mut d1 = unsafe{ d1.as_bytes_mut()}; - let mut d2 = d.clone(); - let mut d2 = unsafe{ d2.as_bytes_mut()}; - let mut d3 = d.clone(); - let mut d3 = unsafe{ d3.as_bytes_mut()}; - let _ = from_slice::(&mut d1); - let _ = to_borrowed_value(&mut d2); - let _ = to_owned_value(&mut d3); - - } - } -} +// +//impl<'de> Deserializer<'de> { +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn error(&self, error: ErrorType) -> Error { +// Error::new(self.idx, self.iidx, self.c() as char, error) +// } +// // By convention, `Deserializer` constructors are named like `from_xyz`. 
+// // That way basic use cases are satisfied by something like +// // `serde_json::from_str(...)` while advanced use cases that require a +// // deserializer can make one with `serde_json::Deserializer::from_str(...)`. +// pub fn from_slice(input: &'de mut [u8]) -> Result { +// // We have to pick an initial size of the structural indexes. +// // 6 is a heuristic that seems to work well for the benchmark +// // data and limit re-allocation frequency. +// +// let len = input.len(); +// +// let buf_start: usize = input.as_ptr() as *const () as usize; +// let needs_relocation = (buf_start + input.len()) % page_size::get() < SIMDJSON_PADDING; +// +// let s1_result: std::result::Result, ErrorType> = if needs_relocation { +// let mut data: Vec = Vec::with_capacity(len + SIMDJSON_PADDING); +// unsafe { +// data.set_len(len + 1); +// data.as_mut_slice() +// .get_unchecked_mut(0..len) +// .clone_from_slice(input); +// *(data.get_unchecked_mut(len)) = 0; +// data.set_len(len); +// Deserializer::find_structural_bits(&data) +// } +// } else { +// unsafe { Deserializer::find_structural_bits(input) } +// }; +// let structural_indexes = match s1_result { +// Ok(i) => i, +// Err(t) => { +// return Err(Error::generic(t)); +// } +// }; +// +// let counts = Deserializer::validate(input, &structural_indexes)?; +// +// let strings = Vec::with_capacity(len + SIMDJSON_PADDING); +// +// Ok(Deserializer { +// counts, +// structural_indexes, +// input, +// idx: 0, +// strings, +// str_offset: 0, +// iidx: 0, +// }) +// } +// +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn skip(&mut self) { +// self.idx += 1; +// self.iidx = unsafe { *self.structural_indexes.get_unchecked(self.idx) as usize }; +// } +// +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn c(&self) -> u8 { +// unsafe { +// *self +// .input +// .get_unchecked(*self.structural_indexes.get_unchecked(self.idx) as usize) +// } +// } +// +// // pull out the check so we don't need to +// // stry 
every time +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn next_(&mut self) -> u8 { +// unsafe { +// self.idx += 1; +// self.iidx = *self.structural_indexes.get_unchecked(self.idx) as usize; +// *self.input.get_unchecked(self.iidx) +// } +// } +// +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn count_elements(&self) -> usize { +// unsafe { *self.counts.get_unchecked(self.idx) } +// } +// +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn parse_number_root(&mut self, minus: bool) -> Result { +// let input = unsafe { &self.input.get_unchecked(self.iidx..) }; +// let len = input.len(); +// let mut copy = vec![0u8; len + SIMDJSON_PADDING]; +// copy[len] = 0; +// unsafe { +// copy.as_mut_ptr().copy_from(input.as_ptr(), len); +// }; +// self.parse_number_int(©, minus) +// } +// +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn parse_number(&mut self, minus: bool) -> Result { +// let input = unsafe { &self.input.get_unchecked(self.iidx..) }; +// let len = input.len(); +// if len < SIMDJSON_PADDING { +// let mut copy = vec![0u8; len + SIMDJSON_PADDING]; +// unsafe { +// copy.as_mut_ptr().copy_from(input.as_ptr(), len); +// }; +// self.parse_number_int(©, minus) +// } else { +// self.parse_number_int(input, minus) +// } +// } +// +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// fn parse_number_(&mut self, minus: bool) -> Result { +// let input = unsafe { &self.input.get_unchecked(self.iidx..) 
}; +// self.parse_number_int(input, minus) +// } +//} +// +//#[cfg(test)] +//mod tests { +// use super::serde::from_slice; +// use super::{ +// owned::to_value, owned::Map, owned::Value, to_borrowed_value, to_owned_value, Deserializer, +// }; +// use halfbrown::HashMap; +// use proptest::prelude::*; +// use serde::Deserialize; +// use serde_json; +// +// #[test] +// fn count1() { +// let mut d = String::from("[]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let simd = Deserializer::from_slice(&mut d).expect(""); +// assert_eq!(simd.counts[1], 0); +// } +// +// #[test] +// fn count2() { +// let mut d = String::from("[1]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let simd = Deserializer::from_slice(&mut d).expect(""); +// assert_eq!(simd.counts[1], 1); +// } +// +// #[test] +// fn count3() { +// let mut d = String::from("[1,2]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let simd = Deserializer::from_slice(&mut d).expect(""); +// assert_eq!(simd.counts[1], 2); +// } +// +// #[test] +// fn count4() { +// let mut d = String::from(" [ 1 , [ 3 ] , 2 ]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let simd = Deserializer::from_slice(&mut d).expect(""); +// assert_eq!(simd.counts[1], 3); +// assert_eq!(simd.counts[4], 1); +// } +// +// #[test] +// fn count5() { +// let mut d = String::from("[[],null,null]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let simd = Deserializer::from_slice(&mut d).expect(""); +// assert_eq!(simd.counts[1], 3); +// assert_eq!(simd.counts[2], 0); +// } +// +// #[test] +// fn empty() { +// let mut d = String::from(""); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_simd = from_slice::(&mut d); +// let v_serde = serde_json::from_slice::(d); +// assert!(v_simd.is_err()); +// assert!(v_serde.is_err()); +// } +// +// #[test] +// fn bool_true() { +// let mut d = String::from("true"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// +// 
let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(true))); +// } +// +// #[test] +// fn bool_false() { +// let mut d = String::from("false"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(false))); +// //assert!(false) +// } +// +// #[test] +// fn union() { +// let mut d = String::from("null"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::Null)); +// } +// +// #[test] +// fn int() { +// let mut d = String::from("42"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(42))); +// } +// +// #[test] +// fn zero() { +// let mut d = String::from("0"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(0))); +// } +// +// #[test] +// fn one() { +// 
let mut d = String::from("1"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(1))); +// } +// +// #[test] +// fn minus_one() { +// let mut d = String::from("-1"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(-1))); +// } +// +// #[test] +// fn float() { +// let mut d = String::from("23.0"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!(to_value(&mut d1), Ok(Value::from(23.0))); +// } +// +// #[test] +// fn string() { +// let mut d = String::from(r#""snot""#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(to_value(&mut d1), Ok(Value::from("snot"))); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn lonely_quote() { +// let mut d = String::from(r#"""#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde = serde_json::from_slice::(d).is_err(); +// let v_simd = from_slice::(&mut d).is_err(); +// assert!(v_simd); +// assert!(v_serde); +// } +// +// #[test] +// fn lonely_quote1() 
{ +// let mut d = String::from(r#"["]"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde = serde_json::from_slice::(d).is_err(); +// let v_simd = from_slice::(&mut d).is_err(); +// assert!(v_simd); +// assert!(v_serde); +// } +// #[test] +// fn lonely_quote2() { +// let mut d = String::from(r#"[1, "]"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde = serde_json::from_slice::(d).is_err(); +// let v_simd = from_slice::(&mut d).is_err(); +// assert!(v_simd); +// assert!(v_serde); +// } +// +// #[test] +// fn lonely_quote3() { +// let mut d = String::from(r#"{": 1}"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde = serde_json::from_slice::(d).is_err(); +// let v_simd = from_slice::(&mut d).is_err(); +// assert!(v_simd); +// assert!(v_serde); +// } +// +// #[test] +// fn empty_string() { +// let mut d = String::from(r#""""#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(to_value(&mut d1), Ok(Value::from(""))); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn empty_array() { +// let mut d = String::from(r#"[]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); +// let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); +// assert_eq!(to_value(&mut d1), Ok(Value::Array(vec![]))); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn malformed_array() { +// let mut d = String::from(r#"[["#); +// let mut d1 = d.clone(); +// let mut d2 = d.clone(); +// let mut d = unsafe { d.as_bytes_mut() }; +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d2 = unsafe { d2.as_bytes_mut() }; +// let v_serde: Result = 
serde_json::from_slice(d); +// let v_simd_ov = to_owned_value(&mut d); +// let v_simd_bv = to_borrowed_value(&mut d1); +// let v_simd: Result = from_slice(&mut d2); +// assert!(v_simd_ov.is_err()); +// assert!(v_simd_bv.is_err()); +// assert!(v_simd.is_err()); +// assert!(v_serde.is_err()); +// } +// +// #[test] +// fn double_array() { +// let mut d = String::from(r#"[[]]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); +// let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![Value::Array(vec![])])) +// ); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn null_null_array() { +// let mut d = String::from(r#"[[],null,null]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); +// let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![ +// Value::Array(vec![]), +// Value::Null, +// Value::Null, +// ])) +// ); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn one_element_array() { +// let mut d = String::from(r#"["snot"]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![Value::from("snot")])) +// ); +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn two_element_array() { +// let mut d = String::from(r#"["snot", "badger"]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { 
d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![ +// Value::from("snot"), +// Value::from("badger") +// ])) +// ); +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn list() { +// let mut d = String::from(r#"[42, 23.0, "snot badger"]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![ +// Value::from(42), +// Value::from(23.0), +// Value::from("snot badger") +// ])) +// ); +// } +// +// #[test] +// fn nested_list1() { +// let mut d = String::from(r#"[42, [23.0, "snot"], "bad", "ger"]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![ +// Value::from(42), +// Value::Array(vec![Value::from(23.0), Value::from("snot")]), +// Value::from("bad"), +// Value::from("ger") +// ])) +// ); +// +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn nested_list2() { +// let mut d = String::from(r#"[42, [23.0, "snot"], {"bad": "ger"}]"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn utf8() { +// let mut d = String::from(r#""\u000e""#); +// let mut d = unsafe { 
d.as_bytes_mut() }; +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, "\u{e}"); +// // NOTE: serde is broken for this +// //assert_eq!(v_serde, "\u{e}"); +// //assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn unicode() { +// let mut d = String::from(r#""¡\"""#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn odd_array() { +// let mut d = String::from("[{},null]"); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![Value::Object(Map::new()), Value::Null])) +// ); +// } +// +// #[test] +// fn map2() { +// let mut d = String::from(r#"[{"\u0000":null}]"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn null() { +// let mut d = String::from(r#"null"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// assert_eq!(to_value(&mut d1), Ok(Value::Null)); +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// #[test] +// fn null_null() { +// let mut d = String::from(r#"[null, null]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// assert_eq!( +// 
to_value(&mut d1), +// Ok(Value::Array(vec![Value::Null, Value::Null,])) +// ); +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn nested_null() { +// let mut d = String::from(r#"[[null, null]]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![Value::Array(vec![ +// Value::Null, +// Value::Null, +// ])])) +// ); +// +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// } +// +// #[test] +// fn nestednested_null() { +// let mut d = String::from(r#"[[[null, null]]]"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// assert_eq!( +// to_value(&mut d1), +// Ok(Value::Array(vec![Value::Array(vec![Value::Array(vec![ +// Value::Null, +// Value::Null, +// ])])])) +// ); +// } +// +// #[test] +// fn odd_array2() { +// let mut d = String::from("[[\"\\u0000\\\"\"]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn odd_array3() { +// let mut d = String::from("[{\"\\u0000\\u0000\":null}]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] 
+// fn odd_array4() { +// let mut d = String::from("[{\"\\u0000𐀀a\":null}]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn float1() { +// let mut d = String::from("2.3250706903316115e307"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// // We ignore this since serde is less percise on this test +// #[ignore] +// #[test] +// fn float2() { +// let mut d = String::from("-4.5512678569607477e306"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn map0() { +// let mut d = String::from(r#"{"snot": "badger"}"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// let mut h = Map::new(); +// h.insert("snot".into(), Value::from("badger")); +// assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); +// } +// +// #[test] +// fn map1() { +// let mut d = String::from(r#"{"snot": "badger", "badger": "snot"}"#); +// let mut d1 = d.clone(); +// let mut d1 = unsafe { d1.as_bytes_mut() }; +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); +// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); +// assert_eq!(v_simd, v_serde); +// let mut h = 
Map::new(); +// h.insert("snot".into(), Value::from("badger")); +// h.insert("badger".into(), Value::from("snot")); +// assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); +// } +// +// #[test] +// fn tpl1() { +// let mut d = String::from("[-65.613616999999977, 43.420273000000009]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: (f32, f32) = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: (f32, f32) = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn tpl2() { +// let mut d = String::from("[[-65.613616999999977, 43.420273000000009]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn tpl3() { +// let mut d = String::from( +// "[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]", +// ); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// #[test] +// fn tpl4() { +// let mut d = String::from("[[[-65.613616999999977,43.420273000000009]]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// #[test] +// fn tpl5() { +// let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn tpl6() { +// let mut 
d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn tpl7() { +// let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[derive(Deserialize, PartialEq, Debug)] +// struct Obj { +// a: u64, +// b: u64, +// } +// +// #[derive(Deserialize, PartialEq, Debug)] +// struct Obj1 { +// a: Obj, +// } +// +// #[test] +// fn obj() { +// let mut d = String::from(r#"{"a": 1, "b":1}"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Obj = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Obj = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn obj2() { +// let mut d = +// String::from(r#"{"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn obj3() { +// let mut d = String::from( +// r#"{"c": {"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}}"#, +// ); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: HashMap> = +// serde_json::from_slice(d).expect("serde_json"); +// let v_simd: HashMap> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn obj4() { +// let mut d = 
String::from(r#"{"c": {"a": {"a": 1, "b":1}}}"#); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn vecvec() { +// let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]], [[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn crazy_string() { +// // there is unicode in here! +// let d = "\"𐀀𐀀 𐀀𐀀0 𐀀A\\u00000A0 A \\u000b\""; +// let mut d = String::from(d); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// #[test] +// fn event() { +// #[derive(Deserialize, Debug, PartialEq)] +// #[serde(deny_unknown_fields, rename_all = "camelCase")] +// pub struct CitmCatalog { +// pub area_names: HashMap, +// pub audience_sub_category_names: HashMap, +// pub block_names: HashMap, +// pub events: HashMap, +// } +// pub type Id = u32; +// #[derive(Deserialize, Debug, PartialEq)] +// #[serde(deny_unknown_fields, rename_all = "camelCase")] +// pub struct Event { +// pub description: (), +// pub id: Id, +// pub logo: Option, +// pub name: String, +// pub sub_topic_ids: Vec, +// pub subject_code: (), +// pub subtitle: (), +// pub topic_ids: Vec, +// } +// +// let mut d = String::from( +// r#" +//{ +// "areaNames": { +// "205705993": "Arrière-scène central", +// "205705994": "1er balcon central", +// "205705995": "2ème balcon bergerie cour", +// "205705996": "2ème balcon 
bergerie jardin", +// "205705998": "1er balcon bergerie jardin", +// "205705999": "1er balcon bergerie cour", +// "205706000": "Arrière-scène jardin", +// "205706001": "Arrière-scène cour", +// "205706002": "2ème balcon jardin", +// "205706003": "2ème balcon cour", +// "205706004": "2ème Balcon central", +// "205706005": "1er balcon jardin", +// "205706006": "1er balcon cour", +// "205706007": "Orchestre central", +// "205706008": "Orchestre jardin", +// "205706009": "Orchestre cour", +// "342752287": "Zone physique secrète" +// }, +// "audienceSubCategoryNames": { +// "337100890": "Abonné" +// }, +// "blockNames": {}, +// "events": { +// "138586341": { +// "description": null, +// "id": 138586341, +// "logo": null, +// "name": "30th Anniversary Tour", +// "subTopicIds": [ +// 337184269, +// 337184283 +// ], +// "subjectCode": null, +// "subtitle": null, +// "topicIds": [ +// 324846099, +// 107888604 +// ] +// }, +// "138586345": { +// "description": null, +// "id": 138586345, +// "logo": "/images/UE0AAAAACEKo6QAAAAZDSVRN", +// "name": "Berliner Philharmoniker", +// "subTopicIds": [ +// 337184268, +// 337184283, +// 337184275 +// ], +// "subjectCode": null, +// "subtitle": null, +// "topicIds": [ +// 324846099, +// 107888604, +// 324846100 +// ] +// } +// } +//} +//"#, +// ); +// let mut d = unsafe { d.as_bytes_mut() }; +// let v_serde: CitmCatalog = serde_json::from_slice(d).expect("serde_json"); +// let v_simd: CitmCatalog = from_slice(&mut d).expect("simd_json"); +// assert_eq!(v_simd, v_serde) +// } +// +// // How much do we care about this, it's within the same range and +// // based on floating point math inprecisions during parsing. +// // Is this a real issue worth improving? 
+// #[test] +// fn silly_float1() { +// let v = Value::from(3.0901448042322017e305); +// let s = v.to_string(); +// dbg!(&s); +// let mut bytes = s.as_bytes().to_vec(); +// let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); +// assert_eq!(v, parsed); +// } +// +// #[test] +// #[ignore] +// fn silly_float2() { +// let v = Value::from(-6.990585694841803e305); +// let s = v.to_string(); +// dbg!(&s); +// let mut bytes = s.as_bytes().to_vec(); +// let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); +// assert_eq!(v, parsed); +// } +// +// //6.576692109929364e305 +// fn arb_json() -> BoxedStrategy { +// let leaf = prop_oneof![ +// Just(Value::Null), +// any::().prop_map(Value::Bool), +// // (-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // The float parsing of simd and serde are too different +// any::().prop_map(|i| json!(i)), +// ".*".prop_map(Value::from), +// ]; +// leaf.prop_recursive( +// 8, // 8 levels deep +// 256, // Shoot for maximum size of 256 nodes +// 10, // We put up to 10 items per collection +// |inner| { +// prop_oneof![ +// // Take the inner strategy and make the two recursive cases. +// prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), +// prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), +// ] +// }, +// ) +// .prop_map(|v| serde_json::to_string(&v).expect("").to_string()) +// .boxed() +// } +// +// fn arb_json_value() -> BoxedStrategy { +// let leaf = prop_oneof![ +// Just(Value::Null), +// any::().prop_map(Value::Bool), +// //(-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // damn you float! +// any::().prop_map(|i| json!(i)), +// ".*".prop_map(Value::from), +// ]; +// leaf.prop_recursive( +// 8, // 8 levels deep +// 256, // Shoot for maximum size of 256 nodes +// 10, // We put up to 10 items per collection +// |inner| { +// prop_oneof![ +// // Take the inner strategy and make the two recursive cases. 
+// prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), +// prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), +// ] +// }, +// ) +// .boxed() +// } +// +// proptest! { +// #![proptest_config(ProptestConfig { +// // Setting both fork and timeout is redundant since timeout implies +// // fork, but both are shown for clarity. +// fork: true, +// .. ProptestConfig::default() +// })] +// +// #[test] +// fn prop_json_encode_decode(val in arb_json_value()) { +// let mut encoded: Vec = Vec::new(); +// let _ = val.write(&mut encoded); +// println!("{}", String::from_utf8(encoded.clone()).unwrap()); +// let res = to_owned_value(&mut encoded).unwrap(); +// assert_eq!(val, res); +// } +// +// } +// proptest! { +// #![proptest_config(ProptestConfig { +// // Setting both fork and timeout is redundant since timeout implies +// // fork, but both are shown for clarity. +// fork: true, +// .. ProptestConfig::default() +// })] +// +// #[test] +// fn prop_json(d in arb_json()) { +// if let Ok(v_serde) = serde_json::from_slice::(&d.as_bytes()) { +// let mut d1 = d.clone(); +// let d1 = unsafe{ d1.as_bytes_mut()}; +// let v_simd_serde: serde_json::Value = from_slice(d1).expect(""); +// // We add our own encoder in here. +// let mut d2 = v_simd_serde.to_string(); +// let d2 = unsafe{ d2.as_bytes_mut()}; +// let mut d3 = d.clone(); +// let d3 = unsafe{ d3.as_bytes_mut()}; +// assert_eq!(v_simd_serde, v_serde); +// let v_simd_owned = to_owned_value(d2); +// assert!(v_simd_owned.is_ok()); +// let v_simd_borrowed = to_borrowed_value(d3); +// dbg!(&v_simd_borrowed); +// assert!(v_simd_borrowed.is_ok()); +// assert_eq!(v_simd_owned.unwrap(), super::OwnedValue::from(v_simd_borrowed.unwrap())); +// } +// +// } +// +// } +// +// fn arb_junk() -> BoxedStrategy> { +// prop::collection::vec(any::(), 0..(1024 * 8)).boxed() +// } +// proptest! 
{ +// #![proptest_config(ProptestConfig { +// // Setting both fork and timeout is redundant since timeout implies +// // fork, but both are shown for clarity. +// fork: true, +// .. ProptestConfig::default() +// })] +// #[test] +// fn prop_junk(d in arb_junk()) { +// let mut d1 = d.clone(); +// let mut d2 = d.clone(); +// let mut d3 = d.clone(); +// +// let _ = from_slice::(&mut d1); +// let _ = to_borrowed_value(&mut d2); +// let _ = to_owned_value(&mut d3); +// +// } +// } +// +// proptest! { +// #![proptest_config(ProptestConfig { +// // Setting both fork and timeout is redundant since timeout implies +// // fork, but both are shown for clarity. +// fork: true, +// .. ProptestConfig::default() +// })] +// +// #[test] +// fn prop_string(d in "\\PC*") { +// let mut d1 = d.clone(); +// let mut d1 = unsafe{ d1.as_bytes_mut()}; +// let mut d2 = d.clone(); +// let mut d2 = unsafe{ d2.as_bytes_mut()}; +// let mut d3 = d.clone(); +// let mut d3 = unsafe{ d3.as_bytes_mut()}; +// let _ = from_slice::(&mut d1); +// let _ = to_borrowed_value(&mut d2); +// let _ = to_owned_value(&mut d3); +// +// } +// } +//} diff --git a/src/neon/deser.rs b/src/neon/deser.rs new file mode 100644 index 00000000..790eb1b6 --- /dev/null +++ b/src/neon/deser.rs @@ -0,0 +1,204 @@ +// +//use std::mem; +// +//pub use crate::error::{Error, ErrorType}; +//pub use crate::Deserializer; +//pub use crate::Result; +//pub use crate::neon::intrinsics::*; +////pub use crate::neon::utf8check::*; +//pub use crate::stringparse::*; +// +////use crate::portability::trailingzeroes; +// +// +//impl<'de> Deserializer<'de> { +// #[cfg_attr(not(feature = "no-inline"), inline(always))] +// pub fn parse_str_(&mut self) -> Result<&'de str> { +// // Add 1 to skip the initial " +// let idx = self.iidx + 1; +// let mut padding = [0u8; 32]; +// //let mut read: usize = 0; +// +// // we include the terminal '"' so we know where to end +// // This is safe since we check sub's lenght in the range access above and only +// // 
create sub sliced form sub to `sub.len()`. +// +// let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) }; +// let mut src_i: usize = 0; +// let mut len = src_i; +// loop { +// let v: __m128i = if src.len() >= src_i + 16 { +// // This is safe since we ensure src is at least 16 wide +// #[allow(clippy::cast_ptr_alignment)] +// unsafe { +// _mm_loadu_si128(src.as_ptr().add(src_i) as *const __m128i) +// } +// } else { +// unsafe { +// padding +// .get_unchecked_mut(..src.len() - src_i) +// .clone_from_slice(src.get_unchecked(src_i..)); +// // This is safe since we ensure src is at least 32 wide +// #[allow(clippy::cast_ptr_alignment)] +// _mm_loadu_si128(padding.as_ptr() as *const __m128i) +// } +// }; +// +// // store to dest unconditionally - we can overwrite the bits we don't like +// // later +// let bs_bits: u32 = unsafe { +// static_cast_u32!(_mm_movemask_epi8(_mm_cmpeq_epi8( +// v, +// _mm_set1_epi8(b'\\' as i8) +// ))) +// }; +// let quote_mask = unsafe { _mm_cmpeq_epi8(v, _mm_set1_epi8(b'"' as i8)) }; +// let quote_bits = unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; +// if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { +// // we encountered quotes first. Move dst to point to quotes and exit +// // find out where the quote is... +// let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; +// +// /////////////////////// +// // Above, check for overflow in case someone has a crazy string (>=4GB?) +// // But only add the overflow check when the document itself exceeds 4GB +// // Currently unneeded because we refuse to parse docs larger or equal to 4GB. 
+// //////////////////////// +// +// // we advance the point, accounting for the fact that we have a NULl termination +// +// len += quote_dist as usize; +// unsafe { +// let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; +// return Ok(&*v); +// } +// +// // we compare the pointers since we care if they are 'at the same spot' +// // not if they are the same value +// } +// if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { +// // Move to the 'bad' character +// let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); +// len += bs_dist as usize; +// src_i += bs_dist as usize; +// break; +// } else { +// // they are the same. Since they can't co-occur, it means we encountered +// // neither. +// src_i += 16; +// len += 16; +// } +// } +// +// let mut dst_i: usize = 0; +// let dst: &mut [u8] = &mut self.strings; +// +// loop { +// let v: __m128i = if src.len() >= src_i + 16 { +// // This is safe since we ensure src is at least 16 wide +// #[allow(clippy::cast_ptr_alignment)] +// unsafe { +// _mm_loadu_si128(src.as_ptr().add(src_i) as *const __m128i) +// } +// } else { +// unsafe { +// padding +// .get_unchecked_mut(..src.len() - src_i) +// .clone_from_slice(src.get_unchecked(src_i..)); +// // This is safe since we ensure src is at least 16 wide +// #[allow(clippy::cast_ptr_alignment)] +// _mm_loadu_si128(padding.as_ptr() as *const __m128i) +// } +// }; +// +// #[allow(clippy::cast_ptr_alignment)] +// unsafe { +// _mm_storeu_si128(dst.as_mut_ptr().add(dst_i) as *mut __m128i, v) +// }; +// +// // store to dest unconditionally - we can overwrite the bits we don't like +// // later +// let bs_bits: u32 = unsafe { +// static_cast_u32!(_mm_movemask_epi8(_mm_cmpeq_epi8( +// v, +// _mm_set1_epi8(b'\\' as i8) +// ))) +// }; +// let quote_mask = unsafe { _mm_cmpeq_epi8(v, _mm_set1_epi8(b'"' as i8)) }; +// let quote_bits = unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; +// if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { +// // we 
encountered quotes first. Move dst to point to quotes and exit +// // find out where the quote is... +// let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; +// +// /////////////////////// +// // Above, check for overflow in case someone has a crazy string (>=4GB?) +// // But only add the overflow check when the document itself exceeds 4GB +// // Currently unneeded because we refuse to parse docs larger or equal to 4GB. +// //////////////////////// +// +// // we advance the point, accounting for the fact that we have a NULl termination +// +// dst_i += quote_dist as usize; +// unsafe { +// self.input +// .get_unchecked_mut(idx + len..idx + len + dst_i) +// .clone_from_slice(&self.strings.get_unchecked(..dst_i)); +// let v = self.input.get_unchecked(idx..idx + len + dst_i) as *const [u8] +// as *const str; +// self.str_offset += dst_i as usize; +// return Ok(&*v); +// } +// +// // we compare the pointers since we care if they are 'at the same spot' +// // not if they are the same value +// } +// if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { +// // find out where the backspace is +// let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); +// let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; +// // we encountered backslash first. Handle backslash +// if escape_char == b'u' { +// // move src/dst up to the start; they will be further adjusted +// // within the unicode codepoint handling code. +// src_i += bs_dist as usize; +// dst_i += bs_dist as usize; +// let (o, s) = if let Ok(r) = +// handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { +// dst.get_unchecked_mut(dst_i..) +// }) { +// r +// } else { +// return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); +// }; +// if o == 0 { +// return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); +// }; +// // We moved o steps forword at the destiation and 6 on the source +// src_i += s; +// dst_i += o; +// } else { +// // simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and +// // write bs_dist+1 characters to output +// // note this may reach beyond the part of the buffer we've actually +// // seen. I think this is ok +// let escape_result: u8 = +// unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; +// if escape_result == 0 { +// return Err(self.error(ErrorType::InvalidEscape)); +// } +// unsafe { +// *dst.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; +// } +// src_i += bs_dist as usize + 2; +// dst_i += bs_dist as usize + 1; +// } +// } else { +// // they are the same. Since they can't co-occur, it means we encountered +// // neither. +// src_i += 16; +// dst_i += 16; +// } +// } +// } +//} diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs new file mode 100644 index 00000000..a2bca414 --- /dev/null +++ b/src/neon/intrinsics.rs @@ -0,0 +1,287 @@ +//use std::arch:: + +use crate::neon::simd_llvm; +use crate::neon::simd; + +use std::mem; +use std::hint::unreachable_unchecked; + +#[allow(unused)] +macro_rules! types { + ($( + $(#[$doc:meta])* + pub struct $name:ident($($fields:tt)*); + )*) => ($( + $(#[$doc])* + #[derive(Copy, Clone, Debug)] + #[allow(non_camel_case_types)] + #[repr(simd)] + #[allow(clippy::missing_inline_in_public_items)] + pub struct $name($($fields)*); + )*) +} + +pub type poly64_t = i64; + +extern "C" { + #[link_name = "llvm.aarch64.neon.pmull64"] + fn vmull_p64_(a: i64, b: i64) -> int8x16_t; +} + +//poly128_t vmull_p64 (poly64_t a, poly64_t b) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(pmull))] +pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { + mem::transmute(vmull_p64_(mem::transmute(a), mem::transmute(b))) +} + +types! { + /// ARM-specific 64-bit wide vector of eight packed `i8`. + pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); + /// ARM-specific 64-bit wide vector of eight packed `u8`. 
+ pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); + /// ARM-specific 64-bit wide polynomial vector of eight packed `u8`. + pub struct poly8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); + /// ARM-specific 64-bit wide vector of four packed `i16`. + pub struct int16x4_t(i16, i16, i16, i16); + /// ARM-specific 64-bit wide vector of four packed `u16`. + pub struct uint16x4_t(u16, u16, u16, u16); + // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. + // pub struct float16x4_t(f16, f16, f16, f16); + /// ARM-specific 64-bit wide vector of four packed `u16`. + pub struct poly16x4_t(u16, u16, u16, u16); + /// ARM-specific 64-bit wide vector of two packed `i32`. + pub struct int32x2_t(i32, i32); + /// ARM-specific 64-bit wide vector of two packed `u32`. + pub struct uint32x2_t(u32, u32); + /// ARM-specific 64-bit wide vector of two packed `f32`. + pub struct float32x2_t(f32, f32); + /// ARM-specific 64-bit wide vector of one packed `i64`. + pub struct int64x1_t(i64); + /// ARM-specific 64-bit wide vector of one packed `u64`. + pub struct uint64x1_t(u64); + + /// ARM-specific 128-bit wide vector of sixteen packed `i8`. + pub struct int8x16_t( + i8, i8 ,i8, i8, i8, i8 ,i8, i8, + i8, i8 ,i8, i8, i8, i8 ,i8, i8, + ); + /// ARM-specific 128-bit wide vector of sixteen packed `u8`. + pub struct uint8x16_t( + u8, u8 ,u8, u8, u8, u8 ,u8, u8, + u8, u8 ,u8, u8, u8, u8 ,u8, u8, + ); + /// ARM-specific 128-bit wide vector of sixteen packed `u8`. + pub struct poly8x16_t( + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8 + ); + /// ARM-specific 128-bit wide vector of eight packed `i16`. + pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16); + /// ARM-specific 128-bit wide vector of eight packed `u16`. + pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); + // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. 
+ // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); + /// ARM-specific 128-bit wide vector of eight packed `u16`. + pub struct poly16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); + /// ARM-specific 128-bit wide vector of four packed `i32`. + pub struct int32x4_t(i32, i32, i32, i32); + /// ARM-specific 128-bit wide vector of four packed `u32`. + pub struct uint32x4_t(u32, u32, u32, u32); + /// ARM-specific 128-bit wide vector of four packed `f32`. + pub struct float32x4_t(f32, f32, f32, f32); + /// ARM-specific 128-bit wide vector of two packed `i64`. + pub struct int64x2_t(i64, i64); + /// ARM-specific 128-bit wide vector of two packed `u64`. + pub struct uint64x2_t(u64, u64); + /// ARM-specific 128-bit wide vector of one packed `i128` + pub struct poly128_t(i128); +} + +impl uint8x16_t { + pub fn new(a:u8,b:u8,c:u8,d:u8,e:u8,f:u8,g:u8,h:u8,i:u8,j:u8,k:u8,l:u8,m:u8,n:u8,o:u8,p:u8) -> uint8x16_t { + uint8x16_t(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) + } +} + +impl int8x16_t { + pub fn new(a:i8,b:i8,c:i8,d:i8,e:i8,f:i8,g:i8,h:i8,i:i8,j:i8,k:i8,l:i8,m:i8,n:i8,o:i8,p:i8) -> int8x16_t { + int8x16_t(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) + } +} + +macro_rules! aarch64_simd_2 { + ($name:ident, $type:ty, $simd_fn:ident, $intr:ident) => { + #[inline] + #[target_feature(enable = "neon")] + #[cfg_attr(test, assert_instr($intr))] + pub unsafe fn $name(a: $type, b: $type) -> $type { + simd_llvm::$simd_fn(a, b) + } + }; +} +macro_rules! aarch64_simd_ceq { + ($name:ident, $type:ty) => { + /// Compare bitwise Equal (vector) + aarch64_simd_2!($name, $type, simd_eq, cmeq); + }; +} + +aarch64_simd_ceq!(vceqq_u8, uint8x16_t); +aarch64_simd_ceq!(vceq_s64, int64x1_t); +aarch64_simd_ceq!(vceqq_s64, int64x2_t); +aarch64_simd_ceq!(vceq_u64, uint64x1_t); +aarch64_simd_ceq!(vceqq_u64, uint64x2_t); +aarch64_simd_ceq!(vceq_p64, uint64x1_t); +aarch64_simd_ceq!(vceqq_p64, uint64x2_t); + +macro_rules! 
aarch64_simd_cgt { + ($name:ident, $type:ty) => { + /// Compare signed Greater than (vector) + aarch64_simd_2!($name, $type, simd_gt, cmgt); + }; +} +macro_rules! aarch64_simd_cgtu { + ($name:ident, $type:ty) => { + /// Compare Greater than (vector) + aarch64_simd_2!($name, $type, simd_gt, cmhi); + }; +} + +aarch64_simd_cgt!(vcgt_s64, int64x1_t); +aarch64_simd_cgt!(vcgtq_s64, int64x2_t); +aarch64_simd_cgtu!(vcgt_u64, uint64x1_t); +aarch64_simd_cgtu!(vcgtq_u64, uint64x2_t); + +macro_rules! aarch64_simd_clt { + ($name:ident, $type:ty) => { + /// Compare signed Lesser than (vector) + aarch64_simd_2!($name, $type, simd_lt, cmgt); + }; +} +macro_rules! aarch64_simd_cltu { + ($name:ident, $type:ty) => { + /// Compare Lesser than (vector) + aarch64_simd_2!($name, $type, simd_lt, cmhi); + }; +} + +aarch64_simd_clt!(vclt_s64, int64x1_t); +aarch64_simd_clt!(vcltq_s64, int64x2_t); +aarch64_simd_cltu!(vclt_u64, uint64x1_t); +aarch64_simd_cltu!(vcltq_u64, uint64x2_t); + +macro_rules! aarch64_simd_cge { + ($name:ident, $type:ty) => { + /// Compare signed Greater than (vector) + aarch64_simd_2!($name, $type, simd_ge, cmge); + }; +} +macro_rules! aarch64_simd_cgeu { + ($name:ident, $type:ty) => { + /// Compare Greater than (vector) + aarch64_simd_2!($name, $type, simd_ge, cmhs); + }; +} + +aarch64_simd_cge!(vcge_s64, int64x1_t); +aarch64_simd_cge!(vcgeq_s64, int64x2_t); +aarch64_simd_cgeu!(vcge_u64, uint64x1_t); +aarch64_simd_cgeu!(vcgeq_u64, uint64x2_t); + +macro_rules! aarch64_simd_cle { + ($name:ident, $type:ty) => { + /// Compare signed Lesser than (vector) + aarch64_simd_2!($name, $type, simd_le, cmge); + }; +} +macro_rules! 
aarch64_simd_cleu { + ($name:ident, $type:ty) => { + /// Compare Lesser than (vector) + aarch64_simd_2!($name, $type, simd_le, cmhs); + }; +} + +aarch64_simd_cleu!(vcleq_u8, uint8x16_t); +aarch64_simd_cle!(vcle_s64, int64x1_t); +aarch64_simd_cle!(vcleq_s64, int64x2_t); +aarch64_simd_cleu!(vcle_u64, uint64x1_t); +aarch64_simd_cleu!(vcleq_u64, uint64x2_t); + + +pub fn vdupq_n_u8(a:u8) -> uint8x16_t { + uint8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +} + +pub fn zero8x16() -> uint8x16_t { + uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) +} + +pub unsafe fn vpaddq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_add(a, b) } + +pub unsafe fn vandq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_and(a, b) } +pub unsafe fn vorrq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_or(a, b) } +pub unsafe fn vandq_s8(a:int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_and(a, b) } +pub unsafe fn vorrq_s8(a:int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_or(a, b) } + +macro_rules! arm_reinterpret { + ($name:ident, $from:ty, $to:ty) => { + // Vector reinterpret cast operation + #[inline] + #[target_feature(enable = "neon")] + #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] + pub unsafe fn $name(a: $from) -> $to { + mem::transmute(a) + } + }; +} + +arm_reinterpret!(vreinterpret_u64_u32, uint32x2_t, uint64x1_t); +arm_reinterpret!(vreinterpretq_s8_u8, uint8x16_t, int8x16_t); +arm_reinterpret!(vreinterpretq_u16_u8, uint8x16_t, uint16x8_t); +arm_reinterpret!(vreinterpretq_u32_u8, uint8x16_t, uint32x4_t); +arm_reinterpret!(vreinterpretq_u64_u8, uint8x16_t, uint64x2_t); +arm_reinterpret!(vreinterpretq_u8_s8, int8x16_t, uint8x16_t); + +macro_rules! 
arm_vget_lane { + ($name:ident, $to:ty, $from:ty, $lanes:literal) => { + #[inline] + #[target_feature(enable = "neon")] + #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] + #[cfg_attr(test, assert_instr(umov))] + pub unsafe fn $name(v: $from, lane: u32) -> $to { + if lane > $lanes { unreachable_unchecked() } + simd_llvm::simd_extract(v, lane) + } + }; +} + +arm_vget_lane!(vgetq_lane_u16, u16, uint8x16_t, 7); +arm_vget_lane!(vgetq_lane_u64, u64, uint64x2_t, 1); +arm_vget_lane!(vget_lane_u64, u64, uint64x1_t, 0); + +pub fn vqmovn_u64(a:uint64x2_t) -> uint32x2_t { + uint32x2_t(a.0 as u32, a.1 as u32) +} + +//fn vaddq_s8() -> u16 { 0 } +//fn vceqq_s8() -> u16 { 0 } +//fn vceqq_u8() -> u16 { 0 } +//fn vcgtq_s8() -> u16 { 0 } +//fn vcleq_u8() -> u16 { 0 } +//fn vdupq_n_s8() -> u16 { 0 } +//fn vextq_s8() -> u16 { 0 } +//fn vget_lane_u64() -> u16 { 0 } +//fn vgetq_lane_u32() -> u16 { 0 } +//fn vgetq_lane_u64() -> u16 { 0 } +//fn vld1q_s8() -> u16 { 0 } +//fn vld1q_u8() -> u16 { 0 } +//fn vmovq_n_u8() -> u16 { 0 } +//fn vmull_p64() -> u16 { 0 } +//fn vorrq_s8() -> u16 { 0 } +//fn vqsubq_u8() -> u16 { 0 } +//fn vqtbl1q_s8() -> u16 { 0 } +//fn vqtbl1q_u8() -> u16 { 0 } +//fn vshrq_n_u8() -> u16 { 0 } +//fn vst1q_u8() -> u16 { 0 } \ No newline at end of file diff --git a/src/neon/mod.rs b/src/neon/mod.rs new file mode 100644 index 00000000..6d23b075 --- /dev/null +++ b/src/neon/mod.rs @@ -0,0 +1,6 @@ +//pub mod deser; +pub mod stage1; +//pub mod utf8check; +mod simd; +mod simd_llvm; +mod intrinsics; \ No newline at end of file diff --git a/src/neon/simd.rs b/src/neon/simd.rs new file mode 100644 index 00000000..ecf5a4cb --- /dev/null +++ b/src/neon/simd.rs @@ -0,0 +1,469 @@ +#![allow(non_camel_case_types)] + +use crate::neon::simd_llvm; + +macro_rules! 
simd_ty { + ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => { + #[repr(simd)] + #[derive(Copy, Clone, Debug, PartialEq)] + pub(crate) struct $id($(pub $elem_ty),*); + + #[allow(clippy::use_self)] + impl $id { + #[inline] + pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { + $id($($elem_name),*) + } + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) const fn splat(value: $ety) -> Self { + $id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + value + }),*) + } + + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) fn extract(self, index: usize) -> $ety { + unsafe { + simd_llvm::simd_extract(self, index as u32) + } + } + } + } +} + +macro_rules! simd_m_ty { + ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => { + #[repr(simd)] + #[derive(Copy, Clone, Debug, PartialEq)] + pub(crate) struct $id($(pub $elem_ty),*); + + #[allow(clippy::use_self)] + impl $id { + #[inline] + const fn bool_to_internal(x: bool) -> $ety { + [0 as $ety, !(0 as $ety)][x as usize] + } + + #[inline] + pub(crate) const fn new($($elem_name: bool),*) -> Self { + $id($(Self::bool_to_internal($elem_name)),*) + } + + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) const fn splat(value: bool) -> Self { + $id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + Self::bool_to_internal(value) + }),*) + } + + // FIXME: Workaround rust@60637 + #[inline(always)] + pub(crate) fn extract(self, index: usize) -> bool { + let r: $ety = unsafe { + simd_llvm::simd_extract(self, index as u32) + }; + r != 0 + } + } + } +} + +// 16-bit wide types: + +simd_ty!(u8x2[u8]: u8, u8 | x0, x1); +simd_ty!(i8x2[i8]: i8, i8 | x0, x1); + +// 32-bit wide types: + +simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3); +simd_ty!(u16x2[u16]: u16, u16 | x0, x1); + +simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3); +simd_ty!(i16x2[i16]: i16, i16 | x0, x1); + +// 64-bit wide types: + 
+simd_ty!(u8x8[u8]: + u8, u8, u8, u8, u8, u8, u8, u8 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3); +simd_ty!(u32x2[u32]: u32, u32 | x0, x1); +simd_ty!(u64x1[u64]: u64 | x1); + +simd_ty!(i8x8[i8]: + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3); +simd_ty!(i32x2[i32]: i32, i32 | x0, x1); +simd_ty!(i64x1[i64]: i64 | x1); + +simd_ty!(f32x2[f32]: f32, f32 | x0, x1); + +// 128-bit wide types: + +simd_ty!(u8x16[u8]: + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(u16x8[u16]: + u16, u16, u16, u16, u16, u16, u16, u16 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3); +simd_ty!(u64x2[u64]: u64, u64 | x0, x1); + +simd_ty!(i8x16[i8]: + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(i16x8[i16]: + i16, i16, i16, i16, i16, i16, i16, i16 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); +simd_ty!(i64x2[i64]: i64, i64 | x0, x1); + +simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3); +simd_ty!(f64x2[f64]: f64, f64 | x0, x1); + +simd_m_ty!(m8x16[i8]: + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_m_ty!(m16x8[i16]: + i16, i16, i16, i16, i16, i16, i16, i16 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); +simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1); + +// 256-bit wide types: + +simd_ty!(u8x32[u8]: + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8, + u8, u8, u8, u8, u8, u8, u8, u8 + | x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, 
x15, + x16, x17, x18, x19, x20, x21, x22, x23, + x24, x25, x26, x27, x28, x29, x30, x31 +); +simd_ty!(u16x16[u16]: + u16, u16, u16, u16, u16, u16, u16, u16, + u16, u16, u16, u16, u16, u16, u16, u16 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(u32x8[u32]: + u32, u32, u32, u32, u32, u32, u32, u32 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3); + +simd_ty!(i8x32[i8]: + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8, + i8, i8, i8, i8, i8, i8, i8, i8 + | x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, + x24, x25, x26, x27, x28, x29, x30, x31 +); +simd_ty!(i16x16[i16]: + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16 + | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +); +simd_ty!(i32x8[i32]: + i32, i32, i32, i32, i32, i32, i32, i32 + | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3); + +simd_ty!(f32x8[f32]: + f32, f32, f32, f32, f32, f32, f32, f32 | + x0, x1, x2, x3, x4, x5, x6, x7); + +// 512-bit wide types: + +simd_ty!(i32x16[i32]: + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32 + | x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15); + +simd_ty!(i64x8[i64]: + i64, i64, i64, i64, i64, i64, i64, i64 + | x0, x1, x2, x3, x4, x5, x6, x7); + +#[allow(unused)] +#[macro_export] +macro_rules! 
constify_imm8 { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match ($imm8) & 0b1111_1111 { + 0 => $expand!(0), + 1 => $expand!(1), + 2 => $expand!(2), + 3 => $expand!(3), + 4 => $expand!(4), + 5 => $expand!(5), + 6 => $expand!(6), + 7 => $expand!(7), + 8 => $expand!(8), + 9 => $expand!(9), + 10 => $expand!(10), + 11 => $expand!(11), + 12 => $expand!(12), + 13 => $expand!(13), + 14 => $expand!(14), + 15 => $expand!(15), + 16 => $expand!(16), + 17 => $expand!(17), + 18 => $expand!(18), + 19 => $expand!(19), + 20 => $expand!(20), + 21 => $expand!(21), + 22 => $expand!(22), + 23 => $expand!(23), + 24 => $expand!(24), + 25 => $expand!(25), + 26 => $expand!(26), + 27 => $expand!(27), + 28 => $expand!(28), + 29 => $expand!(29), + 30 => $expand!(30), + 31 => $expand!(31), + 32 => $expand!(32), + 33 => $expand!(33), + 34 => $expand!(34), + 35 => $expand!(35), + 36 => $expand!(36), + 37 => $expand!(37), + 38 => $expand!(38), + 39 => $expand!(39), + 40 => $expand!(40), + 41 => $expand!(41), + 42 => $expand!(42), + 43 => $expand!(43), + 44 => $expand!(44), + 45 => $expand!(45), + 46 => $expand!(46), + 47 => $expand!(47), + 48 => $expand!(48), + 49 => $expand!(49), + 50 => $expand!(50), + 51 => $expand!(51), + 52 => $expand!(52), + 53 => $expand!(53), + 54 => $expand!(54), + 55 => $expand!(55), + 56 => $expand!(56), + 57 => $expand!(57), + 58 => $expand!(58), + 59 => $expand!(59), + 60 => $expand!(60), + 61 => $expand!(61), + 62 => $expand!(62), + 63 => $expand!(63), + 64 => $expand!(64), + 65 => $expand!(65), + 66 => $expand!(66), + 67 => $expand!(67), + 68 => $expand!(68), + 69 => $expand!(69), + 70 => $expand!(70), + 71 => $expand!(71), + 72 => $expand!(72), + 73 => $expand!(73), + 74 => $expand!(74), + 75 => $expand!(75), + 76 => $expand!(76), + 77 => $expand!(77), + 78 => $expand!(78), + 79 => $expand!(79), + 80 => $expand!(80), + 81 => $expand!(81), + 82 => $expand!(82), + 83 => $expand!(83), + 84 => $expand!(84), + 85 => $expand!(85), + 86 => 
$expand!(86), + 87 => $expand!(87), + 88 => $expand!(88), + 89 => $expand!(89), + 90 => $expand!(90), + 91 => $expand!(91), + 92 => $expand!(92), + 93 => $expand!(93), + 94 => $expand!(94), + 95 => $expand!(95), + 96 => $expand!(96), + 97 => $expand!(97), + 98 => $expand!(98), + 99 => $expand!(99), + 100 => $expand!(100), + 101 => $expand!(101), + 102 => $expand!(102), + 103 => $expand!(103), + 104 => $expand!(104), + 105 => $expand!(105), + 106 => $expand!(106), + 107 => $expand!(107), + 108 => $expand!(108), + 109 => $expand!(109), + 110 => $expand!(110), + 111 => $expand!(111), + 112 => $expand!(112), + 113 => $expand!(113), + 114 => $expand!(114), + 115 => $expand!(115), + 116 => $expand!(116), + 117 => $expand!(117), + 118 => $expand!(118), + 119 => $expand!(119), + 120 => $expand!(120), + 121 => $expand!(121), + 122 => $expand!(122), + 123 => $expand!(123), + 124 => $expand!(124), + 125 => $expand!(125), + 126 => $expand!(126), + 127 => $expand!(127), + 128 => $expand!(128), + 129 => $expand!(129), + 130 => $expand!(130), + 131 => $expand!(131), + 132 => $expand!(132), + 133 => $expand!(133), + 134 => $expand!(134), + 135 => $expand!(135), + 136 => $expand!(136), + 137 => $expand!(137), + 138 => $expand!(138), + 139 => $expand!(139), + 140 => $expand!(140), + 141 => $expand!(141), + 142 => $expand!(142), + 143 => $expand!(143), + 144 => $expand!(144), + 145 => $expand!(145), + 146 => $expand!(146), + 147 => $expand!(147), + 148 => $expand!(148), + 149 => $expand!(149), + 150 => $expand!(150), + 151 => $expand!(151), + 152 => $expand!(152), + 153 => $expand!(153), + 154 => $expand!(154), + 155 => $expand!(155), + 156 => $expand!(156), + 157 => $expand!(157), + 158 => $expand!(158), + 159 => $expand!(159), + 160 => $expand!(160), + 161 => $expand!(161), + 162 => $expand!(162), + 163 => $expand!(163), + 164 => $expand!(164), + 165 => $expand!(165), + 166 => $expand!(166), + 167 => $expand!(167), + 168 => $expand!(168), + 169 => $expand!(169), + 170 => 
$expand!(170), + 171 => $expand!(171), + 172 => $expand!(172), + 173 => $expand!(173), + 174 => $expand!(174), + 175 => $expand!(175), + 176 => $expand!(176), + 177 => $expand!(177), + 178 => $expand!(178), + 179 => $expand!(179), + 180 => $expand!(180), + 181 => $expand!(181), + 182 => $expand!(182), + 183 => $expand!(183), + 184 => $expand!(184), + 185 => $expand!(185), + 186 => $expand!(186), + 187 => $expand!(187), + 188 => $expand!(188), + 189 => $expand!(189), + 190 => $expand!(190), + 191 => $expand!(191), + 192 => $expand!(192), + 193 => $expand!(193), + 194 => $expand!(194), + 195 => $expand!(195), + 196 => $expand!(196), + 197 => $expand!(197), + 198 => $expand!(198), + 199 => $expand!(199), + 200 => $expand!(200), + 201 => $expand!(201), + 202 => $expand!(202), + 203 => $expand!(203), + 204 => $expand!(204), + 205 => $expand!(205), + 206 => $expand!(206), + 207 => $expand!(207), + 208 => $expand!(208), + 209 => $expand!(209), + 210 => $expand!(210), + 211 => $expand!(211), + 212 => $expand!(212), + 213 => $expand!(213), + 214 => $expand!(214), + 215 => $expand!(215), + 216 => $expand!(216), + 217 => $expand!(217), + 218 => $expand!(218), + 219 => $expand!(219), + 220 => $expand!(220), + 221 => $expand!(221), + 222 => $expand!(222), + 223 => $expand!(223), + 224 => $expand!(224), + 225 => $expand!(225), + 226 => $expand!(226), + 227 => $expand!(227), + 228 => $expand!(228), + 229 => $expand!(229), + 230 => $expand!(230), + 231 => $expand!(231), + 232 => $expand!(232), + 233 => $expand!(233), + 234 => $expand!(234), + 235 => $expand!(235), + 236 => $expand!(236), + 237 => $expand!(237), + 238 => $expand!(238), + 239 => $expand!(239), + 240 => $expand!(240), + 241 => $expand!(241), + 242 => $expand!(242), + 243 => $expand!(243), + 244 => $expand!(244), + 245 => $expand!(245), + 246 => $expand!(246), + 247 => $expand!(247), + 248 => $expand!(248), + 249 => $expand!(249), + 250 => $expand!(250), + 251 => $expand!(251), + 252 => $expand!(252), + 253 => 
$expand!(253), + 254 => $expand!(254), + _ => $expand!(255), + } + }; +} \ No newline at end of file diff --git a/src/neon/simd_llvm.rs b/src/neon/simd_llvm.rs new file mode 100644 index 00000000..a6c1eb52 --- /dev/null +++ b/src/neon/simd_llvm.rs @@ -0,0 +1,54 @@ +extern "platform-intrinsic" { + pub fn simd_eq(x: T, y: T) -> U; + pub fn simd_ne(x: T, y: T) -> U; + pub fn simd_lt(x: T, y: T) -> U; + pub fn simd_le(x: T, y: T) -> U; + pub fn simd_gt(x: T, y: T) -> U; + pub fn simd_ge(x: T, y: T) -> U; + + pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U; + pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U; + pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U; + pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; + pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; + pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U; + pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U; + + pub fn simd_insert(x: T, idx: u32, val: U) -> T; + pub fn simd_extract(x: T, idx: u32) -> U; + + pub fn simd_cast(x: T) -> U; + + pub fn simd_add(x: T, y: T) -> T; + pub fn simd_sub(x: T, y: T) -> T; + pub fn simd_mul(x: T, y: T) -> T; + pub fn simd_div(x: T, y: T) -> T; + pub fn simd_shl(x: T, y: T) -> T; + pub fn simd_shr(x: T, y: T) -> T; + pub fn simd_and(x: T, y: T) -> T; + pub fn simd_or(x: T, y: T) -> T; + pub fn simd_xor(x: T, y: T) -> T; + + pub fn simd_reduce_add_unordered(x: T) -> U; + pub fn simd_reduce_mul_unordered(x: T) -> U; + pub fn simd_reduce_add_ordered(x: T, acc: U) -> U; + pub fn simd_reduce_mul_ordered(x: T, acc: U) -> U; + pub fn simd_reduce_min(x: T) -> U; + pub fn simd_reduce_max(x: T) -> U; + pub fn simd_reduce_min_nanless(x: T) -> U; + pub fn simd_reduce_max_nanless(x: T) -> U; + pub fn simd_reduce_and(x: T) -> U; + pub fn simd_reduce_or(x: T) -> U; + pub fn simd_reduce_xor(x: T) -> U; + pub fn simd_reduce_all(x: T) -> bool; + pub fn simd_reduce_any(x: T) -> bool; + + pub fn simd_select(m: M, a: T, b: T) -> T; + pub fn simd_select_bitmask(m: 
M, a: T, b: T) -> T; + + pub fn simd_fmin(a: T, b: T) -> T; + pub fn simd_fmax(a: T, b: T) -> T; + + pub fn simd_fsqrt(a: T) -> T; + pub fn simd_fma(a: T, b: T, c: T) -> T; +} \ No newline at end of file diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs new file mode 100644 index 00000000..6e5f6b58 --- /dev/null +++ b/src/neon/stage1.rs @@ -0,0 +1,560 @@ +#![allow(dead_code)] + +//use crate::portability::*; +use crate::neon::intrinsics::*; +//use core::arch::aarch64::*; +//use crate::neon::utf8check::*; +use crate::*; +use std::mem; + +#[macro_use] +use crate::neon::simd::*; + +pub const SIMDJSON_PADDING: usize = mem::size_of::() * 4; + +#[derive(Debug)] +struct SimdInput { + v0: uint8x16_t, + v1: uint8x16_t, + v2: uint8x16_t, + v3: uint8x16_t, +} + +fn fill_input(ptr: &[u8]) -> SimdInput { + unsafe { + #[allow(clippy::cast_ptr_alignment)] + SimdInput { + v0: mem::transmute(ptr.as_ptr() as *const uint8x16_t), + v1: mem::transmute(ptr.as_ptr().add(16) as *const uint8x16_t), + v2: mem::transmute(ptr.as_ptr().add(32) as *const uint8x16_t), + v3: mem::transmute(ptr.as_ptr().add(48) as *const uint8x16_t), + } + } +} + +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn neon_movemask(input: uint8x16_t) -> u16 { + const bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + + let minput: uint8x16_t = vandq_u8(input, bit_mask); + let tmp: uint8x16_t = vpaddq_u8(minput, minput); + let tmp = vpaddq_u8(tmp, tmp); + let tmp = vpaddq_u8(tmp, tmp); + + unsafe { + vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) + } +} + + +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: uint8x16_t) -> u64 { + const bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + let t0 = vandq_u8(p0, bit_mask); + let t1 = vandq_u8(p1, 
bit_mask); + let t2 = vandq_u8(p2, bit_mask); + let t3 = vandq_u8(p3, bit_mask); + let sum0 = vpaddq_u8(t0, t1); + let sum1 = vpaddq_u8(t2, t3); + let sum0 = vpaddq_u8(sum0, sum1); + let sum0 = vpaddq_u8(sum0, sum0); + unsafe { + vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0) + } +} + +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn check_ascii_neon(si: &SimdInput) -> bool { + let high_bit: uint8x16_t = vdupq_n_u8(0x80); + let t0: uint8x16_t = vorrq_u8(si.v0, si.v1); + let t1: uint8x16_t = vorrq_u8(si.v2, si.v3); + let t3: uint8x16_t = vorrq_u8(t0, t1); + let t4: uint8x16_t = vandq_u8(t3, high_bit); + + unsafe { + let v64: uint64x2_t = vreinterpretq_u64_u8(t4); + let v32: uint32x2_t = vqmovn_u64(v64); + let result: uint64x1_t = vreinterpret_u64_u32(v32); + + vget_lane_u64(result, 0) == 0 + } +} + +struct utf8_checking_state { + has_error: int8x16_t, + previous: processed_utf_bytes, +} +// +//#[cfg_attr(not(feature = "no-inline"), inline(always))] +//unsafe fn check_utf8( +// input: &SimdInput, +// has_error: &mut uint8x16_t, +// previous: &mut utf8_checking_state, +//) { +// if check_ascii_neon(input) { +// // All bytes are ascii. Therefore the byte that was just before must be +// // ascii too. We only check the byte that was just before simd_input. Nines +// // are arbitrary values. 
+// const verror: int8x16_t = +// int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); +// state.has_error = +// vorrq_s8(vreinterpretq_s8_u8( +// vcgtq_s8(state.previous.carried_continuations, verror)), +// state.has_error); +// } else { +// // it is not ascii so we have to do heavy work +// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), +// &(state.previous), &(state.has_error)); +// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), +// &(state.previous), &(state.has_error)); +// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), +// &(state.previous), &(state.has_error)); +// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), +// &(state.previous), &(state.has_error)); +// } +//} + +// a straightforward comparison of a mask against input +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { + const MASK: uint8x16_t = vdupq_n_u8(m); + + let cmp_res_0: uint8x16_t = vceqq_u8(input.v0, MASK); + let cmp_res_1: uint8x16_t = vceqq_u8(input.v1, MASK); + let cmp_res_2: uint8x16_t = vceqq_u8(input.v2, MASK); + let cmp_res_3: uint8x16_t = vceqq_u8(input.v3, MASK); + + neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) +} + +// find all values less than or equal than the content of maxval (using unsigned arithmetic) +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn unsigned_lteq_against_input(input: &SimdInput, maxval: u8) -> u64 { + const MASK: uint8x16_t = vdupq_n_u8(maxval); + + let cmp_res_0: uint8x16_t = vcleq_u8(input.v0, MASK); + let cmp_res_1: uint8x16_t = vcleq_u8(input.v1, MASK); + let cmp_res_2: uint8x16_t = vcleq_u8(input.v2, MASK); + let cmp_res_3: uint8x16_t = vcleq_u8(input.v3, MASK); + + neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) +} + +unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_backslash: &mut u64) -> u64 { + const EVEN_BITS: u64 = 
0x5555_5555_5555_5555; + const ODD_BITS: u64 = !EVEN_BITS; + + let bs_bits: u64 = cmp_mask_against_input(input, b'\\'); + let start_edges: u64 = bs_bits & !(bs_bits << 1); + // flip lowest if we have an odd-length run at the end of the prior + // iteration + let even_start_mask: u64 = EVEN_BITS ^ *prev_iter_ends_odd_backslash; + let even_starts: u64 = start_edges & even_start_mask; + let odd_starts: u64 = start_edges & !even_start_mask; + let even_carries: u64 = bs_bits.wrapping_add(even_starts); + + let mut odd_carries: u64 = 0; + // must record the carry-out of our odd-carries out of bit 63; this + // indicates whether the sense of any edge going to the next iteration + // should be flipped + let iter_ends_odd_backslash: bool = add_overflow(bs_bits, odd_starts, &mut odd_carries); + + odd_carries |= *prev_iter_ends_odd_backslash; // push in bit zero as a potential end + // if we had an odd-numbered run at the + // end of the previous iteration + *prev_iter_ends_odd_backslash = if iter_ends_odd_backslash { 0x1 } else { 0x0 }; + let even_carry_ends: u64 = even_carries & !bs_bits; + let odd_carry_ends: u64 = odd_carries & !bs_bits; + let even_start_odd_end: u64 = even_carry_ends & ODD_BITS; + let odd_start_even_end: u64 = odd_carry_ends & EVEN_BITS; + let odd_ends: u64 = even_start_odd_end | odd_start_even_end; + odd_ends +} + +unsafe fn find_quote_mask_and_bits(input: &SimdInput, odd_ends: u64, + prev_iter_inside_quote: &u64, quote_bits: &mut u64, error_mask: &u64) -> u64 { + quote_bits = cmp_mask_against_input(input, b'"'); + quote_bits = &(quote_bits & !odd_ends); + let quote_mask: u64 = vmull_p64(-1, quote_bits); + + quote_mask ^= prev_iter_inside_quote; + + // All Unicode characters may be placed within the + // quotation marks, except for the characters that MUST be escaped: + // quotation mark, reverse solidus, and the control characters (U+0000 + //through U+001F). 
+ // https://tools.ietf.org/html/rfc8259 + let unescaped: u64 = unsigned_lteq_against_input(input, 0x1F); + error_mask |= quote_mask & unescaped; + + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, + // John Regher from Utah U. says this is fine code + prev_iter_inside_quote = quote_mask >> 63; + + quote_mask +} + +// +//#[cfg_attr(not(feature = "no-inline"), inline(always))] +//unsafe fn find_whitespace_and_structurals( +// input: &SimdInput, +// whitespace: &mut u64, +// structurals: &mut u64, +//) { +// // do a 'shufti' to detect structural JSON characters +// // they are +// // * `{` 0x7b +// // * `}` 0x7d +// // * `:` 0x3a +// // * `[` 0x5b +// // * `]` 0x5d +// // * `,` 0x2c +// // these go into the first 3 buckets of the comparison (1/2/4) +// +// // we are also interested in the four whitespace characters: +// // * space 0x20 +// // * linefeed 0x0a +// // * horizontal tab 0x09 +// // * carriage return 0x0d +// // these go into the next 2 buckets of the comparison (8/16) +// +// // TODO: const? +// let low_nibble_mask: __m128i = _mm_setr_epi8( +// 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, +// ); +// // TODO: const? 
+// let high_nibble_mask: __m128i = _mm_setr_epi8( +// 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, +// ); +// +// let structural_shufti_mask: __m128i = _mm_set1_epi8(0x7); +// let whitespace_shufti_mask: __m128i = _mm_set1_epi8(0x18); +// +// let v_v0: __m128i = _mm_and_si128( +// _mm_shuffle_epi8(low_nibble_mask, input.v0), +// _mm_shuffle_epi8( +// high_nibble_mask, +// _mm_and_si128(_mm_srli_epi32(input.v0, 4), _mm_set1_epi8(0x7f)), +// ), +// ); +// let v_v1: __m128i = _mm_and_si128( +// _mm_shuffle_epi8(low_nibble_mask, input.v1), +// _mm_shuffle_epi8( +// high_nibble_mask, +// _mm_and_si128(_mm_srli_epi32(input.v1, 4), _mm_set1_epi8(0x7f)), +// ), +// ); +// let v_v2: __m128i = _mm_and_si128( +// _mm_shuffle_epi8(low_nibble_mask, input.v2), +// _mm_shuffle_epi8( +// high_nibble_mask, +// _mm_and_si128(_mm_srli_epi32(input.v2, 4), _mm_set1_epi8(0x7f)), +// ), +// ); +// let v_v3: __m128i = _mm_and_si128( +// _mm_shuffle_epi8(low_nibble_mask, input.v3), +// _mm_shuffle_epi8( +// high_nibble_mask, +// _mm_and_si128(_mm_srli_epi32(input.v3, 4), _mm_set1_epi8(0x7f)), +// ), +// ); +// +// let tmp_v0: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v0, structural_shufti_mask), +// _mm_set1_epi8(0), +// ); +// let tmp_v1: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v1, structural_shufti_mask), +// _mm_set1_epi8(0), +// ); +// let tmp_v2: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v2, structural_shufti_mask), +// _mm_set1_epi8(0), +// ); +// let tmp_v3: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v3, structural_shufti_mask), +// _mm_set1_epi8(0), +// ); +// +// let structural_res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(tmp_v0))); +// let structural_res_1: u64 = _mm_movemask_epi8(tmp_v1) as u64; +// let structural_res_2: u64 = _mm_movemask_epi8(tmp_v2) as u64; +// let structural_res_3: u64 = _mm_movemask_epi8(tmp_v3) as u64; +// +// *structurals = !(structural_res_0 | (structural_res_1 << 16) | (structural_res_2 << 32) | (structural_res_3 << 
48)); +// +// let tmp_ws_v0: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v0, whitespace_shufti_mask), +// _mm_set1_epi8(0), +// ); +// let tmp_ws_v1: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v1, whitespace_shufti_mask), +// _mm_set1_epi8(0), +// ); +// let tmp_ws_v2: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v2, whitespace_shufti_mask), +// _mm_set1_epi8(0), +// ); +// let tmp_ws_v3: __m128i = _mm_cmpeq_epi8( +// _mm_and_si128(v_v3, whitespace_shufti_mask), +// _mm_set1_epi8(0), +// ); +// +// let ws_res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(tmp_ws_v0))); +// let ws_res_1: u64 = _mm_movemask_epi8(tmp_ws_v1) as u64; +// let ws_res_2: u64 = _mm_movemask_epi8(tmp_ws_v2) as u64; +// let ws_res_3: u64 = _mm_movemask_epi8(tmp_ws_v3) as u64; +// +// *whitespace = !(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | (ws_res_3 << 48)); +//} +// + +// flatten out values in 'bits' assuming that they are are to have values of idx +// plus their position in the bitvector, and store these indexes at +// base_ptr[base] incrementing base as we go +// will potentially store extra values beyond end of valid bits, so base_ptr +// needs to be large enough to handle this +//TODO: usize was u32 here does this matter? +//#[cfg_attr(not(feature = "no-inline"), inline(always))] +//fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { +// let cnt: usize = hamming(bits) as usize; +// let mut l = base.len(); +// let idx_minus_64 = idx.wrapping_sub(64); +// let idx_64_v = unsafe { +// _mm_set_epi32( +// static_cast_i32!(idx_minus_64), +// static_cast_i32!(idx_minus_64), +// static_cast_i32!(idx_minus_64), +// static_cast_i32!(idx_minus_64), +// ) +// }; +// +// // We're doing some trickery here. 
+// // We reserve 64 extra entries, because we've at most 64 bit to set +// // then we trunctate the base to the next base (that we calcuate above) +// // We later indiscriminatory writre over the len we set but that's OK +// // since we ensure we reserve the needed space +// base.reserve(64); +// unsafe { +// base.set_len(l + cnt); +// } +// +// while bits != 0 { +// unsafe { +// let v0 = static_cast_i32!(trailingzeroes(bits)); +// bits &= bits.wrapping_sub(1); +// let v1 = static_cast_i32!(trailingzeroes(bits)); +// bits &= bits.wrapping_sub(1); +// let v2 = static_cast_i32!(trailingzeroes(bits)); +// bits &= bits.wrapping_sub(1); +// let v3 = static_cast_i32!(trailingzeroes(bits)); +// bits &= bits.wrapping_sub(1); +// +// let v: __m128i = _mm_set_epi32(v3, v2, v1, v0); +// let v: __m128i = _mm_add_epi32(idx_64_v, v); +// #[allow(clippy::cast_ptr_alignment)] +// _mm_storeu_si128(base.as_mut_ptr().add(l) as *mut __m128i, v); +// } +// l += 4; +// } +//} + +// +//// return a updated structural bit vector with quoted contents cleared out and +//// pseudo-structural characters added to the mask +//// updates prev_iter_ends_pseudo_pred which tells us whether the previous +//// iteration ended on a whitespace or a structural character (which means that +//// the next iteration +//// will have a pseudo-structural character at its start) +//#[cfg_attr(not(feature = "no-inline"), inline(always))] +//fn finalize_structurals( +// mut structurals: u64, +// whitespace: u64, +// quote_mask: u64, +// quote_bits: u64, +// prev_iter_ends_pseudo_pred: &mut u64, +//) -> u64 { +// // mask off anything inside quotes +// structurals &= !quote_mask; +// // add the real quote bits back into our bitmask as well, so we can +// // quickly traverse the strings we've spent all this trouble gathering +// structurals |= quote_bits; +// // Now, establish "pseudo-structural characters". 
These are non-whitespace +// // characters that are (a) outside quotes and (b) have a predecessor that's +// // either whitespace or a structural character. This means that subsequent +// // passes will get a chance to encounter the first character of every string +// // of non-whitespace and, if we're parsing an atom like true/false/null or a +// // number we can stop at the first whitespace or structural character +// // following it. +// +// // a qualified predecessor is something that can happen 1 position before an +// // psuedo-structural character +// let pseudo_pred: u64 = structurals | whitespace; +// +// let shifted_pseudo_pred: u64 = (pseudo_pred << 1) | *prev_iter_ends_pseudo_pred; +// *prev_iter_ends_pseudo_pred = pseudo_pred >> 63; +// let pseudo_structurals: u64 = shifted_pseudo_pred & (!whitespace) & (!quote_mask); +// structurals |= pseudo_structurals; +// +// // now, we've used our close quotes all we need to. So let's switch them off +// // they will be off in the quote mask and on in quote bits. +// structurals &= !(quote_bits & !quote_mask); +// structurals +//} +// + +//impl<'de> Deserializer<'de> { +// #[inline(never)] +// pub unsafe fn find_structural_bits(input: &[u8]) -> std::result::Result, ErrorType> { +// let len = input.len(); +// // 6 is a heuristic number to estimate it turns out a rate of 1/6 structural caracters lears +// // almost never to relocations. +// let mut structural_indexes = Vec::with_capacity(len / 6); +// structural_indexes.push(0); // push extra root element +// +// let mut has_error: __m128i = _mm_setzero_si128(); +// let mut previous = AvxProcessedUtfBytes::default(); +// // we have padded the input out to 64 byte multiple with the remainder being +// // zeros +// +// // persistent state across loop +// // does the last iteration end with an odd-length sequence of backslashes? 
+// // either 0 or 1, but a 64-bit value +// let mut prev_iter_ends_odd_backslash: u64 = 0; +// // does the previous iteration end inside a double-quote pair? +// let mut prev_iter_inside_quote: u64 = 0; +// // either all zeros or all ones +// // does the previous iteration end on something that is a predecessor of a +// // pseudo-structural character - i.e. whitespace or a structural character +// // effectively the very first char is considered to follow "whitespace" for +// // the +// // purposes of pseudo-structural character detection so we initialize to 1 +// let mut prev_iter_ends_pseudo_pred: u64 = 1; +// +// // structurals are persistent state across loop as we flatten them on the +// // subsequent iteration into our array pointed to be base_ptr. +// // This is harmless on the first iteration as structurals==0 +// // and is done for performance reasons; we can hide some of the latency of the +// // expensive carryless multiply in the previous step with this work +// let mut structurals: u64 = 0; +// +// let lenminus64: usize = if len < 64 { 0 } else { len as usize - 64 }; +// let mut idx: usize = 0; +// let mut error_mask: u64 = 0; // for unescaped characters within strings (ASCII code points < 0x20) +// +// while idx < lenminus64 { +// /* +// #ifndef _MSC_VER +// __builtin_prefetch(buf + idx + 128); +// #endif +// */ +// let input: SimdInput = fill_input(input.get_unchecked(idx as usize..)); +// check_utf8(&input, &mut has_error, &mut previous); +// // detect odd sequences of backslashes +// let odd_ends: u64 = +// find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); +// +// // detect insides of quote pairs ("quote_mask") and also our quote_bits +// // themselves +// let mut quote_bits: u64 = 0; +// let quote_mask: u64 = find_quote_mask_and_bits( +// &input, +// odd_ends, +// &mut prev_iter_inside_quote, +// &mut quote_bits, +// &mut error_mask, +// ); +// +// // take the previous iterations structural bits, not our current iteration, 
+// // and flatten +// flatten_bits(&mut structural_indexes, idx as u32, structurals); +// +// let mut whitespace: u64 = 0; +// find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals); +// +// // fixup structurals to reflect quotes and add pseudo-structural characters +// structurals = finalize_structurals( +// structurals, +// whitespace, +// quote_mask, +// quote_bits, +// &mut prev_iter_ends_pseudo_pred, +// ); +// idx += 64; +// } +// +// // we use a giant copy-paste which is ugly. +// // but otherwise the string needs to be properly padded or else we +// // risk invalidating the UTF-8 checks. +// if idx < len { +// let mut tmpbuf: [u8; 64] = [0x20; 64]; +// tmpbuf +// .as_mut_ptr() +// .copy_from(input.as_ptr().add(idx), len as usize - idx); +// let input: SimdInput = fill_input(&tmpbuf); +// +// check_utf8(&input, &mut has_error, &mut previous); +// +// // detect odd sequences of backslashes +// let odd_ends: u64 = +// find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); +// +// // detect insides of quote pairs ("quote_mask") and also our quote_bits +// // themselves +// let mut quote_bits: u64 = 0; +// let quote_mask: u64 = find_quote_mask_and_bits( +// &input, +// odd_ends, +// &mut prev_iter_inside_quote, +// &mut quote_bits, +// &mut error_mask, +// ); +// +// // take the previous iterations structural bits, not our current iteration, +// // and flatten +// flatten_bits(&mut structural_indexes, idx as u32, structurals); +// +// let mut whitespace: u64 = 0; +// find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals); +// +// // fixup structurals to reflect quotes and add pseudo-structural characters +// structurals = finalize_structurals( +// structurals, +// whitespace, +// quote_mask, +// quote_bits, +// &mut prev_iter_ends_pseudo_pred, +// ); +// idx += 64; +// } +// // This test isn't in upstream, for some reason the error mask is et for then. 
+// if prev_iter_inside_quote != 0 { +// return Err(ErrorType::Syntax); +// } +// // finally, flatten out the remaining structurals from the last iteration +// flatten_bits(&mut structural_indexes, idx as u32, structurals); +// +// // a valid JSON file cannot have zero structural indexes - we should have +// // found something (note that we compare to 1 as we always add the root!) +// if structural_indexes.len() == 1 { +// return Err(ErrorType::EOF); +// } +// +// if structural_indexes.last() > Some(&(len as u32)) { +// return Err(ErrorType::InternalError); +// } +// +// if error_mask != 0 { +// return Err(ErrorType::Syntax); +// } +// +// if _mm_testz_si128(has_error, has_error) != 0 { +// Ok(structural_indexes) +// } else { +// Err(ErrorType::InvalidUTF8) +// } +// } +//} diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs new file mode 100644 index 00000000..04bf0640 --- /dev/null +++ b/src/neon/utf8check.rs @@ -0,0 +1,247 @@ +use crate::*; + +/* + * legal utf-8 byte sequence + * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94 + * + * Code Points 1st 2s 3s 4s + * U+0000..U+007F 00..7F + * U+0080..U+07FF C2..DF 80..BF + * U+0800..U+0FFF E0 A0..BF 80..BF + * U+1000..U+CFFF E1..EC 80..BF 80..BF + * U+D000..U+D7FF ED 80..9F 80..BF + * U+E000..U+FFFF EE..EF 80..BF 80..BF + * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + * + */ + +// all byte values must be no larger than 0xF4 + +/*****************************/ +#[cfg_attr(not(feature = "no-inline"), inline)] +fn push_last_byte_of_a_to_b(a: __m128i, b: __m128i) -> __m128i { + unsafe { _mm_alignr_epi8(b, a, 15) } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +fn push_last_2bytes_of_a_to_b(a: __m128i, b: __m128i) -> __m128i { + unsafe { _mm_alignr_epi8(b, a, 14) } +} + +// all byte values must be no larger than 0xF4 +#[cfg_attr(not(feature = "no-inline"), inline)] +fn 
avxcheck_smaller_than_0xf4(current_bytes: __m128i, has_error: &mut __m128i) { + // unsigned, saturates to 0 below max + *has_error = unsafe { + _mm_or_si128( + *has_error, + _mm_subs_epu8(current_bytes, _mm_set1_epi8(static_cast_i8!(0xF4u8))), + ) + }; +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +fn avxcontinuation_lengths(high_nibbles: __m128i) -> __m128i { + unsafe { + _mm_shuffle_epi8( + _mm_setr_epi8( + 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) + 0, 0, 0, 0, // 10xx (continuation) + 2, 2, // 110x + 3, // 1110 + 4, // 1111, next should be 0 (not checked here) + ), + high_nibbles, + ) + } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +fn avxcarry_continuations(initial_lengths: __m128i, previous_carries: __m128i) -> __m128i { + unsafe { + let right1: __m128i = _mm_subs_epu8( + push_last_byte_of_a_to_b(previous_carries, initial_lengths), + _mm_set1_epi8(1), + ); + let sum: __m128i = _mm_add_epi8(initial_lengths, right1); + let right2: __m128i = _mm_subs_epu8( + push_last_2bytes_of_a_to_b(previous_carries, sum), + _mm_set1_epi8(2), + ); + _mm_add_epi8(sum, right2) + } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +fn avxcheck_continuations(initial_lengths: __m128i, carries: __m128i, has_error: &mut __m128i) { + // overlap || underlap + // carry > length && length > 0 || !(carry > length) && !(length > 0) + // (carries > length) == (lengths > 0) + unsafe { + let overunder: __m128i = _mm_cmpeq_epi8( + _mm_cmpgt_epi8(carries, initial_lengths), + _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128()), + ); + + *has_error = _mm_or_si128(*has_error, overunder); + } +} + +// when 0xED is found, next byte must be no larger than 0x9F +// when 0xF4 is found, next byte must be no larger than 0x8F +// next byte must be continuation, ie sign bit is set, so signed < is ok +#[cfg_attr(not(feature = "no-inline"), inline)] +fn avxcheck_first_continuation_max( + current_bytes: __m128i, + off1_current_bytes: __m128i, + has_error: &mut __m128i, +) { + unsafe { + 
let mask_ed: __m128i = _mm_cmpeq_epi8( + off1_current_bytes, + _mm_set1_epi8(static_cast_i8!(0xEDu8)), + ); + let mask_f4: __m128i = _mm_cmpeq_epi8( + off1_current_bytes, + _mm_set1_epi8(static_cast_i8!(0xF4u8)), + ); + + let badfollow_ed: __m128i = _mm_and_si128( + _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(static_cast_i8!(0x9Fu8))), + mask_ed, + ); + let badfollow_f4: __m128i = _mm_and_si128( + _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(static_cast_i8!(0x8Fu8))), + mask_f4, + ); + + *has_error = _mm_or_si128(*has_error, _mm_or_si128(badfollow_ed, badfollow_f4)); + } +} + +// map off1_hibits => error condition +// hibits off1 cur +// C => < C2 && true +// E => < E1 && < A0 +// F => < F1 && < 90 +// else false && false +#[cfg_attr(not(feature = "no-inline"), inline)] +fn avxcheck_overlong( + current_bytes: __m128i, + off1_current_bytes: __m128i, + hibits: __m128i, + previous_hibits: __m128i, + has_error: &mut __m128i, +) { + unsafe { + let off1_hibits: __m128i = push_last_byte_of_a_to_b(previous_hibits, hibits); + let initial_mins: __m128i = _mm_shuffle_epi8( + _mm_setr_epi8( + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, // 10xx => false + static_cast_i8!(0xC2u8), + -128, // 110x + static_cast_i8!(0xE1u8), // 1110 + static_cast_i8!(0xF1u8), // 1111 + ), + off1_hibits, + ); + + let initial_under: __m128i = _mm_cmpgt_epi8(initial_mins, off1_current_bytes); + + let second_mins: __m128i = _mm_shuffle_epi8( + _mm_setr_epi8( + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, + -128, // 10xx => false + 127, + 127, // 110x => true + static_cast_i8!(0xA0u8), // 1110 + static_cast_i8!(0x90u8), // 1111 + ), + off1_hibits, + ); + let second_under: __m128i = _mm_cmpgt_epi8(second_mins, current_bytes); + *has_error = _mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under)); + } +} + +pub struct AvxProcessedUtfBytes { + rawbytes: __m128i, + high_nibbles: __m128i, + pub 
carried_continuations: __m128i, +} + +impl Default for AvxProcessedUtfBytes { + #[cfg_attr(not(feature = "no-inline"), inline)] + fn default() -> Self { + unsafe { + AvxProcessedUtfBytes { + rawbytes: _mm_setzero_si128(), + high_nibbles: _mm_setzero_si128(), + carried_continuations: _mm_setzero_si128(), + } + } + } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +fn avx_count_nibbles(bytes: __m128i, answer: &mut AvxProcessedUtfBytes) { + answer.rawbytes = bytes; + answer.high_nibbles = + unsafe { _mm_and_si128(_mm_srli_epi16(bytes, 4), _mm_set1_epi8(0x0F)) }; +} + +// check whether the current bytes are valid UTF-8 +// at the end of the function, previous gets updated +#[cfg_attr(not(feature = "no-inline"), inline)] +pub fn avxcheck_utf8_bytes( + current_bytes: __m128i, + previous: &AvxProcessedUtfBytes, + has_error: &mut __m128i, +) -> AvxProcessedUtfBytes { + let mut pb = AvxProcessedUtfBytes::default(); + avx_count_nibbles(current_bytes, &mut pb); + + avxcheck_smaller_than_0xf4(current_bytes, has_error); + + let initial_lengths: __m128i = avxcontinuation_lengths(pb.high_nibbles); + + pb.carried_continuations = + avxcarry_continuations(initial_lengths, previous.carried_continuations); + + avxcheck_continuations(initial_lengths, pb.carried_continuations, has_error); + + let off1_current_bytes: __m128i = push_last_byte_of_a_to_b(previous.rawbytes, pb.rawbytes); + avxcheck_first_continuation_max(current_bytes, off1_current_bytes, has_error); + + avxcheck_overlong( + current_bytes, + off1_current_bytes, + pb.high_nibbles, + previous.high_nibbles, + has_error, + ); + pb +} diff --git a/src/numberparse.rs b/src/numberparse.rs index 03880d56..425d16ab 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -1,10 +1,6 @@ use crate::charutils::*; use crate::unlikely; use crate::*; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; const POWER_OF_TEN: [f64; 617] = [ 1e-308, 1e-307, 1e-306, 1e-305, 
1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298, 1e-297, @@ -133,26 +129,14 @@ pub enum Number { } #[cfg_attr(not(feature = "no-inline"), inline)] +#[cfg(target_arch = "aarch64")] fn parse_eight_digits_unrolled(chars: &[u8]) -> u32 { - unsafe { - // this actually computes *16* values so we are being wasteful. - let ascii0: __m128i = _mm_set1_epi8(b'0' as i8); - let mul_1_10: __m128i = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - let mul_1_100: __m128i = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - let mul_1_10000: __m128i = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - // We know what we're doing right? :P - #[allow(clippy::cast_ptr_alignment)] - let input: __m128i = _mm_sub_epi8( - _mm_loadu_si128(chars.get_unchecked(0..16).as_ptr() as *const __m128i), - ascii0, - ); - let t1: __m128i = _mm_maddubs_epi16(input, mul_1_10); - let t2: __m128i = _mm_madd_epi16(t1, mul_1_100); - let t3: __m128i = _mm_packus_epi32(t2, t2); - let t4: __m128i = _mm_madd_epi16(t3, mul_1_10000); - _mm_cvtsi128_si32(t4) as u32 // only captures the sum of the first 8 digits, drop the rest - } + let val: u64 = unsafe { *(chars.as_ptr() as *const u64) }; + // memcpy(&val, chars, sizeof(u64)); + let val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + let val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + + return ((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32) as u32; } impl<'de> Deserializer<'de> { diff --git a/src/portability.rs b/src/portability.rs new file mode 100644 index 00000000..69481a85 --- /dev/null +++ b/src/portability.rs @@ -0,0 +1,30 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +#[cfg_attr(not(feature = "no-inline"), inline)] +#[cfg(target_arch = "x86_64")] +pub fn add_overflow(value1: u64, value2: u64, result: &mut u64) -> bool { + unsafe { _addcarry_u64(0, value1, value2, result) != 0 } +} + +//TODO: static? 
+ +#[cfg_attr(not(feature = "no-inline"), inline)] +#[cfg(target_arch = "x86_64")] +pub fn hamming(input_num: u64) -> u32 { + unsafe { _popcnt64(input_num as i64) as u32 } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +#[cfg(target_arch = "x86_64")] +pub fn hamming(input_num: u64) -> u32 { + unsafe { __popcnt(input_num as u32) + __popcnt((input_num >> 32) as u32) as u32 } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +#[cfg(target_arch = "x86_64")] +pub fn trailingzeroes(input_num: u64) -> u32 { + unsafe { _tzcnt_u64(input_num) as u32 } +} diff --git a/src/stage2.rs b/src/stage2.rs index aaa51ad8..f33a3877 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -1,9 +1,11 @@ #![allow(dead_code)] +use crate::charutils::*; #[cfg(target_feature = "avx2")] use crate::avx2::stage1::SIMDJSON_PADDING; -use crate::charutils::*; -#[cfg(not(target_feature = "avx2"))] +#[cfg(target_feature = "sse4.2")] use crate::sse42::stage1::SIMDJSON_PADDING; +#[cfg(target_feature = "neon")] +use crate::neon::stage1::SIMDJSON_PADDING; use crate::{Deserializer, Error, ErrorType, Result}; #[cfg_attr(not(feature = "no-inline"), inline(always))] diff --git a/src/value/generator.rs b/src/value/generator.rs index 55824f57..1d21a833 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -101,91 +101,91 @@ pub trait BaseGenerator { let mut len = string.len(); let mut idx = 0; - unsafe { +// unsafe { // Looking at the table above the lower 5 bits are entirely // quote characters that gives us a bitmask of 0x1f for that // region, only quote (`"`) and backslash (`\`) are not in // this range. 
- if AVX2_PRESENT { - let zero = _mm256_set1_epi8(0); - let lower_quote_range = _mm256_set1_epi8(0x1F as i8); - let quote = _mm256_set1_epi8(b'"' as i8); - let backslash = _mm256_set1_epi8(b'\\' as i8); - while len - idx >= 32 { - // Load 32 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(idx) as *const __m256i); - // Test the data against being backslash and quote. - let bs_or_quote = _mm256_or_si256( - _mm256_cmpeq_epi8(data, backslash), - _mm256_cmpeq_epi8(data, quote), - ); - // Now mask the data with the quote range (0x1F). - let in_quote_range = _mm256_and_si256(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. - let is_unchanged = _mm256_xor_si256(data, in_quote_range); - let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..idx + quote_dist])); - let ch = string[idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), - - escape => stry!(self.write(&[b'\\', escape])), - }; - string = &string[idx + quote_dist + 1..]; - idx = 0; - len = string.len(); - } else { - idx += 32; - } - } - } +// if AVX2_PRESENT { +// let zero = _mm256_set1_epi8(0); +// let lower_quote_range = _mm256_set1_epi8(0x1F as i8); +// let quote = _mm256_set1_epi8(b'"' as i8); +// let backslash = _mm256_set1_epi8(b'\\' as i8); +// while len - idx >= 32 { +// // Load 32 bytes of data; +// #[allow(clippy::cast_ptr_alignment)] +// let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(idx) as *const __m256i); +// // Test the data against being backslash and quote. 
+// let bs_or_quote = _mm256_or_si256( +// _mm256_cmpeq_epi8(data, backslash), +// _mm256_cmpeq_epi8(data, quote), +// ); +// // Now mask the data with the quote range (0x1F). +// let in_quote_range = _mm256_and_si256(data, lower_quote_range); +// // then test of the data is unchanged. aka: xor it with the +// // Any field that was inside the quote range it will be zero +// // now. +// let is_unchanged = _mm256_xor_si256(data, in_quote_range); +// let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); +// let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); +// if quote_bits != 0 { +// let quote_dist = trailingzeroes(quote_bits as u64) as usize; +// stry!(self.get_writer().write_all(&string[0..idx + quote_dist])); +// let ch = string[idx + quote_dist]; +// match ESCAPED[ch as usize] { +// b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), +// +// escape => stry!(self.write(&[b'\\', escape])), +// }; +// string = &string[idx + quote_dist + 1..]; +// idx = 0; +// len = string.len(); +// } else { +// idx += 32; +// } +// } +// } // The case where we have a 16+ byte block // we repeate the same logic as above but with // only 16 bytes - let zero = _mm_set1_epi8(0); - let lower_quote_range = _mm_set1_epi8(0x1F as i8); - let quote = _mm_set1_epi8(b'"' as i8); - let backslash = _mm_set1_epi8(b'\\' as i8); - while len - idx > 16 { - // Load 16 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m128i = _mm_loadu_si128(string.as_ptr().add(idx) as *const __m128i); - // Test the data against being backslash and quote. - let bs_or_quote = - _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); - // Now mask the data with the quote range (0x1F). - let in_quote_range = _mm_and_si128(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = _mm_xor_si128(data, in_quote_range); - let in_range = _mm_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..idx + quote_dist])); - let ch = string[idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), - - escape => stry!(self.write(&[b'\\', escape])), - }; - string = &string[idx + quote_dist + 1..]; - idx = 0; - len = string.len(); - } else { - idx += 16; - } - } - stry!(self.get_writer().write_all(&string[0..idx])); - string = &string[idx..]; - } +// let zero = _mm_set1_epi8(0); +// let lower_quote_range = _mm_set1_epi8(0x1F as i8); +// let quote = _mm_set1_epi8(b'"' as i8); +// let backslash = _mm_set1_epi8(b'\\' as i8); +// while len - idx > 16 { +// // Load 16 bytes of data; +// #[allow(clippy::cast_ptr_alignment)] +// let data: __m128i = _mm_loadu_si128(string.as_ptr().add(idx) as *const __m128i); +// // Test the data against being backslash and quote. +// let bs_or_quote = +// _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); +// // Now mask the data with the quote range (0x1F). +// let in_quote_range = _mm_and_si128(data, lower_quote_range); +// // then test of the data is unchanged. aka: xor it with the +// // Any field that was inside the quote range it will be zero +// // now. 
+// let is_unchanged = _mm_xor_si128(data, in_quote_range); +// let in_range = _mm_cmpeq_epi8(is_unchanged, zero); +// let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); +// if quote_bits != 0 { +// let quote_dist = trailingzeroes(quote_bits as u64) as usize; +// stry!(self.get_writer().write_all(&string[0..idx + quote_dist])); +// let ch = string[idx + quote_dist]; +// match ESCAPED[ch as usize] { +// b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), +// +// escape => stry!(self.write(&[b'\\', escape])), +// }; +// string = &string[idx + quote_dist + 1..]; +// idx = 0; +// len = string.len(); +// } else { +// idx += 16; +// } +// } +// stry!(self.get_writer().write_all(&string[0..idx])); +// string = &string[idx..]; +// } // Legacy code to handle the remainder of the code for (index, ch) in string.iter().enumerate() { if ESCAPED[*ch as usize] > 0 { From 9e96fabe62757039e05faf7aabffca9734e33acb Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 9 Aug 2019 18:21:50 -0400 Subject: [PATCH 02/34] feat: progress on neon compilation --- src/neon/intrinsics.rs | 180 +++++++-- src/neon/mod.rs | 2 +- src/neon/simd.rs | 1 + src/neon/simd_llvm.rs | 84 ++--- src/neon/stage1.rs | 814 +++++++++++++++++++---------------------- src/neon/utf8check.rs | 287 ++++++--------- 6 files changed, 686 insertions(+), 682 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index a2bca414..065047b6 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -1,10 +1,10 @@ //use std::arch:: use crate::neon::simd_llvm; -use crate::neon::simd; use std::mem; use std::hint::unreachable_unchecked; +use core; #[allow(unused)] macro_rules! types { @@ -21,21 +21,44 @@ macro_rules! 
types { )*) } -pub type poly64_t = i64; - extern "C" { #[link_name = "llvm.aarch64.neon.pmull64"] fn vmull_p64_(a: i64, b: i64) -> int8x16_t; + #[link_name = "llvm.aarch64.neon.vshrq"] + fn vshrq_n_u8_(a: uint8x16_t, b: u8) -> uint8x16_t; + #[link_name = "llvm.aarch64.neon.addp.v16i8"] + fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.neon.addp.v16i8"] + fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[link_name = "llvm.aarch64.neon.addp.v16i32"] + fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[link_name = "llvm.aarch64.neon.vextq.v16s8"] + fn vextq_s8_(a: int8x16_t, b: int8x16_t, n:u8) -> int8x16_t; + #[link_name = "llvm.aarch64.neon.vtstq.v16u8"] + fn vtstq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.neon.vtstq.v16s8"] + fn vtstq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[link_name = "llvm.ctpop.u64"] + fn ctpop_u64_(a: u64) -> u32; + #[link_name = "llvm.ctlz.u64"] + fn ctlz_u64_(a: u64) -> u32; + #[link_name = "llvm.cttz.u64"] + fn cttz_u64_(a: u64) -> u32; } -//poly128_t vmull_p64 (poly64_t a, poly64_t b) #[inline] -#[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(pmull))] -pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { +pub unsafe fn vmull_p64(a: i64, b: i64) -> uint8x16_t { mem::transmute(vmull_p64_(mem::transmute(a), mem::transmute(b))) } + +#[inline] +pub unsafe fn vshrq_n_u8(a: uint8x16_t, b: u8) -> uint8x16_t { + // FIXME? + vshrq_n_u8_(a, b) +} + types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); @@ -47,8 +70,6 @@ types! { pub struct int16x4_t(i16, i16, i16, i16); /// ARM-specific 64-bit wide vector of four packed `u16`. pub struct uint16x4_t(u16, u16, u16, u16); - // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. - // pub struct float16x4_t(f16, f16, f16, f16); /// ARM-specific 64-bit wide vector of four packed `u16`. 
pub struct poly16x4_t(u16, u16, u16, u16); /// ARM-specific 64-bit wide vector of two packed `i32`. @@ -61,7 +82,6 @@ types! { pub struct int64x1_t(i64); /// ARM-specific 64-bit wide vector of one packed `u64`. pub struct uint64x1_t(u64); - /// ARM-specific 128-bit wide vector of sixteen packed `i8`. pub struct int8x16_t( i8, i8 ,i8, i8, i8, i8 ,i8, i8, @@ -81,8 +101,6 @@ types! { pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16); /// ARM-specific 128-bit wide vector of eight packed `u16`. pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); - // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. - // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); /// ARM-specific 128-bit wide vector of eight packed `u16`. pub struct poly16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); /// ARM-specific 128-bit wide vector of four packed `i32`. @@ -95,8 +113,6 @@ types! { pub struct int64x2_t(i64, i64); /// ARM-specific 128-bit wide vector of two packed `u64`. pub struct uint64x2_t(u64, u64); - /// ARM-specific 128-bit wide vector of one packed `i128` - pub struct poly128_t(i128); } impl uint8x16_t { @@ -111,6 +127,33 @@ impl int8x16_t { } } +impl int32x4_t { + pub fn new(a:i32,b:i32,c:i32,d:i32) -> int32x4_t { + int32x4_t(a, b, c, d) + } +} + +#[inline] +pub fn add_overflow(a: u64, b: u64, out: &mut u64) -> bool { + let (carry, did_carry) = a.overflowing_add(b); + + if did_carry { + *out = carry; + }; + + did_carry +} + +#[inline] +pub unsafe fn vld1q_s8(addr: *const i8) -> int8x16_t { + *(addr as *const int8x16_t) +} + +#[inline] +pub unsafe fn vld1q_u8(addr: *const u8) -> uint8x16_t { + *(addr as *const uint8x16_t) +} + macro_rules! aarch64_simd_2 { ($name:ident, $type:ty, $simd_fn:ident, $intr:ident) => { #[inline] @@ -136,6 +179,8 @@ aarch64_simd_ceq!(vceqq_u64, uint64x2_t); aarch64_simd_ceq!(vceq_p64, uint64x1_t); aarch64_simd_ceq!(vceqq_p64, uint64x2_t); +aarch64_simd_ceq!(vceqq_s8, int8x16_t); + macro_rules! 
aarch64_simd_cgt { ($name:ident, $type:ty) => { /// Compare signed Greater than (vector) @@ -209,20 +254,58 @@ aarch64_simd_cle!(vcleq_s64, int64x2_t); aarch64_simd_cleu!(vcle_u64, uint64x1_t); aarch64_simd_cleu!(vcleq_u64, uint64x2_t); +aarch64_simd_cleu!(vcgtq_s8, int8x16_t); + +#[inline] +pub fn vdupq_n_s8(a:i8) -> int8x16_t { + int8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +} + +#[inline] +pub fn zeroi8x16() -> int8x16_t { + int8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) +} +#[inline] pub fn vdupq_n_u8(a:u8) -> uint8x16_t { uint8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } -pub fn zero8x16() -> uint8x16_t { +#[inline] +pub fn vmovq_n_u8(a:u8) -> uint8x16_t { + uint8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +} + +#[inline] +pub fn zerou8x16() -> uint8x16_t { uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) } -pub unsafe fn vpaddq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_add(a, b) } +#[inline] +pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + vpaddq_u8_(a, b) +} +#[inline] +pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + vaddq_s8_(a, b) +} + +#[inline] +pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + vaddq_s32_(a, b) +} + +#[inline] pub unsafe fn vandq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_and(a, b) } + +#[inline] pub unsafe fn vorrq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_or(a, b) } + +#[inline] pub unsafe fn vandq_s8(a:int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_and(a, b) } + +#[inline] pub unsafe fn vorrq_s8(a:int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_or(a, b) } macro_rules! arm_reinterpret { @@ -257,31 +340,56 @@ macro_rules! 
arm_vget_lane { }; } -arm_vget_lane!(vgetq_lane_u16, u16, uint8x16_t, 7); +arm_vget_lane!(vgetq_lane_u16, u16, uint16x8_t, 7); arm_vget_lane!(vgetq_lane_u64, u64, uint64x2_t, 1); arm_vget_lane!(vget_lane_u64, u64, uint64x1_t, 0); +pub unsafe fn vextq_s8(a:int8x16_t, b:int8x16_t, n:u8) -> int8x16_t { + vextq_s8_(a, b, n) +} + +#[inline] pub fn vqmovn_u64(a:uint64x2_t) -> uint32x2_t { uint32x2_t(a.0 as u32, a.1 as u32) } -//fn vaddq_s8() -> u16 { 0 } -//fn vceqq_s8() -> u16 { 0 } -//fn vceqq_u8() -> u16 { 0 } -//fn vcgtq_s8() -> u16 { 0 } -//fn vcleq_u8() -> u16 { 0 } -//fn vdupq_n_s8() -> u16 { 0 } -//fn vextq_s8() -> u16 { 0 } -//fn vget_lane_u64() -> u16 { 0 } -//fn vgetq_lane_u32() -> u16 { 0 } -//fn vgetq_lane_u64() -> u16 { 0 } -//fn vld1q_s8() -> u16 { 0 } -//fn vld1q_u8() -> u16 { 0 } -//fn vmovq_n_u8() -> u16 { 0 } -//fn vmull_p64() -> u16 { 0 } -//fn vorrq_s8() -> u16 { 0 } -//fn vqsubq_u8() -> u16 { 0 } -//fn vqtbl1q_s8() -> u16 { 0 } -//fn vqtbl1q_u8() -> u16 { 0 } -//fn vshrq_n_u8() -> u16 { 0 } -//fn vst1q_u8() -> u16 { 0 } \ No newline at end of file +#[inline] +pub unsafe fn vqtbl1q_s8(t: int8x16_t, idx: uint8x16_t) -> int8x16_t { + mem::transmute(core::arch::aarch64::vqtbl1q_s8(mem::transmute(t), mem::transmute(idx))) +} + +#[inline] +pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t { + mem::transmute(core::arch::aarch64::vqtbl1q_s8(mem::transmute(t), mem::transmute(idx))) +} + +#[inline] +pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + // FIXME? 
+ simd_llvm::simd_sub(mem::transmute(a), mem::transmute(b)) +} + +#[inline] +pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + vtstq_u8_(a, b) +} + +#[inline] +pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + vtstq_s8_(a, b) +} + +#[inline] +pub unsafe fn hamming(a:u64) -> u32 { + ctpop_u64_(a) +} + +#[inline] +pub unsafe fn trailingzeroes(a:u64) -> u32 { + cttz_u64_(a) +} + +#[inline] +pub unsafe fn vst1q_u32(addr: *mut u8, val : uint32x4_t) { + std::ptr::write(addr as *mut uint32x4_t, val) +} diff --git a/src/neon/mod.rs b/src/neon/mod.rs index 6d23b075..c1a87d98 100644 --- a/src/neon/mod.rs +++ b/src/neon/mod.rs @@ -1,6 +1,6 @@ //pub mod deser; pub mod stage1; -//pub mod utf8check; +pub mod utf8check; mod simd; mod simd_llvm; mod intrinsics; \ No newline at end of file diff --git a/src/neon/simd.rs b/src/neon/simd.rs index ecf5a4cb..544bc0d8 100644 --- a/src/neon/simd.rs +++ b/src/neon/simd.rs @@ -1,4 +1,5 @@ #![allow(non_camel_case_types)] +#![allow(unused)] use crate::neon::simd_llvm; diff --git a/src/neon/simd_llvm.rs b/src/neon/simd_llvm.rs index a6c1eb52..892ae59c 100644 --- a/src/neon/simd_llvm.rs +++ b/src/neon/simd_llvm.rs @@ -1,54 +1,54 @@ extern "platform-intrinsic" { pub fn simd_eq(x: T, y: T) -> U; - pub fn simd_ne(x: T, y: T) -> U; +// pub fn simd_ne(x: T, y: T) -> U; pub fn simd_lt(x: T, y: T) -> U; pub fn simd_le(x: T, y: T) -> U; pub fn simd_gt(x: T, y: T) -> U; pub fn simd_ge(x: T, y: T) -> U; - - pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U; - pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U; - pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U; - pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; - pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; - pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U; - pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U; - - pub fn simd_insert(x: T, idx: u32, val: U) -> T; +// +// pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U; +// 
pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U; +// pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U; +// pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; +// pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; +// pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U; +// pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U; +// +// pub fn simd_insert(x: T, idx: u32, val: U) -> T; pub fn simd_extract(x: T, idx: u32) -> U; - - pub fn simd_cast(x: T) -> U; - +// +// pub fn simd_cast(x: T) -> U; +// pub fn simd_add(x: T, y: T) -> T; pub fn simd_sub(x: T, y: T) -> T; - pub fn simd_mul(x: T, y: T) -> T; - pub fn simd_div(x: T, y: T) -> T; - pub fn simd_shl(x: T, y: T) -> T; - pub fn simd_shr(x: T, y: T) -> T; +// pub fn simd_mul(x: T, y: T) -> T; +// pub fn simd_div(x: T, y: T) -> T; +// pub fn simd_shl(x: T, y: T) -> T; +// pub fn simd_shr(x: T, y: T) -> T; pub fn simd_and(x: T, y: T) -> T; pub fn simd_or(x: T, y: T) -> T; - pub fn simd_xor(x: T, y: T) -> T; - - pub fn simd_reduce_add_unordered(x: T) -> U; - pub fn simd_reduce_mul_unordered(x: T) -> U; - pub fn simd_reduce_add_ordered(x: T, acc: U) -> U; - pub fn simd_reduce_mul_ordered(x: T, acc: U) -> U; - pub fn simd_reduce_min(x: T) -> U; - pub fn simd_reduce_max(x: T) -> U; - pub fn simd_reduce_min_nanless(x: T) -> U; - pub fn simd_reduce_max_nanless(x: T) -> U; - pub fn simd_reduce_and(x: T) -> U; - pub fn simd_reduce_or(x: T) -> U; - pub fn simd_reduce_xor(x: T) -> U; - pub fn simd_reduce_all(x: T) -> bool; - pub fn simd_reduce_any(x: T) -> bool; - - pub fn simd_select(m: M, a: T, b: T) -> T; - pub fn simd_select_bitmask(m: M, a: T, b: T) -> T; - - pub fn simd_fmin(a: T, b: T) -> T; - pub fn simd_fmax(a: T, b: T) -> T; - - pub fn simd_fsqrt(a: T) -> T; - pub fn simd_fma(a: T, b: T, c: T) -> T; +// pub fn simd_xor(x: T, y: T) -> T; +// +// pub fn simd_reduce_add_unordered(x: T) -> U; +// pub fn simd_reduce_mul_unordered(x: T) -> U; +// pub fn simd_reduce_add_ordered(x: T, acc: U) -> U; 
+// pub fn simd_reduce_mul_ordered(x: T, acc: U) -> U; +// pub fn simd_reduce_min(x: T) -> U; +// pub fn simd_reduce_max(x: T) -> U; +// pub fn simd_reduce_min_nanless(x: T) -> U; +// pub fn simd_reduce_max_nanless(x: T) -> U; +// pub fn simd_reduce_and(x: T) -> U; +// pub fn simd_reduce_or(x: T) -> U; +// pub fn simd_reduce_xor(x: T) -> U; +// pub fn simd_reduce_all(x: T) -> bool; +// pub fn simd_reduce_any(x: T) -> bool; +// +// pub fn simd_select(m: M, a: T, b: T) -> T; +// pub fn simd_select_bitmask(m: M, a: T, b: T) -> T; +// +// pub fn simd_fmin(a: T, b: T) -> T; +// pub fn simd_fmax(a: T, b: T) -> T; +// +// pub fn simd_fsqrt(a: T) -> T; +// pub fn simd_fma(a: T, b: T, c: T) -> T; } \ No newline at end of file diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 6e5f6b58..fc4daabc 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -1,15 +1,10 @@ #![allow(dead_code)] -//use crate::portability::*; use crate::neon::intrinsics::*; -//use core::arch::aarch64::*; -//use crate::neon::utf8check::*; +use crate::neon::utf8check::*; use crate::*; use std::mem; -#[macro_use] -use crate::neon::simd::*; - pub const SIMDJSON_PADDING: usize = mem::size_of::() * 4; #[derive(Debug)] @@ -24,34 +19,31 @@ fn fill_input(ptr: &[u8]) -> SimdInput { unsafe { #[allow(clippy::cast_ptr_alignment)] SimdInput { - v0: mem::transmute(ptr.as_ptr() as *const uint8x16_t), - v1: mem::transmute(ptr.as_ptr().add(16) as *const uint8x16_t), - v2: mem::transmute(ptr.as_ptr().add(32) as *const uint8x16_t), - v3: mem::transmute(ptr.as_ptr().add(48) as *const uint8x16_t), + v0: vld1q_u8(ptr.as_ptr() as *const u8), + v1: vld1q_u8(ptr.as_ptr().add(16) as *const u8), + v2: vld1q_u8(ptr.as_ptr().add(32) as *const u8), + v3: vld1q_u8(ptr.as_ptr().add(48) as *const u8), } } } #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn neon_movemask(input: uint8x16_t) -> u16 { - const bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 
0x8, 0x10, 0x20, 0x40, 0x80); + let bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); let minput: uint8x16_t = vandq_u8(input, bit_mask); let tmp: uint8x16_t = vpaddq_u8(minput, minput); let tmp = vpaddq_u8(tmp, tmp); let tmp = vpaddq_u8(tmp, tmp); - unsafe { - vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) - } + vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) } - #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: uint8x16_t) -> u64 { - const bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + let bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); let t0 = vandq_u8(p0, bit_mask); let t1 = vandq_u8(p1, bit_mask); let t2 = vandq_u8(p2, bit_mask); @@ -60,8 +52,26 @@ unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: let sum1 = vpaddq_u8(t2, t3); let sum0 = vpaddq_u8(sum0, sum1); let sum0 = vpaddq_u8(sum0, sum0); - unsafe { - vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0) + + vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0) +} + +unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { + vgetq_lane_u64(vreinterpretq_u64_u8(vmull_p64(-1, quote_bits as i64)), 0) +} + +struct Utf8CheckingState { + has_error: int8x16_t, + previous: ProcessedUtfBytes, +} + +impl Default for Utf8CheckingState { + #[cfg_attr(not(feature = "no-inline"), inline)] + fn default() -> Self { + Utf8CheckingState { + has_error: vdupq_n_s8(0), + previous:ProcessedUtfBytes::default() + } } } @@ -73,58 +83,54 @@ unsafe fn check_ascii_neon(si: &SimdInput) -> bool { let t3: uint8x16_t = vorrq_u8(t0, t1); let t4: uint8x16_t = vandq_u8(t3, high_bit); - unsafe { - let v64: uint64x2_t = vreinterpretq_u64_u8(t4); - let v32: uint32x2_t = vqmovn_u64(v64); - let 
result: uint64x1_t = vreinterpret_u64_u32(v32); + let v64: uint64x2_t = vreinterpretq_u64_u8(t4); + let v32: uint32x2_t = vqmovn_u64(v64); + let result: uint64x1_t = vreinterpret_u64_u32(v32); - vget_lane_u64(result, 0) == 0 - } + vget_lane_u64(result, 0) == 0 } -struct utf8_checking_state { - has_error: int8x16_t, - previous: processed_utf_bytes, +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn check_utf8( + input: &SimdInput, + state: &mut Utf8CheckingState, +) { + if check_ascii_neon(input) { + // All bytes are ascii. Therefore the byte that was just before must be + // ascii too. We only check the byte that was just before simd_input. Nines + // are arbitrary values. + let VERROR: int8x16_t = + int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); + state.has_error = + vorrq_s8(vcgtq_s8(state.previous.carried_continuations, VERROR), + state.has_error); + } else { + // it is not ascii so we have to do heavy work + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), + &(state.previous), &mut (state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), + &(state.previous), &mut (state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), + &(state.previous), &mut (state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), + &(state.previous), &mut (state.has_error)); + } } -// -//#[cfg_attr(not(feature = "no-inline"), inline(always))] -//unsafe fn check_utf8( -// input: &SimdInput, -// has_error: &mut uint8x16_t, -// previous: &mut utf8_checking_state, -//) { -// if check_ascii_neon(input) { -// // All bytes are ascii. Therefore the byte that was just before must be -// // ascii too. We only check the byte that was just before simd_input. Nines -// // are arbitrary values. 
-// const verror: int8x16_t = -// int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); -// state.has_error = -// vorrq_s8(vreinterpretq_s8_u8( -// vcgtq_s8(state.previous.carried_continuations, verror)), -// state.has_error); -// } else { -// // it is not ascii so we have to do heavy work -// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), -// &(state.previous), &(state.has_error)); -// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), -// &(state.previous), &(state.has_error)); -// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), -// &(state.previous), &(state.has_error)); -// state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), -// &(state.previous), &(state.has_error)); -// } -//} // a straightforward comparison of a mask against input #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { - const MASK: uint8x16_t = vdupq_n_u8(m); - - let cmp_res_0: uint8x16_t = vceqq_u8(input.v0, MASK); - let cmp_res_1: uint8x16_t = vceqq_u8(input.v1, MASK); - let cmp_res_2: uint8x16_t = vceqq_u8(input.v2, MASK); - let cmp_res_3: uint8x16_t = vceqq_u8(input.v3, MASK); + macro_rules! 
call { + ($imm8:expr) => { + vdupq_n_u8($imm8) + }; + }; + let mask: uint8x16_t = constify_imm8!(m, call); + + let cmp_res_0: uint8x16_t = vceqq_u8(input.v0, mask); + let cmp_res_1: uint8x16_t = vceqq_u8(input.v1, mask); + let cmp_res_2: uint8x16_t = vceqq_u8(input.v2, mask); + let cmp_res_3: uint8x16_t = vceqq_u8(input.v3, mask); neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) } @@ -132,16 +138,66 @@ unsafe fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { // find all values less than or equal than the content of maxval (using unsigned arithmetic) #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn unsigned_lteq_against_input(input: &SimdInput, maxval: u8) -> u64 { - const MASK: uint8x16_t = vdupq_n_u8(maxval); - - let cmp_res_0: uint8x16_t = vcleq_u8(input.v0, MASK); - let cmp_res_1: uint8x16_t = vcleq_u8(input.v1, MASK); - let cmp_res_2: uint8x16_t = vcleq_u8(input.v2, MASK); - let cmp_res_3: uint8x16_t = vcleq_u8(input.v3, MASK); + macro_rules! 
call { + ($imm8:expr) => { + vdupq_n_u8($imm8) + }; + }; + let mask: uint8x16_t = constify_imm8!(maxval, call); + + let cmp_res_0: uint8x16_t = vcleq_u8(input.v0, mask); + let cmp_res_1: uint8x16_t = vcleq_u8(input.v1, mask); + let cmp_res_2: uint8x16_t = vcleq_u8(input.v2, mask); + let cmp_res_3: uint8x16_t = vcleq_u8(input.v3, mask); neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) } +unsafe fn find_whitespace_and_structurals(input: &SimdInput, whitespace: &mut u64, structurals: &mut u64) { + let low_nibble_mask: uint8x16_t = uint8x16_t::new(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + let high_nibble_mask: uint8x16_t = uint8x16_t::new(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + let structural_shufti_mask: uint8x16_t = vmovq_n_u8(0x7); + let whitespace_shufti_mask: uint8x16_t = vmovq_n_u8(0x18); + let low_nib_and_mask: uint8x16_t = vmovq_n_u8(0xf); + + let nib_0_lo: uint8x16_t = vandq_u8(input.v0, low_nib_and_mask); + let nib_0_hi: uint8x16_t = vshrq_n_u8(input.v0, 4); + let shuf_0_lo: uint8x16_t = vqtbl1q_u8(low_nibble_mask, nib_0_lo); + let shuf_0_hi: uint8x16_t = vqtbl1q_u8(high_nibble_mask, nib_0_hi); + let v_0: uint8x16_t = vandq_u8(shuf_0_lo, shuf_0_hi); + + let nib_1_lo: uint8x16_t = vandq_u8(input.v1, low_nib_and_mask); + let nib_1_hi: uint8x16_t = vshrq_n_u8(input.v1, 4); + let shuf_1_lo: uint8x16_t = vqtbl1q_u8(low_nibble_mask, nib_1_lo); + let shuf_1_hi: uint8x16_t = vqtbl1q_u8(high_nibble_mask, nib_1_hi); + let v_1: uint8x16_t = vandq_u8(shuf_1_lo, shuf_1_hi); + + let nib_2_lo: uint8x16_t = vandq_u8(input.v2, low_nib_and_mask); + let nib_2_hi: uint8x16_t = vshrq_n_u8(input.v2, 4); + let shuf_2_lo: uint8x16_t = vqtbl1q_u8(low_nibble_mask, nib_2_lo); + let shuf_2_hi: uint8x16_t = vqtbl1q_u8(high_nibble_mask, nib_2_hi); + let v_2: uint8x16_t = vandq_u8(shuf_2_lo, shuf_2_hi); + + let nib_3_lo: uint8x16_t = vandq_u8(input.v3, low_nib_and_mask); + let nib_3_hi: uint8x16_t = vshrq_n_u8(input.v3, 4); + let shuf_3_lo: uint8x16_t = 
vqtbl1q_u8(low_nibble_mask, nib_3_lo); + let shuf_3_hi: uint8x16_t = vqtbl1q_u8(high_nibble_mask, nib_3_hi); + let v_3: uint8x16_t = vandq_u8(shuf_3_lo, shuf_3_hi); + + let tmp_0: uint8x16_t = vtstq_u8(v_0, structural_shufti_mask); + let tmp_1: uint8x16_t = vtstq_u8(v_1, structural_shufti_mask); + let tmp_2: uint8x16_t = vtstq_u8(v_2, structural_shufti_mask); + let tmp_3: uint8x16_t = vtstq_u8(v_3, structural_shufti_mask); + *structurals = neon_movemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3); + + let tmp_ws_0: uint8x16_t = vtstq_u8(v_0, whitespace_shufti_mask); + let tmp_ws_1: uint8x16_t = vtstq_u8(v_1, whitespace_shufti_mask); + let tmp_ws_2: uint8x16_t = vtstq_u8(v_2, whitespace_shufti_mask); + let tmp_ws_3: uint8x16_t = vtstq_u8(v_3, whitespace_shufti_mask); + *whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3); +} + + unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_backslash: &mut u64) -> u64 { const EVEN_BITS: u64 = 0x5555_5555_5555_5555; const ODD_BITS: u64 = !EVEN_BITS; @@ -162,8 +218,8 @@ unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_bac let iter_ends_odd_backslash: bool = add_overflow(bs_bits, odd_starts, &mut odd_carries); odd_carries |= *prev_iter_ends_odd_backslash; // push in bit zero as a potential end - // if we had an odd-numbered run at the - // end of the previous iteration + // if we had an odd-numbered run at the + // end of the previous iteration *prev_iter_ends_odd_backslash = if iter_ends_odd_backslash { 0x1 } else { 0x0 }; let even_carry_ends: u64 = even_carries & !bs_bits; let odd_carry_ends: u64 = odd_carries & !bs_bits; @@ -173,13 +229,24 @@ unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_bac odd_ends } +//template <> +//really_inline ErrorValues check_utf8_errors( +// utf8_checking_state &state) { +// uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); +// uint32x2_t v32 = vqmovn_u64(v64); +// uint64x1_t result = 
vreinterpret_u64_u32(v32); +// return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR +// : simdjson::SUCCESS; +//} + unsafe fn find_quote_mask_and_bits(input: &SimdInput, odd_ends: u64, - prev_iter_inside_quote: &u64, quote_bits: &mut u64, error_mask: &u64) -> u64 { - quote_bits = cmp_mask_against_input(input, b'"'); - quote_bits = &(quote_bits & !odd_ends); - let quote_mask: u64 = vmull_p64(-1, quote_bits); + prev_iter_inside_quote: &mut u64, quote_bits: &mut u64, error_mask: &mut u64) -> u64 { + *quote_bits = cmp_mask_against_input(input, b'"'); + *quote_bits &= !odd_ends; + + let mut quote_mask: u64 = compute_quote_mask(*quote_bits); - quote_mask ^= prev_iter_inside_quote; + quote_mask ^= *prev_iter_inside_quote; // All Unicode characters may be placed within the // quotation marks, except for the characters that MUST be escaped: @@ -187,374 +254,255 @@ unsafe fn find_quote_mask_and_bits(input: &SimdInput, odd_ends: u64, //through U+001F). // https://tools.ietf.org/html/rfc8259 let unescaped: u64 = unsigned_lteq_against_input(input, 0x1F); - error_mask |= quote_mask & unescaped; + *error_mask |= quote_mask & unescaped; // right shift of a signed value expected to be well-defined and standard // compliant as of C++20, // John Regher from Utah U. 
says this is fine code - prev_iter_inside_quote = quote_mask >> 63; + *prev_iter_inside_quote = quote_mask >> 63; quote_mask } -// -//#[cfg_attr(not(feature = "no-inline"), inline(always))] -//unsafe fn find_whitespace_and_structurals( -// input: &SimdInput, -// whitespace: &mut u64, -// structurals: &mut u64, -//) { -// // do a 'shufti' to detect structural JSON characters -// // they are -// // * `{` 0x7b -// // * `}` 0x7d -// // * `:` 0x3a -// // * `[` 0x5b -// // * `]` 0x5d -// // * `,` 0x2c -// // these go into the first 3 buckets of the comparison (1/2/4) -// -// // we are also interested in the four whitespace characters: -// // * space 0x20 -// // * linefeed 0x0a -// // * horizontal tab 0x09 -// // * carriage return 0x0d -// // these go into the next 2 buckets of the comparison (8/16) -// -// // TODO: const? -// let low_nibble_mask: __m128i = _mm_setr_epi8( -// 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, -// ); -// // TODO: const? -// let high_nibble_mask: __m128i = _mm_setr_epi8( -// 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, -// ); -// -// let structural_shufti_mask: __m128i = _mm_set1_epi8(0x7); -// let whitespace_shufti_mask: __m128i = _mm_set1_epi8(0x18); -// -// let v_v0: __m128i = _mm_and_si128( -// _mm_shuffle_epi8(low_nibble_mask, input.v0), -// _mm_shuffle_epi8( -// high_nibble_mask, -// _mm_and_si128(_mm_srli_epi32(input.v0, 4), _mm_set1_epi8(0x7f)), -// ), -// ); -// let v_v1: __m128i = _mm_and_si128( -// _mm_shuffle_epi8(low_nibble_mask, input.v1), -// _mm_shuffle_epi8( -// high_nibble_mask, -// _mm_and_si128(_mm_srli_epi32(input.v1, 4), _mm_set1_epi8(0x7f)), -// ), -// ); -// let v_v2: __m128i = _mm_and_si128( -// _mm_shuffle_epi8(low_nibble_mask, input.v2), -// _mm_shuffle_epi8( -// high_nibble_mask, -// _mm_and_si128(_mm_srli_epi32(input.v2, 4), _mm_set1_epi8(0x7f)), -// ), -// ); -// let v_v3: __m128i = _mm_and_si128( -// _mm_shuffle_epi8(low_nibble_mask, input.v3), -// _mm_shuffle_epi8( -// high_nibble_mask, -// 
_mm_and_si128(_mm_srli_epi32(input.v3, 4), _mm_set1_epi8(0x7f)), -// ), -// ); -// -// let tmp_v0: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v0, structural_shufti_mask), -// _mm_set1_epi8(0), -// ); -// let tmp_v1: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v1, structural_shufti_mask), -// _mm_set1_epi8(0), -// ); -// let tmp_v2: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v2, structural_shufti_mask), -// _mm_set1_epi8(0), -// ); -// let tmp_v3: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v3, structural_shufti_mask), -// _mm_set1_epi8(0), -// ); -// -// let structural_res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(tmp_v0))); -// let structural_res_1: u64 = _mm_movemask_epi8(tmp_v1) as u64; -// let structural_res_2: u64 = _mm_movemask_epi8(tmp_v2) as u64; -// let structural_res_3: u64 = _mm_movemask_epi8(tmp_v3) as u64; -// -// *structurals = !(structural_res_0 | (structural_res_1 << 16) | (structural_res_2 << 32) | (structural_res_3 << 48)); -// -// let tmp_ws_v0: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v0, whitespace_shufti_mask), -// _mm_set1_epi8(0), -// ); -// let tmp_ws_v1: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v1, whitespace_shufti_mask), -// _mm_set1_epi8(0), -// ); -// let tmp_ws_v2: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v2, whitespace_shufti_mask), -// _mm_set1_epi8(0), -// ); -// let tmp_ws_v3: __m128i = _mm_cmpeq_epi8( -// _mm_and_si128(v_v3, whitespace_shufti_mask), -// _mm_set1_epi8(0), -// ); -// -// let ws_res_0: u64 = u64::from(static_cast_u32!(_mm_movemask_epi8(tmp_ws_v0))); -// let ws_res_1: u64 = _mm_movemask_epi8(tmp_ws_v1) as u64; -// let ws_res_2: u64 = _mm_movemask_epi8(tmp_ws_v2) as u64; -// let ws_res_3: u64 = _mm_movemask_epi8(tmp_ws_v3) as u64; -// -// *whitespace = !(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | (ws_res_3 << 48)); -//} -// - // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at // 
base_ptr[base] incrementing base as we go // will potentially store extra values beyond end of valid bits, so base_ptr // needs to be large enough to handle this //TODO: usize was u32 here does this matter? -//#[cfg_attr(not(feature = "no-inline"), inline(always))] -//fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { -// let cnt: usize = hamming(bits) as usize; -// let mut l = base.len(); -// let idx_minus_64 = idx.wrapping_sub(64); -// let idx_64_v = unsafe { -// _mm_set_epi32( -// static_cast_i32!(idx_minus_64), -// static_cast_i32!(idx_minus_64), -// static_cast_i32!(idx_minus_64), -// static_cast_i32!(idx_minus_64), -// ) -// }; -// -// // We're doing some trickery here. -// // We reserve 64 extra entries, because we've at most 64 bit to set -// // then we trunctate the base to the next base (that we calcuate above) -// // We later indiscriminatory writre over the len we set but that's OK -// // since we ensure we reserve the needed space -// base.reserve(64); -// unsafe { -// base.set_len(l + cnt); -// } -// -// while bits != 0 { -// unsafe { -// let v0 = static_cast_i32!(trailingzeroes(bits)); -// bits &= bits.wrapping_sub(1); -// let v1 = static_cast_i32!(trailingzeroes(bits)); -// bits &= bits.wrapping_sub(1); -// let v2 = static_cast_i32!(trailingzeroes(bits)); -// bits &= bits.wrapping_sub(1); -// let v3 = static_cast_i32!(trailingzeroes(bits)); -// bits &= bits.wrapping_sub(1); -// -// let v: __m128i = _mm_set_epi32(v3, v2, v1, v0); -// let v: __m128i = _mm_add_epi32(idx_64_v, v); -// #[allow(clippy::cast_ptr_alignment)] -// _mm_storeu_si128(base.as_mut_ptr().add(l) as *mut __m128i, v); -// } -// l += 4; -// } -//} +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { + let cnt: usize = hamming(bits) as usize; + let mut l = base.len(); + let idx_minus_64 = idx.wrapping_sub(64); + let idx_64_v = int32x4_t::new( + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), 
+ static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + ); + + // We're doing some trickery here. + // We reserve 64 extra entries, because we've at most 64 bit to set + // then we trunctate the base to the next base (that we calcuate above) + // We later indiscriminatory writre over the len we set but that's OK + // since we ensure we reserve the needed space + base.reserve(64); + base.set_len(l + cnt); + + while bits != 0 { + let v0 : i32 = static_cast_i32!(trailingzeroes(bits)); + bits &= bits.wrapping_sub(1); + let v1 : i32 = static_cast_i32!(trailingzeroes(bits)); + bits &= bits.wrapping_sub(1); + let v2 : i32 = static_cast_i32!(trailingzeroes(bits)); + bits &= bits.wrapping_sub(1); + let v3 : i32 = static_cast_i32!(trailingzeroes(bits)); + bits &= bits.wrapping_sub(1); + + let v: int32x4_t = int32x4_t::new(v3, v2, v1, v0); + let v: int32x4_t = vaddq_s32(idx_64_v, v); + + std::ptr::write(base.as_mut_ptr().add(l) as *mut int32x4_t, v); + l += 4; + } +} -// -//// return a updated structural bit vector with quoted contents cleared out and -//// pseudo-structural characters added to the mask -//// updates prev_iter_ends_pseudo_pred which tells us whether the previous -//// iteration ended on a whitespace or a structural character (which means that -//// the next iteration -//// will have a pseudo-structural character at its start) -//#[cfg_attr(not(feature = "no-inline"), inline(always))] -//fn finalize_structurals( -// mut structurals: u64, -// whitespace: u64, -// quote_mask: u64, -// quote_bits: u64, -// prev_iter_ends_pseudo_pred: &mut u64, -//) -> u64 { -// // mask off anything inside quotes -// structurals &= !quote_mask; -// // add the real quote bits back into our bitmask as well, so we can -// // quickly traverse the strings we've spent all this trouble gathering -// structurals |= quote_bits; -// // Now, establish "pseudo-structural characters". 
These are non-whitespace -// // characters that are (a) outside quotes and (b) have a predecessor that's -// // either whitespace or a structural character. This means that subsequent -// // passes will get a chance to encounter the first character of every string -// // of non-whitespace and, if we're parsing an atom like true/false/null or a -// // number we can stop at the first whitespace or structural character -// // following it. -// -// // a qualified predecessor is something that can happen 1 position before an -// // psuedo-structural character -// let pseudo_pred: u64 = structurals | whitespace; -// -// let shifted_pseudo_pred: u64 = (pseudo_pred << 1) | *prev_iter_ends_pseudo_pred; -// *prev_iter_ends_pseudo_pred = pseudo_pred >> 63; -// let pseudo_structurals: u64 = shifted_pseudo_pred & (!whitespace) & (!quote_mask); -// structurals |= pseudo_structurals; -// -// // now, we've used our close quotes all we need to. So let's switch them off -// // they will be off in the quote mask and on in quote bits. -// structurals &= !(quote_bits & !quote_mask); -// structurals -//} -// - -//impl<'de> Deserializer<'de> { -// #[inline(never)] -// pub unsafe fn find_structural_bits(input: &[u8]) -> std::result::Result, ErrorType> { -// let len = input.len(); -// // 6 is a heuristic number to estimate it turns out a rate of 1/6 structural caracters lears -// // almost never to relocations. -// let mut structural_indexes = Vec::with_capacity(len / 6); -// structural_indexes.push(0); // push extra root element -// -// let mut has_error: __m128i = _mm_setzero_si128(); -// let mut previous = AvxProcessedUtfBytes::default(); -// // we have padded the input out to 64 byte multiple with the remainder being -// // zeros -// -// // persistent state across loop -// // does the last iteration end with an odd-length sequence of backslashes? 
-// // either 0 or 1, but a 64-bit value -// let mut prev_iter_ends_odd_backslash: u64 = 0; -// // does the previous iteration end inside a double-quote pair? -// let mut prev_iter_inside_quote: u64 = 0; -// // either all zeros or all ones -// // does the previous iteration end on something that is a predecessor of a -// // pseudo-structural character - i.e. whitespace or a structural character -// // effectively the very first char is considered to follow "whitespace" for -// // the -// // purposes of pseudo-structural character detection so we initialize to 1 -// let mut prev_iter_ends_pseudo_pred: u64 = 1; -// -// // structurals are persistent state across loop as we flatten them on the -// // subsequent iteration into our array pointed to be base_ptr. -// // This is harmless on the first iteration as structurals==0 -// // and is done for performance reasons; we can hide some of the latency of the -// // expensive carryless multiply in the previous step with this work -// let mut structurals: u64 = 0; -// -// let lenminus64: usize = if len < 64 { 0 } else { len as usize - 64 }; -// let mut idx: usize = 0; -// let mut error_mask: u64 = 0; // for unescaped characters within strings (ASCII code points < 0x20) -// -// while idx < lenminus64 { -// /* -// #ifndef _MSC_VER -// __builtin_prefetch(buf + idx + 128); -// #endif -// */ -// let input: SimdInput = fill_input(input.get_unchecked(idx as usize..)); -// check_utf8(&input, &mut has_error, &mut previous); -// // detect odd sequences of backslashes -// let odd_ends: u64 = -// find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); -// -// // detect insides of quote pairs ("quote_mask") and also our quote_bits -// // themselves -// let mut quote_bits: u64 = 0; -// let quote_mask: u64 = find_quote_mask_and_bits( -// &input, -// odd_ends, -// &mut prev_iter_inside_quote, -// &mut quote_bits, -// &mut error_mask, -// ); -// -// // take the previous iterations structural bits, not our current iteration, 
-// // and flatten -// flatten_bits(&mut structural_indexes, idx as u32, structurals); -// -// let mut whitespace: u64 = 0; -// find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals); -// -// // fixup structurals to reflect quotes and add pseudo-structural characters -// structurals = finalize_structurals( -// structurals, -// whitespace, -// quote_mask, -// quote_bits, -// &mut prev_iter_ends_pseudo_pred, -// ); -// idx += 64; -// } -// -// // we use a giant copy-paste which is ugly. -// // but otherwise the string needs to be properly padded or else we -// // risk invalidating the UTF-8 checks. -// if idx < len { -// let mut tmpbuf: [u8; 64] = [0x20; 64]; -// tmpbuf -// .as_mut_ptr() -// .copy_from(input.as_ptr().add(idx), len as usize - idx); -// let input: SimdInput = fill_input(&tmpbuf); -// -// check_utf8(&input, &mut has_error, &mut previous); -// -// // detect odd sequences of backslashes -// let odd_ends: u64 = -// find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); -// -// // detect insides of quote pairs ("quote_mask") and also our quote_bits -// // themselves -// let mut quote_bits: u64 = 0; -// let quote_mask: u64 = find_quote_mask_and_bits( -// &input, -// odd_ends, -// &mut prev_iter_inside_quote, -// &mut quote_bits, -// &mut error_mask, -// ); -// -// // take the previous iterations structural bits, not our current iteration, -// // and flatten -// flatten_bits(&mut structural_indexes, idx as u32, structurals); -// -// let mut whitespace: u64 = 0; -// find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals); -// -// // fixup structurals to reflect quotes and add pseudo-structural characters -// structurals = finalize_structurals( -// structurals, -// whitespace, -// quote_mask, -// quote_bits, -// &mut prev_iter_ends_pseudo_pred, -// ); -// idx += 64; -// } -// // This test isn't in upstream, for some reason the error mask is et for then. 
-// if prev_iter_inside_quote != 0 { -// return Err(ErrorType::Syntax); -// } -// // finally, flatten out the remaining structurals from the last iteration -// flatten_bits(&mut structural_indexes, idx as u32, structurals); -// -// // a valid JSON file cannot have zero structural indexes - we should have -// // found something (note that we compare to 1 as we always add the root!) -// if structural_indexes.len() == 1 { -// return Err(ErrorType::EOF); -// } -// -// if structural_indexes.last() > Some(&(len as u32)) { -// return Err(ErrorType::InternalError); -// } -// -// if error_mask != 0 { -// return Err(ErrorType::Syntax); -// } -// -// if _mm_testz_si128(has_error, has_error) != 0 { -// Ok(structural_indexes) -// } else { -// Err(ErrorType::InvalidUTF8) -// } -// } -//} + +// return a updated structural bit vector with quoted contents cleared out and +// pseudo-structural characters added to the mask +// updates prev_iter_ends_pseudo_pred which tells us whether the previous +// iteration ended on a whitespace or a structural character (which means that +// the next iteration +// will have a pseudo-structural character at its start) +#[cfg_attr(not(feature = "no-inline"), inline(always))] +fn finalize_structurals( + mut structurals: u64, + whitespace: u64, + quote_mask: u64, + quote_bits: u64, + prev_iter_ends_pseudo_pred: &mut u64, +) -> u64 { + // mask off anything inside quotes + structurals &= !quote_mask; + // add the real quote bits back into our bitmask as well, so we can + // quickly traverse the strings we've spent all this trouble gathering + structurals |= quote_bits; + // Now, establish "pseudo-structural characters". These are non-whitespace + // characters that are (a) outside quotes and (b) have a predecessor that's + // either whitespace or a structural character. 
 This means that subsequent
+    // passes will get a chance to encounter the first character of every string
+    // of non-whitespace and, if we're parsing an atom like true/false/null or a
+    // number we can stop at the first whitespace or structural character
+    // following it.
+
+    // a qualified predecessor is something that can happen 1 position before a
+    // pseudo-structural character
+    let pseudo_pred: u64 = structurals | whitespace;
+
+    let shifted_pseudo_pred: u64 = (pseudo_pred << 1) | *prev_iter_ends_pseudo_pred;
+    *prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
+    let pseudo_structurals: u64 = shifted_pseudo_pred & (!whitespace) & (!quote_mask);
+    structurals |= pseudo_structurals;
+
+    // now, we've used our close quotes all we need to. So let's switch them off
+    // they will be off in the quote mask and on in quote bits.
+    structurals &= !(quote_bits & !quote_mask);
+    structurals
+}
+
+impl<'de> Deserializer<'de> {
+    #[inline(never)]
+    pub unsafe fn find_structural_bits(input: &[u8]) -> std::result::Result<Vec<u32>, ErrorType> {
+        let len = input.len();
+        // 6 is a heuristic number to estimate; it turns out a rate of 1/6 structural characters leads
+        // almost never to relocations.
+        let mut structural_indexes = Vec::with_capacity(len / 6);
+        structural_indexes.push(0); // push extra root element
+
+        // let mut has_error: uint8x16_t = vdupq_n_u8(0);
+        let mut previous = Utf8CheckingState::default();
+        // we have padded the input out to 64 byte multiple with the remainder being
+        // zeros
+
+        // persistent state across loop
+        // does the last iteration end with an odd-length sequence of backslashes?
+        // either 0 or 1, but a 64-bit value
+        let mut prev_iter_ends_odd_backslash: u64 = 0;
+        // does the previous iteration end inside a double-quote pair?
+        let mut prev_iter_inside_quote: u64 = 0;
+        // either all zeros or all ones
+        // does the previous iteration end on something that is a predecessor of a
+        // pseudo-structural character - i.e.
whitespace or a structural character + // effectively the very first char is considered to follow "whitespace" for + // the + // purposes of pseudo-structural character detection so we initialize to 1 + let mut prev_iter_ends_pseudo_pred: u64 = 1; + + // structurals are persistent state across loop as we flatten them on the + // subsequent iteration into our array pointed to be base_ptr. + // This is harmless on the first iteration as structurals==0 + // and is done for performance reasons; we can hide some of the latency of the + // expensive carryless multiply in the previous step with this work + let mut structurals: u64 = 0; + + let lenminus64: usize = if len < 64 { 0 } else { len as usize - 64 }; + let mut idx: usize = 0; + let mut error_mask: u64 = 0; // for unescaped characters within strings (ASCII code points < 0x20) + + while idx < lenminus64 { + /* + #ifndef _MSC_VER + __builtin_prefetch(buf + idx + 128); + #endif + */ + let input: SimdInput = fill_input(input.get_unchecked(idx as usize..)); + check_utf8(&input, &mut previous); + // detect odd sequences of backslashes + let odd_ends: u64 = + find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); + + // detect insides of quote pairs ("quote_mask") and also our quote_bits + // themselves + let mut quote_bits: u64 = 0; + let quote_mask: u64 = find_quote_mask_and_bits( + &input, + odd_ends, + &mut prev_iter_inside_quote, + &mut quote_bits, + &mut error_mask, + ); + + // take the previous iterations structural bits, not our current iteration, + // and flatten + flatten_bits(&mut structural_indexes, idx as u32, structurals); + + let mut whitespace: u64 = 0; + find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals); + + // fixup structurals to reflect quotes and add pseudo-structural characters + structurals = finalize_structurals( + structurals, + whitespace, + quote_mask, + quote_bits, + &mut prev_iter_ends_pseudo_pred, + ); + idx += 64; + } + + // we use a giant 
 copy-paste which is ugly.
+        // but otherwise the string needs to be properly padded or else we
+        // risk invalidating the UTF-8 checks.
+        if idx < len {
+            let mut tmpbuf: [u8; 64] = [0x20; 64];
+            tmpbuf
+                .as_mut_ptr()
+                .copy_from(input.as_ptr().add(idx), len as usize - idx);
+            let input: SimdInput = fill_input(&tmpbuf);
+
+            check_utf8(&input, &mut previous);
+
+            // detect odd sequences of backslashes
+            let odd_ends: u64 =
+                find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash);
+
+            // detect insides of quote pairs ("quote_mask") and also our quote_bits
+            // themselves
+            let mut quote_bits: u64 = 0;
+            let quote_mask: u64 = find_quote_mask_and_bits(
+                &input,
+                odd_ends,
+                &mut prev_iter_inside_quote,
+                &mut quote_bits,
+                &mut error_mask,
+            );
+
+            // take the previous iterations structural bits, not our current iteration,
+            // and flatten
+            flatten_bits(&mut structural_indexes, idx as u32, structurals);
+
+            let mut whitespace: u64 = 0;
+            find_whitespace_and_structurals(&input, &mut whitespace, &mut structurals);
+
+            // fixup structurals to reflect quotes and add pseudo-structural characters
+            structurals = finalize_structurals(
+                structurals,
+                whitespace,
+                quote_mask,
+                quote_bits,
+                &mut prev_iter_ends_pseudo_pred,
+            );
+            idx += 64;
+        }
+
+        // This test isn't in upstream, for some reason the error mask is set for then.
+        if prev_iter_inside_quote != 0 {
+            return Err(ErrorType::Syntax);
+        }
+        // finally, flatten out the remaining structurals from the last iteration
+        flatten_bits(&mut structural_indexes, idx as u32, structurals);
+
+        // a valid JSON file cannot have zero structural indexes - we should have
+        // found something (note that we compare to 1 as we always add the root!)
+ if structural_indexes.len() == 1 { + return Err(ErrorType::EOF); + } + + if structural_indexes.last() > Some(&(len as u32)) { + return Err(ErrorType::InternalError); + } + + if error_mask != 0 { + return Err(ErrorType::Syntax); + } + + let err: i128 = mem::transmute(vtstq_s8(previous.has_error, previous.has_error)); + + if err != 0 { + Ok(structural_indexes) + } else { + Err(ErrorType::InvalidUTF8) + } + } +} diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index 04bf0640..de190f46 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -1,4 +1,6 @@ -use crate::*; + +use crate::neon::intrinsics::*; +use std::mem; /* * legal utf-8 byte sequence @@ -17,108 +19,78 @@ use crate::*; * */ -// all byte values must be no larger than 0xF4 - -/*****************************/ -#[cfg_attr(not(feature = "no-inline"), inline)] -fn push_last_byte_of_a_to_b(a: __m128i, b: __m128i) -> __m128i { - unsafe { _mm_alignr_epi8(b, a, 15) } -} - -#[cfg_attr(not(feature = "no-inline"), inline)] -fn push_last_2bytes_of_a_to_b(a: __m128i, b: __m128i) -> __m128i { - unsafe { _mm_alignr_epi8(b, a, 14) } -} - // all byte values must be no larger than 0xF4 #[cfg_attr(not(feature = "no-inline"), inline)] -fn avxcheck_smaller_than_0xf4(current_bytes: __m128i, has_error: &mut __m128i) { +unsafe fn check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8x16_t) { // unsigned, saturates to 0 below max - *has_error = unsafe { - _mm_or_si128( + *has_error = + vorrq_s8( *has_error, - _mm_subs_epu8(current_bytes, _mm_set1_epi8(static_cast_i8!(0xF4u8))), - ) - }; + vreinterpretq_s8_u8(vqsubq_u8( + vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0xF4)))); } #[cfg_attr(not(feature = "no-inline"), inline)] -fn avxcontinuation_lengths(high_nibbles: __m128i) -> __m128i { - unsafe { - _mm_shuffle_epi8( - _mm_setr_epi8( - 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) - 0, 0, 0, 0, // 10xx (continuation) - 2, 2, // 110x - 3, // 1110 - 4, // 1111, next should be 0 (not checked here) - ), 
- high_nibbles, - ) - } +unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { + let NIBBLES : int8x16_t = int8x16_t::new( + 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) + 0, 0, 0, 0, // 10xx (continuation) + 2, 2, // 110x + 3, // 1110 + 4, // 1111, next should be 0 (not checked here) + ); + + vqtbl1q_s8(NIBBLES, vreinterpretq_u8_s8(high_nibbles)) } #[cfg_attr(not(feature = "no-inline"), inline)] -fn avxcarry_continuations(initial_lengths: __m128i, previous_carries: __m128i) -> __m128i { - unsafe { - let right1: __m128i = _mm_subs_epu8( - push_last_byte_of_a_to_b(previous_carries, initial_lengths), - _mm_set1_epi8(1), - ); - let sum: __m128i = _mm_add_epi8(initial_lengths, right1); - let right2: __m128i = _mm_subs_epu8( - push_last_2bytes_of_a_to_b(previous_carries, sum), - _mm_set1_epi8(2), - ); - _mm_add_epi8(sum, right2) - } +unsafe fn carry_continuations(initial_lengths: int8x16_t, previous_carries: int8x16_t) -> int8x16_t { + let right1 : int8x16_t = vreinterpretq_s8_u8(vqsubq_u8( + vreinterpretq_u8_s8(vextq_s8(previous_carries, initial_lengths, 16 - 1)), + vdupq_n_u8(1))); + let sum : int8x16_t = vaddq_s8(initial_lengths, right1); + + let right2 : int8x16_t = vreinterpretq_s8_u8( + vqsubq_u8(vreinterpretq_u8_s8(vextq_s8(previous_carries, sum, 16 - 2)), + vdupq_n_u8(2))); + + vaddq_s8(sum, right2) } #[cfg_attr(not(feature = "no-inline"), inline)] -fn avxcheck_continuations(initial_lengths: __m128i, carries: __m128i, has_error: &mut __m128i) { +unsafe fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, has_error: &mut int8x16_t) { // overlap || underlap // carry > length && length > 0 || !(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) - unsafe { - let overunder: __m128i = _mm_cmpeq_epi8( - _mm_cmpgt_epi8(carries, initial_lengths), - _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128()), - ); - *has_error = _mm_or_si128(*has_error, overunder); - } + // FIXME: was vceqq_u8 ? 
+ let overunder : int8x16_t = vceqq_s8( + vcgtq_s8(carries, initial_lengths), + vcgtq_s8(initial_lengths, vdupq_n_s8(0))); + + *has_error = vorrq_s8(*has_error, overunder); } // when 0xED is found, next byte must be no larger than 0x9F // when 0xF4 is found, next byte must be no larger than 0x8F // next byte must be continuation, ie sign bit is set, so signed < is ok #[cfg_attr(not(feature = "no-inline"), inline)] -fn avxcheck_first_continuation_max( - current_bytes: __m128i, - off1_current_bytes: __m128i, - has_error: &mut __m128i, +unsafe fn check_first_continuation_max( + current_bytes: int8x16_t, + off1_current_bytes: int8x16_t, + has_error: &mut int8x16_t, ) { - unsafe { - let mask_ed: __m128i = _mm_cmpeq_epi8( - off1_current_bytes, - _mm_set1_epi8(static_cast_i8!(0xEDu8)), - ); - let mask_f4: __m128i = _mm_cmpeq_epi8( - off1_current_bytes, - _mm_set1_epi8(static_cast_i8!(0xF4u8)), - ); - - let badfollow_ed: __m128i = _mm_and_si128( - _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(static_cast_i8!(0x9Fu8))), - mask_ed, - ); - let badfollow_f4: __m128i = _mm_and_si128( - _mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(static_cast_i8!(0x8Fu8))), - mask_f4, - ); - - *has_error = _mm_or_si128(*has_error, _mm_or_si128(badfollow_ed, badfollow_f4)); - } + let maskED : int8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xED)); + let maskF4 : int8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xF4)); + + // FIXME: was vandq_u8? 
+ let badfollowED : int8x16_t = + vandq_s8(vcgtq_s8(current_bytes, vdupq_n_s8(0x9F)), maskED); + let badfollowF4 : int8x16_t = + vandq_s8(vcgtq_s8(current_bytes, vdupq_n_s8(0x8F)), maskF4); + + *has_error = vorrq_s8( + *has_error, vorrq_s8(badfollowED, badfollowF4)); } // map off1_hibits => error condition @@ -128,120 +100,95 @@ fn avxcheck_first_continuation_max( // F => < F1 && < 90 // else false && false #[cfg_attr(not(feature = "no-inline"), inline)] -fn avxcheck_overlong( - current_bytes: __m128i, - off1_current_bytes: __m128i, - hibits: __m128i, - previous_hibits: __m128i, - has_error: &mut __m128i, +unsafe fn check_overlong( + current_bytes: int8x16_t, + off1_current_bytes: int8x16_t, + hibits: int8x16_t, + previous_hibits: int8x16_t, + has_error: &mut int8x16_t, ) { - unsafe { - let off1_hibits: __m128i = push_last_byte_of_a_to_b(previous_hibits, hibits); - let initial_mins: __m128i = _mm_shuffle_epi8( - _mm_setr_epi8( - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, // 10xx => false - static_cast_i8!(0xC2u8), - -128, // 110x - static_cast_i8!(0xE1u8), // 1110 - static_cast_i8!(0xF1u8), // 1111 - ), - off1_hibits, - ); - - let initial_under: __m128i = _mm_cmpgt_epi8(initial_mins, off1_current_bytes); - - let second_mins: __m128i = _mm_shuffle_epi8( - _mm_setr_epi8( - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, - -128, // 10xx => false - 127, - 127, // 110x => true - static_cast_i8!(0xA0u8), // 1110 - static_cast_i8!(0x90u8), // 1111 - ), - off1_hibits, - ); - let second_under: __m128i = _mm_cmpgt_epi8(second_mins, current_bytes); - *has_error = _mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under)); - } + let _initial_mins : int8x16_t = int8x16_t::new( + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + 0xC2, -128, // 110x + 0xE1, // 1110 + 0xF1, + ); + + let _second_mins : int8x16_t = int8x16_t::new( + -128, 
-128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + 127, 127, // 110x => true + 0xA0, // 1110 + 0x90, + ); + + let off1_hibits : int8x16_t = vextq_s8(previous_hibits, hibits, 16 - 1); + let initial_mins : int8x16_t = + vqtbl1q_s8(_initial_mins, vreinterpretq_u8_s8(off1_hibits)); + + let initial_under : int8x16_t = vcgtq_s8(initial_mins, off1_current_bytes); + + let second_mins : int8x16_t = + vqtbl1q_s8(_second_mins, vreinterpretq_u8_s8(off1_hibits)); + + let second_under : int8x16_t = vcgtq_s8(second_mins, current_bytes); + + *has_error = vorrq_s8( + *has_error, vandq_s8(initial_under, second_under)); } -pub struct AvxProcessedUtfBytes { - rawbytes: __m128i, - high_nibbles: __m128i, - pub carried_continuations: __m128i, +pub struct ProcessedUtfBytes { + rawbytes: int8x16_t, + high_nibbles: int8x16_t, + pub carried_continuations: int8x16_t, } -impl Default for AvxProcessedUtfBytes { +impl Default for ProcessedUtfBytes { #[cfg_attr(not(feature = "no-inline"), inline)] fn default() -> Self { - unsafe { - AvxProcessedUtfBytes { - rawbytes: _mm_setzero_si128(), - high_nibbles: _mm_setzero_si128(), - carried_continuations: _mm_setzero_si128(), - } + ProcessedUtfBytes { + rawbytes: vdupq_n_s8(0x00), + high_nibbles: vdupq_n_s8(0x00), + carried_continuations: vdupq_n_s8(0x00), } } } #[cfg_attr(not(feature = "no-inline"), inline)] -fn avx_count_nibbles(bytes: __m128i, answer: &mut AvxProcessedUtfBytes) { +unsafe fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) { answer.rawbytes = bytes; - answer.high_nibbles = - unsafe { _mm_and_si128(_mm_srli_epi16(bytes, 4), _mm_set1_epi8(0x0F)) }; + answer.high_nibbles = vreinterpretq_s8_u8(mem::transmute(vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4))); } // check whether the current bytes are valid UTF-8 // at the end of the function, previous gets updated #[cfg_attr(not(feature = "no-inline"), inline)] -pub fn avxcheck_utf8_bytes( - current_bytes: __m128i, - previous: 
&AvxProcessedUtfBytes, - has_error: &mut __m128i, -) -> AvxProcessedUtfBytes { - let mut pb = AvxProcessedUtfBytes::default(); - avx_count_nibbles(current_bytes, &mut pb); +pub fn check_utf8_bytes( + current_bytes: int8x16_t, + previous: &ProcessedUtfBytes, + has_error: &mut int8x16_t, +) -> ProcessedUtfBytes { + let mut pb = ProcessedUtfBytes::default(); + unsafe { + count_nibbles(current_bytes, &mut pb); - avxcheck_smaller_than_0xf4(current_bytes, has_error); + check_smaller_than_0xf4(current_bytes, has_error); - let initial_lengths: __m128i = avxcontinuation_lengths(pb.high_nibbles); + let initial_lengths: int8x16_t = continuation_lengths(pb.high_nibbles); - pb.carried_continuations = - avxcarry_continuations(initial_lengths, previous.carried_continuations); + pb.carried_continuations = + carry_continuations(initial_lengths, previous.carried_continuations); - avxcheck_continuations(initial_lengths, pb.carried_continuations, has_error); + check_continuations(initial_lengths, pb.carried_continuations, has_error); - let off1_current_bytes: __m128i = push_last_byte_of_a_to_b(previous.rawbytes, pb.rawbytes); - avxcheck_first_continuation_max(current_bytes, off1_current_bytes, has_error); + let off1_current_bytes : int8x16_t = + vextq_s8(previous.rawbytes, pb.rawbytes, 16 - 1); - avxcheck_overlong( - current_bytes, - off1_current_bytes, - pb.high_nibbles, - previous.high_nibbles, - has_error, - ); + check_first_continuation_max(current_bytes, off1_current_bytes, has_error); + + check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles, + previous.high_nibbles, has_error); + } pb } From 05fd4f3c65bff1e401fd30e39eb1edc9d77b6d02 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 9 Aug 2019 18:56:26 -0400 Subject: [PATCH 03/34] feat: partial implementation of deserializer (broken, needs movemask) --- src/lib.rs | 262 +++++++++++++++--------------- src/neon/deser.rs | 400 +++++++++++++++++++++++----------------------- src/neon/mod.rs | 2 +- 3 files changed, 
332 insertions(+), 332 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3973b7a5..d2d9f10e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -79,16 +79,16 @@ //! let v: Value = simd_json::serde::from_slice(&mut d).unwrap(); //! ``` -//#[cfg(feature = "serde_impl")] -//extern crate serde as serde_ext; -//#[cfg(feature = "serde_impl")] -//pub mod serde; +#[cfg(feature = "serde_impl")] +extern crate serde as serde_ext; +#[cfg(feature = "serde_impl")] +pub mod serde; mod charutils; #[macro_use] mod macros; mod error; -//mod numberparse; +mod numberparse; mod parsedjson; mod stringparse; @@ -109,19 +109,19 @@ use crate::sse42::stage1::SIMDJSON_PADDING; //#[cfg(target_feature = "neon")] mod neon; //#[cfg(target_feature = "neon")] -//pub use crate::neon::deser::*; +pub use crate::neon::deser::*; //#[cfg(target_feature = "neon")] -//use crate::neon::stage1::SIMDJSON_PADDING; +use crate::neon::stage1::SIMDJSON_PADDING; -//mod stage2; -//pub mod value; +mod stage2; +pub mod value; -//use crate::numberparse::Number; +use crate::numberparse::Number; //use std::mem; //use std::str; pub use crate::error::{Error, ErrorType}; -//pub use crate::value::*; +pub use crate::value::*; pub type Result = std::result::Result; @@ -137,126 +137,126 @@ pub struct Deserializer<'de> { str_offset: usize, iidx: usize, } -// -//impl<'de> Deserializer<'de> { -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn error(&self, error: ErrorType) -> Error { -// Error::new(self.idx, self.iidx, self.c() as char, error) -// } -// // By convention, `Deserializer` constructors are named like `from_xyz`. -// // That way basic use cases are satisfied by something like -// // `serde_json::from_str(...)` while advanced use cases that require a -// // deserializer can make one with `serde_json::Deserializer::from_str(...)`. -// pub fn from_slice(input: &'de mut [u8]) -> Result { -// // We have to pick an initial size of the structural indexes. 
-// // 6 is a heuristic that seems to work well for the benchmark -// // data and limit re-allocation frequency. -// -// let len = input.len(); -// -// let buf_start: usize = input.as_ptr() as *const () as usize; -// let needs_relocation = (buf_start + input.len()) % page_size::get() < SIMDJSON_PADDING; -// -// let s1_result: std::result::Result, ErrorType> = if needs_relocation { -// let mut data: Vec = Vec::with_capacity(len + SIMDJSON_PADDING); -// unsafe { -// data.set_len(len + 1); -// data.as_mut_slice() -// .get_unchecked_mut(0..len) -// .clone_from_slice(input); -// *(data.get_unchecked_mut(len)) = 0; -// data.set_len(len); -// Deserializer::find_structural_bits(&data) -// } -// } else { -// unsafe { Deserializer::find_structural_bits(input) } -// }; -// let structural_indexes = match s1_result { -// Ok(i) => i, -// Err(t) => { -// return Err(Error::generic(t)); -// } -// }; -// -// let counts = Deserializer::validate(input, &structural_indexes)?; -// -// let strings = Vec::with_capacity(len + SIMDJSON_PADDING); -// -// Ok(Deserializer { -// counts, -// structural_indexes, -// input, -// idx: 0, -// strings, -// str_offset: 0, -// iidx: 0, -// }) -// } -// -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn skip(&mut self) { -// self.idx += 1; -// self.iidx = unsafe { *self.structural_indexes.get_unchecked(self.idx) as usize }; -// } -// -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn c(&self) -> u8 { -// unsafe { -// *self -// .input -// .get_unchecked(*self.structural_indexes.get_unchecked(self.idx) as usize) -// } -// } -// -// // pull out the check so we don't need to -// // stry every time -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn next_(&mut self) -> u8 { -// unsafe { -// self.idx += 1; -// self.iidx = *self.structural_indexes.get_unchecked(self.idx) as usize; -// *self.input.get_unchecked(self.iidx) -// } -// } -// -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn 
count_elements(&self) -> usize { -// unsafe { *self.counts.get_unchecked(self.idx) } -// } -// -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn parse_number_root(&mut self, minus: bool) -> Result { -// let input = unsafe { &self.input.get_unchecked(self.iidx..) }; -// let len = input.len(); -// let mut copy = vec![0u8; len + SIMDJSON_PADDING]; -// copy[len] = 0; -// unsafe { -// copy.as_mut_ptr().copy_from(input.as_ptr(), len); -// }; -// self.parse_number_int(©, minus) -// } -// -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn parse_number(&mut self, minus: bool) -> Result { -// let input = unsafe { &self.input.get_unchecked(self.iidx..) }; -// let len = input.len(); -// if len < SIMDJSON_PADDING { -// let mut copy = vec![0u8; len + SIMDJSON_PADDING]; -// unsafe { -// copy.as_mut_ptr().copy_from(input.as_ptr(), len); -// }; -// self.parse_number_int(©, minus) -// } else { -// self.parse_number_int(input, minus) -// } -// } -// -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// fn parse_number_(&mut self, minus: bool) -> Result { -// let input = unsafe { &self.input.get_unchecked(self.iidx..) }; -// self.parse_number_int(input, minus) -// } -//} + +impl<'de> Deserializer<'de> { + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn error(&self, error: ErrorType) -> Error { + Error::new(self.idx, self.iidx, self.c() as char, error) + } + // By convention, `Deserializer` constructors are named like `from_xyz`. + // That way basic use cases are satisfied by something like + // `serde_json::from_str(...)` while advanced use cases that require a + // deserializer can make one with `serde_json::Deserializer::from_str(...)`. + pub fn from_slice(input: &'de mut [u8]) -> Result { + // We have to pick an initial size of the structural indexes. + // 6 is a heuristic that seems to work well for the benchmark + // data and limit re-allocation frequency. 
+ + let len = input.len(); + + let buf_start: usize = input.as_ptr() as *const () as usize; + let needs_relocation = (buf_start + input.len()) % page_size::get() < SIMDJSON_PADDING; + + let s1_result: std::result::Result, ErrorType> = if needs_relocation { + let mut data: Vec = Vec::with_capacity(len + SIMDJSON_PADDING); + unsafe { + data.set_len(len + 1); + data.as_mut_slice() + .get_unchecked_mut(0..len) + .clone_from_slice(input); + *(data.get_unchecked_mut(len)) = 0; + data.set_len(len); + Deserializer::find_structural_bits(&data) + } + } else { + unsafe { Deserializer::find_structural_bits(input) } + }; + let structural_indexes = match s1_result { + Ok(i) => i, + Err(t) => { + return Err(Error::generic(t)); + } + }; + + let counts = Deserializer::validate(input, &structural_indexes)?; + + let strings = Vec::with_capacity(len + SIMDJSON_PADDING); + + Ok(Deserializer { + counts, + structural_indexes, + input, + idx: 0, + strings, + str_offset: 0, + iidx: 0, + }) + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn skip(&mut self) { + self.idx += 1; + self.iidx = unsafe { *self.structural_indexes.get_unchecked(self.idx) as usize }; + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn c(&self) -> u8 { + unsafe { + *self + .input + .get_unchecked(*self.structural_indexes.get_unchecked(self.idx) as usize) + } + } + + // pull out the check so we don't need to + // stry every time + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn next_(&mut self) -> u8 { + unsafe { + self.idx += 1; + self.iidx = *self.structural_indexes.get_unchecked(self.idx) as usize; + *self.input.get_unchecked(self.iidx) + } + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn count_elements(&self) -> usize { + unsafe { *self.counts.get_unchecked(self.idx) } + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn parse_number_root(&mut self, minus: bool) -> Result { + let input = unsafe { 
&self.input.get_unchecked(self.iidx..) }; + let len = input.len(); + let mut copy = vec![0u8; len + SIMDJSON_PADDING]; + copy[len] = 0; + unsafe { + copy.as_mut_ptr().copy_from(input.as_ptr(), len); + }; + self.parse_number_int(©, minus) + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn parse_number(&mut self, minus: bool) -> Result { + let input = unsafe { &self.input.get_unchecked(self.iidx..) }; + let len = input.len(); + if len < SIMDJSON_PADDING { + let mut copy = vec![0u8; len + SIMDJSON_PADDING]; + unsafe { + copy.as_mut_ptr().copy_from(input.as_ptr(), len); + }; + self.parse_number_int(©, minus) + } else { + self.parse_number_int(input, minus) + } + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + fn parse_number_(&mut self, minus: bool) -> Result { + let input = unsafe { &self.input.get_unchecked(self.iidx..) }; + self.parse_number_int(input, minus) + } +} // //#[cfg(test)] //mod tests { diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 790eb1b6..9cc5ca13 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -1,204 +1,204 @@ -// -//use std::mem; -// -//pub use crate::error::{Error, ErrorType}; -//pub use crate::Deserializer; -//pub use crate::Result; -//pub use crate::neon::intrinsics::*; -////pub use crate::neon::utf8check::*; -//pub use crate::stringparse::*; -// -////use crate::portability::trailingzeroes; -// -// -//impl<'de> Deserializer<'de> { -// #[cfg_attr(not(feature = "no-inline"), inline(always))] -// pub fn parse_str_(&mut self) -> Result<&'de str> { -// // Add 1 to skip the initial " -// let idx = self.iidx + 1; -// let mut padding = [0u8; 32]; -// //let mut read: usize = 0; -// -// // we include the terminal '"' so we know where to end -// // This is safe since we check sub's lenght in the range access above and only -// // create sub sliced form sub to `sub.len()`. -// -// let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) 
}; -// let mut src_i: usize = 0; -// let mut len = src_i; -// loop { -// let v: __m128i = if src.len() >= src_i + 16 { -// // This is safe since we ensure src is at least 16 wide -// #[allow(clippy::cast_ptr_alignment)] +use std::mem; + +pub use crate::error::{Error, ErrorType}; +pub use crate::Deserializer; +pub use crate::Result; +pub use crate::neon::intrinsics::*; +pub use crate::neon::utf8check::*; +pub use crate::stringparse::*; + +use crate::neon::intrinsics::*; + + +impl<'de> Deserializer<'de> { + #[cfg_attr(not(feature = "no-inline"), inline(always))] + pub fn parse_str_(&mut self) -> Result<&'de str> { + // Add 1 to skip the initial " + let idx = self.iidx + 1; + let mut padding = [0u8; 32]; + //let mut read: usize = 0; + + // we include the terminal '"' so we know where to end + // This is safe since we check sub's lenght in the range access above and only + // create sub sliced form sub to `sub.len()`. + + + // + // FIXME - this section is somewhat questionable -- needs a complete proofread!!! + // + + let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) 
}; + let mut src_i: usize = 0; + let mut len = src_i; + loop { + let v: int8x16_t = if src.len() >= src_i + 16 { + // This is safe since we ensure src is at least 16 wide + #[allow(clippy::cast_ptr_alignment)] + unsafe { + *(src.as_ptr().add(src_i) as *const int8x16_t) + } + } else { + unsafe { + padding + .get_unchecked_mut(..src.len() - src_i) + .clone_from_slice(src.get_unchecked(src_i..)); + // This is safe since we ensure src is at least 16 wide + #[allow(clippy::cast_ptr_alignment)] + *(padding.as_ptr() as *const int8x16_t) + } + }; + + // store to dest unconditionally - we can overwrite the bits we don't like + // later + let bs_bits: u32 = 0; // FIXME: vceqq_s8(v, vdupq_n_s8(b'\\' as i8)); + let quote_mask = 0; // FIXME: unsafe { vceqq_s8(v, vdupq_n_s8(b'"' as i8)) }; + let quote_bits = 0; // FIXME: unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { + // we encountered quotes first. Move dst to point to quotes and exit + // find out where the quote is... + let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; + + /////////////////////// + // Above, check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + //////////////////////// + + // we advance the point, accounting for the fact that we have a NULl termination + + len += quote_dist as usize; + unsafe { + let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; + return Ok(&*v); + } + + // we compare the pointers since we care if they are 'at the same spot' + // not if they are the same value + } + if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { + // Move to the 'bad' character + let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); + len += bs_dist as usize; + src_i += bs_dist as usize; + break; + } else { + // they are the same. 
Since they can't co-occur, it means we encountered + // neither. + src_i += 16; + len += 16; + } + } + + let mut dst_i: usize = 0; + let dst: &mut [u8] = &mut self.strings; + + loop { + let v: int8x16_t = if src.len() >= src_i + 16 { + // This is safe since we ensure src is at least 16 wide + #[allow(clippy::cast_ptr_alignment)] + unsafe { + *(src.as_ptr().add(src_i) as *const int8x16_t) + } + } else { + unsafe { + padding + .get_unchecked_mut(..src.len() - src_i) + .clone_from_slice(src.get_unchecked(src_i..)); + // This is safe since we ensure src is at least 16 wide + #[allow(clippy::cast_ptr_alignment)] + *(padding.as_ptr() as *const int8x16_t) + } + }; + + #[allow(clippy::cast_ptr_alignment)] + unsafe { + *(dst.as_mut_ptr().add(dst_i) as *mut int8x16_t) = v; + }; + + // store to dest unconditionally - we can overwrite the bits we don't like + // later + let bs_bits: u32 = 0; // FIXME // unsafe { -// _mm_loadu_si128(src.as_ptr().add(src_i) as *const __m128i) -// } -// } else { -// unsafe { -// padding -// .get_unchecked_mut(..src.len() - src_i) -// .clone_from_slice(src.get_unchecked(src_i..)); -// // This is safe since we ensure src is at least 32 wide -// #[allow(clippy::cast_ptr_alignment)] -// _mm_loadu_si128(padding.as_ptr() as *const __m128i) -// } -// }; -// -// // store to dest unconditionally - we can overwrite the bits we don't like -// // later -// let bs_bits: u32 = unsafe { -// static_cast_u32!(_mm_movemask_epi8(_mm_cmpeq_epi8( +// static_cast_u32!(_mm_movemask_epi8(vceqq_s8( // v, -// _mm_set1_epi8(b'\\' as i8) +// vdupq_n_s8(b'\\' as i8) // ))) // }; -// let quote_mask = unsafe { _mm_cmpeq_epi8(v, _mm_set1_epi8(b'"' as i8)) }; -// let quote_bits = unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; -// if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { -// // we encountered quotes first. Move dst to point to quotes and exit -// // find out where the quote is... 
-// let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; -// -// /////////////////////// -// // Above, check for overflow in case someone has a crazy string (>=4GB?) -// // But only add the overflow check when the document itself exceeds 4GB -// // Currently unneeded because we refuse to parse docs larger or equal to 4GB. -// //////////////////////// -// -// // we advance the point, accounting for the fact that we have a NULl termination -// -// len += quote_dist as usize; -// unsafe { -// let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; -// return Ok(&*v); -// } -// -// // we compare the pointers since we care if they are 'at the same spot' -// // not if they are the same value -// } -// if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { -// // Move to the 'bad' character -// let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); -// len += bs_dist as usize; -// src_i += bs_dist as usize; -// break; -// } else { -// // they are the same. Since they can't co-occur, it means we encountered -// // neither. 
-// src_i += 16; -// len += 16; -// } -// } -// -// let mut dst_i: usize = 0; -// let dst: &mut [u8] = &mut self.strings; -// -// loop { -// let v: __m128i = if src.len() >= src_i + 16 { -// // This is safe since we ensure src is at least 16 wide -// #[allow(clippy::cast_ptr_alignment)] -// unsafe { -// _mm_loadu_si128(src.as_ptr().add(src_i) as *const __m128i) -// } -// } else { -// unsafe { -// padding -// .get_unchecked_mut(..src.len() - src_i) -// .clone_from_slice(src.get_unchecked(src_i..)); -// // This is safe since we ensure src is at least 16 wide -// #[allow(clippy::cast_ptr_alignment)] -// _mm_loadu_si128(padding.as_ptr() as *const __m128i) -// } -// }; -// -// #[allow(clippy::cast_ptr_alignment)] -// unsafe { -// _mm_storeu_si128(dst.as_mut_ptr().add(dst_i) as *mut __m128i, v) -// }; -// -// // store to dest unconditionally - we can overwrite the bits we don't like -// // later -// let bs_bits: u32 = unsafe { -// static_cast_u32!(_mm_movemask_epi8(_mm_cmpeq_epi8( -// v, -// _mm_set1_epi8(b'\\' as i8) -// ))) -// }; -// let quote_mask = unsafe { _mm_cmpeq_epi8(v, _mm_set1_epi8(b'"' as i8)) }; -// let quote_bits = unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; -// if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { -// // we encountered quotes first. Move dst to point to quotes and exit -// // find out where the quote is... -// let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; -// -// /////////////////////// -// // Above, check for overflow in case someone has a crazy string (>=4GB?) -// // But only add the overflow check when the document itself exceeds 4GB -// // Currently unneeded because we refuse to parse docs larger or equal to 4GB. 
-// //////////////////////// -// -// // we advance the point, accounting for the fact that we have a NULl termination -// -// dst_i += quote_dist as usize; -// unsafe { -// self.input -// .get_unchecked_mut(idx + len..idx + len + dst_i) -// .clone_from_slice(&self.strings.get_unchecked(..dst_i)); -// let v = self.input.get_unchecked(idx..idx + len + dst_i) as *const [u8] -// as *const str; -// self.str_offset += dst_i as usize; -// return Ok(&*v); -// } -// -// // we compare the pointers since we care if they are 'at the same spot' -// // not if they are the same value -// } -// if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { -// // find out where the backspace is -// let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); -// let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; -// // we encountered backslash first. Handle backslash -// if escape_char == b'u' { -// // move src/dst up to the start; they will be further adjusted -// // within the unicode codepoint handling code. -// src_i += bs_dist as usize; -// dst_i += bs_dist as usize; -// let (o, s) = if let Ok(r) = -// handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { -// dst.get_unchecked_mut(dst_i..) -// }) { -// r -// } else { -// return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); -// }; -// if o == 0 { -// return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); -// }; -// // We moved o steps forword at the destiation and 6 on the source -// src_i += s; -// dst_i += o; -// } else { -// // simple 1:1 conversion. Will eat bs_dist+2 characters in input and -// // write bs_dist+1 characters to output -// // note this may reach beyond the part of the buffer we've actually -// // seen. 
I think this is ok -// let escape_result: u8 = -// unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; -// if escape_result == 0 { -// return Err(self.error(ErrorType::InvalidEscape)); -// } -// unsafe { -// *dst.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; -// } -// src_i += bs_dist as usize + 2; -// dst_i += bs_dist as usize + 1; -// } -// } else { -// // they are the same. Since they can't co-occur, it means we encountered -// // neither. -// src_i += 16; -// dst_i += 16; -// } -// } -// } -//} + let quote_mask = 0; // FIXME: unsafe { vceqq_s8(v, vdupq_n_s8(b'"' as i8)) }; + let quote_bits = 0; // FIXME: unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { + // we encountered quotes first. Move dst to point to quotes and exit + // find out where the quote is... + let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; + + /////////////////////// + // Above, check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + //////////////////////// + + // we advance the point, accounting for the fact that we have a NULl termination + + dst_i += quote_dist as usize; + unsafe { + self.input + .get_unchecked_mut(idx + len..idx + len + dst_i) + .clone_from_slice(&self.strings.get_unchecked(..dst_i)); + let v = self.input.get_unchecked(idx..idx + len + dst_i) as *const [u8] + as *const str; + self.str_offset += dst_i as usize; + return Ok(&*v); + } + + // we compare the pointers since we care if they are 'at the same spot' + // not if they are the same value + } + if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { + // find out where the backspace is + let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); + let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; + // we encountered backslash first. 
Handle backslash + if escape_char == b'u' { + // move src/dst up to the start; they will be further adjusted + // within the unicode codepoint handling code. + src_i += bs_dist as usize; + dst_i += bs_dist as usize; + let (o, s) = if let Ok(r) = + handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { + dst.get_unchecked_mut(dst_i..) + }) { + r + } else { + return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); + }; + if o == 0 { + return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); + }; + // We moved o steps forword at the destiation and 6 on the source + src_i += s; + dst_i += o; + } else { + // simple 1:1 conversion. Will eat bs_dist+2 characters in input and + // write bs_dist+1 characters to output + // note this may reach beyond the part of the buffer we've actually + // seen. I think this is ok + let escape_result: u8 = + unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; + if escape_result == 0 { + return Err(self.error(ErrorType::InvalidEscape)); + } + unsafe { + *dst.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; + } + src_i += bs_dist as usize + 2; + dst_i += bs_dist as usize + 1; + } + } else { + // they are the same. Since they can't co-occur, it means we encountered + // neither. 
+ src_i += 16; + dst_i += 16; + } + } + } +} diff --git a/src/neon/mod.rs b/src/neon/mod.rs index c1a87d98..1b979e5a 100644 --- a/src/neon/mod.rs +++ b/src/neon/mod.rs @@ -1,4 +1,4 @@ -//pub mod deser; +pub mod deser; pub mod stage1; pub mod utf8check; mod simd; From 086e74593dc91d501986c30166cb9cecc793b94a Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 9 Aug 2019 18:58:02 -0400 Subject: [PATCH 04/34] feat: re-enable tests (still broken) --- src/lib.rs | 1902 ++++++++++++++++++++++++++-------------------------- 1 file changed, 951 insertions(+), 951 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d2d9f10e..c032bd0d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -257,954 +257,954 @@ impl<'de> Deserializer<'de> { self.parse_number_int(input, minus) } } -// -//#[cfg(test)] -//mod tests { -// use super::serde::from_slice; -// use super::{ -// owned::to_value, owned::Map, owned::Value, to_borrowed_value, to_owned_value, Deserializer, -// }; -// use halfbrown::HashMap; -// use proptest::prelude::*; -// use serde::Deserialize; -// use serde_json; -// -// #[test] -// fn count1() { -// let mut d = String::from("[]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let simd = Deserializer::from_slice(&mut d).expect(""); -// assert_eq!(simd.counts[1], 0); -// } -// -// #[test] -// fn count2() { -// let mut d = String::from("[1]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let simd = Deserializer::from_slice(&mut d).expect(""); -// assert_eq!(simd.counts[1], 1); -// } -// -// #[test] -// fn count3() { -// let mut d = String::from("[1,2]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let simd = Deserializer::from_slice(&mut d).expect(""); -// assert_eq!(simd.counts[1], 2); -// } -// -// #[test] -// fn count4() { -// let mut d = String::from(" [ 1 , [ 3 ] , 2 ]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let simd = Deserializer::from_slice(&mut d).expect(""); -// assert_eq!(simd.counts[1], 3); -// assert_eq!(simd.counts[4], 1); -// } -// -// 
#[test] -// fn count5() { -// let mut d = String::from("[[],null,null]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let simd = Deserializer::from_slice(&mut d).expect(""); -// assert_eq!(simd.counts[1], 3); -// assert_eq!(simd.counts[2], 0); -// } -// -// #[test] -// fn empty() { -// let mut d = String::from(""); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_simd = from_slice::(&mut d); -// let v_serde = serde_json::from_slice::(d); -// assert!(v_simd.is_err()); -// assert!(v_serde.is_err()); -// } -// -// #[test] -// fn bool_true() { -// let mut d = String::from("true"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(true))); -// } -// -// #[test] -// fn bool_false() { -// let mut d = String::from("false"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(false))); -// //assert!(false) -// } -// -// #[test] -// fn union() { -// let mut d = String::from("null"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::Null)); -// } -// -// #[test] -// fn int() { -// let mut d = String::from("42"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = 
unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(42))); -// } -// -// #[test] -// fn zero() { -// let mut d = String::from("0"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(0))); -// } -// -// #[test] -// fn one() { -// let mut d = String::from("1"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(1))); -// } -// -// #[test] -// fn minus_one() { -// let mut d = String::from("-1"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(-1))); -// } -// -// #[test] -// fn float() { -// let mut d = String::from("23.0"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!(to_value(&mut d1), Ok(Value::from(23.0))); -// } -// -// #[test] -// fn 
string() { -// let mut d = String::from(r#""snot""#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(to_value(&mut d1), Ok(Value::from("snot"))); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn lonely_quote() { -// let mut d = String::from(r#"""#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde = serde_json::from_slice::(d).is_err(); -// let v_simd = from_slice::(&mut d).is_err(); -// assert!(v_simd); -// assert!(v_serde); -// } -// -// #[test] -// fn lonely_quote1() { -// let mut d = String::from(r#"["]"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde = serde_json::from_slice::(d).is_err(); -// let v_simd = from_slice::(&mut d).is_err(); -// assert!(v_simd); -// assert!(v_serde); -// } -// #[test] -// fn lonely_quote2() { -// let mut d = String::from(r#"[1, "]"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde = serde_json::from_slice::(d).is_err(); -// let v_simd = from_slice::(&mut d).is_err(); -// assert!(v_simd); -// assert!(v_serde); -// } -// -// #[test] -// fn lonely_quote3() { -// let mut d = String::from(r#"{": 1}"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde = serde_json::from_slice::(d).is_err(); -// let v_simd = from_slice::(&mut d).is_err(); -// assert!(v_simd); -// assert!(v_serde); -// } -// -// #[test] -// fn empty_string() { -// let mut d = String::from(r#""""#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(to_value(&mut d1), Ok(Value::from(""))); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn 
empty_array() { -// let mut d = String::from(r#"[]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); -// let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); -// assert_eq!(to_value(&mut d1), Ok(Value::Array(vec![]))); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn malformed_array() { -// let mut d = String::from(r#"[["#); -// let mut d1 = d.clone(); -// let mut d2 = d.clone(); -// let mut d = unsafe { d.as_bytes_mut() }; -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d2 = unsafe { d2.as_bytes_mut() }; -// let v_serde: Result = serde_json::from_slice(d); -// let v_simd_ov = to_owned_value(&mut d); -// let v_simd_bv = to_borrowed_value(&mut d1); -// let v_simd: Result = from_slice(&mut d2); -// assert!(v_simd_ov.is_err()); -// assert!(v_simd_bv.is_err()); -// assert!(v_simd.is_err()); -// assert!(v_serde.is_err()); -// } -// -// #[test] -// fn double_array() { -// let mut d = String::from(r#"[[]]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); -// let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![Value::Array(vec![])])) -// ); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn null_null_array() { -// let mut d = String::from(r#"[[],null,null]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); -// let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![ -// 
Value::Array(vec![]), -// Value::Null, -// Value::Null, -// ])) -// ); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn one_element_array() { -// let mut d = String::from(r#"["snot"]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![Value::from("snot")])) -// ); -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn two_element_array() { -// let mut d = String::from(r#"["snot", "badger"]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![ -// Value::from("snot"), -// Value::from("badger") -// ])) -// ); -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn list() { -// let mut d = String::from(r#"[42, 23.0, "snot badger"]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![ -// Value::from(42), -// Value::from(23.0), -// Value::from("snot badger") -// ])) -// ); -// } -// -// #[test] -// fn nested_list1() { -// let mut d = String::from(r#"[42, [23.0, "snot"], "bad", "ger"]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![ -// Value::from(42), -// 
Value::Array(vec![Value::from(23.0), Value::from("snot")]), -// Value::from("bad"), -// Value::from("ger") -// ])) -// ); -// -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn nested_list2() { -// let mut d = String::from(r#"[42, [23.0, "snot"], {"bad": "ger"}]"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn utf8() { -// let mut d = String::from(r#""\u000e""#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, "\u{e}"); -// // NOTE: serde is broken for this -// //assert_eq!(v_serde, "\u{e}"); -// //assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn unicode() { -// let mut d = String::from(r#""¡\"""#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn odd_array() { -// let mut d = String::from("[{},null]"); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![Value::Object(Map::new()), Value::Null])) -// ); -// } -// -// #[test] -// fn map2() { -// let mut d = String::from(r#"[{"\u0000":null}]"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: 
serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn null() { -// let mut d = String::from(r#"null"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// assert_eq!(to_value(&mut d1), Ok(Value::Null)); -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// #[test] -// fn null_null() { -// let mut d = String::from(r#"[null, null]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![Value::Null, Value::Null,])) -// ); -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn nested_null() { -// let mut d = String::from(r#"[[null, null]]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// assert_eq!( -// to_value(&mut d1), -// Ok(Value::Array(vec![Value::Array(vec![ -// Value::Null, -// Value::Null, -// ])])) -// ); -// -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// } -// -// #[test] -// fn nestednested_null() { -// let mut d = String::from(r#"[[[null, null]]]"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// assert_eq!( -// to_value(&mut d1), -// 
Ok(Value::Array(vec![Value::Array(vec![Value::Array(vec![ -// Value::Null, -// Value::Null, -// ])])])) -// ); -// } -// -// #[test] -// fn odd_array2() { -// let mut d = String::from("[[\"\\u0000\\\"\"]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn odd_array3() { -// let mut d = String::from("[{\"\\u0000\\u0000\":null}]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn odd_array4() { -// let mut d = String::from("[{\"\\u0000𐀀a\":null}]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn float1() { -// let mut d = String::from("2.3250706903316115e307"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// // We ignore this since serde is less percise on this test -// #[ignore] -// #[test] -// fn float2() { -// let mut d = String::from("-4.5512678569607477e306"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn map0() { -// let mut d = String::from(r#"{"snot": "badger"}"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = 
unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// let mut h = Map::new(); -// h.insert("snot".into(), Value::from("badger")); -// assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); -// } -// -// #[test] -// fn map1() { -// let mut d = String::from(r#"{"snot": "badger", "badger": "snot"}"#); -// let mut d1 = d.clone(); -// let mut d1 = unsafe { d1.as_bytes_mut() }; -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); -// let v_simd: serde_json::Value = from_slice(&mut d).expect(""); -// assert_eq!(v_simd, v_serde); -// let mut h = Map::new(); -// h.insert("snot".into(), Value::from("badger")); -// h.insert("badger".into(), Value::from("snot")); -// assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); -// } -// -// #[test] -// fn tpl1() { -// let mut d = String::from("[-65.613616999999977, 43.420273000000009]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: (f32, f32) = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: (f32, f32) = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn tpl2() { -// let mut d = String::from("[[-65.613616999999977, 43.420273000000009]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn tpl3() { -// let mut d = String::from( -// "[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]", -// ); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); -// 
assert_eq!(v_simd, v_serde) -// } -// #[test] -// fn tpl4() { -// let mut d = String::from("[[[-65.613616999999977,43.420273000000009]]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// #[test] -// fn tpl5() { -// let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn tpl6() { -// let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn tpl7() { -// let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[derive(Deserialize, PartialEq, Debug)] -// struct Obj { -// a: u64, -// b: u64, -// } -// -// #[derive(Deserialize, PartialEq, Debug)] -// struct Obj1 { -// a: Obj, -// } -// -// #[test] -// fn obj() { -// let mut d = String::from(r#"{"a": 1, "b":1}"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Obj = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Obj = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn obj2() { -// let mut d = 
-// String::from(r#"{"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn obj3() { -// let mut d = String::from( -// r#"{"c": {"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}}"#, -// ); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: HashMap> = -// serde_json::from_slice(d).expect("serde_json"); -// let v_simd: HashMap> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn obj4() { -// let mut d = String::from(r#"{"c": {"a": {"a": 1, "b":1}}}"#); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn vecvec() { -// let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]], [[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn crazy_string() { -// // there is unicode in here! 
-// let d = "\"𐀀𐀀 𐀀𐀀0 𐀀A\\u00000A0 A \\u000b\""; -// let mut d = String::from(d); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// #[test] -// fn event() { -// #[derive(Deserialize, Debug, PartialEq)] -// #[serde(deny_unknown_fields, rename_all = "camelCase")] -// pub struct CitmCatalog { -// pub area_names: HashMap, -// pub audience_sub_category_names: HashMap, -// pub block_names: HashMap, -// pub events: HashMap, -// } -// pub type Id = u32; -// #[derive(Deserialize, Debug, PartialEq)] -// #[serde(deny_unknown_fields, rename_all = "camelCase")] -// pub struct Event { -// pub description: (), -// pub id: Id, -// pub logo: Option, -// pub name: String, -// pub sub_topic_ids: Vec, -// pub subject_code: (), -// pub subtitle: (), -// pub topic_ids: Vec, -// } -// -// let mut d = String::from( -// r#" -//{ -// "areaNames": { -// "205705993": "Arrière-scène central", -// "205705994": "1er balcon central", -// "205705995": "2ème balcon bergerie cour", -// "205705996": "2ème balcon bergerie jardin", -// "205705998": "1er balcon bergerie jardin", -// "205705999": "1er balcon bergerie cour", -// "205706000": "Arrière-scène jardin", -// "205706001": "Arrière-scène cour", -// "205706002": "2ème balcon jardin", -// "205706003": "2ème balcon cour", -// "205706004": "2ème Balcon central", -// "205706005": "1er balcon jardin", -// "205706006": "1er balcon cour", -// "205706007": "Orchestre central", -// "205706008": "Orchestre jardin", -// "205706009": "Orchestre cour", -// "342752287": "Zone physique secrète" -// }, -// "audienceSubCategoryNames": { -// "337100890": "Abonné" -// }, -// "blockNames": {}, -// "events": { -// "138586341": { -// "description": null, -// "id": 138586341, -// "logo": null, -// "name": "30th Anniversary Tour", -// "subTopicIds": [ -// 337184269, -// 
337184283 -// ], -// "subjectCode": null, -// "subtitle": null, -// "topicIds": [ -// 324846099, -// 107888604 -// ] -// }, -// "138586345": { -// "description": null, -// "id": 138586345, -// "logo": "/images/UE0AAAAACEKo6QAAAAZDSVRN", -// "name": "Berliner Philharmoniker", -// "subTopicIds": [ -// 337184268, -// 337184283, -// 337184275 -// ], -// "subjectCode": null, -// "subtitle": null, -// "topicIds": [ -// 324846099, -// 107888604, -// 324846100 -// ] -// } -// } -//} -//"#, -// ); -// let mut d = unsafe { d.as_bytes_mut() }; -// let v_serde: CitmCatalog = serde_json::from_slice(d).expect("serde_json"); -// let v_simd: CitmCatalog = from_slice(&mut d).expect("simd_json"); -// assert_eq!(v_simd, v_serde) -// } -// -// // How much do we care about this, it's within the same range and -// // based on floating point math inprecisions during parsing. -// // Is this a real issue worth improving? -// #[test] -// fn silly_float1() { -// let v = Value::from(3.0901448042322017e305); -// let s = v.to_string(); -// dbg!(&s); -// let mut bytes = s.as_bytes().to_vec(); -// let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); -// assert_eq!(v, parsed); -// } -// -// #[test] -// #[ignore] -// fn silly_float2() { -// let v = Value::from(-6.990585694841803e305); -// let s = v.to_string(); -// dbg!(&s); -// let mut bytes = s.as_bytes().to_vec(); -// let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); -// assert_eq!(v, parsed); -// } -// -// //6.576692109929364e305 -// fn arb_json() -> BoxedStrategy { -// let leaf = prop_oneof![ -// Just(Value::Null), -// any::().prop_map(Value::Bool), -// // (-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // The float parsing of simd and serde are too different -// any::().prop_map(|i| json!(i)), -// ".*".prop_map(Value::from), -// ]; -// leaf.prop_recursive( -// 8, // 8 levels deep -// 256, // Shoot for maximum size of 256 nodes -// 10, // We put up to 10 items per collection -// 
|inner| { -// prop_oneof![ -// // Take the inner strategy and make the two recursive cases. -// prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), -// prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), -// ] -// }, -// ) -// .prop_map(|v| serde_json::to_string(&v).expect("").to_string()) -// .boxed() -// } -// -// fn arb_json_value() -> BoxedStrategy { -// let leaf = prop_oneof![ -// Just(Value::Null), -// any::().prop_map(Value::Bool), -// //(-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // damn you float! -// any::().prop_map(|i| json!(i)), -// ".*".prop_map(Value::from), -// ]; -// leaf.prop_recursive( -// 8, // 8 levels deep -// 256, // Shoot for maximum size of 256 nodes -// 10, // We put up to 10 items per collection -// |inner| { -// prop_oneof![ -// // Take the inner strategy and make the two recursive cases. -// prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), -// prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), -// ] -// }, -// ) -// .boxed() -// } -// -// proptest! { -// #![proptest_config(ProptestConfig { -// // Setting both fork and timeout is redundant since timeout implies -// // fork, but both are shown for clarity. -// fork: true, -// .. ProptestConfig::default() -// })] -// -// #[test] -// fn prop_json_encode_decode(val in arb_json_value()) { -// let mut encoded: Vec = Vec::new(); -// let _ = val.write(&mut encoded); -// println!("{}", String::from_utf8(encoded.clone()).unwrap()); -// let res = to_owned_value(&mut encoded).unwrap(); -// assert_eq!(val, res); -// } -// -// } -// proptest! { -// #![proptest_config(ProptestConfig { -// // Setting both fork and timeout is redundant since timeout implies -// // fork, but both are shown for clarity. -// fork: true, -// .. 
ProptestConfig::default() -// })] -// -// #[test] -// fn prop_json(d in arb_json()) { -// if let Ok(v_serde) = serde_json::from_slice::(&d.as_bytes()) { -// let mut d1 = d.clone(); -// let d1 = unsafe{ d1.as_bytes_mut()}; -// let v_simd_serde: serde_json::Value = from_slice(d1).expect(""); -// // We add our own encoder in here. -// let mut d2 = v_simd_serde.to_string(); -// let d2 = unsafe{ d2.as_bytes_mut()}; -// let mut d3 = d.clone(); -// let d3 = unsafe{ d3.as_bytes_mut()}; -// assert_eq!(v_simd_serde, v_serde); -// let v_simd_owned = to_owned_value(d2); -// assert!(v_simd_owned.is_ok()); -// let v_simd_borrowed = to_borrowed_value(d3); -// dbg!(&v_simd_borrowed); -// assert!(v_simd_borrowed.is_ok()); -// assert_eq!(v_simd_owned.unwrap(), super::OwnedValue::from(v_simd_borrowed.unwrap())); -// } -// -// } -// -// } -// -// fn arb_junk() -> BoxedStrategy> { -// prop::collection::vec(any::(), 0..(1024 * 8)).boxed() -// } -// proptest! { -// #![proptest_config(ProptestConfig { -// // Setting both fork and timeout is redundant since timeout implies -// // fork, but both are shown for clarity. -// fork: true, -// .. ProptestConfig::default() -// })] -// #[test] -// fn prop_junk(d in arb_junk()) { -// let mut d1 = d.clone(); -// let mut d2 = d.clone(); -// let mut d3 = d.clone(); -// -// let _ = from_slice::(&mut d1); -// let _ = to_borrowed_value(&mut d2); -// let _ = to_owned_value(&mut d3); -// -// } -// } -// -// proptest! { -// #![proptest_config(ProptestConfig { -// // Setting both fork and timeout is redundant since timeout implies -// // fork, but both are shown for clarity. -// fork: true, -// .. 
ProptestConfig::default() -// })] -// -// #[test] -// fn prop_string(d in "\\PC*") { -// let mut d1 = d.clone(); -// let mut d1 = unsafe{ d1.as_bytes_mut()}; -// let mut d2 = d.clone(); -// let mut d2 = unsafe{ d2.as_bytes_mut()}; -// let mut d3 = d.clone(); -// let mut d3 = unsafe{ d3.as_bytes_mut()}; -// let _ = from_slice::(&mut d1); -// let _ = to_borrowed_value(&mut d2); -// let _ = to_owned_value(&mut d3); -// -// } -// } -//} + +#[cfg(test)] +mod tests { + use super::serde::from_slice; + use super::{ + owned::to_value, owned::Map, owned::Value, to_borrowed_value, to_owned_value, Deserializer, + }; + use halfbrown::HashMap; + use proptest::prelude::*; + use serde::Deserialize; + use serde_json; + + #[test] + fn count1() { + let mut d = String::from("[]"); + let mut d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(&mut d).expect(""); + assert_eq!(simd.counts[1], 0); + } + + #[test] + fn count2() { + let mut d = String::from("[1]"); + let mut d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(&mut d).expect(""); + assert_eq!(simd.counts[1], 1); + } + + #[test] + fn count3() { + let mut d = String::from("[1,2]"); + let mut d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(&mut d).expect(""); + assert_eq!(simd.counts[1], 2); + } + + #[test] + fn count4() { + let mut d = String::from(" [ 1 , [ 3 ] , 2 ]"); + let mut d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(&mut d).expect(""); + assert_eq!(simd.counts[1], 3); + assert_eq!(simd.counts[4], 1); + } + + #[test] + fn count5() { + let mut d = String::from("[[],null,null]"); + let mut d = unsafe { d.as_bytes_mut() }; + let simd = Deserializer::from_slice(&mut d).expect(""); + assert_eq!(simd.counts[1], 3); + assert_eq!(simd.counts[2], 0); + } + + #[test] + fn empty() { + let mut d = String::from(""); + let mut d = unsafe { d.as_bytes_mut() }; + let v_simd = from_slice::(&mut d); + let v_serde = serde_json::from_slice::(d); + 
assert!(v_simd.is_err()); + assert!(v_serde.is_err()); + } + + #[test] + fn bool_true() { + let mut d = String::from("true"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(true))); + } + + #[test] + fn bool_false() { + let mut d = String::from("false"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(false))); + //assert!(false) + } + + #[test] + fn union() { + let mut d = String::from("null"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::Null)); + } + + #[test] + fn int() { + let mut d = String::from("42"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(42))); + } + + #[test] + fn zero() { + let mut d = String::from("0"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut 
d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(0))); + } + + #[test] + fn one() { + let mut d = String::from("1"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(1))); + } + + #[test] + fn minus_one() { + let mut d = String::from("-1"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(-1))); + } + + #[test] + fn float() { + let mut d = String::from("23.0"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!(to_value(&mut d1), Ok(Value::from(23.0))); + } + + #[test] + fn string() { + let mut d = String::from(r#""snot""#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(to_value(&mut d1), Ok(Value::from("snot"))); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn lonely_quote() { + let mut d = String::from(r#"""#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(&mut d).is_err(); + assert!(v_simd); + assert!(v_serde); + } + + #[test] + fn 
lonely_quote1() { + let mut d = String::from(r#"["]"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(&mut d).is_err(); + assert!(v_simd); + assert!(v_serde); + } + #[test] + fn lonely_quote2() { + let mut d = String::from(r#"[1, "]"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(&mut d).is_err(); + assert!(v_simd); + assert!(v_serde); + } + + #[test] + fn lonely_quote3() { + let mut d = String::from(r#"{": 1}"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde = serde_json::from_slice::(d).is_err(); + let v_simd = from_slice::(&mut d).is_err(); + assert!(v_simd); + assert!(v_serde); + } + + #[test] + fn empty_string() { + let mut d = String::from(r#""""#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(to_value(&mut d1), Ok(Value::from(""))); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn empty_array() { + let mut d = String::from(r#"[]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); + let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); + assert_eq!(to_value(&mut d1), Ok(Value::Array(vec![]))); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn malformed_array() { + let mut d = String::from(r#"[["#); + let mut d1 = d.clone(); + let mut d2 = d.clone(); + let mut d = unsafe { d.as_bytes_mut() }; + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d2 = unsafe { d2.as_bytes_mut() }; + let v_serde: Result = serde_json::from_slice(d); + let v_simd_ov = to_owned_value(&mut d); + let v_simd_bv = 
to_borrowed_value(&mut d1); + let v_simd: Result = from_slice(&mut d2); + assert!(v_simd_ov.is_err()); + assert!(v_simd_bv.is_err()); + assert!(v_simd.is_err()); + assert!(v_serde.is_err()); + } + + #[test] + fn double_array() { + let mut d = String::from(r#"[[]]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); + let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![Value::Array(vec![])])) + ); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn null_null_array() { + let mut d = String::from(r#"[[],null,null]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("parse_serde"); + let v_simd: serde_json::Value = from_slice(&mut d).expect("parse_simd"); + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![ + Value::Array(vec![]), + Value::Null, + Value::Null, + ])) + ); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn one_element_array() { + let mut d = String::from(r#"["snot"]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![Value::from("snot")])) + ); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn two_element_array() { + let mut d = String::from(r#"["snot", "badger"]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![ + Value::from("snot"), + Value::from("badger") + ])) + ); + let v_serde: serde_json::Value = 
serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn list() { + let mut d = String::from(r#"[42, 23.0, "snot badger"]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![ + Value::from(42), + Value::from(23.0), + Value::from("snot badger") + ])) + ); + } + + #[test] + fn nested_list1() { + let mut d = String::from(r#"[42, [23.0, "snot"], "bad", "ger"]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![ + Value::from(42), + Value::Array(vec![Value::from(23.0), Value::from("snot")]), + Value::from("bad"), + Value::from("ger") + ])) + ); + + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn nested_list2() { + let mut d = String::from(r#"[42, [23.0, "snot"], {"bad": "ger"}]"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn utf8() { + let mut d = String::from(r#""\u000e""#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, "\u{e}"); + // NOTE: serde is broken for this + //assert_eq!(v_serde, "\u{e}"); + //assert_eq!(v_simd, v_serde) + } + + #[test] + fn unicode() { + let mut d = String::from(r#""¡\"""#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: 
serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn odd_array() { + let mut d = String::from("[{},null]"); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![Value::Object(Map::new()), Value::Null])) + ); + } + + #[test] + fn map2() { + let mut d = String::from(r#"[{"\u0000":null}]"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn null() { + let mut d = String::from(r#"null"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + assert_eq!(to_value(&mut d1), Ok(Value::Null)); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + #[test] + fn null_null() { + let mut d = String::from(r#"[null, null]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![Value::Null, Value::Null,])) + ); + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn nested_null() { + let mut d = String::from(r#"[[null, null]]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + assert_eq!( + to_value(&mut 
d1), + Ok(Value::Array(vec![Value::Array(vec![ + Value::Null, + Value::Null, + ])])) + ); + + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + } + + #[test] + fn nestednested_null() { + let mut d = String::from(r#"[[[null, null]]]"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + assert_eq!( + to_value(&mut d1), + Ok(Value::Array(vec![Value::Array(vec![Value::Array(vec![ + Value::Null, + Value::Null, + ])])])) + ); + } + + #[test] + fn odd_array2() { + let mut d = String::from("[[\"\\u0000\\\"\"]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn odd_array3() { + let mut d = String::from("[{\"\\u0000\\u0000\":null}]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn odd_array4() { + let mut d = String::from("[{\"\\u0000𐀀a\":null}]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn float1() { + let mut d = String::from("2.3250706903316115e307"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) 
+ } + + // We ignore this since serde is less percise on this test + #[ignore] + #[test] + fn float2() { + let mut d = String::from("-4.5512678569607477e306"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn map0() { + let mut d = String::from(r#"{"snot": "badger"}"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + let mut h = Map::new(); + h.insert("snot".into(), Value::from("badger")); + assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); + } + + #[test] + fn map1() { + let mut d = String::from(r#"{"snot": "badger", "badger": "snot"}"#); + let mut d1 = d.clone(); + let mut d1 = unsafe { d1.as_bytes_mut() }; + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect(""); + let v_simd: serde_json::Value = from_slice(&mut d).expect(""); + assert_eq!(v_simd, v_serde); + let mut h = Map::new(); + h.insert("snot".into(), Value::from("badger")); + h.insert("badger".into(), Value::from("snot")); + assert_eq!(to_value(&mut d1), Ok(Value::Object(h))); + } + + #[test] + fn tpl1() { + let mut d = String::from("[-65.613616999999977, 43.420273000000009]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: (f32, f32) = serde_json::from_slice(d).expect("serde_json"); + let v_simd: (f32, f32) = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn tpl2() { + let mut d = String::from("[[-65.613616999999977, 43.420273000000009]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); 
+ let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn tpl3() { + let mut d = String::from( + "[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]", + ); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec<(f32, f32)> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec<(f32, f32)> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + #[test] + fn tpl4() { + let mut d = String::from("[[[-65.613616999999977,43.420273000000009]]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + #[test] + fn tpl5() { + let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn tpl6() { + let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn tpl7() { + let mut d = String::from("[[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec>> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec>> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[derive(Deserialize, PartialEq, Debug)] + struct Obj { + a: u64, + b: u64, + } + + #[derive(Deserialize, PartialEq, Debug)] + struct Obj1 { 
+ a: Obj, + } + + #[test] + fn obj() { + let mut d = String::from(r#"{"a": 1, "b":1}"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Obj = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Obj = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn obj2() { + let mut d = + String::from(r#"{"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); + let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn obj3() { + let mut d = String::from( + r#"{"c": {"a": {"a": 1, "b":1}, "b": {"a": 1, "b":1}, "c": {"a": 1, "b": 1}}}"#, + ); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: HashMap> = + serde_json::from_slice(d).expect("serde_json"); + let v_simd: HashMap> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn obj4() { + let mut d = String::from(r#"{"c": {"a": {"a": 1, "b":1}}}"#); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: HashMap = serde_json::from_slice(d).expect("serde_json"); + let v_simd: HashMap = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn vecvec() { + let mut d = String::from("[[[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]], [[-65.613616999999977,43.420273000000009], [-65.613616999999977,43.420273000000009]]]"); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: Vec> = serde_json::from_slice(d).expect("serde_json"); + let v_simd: Vec> = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn crazy_string() { + // there is unicode in here! 
+ let d = "\"𐀀𐀀 𐀀𐀀0 𐀀A\\u00000A0 A \\u000b\""; + let mut d = String::from(d); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: serde_json::Value = serde_json::from_slice(d).expect("serde_json"); + let v_simd: serde_json::Value = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + #[test] + fn event() { + #[derive(Deserialize, Debug, PartialEq)] + #[serde(deny_unknown_fields, rename_all = "camelCase")] + pub struct CitmCatalog { + pub area_names: HashMap, + pub audience_sub_category_names: HashMap, + pub block_names: HashMap, + pub events: HashMap, + } + pub type Id = u32; + #[derive(Deserialize, Debug, PartialEq)] + #[serde(deny_unknown_fields, rename_all = "camelCase")] + pub struct Event { + pub description: (), + pub id: Id, + pub logo: Option, + pub name: String, + pub sub_topic_ids: Vec, + pub subject_code: (), + pub subtitle: (), + pub topic_ids: Vec, + } + + let mut d = String::from( + r#" +{ + "areaNames": { + "205705993": "Arrière-scène central", + "205705994": "1er balcon central", + "205705995": "2ème balcon bergerie cour", + "205705996": "2ème balcon bergerie jardin", + "205705998": "1er balcon bergerie jardin", + "205705999": "1er balcon bergerie cour", + "205706000": "Arrière-scène jardin", + "205706001": "Arrière-scène cour", + "205706002": "2ème balcon jardin", + "205706003": "2ème balcon cour", + "205706004": "2ème Balcon central", + "205706005": "1er balcon jardin", + "205706006": "1er balcon cour", + "205706007": "Orchestre central", + "205706008": "Orchestre jardin", + "205706009": "Orchestre cour", + "342752287": "Zone physique secrète" + }, + "audienceSubCategoryNames": { + "337100890": "Abonné" + }, + "blockNames": {}, + "events": { + "138586341": { + "description": null, + "id": 138586341, + "logo": null, + "name": "30th Anniversary Tour", + "subTopicIds": [ + 337184269, + 337184283 + ], + "subjectCode": null, + "subtitle": null, + "topicIds": [ + 324846099, + 107888604 + ] + }, + "138586345": { + 
"description": null, + "id": 138586345, + "logo": "/images/UE0AAAAACEKo6QAAAAZDSVRN", + "name": "Berliner Philharmoniker", + "subTopicIds": [ + 337184268, + 337184283, + 337184275 + ], + "subjectCode": null, + "subtitle": null, + "topicIds": [ + 324846099, + 107888604, + 324846100 + ] + } + } +} +"#, + ); + let mut d = unsafe { d.as_bytes_mut() }; + let v_serde: CitmCatalog = serde_json::from_slice(d).expect("serde_json"); + let v_simd: CitmCatalog = from_slice(&mut d).expect("simd_json"); + assert_eq!(v_simd, v_serde) + } + + // How much do we care about this, it's within the same range and + // based on floating point math inprecisions during parsing. + // Is this a real issue worth improving? + #[test] + fn silly_float1() { + let v = Value::from(3.0901448042322017e305); + let s = v.to_string(); + dbg!(&s); + let mut bytes = s.as_bytes().to_vec(); + let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); + assert_eq!(v, parsed); + } + + #[test] + #[ignore] + fn silly_float2() { + let v = Value::from(-6.990585694841803e305); + let s = v.to_string(); + dbg!(&s); + let mut bytes = s.as_bytes().to_vec(); + let parsed = to_owned_value(&mut bytes).expect("failed to parse gernated float"); + assert_eq!(v, parsed); + } + + //6.576692109929364e305 + fn arb_json() -> BoxedStrategy { + let leaf = prop_oneof![ + Just(Value::Null), + any::().prop_map(Value::Bool), + // (-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // The float parsing of simd and serde are too different + any::().prop_map(|i| json!(i)), + ".*".prop_map(Value::from), + ]; + leaf.prop_recursive( + 8, // 8 levels deep + 256, // Shoot for maximum size of 256 nodes + 10, // We put up to 10 items per collection + |inner| { + prop_oneof![ + // Take the inner strategy and make the two recursive cases. 
+ prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), + prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), + ] + }, + ) + .prop_map(|v| serde_json::to_string(&v).expect("").to_string()) + .boxed() + } + + fn arb_json_value() -> BoxedStrategy { + let leaf = prop_oneof![ + Just(Value::Null), + any::().prop_map(Value::Bool), + //(-1.0e306f64..1.0e306f64).prop_map(|f| json!(f)), // damn you float! + any::().prop_map(|i| json!(i)), + ".*".prop_map(Value::from), + ]; + leaf.prop_recursive( + 8, // 8 levels deep + 256, // Shoot for maximum size of 256 nodes + 10, // We put up to 10 items per collection + |inner| { + prop_oneof![ + // Take the inner strategy and make the two recursive cases. + prop::collection::vec(inner.clone(), 0..10).prop_map(|v| json!(v)), + prop::collection::hash_map(".*", inner, 0..10).prop_map(|m| json!(m)), + ] + }, + ) + .boxed() + } + + proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + fork: true, + .. ProptestConfig::default() + })] + + #[test] + fn prop_json_encode_decode(val in arb_json_value()) { + let mut encoded: Vec = Vec::new(); + let _ = val.write(&mut encoded); + println!("{}", String::from_utf8(encoded.clone()).unwrap()); + let res = to_owned_value(&mut encoded).unwrap(); + assert_eq!(val, res); + } + + } + proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + fork: true, + .. ProptestConfig::default() + })] + + #[test] + fn prop_json(d in arb_json()) { + if let Ok(v_serde) = serde_json::from_slice::(&d.as_bytes()) { + let mut d1 = d.clone(); + let d1 = unsafe{ d1.as_bytes_mut()}; + let v_simd_serde: serde_json::Value = from_slice(d1).expect(""); + // We add our own encoder in here. 
+ let mut d2 = v_simd_serde.to_string(); + let d2 = unsafe{ d2.as_bytes_mut()}; + let mut d3 = d.clone(); + let d3 = unsafe{ d3.as_bytes_mut()}; + assert_eq!(v_simd_serde, v_serde); + let v_simd_owned = to_owned_value(d2); + assert!(v_simd_owned.is_ok()); + let v_simd_borrowed = to_borrowed_value(d3); + dbg!(&v_simd_borrowed); + assert!(v_simd_borrowed.is_ok()); + assert_eq!(v_simd_owned.unwrap(), super::OwnedValue::from(v_simd_borrowed.unwrap())); + } + + } + + } + + fn arb_junk() -> BoxedStrategy> { + prop::collection::vec(any::(), 0..(1024 * 8)).boxed() + } + proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + fork: true, + .. ProptestConfig::default() + })] + #[test] + fn prop_junk(d in arb_junk()) { + let mut d1 = d.clone(); + let mut d2 = d.clone(); + let mut d3 = d.clone(); + + let _ = from_slice::(&mut d1); + let _ = to_borrowed_value(&mut d2); + let _ = to_owned_value(&mut d3); + + } + } + + proptest! { + #![proptest_config(ProptestConfig { + // Setting both fork and timeout is redundant since timeout implies + // fork, but both are shown for clarity. + fork: true, + .. 
ProptestConfig::default() + })] + + #[test] + fn prop_string(d in "\\PC*") { + let mut d1 = d.clone(); + let mut d1 = unsafe{ d1.as_bytes_mut()}; + let mut d2 = d.clone(); + let mut d2 = unsafe{ d2.as_bytes_mut()}; + let mut d3 = d.clone(); + let mut d3 = unsafe{ d3.as_bytes_mut()}; + let _ = from_slice::(&mut d1); + let _ = to_borrowed_value(&mut d2); + let _ = to_owned_value(&mut d3); + + } + } +} From b9b39007f295761fbc3b8de73c70bff55abe9487 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 9 Aug 2019 19:07:35 -0400 Subject: [PATCH 05/34] feat: remove core_arch unused dependency --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index cae31f52..399f7c9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ halfbrown = "0.1" page_size = "0.4" itoa = "0.4" ryu = "1" -core_arch = "0.1.5" # serde compatibilty serde = { version = "1", features = ["derive"], optional = true} From 1c989cf5023e7badb4d78332e18493231d4e0488 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Sat, 10 Aug 2019 08:08:14 -0400 Subject: [PATCH 06/34] feat: update string parse --- src/neon/deser.rs | 144 ++++++++++++----------------------------- src/neon/intrinsics.rs | 10 ++- src/stringparse.rs | 6 ++ src/value/generator.rs | 7 +- 4 files changed, 61 insertions(+), 106 deletions(-) diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 9cc5ca13..b194dde3 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -1,131 +1,73 @@ -use std::mem; +//use std::mem; pub use crate::error::{Error, ErrorType}; pub use crate::Deserializer; pub use crate::Result; +pub use crate::neon::stage1::SIMDJSON_PADDING; pub use crate::neon::intrinsics::*; pub use crate::neon::utf8check::*; pub use crate::stringparse::*; -use crate::neon::intrinsics::*; +pub use crate::neon::intrinsics::*; +unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dst: &mut [u8]) -> ParseStringHelper { + // this can read up to 31 bytes beyond the buffer size, but we require + // 
SIMDJSON_PADDING of padding + let v0 : uint8x16_t = vld1q_u8(src.as_ptr()); + let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); + vst1q_u8(dst.as_mut_ptr(), v0); + vst1q_u8(dst.as_mut_ptr().add(16), v1); + + let bs_mask : uint8x16_t = vmovq_n_u8('\\' as u8); + let qt_mask : uint8x16_t = vmovq_n_u8('"' as u8); + + let bit_mask = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + + let cmp_bs_0 : uint8x16_t = vceqq_u8(v0, bs_mask); + let cmp_bs_1 : uint8x16_t = vceqq_u8(v1, bs_mask); + let cmp_qt_0 : uint8x16_t = vceqq_u8(v0, qt_mask); + let cmp_qt_1 : uint8x16_t = vceqq_u8(v1, qt_mask); + + let cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask); + let cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask); + let cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask); + let cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask); + + let sum0 : uint8x16_t = vpaddq_u8(cmp_bs_0, cmp_bs_1); + let sum1 : uint8x16_t = vpaddq_u8(cmp_qt_0, cmp_qt_1); + let sum0 = vpaddq_u8(sum0, sum1); + let sum0 = vpaddq_u8(sum0, sum0); + + ParseStringHelper { + bs_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits + quote_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits + } +} impl<'de> Deserializer<'de> { #[cfg_attr(not(feature = "no-inline"), inline(always))] pub fn parse_str_(&mut self) -> Result<&'de str> { // Add 1 to skip the initial " let idx = self.iidx + 1; - let mut padding = [0u8; 32]; +// let padding = [0u8; 32]; //let mut read: usize = 0; // we include the terminal '"' so we know where to end // This is safe since we check sub's lenght in the range access above and only // create sub sliced form sub to `sub.len()`. - - // - // FIXME - this section is somewhat questionable -- needs a complete proofread!!! - // - let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) 
}; let mut src_i: usize = 0; - let mut len = src_i; - loop { - let v: int8x16_t = if src.len() >= src_i + 16 { - // This is safe since we ensure src is at least 16 wide - #[allow(clippy::cast_ptr_alignment)] - unsafe { - *(src.as_ptr().add(src_i) as *const int8x16_t) - } - } else { - unsafe { - padding - .get_unchecked_mut(..src.len() - src_i) - .clone_from_slice(src.get_unchecked(src_i..)); - // This is safe since we ensure src is at least 16 wide - #[allow(clippy::cast_ptr_alignment)] - *(padding.as_ptr() as *const int8x16_t) - } - }; - - // store to dest unconditionally - we can overwrite the bits we don't like - // later - let bs_bits: u32 = 0; // FIXME: vceqq_s8(v, vdupq_n_s8(b'\\' as i8)); - let quote_mask = 0; // FIXME: unsafe { vceqq_s8(v, vdupq_n_s8(b'"' as i8)) }; - let quote_bits = 0; // FIXME: unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; - if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { - // we encountered quotes first. Move dst to point to quotes and exit - // find out where the quote is... - let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; - - /////////////////////// - // Above, check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - //////////////////////// - - // we advance the point, accounting for the fact that we have a NULl termination - - len += quote_dist as usize; - unsafe { - let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; - return Ok(&*v); - } - - // we compare the pointers since we care if they are 'at the same spot' - // not if they are the same value - } - if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { - // Move to the 'bad' character - let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); - len += bs_dist as usize; - src_i += bs_dist as usize; - break; - } else { - // they are the same. 
Since they can't co-occur, it means we encountered - // neither. - src_i += 16; - len += 16; - } - } - + let len = src_i; let mut dst_i: usize = 0; let dst: &mut [u8] = &mut self.strings; loop { - let v: int8x16_t = if src.len() >= src_i + 16 { - // This is safe since we ensure src is at least 16 wide - #[allow(clippy::cast_ptr_alignment)] - unsafe { - *(src.as_ptr().add(src_i) as *const int8x16_t) - } - } else { - unsafe { - padding - .get_unchecked_mut(..src.len() - src_i) - .clone_from_slice(src.get_unchecked(src_i..)); - // This is safe since we ensure src is at least 16 wide - #[allow(clippy::cast_ptr_alignment)] - *(padding.as_ptr() as *const int8x16_t) - } - }; - - #[allow(clippy::cast_ptr_alignment)] - unsafe { - *(dst.as_mut_ptr().add(dst_i) as *mut int8x16_t) = v; - }; - // store to dest unconditionally - we can overwrite the bits we don't like // later - let bs_bits: u32 = 0; // FIXME -// unsafe { -// static_cast_u32!(_mm_movemask_epi8(vceqq_s8( -// v, -// vdupq_n_s8(b'\\' as i8) -// ))) -// }; - let quote_mask = 0; // FIXME: unsafe { vceqq_s8(v, vdupq_n_s8(b'"' as i8)) }; - let quote_bits = 0; // FIXME: unsafe { static_cast_u32!(_mm_movemask_epi8(quote_mask)) }; + let ParseStringHelper {bs_bits, quote_bits} = unsafe { find_bs_bits_and_quote_bits(src, dst) }; + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit // find out where the quote is... @@ -196,8 +138,8 @@ impl<'de> Deserializer<'de> { } else { // they are the same. Since they can't co-occur, it means we encountered // neither. 
- src_i += 16; - dst_i += 16; + src_i += 32; + dst_i += 32; } } } diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 065047b6..094e9f7f 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -154,6 +154,11 @@ pub unsafe fn vld1q_u8(addr: *const u8) -> uint8x16_t { *(addr as *const uint8x16_t) } +#[inline] +pub unsafe fn vst1q_u8(addr: *mut u8, val: uint8x16_t) { + std::ptr::write(addr as *mut uint8x16_t, val); +} + macro_rules! aarch64_simd_2 { ($name:ident, $type:ty, $simd_fn:ident, $intr:ident) => { #[inline] @@ -341,6 +346,7 @@ macro_rules! arm_vget_lane { } arm_vget_lane!(vgetq_lane_u16, u16, uint16x8_t, 7); +arm_vget_lane!(vgetq_lane_u32, u32, uint32x4_t, 3); arm_vget_lane!(vgetq_lane_u64, u64, uint64x2_t, 1); arm_vget_lane!(vget_lane_u64, u64, uint64x1_t, 0); @@ -385,8 +391,8 @@ pub unsafe fn hamming(a:u64) -> u32 { } #[inline] -pub unsafe fn trailingzeroes(a:u64) -> u32 { - cttz_u64_(a) +pub fn trailingzeroes(a:u64) -> u32 { + unsafe { cttz_u64_(a) } } #[inline] diff --git a/src/stringparse.rs b/src/stringparse.rs index 0ff8a078..9a7e3b8f 100644 --- a/src/stringparse.rs +++ b/src/stringparse.rs @@ -73,3 +73,9 @@ pub fn handle_unicode_codepoint( let offset: usize = codepoint_to_utf8(code_point, dst_ptr); Ok((offset, src_offset)) } + +// Holds backslashes and quotes locations. 
+pub struct ParseStringHelper { + pub bs_bits: u32, + pub quote_bits: u32, +} diff --git a/src/value/generator.rs b/src/value/generator.rs index 1d21a833..bb054fc0 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -97,9 +97,10 @@ pub trait BaseGenerator { #[inline(always)] fn write_string(&mut self, string: &str) -> io::Result<()> { stry!(self.write_char(b'"')); - let mut string = string.as_bytes(); - let mut len = string.len(); - let mut idx = 0; + let string = string.as_bytes(); +// let mut string = string.as_bytes(); +// let mut len = string.len(); +// let mut idx = 0; // unsafe { // Looking at the table above the lower 5 bits are entirely From 592607091a67732262a6bea655dde9d532218123 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Sat, 10 Aug 2019 09:08:34 -0400 Subject: [PATCH 07/34] feat: additional tweaks, not breaks during linking --- src/neon/intrinsics.rs | 94 +++++++++++++++++++------------ src/neon/simd_llvm.rs | 2 +- src/neon/stage1.rs | 22 ++++---- src/neon/utf8check.rs | 125 ++++++++++++++++++++--------------------- src/value/generator.rs | 8 +-- 5 files changed, 137 insertions(+), 114 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 094e9f7f..3db115b8 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -21,42 +21,48 @@ macro_rules! 
types { )*) } +#[allow(non_camel_case_types)] +type poly128 = [u8; 16]; + extern "C" { #[link_name = "llvm.aarch64.neon.pmull64"] - fn vmull_p64_(a: i64, b: i64) -> int8x16_t; + fn vmull_p64_(a: i64, b: i64) -> poly128; #[link_name = "llvm.aarch64.neon.vshrq"] - fn vshrq_n_u8_(a: uint8x16_t, b: u8) -> uint8x16_t; + fn vshrq_n_u8_(a: poly128, b: u8) -> poly128; #[link_name = "llvm.aarch64.neon.addp.v16i8"] - fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + fn vpaddq_u8_(a: poly128, b: poly128) -> poly128; + #[link_name = "llvm.aarch64.neon.addp.v16u8"] + fn vaddq_u8_(a: poly128, b: poly128) -> poly128; #[link_name = "llvm.aarch64.neon.addp.v16i8"] - fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + fn vaddq_s8_(a: poly128, b: poly128) -> poly128; #[link_name = "llvm.aarch64.neon.addp.v16i32"] - fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + fn vaddq_s32_(a: poly128, b: poly128) -> poly128; + #[link_name = "llvm.aarch64.neon.vextq.v16u8"] + fn vextq_u8_(a: poly128, b: poly128, n: u8) -> poly128; #[link_name = "llvm.aarch64.neon.vextq.v16s8"] - fn vextq_s8_(a: int8x16_t, b: int8x16_t, n:u8) -> int8x16_t; + fn vextq_s8_(a: poly128, b: poly128, n: u8) -> poly128; #[link_name = "llvm.aarch64.neon.vtstq.v16u8"] - fn vtstq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + fn vtstq_u8_(a: poly128, b: poly128) -> poly128; #[link_name = "llvm.aarch64.neon.vtstq.v16s8"] - fn vtstq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + fn vtstq_s8_(a: poly128, b: poly128) -> poly128; #[link_name = "llvm.ctpop.u64"] fn ctpop_u64_(a: u64) -> u32; - #[link_name = "llvm.ctlz.u64"] - fn ctlz_u64_(a: u64) -> u32; + // #[link_name = "llvm.ctlz.u64"] +// fn ctlz_u64_(a: u64) -> u32; #[link_name = "llvm.cttz.u64"] fn cttz_u64_(a: u64) -> u32; } #[inline] -#[cfg_attr(test, assert_instr(pmull))] -pub unsafe fn vmull_p64(a: i64, b: i64) -> uint8x16_t { - mem::transmute(vmull_p64_(mem::transmute(a), mem::transmute(b))) +pub unsafe fn vmull_p64(a: i64, b: i64) -> 
int8x16_t { + mem::transmute(vmull_p64_(a, b)) } #[inline] pub unsafe fn vshrq_n_u8(a: uint8x16_t, b: u8) -> uint8x16_t { // FIXME? - vshrq_n_u8_(a, b) + mem::transmute(vshrq_n_u8_(mem::transmute(a), mem::transmute(b))) } types! { @@ -116,19 +122,19 @@ types! { } impl uint8x16_t { - pub fn new(a:u8,b:u8,c:u8,d:u8,e:u8,f:u8,g:u8,h:u8,i:u8,j:u8,k:u8,l:u8,m:u8,n:u8,o:u8,p:u8) -> uint8x16_t { + pub fn new(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8, i: u8, j: u8, k: u8, l: u8, m: u8, n: u8, o: u8, p: u8) -> uint8x16_t { uint8x16_t(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) } } impl int8x16_t { - pub fn new(a:i8,b:i8,c:i8,d:i8,e:i8,f:i8,g:i8,h:i8,i:i8,j:i8,k:i8,l:i8,m:i8,n:i8,o:i8,p:i8) -> int8x16_t { + pub fn new(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> int8x16_t { int8x16_t(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) } } impl int32x4_t { - pub fn new(a:i32,b:i32,c:i32,d:i32) -> int32x4_t { + pub fn new(a: i32, b: i32, c: i32, d: i32) -> int32x4_t { int32x4_t(a, b, c, d) } } @@ -199,6 +205,8 @@ macro_rules! 
aarch64_simd_cgtu { }; } +aarch64_simd_cgtu!(vcgtq_u8, uint8x16_t); + aarch64_simd_cgt!(vcgt_s64, int64x1_t); aarch64_simd_cgt!(vcgtq_s64, int64x2_t); aarch64_simd_cgtu!(vcgt_u64, uint64x1_t); @@ -262,7 +270,7 @@ aarch64_simd_cleu!(vcleq_u64, uint64x2_t); aarch64_simd_cleu!(vcgtq_s8, int8x16_t); #[inline] -pub fn vdupq_n_s8(a:i8) -> int8x16_t { +pub fn vdupq_n_s8(a: i8) -> int8x16_t { int8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } @@ -272,12 +280,12 @@ pub fn zeroi8x16() -> int8x16_t { } #[inline] -pub fn vdupq_n_u8(a:u8) -> uint8x16_t { +pub fn vdupq_n_u8(a: u8) -> uint8x16_t { uint8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } #[inline] -pub fn vmovq_n_u8(a:u8) -> uint8x16_t { +pub fn vmovq_n_u8(a: u8) -> uint8x16_t { uint8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } @@ -288,30 +296,35 @@ pub fn zerou8x16() -> uint8x16_t { #[inline] pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - vpaddq_u8_(a, b) + mem::transmute(vpaddq_u8_(mem::transmute(a), mem::transmute(b))) +} + +#[inline] +pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + mem::transmute(vaddq_u8_(mem::transmute(a), mem::transmute(b))) } #[inline] pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - vaddq_s8_(a, b) + mem::transmute(vaddq_s8_(mem::transmute(a), mem::transmute(b))) } #[inline] pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - vaddq_s32_(a, b) + mem::transmute(vaddq_s32_(mem::transmute(a), mem::transmute(b))) } #[inline] -pub unsafe fn vandq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_and(a, b) } +pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_and(a, b) } #[inline] -pub unsafe fn vorrq_u8(a:uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_or(a, b) } +pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_or(a, b) } #[inline] -pub unsafe fn vandq_s8(a:int8x16_t, b: int8x16_t) -> int8x16_t 
{ simd_llvm::simd_and(a, b) } +pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_and(a, b) } #[inline] -pub unsafe fn vorrq_s8(a:int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_or(a, b) } +pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_or(a, b) } macro_rules! arm_reinterpret { ($name:ident, $from:ty, $to:ty) => { @@ -332,6 +345,8 @@ arm_reinterpret!(vreinterpretq_u32_u8, uint8x16_t, uint32x4_t); arm_reinterpret!(vreinterpretq_u64_u8, uint8x16_t, uint64x2_t); arm_reinterpret!(vreinterpretq_u8_s8, int8x16_t, uint8x16_t); +arm_reinterpret!(vreinterpretq_s64_s8, int8x16_t, int64x2_t); + macro_rules! arm_vget_lane { ($name:ident, $to:ty, $from:ty, $lanes:literal) => { #[inline] @@ -350,12 +365,21 @@ arm_vget_lane!(vgetq_lane_u32, u32, uint32x4_t, 3); arm_vget_lane!(vgetq_lane_u64, u64, uint64x2_t, 1); arm_vget_lane!(vget_lane_u64, u64, uint64x1_t, 0); -pub unsafe fn vextq_s8(a:int8x16_t, b:int8x16_t, n:u8) -> int8x16_t { - vextq_s8_(a, b, n) +arm_vget_lane!(vgetq_lane_s16, i16, int16x8_t, 7); +arm_vget_lane!(vgetq_lane_s32, i32, int32x4_t, 3); +arm_vget_lane!(vgetq_lane_s64, i64, int64x2_t, 1); +arm_vget_lane!(vget_lane_s64, i64, int64x1_t, 0); + +pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: u8) -> uint8x16_t { + mem::transmute(vextq_u8_(mem::transmute(a), mem::transmute(b), n)) +} + +pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t { + mem::transmute(vextq_s8_(mem::transmute(a), mem::transmute(b), n)) } #[inline] -pub fn vqmovn_u64(a:uint64x2_t) -> uint32x2_t { +pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { uint32x2_t(a.0 as u32, a.1 as u32) } @@ -377,25 +401,25 @@ pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[inline] pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - vtstq_u8_(a, b) + mem::transmute(vtstq_u8_(mem::transmute(a), mem::transmute(b))) } #[inline] pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> 
int8x16_t { - vtstq_s8_(a, b) + mem::transmute(vtstq_s8_(mem::transmute(a), mem::transmute(b))) } #[inline] -pub unsafe fn hamming(a:u64) -> u32 { +pub unsafe fn hamming(a: u64) -> u32 { ctpop_u64_(a) } #[inline] -pub fn trailingzeroes(a:u64) -> u32 { +pub fn trailingzeroes(a: u64) -> u32 { unsafe { cttz_u64_(a) } } #[inline] -pub unsafe fn vst1q_u32(addr: *mut u8, val : uint32x4_t) { +pub unsafe fn vst1q_u32(addr: *mut u8, val: uint32x4_t) { std::ptr::write(addr as *mut uint32x4_t, val) } diff --git a/src/neon/simd_llvm.rs b/src/neon/simd_llvm.rs index 892ae59c..d91ba422 100644 --- a/src/neon/simd_llvm.rs +++ b/src/neon/simd_llvm.rs @@ -19,7 +19,7 @@ extern "platform-intrinsic" { // // pub fn simd_cast(x: T) -> U; // - pub fn simd_add(x: T, y: T) -> T; +// pub fn simd_add(x: T, y: T) -> T; pub fn simd_sub(x: T, y: T) -> T; // pub fn simd_mul(x: T, y: T) -> T; // pub fn simd_div(x: T, y: T) -> T; diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index fc4daabc..b2cdbaf5 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -57,11 +57,11 @@ unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: } unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { - vgetq_lane_u64(vreinterpretq_u64_u8(vmull_p64(-1, quote_bits as i64)), 0) + vgetq_lane_s64(vreinterpretq_s64_s8(vmull_p64(-1, quote_bits as i64)), 0) as u64 } struct Utf8CheckingState { - has_error: int8x16_t, + has_error: uint8x16_t, previous: ProcessedUtfBytes, } @@ -69,7 +69,7 @@ impl Default for Utf8CheckingState { #[cfg_attr(not(feature = "no-inline"), inline)] fn default() -> Self { Utf8CheckingState { - has_error: vdupq_n_s8(0), + has_error: vdupq_n_u8(0), previous:ProcessedUtfBytes::default() } } @@ -99,20 +99,20 @@ unsafe fn check_utf8( // All bytes are ascii. Therefore the byte that was just before must be // ascii too. We only check the byte that was just before simd_input. Nines // are arbitrary values. 
- let VERROR: int8x16_t = - int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); + let verror: uint8x16_t = + uint8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); state.has_error = - vorrq_s8(vcgtq_s8(state.previous.carried_continuations, VERROR), + vorrq_u8(vcgtq_u8(state.previous.carried_continuations, verror), state.has_error); } else { // it is not ascii so we have to do heavy work - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), + state.previous = check_utf8_bytes(input.v0, &(state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), + state.previous = check_utf8_bytes(input.v1, &(state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), + state.previous = check_utf8_bytes(input.v2, &(state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), + state.previous = check_utf8_bytes(input.v3, &(state.previous), &mut (state.has_error)); } } @@ -497,7 +497,7 @@ impl<'de> Deserializer<'de> { return Err(ErrorType::Syntax); } - let err: i128 = mem::transmute(vtstq_s8(previous.has_error, previous.has_error)); + let err: i128 = mem::transmute(vtstq_u8(previous.has_error, previous.has_error)); if err != 0 { Ok(structural_indexes) diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index de190f46..10c6b7c9 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -1,6 +1,6 @@ use crate::neon::intrinsics::*; -use std::mem; +//use std::mem; /* * legal utf-8 byte sequence @@ -21,18 +21,17 @@ use std::mem; // all byte values must be no larger than 0xF4 #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8x16_t) { +unsafe fn check_smaller_than_0xf4(current_bytes: uint8x16_t, has_error: &mut uint8x16_t) { // unsigned, saturates to 0 below max *has_error = - vorrq_s8( + vorrq_u8( *has_error, - 
vreinterpretq_s8_u8(vqsubq_u8( - vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0xF4)))); + vqsubq_u8(current_bytes, vdupq_n_u8(0xF4))); } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { - let NIBBLES : int8x16_t = int8x16_t::new( +unsafe fn continuation_lengths(high_nibbles: uint8x16_t) -> uint8x16_t { + let nibbles : uint8x16_t = uint8x16_t::new( 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 0, 0, 0, 0, // 10xx (continuation) 2, 2, // 110x @@ -40,35 +39,35 @@ unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { 4, // 1111, next should be 0 (not checked here) ); - vqtbl1q_s8(NIBBLES, vreinterpretq_u8_s8(high_nibbles)) + vqtbl1q_u8(nibbles, high_nibbles) } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn carry_continuations(initial_lengths: int8x16_t, previous_carries: int8x16_t) -> int8x16_t { - let right1 : int8x16_t = vreinterpretq_s8_u8(vqsubq_u8( - vreinterpretq_u8_s8(vextq_s8(previous_carries, initial_lengths, 16 - 1)), - vdupq_n_u8(1))); - let sum : int8x16_t = vaddq_s8(initial_lengths, right1); +unsafe fn carry_continuations(initial_lengths: uint8x16_t, previous_carries: uint8x16_t) -> uint8x16_t { + let right1 : uint8x16_t = vqsubq_u8( + vextq_u8(previous_carries, initial_lengths, 16 - 1), + vdupq_n_u8(1)); + let sum : uint8x16_t = vaddq_u8(initial_lengths, right1); - let right2 : int8x16_t = vreinterpretq_s8_u8( - vqsubq_u8(vreinterpretq_u8_s8(vextq_s8(previous_carries, sum, 16 - 2)), - vdupq_n_u8(2))); + let right2 : uint8x16_t = vqsubq_u8( + vextq_u8(previous_carries, sum, 16 - 2), + vdupq_n_u8(2)); - vaddq_s8(sum, right2) + vaddq_u8(sum, right2) } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, has_error: &mut int8x16_t) { +unsafe fn check_continuations(initial_lengths: uint8x16_t, carries: uint8x16_t, has_error: &mut uint8x16_t) { // overlap || underlap // carry > length && length 
> 0 || !(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) // FIXME: was vceqq_u8 ? - let overunder : int8x16_t = vceqq_s8( - vcgtq_s8(carries, initial_lengths), - vcgtq_s8(initial_lengths, vdupq_n_s8(0))); + let overunder : uint8x16_t = vceqq_u8( + vcgtq_u8(carries, initial_lengths), + vcgtq_u8(initial_lengths, vdupq_n_u8(0))); - *has_error = vorrq_s8(*has_error, overunder); + *has_error = vorrq_u8(*has_error, overunder); } // when 0xED is found, next byte must be no larger than 0x9F @@ -76,21 +75,21 @@ unsafe fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, ha // next byte must be continuation, ie sign bit is set, so signed < is ok #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn check_first_continuation_max( - current_bytes: int8x16_t, - off1_current_bytes: int8x16_t, - has_error: &mut int8x16_t, + current_bytes: uint8x16_t, + off1_current_bytes: uint8x16_t, + has_error: &mut uint8x16_t, ) { - let maskED : int8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xED)); - let maskF4 : int8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xF4)); + let mask_ed : uint8x16_t = vceqq_u8(off1_current_bytes, vdupq_n_u8(0xED)); + let mask_f4 : uint8x16_t = vceqq_u8(off1_current_bytes, vdupq_n_u8(0xF4)); // FIXME: was vandq_u8? 
- let badfollowED : int8x16_t = - vandq_s8(vcgtq_s8(current_bytes, vdupq_n_s8(0x9F)), maskED); - let badfollowF4 : int8x16_t = - vandq_s8(vcgtq_s8(current_bytes, vdupq_n_s8(0x8F)), maskF4); + let badfollow_ed : uint8x16_t = + vandq_u8(vcgtq_u8(current_bytes, vdupq_n_u8(0x9F)), mask_ed); + let badfollow_f4 : uint8x16_t = + vandq_u8(vcgtq_u8(current_bytes, vdupq_n_u8(0x8F)), mask_f4); - *has_error = vorrq_s8( - *has_error, vorrq_s8(badfollowED, badfollowF4)); + *has_error = vorrq_u8( + *has_error, vorrq_u8(badfollow_ed, badfollow_f4)); } // map off1_hibits => error condition @@ -101,73 +100,73 @@ unsafe fn check_first_continuation_max( // else false && false #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn check_overlong( - current_bytes: int8x16_t, - off1_current_bytes: int8x16_t, - hibits: int8x16_t, - previous_hibits: int8x16_t, - has_error: &mut int8x16_t, + current_bytes: uint8x16_t, + off1_current_bytes: uint8x16_t, + hibits: uint8x16_t, + previous_hibits: uint8x16_t, + has_error: &mut uint8x16_t, ) { let _initial_mins : int8x16_t = int8x16_t::new( -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false - 0xC2, -128, // 110x - 0xE1, // 1110 - 0xF1, + -62 /* 0xC2 */, -128, // 110x + -31 /* 0xE1 */, // 1110 + -15 /*0xF1 */, ); let _second_mins : int8x16_t = int8x16_t::new( -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 127, 127, // 110x => true - 0xA0, // 1110 - 0x90, + -96 /* 0xA0 */, // 1110 + -112 /* 0x90 */, ); - let off1_hibits : int8x16_t = vextq_s8(previous_hibits, hibits, 16 - 1); + let off1_hibits : uint8x16_t = vextq_u8(previous_hibits, hibits, 16 - 1); let initial_mins : int8x16_t = - vqtbl1q_s8(_initial_mins, vreinterpretq_u8_s8(off1_hibits)); + vqtbl1q_s8(_initial_mins, off1_hibits); - let initial_under : int8x16_t = vcgtq_s8(initial_mins, off1_current_bytes); + let initial_under : uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(initial_mins, 
vreinterpretq_s8_u8(off1_current_bytes))); let second_mins : int8x16_t = - vqtbl1q_s8(_second_mins, vreinterpretq_u8_s8(off1_hibits)); + vqtbl1q_s8(_second_mins, off1_hibits); - let second_under : int8x16_t = vcgtq_s8(second_mins, current_bytes); + let second_under : uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(second_mins, vreinterpretq_s8_u8(current_bytes))); - *has_error = vorrq_s8( - *has_error, vandq_s8(initial_under, second_under)); + *has_error = vorrq_u8( + *has_error, vandq_u8(initial_under, second_under)); } pub struct ProcessedUtfBytes { - rawbytes: int8x16_t, - high_nibbles: int8x16_t, - pub carried_continuations: int8x16_t, + rawbytes: uint8x16_t, + high_nibbles: uint8x16_t, + pub carried_continuations: uint8x16_t, } impl Default for ProcessedUtfBytes { #[cfg_attr(not(feature = "no-inline"), inline)] fn default() -> Self { ProcessedUtfBytes { - rawbytes: vdupq_n_s8(0x00), - high_nibbles: vdupq_n_s8(0x00), - carried_continuations: vdupq_n_s8(0x00), + rawbytes: vdupq_n_u8(0x00), + high_nibbles: vdupq_n_u8(0x00), + carried_continuations: vdupq_n_u8(0x00), } } } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) { +unsafe fn count_nibbles(bytes: uint8x16_t, answer: &mut ProcessedUtfBytes) { answer.rawbytes = bytes; - answer.high_nibbles = vreinterpretq_s8_u8(mem::transmute(vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4))); + answer.high_nibbles = vshrq_n_u8(bytes, 4); } // check whether the current bytes are valid UTF-8 // at the end of the function, previous gets updated #[cfg_attr(not(feature = "no-inline"), inline)] pub fn check_utf8_bytes( - current_bytes: int8x16_t, + current_bytes: uint8x16_t, previous: &ProcessedUtfBytes, - has_error: &mut int8x16_t, + has_error: &mut uint8x16_t, ) -> ProcessedUtfBytes { let mut pb = ProcessedUtfBytes::default(); unsafe { @@ -175,15 +174,15 @@ pub fn check_utf8_bytes( check_smaller_than_0xf4(current_bytes, has_error); - let initial_lengths: int8x16_t = 
continuation_lengths(pb.high_nibbles); + let initial_lengths: uint8x16_t = continuation_lengths(pb.high_nibbles); pb.carried_continuations = carry_continuations(initial_lengths, previous.carried_continuations); check_continuations(initial_lengths, pb.carried_continuations, has_error); - let off1_current_bytes : int8x16_t = - vextq_s8(previous.rawbytes, pb.rawbytes, 16 - 1); + let off1_current_bytes : uint8x16_t = + vextq_u8(previous.rawbytes, pb.rawbytes, 16 - 1); check_first_continuation_max(current_bytes, off1_current_bytes, has_error); diff --git a/src/value/generator.rs b/src/value/generator.rs index bb054fc0..1e0e4baf 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -14,10 +14,10 @@ use std::io::Write; use std::marker::PhantomData; use std::ptr; -#[cfg(target_feature = "avx2")] -const AVX2_PRESENT : bool = true; -#[cfg(not(target_feature = "avx2"))] -const AVX2_PRESENT : bool = false; +//#[cfg(target_feature = "avx2")] +//const AVX2_PRESENT : bool = true; +//#[cfg(not(target_feature = "avx2"))] +//const AVX2_PRESENT : bool = false; const QU: u8 = b'"'; const BS: u8 = b'\\'; From 62a3f93579f06e633d69351ff83cda9bfd6250f2 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Sat, 10 Aug 2019 09:37:32 -0400 Subject: [PATCH 08/34] feat: fixing intrinsics (maybe) --- src/neon/intrinsics.rs | 45 +++++++++++++++++++++++------------------- src/neon/stage1.rs | 2 +- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 3db115b8..1520e5b0 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -22,40 +22,43 @@ macro_rules! 
types { } #[allow(non_camel_case_types)] -type poly128 = [u8; 16]; +pub type poly64_t = i64; +//#[allow(non_camel_case_types)] +//type poly128_t = [u8; 16]; +#[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.aarch64.neon.pmull64"] - fn vmull_p64_(a: i64, b: i64) -> poly128; + fn vmull_p64_(a: i64, b: i64) -> int8x16_t; #[link_name = "llvm.aarch64.neon.vshrq"] - fn vshrq_n_u8_(a: poly128, b: u8) -> poly128; + fn vshrq_n_u8_(a: poly128_t, b: u8) -> poly128_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] - fn vpaddq_u8_(a: poly128, b: poly128) -> poly128; + fn vpaddq_u8_(a: poly128_t, b: poly128_t) -> poly128_t; #[link_name = "llvm.aarch64.neon.addp.v16u8"] - fn vaddq_u8_(a: poly128, b: poly128) -> poly128; + fn vaddq_u8_(a: poly128_t, b: poly128_t) -> poly128_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] - fn vaddq_s8_(a: poly128, b: poly128) -> poly128; + fn vaddq_s8_(a: poly128_t, b: poly128_t) -> poly128_t; #[link_name = "llvm.aarch64.neon.addp.v16i32"] - fn vaddq_s32_(a: poly128, b: poly128) -> poly128; + fn vaddq_s32_(a: poly128_t, b: poly128_t) -> poly128_t; #[link_name = "llvm.aarch64.neon.vextq.v16u8"] - fn vextq_u8_(a: poly128, b: poly128, n: u8) -> poly128; + fn vextq_u8_(a: poly128_t, b: poly128_t, n: u8) -> poly128_t; #[link_name = "llvm.aarch64.neon.vextq.v16s8"] - fn vextq_s8_(a: poly128, b: poly128, n: u8) -> poly128; + fn vextq_s8_(a: poly128_t, b: poly128_t, n: u8) -> poly128_t; #[link_name = "llvm.aarch64.neon.vtstq.v16u8"] - fn vtstq_u8_(a: poly128, b: poly128) -> poly128; + fn vtstq_u8_(a: poly128_t, b: poly128_t) -> poly128_t; #[link_name = "llvm.aarch64.neon.vtstq.v16s8"] - fn vtstq_s8_(a: poly128, b: poly128) -> poly128; - #[link_name = "llvm.ctpop.u64"] - fn ctpop_u64_(a: u64) -> u32; + fn vtstq_s8_(a: poly128_t, b: poly128_t) -> poly128_t; + #[link_name = "llvm.ctpop.i64"] + fn ctpop_s64_(a: i64) -> i64; // #[link_name = "llvm.ctlz.u64"] -// fn ctlz_u64_(a: u64) -> u32; - #[link_name = "llvm.cttz.u64"] - fn cttz_u64_(a: u64) -> 
u32; + // fn ctlz_u64_(a: u64) -> u32; + #[link_name = "llvm.cttz.i64"] + fn cttz_u64_(a: i64) -> i64; } #[inline] -pub unsafe fn vmull_p64(a: i64, b: i64) -> int8x16_t { - mem::transmute(vmull_p64_(a, b)) +pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { + mem::transmute(vmull_p64_(mem::transmute(a), mem::transmute(b))) } @@ -119,6 +122,8 @@ types! { pub struct int64x2_t(i64, i64); /// ARM-specific 128-bit wide vector of two packed `u64`. pub struct uint64x2_t(u64, u64); + /// ARM-specific 128-bit wide vector of one packed `i128`. + pub struct poly128_t(i128); // FIXME: check this! } impl uint8x16_t { @@ -411,12 +416,12 @@ pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[inline] pub unsafe fn hamming(a: u64) -> u32 { - ctpop_u64_(a) + ctpop_s64_(a as i64) as u32 } #[inline] pub fn trailingzeroes(a: u64) -> u32 { - unsafe { cttz_u64_(a) } + unsafe { cttz_u64_(a as i64) as u32 } } #[inline] diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index b2cdbaf5..0e3818d0 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -57,7 +57,7 @@ unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: } unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { - vgetq_lane_s64(vreinterpretq_s64_s8(vmull_p64(-1, quote_bits as i64)), 0) as u64 + vgetq_lane_u64(vreinterpretq_u64_u8(mem::transmute(vmull_p64(-1, quote_bits as i64))), 0) } struct Utf8CheckingState { From bca6cbab67e6444c78644c1ac169121f2f71c3b1 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Sat, 10 Aug 2019 11:35:06 -0400 Subject: [PATCH 09/34] feat: temp stub replacements for intrinsics (still broken) --- src/neon/intrinsics.rs | 217 +++++++++++++++++++++++++++++++++++------ src/neon/simd_llvm.rs | 2 +- 2 files changed, 187 insertions(+), 32 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 1520e5b0..b14fd4c0 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -30,32 +30,124 @@ pub type poly64_t = i64; 
extern "C" { #[link_name = "llvm.aarch64.neon.pmull64"] fn vmull_p64_(a: i64, b: i64) -> int8x16_t; - #[link_name = "llvm.aarch64.neon.vshrq"] - fn vshrq_n_u8_(a: poly128_t, b: u8) -> poly128_t; - #[link_name = "llvm.aarch64.neon.addp.v16i8"] - fn vpaddq_u8_(a: poly128_t, b: poly128_t) -> poly128_t; - #[link_name = "llvm.aarch64.neon.addp.v16u8"] - fn vaddq_u8_(a: poly128_t, b: poly128_t) -> poly128_t; - #[link_name = "llvm.aarch64.neon.addp.v16i8"] - fn vaddq_s8_(a: poly128_t, b: poly128_t) -> poly128_t; - #[link_name = "llvm.aarch64.neon.addp.v16i32"] - fn vaddq_s32_(a: poly128_t, b: poly128_t) -> poly128_t; - #[link_name = "llvm.aarch64.neon.vextq.v16u8"] - fn vextq_u8_(a: poly128_t, b: poly128_t, n: u8) -> poly128_t; - #[link_name = "llvm.aarch64.neon.vextq.v16s8"] - fn vextq_s8_(a: poly128_t, b: poly128_t, n: u8) -> poly128_t; - #[link_name = "llvm.aarch64.neon.vtstq.v16u8"] - fn vtstq_u8_(a: poly128_t, b: poly128_t) -> poly128_t; - #[link_name = "llvm.aarch64.neon.vtstq.v16s8"] - fn vtstq_s8_(a: poly128_t, b: poly128_t) -> poly128_t; #[link_name = "llvm.ctpop.i64"] fn ctpop_s64_(a: i64) -> i64; - // #[link_name = "llvm.ctlz.u64"] - // fn ctlz_u64_(a: u64) -> u32; #[link_name = "llvm.cttz.i64"] fn cttz_u64_(a: i64) -> i64; } +//unsafe fn vpaddq_u8_(_a: poly128_t, _b: poly128_t) -> poly128_t { mem::transmute(vdupq_n_u8(0)) } + +unsafe fn vaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } +unsafe fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } +unsafe fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } + +unsafe fn vextq_u8_(a: uint8x16_t, b: uint8x16_t, n: u8) -> uint8x16_t { + match n { + 0 => b, + 1 => uint8x16_t( + a.0, b.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 2 => uint8x16_t( + a.0, a.1, b.2, b.3, b.4, b.5, b.6, b.7, 
b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 3 => uint8x16_t( + a.0, a.1, a.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 4 => uint8x16_t( + a.0, a.1, a.2, a.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 5 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 6 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 7 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 8 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 9 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 10 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 11 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, b.11, b.12, b.13, b.14, b.15, + ), + 12 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, b.12, b.13, b.14, b.15, + ), + 13 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, b.13, b.14, b.15, + ), + 14 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, b.14, b.15, + ), + 15 => uint8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, a.14, b.15, + ), + 16 => a, + _ => unreachable_unchecked(), + } +} + +unsafe fn vextq_s8_(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t { + match n { + 0 => b, + 1 => int8x16_t( + a.0, b.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 2 => int8x16_t( + a.0, a.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 3 => int8x16_t( + a.0, a.1, a.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 4 => 
int8x16_t( + a.0, a.1, a.2, a.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 5 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 6 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 7 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 8 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 9 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 10 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, b.10, b.11, b.12, b.13, b.14, b.15, + ), + 11 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, b.11, b.12, b.13, b.14, b.15, + ), + 12 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, b.12, b.13, b.14, b.15, + ), + 13 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, b.13, b.14, b.15, + ), + 14 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, b.14, b.15, + ), + 15 => int8x16_t( + a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, a.14, b.15, + ), + 16 => a, + _ => unreachable_unchecked(), + } +} + #[inline] pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { mem::transmute(vmull_p64_(mem::transmute(a), mem::transmute(b))) @@ -63,9 +155,25 @@ pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { #[inline] -pub unsafe fn vshrq_n_u8(a: uint8x16_t, b: u8) -> uint8x16_t { - // FIXME? 
- mem::transmute(vshrq_n_u8_(mem::transmute(a), mem::transmute(b))) +pub unsafe fn vshrq_n_u8(a: uint8x16_t, n: u8) -> uint8x16_t { + uint8x16_t( + a.0 >> n, + a.1 >> n, + a.2 >> n, + a.3 >> n, + a.4 >> n, + a.5 >> n, + a.6 >> n, + a.7 >> n, + a.8 >> n, + a.9 >> n, + a.10 >> n, + a.11 >> n, + a.12 >> n, + a.13 >> n, + a.14 >> n, + a.15 >> n, + ) } types! { @@ -173,8 +281,6 @@ pub unsafe fn vst1q_u8(addr: *mut u8, val: uint8x16_t) { macro_rules! aarch64_simd_2 { ($name:ident, $type:ty, $simd_fn:ident, $intr:ident) => { #[inline] - #[target_feature(enable = "neon")] - #[cfg_attr(test, assert_instr($intr))] pub unsafe fn $name(a: $type, b: $type) -> $type { simd_llvm::$simd_fn(a, b) } @@ -301,7 +407,7 @@ pub fn zerou8x16() -> uint8x16_t { #[inline] pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - mem::transmute(vpaddq_u8_(mem::transmute(a), mem::transmute(b))) + mem::transmute(vaddq_u8_(mem::transmute(a), mem::transmute(b))) } #[inline] @@ -355,9 +461,6 @@ arm_reinterpret!(vreinterpretq_s64_s8, int8x16_t, int64x2_t); macro_rules! 
arm_vget_lane { ($name:ident, $to:ty, $from:ty, $lanes:literal) => { #[inline] - #[target_feature(enable = "neon")] - #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] - #[cfg_attr(test, assert_instr(umov))] pub unsafe fn $name(v: $from, lane: u32) -> $to { if lane > $lanes { unreachable_unchecked() } simd_llvm::simd_extract(v, lane) @@ -404,14 +507,66 @@ pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_sub(mem::transmute(a), mem::transmute(b)) } +#[inline] +fn test_u8(a:u8,b:u8) -> u8 { + if a & b != 0 { + 0xFF + } else { + 0x00 + } +} + #[inline] pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - mem::transmute(vtstq_u8_(mem::transmute(a), mem::transmute(b))) + uint8x16_t( + test_u8(a.0, b.0), + test_u8(a.1, b.1), + test_u8(a.2, b.2), + test_u8(a.3, b.3), + test_u8(a.4, b.4), + test_u8(a.5, b.5), + test_u8(a.6, b.6), + test_u8(a.7, b.7), + test_u8(a.8, b.8), + test_u8(a.9, b.9), + test_u8(a.10, b.10), + test_u8(a.11, b.11), + test_u8(a.12, b.12), + test_u8(a.13, b.13), + test_u8(a.14, b.14), + test_u8(a.15, b.15) + ) +} + +#[inline] +fn test_s8(a:i8,b:i8) -> i8 { + if a & b != 0 { + -1 + } else { + 0x00 + } } #[inline] pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - mem::transmute(vtstq_s8_(mem::transmute(a), mem::transmute(b))) + int8x16_t( + test_s8(a.0, b.0), + test_s8(a.1, b.1), + test_s8(a.2, b.2), + test_s8(a.3, b.3), + test_s8(a.4, b.4), + test_s8(a.5, b.5), + test_s8(a.6, b.6), + test_s8(a.7, b.7), + test_s8(a.8, b.8), + test_s8(a.9, b.9), + test_s8(a.10, b.10), + test_s8(a.11, b.11), + test_s8(a.12, b.12), + test_s8(a.13, b.13), + test_s8(a.14, b.14), + test_s8(a.15, b.15) + ) } #[inline] diff --git a/src/neon/simd_llvm.rs b/src/neon/simd_llvm.rs index d91ba422..892ae59c 100644 --- a/src/neon/simd_llvm.rs +++ b/src/neon/simd_llvm.rs @@ -19,7 +19,7 @@ extern "platform-intrinsic" { // // pub fn simd_cast(x: T) -> U; // -// pub fn simd_add(x: T, y: T) -> T; + pub fn 
simd_add(x: T, y: T) -> T; pub fn simd_sub(x: T, y: T) -> T; // pub fn simd_mul(x: T, y: T) -> T; // pub fn simd_div(x: T, y: T) -> T; From 620e69725f23ea1c95f85b7bb276aa5ef71b5bea Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Mon, 12 Aug 2019 23:46:23 -0400 Subject: [PATCH 10/34] feat: get code closer to simdjson (still broken) --- src/neon/intrinsics.rs | 70 ++++++++++++--- src/neon/simd.rs | 2 +- src/neon/simd_llvm.rs | 2 +- src/neon/stage1.rs | 200 ++++++++++++++++++++++++----------------- src/neon/utf8check.rs | 133 ++++++++++++++------------- 5 files changed, 239 insertions(+), 168 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index b14fd4c0..2539ae33 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -28,16 +28,20 @@ pub type poly64_t = i64; #[allow(improper_ctypes)] extern "C" { + #[link_name = "llvm.aarch64.neon.addp.v16u8"] + fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.neon.pmull64"] fn vmull_p64_(a: i64, b: i64) -> int8x16_t; #[link_name = "llvm.ctpop.i64"] fn ctpop_s64_(a: i64) -> i64; #[link_name = "llvm.cttz.i64"] fn cttz_u64_(a: i64) -> i64; + #[link_name = "llvm.aarch64.neon.uqxtn.v2u32"] + fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; + #[link_name = "llvm.aarch64.neon.uqsub.v16u8"] + fn vqsubq_u8_(a: uint8x16_t, a: uint8x16_t) -> uint8x16_t; } -//unsafe fn vpaddq_u8_(_a: poly128_t, _b: poly128_t) -> poly128_t { mem::transmute(vdupq_n_u8(0)) } - unsafe fn vaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } unsafe fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } unsafe fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } @@ -153,6 +157,10 @@ pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { mem::transmute(vmull_p64_(mem::transmute(a), 
mem::transmute(b))) } +#[inline] +pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + vpaddq_u8_(a, b) +} #[inline] pub unsafe fn vshrq_n_u8(a: uint8x16_t, n: u8) -> uint8x16_t { @@ -294,6 +302,7 @@ macro_rules! aarch64_simd_ceq { } aarch64_simd_ceq!(vceqq_u8, uint8x16_t); + aarch64_simd_ceq!(vceq_s64, int64x1_t); aarch64_simd_ceq!(vceqq_s64, int64x2_t); aarch64_simd_ceq!(vceq_u64, uint64x1_t); @@ -405,11 +414,6 @@ pub fn zerou8x16() -> uint8x16_t { uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) } -#[inline] -pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - mem::transmute(vaddq_u8_(mem::transmute(a), mem::transmute(b))) -} - #[inline] pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { mem::transmute(vaddq_u8_(mem::transmute(a), mem::transmute(b))) @@ -441,8 +445,6 @@ macro_rules! arm_reinterpret { ($name:ident, $from:ty, $to:ty) => { // Vector reinterpret cast operation #[inline] - #[target_feature(enable = "neon")] - #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] pub unsafe fn $name(a: $from) -> $to { mem::transmute(a) } @@ -462,7 +464,6 @@ macro_rules! arm_vget_lane { ($name:ident, $to:ty, $from:ty, $lanes:literal) => { #[inline] pub unsafe fn $name(v: $from, lane: u32) -> $to { - if lane > $lanes { unreachable_unchecked() } simd_llvm::simd_extract(v, lane) } }; @@ -487,8 +488,8 @@ pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t { } #[inline] -pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { - uint32x2_t(a.0 as u32, a.1 as u32) +pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { + vqmovn_u64_(a) } #[inline] @@ -503,8 +504,7 @@ pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t { #[inline] pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - // FIXME? 
- simd_llvm::simd_sub(mem::transmute(a), mem::transmute(b)) + vqsubq_u8_(a, b) } #[inline] @@ -583,3 +583,45 @@ pub fn trailingzeroes(a: u64) -> u32 { pub unsafe fn vst1q_u32(addr: *mut u8, val: uint32x4_t) { std::ptr::write(addr as *mut uint32x4_t, val) } + + +#[allow(unused)] +macro_rules! constify_imm5 { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match ($imm8) & 0b1_1111 { + 0 => $expand!(0), + 1 => $expand!(1), + 2 => $expand!(2), + 3 => $expand!(3), + 4 => $expand!(4), + 5 => $expand!(5), + 6 => $expand!(6), + 7 => $expand!(7), + 8 => $expand!(8), + 9 => $expand!(9), + 10 => $expand!(10), + 11 => $expand!(11), + 12 => $expand!(12), + 13 => $expand!(13), + 14 => $expand!(14), + 15 => $expand!(15), + 16 => $expand!(16), + 17 => $expand!(17), + 18 => $expand!(18), + 19 => $expand!(19), + 20 => $expand!(20), + 21 => $expand!(21), + 22 => $expand!(22), + 23 => $expand!(23), + 24 => $expand!(24), + 25 => $expand!(25), + 26 => $expand!(26), + 27 => $expand!(27), + 28 => $expand!(28), + 29 => $expand!(29), + 30 => $expand!(30), + _ => $expand!(31), + } + }; +} \ No newline at end of file diff --git a/src/neon/simd.rs b/src/neon/simd.rs index 544bc0d8..8a5a21fc 100644 --- a/src/neon/simd.rs +++ b/src/neon/simd.rs @@ -467,4 +467,4 @@ macro_rules! 
constify_imm8 { _ => $expand!(255), } }; -} \ No newline at end of file +} diff --git a/src/neon/simd_llvm.rs b/src/neon/simd_llvm.rs index 892ae59c..bb7b25bd 100644 --- a/src/neon/simd_llvm.rs +++ b/src/neon/simd_llvm.rs @@ -20,7 +20,7 @@ extern "platform-intrinsic" { // pub fn simd_cast(x: T) -> U; // pub fn simd_add(x: T, y: T) -> T; - pub fn simd_sub(x: T, y: T) -> T; +// pub fn simd_sub(x: T, y: T) -> T; // pub fn simd_mul(x: T, y: T) -> T; // pub fn simd_div(x: T, y: T) -> T; // pub fn simd_shl(x: T, y: T) -> T; diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 0e3818d0..a1e15a10 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -57,11 +57,13 @@ unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: } unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { - vgetq_lane_u64(vreinterpretq_u64_u8(mem::transmute(vmull_p64(-1, quote_bits as i64))), 0) + vgetq_lane_u64( + vreinterpretq_u64_u8( + mem::transmute(vmull_p64(-1, quote_bits as i64))), 0) } struct Utf8CheckingState { - has_error: uint8x16_t, + has_error: int8x16_t, previous: ProcessedUtfBytes, } @@ -69,8 +71,8 @@ impl Default for Utf8CheckingState { #[cfg_attr(not(feature = "no-inline"), inline)] fn default() -> Self { Utf8CheckingState { - has_error: vdupq_n_u8(0), - previous:ProcessedUtfBytes::default() + has_error: vdupq_n_s8(0), + previous: ProcessedUtfBytes::default(), } } } @@ -99,20 +101,20 @@ unsafe fn check_utf8( // All bytes are ascii. Therefore the byte that was just before must be // ascii too. We only check the byte that was just before simd_input. Nines // are arbitrary values. 
- let verror: uint8x16_t = - uint8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); + let verror: int8x16_t = + int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); state.has_error = - vorrq_u8(vcgtq_u8(state.previous.carried_continuations, verror), + vorrq_s8(vcgtq_s8(state.previous.carried_continuations, verror), state.has_error); } else { // it is not ascii so we have to do heavy work - state.previous = check_utf8_bytes(input.v0, + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), &(state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(input.v1, + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), &(state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(input.v2, + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), &(state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(input.v3, + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), &(state.previous), &mut (state.has_error)); } } @@ -122,7 +124,7 @@ unsafe fn check_utf8( unsafe fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { macro_rules! call { ($imm8:expr) => { - vdupq_n_u8($imm8) + vmovq_n_u8($imm8) }; }; let mask: uint8x16_t = constify_imm8!(m, call); @@ -140,7 +142,7 @@ unsafe fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { unsafe fn unsigned_lteq_against_input(input: &SimdInput, maxval: u8) -> u64 { macro_rules! 
call { ($imm8:expr) => { - vdupq_n_u8($imm8) + vmovq_n_u8($imm8) }; }; let mask: uint8x16_t = constify_imm8!(maxval, call); @@ -153,7 +155,94 @@ unsafe fn unsigned_lteq_against_input(input: &SimdInput, maxval: u8) -> u64 { neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) } -unsafe fn find_whitespace_and_structurals(input: &SimdInput, whitespace: &mut u64, structurals: &mut u64) { +// return a bitvector indicating where we have characters that end an odd-length +// sequence of backslashes (and thus change the behavior of the next character +// to follow). A even-length sequence of backslashes, and, for that matter, the +// largest even-length prefix of our odd-length sequence of backslashes, simply +// modify the behavior of the backslashes themselves. +// We also update the prev_iter_ends_odd_backslash reference parameter to +// indicate whether we end an iteration on an odd-length sequence of +// backslashes, which modifies our subsequent search for odd-length +// sequences of backslashes in an obvious way. 
+#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_backslash: &mut u64) -> u64 { + const EVEN_BITS: u64 = 0x5555_5555_5555_5555; + const ODD_BITS: u64 = !EVEN_BITS; + + let bs_bits: u64 = cmp_mask_against_input(input, b'\\'); + let start_edges: u64 = bs_bits & !(bs_bits << 1); + // flip lowest if we have an odd-length run at the end of the prior + // iteration + let even_start_mask: u64 = EVEN_BITS ^ *prev_iter_ends_odd_backslash; + let even_starts: u64 = start_edges & even_start_mask; + let odd_starts: u64 = start_edges & !even_start_mask; + let even_carries: u64 = bs_bits.wrapping_add(even_starts); + + let mut odd_carries: u64 = 0; + // must record the carry-out of our odd-carries out of bit 63; this + // indicates whether the sense of any edge going to the next iteration + // should be flipped + let iter_ends_odd_backslash: bool = add_overflow(bs_bits, odd_starts, &mut odd_carries); + + odd_carries |= *prev_iter_ends_odd_backslash; // push in bit zero as a potential end + // if we had an odd-numbered run at the + // end of the previous iteration + *prev_iter_ends_odd_backslash = if iter_ends_odd_backslash { 0x1 } else { 0x0 }; + let even_carry_ends: u64 = even_carries & !bs_bits; + let odd_carry_ends: u64 = odd_carries & !bs_bits; + let even_start_odd_end: u64 = even_carry_ends & ODD_BITS; + let odd_start_even_end: u64 = odd_carry_ends & EVEN_BITS; + let odd_ends: u64 = even_start_odd_end | odd_start_even_end; + odd_ends +} + +// return both the quote mask (which is a half-open mask that covers the first +// quote in an unescaped quote pair and everything in the quote pair) and the +// quote bits, which are the simple unescaped quoted bits. +// +// We also update the prev_iter_inside_quote value to tell the next iteration +// whether we finished the final iteration inside a quote pair; if so, this +// inverts our behavior of whether we're inside quotes for the next iteration. 
+// +// Note that we don't do any error checking to see if we have backslash +// sequences outside quotes; these +// backslash sequences (of any length) will be detected elsewhere. +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn find_quote_mask_and_bits( + input: &SimdInput, + odd_ends: u64, + prev_iter_inside_quote: &mut u64, + quote_bits: &mut u64, + error_mask: &mut u64 +) -> u64 { + *quote_bits = cmp_mask_against_input(input, b'"'); + *quote_bits &= !odd_ends; + + let mut quote_mask: u64 = compute_quote_mask(*quote_bits); + + quote_mask ^= *prev_iter_inside_quote; + + // All Unicode characters may be placed within the + // quotation marks, except for the characters that MUST be escaped: + // quotation mark, reverse solidus, and the control characters (U+0000 + //through U+001F). + // https://tools.ietf.org/html/rfc8259 + let unescaped: u64 = unsigned_lteq_against_input(input, 0x1F); + *error_mask |= quote_mask & unescaped; + + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, + // John Regher from Utah U. 
says this is fine code + *prev_iter_inside_quote = (quote_mask as i64 >> 63) as u64; + quote_mask +} + +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn find_whitespace_and_structurals( + input: &SimdInput, + whitespace: &mut u64, + structurals: &mut u64, +) { let low_nibble_mask: uint8x16_t = uint8x16_t::new(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); let high_nibble_mask: uint8x16_t = uint8x16_t::new(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); let structural_shufti_mask: uint8x16_t = vmovq_n_u8(0x7); @@ -197,38 +286,6 @@ unsafe fn find_whitespace_and_structurals(input: &SimdInput, whitespace: &mut u6 *whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3); } - -unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_backslash: &mut u64) -> u64 { - const EVEN_BITS: u64 = 0x5555_5555_5555_5555; - const ODD_BITS: u64 = !EVEN_BITS; - - let bs_bits: u64 = cmp_mask_against_input(input, b'\\'); - let start_edges: u64 = bs_bits & !(bs_bits << 1); - // flip lowest if we have an odd-length run at the end of the prior - // iteration - let even_start_mask: u64 = EVEN_BITS ^ *prev_iter_ends_odd_backslash; - let even_starts: u64 = start_edges & even_start_mask; - let odd_starts: u64 = start_edges & !even_start_mask; - let even_carries: u64 = bs_bits.wrapping_add(even_starts); - - let mut odd_carries: u64 = 0; - // must record the carry-out of our odd-carries out of bit 63; this - // indicates whether the sense of any edge going to the next iteration - // should be flipped - let iter_ends_odd_backslash: bool = add_overflow(bs_bits, odd_starts, &mut odd_carries); - - odd_carries |= *prev_iter_ends_odd_backslash; // push in bit zero as a potential end - // if we had an odd-numbered run at the - // end of the previous iteration - *prev_iter_ends_odd_backslash = if iter_ends_odd_backslash { 0x1 } else { 0x0 }; - let even_carry_ends: u64 = even_carries & !bs_bits; - let odd_carry_ends: u64 = odd_carries & !bs_bits; 
- let even_start_odd_end: u64 = even_carry_ends & ODD_BITS; - let odd_start_even_end: u64 = odd_carry_ends & EVEN_BITS; - let odd_ends: u64 = even_start_odd_end | odd_start_even_end; - odd_ends -} - //template <> //really_inline ErrorValues check_utf8_errors( // utf8_checking_state &state) { @@ -239,31 +296,6 @@ unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_bac // : simdjson::SUCCESS; //} -unsafe fn find_quote_mask_and_bits(input: &SimdInput, odd_ends: u64, - prev_iter_inside_quote: &mut u64, quote_bits: &mut u64, error_mask: &mut u64) -> u64 { - *quote_bits = cmp_mask_against_input(input, b'"'); - *quote_bits &= !odd_ends; - - let mut quote_mask: u64 = compute_quote_mask(*quote_bits); - - quote_mask ^= *prev_iter_inside_quote; - - // All Unicode characters may be placed within the - // quotation marks, except for the characters that MUST be escaped: - // quotation mark, reverse solidus, and the control characters (U+0000 - //through U+001F). - // https://tools.ietf.org/html/rfc8259 - let unescaped: u64 = unsigned_lteq_against_input(input, 0x1F); - *error_mask |= quote_mask & unescaped; - - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, - // John Regher from Utah U. 
says this is fine code - *prev_iter_inside_quote = quote_mask >> 63; - - quote_mask -} - // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at // base_ptr[base] incrementing base as we go @@ -291,13 +323,13 @@ unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { base.set_len(l + cnt); while bits != 0 { - let v0 : i32 = static_cast_i32!(trailingzeroes(bits)); + let v0: i32 = static_cast_i32!(trailingzeroes(bits)); bits &= bits.wrapping_sub(1); - let v1 : i32 = static_cast_i32!(trailingzeroes(bits)); + let v1: i32 = static_cast_i32!(trailingzeroes(bits)); bits &= bits.wrapping_sub(1); - let v2 : i32 = static_cast_i32!(trailingzeroes(bits)); + let v2: i32 = static_cast_i32!(trailingzeroes(bits)); bits &= bits.wrapping_sub(1); - let v3 : i32 = static_cast_i32!(trailingzeroes(bits)); + let v3: i32 = static_cast_i32!(trailingzeroes(bits)); bits &= bits.wrapping_sub(1); let v: int32x4_t = int32x4_t::new(v3, v2, v1, v0); @@ -308,7 +340,6 @@ unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { } } - // return a updated structural bit vector with quoted contents cleared out and // pseudo-structural characters added to the mask // updates prev_iter_ends_pseudo_pred which tells us whether the previous @@ -352,7 +383,7 @@ fn finalize_structurals( } impl<'de> Deserializer<'de> { - #[inline(never)] + //#[inline(never)] pub unsafe fn find_structural_bits(input: &[u8]) -> std::result::Result, ErrorType> { let len = input.len(); // 6 is a heuristic number to estimate it turns out a rate of 1/6 structural caracters lears @@ -360,8 +391,8 @@ impl<'de> Deserializer<'de> { let mut structural_indexes = Vec::with_capacity(len / 6); structural_indexes.push(0); // push extra root element - // let mut has_error: uint8x16_t = vdupq_n_u8(0); - let mut previous = Utf8CheckingState::default(); + let mut utf8_state : Utf8CheckingState = Utf8CheckingState::default(); + // we 
have padded the input out to 64 byte multiple with the remainder being // zeros @@ -397,7 +428,9 @@ impl<'de> Deserializer<'de> { #endif */ let input: SimdInput = fill_input(input.get_unchecked(idx as usize..)); - check_utf8(&input, &mut previous); + + check_utf8(&input, &mut utf8_state); + // detect odd sequences of backslashes let odd_ends: u64 = find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); @@ -441,7 +474,7 @@ impl<'de> Deserializer<'de> { .copy_from(input.as_ptr().add(idx), len as usize - idx); let input: SimdInput = fill_input(&tmpbuf); - check_utf8(&input, &mut previous); + check_utf8(&input, &mut utf8_state); // detect odd sequences of backslashes let odd_ends: u64 = @@ -475,7 +508,6 @@ impl<'de> Deserializer<'de> { ); idx += 64; } - // This test isn't in upstream, for some reason the error mask is et for then. if prev_iter_inside_quote != 0 { return Err(ErrorType::Syntax); @@ -497,9 +529,9 @@ impl<'de> Deserializer<'de> { return Err(ErrorType::Syntax); } - let err: i128 = mem::transmute(vtstq_u8(previous.has_error, previous.has_error)); + let has_error : i128 = mem::transmute(utf8_state.has_error); - if err != 0 { + if has_error != 0 { Ok(structural_indexes) } else { Err(ErrorType::InvalidUTF8) diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index 10c6b7c9..c62707fb 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -1,6 +1,4 @@ - use crate::neon::intrinsics::*; -//use std::mem; /* * legal utf-8 byte sequence @@ -21,17 +19,17 @@ use crate::neon::intrinsics::*; // all byte values must be no larger than 0xF4 #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_smaller_than_0xf4(current_bytes: uint8x16_t, has_error: &mut uint8x16_t) { +unsafe fn check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8x16_t) { // unsigned, saturates to 0 below max *has_error = - vorrq_u8( + vorrq_s8( *has_error, - vqsubq_u8(current_bytes, vdupq_n_u8(0xF4))); + 
vreinterpretq_s8_u8(vqsubq_u8(vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0xF4)))); } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn continuation_lengths(high_nibbles: uint8x16_t) -> uint8x16_t { - let nibbles : uint8x16_t = uint8x16_t::new( +unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { + let nibbles: int8x16_t = int8x16_t::new( 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) 0, 0, 0, 0, // 10xx (continuation) 2, 2, // 110x @@ -39,35 +37,34 @@ unsafe fn continuation_lengths(high_nibbles: uint8x16_t) -> uint8x16_t { 4, // 1111, next should be 0 (not checked here) ); - vqtbl1q_u8(nibbles, high_nibbles) + vreinterpretq_s8_u8(vqtbl1q_u8(vreinterpretq_u8_s8(nibbles), vreinterpretq_u8_s8(high_nibbles))) } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn carry_continuations(initial_lengths: uint8x16_t, previous_carries: uint8x16_t) -> uint8x16_t { - let right1 : uint8x16_t = vqsubq_u8( - vextq_u8(previous_carries, initial_lengths, 16 - 1), - vdupq_n_u8(1)); - let sum : uint8x16_t = vaddq_u8(initial_lengths, right1); +unsafe fn carry_continuations(initial_lengths: int8x16_t, previous_carries: int8x16_t) -> int8x16_t { + let right1: int8x16_t = vreinterpretq_s8_u8(vqsubq_u8( + vreinterpretq_u8_s8(vextq_s8(previous_carries, initial_lengths, 16 - 1)), + vdupq_n_u8(1))); + let sum: int8x16_t = vaddq_s8(initial_lengths, right1); - let right2 : uint8x16_t = vqsubq_u8( - vextq_u8(previous_carries, sum, 16 - 2), - vdupq_n_u8(2)); + let right2: int8x16_t = vreinterpretq_s8_u8(vqsubq_u8( + vreinterpretq_u8_s8(vextq_s8(previous_carries, sum, 16 - 2)), + vdupq_n_u8(2))); - vaddq_u8(sum, right2) + vaddq_s8(sum, right2) } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_continuations(initial_lengths: uint8x16_t, carries: uint8x16_t, has_error: &mut uint8x16_t) { +unsafe fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, has_error: &mut int8x16_t) { // overlap || underlap // carry > length && length > 0 || 
!(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) - // FIXME: was vceqq_u8 ? - let overunder : uint8x16_t = vceqq_u8( - vcgtq_u8(carries, initial_lengths), - vcgtq_u8(initial_lengths, vdupq_n_u8(0))); + let overunder: uint8x16_t = vceqq_u8( + vreinterpretq_u8_s8(vcgtq_s8(carries, initial_lengths)), + vreinterpretq_u8_s8(vcgtq_s8(initial_lengths, vdupq_n_s8(0)))); - *has_error = vorrq_u8(*has_error, overunder); + *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder)); } // when 0xED is found, next byte must be no larger than 0x9F @@ -75,21 +72,21 @@ unsafe fn check_continuations(initial_lengths: uint8x16_t, carries: uint8x16_t, // next byte must be continuation, ie sign bit is set, so signed < is ok #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn check_first_continuation_max( - current_bytes: uint8x16_t, - off1_current_bytes: uint8x16_t, - has_error: &mut uint8x16_t, + current_bytes: int8x16_t, + off1_current_bytes: int8x16_t, + has_error: &mut int8x16_t, ) { - let mask_ed : uint8x16_t = vceqq_u8(off1_current_bytes, vdupq_n_u8(0xED)); - let mask_f4 : uint8x16_t = vceqq_u8(off1_current_bytes, vdupq_n_u8(0xF4)); + let mask_ed: uint8x16_t = vreinterpretq_u8_s8(vceqq_s8(off1_current_bytes, vdupq_n_s8(-19 /* 0xED */))); + let mask_f4: uint8x16_t = vreinterpretq_u8_s8(vceqq_s8(off1_current_bytes, vdupq_n_s8(-12 /* 0xF4 */))); // FIXME: was vandq_u8? 
- let badfollow_ed : uint8x16_t = - vandq_u8(vcgtq_u8(current_bytes, vdupq_n_u8(0x9F)), mask_ed); - let badfollow_f4 : uint8x16_t = - vandq_u8(vcgtq_u8(current_bytes, vdupq_n_u8(0x8F)), mask_f4); + let badfollow_ed: uint8x16_t = + vandq_u8(vreinterpretq_u8_s8(vcgtq_s8(current_bytes, vdupq_n_s8(-97 /* 0x9F */))), mask_ed); + let badfollow_f4: uint8x16_t = + vandq_u8(vreinterpretq_u8_s8(vcgtq_s8(current_bytes, vdupq_n_s8(-113 /* 0x8F */))), mask_f4); - *has_error = vorrq_u8( - *has_error, vorrq_u8(badfollow_ed, badfollow_f4)); + *has_error = vorrq_s8( + *has_error, vreinterpretq_s8_u8(vorrq_u8(badfollow_ed, badfollow_f4))); } // map off1_hibits => error condition @@ -100,73 +97,73 @@ unsafe fn check_first_continuation_max( // else false && false #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn check_overlong( - current_bytes: uint8x16_t, - off1_current_bytes: uint8x16_t, - hibits: uint8x16_t, - previous_hibits: uint8x16_t, - has_error: &mut uint8x16_t, + current_bytes: int8x16_t, + off1_current_bytes: int8x16_t, + hibits: int8x16_t, + previous_hibits: int8x16_t, + has_error: &mut int8x16_t, ) { - let _initial_mins : int8x16_t = int8x16_t::new( - -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, // 10xx => false + let _initial_mins: int8x16_t = int8x16_t::new( + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false -62 /* 0xC2 */, -128, // 110x -31 /* 0xE1 */, // 1110 -15 /*0xF1 */, ); - let _second_mins : int8x16_t = int8x16_t::new( - -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, // 10xx => false - 127, 127, // 110x => true + let _second_mins: int8x16_t = int8x16_t::new( + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + 127, 127, // 110x => true -96 /* 0xA0 */, // 1110 -112 /* 0x90 */, ); - let off1_hibits : uint8x16_t = vextq_u8(previous_hibits, hibits, 16 - 1); - let initial_mins : int8x16_t = - vqtbl1q_s8(_initial_mins, 
off1_hibits); + let off1_hibits: int8x16_t = vextq_s8(previous_hibits, hibits, 16 - 1); + let initial_mins: int8x16_t = + vqtbl1q_s8(_initial_mins, vreinterpretq_u8_s8(off1_hibits)); - let initial_under : uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(initial_mins, vreinterpretq_s8_u8(off1_current_bytes))); + let initial_under: uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(initial_mins, off1_current_bytes)); - let second_mins : int8x16_t = - vqtbl1q_s8(_second_mins, off1_hibits); + let second_mins: int8x16_t = + vqtbl1q_s8(_second_mins, vreinterpretq_u8_s8(off1_hibits)); - let second_under : uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(second_mins, vreinterpretq_s8_u8(current_bytes))); + let second_under: uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(second_mins, current_bytes)); - *has_error = vorrq_u8( - *has_error, vandq_u8(initial_under, second_under)); + *has_error = vorrq_s8( + *has_error, vreinterpretq_s8_u8(vandq_u8(initial_under, second_under))); } pub struct ProcessedUtfBytes { - rawbytes: uint8x16_t, - high_nibbles: uint8x16_t, - pub carried_continuations: uint8x16_t, + rawbytes: int8x16_t, + high_nibbles: int8x16_t, + pub carried_continuations: int8x16_t, } impl Default for ProcessedUtfBytes { #[cfg_attr(not(feature = "no-inline"), inline)] fn default() -> Self { ProcessedUtfBytes { - rawbytes: vdupq_n_u8(0x00), - high_nibbles: vdupq_n_u8(0x00), - carried_continuations: vdupq_n_u8(0x00), + rawbytes: vdupq_n_s8(0x00), + high_nibbles: vdupq_n_s8(0x00), + carried_continuations: vdupq_n_s8(0x00), } } } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn count_nibbles(bytes: uint8x16_t, answer: &mut ProcessedUtfBytes) { +unsafe fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) { answer.rawbytes = bytes; - answer.high_nibbles = vshrq_n_u8(bytes, 4); + answer.high_nibbles = vreinterpretq_s8_u8(vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4)); } // check whether the current bytes are valid UTF-8 // at the end of the function, previous gets updated 
#[cfg_attr(not(feature = "no-inline"), inline)] pub fn check_utf8_bytes( - current_bytes: uint8x16_t, + current_bytes: int8x16_t, previous: &ProcessedUtfBytes, - has_error: &mut uint8x16_t, + has_error: &mut int8x16_t, ) -> ProcessedUtfBytes { let mut pb = ProcessedUtfBytes::default(); unsafe { @@ -174,15 +171,15 @@ pub fn check_utf8_bytes( check_smaller_than_0xf4(current_bytes, has_error); - let initial_lengths: uint8x16_t = continuation_lengths(pb.high_nibbles); + let initial_lengths: int8x16_t = continuation_lengths(pb.high_nibbles); pb.carried_continuations = carry_continuations(initial_lengths, previous.carried_continuations); check_continuations(initial_lengths, pb.carried_continuations, has_error); - let off1_current_bytes : uint8x16_t = - vextq_u8(previous.rawbytes, pb.rawbytes, 16 - 1); + let off1_current_bytes: int8x16_t = + vextq_s8(previous.rawbytes, pb.rawbytes, 16 - 1); check_first_continuation_max(current_bytes, off1_current_bytes, has_error); From 68691ad120f4943b802823b4c57fa4982bdbd7b8 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Tue, 13 Aug 2019 00:29:54 -0400 Subject: [PATCH 11/34] feat: trying to fix parse_str --- src/neon/deser.rs | 63 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/src/neon/deser.rs b/src/neon/deser.rs index b194dde3..0945c847 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -3,20 +3,26 @@ pub use crate::error::{Error, ErrorType}; pub use crate::Deserializer; pub use crate::Result; -pub use crate::neon::stage1::SIMDJSON_PADDING; +pub use crate::neon::stage1::*; pub use crate::neon::intrinsics::*; pub use crate::neon::utf8check::*; pub use crate::stringparse::*; pub use crate::neon::intrinsics::*; -unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dst: &mut [u8]) -> ParseStringHelper { +unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dstx: Option<&mut [u8]>) -> ParseStringHelper { // this can read up to 31 bytes beyond the buffer size, but we 
require // SIMDJSON_PADDING of padding let v0 : uint8x16_t = vld1q_u8(src.as_ptr()); let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); - vst1q_u8(dst.as_mut_ptr(), v0); - vst1q_u8(dst.as_mut_ptr().add(16), v1); + + match dstx { + Some(dst) => { + vst1q_u8(dst.as_mut_ptr(), v0); + vst1q_u8(dst.as_mut_ptr().add(16), v1); + }, + _ => () + } let bs_mask : uint8x16_t = vmovq_n_u8('\\' as u8); let qt_mask : uint8x16_t = vmovq_n_u8('"' as u8); @@ -50,7 +56,7 @@ impl<'de> Deserializer<'de> { pub fn parse_str_(&mut self) -> Result<&'de str> { // Add 1 to skip the initial " let idx = self.iidx + 1; -// let padding = [0u8; 32]; +// let mut padding = [0u8; 32]; //let mut read: usize = 0; // we include the terminal '"' so we know where to end @@ -59,14 +65,55 @@ impl<'de> Deserializer<'de> { let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) }; let mut src_i: usize = 0; - let len = src_i; + let mut len = src_i; + loop { + // store to dest unconditionally - we can overwrite the bits we don't like + // later + let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], None) }; + + if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { + // we encountered quotes first. Move dst to point to quotes and exit + // find out where the quote is... + let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; + + /////////////////////// + // Above, check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. 
+ //////////////////////// + + // we advance the point, accounting for the fact that we have a NULL termination + + len += quote_dist as usize; + unsafe { + let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; + return Ok(&*v); + } + + // we compare the pointers since we care if they are 'at the same spot' + // not if they are the same value + } + if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { + // Move to the 'bad' character + let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); + len += bs_dist as usize; + src_i += bs_dist as usize; + break; + } else { + // they are the same. Since they can't co-occur, it means we encountered + // neither. + src_i += 32; + len += 32; + } + } + let mut dst_i: usize = 0; let dst: &mut [u8] = &mut self.strings; loop { // store to dest unconditionally - we can overwrite the bits we don't like // later - let ParseStringHelper {bs_bits, quote_bits} = unsafe { find_bs_bits_and_quote_bits(src, dst) }; + let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], Some(&mut dst[dst_i..])) }; if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first.
Move dst to point to quotes and exit @@ -143,4 +190,4 @@ impl<'de> Deserializer<'de> { } } } -} +} \ No newline at end of file From be606903255d814c6ec2053ab1140be99d0d6b78 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Tue, 13 Aug 2019 11:19:10 -0400 Subject: [PATCH 12/34] fix: numberparse --- src/numberparse.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/numberparse.rs b/src/numberparse.rs index 425d16ab..2362e6d0 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -129,14 +129,14 @@ pub enum Number { } #[cfg_attr(not(feature = "no-inline"), inline)] -#[cfg(target_arch = "aarch64")] +//#[cfg(target_arch = "aarch64")] fn parse_eight_digits_unrolled(chars: &[u8]) -> u32 { let val: u64 = unsafe { *(chars.as_ptr() as *const u64) }; // memcpy(&val, chars, sizeof(u64)); - let val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - let val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + let val = (val & 0x0F0F0F0F0F0F0F0F).wrapping_mul(2561) >> 8; + let val = (val & 0x00FF00FF00FF00FF).wrapping_mul(6553601) >> 16; - return ((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32) as u32; + return ((val & 0x0000FFFF0000FFFF).wrapping_mul(42949672960001) >> 32) as u32; } impl<'de> Deserializer<'de> { From e6609ff1877d2908389fdc78dbc6e8d29af42dff Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Tue, 13 Aug 2019 11:22:06 -0400 Subject: [PATCH 13/34] fix: endian in flatten_bits --- src/neon/stage1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index a1e15a10..2af15e6d 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -332,7 +332,7 @@ unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let v3: i32 = static_cast_i32!(trailingzeroes(bits)); bits &= bits.wrapping_sub(1); - let v: int32x4_t = int32x4_t::new(v3, v2, v1, v0); + let v: int32x4_t = int32x4_t::new(v0, v1, v2, v3); let v: int32x4_t = vaddq_s32(idx_64_v, v); std::ptr::write(base.as_mut_ptr().add(l) as 
*mut int32x4_t, v); From 45cbbd497385fb0f5490122a85bacf7ee9aff3f5 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Tue, 13 Aug 2019 12:11:25 -0400 Subject: [PATCH 14/34] fix: utf8 encoding (still broken but closer) --- src/neon/deser.rs | 29 ++++++++++++++++++++++++++--- src/neon/stage1.rs | 8 ++++---- src/neon/utf8check.rs | 2 +- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 0945c847..65f744a1 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -56,7 +56,7 @@ impl<'de> Deserializer<'de> { pub fn parse_str_(&mut self) -> Result<&'de str> { // Add 1 to skip the initial " let idx = self.iidx + 1; -// let mut padding = [0u8; 32]; + let mut padding = [0u8; 32]; //let mut read: usize = 0; // we include the terminal '"' so we know where to end @@ -69,7 +69,19 @@ impl<'de> Deserializer<'de> { loop { // store to dest unconditionally - we can overwrite the bits we don't like // later - let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], None) }; + + let srcx = if src.len() >= src_i + 32 { + &src[src_i..] + } else { + unsafe { + padding + .get_unchecked_mut(..src.len() - src_i) + .clone_from_slice(src.get_unchecked(src_i..)); + &padding + } + }; + + let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx, None) }; if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit @@ -111,9 +123,20 @@ impl<'de> Deserializer<'de> { let dst: &mut [u8] = &mut self.strings; loop { + let srcx = if src.len() >= src_i + 32 { + &src[src_i..] 
+ } else { + unsafe { + padding + .get_unchecked_mut(..src.len() - src_i) + .clone_from_slice(src.get_unchecked(src_i..)); + &padding + } + }; + // store to dest unconditionally - we can overwrite the bits we don't like // later - let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], Some(&mut dst[dst_i..])) }; + let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx, Some(dst)) }; if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 2af15e6d..7748ee37 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -109,13 +109,13 @@ unsafe fn check_utf8( } else { // it is not ascii so we have to do heavy work state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), - &(state.previous), &mut (state.has_error)); + &mut (state.previous), &mut (state.has_error)); state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), - &(state.previous), &mut (state.has_error)); + &mut (state.previous), &mut (state.has_error)); state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), - &(state.previous), &mut (state.has_error)); + &mut (state.previous), &mut (state.has_error)); state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), - &(state.previous), &mut (state.has_error)); + &mut (state.previous), &mut (state.has_error)); } } diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index c62707fb..8e260ae8 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -162,7 +162,7 @@ unsafe fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) { #[cfg_attr(not(feature = "no-inline"), inline)] pub fn check_utf8_bytes( current_bytes: int8x16_t, - previous: &ProcessedUtfBytes, + previous: &mut ProcessedUtfBytes, has_error: &mut int8x16_t, ) -> ProcessedUtfBytes { let mut pb = ProcessedUtfBytes::default(); From 
15c892b11f3fb32e25167db0f4186e4ba2c464f6 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Tue, 13 Aug 2019 14:37:11 -0400 Subject: [PATCH 15/34] fix: use write instead of intrinsic (for now) --- src/neon/deser.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 65f744a1..30f3cabe 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -9,6 +9,7 @@ pub use crate::neon::utf8check::*; pub use crate::stringparse::*; pub use crate::neon::intrinsics::*; +use std::io::Write; unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dstx: Option<&mut [u8]>) -> ParseStringHelper { // this can read up to 31 bytes beyond the buffer size, but we require @@ -17,9 +18,11 @@ unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dstx: Option<&mut [u8]>) -> Pa let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); match dstx { - Some(dst) => { - vst1q_u8(dst.as_mut_ptr(), v0); - vst1q_u8(dst.as_mut_ptr().add(16), v1); + Some(mut dst) => { +// vst1q_u8(dst.as_mut_ptr(), v0); +// vst1q_u8(dst.as_mut_ptr().add(16), v1); + dst.write(&src[0..16]).unwrap(); + dst.write(&src[16..32]).unwrap(); }, _ => () } From e3872b59e86a0c413137680d1936fa5be81ba734 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Tue, 13 Aug 2019 15:57:44 -0400 Subject: [PATCH 16/34] fix: fix string parsing (mostly, thanks to @licenser) --- src/lib.rs | 5 ++++- src/neon/deser.rs | 21 ++++++--------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c032bd0d..261672ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -180,7 +180,10 @@ impl<'de> Deserializer<'de> { let counts = Deserializer::validate(input, &structural_indexes)?; - let strings = Vec::with_capacity(len + SIMDJSON_PADDING); + let mut strings = Vec::with_capacity(len + SIMDJSON_PADDING); + unsafe { + strings.set_len(len + SIMDJSON_PADDING); + } Ok(Deserializer { counts, diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 30f3cabe..4f45d2b6 100644 
--- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -9,24 +9,13 @@ pub use crate::neon::utf8check::*; pub use crate::stringparse::*; pub use crate::neon::intrinsics::*; -use std::io::Write; -unsafe fn find_bs_bits_and_quote_bits(src: &[u8], dstx: Option<&mut [u8]>) -> ParseStringHelper { +unsafe fn find_bs_bits_and_quote_bits(src: &[u8]) -> ParseStringHelper { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding let v0 : uint8x16_t = vld1q_u8(src.as_ptr()); let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); - match dstx { - Some(mut dst) => { -// vst1q_u8(dst.as_mut_ptr(), v0); -// vst1q_u8(dst.as_mut_ptr().add(16), v1); - dst.write(&src[0..16]).unwrap(); - dst.write(&src[16..32]).unwrap(); - }, - _ => () - } - let bs_mask : uint8x16_t = vmovq_n_u8('\\' as u8); let qt_mask : uint8x16_t = vmovq_n_u8('"' as u8); @@ -84,7 +73,7 @@ impl<'de> Deserializer<'de> { } }; - let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx, None) }; + let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx) }; if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit @@ -123,7 +112,7 @@ impl<'de> Deserializer<'de> { } let mut dst_i: usize = 0; - let dst: &mut [u8] = &mut self.strings; + let dst: &mut [u8] = self.strings.as_mut_slice(); loop { let srcx = if src.len() >= src_i + 32 { @@ -137,9 +126,11 @@ impl<'de> Deserializer<'de> { } }; + dst[dst_i..dst_i + 32].copy_from_slice(&srcx[..32]); + // store to dest unconditionally - we can overwrite the bits we don't like // later - let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx, Some(dst)) }; + let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx) }; if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. 
Move dst to point to quotes and exit From 782b0a6d8ab5af7afab2fbc0b3c1b15c599fffe7 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Wed, 14 Aug 2019 12:29:58 -0400 Subject: [PATCH 17/34] fix: add_overflow should always output carry --- src/neon/intrinsics.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 2539ae33..8c5dac7d 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -263,11 +263,7 @@ impl int32x4_t { #[inline] pub fn add_overflow(a: u64, b: u64, out: &mut u64) -> bool { let (carry, did_carry) = a.overflowing_add(b); - - if did_carry { - *out = carry; - }; - + *out = carry; did_carry } From 98b335640f118f5f1fa0ce4d9e0b741e86357e5b Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 09:27:57 -0400 Subject: [PATCH 18/34] fix: tests PASS, improved comparison operators, thanks @Licenser :) --- src/neon/intrinsics.rs | 436 +++++++++++++++++++---------------------- src/neon/stage1.rs | 23 +-- src/neon/utf8check.rs | 31 +-- 3 files changed, 223 insertions(+), 267 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 8c5dac7d..1c86659c 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -3,7 +3,6 @@ use crate::neon::simd_llvm; use std::mem; -use std::hint::unreachable_unchecked; use core; #[allow(unused)] @@ -23,8 +22,6 @@ macro_rules! 
types { #[allow(non_camel_case_types)] pub type poly64_t = i64; -//#[allow(non_camel_case_types)] -//type poly128_t = [u8; 16]; #[allow(improper_ctypes)] extern "C" { @@ -40,116 +37,55 @@ extern "C" { fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; #[link_name = "llvm.aarch64.neon.uqsub.v16u8"] fn vqsubq_u8_(a: uint8x16_t, a: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.neon.uqsub.v16i8"] + fn vqsubq_s8_(a: int8x16_t, a: int8x16_t) -> int8x16_t; } -unsafe fn vaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } -unsafe fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } -unsafe fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t { simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } - -unsafe fn vextq_u8_(a: uint8x16_t, b: uint8x16_t, n: u8) -> uint8x16_t { - match n { - 0 => b, - 1 => uint8x16_t( - a.0, b.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 2 => uint8x16_t( - a.0, a.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 3 => uint8x16_t( - a.0, a.1, a.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 4 => uint8x16_t( - a.0, a.1, a.2, a.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 5 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 6 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 7 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 8 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 9 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 10 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, 
a.7, a.8, a.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 11 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, b.11, b.12, b.13, b.14, b.15, - ), - 12 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, b.12, b.13, b.14, b.15, - ), - 13 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, b.13, b.14, b.15, - ), - 14 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, b.14, b.15, - ), - 15 => uint8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, a.14, b.15, - ), - 16 => a, - _ => unreachable_unchecked(), - } +#[inline] +unsafe fn vaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) } -unsafe fn vextq_s8_(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t { - match n { - 0 => b, - 1 => int8x16_t( - a.0, b.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 2 => int8x16_t( - a.0, a.1, b.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 3 => int8x16_t( - a.0, a.1, a.2, b.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 4 => int8x16_t( - a.0, a.1, a.2, a.3, b.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 5 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, b.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 6 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, b.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 7 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, b.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 8 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, b.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 9 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, b.9, b.10, b.11, b.12, b.13, b.14, b.15, - ), - 10 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, b.10, b.11, b.12, b.13, 
b.14, b.15, - ), - 11 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, b.11, b.12, b.13, b.14, b.15, - ), - 12 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, b.12, b.13, b.14, b.15, - ), - 13 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, b.13, b.14, b.15, - ), - 14 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, b.14, b.15, - ), - 15 => int8x16_t( - a.0, a.1, a.2, a.3, a.4, a.5, a.6, a.7, a.8, a.9, a.10, a.11, a.12, a.13, a.14, b.15, - ), - 16 => a, - _ => unreachable_unchecked(), - } +#[inline] +unsafe fn vaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) +} + +#[inline] +unsafe fn vaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_llvm::simd_add(mem::transmute(a), mem::transmute(b)) +} + +#[inline] +pub unsafe fn vnegq_u8(a: uint8x16_t) -> uint8x16_t { + let x: u128 = mem::transmute(a); + let nx = !x; + mem::transmute(nx) +} + +#[inline] +pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t { + let x: u128 = mem::transmute(a); + let nx = !x; + mem::transmute(nx) +} + + +#[inline] +fn rotate_(a: u128, b: u128, n: u128) -> u128 { + let az = a >> (n * 8); + let bz = b << (128 - (n * 8)); + az | bz +} + +#[inline] +pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: u8) -> uint8x16_t { + mem::transmute(rotate_(mem::transmute(a), mem::transmute(b), n as u128)) +} + +#[inline] +pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t { + mem::transmute(rotate_(mem::transmute(a), mem::transmute(b), n as u128)) } #[inline] @@ -243,18 +179,21 @@ types! 
{ } impl uint8x16_t { + #[inline] pub fn new(a: u8, b: u8, c: u8, d: u8, e: u8, f: u8, g: u8, h: u8, i: u8, j: u8, k: u8, l: u8, m: u8, n: u8, o: u8, p: u8) -> uint8x16_t { uint8x16_t(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) } } impl int8x16_t { + #[inline] pub fn new(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> int8x16_t { int8x16_t(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) } } impl int32x4_t { + #[inline] pub fn new(a: i32, b: i32, c: i32, d: i32) -> int32x4_t { int32x4_t(a, b, c, d) } @@ -283,107 +222,149 @@ pub unsafe fn vst1q_u8(addr: *mut u8, val: uint8x16_t) { } macro_rules! aarch64_simd_2 { - ($name:ident, $type:ty, $simd_fn:ident, $intr:ident) => { + ($name: ident, $type: ty, $simd_fn: ident, $intrarm: ident, $intraarch: ident) => { + aarch64_simd_2!($name, $type, $type, $simd_fn, $intrarm, $intraarch); + }; + ($name: ident, $type: ty, $res: ty, $simd_fn: ident, $intrarm: ident, $intraarch: ident) => { #[inline] - pub unsafe fn $name(a: $type, b: $type) -> $type { + pub unsafe fn $name(a: $type, b: $type) -> $res { simd_llvm::$simd_fn(a, b) } - }; + } } + macro_rules! 
aarch64_simd_ceq { - ($name:ident, $type:ty) => { + ($name: ident, $type: ty, $res: ty) => { /// Compare bitwise Equal (vector) - aarch64_simd_2!($name, $type, simd_eq, cmeq); + aarch64_simd_2!($name, $type, $res, simd_eq, cmeq, cmeq); }; } -aarch64_simd_ceq!(vceqq_u8, uint8x16_t); - -aarch64_simd_ceq!(vceq_s64, int64x1_t); -aarch64_simd_ceq!(vceqq_s64, int64x2_t); -aarch64_simd_ceq!(vceq_u64, uint64x1_t); -aarch64_simd_ceq!(vceqq_u64, uint64x2_t); -aarch64_simd_ceq!(vceq_p64, uint64x1_t); -aarch64_simd_ceq!(vceqq_p64, uint64x2_t); - -aarch64_simd_ceq!(vceqq_s8, int8x16_t); +aarch64_simd_ceq!(vceq_s8, int8x8_t, uint8x8_t); +aarch64_simd_ceq!(vceqq_s8, int8x16_t, uint8x16_t); +aarch64_simd_ceq!(vceq_s16, int16x4_t, uint16x4_t); +aarch64_simd_ceq!(vceqq_s16, int16x8_t, uint16x8_t); +aarch64_simd_ceq!(vceq_s32, int32x2_t, uint32x2_t); +aarch64_simd_ceq!(vceqq_s32, int32x4_t, uint32x4_t); +aarch64_simd_ceq!(vceq_u8, uint8x8_t, uint8x8_t); +aarch64_simd_ceq!(vceqq_u8, uint8x16_t, uint8x16_t); +aarch64_simd_ceq!(vceq_u16, uint16x4_t, uint16x4_t); +aarch64_simd_ceq!(vceqq_u16, uint16x8_t, uint16x8_t); +aarch64_simd_ceq!(vceq_u32, uint32x2_t, uint32x2_t); +aarch64_simd_ceq!(vceqq_u32, uint32x4_t, uint32x4_t); +aarch64_simd_2!(vceq_f32, float32x2_t, uint32x2_t, simd_eq, fcmeq, fcmeq); +aarch64_simd_2!(vceqq_f32, float32x4_t, uint32x4_t, simd_eq, fcmeq, fcmeq); +aarch64_simd_ceq!(vceq_p8, poly8x8_t, poly8x8_t); +aarch64_simd_ceq!(vceqq_p8, poly8x16_t, poly8x16_t); macro_rules! aarch64_simd_cgt { - ($name:ident, $type:ty) => { + ($name:ident, $type:ty, $res:ty) => { /// Compare signed Greater than (vector) - aarch64_simd_2!($name, $type, simd_gt, cmgt); - }; -} -macro_rules! aarch64_simd_cgtu { - ($name:ident, $type:ty) => { - /// Compare Greater than (vector) - aarch64_simd_2!($name, $type, simd_gt, cmhi); + aarch64_simd_2!($name, $type, $res, simd_gt, cmgt, cmgt); }; } -aarch64_simd_cgtu!(vcgtq_u8, uint8x16_t); +//macro_rules! 
aarch64_simd_cgtu { +// ($name: ident, $type: ty) => { +// /// Compare Greater than (vector) +// aarch64_simd_2!($name, $type, simd_gt, cmhi); +// }; +//} -aarch64_simd_cgt!(vcgt_s64, int64x1_t); -aarch64_simd_cgt!(vcgtq_s64, int64x2_t); -aarch64_simd_cgtu!(vcgt_u64, uint64x1_t); -aarch64_simd_cgtu!(vcgtq_u64, uint64x2_t); +aarch64_simd_cgt!(vcgt_s8, int8x8_t, uint8x8_t); +aarch64_simd_cgt!(vcgtq_s8, int8x16_t, uint8x16_t); +aarch64_simd_cgt!(vcgt_s16, int16x4_t, uint16x4_t); +aarch64_simd_cgt!(vcgtq_s16, int16x8_t, uint16x8_t); +aarch64_simd_cgt!(vcgt_s32, int32x2_t, uint32x2_t); +aarch64_simd_cgt!(vcgtq_s32, int32x4_t, uint32x4_t); + +//aarch64_simd_cgtu!(vcgtq_u8, uint8x16_t); +//aarch64_simd_cgt!(vcgt_s64, int64x1_t); +//aarch64_simd_cgt!(vcgtq_s64, int64x2_t); +//aarch64_simd_cgtu!(vcgt_u64, uint64x1_t); +//aarch64_simd_cgtu!(vcgtq_u64, uint64x2_t); macro_rules! aarch64_simd_clt { - ($name:ident, $type:ty) => { + ($name:ident, $type:ty, $res:ty) => { /// Compare signed Lesser than (vector) - aarch64_simd_2!($name, $type, simd_lt, cmgt); - }; -} -macro_rules! aarch64_simd_cltu { - ($name:ident, $type:ty) => { - /// Compare Lesser than (vector) - aarch64_simd_2!($name, $type, simd_lt, cmhi); + aarch64_simd_2!($name, $type, $res, simd_lt, cmgt, cmgt); }; } -aarch64_simd_clt!(vclt_s64, int64x1_t); -aarch64_simd_clt!(vcltq_s64, int64x2_t); -aarch64_simd_cltu!(vclt_u64, uint64x1_t); -aarch64_simd_cltu!(vcltq_u64, uint64x2_t); +//macro_rules! aarch64_simd_cltu { +//( $ name: ident, $ type: ty) => { +///// Compare Lesser than (vector) +//aarch64_simd_2 ! 
( $ name, $ type, simd_lt, cmhi); +//}; +//} + +aarch64_simd_clt!(vclt_s8, int8x8_t, uint8x8_t); +aarch64_simd_clt!(vcltq_s8, int8x16_t, uint8x16_t); +aarch64_simd_clt!(vclt_s16, int16x4_t, uint16x4_t); +aarch64_simd_clt!(vcltq_s16, int16x8_t, uint16x8_t); +aarch64_simd_clt!(vclt_s32, int32x2_t, uint32x2_t); +aarch64_simd_clt!(vcltq_s32, int32x4_t, uint32x4_t); + +//arm_simd_cltu!(vclt_u8, uint8x8_t); +//arm_simd_cltu!(vcltq_u8, uint8x16_t); +//arm_simd_cltu!(vclt_u16, uint16x4_t); +//arm_simd_cltu!(vcltq_u16, uint16x8_t); +//arm_simd_cltu!(vclt_u32, uint32x2_t); +//arm_simd_cltu!(vcltq_u32, uint32x4_t); macro_rules! aarch64_simd_cge { - ($name:ident, $type:ty) => { - /// Compare signed Greater than (vector) - aarch64_simd_2!($name, $type, simd_ge, cmge); - }; -} -macro_rules! aarch64_simd_cgeu { - ($name:ident, $type:ty) => { - /// Compare Greater than (vector) - aarch64_simd_2!($name, $type, simd_ge, cmhs); + ($name:ident, $type:ty, $res:ty) => { + /// Compare signed Greater than equals (vector) + aarch64_simd_2!($name, $type, $res, simd_ge, cmge, cmge); }; } -aarch64_simd_cge!(vcge_s64, int64x1_t); -aarch64_simd_cge!(vcgeq_s64, int64x2_t); -aarch64_simd_cgeu!(vcge_u64, uint64x1_t); -aarch64_simd_cgeu!(vcgeq_u64, uint64x2_t); +//macro_rules! aarch64_simd_cgeu { +//( $ name: ident, $ type: ty) => { +///// Compare Greater than (vector) +//aarch64_simd_2 ! 
( $ name, $ type, simd_ge, cmhs); +//}; +//} + +aarch64_simd_cge!(vcge_s8, int8x8_t, uint8x8_t); +aarch64_simd_cge!(vcgeq_s8, int8x16_t, uint8x16_t); +aarch64_simd_cge!(vcge_s16, int16x4_t, uint16x4_t); +aarch64_simd_cge!(vcgeq_s16, int16x8_t, uint16x8_t); +aarch64_simd_cge!(vcge_s32, int32x2_t, uint32x2_t); +aarch64_simd_cge!(vcgeq_s32, int32x4_t, uint32x4_t); +//arm_simd_cgeu!(vcge_u8, uint8x8_t); +//arm_simd_cgeu!(vcgeq_u8, uint8x16_t); +//arm_simd_cgeu!(vcge_u16, uint16x4_t); +//arm_simd_cgeu!(vcgeq_u16, uint16x8_t); +//arm_simd_cgeu!(vcge_u32, uint32x2_t); +//arm_simd_cgeu!(vcgeq_u32, uint32x4_t); macro_rules! aarch64_simd_cle { - ($name:ident, $type:ty) => { - /// Compare signed Lesser than (vector) - aarch64_simd_2!($name, $type, simd_le, cmge); - }; -} -macro_rules! aarch64_simd_cleu { - ($name:ident, $type:ty) => { - /// Compare Lesser than (vector) - aarch64_simd_2!($name, $type, simd_le, cmhs); + ($name:ident, $type:ty, $res:ty) => { + /// Compare signed Lesser than equals (vector) + aarch64_simd_2!($name, $type, $res, simd_le, cmge, cmge); }; } -aarch64_simd_cleu!(vcleq_u8, uint8x16_t); -aarch64_simd_cle!(vcle_s64, int64x1_t); -aarch64_simd_cle!(vcleq_s64, int64x2_t); -aarch64_simd_cleu!(vcle_u64, uint64x1_t); -aarch64_simd_cleu!(vcleq_u64, uint64x2_t); - -aarch64_simd_cleu!(vcgtq_s8, int8x16_t); +//macro_rules! aarch64_simd_cleu { +//( $ name: ident, $ type: ty) => { +///// Compare Lesser than (vector) +//aarch64_simd_2 ! 
( $ name, $ type, simd_le, cmhs); +//}; +//} + +aarch64_simd_cle!(vcle_s8, int8x8_t, uint8x8_t); +aarch64_simd_cle!(vcleq_s8, int8x16_t, uint8x16_t); +aarch64_simd_cle!(vcle_s16, int16x4_t, uint16x4_t); +aarch64_simd_cle!(vcleq_s16, int16x8_t, uint16x8_t); +aarch64_simd_cle!(vcle_s32, int32x2_t, uint32x2_t); +aarch64_simd_cle!(vcleq_s32, int32x4_t, uint32x4_t); +//arm_simd_cleu!(vcle_u8, uint8x8_t); +aarch64_simd_cle!(vcleq_u8, uint8x16_t, uint8x16_t); +//arm_simd_cleu!(vcle_u16, uint16x4_t); +//arm_simd_cleu!(vcleq_u16, uint16x8_t); +//arm_simd_cleu!(vcle_u32, uint32x2_t); +//arm_simd_cleu!(vcleq_u32, uint32x4_t); #[inline] pub fn vdupq_n_s8(a: i8) -> int8x16_t { @@ -428,6 +409,30 @@ pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[inline] pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_and(a, b) } +#[inline] +fn and16(a: i16, b: i16) -> i16 { + if (a & b) != 0 { + 0 + } else { + -1 /* 0xFFFF */ + } +} + +#[inline] +pub unsafe fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +// simd_llvm::simd_and(a, b) + int16x8_t( + and16(a.0, b.0), + and16(a.1, b.1), + and16(a.2, b.2), + and16(a.3, b.3), + and16(a.4, b.4), + and16(a.5, b.5), + and16(a.6, b.6), + and16(a.7, b.7), + ) +} + #[inline] pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_or(a, b) } @@ -438,9 +443,9 @@ pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::sim pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_or(a, b) } macro_rules! arm_reinterpret { - ($name:ident, $from:ty, $to:ty) => { + ($name: ident, $from: ty, $to: ty) => { // Vector reinterpret cast operation - #[inline] + # [inline] pub unsafe fn $name(a: $from) -> $to { mem::transmute(a) } @@ -448,18 +453,22 @@ macro_rules! 
arm_reinterpret { } arm_reinterpret!(vreinterpret_u64_u32, uint32x2_t, uint64x1_t); +arm_reinterpret!(vreinterpretq_u64_u32, uint32x4_t, uint64x2_t); arm_reinterpret!(vreinterpretq_s8_u8, uint8x16_t, int8x16_t); arm_reinterpret!(vreinterpretq_u16_u8, uint8x16_t, uint16x8_t); arm_reinterpret!(vreinterpretq_u32_u8, uint8x16_t, uint32x4_t); arm_reinterpret!(vreinterpretq_u64_u8, uint8x16_t, uint64x2_t); +arm_reinterpret!(vreinterpretq_u64_s8, int8x16_t, uint64x2_t); arm_reinterpret!(vreinterpretq_u8_s8, int8x16_t, uint8x16_t); +arm_reinterpret!(vreinterpretq_s16_s8, int8x16_t, int16x8_t); +arm_reinterpret!(vreinterpretq_s32_s8, int8x16_t, int32x4_t); arm_reinterpret!(vreinterpretq_s64_s8, int8x16_t, int64x2_t); macro_rules! arm_vget_lane { - ($name:ident, $to:ty, $from:ty, $lanes:literal) => { + ($name: ident, $to: ty, $from: ty, $lanes: literal) => { #[inline] - pub unsafe fn $name(v: $from, lane: u32) -> $to { + pub unsafe fn $ name(v: $from, lane: u32) -> $ to { simd_llvm::simd_extract(v, lane) } }; @@ -475,14 +484,6 @@ arm_vget_lane!(vgetq_lane_s32, i32, int32x4_t, 3); arm_vget_lane!(vgetq_lane_s64, i64, int64x2_t, 1); arm_vget_lane!(vget_lane_s64, i64, int64x1_t, 0); -pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: u8) -> uint8x16_t { - mem::transmute(vextq_u8_(mem::transmute(a), mem::transmute(b), n)) -} - -pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: u8) -> int8x16_t { - mem::transmute(vextq_s8_(mem::transmute(a), mem::transmute(b), n)) -} - #[inline] pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { vqmovn_u64_(a) @@ -504,7 +505,12 @@ pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { } #[inline] -fn test_u8(a:u8,b:u8) -> u8 { +pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + vqsubq_s8_(a, b) +} + +#[inline] +fn test_u8(a: u8, b: u8) -> u8 { if a & b != 0 { 0xFF } else { @@ -530,12 +536,12 @@ pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { test_u8(a.12, b.12), test_u8(a.13, 
b.13), test_u8(a.14, b.14), - test_u8(a.15, b.15) + test_u8(a.15, b.15), ) } #[inline] -fn test_s8(a:i8,b:i8) -> i8 { +fn test_s8(a: i8, b: i8) -> i8 { if a & b != 0 { -1 } else { @@ -561,7 +567,7 @@ pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { test_s8(a.12, b.12), test_s8(a.13, b.13), test_s8(a.14, b.14), - test_s8(a.15, b.15) + test_s8(a.15, b.15), ) } @@ -579,45 +585,3 @@ pub fn trailingzeroes(a: u64) -> u32 { pub unsafe fn vst1q_u32(addr: *mut u8, val: uint32x4_t) { std::ptr::write(addr as *mut uint32x4_t, val) } - - -#[allow(unused)] -macro_rules! constify_imm5 { - ($imm8:expr, $expand:ident) => { - #[allow(overflowing_literals)] - match ($imm8) & 0b1_1111 { - 0 => $expand!(0), - 1 => $expand!(1), - 2 => $expand!(2), - 3 => $expand!(3), - 4 => $expand!(4), - 5 => $expand!(5), - 6 => $expand!(6), - 7 => $expand!(7), - 8 => $expand!(8), - 9 => $expand!(9), - 10 => $expand!(10), - 11 => $expand!(11), - 12 => $expand!(12), - 13 => $expand!(13), - 14 => $expand!(14), - 15 => $expand!(15), - 16 => $expand!(16), - 17 => $expand!(17), - 18 => $expand!(18), - 19 => $expand!(19), - 20 => $expand!(20), - 21 => $expand!(21), - 22 => $expand!(22), - 23 => $expand!(23), - 24 => $expand!(24), - 25 => $expand!(25), - 26 => $expand!(26), - 27 => $expand!(27), - 28 => $expand!(28), - 29 => $expand!(29), - 30 => $expand!(30), - _ => $expand!(31), - } - }; -} \ No newline at end of file diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 7748ee37..4ff84aff 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -104,8 +104,8 @@ unsafe fn check_utf8( let verror: int8x16_t = int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); state.has_error = - vorrq_s8(vcgtq_s8(state.previous.carried_continuations, verror), - state.has_error); + vreinterpretq_s8_u8(vorrq_u8(vcgtq_s8(state.previous.carried_continuations, verror), + vreinterpretq_u8_s8(state.has_error))); } else { // it is not ascii so we have to do heavy work state.previous = 
check_utf8_bytes(vreinterpretq_s8_u8(input.v0), @@ -213,7 +213,7 @@ unsafe fn find_quote_mask_and_bits( odd_ends: u64, prev_iter_inside_quote: &mut u64, quote_bits: &mut u64, - error_mask: &mut u64 + error_mask: &mut u64, ) -> u64 { *quote_bits = cmp_mask_against_input(input, b'"'); *quote_bits &= !odd_ends; @@ -286,16 +286,6 @@ unsafe fn find_whitespace_and_structurals( *whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3); } -//template <> -//really_inline ErrorValues check_utf8_errors( -// utf8_checking_state &state) { -// uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error); -// uint32x2_t v32 = vqmovn_u64(v64); -// uint64x1_t result = vreinterpret_u64_u32(v32); -// return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR -// : simdjson::SUCCESS; -//} - // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at // base_ptr[base] incrementing base as we go @@ -391,7 +381,7 @@ impl<'de> Deserializer<'de> { let mut structural_indexes = Vec::with_capacity(len / 6); structural_indexes.push(0); // push extra root element - let mut utf8_state : Utf8CheckingState = Utf8CheckingState::default(); + let mut utf8_state: Utf8CheckingState = Utf8CheckingState::default(); // we have padded the input out to 64 byte multiple with the remainder being // zeros @@ -529,9 +519,10 @@ impl<'de> Deserializer<'de> { return Err(ErrorType::Syntax); } - let has_error : i128 = mem::transmute(utf8_state.has_error); + let utf8_error_bits: u128 = mem::transmute(vandq_s16(mem::transmute(utf8_state.has_error), mem::transmute(utf8_state.has_error))); + let utf8_error: u16 = utf8_error_bits as u16; - if has_error != 0 { + if utf8_error != 0 { Ok(structural_indexes) } else { Err(ErrorType::InvalidUTF8) diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index 8e260ae8..5e2fa56f 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -24,7 +24,7 @@ unsafe fn 
check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8 *has_error = vorrq_s8( *has_error, - vreinterpretq_s8_u8(vqsubq_u8(vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0xF4)))); + vqsubq_s8(current_bytes, vdupq_n_s8(-12 /* 0xF4 */))); } #[cfg_attr(not(feature = "no-inline"), inline)] @@ -42,14 +42,15 @@ unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn carry_continuations(initial_lengths: int8x16_t, previous_carries: int8x16_t) -> int8x16_t { - let right1: int8x16_t = vreinterpretq_s8_u8(vqsubq_u8( - vreinterpretq_u8_s8(vextq_s8(previous_carries, initial_lengths, 16 - 1)), - vdupq_n_u8(1))); + let right1: int8x16_t = vqsubq_s8( + vextq_s8(previous_carries, initial_lengths, 16 - 1), + vdupq_n_s8(1)); + let sum: int8x16_t = vaddq_s8(initial_lengths, right1); - let right2: int8x16_t = vreinterpretq_s8_u8(vqsubq_u8( - vreinterpretq_u8_s8(vextq_s8(previous_carries, sum, 16 - 2)), - vdupq_n_u8(2))); + let right2: int8x16_t = vqsubq_s8( + vextq_s8(previous_carries, sum, 16 - 2), + vdupq_n_s8(2)); vaddq_s8(sum, right2) } @@ -61,8 +62,8 @@ unsafe fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, ha // (carries > length) == (lengths > 0) let overunder: uint8x16_t = vceqq_u8( - vreinterpretq_u8_s8(vcgtq_s8(carries, initial_lengths)), - vreinterpretq_u8_s8(vcgtq_s8(initial_lengths, vdupq_n_s8(0)))); + vcgtq_s8(carries, initial_lengths), + vcgtq_s8(initial_lengths, vdupq_n_s8(0))); *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder)); } @@ -76,14 +77,14 @@ unsafe fn check_first_continuation_max( off1_current_bytes: int8x16_t, has_error: &mut int8x16_t, ) { - let mask_ed: uint8x16_t = vreinterpretq_u8_s8(vceqq_s8(off1_current_bytes, vdupq_n_s8(-19 /* 0xED */))); - let mask_f4: uint8x16_t = vreinterpretq_u8_s8(vceqq_s8(off1_current_bytes, vdupq_n_s8(-12 /* 0xF4 */))); + let mask_ed: uint8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(-19 /* 0xED */)); 
+ let mask_f4: uint8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(-12 /* 0xF4 */)); // FIXME: was vandq_u8? let badfollow_ed: uint8x16_t = - vandq_u8(vreinterpretq_u8_s8(vcgtq_s8(current_bytes, vdupq_n_s8(-97 /* 0x9F */))), mask_ed); + vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(-97 /* 0x9F */)), mask_ed); let badfollow_f4: uint8x16_t = - vandq_u8(vreinterpretq_u8_s8(vcgtq_s8(current_bytes, vdupq_n_s8(-113 /* 0x8F */))), mask_f4); + vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(-113 /* 0x8F */)), mask_f4); *has_error = vorrq_s8( *has_error, vreinterpretq_s8_u8(vorrq_u8(badfollow_ed, badfollow_f4))); @@ -123,12 +124,12 @@ unsafe fn check_overlong( let initial_mins: int8x16_t = vqtbl1q_s8(_initial_mins, vreinterpretq_u8_s8(off1_hibits)); - let initial_under: uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(initial_mins, off1_current_bytes)); + let initial_under: uint8x16_t = vcgtq_s8(initial_mins, off1_current_bytes); let second_mins: int8x16_t = vqtbl1q_s8(_second_mins, vreinterpretq_u8_s8(off1_hibits)); - let second_under: uint8x16_t = vreinterpretq_u8_s8(vcgtq_s8(second_mins, current_bytes)); + let second_under: uint8x16_t = vcgtq_s8(second_mins, current_bytes); *has_error = vorrq_s8( *has_error, vreinterpretq_s8_u8(vandq_u8(initial_under, second_under))); From 19ad13e5533fdefc29af185cce7713736f43492b Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 09:51:54 -0400 Subject: [PATCH 19/34] fix: update arm64 to use nightly --- .drone.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.drone.yml b/.drone.yml index 9b8e8bb2..8ceef40b 100644 --- a/.drone.yml +++ b/.drone.yml @@ -58,5 +58,5 @@ steps: - name: test image: rust:1 commands: - - cargo build --verbose --all - - cargo test --verbose --all + - cargo +nightly build --verbose --all + - cargo +nightly test --verbose --all From a97487b40c61673236e61dd430ce99ce51ba68c9 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 10:54:58 -0400 Subject: [PATCH 20/34] fix: use 
rust nightly image for drone CI --- .drone.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index 8ceef40b..370db257 100644 --- a/.drone.yml +++ b/.drone.yml @@ -56,7 +56,7 @@ platform: steps: - name: test - image: rust:1 + image: rustlang/rust:nightly commands: - cargo +nightly build --verbose --all - cargo +nightly test --verbose --all From faea015d0bea8dd9602e658c6e355c6851266512 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 11:55:35 -0400 Subject: [PATCH 21/34] fix: drone CI rustup nightly --- .drone.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index 370db257..8f00d391 100644 --- a/.drone.yml +++ b/.drone.yml @@ -56,7 +56,9 @@ platform: steps: - name: test - image: rustlang/rust:nightly + image: rust:1 commands: + - rustup default nightly + - rustup update - cargo +nightly build --verbose --all - cargo +nightly test --verbose --all From d66d68a473d3c540d2e3c42ce3e79c9219393c0b Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 14:23:26 -0400 Subject: [PATCH 22/34] feat: fix guards, use rust stdlib for bit count operations --- src/lib.rs | 17 +++++++++-------- src/neon/deser.rs | 8 ++++---- src/neon/intrinsics.rs | 14 -------------- src/neon/stage1.rs | 10 +++++----- src/stage2.rs | 2 +- src/value/generator.rs | 4 ---- 6 files changed, 19 insertions(+), 36 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 261672ec..9a7931d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -99,26 +99,27 @@ pub use crate::avx2::deser::*; #[cfg(target_feature = "avx2")] use crate::avx2::stage1::SIMDJSON_PADDING; -#[cfg(target_feature = "sse42")] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))] mod sse42; -#[cfg(target_feature = "sse42")] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))] pub use crate::sse42::deser::*; -#[cfg(target_feature = "sse42")] 
+#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))] use crate::sse42::stage1::SIMDJSON_PADDING; -//#[cfg(target_feature = "neon")] +#[cfg(target_feature = "neon")] mod neon; -//#[cfg(target_feature = "neon")] +#[cfg(target_feature = "neon")] pub use crate::neon::deser::*; -//#[cfg(target_feature = "neon")] +#[cfg(target_feature = "neon")] use crate::neon::stage1::SIMDJSON_PADDING; mod stage2; pub mod value; use crate::numberparse::Number; -//use std::mem; -//use std::str; +#[cfg(not(target_feature = "neon"))] +use std::mem; +use std::str; pub use crate::error::{Error, ErrorType}; pub use crate::value::*; diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 4f45d2b6..bc093865 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -78,7 +78,7 @@ impl<'de> Deserializer<'de> { if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit // find out where the quote is... - let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; + let quote_dist: u32 = quote_bits.trailing_zeros(); /////////////////////// // Above, check for overflow in case someone has a crazy string (>=4GB?) @@ -99,7 +99,7 @@ impl<'de> Deserializer<'de> { } if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { // Move to the 'bad' character - let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); + let bs_dist: u32 = bs_bits.trailing_zeros(); len += bs_dist as usize; src_i += bs_dist as usize; break; @@ -135,7 +135,7 @@ impl<'de> Deserializer<'de> { if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit // find out where the quote is... - let quote_dist: u32 = trailingzeroes(u64::from(quote_bits)) as u32; + let quote_dist: u32 = quote_bits.trailing_zeros(); /////////////////////// // Above, check for overflow in case someone has a crazy string (>=4GB?) 
@@ -161,7 +161,7 @@ impl<'de> Deserializer<'de> { } if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { // find out where the backspace is - let bs_dist: u32 = trailingzeroes(u64::from(bs_bits)); + let bs_dist: u32 = bs_bits.trailing_zeros(); let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; // we encountered backslash first. Handle backslash if escape_char == b'u' { diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 1c86659c..c8edefe8 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -29,10 +29,6 @@ extern "C" { fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.neon.pmull64"] fn vmull_p64_(a: i64, b: i64) -> int8x16_t; - #[link_name = "llvm.ctpop.i64"] - fn ctpop_s64_(a: i64) -> i64; - #[link_name = "llvm.cttz.i64"] - fn cttz_u64_(a: i64) -> i64; #[link_name = "llvm.aarch64.neon.uqxtn.v2u32"] fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; #[link_name = "llvm.aarch64.neon.uqsub.v16u8"] @@ -571,16 +567,6 @@ pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { ) } -#[inline] -pub unsafe fn hamming(a: u64) -> u32 { - ctpop_s64_(a as i64) as u32 -} - -#[inline] -pub fn trailingzeroes(a: u64) -> u32 { - unsafe { cttz_u64_(a as i64) as u32 } -} - #[inline] pub unsafe fn vst1q_u32(addr: *mut u8, val: uint32x4_t) { std::ptr::write(addr as *mut uint32x4_t, val) diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 4ff84aff..6fadcc50 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -294,7 +294,7 @@ unsafe fn find_whitespace_and_structurals( //TODO: usize was u32 here does this matter? 
#[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { - let cnt: usize = hamming(bits) as usize; + let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); let idx_minus_64 = idx.wrapping_sub(64); let idx_64_v = int32x4_t::new( @@ -313,13 +313,13 @@ unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { base.set_len(l + cnt); while bits != 0 { - let v0: i32 = static_cast_i32!(trailingzeroes(bits)); + let v0: i32 = bits.trailing_zeros() as i32; bits &= bits.wrapping_sub(1); - let v1: i32 = static_cast_i32!(trailingzeroes(bits)); + let v1: i32 = bits.trailing_zeros() as i32; bits &= bits.wrapping_sub(1); - let v2: i32 = static_cast_i32!(trailingzeroes(bits)); + let v2: i32 = bits.trailing_zeros() as i32; bits &= bits.wrapping_sub(1); - let v3: i32 = static_cast_i32!(trailingzeroes(bits)); + let v3: i32 = bits.trailing_zeros() as i32; bits &= bits.wrapping_sub(1); let v: int32x4_t = int32x4_t::new(v0, v1, v2, v3); diff --git a/src/stage2.rs b/src/stage2.rs index f33a3877..3bbd71c1 100644 --- a/src/stage2.rs +++ b/src/stage2.rs @@ -2,7 +2,7 @@ use crate::charutils::*; #[cfg(target_feature = "avx2")] use crate::avx2::stage1::SIMDJSON_PADDING; -#[cfg(target_feature = "sse4.2")] +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))] use crate::sse42::stage1::SIMDJSON_PADDING; #[cfg(target_feature = "neon")] use crate::neon::stage1::SIMDJSON_PADDING; diff --git a/src/value/generator.rs b/src/value/generator.rs index 1e0e4baf..e17b2d13 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -5,10 +5,6 @@ // https://github.com/maciejhirsz/json-rust/blob/master/src/codegen.rs use crate::value::ValueTrait; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] -use std::arch::x86_64::*; use std::io; use std::io::Write; use std::marker::PhantomData; From 4c51dc4311196e08ae179eb63ac33b574e42cd34 Mon Sep 17 
00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 14:53:14 -0400 Subject: [PATCH 23/34] feat: address review comments, platform handling and misc --- src/lib.rs | 1 + src/neon/deser.rs | 41 +++++------------------------------------ src/neon/stage1.rs | 42 ++++++++++++++++++++++++++++++++++++++---- src/neon/utf8check.rs | 8 ++++---- src/numberparse.rs | 31 ++++++++++++++++++++++++++++++- 5 files changed, 78 insertions(+), 45 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9a7931d1..690a5b0c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -181,6 +181,7 @@ impl<'de> Deserializer<'de> { let counts = Deserializer::validate(input, &structural_indexes)?; + // Set length to allow slice access in ARM code let mut strings = Vec::with_capacity(len + SIMDJSON_PADDING); unsafe { strings.set_len(len + SIMDJSON_PADDING); diff --git a/src/neon/deser.rs b/src/neon/deser.rs index bc093865..26de719b 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -10,39 +10,6 @@ pub use crate::stringparse::*; pub use crate::neon::intrinsics::*; -unsafe fn find_bs_bits_and_quote_bits(src: &[u8]) -> ParseStringHelper { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - let v0 : uint8x16_t = vld1q_u8(src.as_ptr()); - let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); - - let bs_mask : uint8x16_t = vmovq_n_u8('\\' as u8); - let qt_mask : uint8x16_t = vmovq_n_u8('"' as u8); - - let bit_mask = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); - - let cmp_bs_0 : uint8x16_t = vceqq_u8(v0, bs_mask); - let cmp_bs_1 : uint8x16_t = vceqq_u8(v1, bs_mask); - let cmp_qt_0 : uint8x16_t = vceqq_u8(v0, qt_mask); - let cmp_qt_1 : uint8x16_t = vceqq_u8(v1, qt_mask); - - let cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask); - let cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask); - let cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask); - let cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask); - - let sum0 : uint8x16_t = 
vpaddq_u8(cmp_bs_0, cmp_bs_1); - let sum1 : uint8x16_t = vpaddq_u8(cmp_qt_0, cmp_qt_1); - let sum0 = vpaddq_u8(sum0, sum1); - let sum0 = vpaddq_u8(sum0, sum0); - - ParseStringHelper { - bs_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits - quote_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits - } -} - impl<'de> Deserializer<'de> { #[cfg_attr(not(feature = "no-inline"), inline(always))] pub fn parse_str_(&mut self) -> Result<&'de str> { @@ -63,7 +30,7 @@ impl<'de> Deserializer<'de> { // later let srcx = if src.len() >= src_i + 32 { - &src[src_i..] + unsafe { src.get_unchecked(src_i..) } } else { unsafe { padding @@ -116,7 +83,7 @@ impl<'de> Deserializer<'de> { loop { let srcx = if src.len() >= src_i + 32 { - &src[src_i..] + unsafe { src.get_unchecked(src_i..) } } else { unsafe { padding @@ -126,7 +93,9 @@ impl<'de> Deserializer<'de> { } }; - dst[dst_i..dst_i + 32].copy_from_slice(&srcx[..32]); + unsafe { + dst.get_unchecked_mut(dst_i..dst_i + 32).copy_from_slice(srcx.get_unchecked(..32)); + } // store to dest unconditionally - we can overwrite the bits we don't like // later diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 6fadcc50..3f005ad5 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -27,10 +27,12 @@ fn fill_input(ptr: &[u8]) -> SimdInput { } } +const BIT_MASK: [u8; 16] = [0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80]; + #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn neon_movemask(input: uint8x16_t) -> u16 { - let bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + let bit_mask = vld1q_u8(BIT_MASK.as_ptr()); let minput: uint8x16_t = vandq_u8(input, bit_mask); let tmp: uint8x16_t = vpaddq_u8(minput, minput); @@ -42,8 +44,8 @@ unsafe fn neon_movemask(input: uint8x16_t) -> u16 { #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn 
neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: uint8x16_t) -> u64 { - let bit_mask: uint8x16_t = uint8x16_t::new(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + let bit_mask = vld1q_u8(BIT_MASK.as_ptr()); + let t0 = vandq_u8(p0, bit_mask); let t1 = vandq_u8(p1, bit_mask); let t2 = vandq_u8(p2, bit_mask); @@ -372,6 +374,38 @@ fn finalize_structurals( structurals } +pub unsafe fn find_bs_bits_and_quote_bits(src: &[u8]) -> ParseStringHelper { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + let v0 : uint8x16_t = vld1q_u8(src.as_ptr()); + let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); + + let bs_mask : uint8x16_t = vmovq_n_u8(b'\\'); + let qt_mask : uint8x16_t = vmovq_n_u8(b'"'); + + let bit_mask = vld1q_u8(BIT_MASK.as_ptr()); + + let cmp_bs_0 : uint8x16_t = vceqq_u8(v0, bs_mask); + let cmp_bs_1 : uint8x16_t = vceqq_u8(v1, bs_mask); + let cmp_qt_0 : uint8x16_t = vceqq_u8(v0, qt_mask); + let cmp_qt_1 : uint8x16_t = vceqq_u8(v1, qt_mask); + + let cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask); + let cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask); + let cmp_qt_0 = vandq_u8(cmp_qt_0, bit_mask); + let cmp_qt_1 = vandq_u8(cmp_qt_1, bit_mask); + + let sum0 : uint8x16_t = vpaddq_u8(cmp_bs_0, cmp_bs_1); + let sum1 : uint8x16_t = vpaddq_u8(cmp_qt_0, cmp_qt_1); + let sum0 = vpaddq_u8(sum0, sum1); + let sum0 = vpaddq_u8(sum0, sum0); + + ParseStringHelper { + bs_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits + quote_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits + } +} + impl<'de> Deserializer<'de> { //#[inline(never)] pub unsafe fn find_structural_bits(input: &[u8]) -> std::result::Result, ErrorType> { diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index 5e2fa56f..87505f94 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -104,7 +104,7 @@ unsafe fn check_overlong( previous_hibits: int8x16_t, 
has_error: &mut int8x16_t, ) { - let _initial_mins: int8x16_t = int8x16_t::new( + let initial_mins_tbl: int8x16_t = int8x16_t::new( -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false -62 /* 0xC2 */, -128, // 110x @@ -112,7 +112,7 @@ unsafe fn check_overlong( -15 /*0xF1 */, ); - let _second_mins: int8x16_t = int8x16_t::new( + let second_mins_tbl: int8x16_t = int8x16_t::new( -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, // 10xx => false 127, 127, // 110x => true @@ -122,12 +122,12 @@ unsafe fn check_overlong( let off1_hibits: int8x16_t = vextq_s8(previous_hibits, hibits, 16 - 1); let initial_mins: int8x16_t = - vqtbl1q_s8(_initial_mins, vreinterpretq_u8_s8(off1_hibits)); + vqtbl1q_s8(initial_mins_tbl, vreinterpretq_u8_s8(off1_hibits)); let initial_under: uint8x16_t = vcgtq_s8(initial_mins, off1_current_bytes); let second_mins: int8x16_t = - vqtbl1q_s8(_second_mins, vreinterpretq_u8_s8(off1_hibits)); + vqtbl1q_s8(second_mins_tbl, vreinterpretq_u8_s8(off1_hibits)); let second_under: uint8x16_t = vcgtq_s8(second_mins, current_bytes); diff --git a/src/numberparse.rs b/src/numberparse.rs index 2362e6d0..7674a977 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -2,6 +2,11 @@ use crate::charutils::*; use crate::unlikely; use crate::*; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + const POWER_OF_TEN: [f64; 617] = [ 1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, @@ -129,7 +134,31 @@ pub enum Number { } #[cfg_attr(not(feature = "no-inline"), inline)] -//#[cfg(target_arch = "aarch64")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn parse_eight_digits_unrolled(chars: &[u8]) -> u32 { + unsafe { + // this actually computes *16* values so we are being wasteful. 
+ let ascii0: __m128i = _mm_set1_epi8(b'0' as i8); + let mul_1_10: __m128i = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + let mul_1_100: __m128i = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + let mul_1_10000: __m128i = _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + // We know what we're doing right? :P + #[allow(clippy::cast_ptr_alignment)] + let input: __m128i = _mm_sub_epi8( + _mm_loadu_si128(chars.get_unchecked(0..16).as_ptr() as *const __m128i), + ascii0, + ); + let t1: __m128i = _mm_maddubs_epi16(input, mul_1_10); + let t2: __m128i = _mm_madd_epi16(t1, mul_1_100); + let t3: __m128i = _mm_packus_epi32(t2, t2); + let t4: __m128i = _mm_madd_epi16(t3, mul_1_10000); + _mm_cvtsi128_si32(t4) as u32 // only captures the sum of the first 8 digits, drop the rest + } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +#[cfg(target_feature = "neon")] fn parse_eight_digits_unrolled(chars: &[u8]) -> u32 { let val: u64 = unsafe { *(chars.as_ptr() as *const u64) }; // memcpy(&val, chars, sizeof(u64)); From bdc82d68d3625af5fe09f385c392e8be04ec2a51 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 14:58:21 -0400 Subject: [PATCH 24/34] feat: refactor immediate tables into loads --- src/neon/utf8check.rs | 48 +++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index 87505f94..b78fe7d1 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -27,15 +27,17 @@ unsafe fn check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8 vqsubq_s8(current_bytes, vdupq_n_s8(-12 /* 0xF4 */))); } +const NIBBLES_TBL: [i8; 16] = [ + 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) + 0, 0, 0, 0, // 10xx (continuation) + 2, 2, // 110x + 3, // 1110 + 4, // 1111, next should be 0 (not checked here) +]; + #[cfg_attr(not(feature = "no-inline"), inline)] unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { - 
let nibbles: int8x16_t = int8x16_t::new( - 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) - 0, 0, 0, 0, // 10xx (continuation) - 2, 2, // 110x - 3, // 1110 - 4, // 1111, next should be 0 (not checked here) - ); + let nibbles: int8x16_t = vld1q_s8(NIBBLES_TBL.as_ptr()); vreinterpretq_s8_u8(vqtbl1q_u8(vreinterpretq_u8_s8(nibbles), vreinterpretq_u8_s8(high_nibbles))) } @@ -90,6 +92,21 @@ unsafe fn check_first_continuation_max( *has_error, vreinterpretq_s8_u8(vorrq_u8(badfollow_ed, badfollow_f4))); } +const INITIAL_MINS_TBL: [i8; 16] = [ + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + -62 /* 0xC2 */, -128, // 110x + -31 /* 0xE1 */, // 1110 + -15 /*0xF1 */]; + +const SECOND_MINS_TBL: [i8; 16] = [ + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + 127, 127, // 110x => true + -96 /* 0xA0 */, // 1110 + -112 /* 0x90 */, +]; + // map off1_hibits => error condition // hibits off1 cur // C => < C2 && true @@ -104,21 +121,8 @@ unsafe fn check_overlong( previous_hibits: int8x16_t, has_error: &mut int8x16_t, ) { - let initial_mins_tbl: int8x16_t = int8x16_t::new( - -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, // 10xx => false - -62 /* 0xC2 */, -128, // 110x - -31 /* 0xE1 */, // 1110 - -15 /*0xF1 */, - ); - - let second_mins_tbl: int8x16_t = int8x16_t::new( - -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, // 10xx => false - 127, 127, // 110x => true - -96 /* 0xA0 */, // 1110 - -112 /* 0x90 */, - ); + let initial_mins_tbl: int8x16_t = vld1q_s8(INITIAL_MINS_TBL.as_ptr()); + let second_mins_tbl: int8x16_t = vld1q_s8(SECOND_MINS_TBL.as_ptr()); let off1_hibits: int8x16_t = vextq_s8(previous_hibits, hibits, 16 - 1); let initial_mins: int8x16_t = From 4fb672a91375244d1299a70d549a804a298cbd2a Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 20:23:05 -0400 Subject: [PATCH 25/34] feat: improving code style and similarity across 
architectures --- src/neon/deser.rs | 54 ++++--- src/neon/intrinsics.rs | 68 ++++----- src/neon/stage1.rs | 316 +++++++++++++++++++++++------------------ src/neon/utf8check.rs | 238 +++++++++++++++++++------------ 4 files changed, 386 insertions(+), 290 deletions(-) diff --git a/src/neon/deser.rs b/src/neon/deser.rs index 26de719b..5d70b7af 100644 --- a/src/neon/deser.rs +++ b/src/neon/deser.rs @@ -1,14 +1,11 @@ -//use std::mem; pub use crate::error::{Error, ErrorType}; pub use crate::Deserializer; pub use crate::Result; pub use crate::neon::stage1::*; -pub use crate::neon::intrinsics::*; pub use crate::neon::utf8check::*; -pub use crate::stringparse::*; - pub use crate::neon::intrinsics::*; +pub use crate::stringparse::*; impl<'de> Deserializer<'de> { #[cfg_attr(not(feature = "no-inline"), inline(always))] @@ -29,18 +26,29 @@ impl<'de> Deserializer<'de> { // store to dest unconditionally - we can overwrite the bits we don't like // later - let srcx = if src.len() >= src_i + 32 { - unsafe { src.get_unchecked(src_i..) } + let (v0, v1) = if src.len() >= src_i + 32 { + // This is safe since we ensure src is at least 16 wide + #[allow(clippy::cast_ptr_alignment)] + unsafe { + ( + vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()), + vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()), + ) + } } else { unsafe { padding .get_unchecked_mut(..src.len() - src_i) .clone_from_slice(src.get_unchecked(src_i..)); - &padding + // This is safe since we ensure src is at least 32 wide + ( + vld1q_u8(padding.get_unchecked(0..16).as_ptr()), + vld1q_u8(padding.get_unchecked(16..32).as_ptr()), + ) } }; - let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx) }; + let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits(v0, v1); if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. 
Move dst to point to quotes and exit @@ -82,24 +90,35 @@ impl<'de> Deserializer<'de> { let dst: &mut [u8] = self.strings.as_mut_slice(); loop { - let srcx = if src.len() >= src_i + 32 { - unsafe { src.get_unchecked(src_i..) } + let (v0, v1) = if src.len() >= src_i + 32 { + // This is safe since we ensure src is at least 16 wide + #[allow(clippy::cast_ptr_alignment)] + unsafe { + ( + vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()), + vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()), + ) + } } else { unsafe { padding .get_unchecked_mut(..src.len() - src_i) .clone_from_slice(src.get_unchecked(src_i..)); - &padding + // This is safe since we ensure src is at least 32 wide + ( + vld1q_u8(padding.get_unchecked(0..16).as_ptr()), + vld1q_u8(padding.get_unchecked(16..32).as_ptr()), + ) } }; unsafe { - dst.get_unchecked_mut(dst_i..dst_i + 32).copy_from_slice(srcx.get_unchecked(..32)); + dst.get_unchecked_mut(dst_i..dst_i + 32).copy_from_slice(src.get_unchecked(src_i..src_i + 32)); } // store to dest unconditionally - we can overwrite the bits we don't like // later - let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx) }; + let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits(v0, v1); if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { // we encountered quotes first. Move dst to point to quotes and exit @@ -138,10 +157,11 @@ impl<'de> Deserializer<'de> { // within the unicode codepoint handling code. src_i += bs_dist as usize; dst_i += bs_dist as usize; - let (o, s) = if let Ok(r) = - handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe { - dst.get_unchecked_mut(dst_i..) - }) { + let (o, s) = if let Ok(r) = handle_unicode_codepoint( + unsafe { src.get_unchecked(src_i..) }, + unsafe { dst.get_unchecked_mut(dst_i..) 
} + ) + { r } else { return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index c8edefe8..2ecfda86 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -90,8 +90,8 @@ pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { } #[inline] -pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - vpaddq_u8_(a, b) +pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + unsafe { vpaddq_u8_(a, b) } } #[inline] @@ -195,12 +195,12 @@ impl int32x4_t { } } -#[inline] -pub fn add_overflow(a: u64, b: u64, out: &mut u64) -> bool { - let (carry, did_carry) = a.overflowing_add(b); - *out = carry; - did_carry -} +//#[inline] +//pub fn add_overflow(a: u64, b: u64, out: &mut u64) -> bool { +// let (carry, did_carry) = a.overflowing_add(b); +// *out = carry; +// did_carry +//} #[inline] pub unsafe fn vld1q_s8(addr: *const i8) -> int8x16_t { @@ -223,8 +223,8 @@ macro_rules! aarch64_simd_2 { }; ($name: ident, $type: ty, $res: ty, $simd_fn: ident, $intrarm: ident, $intraarch: ident) => { #[inline] - pub unsafe fn $name(a: $type, b: $type) -> $res { - simd_llvm::$simd_fn(a, b) + pub fn $name(a: $type, b: $type) -> $res { + unsafe { simd_llvm::$simd_fn(a, b) } } } } @@ -382,6 +382,11 @@ pub fn vmovq_n_u8(a: u8) -> uint8x16_t { uint8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } +#[inline] +pub fn vmovq_n_s8(a: i8) -> int8x16_t { + int8x16_t(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) +} + #[inline] pub fn zerou8x16() -> uint8x16_t { uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) @@ -403,47 +408,22 @@ pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { } #[inline] -pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_and(a, b) } - +pub fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_llvm::simd_and(a, b) } } #[inline] -fn and16(a: i16, 
b: i16) -> i16 { - if (a & b) != 0 { - 0 - } else { - -1 /* 0xFFFF */ - } -} - +pub fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { simd_llvm::simd_and(a, b) } } #[inline] -pub unsafe fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { -// simd_llvm::simd_and(a, b) - int16x8_t( - and16(a.0, b.0), - and16(a.1, b.1), - and16(a.2, b.2), - and16(a.3, b.3), - and16(a.4, b.4), - and16(a.5, b.5), - and16(a.6, b.6), - and16(a.7, b.7), - ) -} - +pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_llvm::simd_or(a, b) } } #[inline] -pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_llvm::simd_or(a, b) } - -#[inline] -pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_and(a, b) } - +pub fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_llvm::simd_and(a, b) } } #[inline] -pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_llvm::simd_or(a, b) } +pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_llvm::simd_or(a, b) } } macro_rules! arm_reinterpret { ($name: ident, $from: ty, $to: ty) => { // Vector reinterpret cast operation - # [inline] - pub unsafe fn $name(a: $from) -> $to { - mem::transmute(a) + #[inline] + pub fn $name(a: $from) -> $to { + unsafe { mem::transmute(a) } } }; } @@ -464,7 +444,7 @@ arm_reinterpret!(vreinterpretq_s64_s8, int8x16_t, int64x2_t); macro_rules! 
arm_vget_lane { ($name: ident, $to: ty, $from: ty, $lanes: literal) => { #[inline] - pub unsafe fn $ name(v: $from, lane: u32) -> $ to { + pub unsafe fn $name(v: $from, lane: u32) -> $ to { simd_llvm::simd_extract(v, lane) } }; diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 3f005ad5..b23f059f 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -3,38 +3,23 @@ use crate::neon::intrinsics::*; use crate::neon::utf8check::*; use crate::*; -use std::mem; -pub const SIMDJSON_PADDING: usize = mem::size_of::() * 4; +use std::mem; -#[derive(Debug)] -struct SimdInput { - v0: uint8x16_t, - v1: uint8x16_t, - v2: uint8x16_t, - v3: uint8x16_t, -} +// NEON-SPECIFIC -fn fill_input(ptr: &[u8]) -> SimdInput { - unsafe { - #[allow(clippy::cast_ptr_alignment)] - SimdInput { - v0: vld1q_u8(ptr.as_ptr() as *const u8), - v1: vld1q_u8(ptr.as_ptr().add(16) as *const u8), - v2: vld1q_u8(ptr.as_ptr().add(32) as *const u8), - v3: vld1q_u8(ptr.as_ptr().add(48) as *const u8), - } - } +macro_rules! 
bit_mask { + () => { + uint8x16_t::new( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ) + }; } -const BIT_MASK: [u8; 16] = [0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80]; - #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn neon_movemask(input: uint8x16_t) -> u16 { - let bit_mask = vld1q_u8(BIT_MASK.as_ptr()); - - let minput: uint8x16_t = vandq_u8(input, bit_mask); + let minput: uint8x16_t = vandq_u8(input, bit_mask!()); let tmp: uint8x16_t = vpaddq_u8(minput, minput); let tmp = vpaddq_u8(tmp, tmp); let tmp = vpaddq_u8(tmp, tmp); @@ -44,7 +29,7 @@ unsafe fn neon_movemask(input: uint8x16_t) -> u16 { #[cfg_attr(not(feature = "no-inline"), inline(always))] unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: uint8x16_t) -> u64 { - let bit_mask = vld1q_u8(BIT_MASK.as_ptr()); + let bit_mask = bit_mask!(); let t0 = vandq_u8(p0, bit_mask); let t1 = vandq_u8(p1, bit_mask); @@ -58,10 +43,56 @@ unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0) } +// /NEON-SPECIFIC + +pub const SIMDJSON_PADDING: usize = mem::size_of::() * 4; + unsafe fn compute_quote_mask(quote_bits: u64) -> u64 { vgetq_lane_u64( vreinterpretq_u64_u8( - mem::transmute(vmull_p64(-1, quote_bits as i64))), 0) + mem::transmute( + vmull_p64( + -1, + quote_bits as i64) + ) + ), + 0 + ) +} + +#[cfg_attr(not(feature = "no-inline"), inline(always))] +unsafe fn check_ascii(si: &SimdInput) -> bool { + let highbit: uint8x16_t = vdupq_n_u8(0x80); + let t0: uint8x16_t = vorrq_u8(si.v0, si.v1); + let t1: uint8x16_t = vorrq_u8(si.v2, si.v3); + let t3: uint8x16_t = vorrq_u8(t0, t1); + let t4: uint8x16_t = vandq_u8(t3, highbit); + + let v64: uint64x2_t = vreinterpretq_u64_u8(t4); + let v32: uint32x2_t = vqmovn_u64(v64); + let result: uint64x1_t = vreinterpret_u64_u32(v32); + + vget_lane_u64(result, 0) == 0 +} + 
+#[derive(Debug)] +struct SimdInput { + v0: uint8x16_t, + v1: uint8x16_t, + v2: uint8x16_t, + v3: uint8x16_t, +} + +fn fill_input(ptr: &[u8]) -> SimdInput { + unsafe { + #[allow(clippy::cast_ptr_alignment)] + SimdInput { + v0: vld1q_u8(ptr.as_ptr() as *const u8), + v1: vld1q_u8(ptr.as_ptr().add(16) as *const u8), + v2: vld1q_u8(ptr.as_ptr().add(32) as *const u8), + v3: vld1q_u8(ptr.as_ptr().add(48) as *const u8), + } + } } struct Utf8CheckingState { @@ -79,19 +110,18 @@ impl Default for Utf8CheckingState { } } -#[cfg_attr(not(feature = "no-inline"), inline(always))] -unsafe fn check_ascii_neon(si: &SimdInput) -> bool { - let high_bit: uint8x16_t = vdupq_n_u8(0x80); - let t0: uint8x16_t = vorrq_u8(si.v0, si.v1); - let t1: uint8x16_t = vorrq_u8(si.v2, si.v3); - let t3: uint8x16_t = vorrq_u8(t0, t1); - let t4: uint8x16_t = vandq_u8(t3, high_bit); - - let v64: uint64x2_t = vreinterpretq_u64_u8(t4); - let v32: uint32x2_t = vqmovn_u64(v64); - let result: uint64x1_t = vreinterpret_u64_u32(v32); - - vget_lane_u64(result, 0) == 0 +#[inline] +fn is_utf8_status_ok(has_error: int8x16_t) -> bool { + unsafe { + let utf8_error_bits: u128 = mem::transmute( + vandq_s16( + mem::transmute(has_error), + mem::transmute(has_error) + ) + ); + + utf8_error_bits as u16 == 0 + } } #[cfg_attr(not(feature = "no-inline"), inline(always))] @@ -99,62 +129,53 @@ unsafe fn check_utf8( input: &SimdInput, state: &mut Utf8CheckingState, ) { - if check_ascii_neon(input) { + if check_ascii(input) { // All bytes are ascii. Therefore the byte that was just before must be // ascii too. We only check the byte that was just before simd_input. Nines // are arbitrary values. 
- let verror: int8x16_t = - int8x16_t::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1); - state.has_error = - vreinterpretq_s8_u8(vorrq_u8(vcgtq_s8(state.previous.carried_continuations, verror), - vreinterpretq_u8_s8(state.has_error))); + let verror: int8x16_t = int8x16_t::new( + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, + ); + state.has_error = vreinterpretq_s8_u8(vorrq_u8( + vcgtq_s8( + state.previous.carried_continuations, + verror, + ), + vreinterpretq_u8_s8(state.has_error)), + ); } else { // it is not ascii so we have to do heavy work - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), - &mut (state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), - &mut (state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), - &mut (state.previous), &mut (state.has_error)); - state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), - &mut (state.previous), &mut (state.has_error)); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0), &mut state.previous, &mut state.has_error); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1), &mut state.previous, &mut state.has_error); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2), &mut state.previous, &mut state.has_error); + state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3), &mut state.previous, &mut state.has_error); } } // a straightforward comparison of a mask against input #[cfg_attr(not(feature = "no-inline"), inline(always))] -unsafe fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { - macro_rules! 
call { - ($imm8:expr) => { - vmovq_n_u8($imm8) - }; - }; - let mask: uint8x16_t = constify_imm8!(m, call); - - let cmp_res_0: uint8x16_t = vceqq_u8(input.v0, mask); - let cmp_res_1: uint8x16_t = vceqq_u8(input.v1, mask); - let cmp_res_2: uint8x16_t = vceqq_u8(input.v2, mask); - let cmp_res_3: uint8x16_t = vceqq_u8(input.v3, mask); +fn cmp_mask_against_input(input: &SimdInput, m: u8) -> u64 { + unsafe { + let mask: uint8x16_t = vmovq_n_u8(m); + let cmp_res_0: uint8x16_t = vceqq_u8(input.v0, mask); + let cmp_res_1: uint8x16_t = vceqq_u8(input.v1, mask); + let cmp_res_2: uint8x16_t = vceqq_u8(input.v2, mask); + let cmp_res_3: uint8x16_t = vceqq_u8(input.v3, mask); - neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) + neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) + } } // find all values less than or equal than the content of maxval (using unsigned arithmetic) #[cfg_attr(not(feature = "no-inline"), inline(always))] -unsafe fn unsigned_lteq_against_input(input: &SimdInput, maxval: u8) -> u64 { - macro_rules! 
call { - ($imm8:expr) => { - vmovq_n_u8($imm8) - }; - }; - let mask: uint8x16_t = constify_imm8!(maxval, call); - - let cmp_res_0: uint8x16_t = vcleq_u8(input.v0, mask); - let cmp_res_1: uint8x16_t = vcleq_u8(input.v1, mask); - let cmp_res_2: uint8x16_t = vcleq_u8(input.v2, mask); - let cmp_res_3: uint8x16_t = vcleq_u8(input.v3, mask); - - neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) +fn unsigned_lteq_against_input(input: &SimdInput, maxval: uint8x16_t) -> u64 { + unsafe { + let cmp_res_0: uint8x16_t = vcleq_u8(input.v0, maxval); + let cmp_res_1: uint8x16_t = vcleq_u8(input.v1, maxval); + let cmp_res_2: uint8x16_t = vcleq_u8(input.v2, maxval); + let cmp_res_3: uint8x16_t = vcleq_u8(input.v3, maxval); + neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3) + } } // return a bitvector indicating where we have characters that end an odd-length @@ -171,7 +192,7 @@ unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_bac const EVEN_BITS: u64 = 0x5555_5555_5555_5555; const ODD_BITS: u64 = !EVEN_BITS; - let bs_bits: u64 = cmp_mask_against_input(input, b'\\'); + let bs_bits: u64 = cmp_mask_against_input(&input, b'\\'); let start_edges: u64 = bs_bits & !(bs_bits << 1); // flip lowest if we have an odd-length run at the end of the prior // iteration @@ -180,13 +201,13 @@ unsafe fn find_odd_backslash_sequences(input: &SimdInput, prev_iter_ends_odd_bac let odd_starts: u64 = start_edges & !even_start_mask; let even_carries: u64 = bs_bits.wrapping_add(even_starts); - let mut odd_carries: u64 = 0; // must record the carry-out of our odd-carries out of bit 63; this // indicates whether the sense of any edge going to the next iteration // should be flipped - let iter_ends_odd_backslash: bool = add_overflow(bs_bits, odd_starts, &mut odd_carries); + let (mut odd_carries, iter_ends_odd_backslash) = bs_bits.overflowing_add(odd_starts); - odd_carries |= *prev_iter_ends_odd_backslash; // push in bit zero as a potential end + odd_carries 
|= *prev_iter_ends_odd_backslash; + // push in bit zero as a potential end // if we had an odd-numbered run at the // end of the previous iteration *prev_iter_ends_odd_backslash = if iter_ends_odd_backslash { 0x1 } else { 0x0 }; @@ -217,25 +238,23 @@ unsafe fn find_quote_mask_and_bits( quote_bits: &mut u64, error_mask: &mut u64, ) -> u64 { - *quote_bits = cmp_mask_against_input(input, b'"'); + *quote_bits = cmp_mask_against_input(&input, b'"'); *quote_bits &= !odd_ends; - + // remove from the valid quoted region the unescapted characters. let mut quote_mask: u64 = compute_quote_mask(*quote_bits); quote_mask ^= *prev_iter_inside_quote; - // All Unicode characters may be placed within the // quotation marks, except for the characters that MUST be escaped: // quotation mark, reverse solidus, and the control characters (U+0000 //through U+001F). // https://tools.ietf.org/html/rfc8259 - let unescaped: u64 = unsigned_lteq_against_input(input, 0x1F); + let unescaped: u64 = unsigned_lteq_against_input(input, vmovq_n_u8(0x1F)); *error_mask |= quote_mask & unescaped; - // right shift of a signed value expected to be well-defined and standard // compliant as of C++20, // John Regher from Utah U. 
says this is fine code - *prev_iter_inside_quote = (quote_mask as i64 >> 63) as u64; + *prev_iter_inside_quote = static_cast_u64!(static_cast_i64!(quote_mask) >> 63); quote_mask } @@ -245,8 +264,32 @@ unsafe fn find_whitespace_and_structurals( whitespace: &mut u64, structurals: &mut u64, ) { - let low_nibble_mask: uint8x16_t = uint8x16_t::new(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - let high_nibble_mask: uint8x16_t = uint8x16_t::new(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + // do a 'shufti' to detect structural JSON characters + // they are + // * `{` 0x7b + // * `}` 0x7d + // * `:` 0x3a + // * `[` 0x5b + // * `]` 0x5d + // * `,` 0x2c + // these go into the first 3 buckets of the comparison (1/2/4) + + // we are also interested in the four whitespace characters: + // * space 0x20 + // * linefeed 0x0a + // * horizontal tab 0x09 + // * carriage return 0x0d + // these go into the next 2 buckets of the comparison (8/16) + + // TODO: const? + let low_nibble_mask: uint8x16_t = uint8x16_t::new( + 16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, + ); + // TODO: const? 
+ let high_nibble_mask: uint8x16_t = uint8x16_t::new( + 8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, + ); + let structural_shufti_mask: uint8x16_t = vmovq_n_u8(0x7); let whitespace_shufti_mask: uint8x16_t = vmovq_n_u8(0x18); let low_nib_and_mask: uint8x16_t = vmovq_n_u8(0xf); @@ -281,11 +324,11 @@ unsafe fn find_whitespace_and_structurals( let tmp_3: uint8x16_t = vtstq_u8(v_3, structural_shufti_mask); *structurals = neon_movemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3); - let tmp_ws_0: uint8x16_t = vtstq_u8(v_0, whitespace_shufti_mask); - let tmp_ws_1: uint8x16_t = vtstq_u8(v_1, whitespace_shufti_mask); - let tmp_ws_2: uint8x16_t = vtstq_u8(v_2, whitespace_shufti_mask); - let tmp_ws_3: uint8x16_t = vtstq_u8(v_3, whitespace_shufti_mask); - *whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3); + let tmp_ws_v0: uint8x16_t = vtstq_u8(v_0, whitespace_shufti_mask); + let tmp_ws_v1: uint8x16_t = vtstq_u8(v_1, whitespace_shufti_mask); + let tmp_ws_v2: uint8x16_t = vtstq_u8(v_2, whitespace_shufti_mask); + let tmp_ws_v3: uint8x16_t = vtstq_u8(v_3, whitespace_shufti_mask); + *whitespace = neon_movemask_bulk(tmp_ws_v0, tmp_ws_v1, tmp_ws_v2, tmp_ws_v3); } // flatten out values in 'bits' assuming that they are are to have values of idx @@ -295,16 +338,18 @@ unsafe fn find_whitespace_and_structurals( // needs to be large enough to handle this //TODO: usize was u32 here does this matter? 
#[cfg_attr(not(feature = "no-inline"), inline(always))] -unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { +fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { let cnt: usize = bits.count_ones() as usize; let mut l = base.len(); let idx_minus_64 = idx.wrapping_sub(64); - let idx_64_v = int32x4_t::new( - static_cast_i32!(idx_minus_64), - static_cast_i32!(idx_minus_64), - static_cast_i32!(idx_minus_64), - static_cast_i32!(idx_minus_64), - ); + let idx_64_v = unsafe { + int32x4_t::new( + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + static_cast_i32!(idx_minus_64), + ) + }; // We're doing some trickery here. // We reserve 64 extra entries, because we've at most 64 bit to set @@ -312,22 +357,26 @@ unsafe fn flatten_bits(base: &mut Vec, idx: u32, mut bits: u64) { // We later indiscriminatory writre over the len we set but that's OK // since we ensure we reserve the needed space base.reserve(64); - base.set_len(l + cnt); + unsafe { + base.set_len(l + cnt); + } while bits != 0 { - let v0: i32 = bits.trailing_zeros() as i32; - bits &= bits.wrapping_sub(1); - let v1: i32 = bits.trailing_zeros() as i32; - bits &= bits.wrapping_sub(1); - let v2: i32 = bits.trailing_zeros() as i32; - bits &= bits.wrapping_sub(1); - let v3: i32 = bits.trailing_zeros() as i32; - bits &= bits.wrapping_sub(1); - - let v: int32x4_t = int32x4_t::new(v0, v1, v2, v3); - let v: int32x4_t = vaddq_s32(idx_64_v, v); - - std::ptr::write(base.as_mut_ptr().add(l) as *mut int32x4_t, v); + unsafe { + let v0 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v1 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v2 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + let v3 = bits.trailing_zeros() as i32; + bits &= bits.wrapping_sub(1); + + let v: int32x4_t = int32x4_t::new(v0, v1, v2, v3); + let v: int32x4_t = vaddq_s32(idx_64_v, v); + #[allow(clippy::cast_ptr_alignment)] + 
std::ptr::write(base.as_mut_ptr().add(l) as *mut int32x4_t, v); + } l += 4; } } @@ -374,21 +423,15 @@ fn finalize_structurals( structurals } -pub unsafe fn find_bs_bits_and_quote_bits(src: &[u8]) -> ParseStringHelper { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - let v0 : uint8x16_t = vld1q_u8(src.as_ptr()); - let v1 : uint8x16_t = vld1q_u8(src.as_ptr().add(16)); - - let bs_mask : uint8x16_t = vmovq_n_u8(b'\\'); - let qt_mask : uint8x16_t = vmovq_n_u8(b'"'); - - let bit_mask = vld1q_u8(BIT_MASK.as_ptr()); +pub fn find_bs_bits_and_quote_bits(v0: uint8x16_t, v1: uint8x16_t) -> ParseStringHelper { + let quote_mask = vmovq_n_u8(b'"'); + let bs_mask = vmovq_n_u8(b'\\'); + let bit_mask = bit_mask!(); let cmp_bs_0 : uint8x16_t = vceqq_u8(v0, bs_mask); let cmp_bs_1 : uint8x16_t = vceqq_u8(v1, bs_mask); - let cmp_qt_0 : uint8x16_t = vceqq_u8(v0, qt_mask); - let cmp_qt_1 : uint8x16_t = vceqq_u8(v1, qt_mask); + let cmp_qt_0 : uint8x16_t = vceqq_u8(v0, quote_mask); + let cmp_qt_1 : uint8x16_t = vceqq_u8(v1, quote_mask); let cmp_bs_0 = vandq_u8(cmp_bs_0, bit_mask); let cmp_bs_1 = vandq_u8(cmp_bs_1, bit_mask); @@ -401,8 +444,8 @@ pub unsafe fn find_bs_bits_and_quote_bits(src: &[u8]) -> ParseStringHelper { let sum0 = vpaddq_u8(sum0, sum0); ParseStringHelper { - bs_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0), // bs_bits - quote_bits: vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) // quote_bits + bs_bits: unsafe { vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 0) }, + quote_bits: unsafe { vgetq_lane_u32(vreinterpretq_u32_u8(sum0), 1) }, } } @@ -452,9 +495,7 @@ impl<'de> Deserializer<'de> { #endif */ let input: SimdInput = fill_input(input.get_unchecked(idx as usize..)); - check_utf8(&input, &mut utf8_state); - // detect odd sequences of backslashes let odd_ends: u64 = find_odd_backslash_sequences(&input, &mut prev_iter_ends_odd_backslash); @@ -553,10 +594,7 @@ impl<'de> Deserializer<'de> { return 
Err(ErrorType::Syntax); } - let utf8_error_bits: u128 = mem::transmute(vandq_s16(mem::transmute(utf8_state.has_error), mem::transmute(utf8_state.has_error))); - let utf8_error: u16 = utf8_error_bits as u16; - - if utf8_error != 0 { + if is_utf8_status_ok(utf8_state.has_error) { Ok(structural_indexes) } else { Err(ErrorType::InvalidUTF8) diff --git a/src/neon/utf8check.rs b/src/neon/utf8check.rs index b78fe7d1..082183b1 100644 --- a/src/neon/utf8check.rs +++ b/src/neon/utf8check.rs @@ -17,95 +17,144 @@ use crate::neon::intrinsics::*; * */ +/*****************************/ +#[cfg_attr(not(feature = "no-inline"), inline)] +fn push_last_byte_of_a_to_b(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + vextq_s8(a, b, 16 - 1) + } +} + +#[cfg_attr(not(feature = "no-inline"), inline)] +fn push_last_2bytes_of_a_to_b(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe { + vextq_s8(a, b, 16 - 2) + } +} + // all byte values must be no larger than 0xF4 #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8x16_t) { +fn check_smaller_than_0xf4(current_bytes: int8x16_t, has_error: &mut int8x16_t) { // unsigned, saturates to 0 below max - *has_error = + *has_error = unsafe { vorrq_s8( *has_error, - vqsubq_s8(current_bytes, vdupq_n_s8(-12 /* 0xF4 */))); + vqsubq_s8(current_bytes, vdupq_n_s8(-12 /* 0xF4 */)) + ) + }; } -const NIBBLES_TBL: [i8; 16] = [ - 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) - 0, 0, 0, 0, // 10xx (continuation) - 2, 2, // 110x - 3, // 1110 - 4, // 1111, next should be 0 (not checked here) -]; +macro_rules! 
nibbles_tbl { + () => { + int8x16_t::new( + 1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII) + 0, 0, 0, 0, // 10xx (continuation) + 2, 2, // 110x + 3, // 1110 + 4, // 1111, next should be 0 (not checked here) + ) + }; +} #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { - let nibbles: int8x16_t = vld1q_s8(NIBBLES_TBL.as_ptr()); - - vreinterpretq_s8_u8(vqtbl1q_u8(vreinterpretq_u8_s8(nibbles), vreinterpretq_u8_s8(high_nibbles))) +fn continuation_lengths(high_nibbles: int8x16_t) -> int8x16_t { + unsafe { + vqtbl1q_s8( + nibbles_tbl!(), + vreinterpretq_u8_s8(high_nibbles), + ) + } } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn carry_continuations(initial_lengths: int8x16_t, previous_carries: int8x16_t) -> int8x16_t { - let right1: int8x16_t = vqsubq_s8( - vextq_s8(previous_carries, initial_lengths, 16 - 1), - vdupq_n_s8(1)); - - let sum: int8x16_t = vaddq_s8(initial_lengths, right1); - - let right2: int8x16_t = vqsubq_s8( - vextq_s8(previous_carries, sum, 16 - 2), - vdupq_n_s8(2)); - - vaddq_s8(sum, right2) +fn carry_continuations(initial_lengths: int8x16_t, previous_carries: int8x16_t) -> int8x16_t { + unsafe { + let right1: int8x16_t = vqsubq_s8( + push_last_byte_of_a_to_b(previous_carries, initial_lengths), + vdupq_n_s8(1), + ); + let sum: int8x16_t = vaddq_s8(initial_lengths, right1); + let right2: int8x16_t = vqsubq_s8( + push_last_2bytes_of_a_to_b(previous_carries, sum), + vdupq_n_s8(2), + ); + vaddq_s8(sum, right2) + } } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, has_error: &mut int8x16_t) { +fn check_continuations(initial_lengths: int8x16_t, carries: int8x16_t, has_error: &mut int8x16_t) { // overlap || underlap // carry > length && length > 0 || !(carry > length) && !(length > 0) // (carries > length) == (lengths > 0) + { + let overunder: uint8x16_t = vceqq_u8( + vcgtq_s8(carries, initial_lengths), + 
vcgtq_s8(initial_lengths, vdupq_n_s8(0)), + ); - let overunder: uint8x16_t = vceqq_u8( - vcgtq_s8(carries, initial_lengths), - vcgtq_s8(initial_lengths, vdupq_n_s8(0))); - - *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder)); + *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder)); + } } // when 0xED is found, next byte must be no larger than 0x9F // when 0xF4 is found, next byte must be no larger than 0x8F // next byte must be continuation, ie sign bit is set, so signed < is ok #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_first_continuation_max( +fn check_first_continuation_max( current_bytes: int8x16_t, off1_current_bytes: int8x16_t, has_error: &mut int8x16_t, ) { - let mask_ed: uint8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(-19 /* 0xED */)); - let mask_f4: uint8x16_t = vceqq_s8(off1_current_bytes, vdupq_n_s8(-12 /* 0xF4 */)); - - // FIXME: was vandq_u8? - let badfollow_ed: uint8x16_t = - vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(-97 /* 0x9F */)), mask_ed); - let badfollow_f4: uint8x16_t = - vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(-113 /* 0x8F */)), mask_f4); + { + let mask_ed: uint8x16_t = vceqq_s8( + off1_current_bytes, + vdupq_n_s8(-19 /* 0xED */), + ); + let mask_f4: uint8x16_t = vceqq_s8( + off1_current_bytes, + vdupq_n_s8(-12 /* 0xF4 */), + ); + + let badfollow_ed: uint8x16_t = vandq_u8( + vcgtq_s8(current_bytes, vdupq_n_s8(-97 /* 0x9F */)), + mask_ed, + ); + let badfollow_f4: uint8x16_t = vandq_u8( + vcgtq_s8(current_bytes, vdupq_n_s8(-113 /* 0x8F */)), + mask_f4, + ); + + *has_error = vorrq_s8( + *has_error, + vreinterpretq_s8_u8(vorrq_u8(badfollow_ed, badfollow_f4)), + ); + } +} - *has_error = vorrq_s8( - *has_error, vreinterpretq_s8_u8(vorrq_u8(badfollow_ed, badfollow_f4))); +macro_rules! 
initial_mins_tbl { + () => { + int8x16_t::new( + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + -62 /* 0xC2 */, -128, // 110x + -31 /* 0xE1 */, // 1110 + -15 /*0xF1 */, // 1111 + ) + }; } -const INITIAL_MINS_TBL: [i8; 16] = [ - -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, // 10xx => false - -62 /* 0xC2 */, -128, // 110x - -31 /* 0xE1 */, // 1110 - -15 /*0xF1 */]; - -const SECOND_MINS_TBL: [i8; 16] = [ - -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, // 10xx => false - 127, 127, // 110x => true - -96 /* 0xA0 */, // 1110 - -112 /* 0x90 */, -]; +macro_rules! second_mins_tbl { + () => { + int8x16_t::new( + -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, // 10xx => false + 127, 127, // 110x => true + -96 /* 0xA0 */, // 1110 + -112 /* 0x90 */, // 1111 + ) + }; +} // map off1_hibits => error condition // hibits off1 cur @@ -114,29 +163,32 @@ const SECOND_MINS_TBL: [i8; 16] = [ // F => < F1 && < 90 // else false && false #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn check_overlong( +fn check_overlong( current_bytes: int8x16_t, off1_current_bytes: int8x16_t, hibits: int8x16_t, previous_hibits: int8x16_t, has_error: &mut int8x16_t, ) { - let initial_mins_tbl: int8x16_t = vld1q_s8(INITIAL_MINS_TBL.as_ptr()); - let second_mins_tbl: int8x16_t = vld1q_s8(SECOND_MINS_TBL.as_ptr()); - - let off1_hibits: int8x16_t = vextq_s8(previous_hibits, hibits, 16 - 1); - let initial_mins: int8x16_t = - vqtbl1q_s8(initial_mins_tbl, vreinterpretq_u8_s8(off1_hibits)); - - let initial_under: uint8x16_t = vcgtq_s8(initial_mins, off1_current_bytes); - - let second_mins: int8x16_t = - vqtbl1q_s8(second_mins_tbl, vreinterpretq_u8_s8(off1_hibits)); - - let second_under: uint8x16_t = vcgtq_s8(second_mins, current_bytes); - - *has_error = vorrq_s8( - *has_error, vreinterpretq_s8_u8(vandq_u8(initial_under, second_under))); + unsafe { + let off1_hibits: int8x16_t = 
push_last_byte_of_a_to_b(previous_hibits, hibits); + let initial_mins: int8x16_t = vqtbl1q_s8( + initial_mins_tbl!(), + vreinterpretq_u8_s8(off1_hibits) + ); + + let initial_under: uint8x16_t = vcgtq_s8(initial_mins, off1_current_bytes); + + let second_mins: int8x16_t = vqtbl1q_s8( + second_mins_tbl!(), + vreinterpretq_u8_s8(off1_hibits) + ); + let second_under: uint8x16_t = vcgtq_s8(second_mins, current_bytes); + *has_error = vorrq_s8( + *has_error, + vreinterpretq_s8_u8(vandq_u8(initial_under, second_under)) + ); + } } pub struct ProcessedUtfBytes { @@ -157,9 +209,14 @@ impl Default for ProcessedUtfBytes { } #[cfg_attr(not(feature = "no-inline"), inline)] -unsafe fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) { +fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) { answer.rawbytes = bytes; - answer.high_nibbles = vreinterpretq_s8_u8(vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4)); + answer.high_nibbles = unsafe { + vandq_s8( + vreinterpretq_s8_u8(vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4)), + vmovq_n_s8(0x0F) + ) + }; } // check whether the current bytes are valid UTF-8 @@ -171,25 +228,26 @@ pub fn check_utf8_bytes( has_error: &mut int8x16_t, ) -> ProcessedUtfBytes { let mut pb = ProcessedUtfBytes::default(); - unsafe { - count_nibbles(current_bytes, &mut pb); + count_nibbles(current_bytes, &mut pb); - check_smaller_than_0xf4(current_bytes, has_error); + check_smaller_than_0xf4(current_bytes, has_error); - let initial_lengths: int8x16_t = continuation_lengths(pb.high_nibbles); + let initial_lengths: int8x16_t = continuation_lengths(pb.high_nibbles); - pb.carried_continuations = - carry_continuations(initial_lengths, previous.carried_continuations); + pb.carried_continuations = + carry_continuations(initial_lengths, previous.carried_continuations); - check_continuations(initial_lengths, pb.carried_continuations, has_error); + check_continuations(initial_lengths, pb.carried_continuations, has_error); - let off1_current_bytes: int8x16_t 
= - vextq_s8(previous.rawbytes, pb.rawbytes, 16 - 1); + let off1_current_bytes: int8x16_t = push_last_byte_of_a_to_b(previous.rawbytes, pb.rawbytes); + check_first_continuation_max(current_bytes, off1_current_bytes, has_error); - check_first_continuation_max(current_bytes, off1_current_bytes, has_error); - - check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles, - previous.high_nibbles, has_error); - } + check_overlong( + current_bytes, + off1_current_bytes, + pb.high_nibbles, + previous.high_nibbles, + has_error, + ); pb } From ee2bd9b655ddacb1c0f0d7a5b2375a428d7e78e7 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 22:02:39 -0400 Subject: [PATCH 26/34] feat: update generator with neon support, conditional compilation --- src/neon/intrinsics.rs | 8 +- src/neon/simd_llvm.rs | 2 +- src/value/generator.rs | 278 ++++++++++++++++++++++++++++------------- 3 files changed, 196 insertions(+), 92 deletions(-) diff --git a/src/neon/intrinsics.rs b/src/neon/intrinsics.rs index 2ecfda86..c5c98cb6 100644 --- a/src/neon/intrinsics.rs +++ b/src/neon/intrinsics.rs @@ -410,13 +410,17 @@ pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[inline] pub fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_llvm::simd_and(a, b) } } #[inline] +pub fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_llvm::simd_and(a, b) } } +#[inline] pub fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe { simd_llvm::simd_and(a, b) } } #[inline] pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_llvm::simd_or(a, b) } } #[inline] -pub fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_llvm::simd_and(a, b) } } -#[inline] pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe { simd_llvm::simd_or(a, b) } } +#[inline] +pub fn vxorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { unsafe { simd_llvm::simd_xor(a, b) } } +#[inline] +pub fn vxorrq_s8(a: int8x16_t, 
b: int8x16_t) -> int8x16_t { unsafe { simd_llvm::simd_xor(a, b) } } macro_rules! arm_reinterpret { ($name: ident, $from: ty, $to: ty) => { diff --git a/src/neon/simd_llvm.rs b/src/neon/simd_llvm.rs index bb7b25bd..6e60b63c 100644 --- a/src/neon/simd_llvm.rs +++ b/src/neon/simd_llvm.rs @@ -27,7 +27,7 @@ extern "platform-intrinsic" { // pub fn simd_shr(x: T, y: T) -> T; pub fn simd_and(x: T, y: T) -> T; pub fn simd_or(x: T, y: T) -> T; -// pub fn simd_xor(x: T, y: T) -> T; + pub fn simd_xor(x: T, y: T) -> T; // // pub fn simd_reduce_add_unordered(x: T) -> U; // pub fn simd_reduce_mul_unordered(x: T) -> U; diff --git a/src/value/generator.rs b/src/value/generator.rs index e17b2d13..8b6ef266 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -10,10 +10,25 @@ use std::io::Write; use std::marker::PhantomData; use std::ptr; -//#[cfg(target_feature = "avx2")] -//const AVX2_PRESENT : bool = true; -//#[cfg(not(target_feature = "avx2"))] -//const AVX2_PRESENT : bool = false; +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +#[cfg(target_feature = "neon")] +use crate::*; + +#[cfg(target_feature = "avx2")] +const AVX2_PRESENT : bool = true; +#[cfg(not(target_feature = "avx2"))] +const AVX2_PRESENT : bool = false; +#[cfg(target_feature = "sse4.2")] +const SSE42_PRESENT : bool = true; +#[cfg(not(target_feature = "sse4.2"))] +const SSE42_PRESENT : bool = false; +#[cfg(target_feature = "neon")] +const NEON_PRESENT : bool = true; +#[cfg(not(target_feature = "neon"))] +const NEON_PRESENT : bool = false; const QU: u8 = b'"'; const BS: u8 = b'\\'; @@ -93,96 +108,23 @@ pub trait BaseGenerator { #[inline(always)] fn write_string(&mut self, string: &str) -> io::Result<()> { stry!(self.write_char(b'"')); - let string = string.as_bytes(); -// let mut string = string.as_bytes(); -// let mut len = string.len(); -// let mut idx = 0; +// let string = string.as_bytes(); + let mut string = string.as_bytes(); + let mut 
len = string.len(); + let mut idx = 0; -// unsafe { + unsafe { // Looking at the table above the lower 5 bits are entirely // quote characters that gives us a bitmask of 0x1f for that // region, only quote (`"`) and backslash (`\`) are not in // this range. -// if AVX2_PRESENT { -// let zero = _mm256_set1_epi8(0); -// let lower_quote_range = _mm256_set1_epi8(0x1F as i8); -// let quote = _mm256_set1_epi8(b'"' as i8); -// let backslash = _mm256_set1_epi8(b'\\' as i8); -// while len - idx >= 32 { -// // Load 32 bytes of data; -// #[allow(clippy::cast_ptr_alignment)] -// let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(idx) as *const __m256i); -// // Test the data against being backslash and quote. -// let bs_or_quote = _mm256_or_si256( -// _mm256_cmpeq_epi8(data, backslash), -// _mm256_cmpeq_epi8(data, quote), -// ); -// // Now mask the data with the quote range (0x1F). -// let in_quote_range = _mm256_and_si256(data, lower_quote_range); -// // then test of the data is unchanged. aka: xor it with the -// // Any field that was inside the quote range it will be zero -// // now. 
-// let is_unchanged = _mm256_xor_si256(data, in_quote_range); -// let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); -// let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); -// if quote_bits != 0 { -// let quote_dist = trailingzeroes(quote_bits as u64) as usize; -// stry!(self.get_writer().write_all(&string[0..idx + quote_dist])); -// let ch = string[idx + quote_dist]; -// match ESCAPED[ch as usize] { -// b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), -// -// escape => stry!(self.write(&[b'\\', escape])), -// }; -// string = &string[idx + quote_dist + 1..]; -// idx = 0; -// len = string.len(); -// } else { -// idx += 32; -// } -// } -// } - // The case where we have a 16+ byte block - // we repeate the same logic as above but with - // only 16 bytes -// let zero = _mm_set1_epi8(0); -// let lower_quote_range = _mm_set1_epi8(0x1F as i8); -// let quote = _mm_set1_epi8(b'"' as i8); -// let backslash = _mm_set1_epi8(b'\\' as i8); -// while len - idx > 16 { -// // Load 16 bytes of data; -// #[allow(clippy::cast_ptr_alignment)] -// let data: __m128i = _mm_loadu_si128(string.as_ptr().add(idx) as *const __m128i); -// // Test the data against being backslash and quote. -// let bs_or_quote = -// _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); -// // Now mask the data with the quote range (0x1F). -// let in_quote_range = _mm_and_si128(data, lower_quote_range); -// // then test of the data is unchanged. aka: xor it with the -// // Any field that was inside the quote range it will be zero -// // now. 
-// let is_unchanged = _mm_xor_si128(data, in_quote_range); -// let in_range = _mm_cmpeq_epi8(is_unchanged, zero); -// let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); -// if quote_bits != 0 { -// let quote_dist = trailingzeroes(quote_bits as u64) as usize; -// stry!(self.get_writer().write_all(&string[0..idx + quote_dist])); -// let ch = string[idx + quote_dist]; -// match ESCAPED[ch as usize] { -// b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), -// -// escape => stry!(self.write(&[b'\\', escape])), -// }; -// string = &string[idx + quote_dist + 1..]; -// idx = 0; -// len = string.len(); -// } else { -// idx += 16; -// } -// } -// stry!(self.get_writer().write_all(&string[0..idx])); -// string = &string[idx..]; -// } + if AVX2_PRESENT { + self.process_32_bytes(&mut string, &mut len, &mut idx).unwrap(); + } + if SSE42_PRESENT || NEON_PRESENT { + self.process_16_bytes(&mut string, &mut len, &mut idx).unwrap(); + } + } // Legacy code to handle the remainder of the code for (index, ch) in string.iter().enumerate() { if ESCAPED[*ch as usize] > 0 { @@ -194,6 +136,164 @@ pub trait BaseGenerator { self.write_char(b'"') } + #[cfg(target_feature = "neon")] + unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + macro_rules! 
bit_mask { + () => { + uint8x16_t::new( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ) + }; + } + + #[cfg_attr(not(feature = "no-inline"), inline(always))] + unsafe fn neon_movemask(input: uint8x16_t) -> u16 { + let minput: uint8x16_t = vandq_u8(input, bit_mask!()); + let tmp: uint8x16_t = vpaddq_u8(minput, minput); + let tmp = vpaddq_u8(tmp, tmp); + let tmp = vpaddq_u8(tmp, tmp); + + vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) + } + + // The case where we have a 16+ byte block + // we repeate the same logic as above but with + // only 16 bytes + let zero = vdupq_n_u8(0); + let lower_quote_range = vdupq_n_u8(0x1F); + let quote = vdupq_n_u8(b'"'); + let backslash = vdupq_n_u8(b'\\'); + while *len - *idx > 16 { + // Load 16 bytes of data; + let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx)); + // Test the data against being backslash and quote. + let bs_or_quote = + vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote)); + // Now mask the data with the quote range (0x1F). + let in_quote_range = vandq_u8(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = vxorrq_u8(data, in_quote_range); + let in_range = vceqq_u8(is_unchanged, zero); + let quote_bits = neon_movemask(vorrq_u8(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; + } + } + stry!(self.get_writer().write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) + } + + #[cfg(not(target_feature = "avx2"))] + #[inline(always)] + fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { + Ok(()) + } + + #[cfg(target_feature = "sse4.2")] + #[inline(always)] + unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + // The case where we have a 16+ byte block + // we repeate the same logic as above but with + // only 16 bytes + let zero = _mm_set1_epi8(0); + let lower_quote_range = _mm_set1_epi8(0x1F as i8); + let quote = _mm_set1_epi8(b'"' as i8); + let backslash = _mm_set1_epi8(b'\\' as i8); + while *len - *idx > 16 { + // Load 16 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); + // Test the data against being backslash and quote. + let bs_or_quote = + _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm_and_si128(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = _mm_xor_si128(data, in_quote_range); + let in_range = _mm_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; + } + } + stry!(self.get_writer().write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) + } + + #[cfg(target_feature = "avx2")] + #[inline(always)] + unsafe fn process_32_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + let zero = _mm256_set1_epi8(0); + let lower_quote_range = _mm256_set1_epi8(0x1F as i8); + let quote = _mm256_set1_epi8(b'"' as i8); + let backslash = _mm256_set1_epi8(b'\\' as i8); + while *len - *idx >= 32 { + // Load 32 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i); + // Test the data against being backslash and quote. + let bs_or_quote = _mm256_or_si256( + _mm256_cmpeq_epi8(data, backslash), + _mm256_cmpeq_epi8(data, quote), + ); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm256_and_si256(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = _mm256_xor_si256(data, in_quote_range); + let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 32; + } + } + Ok(()) + } + #[inline(always)] fn write_float(&mut self, num: f64) -> io::Result<()> { let mut buffer = ryu::Buffer::new(); From 75511d388aa2c8bbe4a5512033c03cbf0fb7d527 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 22:10:47 -0400 Subject: [PATCH 27/34] fix: remove double semicolon --- src/numberparse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/numberparse.rs b/src/numberparse.rs index 7674a977..01af460e 100644 --- a/src/numberparse.rs +++ b/src/numberparse.rs @@ -228,7 +228,7 @@ impl<'de> Deserializer<'de> { digit = unsafe { *p.get_unchecked(digitcount) } - b'0'; digitcount += 1; fraction_weight *= 10.0; - fraction += f64::from(digit) / fraction_weight;; + fraction += f64::from(digit) / fraction_weight; } i += fraction; } From 23191c61d164d854f1842abac8e7e7b274293bdb Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Thu, 15 Aug 2019 23:44:54 -0400 Subject: [PATCH 28/34] feat: conditional feature enablement for neon only --- src/lib.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 690a5b0c..b68deadb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,17 @@ #![deny(warnings)] -#![feature(asm)] -#![feature(stdsimd)] -#![feature(repr_simd)] -#![feature(custom_inner_attributes)] 
-#![feature(aarch64_target_feature)] -#![feature(platform_intrinsics)] -#![feature(stmt_expr_attributes)] -#![feature(simd_ffi)] -#![feature(link_llvm_intrinsics)] + +#![cfg_attr(target_feature = "neon", feature( + asm, + stdsimd, + repr_simd, + custom_inner_attributes, + aarch64_target_feature, + platform_intrinsics, + stmt_expr_attributes, + simd_ffi, + link_llvm_intrinsics + ) +)] #![cfg_attr(feature = "hints", feature(core_intrinsics))] //! simdjson-rs is a rust port of the simejson c++ library. It follows From d514f2d30a11a5b1a861ad3babe31c819645cdf6 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 16 Aug 2019 00:09:17 -0400 Subject: [PATCH 29/34] fix: add conditional compilation for target_feature=-avx2,-sse4.2 on x86/x86_64 platforms --- src/value/generator.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/value/generator.rs b/src/value/generator.rs index 8b6ef266..920b6615 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -12,7 +12,7 @@ use std::ptr; #[cfg(target_arch = "x86")] use std::arch::x86::*; -#[cfg(target_arch = "x86_64")] +#[cfg(any(target_feature = "sse4.2", target_feature = "avx2"))] use std::arch::x86_64::*; #[cfg(target_feature = "neon")] use crate::*; @@ -108,7 +108,6 @@ pub trait BaseGenerator { #[inline(always)] fn write_string(&mut self, string: &str) -> io::Result<()> { stry!(self.write_char(b'"')); -// let string = string.as_bytes(); let mut string = string.as_bytes(); let mut len = string.len(); let mut idx = 0; @@ -199,12 +198,6 @@ pub trait BaseGenerator { Ok(()) } - #[cfg(not(target_feature = "avx2"))] - #[inline(always)] - fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { - Ok(()) - } - #[cfg(target_feature = "sse4.2")] #[inline(always)] unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { @@ -251,6 +244,18 @@ pub trait BaseGenerator { Ok(()) } + 
#[cfg(all(not(target_feature = "sse4.2"), not(target_feature = "neon")))] + #[inline(always)] + unsafe fn process_16_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { + Ok(()) + } + + #[cfg(not(target_feature = "avx2"))] + #[inline(always)] + fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { + Ok(()) + } + #[cfg(target_feature = "avx2")] #[inline(always)] unsafe fn process_32_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { From 3bdc16922d747170e5d42314a7168e87b46ee8f8 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 16 Aug 2019 13:23:42 -0400 Subject: [PATCH 30/34] feat: factor arch-specific methods into separate modules (with macros, need some advice there) --- src/avx2/generator.rs | 97 +++++++++++++++++++++ src/avx2/mod.rs | 3 +- src/neon/generator.rs | 73 ++++++++++++++++ src/neon/mod.rs | 1 + src/sse42/generator.rs | 72 ++++++++++++++++ src/sse42/mod.rs | 3 +- src/value/generator.rs | 192 +++-------------------------------------- 7 files changed, 260 insertions(+), 181 deletions(-) create mode 100644 src/avx2/generator.rs create mode 100644 src/neon/generator.rs create mode 100644 src/sse42/generator.rs diff --git a/src/avx2/generator.rs b/src/avx2/generator.rs new file mode 100644 index 00000000..c77a4ebc --- /dev/null +++ b/src/avx2/generator.rs @@ -0,0 +1,97 @@ + +#[macro_export] +macro_rules! 
process_32_bytes { + () => { + #[inline(always)] + unsafe fn process_32_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + let zero = _mm256_set1_epi8(0); + let lower_quote_range = _mm256_set1_epi8(0x1F as i8); + let quote = _mm256_set1_epi8(b'"' as i8); + let backslash = _mm256_set1_epi8(b'\\' as i8); + while *len - *idx >= 32 { + // Load 32 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i); + // Test the data against being backslash and quote. + let bs_or_quote = _mm256_or_si256( + _mm256_cmpeq_epi8(data, backslash), + _mm256_cmpeq_epi8(data, quote), + ); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm256_and_si256(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. + let is_unchanged = _mm256_xor_si256(data, in_quote_range); + let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 32; + } + } + Ok(()) + } + } +} + +#[macro_export] +macro_rules! 
process_16_bytes { + () => { + #[inline(always)] + unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + // The case where we have a 16+ byte block + // we repeate the same logic as above but with + // only 16 bytes + let zero = _mm_set1_epi8(0); + let lower_quote_range = _mm_set1_epi8(0x1F as i8); + let quote = _mm_set1_epi8(b'"' as i8); + let backslash = _mm_set1_epi8(b'\\' as i8); + while *len - *idx > 16 { + // Load 16 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); + // Test the data against being backslash and quote. + let bs_or_quote = + _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm_and_si128(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = _mm_xor_si128(data, in_quote_range); + let in_range = _mm_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; + } + } + stry!(self.get_writer().write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) + } + } +} \ No newline at end of file diff --git a/src/avx2/mod.rs b/src/avx2/mod.rs index 30c55c86..ac608ae2 100644 --- a/src/avx2/mod.rs +++ b/src/avx2/mod.rs @@ -1,3 +1,4 @@ pub mod deser; pub mod stage1; -pub mod utf8check; \ No newline at end of file +pub mod utf8check; +pub mod generator; \ No newline at end of file diff --git a/src/neon/generator.rs b/src/neon/generator.rs new file mode 100644 index 00000000..7e0c8b60 --- /dev/null +++ b/src/neon/generator.rs @@ -0,0 +1,73 @@ + +#[macro_export] +macro_rules! process_32_bytes { + () => { + #[inline(always)] + unsafe fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result < () > { + Ok(()) + } + }; +} + +#[macro_export] +macro_rules! 
process_16_bytes { + () => { + #[inline(always)] + unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + #[cfg_attr(not(feature = "no-inline"), inline(always))] + unsafe fn __neon_movemask(input: uint8x16_t) -> u16 { + let bit_mask = uint8x16_t::new( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); + let minput: uint8x16_t = vandq_u8(input, bit_mask); + let tmp: uint8x16_t = vpaddq_u8(minput, minput); + let tmp = vpaddq_u8(tmp, tmp); + let tmp = vpaddq_u8(tmp, tmp); + + vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) + } + + // The case where we have a 16+ byte block + // we repeate the same logic as above but with + // only 16 bytes + let zero = vdupq_n_u8(0); + let lower_quote_range = vdupq_n_u8(0x1F); + let quote = vdupq_n_u8(b'"'); + let backslash = vdupq_n_u8(b'\\'); + while *len - *idx > 16 { + // Load 16 bytes of data; + let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx)); + // Test the data against being backslash and quote. + let bs_or_quote = + vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote)); + // Now mask the data with the quote range (0x1F). + let in_quote_range = vandq_u8(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = vxorrq_u8(data, in_quote_range); + let in_range = vceqq_u8(is_unchanged, zero); + let quote_bits = __neon_movemask(vorrq_u8(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; + } + } + stry!(self.get_writer().write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) + } + }; +} \ No newline at end of file diff --git a/src/neon/mod.rs b/src/neon/mod.rs index 1b979e5a..f7868249 100644 --- a/src/neon/mod.rs +++ b/src/neon/mod.rs @@ -1,6 +1,7 @@ pub mod deser; pub mod stage1; pub mod utf8check; +pub mod generator; mod simd; mod simd_llvm; mod intrinsics; \ No newline at end of file diff --git a/src/sse42/generator.rs b/src/sse42/generator.rs new file mode 100644 index 00000000..4b3a90f3 --- /dev/null +++ b/src/sse42/generator.rs @@ -0,0 +1,72 @@ + +#[macro_export] +macro_rules! process_32_bytes { + () => { + #[inline(always)] + unsafe fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { + Ok(()) + } + } +} + +#[macro_export] +#[cfg(target_feature = "sse4.2")] +macro_rules! 
process_16_bytes { + () => { + #[inline(always)] + unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { + // The case where we have a 16+ byte block + // we repeate the same logic as above but with + // only 16 bytes + let zero = _mm_set1_epi8(0); + let lower_quote_range = _mm_set1_epi8(0x1F as i8); + let quote = _mm_set1_epi8(b'"' as i8); + let backslash = _mm_set1_epi8(b'\\' as i8); + while *len - *idx > 16 { + // Load 16 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); + // Test the data against being backslash and quote. + let bs_or_quote = + _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm_and_si128(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. + let is_unchanged = _mm_xor_si128(data, in_quote_range); + let in_range = _mm_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), + + escape => stry!(self.write(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; + } + } + stry!(self.get_writer().write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) + } + } +} + +#[macro_export] +#[cfg(not(target_feature = "sse4.2"))] +macro_rules! 
process_16_bytes { + () => { + #[inline(always)] + unsafe fn process_16_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { + Ok(()) + } + } +} \ No newline at end of file diff --git a/src/sse42/mod.rs b/src/sse42/mod.rs index 30c55c86..ac608ae2 100644 --- a/src/sse42/mod.rs +++ b/src/sse42/mod.rs @@ -1,3 +1,4 @@ pub mod deser; pub mod stage1; -pub mod utf8check; \ No newline at end of file +pub mod utf8check; +pub mod generator; \ No newline at end of file diff --git a/src/value/generator.rs b/src/value/generator.rs index 920b6615..b053344f 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -14,21 +14,17 @@ use std::ptr; use std::arch::x86::*; #[cfg(any(target_feature = "sse4.2", target_feature = "avx2"))] use std::arch::x86_64::*; -#[cfg(target_feature = "neon")] + use crate::*; #[cfg(target_feature = "avx2")] -const AVX2_PRESENT : bool = true; -#[cfg(not(target_feature = "avx2"))] -const AVX2_PRESENT : bool = false; -#[cfg(target_feature = "sse4.2")] -const SSE42_PRESENT : bool = true; -#[cfg(not(target_feature = "sse4.2"))] -const SSE42_PRESENT : bool = false; +pub use crate::avx2::generator::*; + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))] +pub use crate::sse42::generator::*; + #[cfg(target_feature = "neon")] -const NEON_PRESENT : bool = true; -#[cfg(not(target_feature = "neon"))] -const NEON_PRESENT : bool = false; +pub use crate::neon::generator::*; const QU: u8 = b'"'; const BS: u8 = b'\\'; @@ -117,12 +113,8 @@ pub trait BaseGenerator { // quote characters that gives us a bitmask of 0x1f for that // region, only quote (`"`) and backslash (`\`) are not in // this range. 
- if AVX2_PRESENT { - self.process_32_bytes(&mut string, &mut len, &mut idx).unwrap(); - } - if SSE42_PRESENT || NEON_PRESENT { - self.process_16_bytes(&mut string, &mut len, &mut idx).unwrap(); - } + stry!(self.process_32_bytes(&mut string, &mut len, &mut idx)); + stry!(self.process_32_bytes(&mut string, &mut len, &mut idx)); } // Legacy code to handle the remainder of the code for (index, ch) in string.iter().enumerate() { @@ -135,169 +127,11 @@ pub trait BaseGenerator { self.write_char(b'"') } - #[cfg(target_feature = "neon")] - unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - macro_rules! bit_mask { - () => { - uint8x16_t::new( - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - ) - }; - } - - #[cfg_attr(not(feature = "no-inline"), inline(always))] - unsafe fn neon_movemask(input: uint8x16_t) -> u16 { - let minput: uint8x16_t = vandq_u8(input, bit_mask!()); - let tmp: uint8x16_t = vpaddq_u8(minput, minput); - let tmp = vpaddq_u8(tmp, tmp); - let tmp = vpaddq_u8(tmp, tmp); - - vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) - } - - // The case where we have a 16+ byte block - // we repeate the same logic as above but with - // only 16 bytes - let zero = vdupq_n_u8(0); - let lower_quote_range = vdupq_n_u8(0x1F); - let quote = vdupq_n_u8(b'"'); - let backslash = vdupq_n_u8(b'\\'); - while *len - *idx > 16 { - // Load 16 bytes of data; - let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx)); - // Test the data against being backslash and quote. - let bs_or_quote = - vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote)); - // Now mask the data with the quote range (0x1F). - let in_quote_range = vandq_u8(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = vxorrq_u8(data, in_quote_range); - let in_range = vceqq_u8(is_unchanged, zero); - let quote_bits = neon_movemask(vorrq_u8(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), - - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 16; - } - } - stry!(self.get_writer().write_all(&string[0..*idx])); - *string = &string[*idx..]; - Ok(()) - } - - #[cfg(target_feature = "sse4.2")] - #[inline(always)] - unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - // The case where we have a 16+ byte block - // we repeate the same logic as above but with - // only 16 bytes - let zero = _mm_set1_epi8(0); - let lower_quote_range = _mm_set1_epi8(0x1F as i8); - let quote = _mm_set1_epi8(b'"' as i8); - let backslash = _mm_set1_epi8(b'\\' as i8); - while *len - *idx > 16 { - // Load 16 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); - // Test the data against being backslash and quote. - let bs_or_quote = - _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); - // Now mask the data with the quote range (0x1F). - let in_quote_range = _mm_and_si128(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = _mm_xor_si128(data, in_quote_range); - let in_range = _mm_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), - - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 16; - } - } - stry!(self.get_writer().write_all(&string[0..*idx])); - *string = &string[*idx..]; - Ok(()) - } - - #[cfg(all(not(target_feature = "sse4.2"), not(target_feature = "neon")))] - #[inline(always)] - unsafe fn process_16_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { - Ok(()) - } + // 32-byte generation implementation + process_32_bytes!(); - #[cfg(not(target_feature = "avx2"))] - #[inline(always)] - fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { - Ok(()) - } - - #[cfg(target_feature = "avx2")] - #[inline(always)] - unsafe fn process_32_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - let zero = _mm256_set1_epi8(0); - let lower_quote_range = _mm256_set1_epi8(0x1F as i8); - let quote = _mm256_set1_epi8(b'"' as i8); - let backslash = _mm256_set1_epi8(b'\\' as i8); - while *len - *idx >= 32 { - // Load 32 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i); - // Test the data against being backslash and quote. - let bs_or_quote = _mm256_or_si256( - _mm256_cmpeq_epi8(data, backslash), - _mm256_cmpeq_epi8(data, quote), - ); - // Now mask the data with the quote range (0x1F). 
- let in_quote_range = _mm256_and_si256(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. - let is_unchanged = _mm256_xor_si256(data, in_quote_range); - let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), - - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 32; - } - } - Ok(()) - } + // 16-byte generation implementation + process_16_bytes!(); #[inline(always)] fn write_float(&mut self, num: f64) -> io::Result<()> { From 40bb843919a4f88261cb77b4b12e1eb404419924 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 16 Aug 2019 14:25:09 -0400 Subject: [PATCH 31/34] fix: does drone need clean? 
--- .drone.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.drone.yml b/.drone.yml index 8f00d391..dbc33acb 100644 --- a/.drone.yml +++ b/.drone.yml @@ -60,5 +60,5 @@ steps: commands: - rustup default nightly - rustup update - - cargo +nightly build --verbose --all + - cargo clean && cargo +nightly build --verbose --all - cargo +nightly test --verbose --all From 78b0aa8c6ef0b6002c3de7a0e53d86ec2a983381 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 16 Aug 2019 14:44:31 -0400 Subject: [PATCH 32/34] fix: utf8 error checking (maybe) --- src/neon/stage1.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index b23f059f..8f3b9b5d 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -113,14 +113,9 @@ impl Default for Utf8CheckingState { #[inline] fn is_utf8_status_ok(has_error: int8x16_t) -> bool { unsafe { - let utf8_error_bits: u128 = mem::transmute( - vandq_s16( - mem::transmute(has_error), - mem::transmute(has_error) - ) - ); + let has_error_128 : i128 = mem::transmute(has_error); - utf8_error_bits as u16 == 0 + has_error_128 as u32 == 0 } } From 5a23b4058608d7e226c9e6c20a7d9c8354d1d122 Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 16 Aug 2019 15:10:18 -0400 Subject: [PATCH 33/34] fix: utf8 error checking (maybe maybe) --- src/neon/stage1.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 8f3b9b5d..0f008574 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -115,7 +115,7 @@ fn is_utf8_status_ok(has_error: int8x16_t) -> bool { unsafe { let has_error_128 : i128 = mem::transmute(has_error); - has_error_128 as u32 == 0 + has_error_128 == 0 } } From af177f3cb3da9d6200ca4dd428103345d929357c Mon Sep 17 00:00:00 2001 From: Sunny Gleason Date: Fri, 16 Aug 2019 16:11:19 -0400 Subject: [PATCH 34/34] feat: fancy generic generator functions, thanks @Licenser --- src/avx2/generator.rs | 136 
++++++++++++++--------------------------- src/neon/generator.rs | 115 ++++++++++++++-------------------- src/neon/stage1.rs | 4 +- src/sse42/generator.rs | 111 ++++++++++++++------------------- src/value.rs | 2 +- src/value/generator.rs | 22 ++----- 6 files changed, 143 insertions(+), 247 deletions(-) diff --git a/src/avx2/generator.rs b/src/avx2/generator.rs index c77a4ebc..13e72061 100644 --- a/src/avx2/generator.rs +++ b/src/avx2/generator.rs @@ -1,97 +1,51 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; -#[macro_export] -macro_rules! process_32_bytes { - () => { - #[inline(always)] - unsafe fn process_32_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - let zero = _mm256_set1_epi8(0); - let lower_quote_range = _mm256_set1_epi8(0x1F as i8); - let quote = _mm256_set1_epi8(b'"' as i8); - let backslash = _mm256_set1_epi8(b'\\' as i8); - while *len - *idx >= 32 { - // Load 32 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i); - // Test the data against being backslash and quote. - let bs_or_quote = _mm256_or_si256( - _mm256_cmpeq_epi8(data, backslash), - _mm256_cmpeq_epi8(data, quote), - ); - // Now mask the data with the quote range (0x1F). - let in_quote_range = _mm256_and_si256(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = _mm256_xor_si256(data, in_quote_range); - let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), +use crate::value::generator::ESCAPED; +use std::io; - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 32; - } - } - Ok(()) - } - } -} - -#[macro_export] -macro_rules! process_16_bytes { - () => { - #[inline(always)] - unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - // The case where we have a 16+ byte block - // we repeate the same logic as above but with - // only 16 bytes - let zero = _mm_set1_epi8(0); - let lower_quote_range = _mm_set1_epi8(0x1F as i8); - let quote = _mm_set1_epi8(b'"' as i8); - let backslash = _mm_set1_epi8(b'\\' as i8); - while *len - *idx > 16 { - // Load 16 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); - // Test the data against being backslash and quote. - let bs_or_quote = - _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); - // Now mask the data with the quote range (0x1F). - let in_quote_range = _mm_and_si128(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = _mm_xor_si128(data, in_quote_range); - let in_range = _mm_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), +#[inline(always)] +pub unsafe fn write_str_simd(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write { + let zero = _mm256_set1_epi8(0); + let lower_quote_range = _mm256_set1_epi8(0x1F as i8); + let quote = _mm256_set1_epi8(b'"' as i8); + let backslash = _mm256_set1_epi8(b'\\' as i8); + while *len - *idx >= 32 { + // Load 32 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i); + // Test the data against being backslash and quote. + let bs_or_quote = _mm256_or_si256( + _mm256_cmpeq_epi8(data, backslash), + _mm256_cmpeq_epi8(data, quote), + ); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm256_and_si256(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = _mm256_xor_si256(data, in_quote_range); + let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(writer.write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(writer, "\\u{:04x}", ch)), - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 16; - } - } - stry!(self.get_writer().write_all(&string[0..*idx])); - *string = &string[*idx..]; - Ok(()) + escape => stry!(writer.write_all(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 32; } } + stry!(writer.write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) } \ No newline at end of file diff --git a/src/neon/generator.rs b/src/neon/generator.rs index 7e0c8b60..6c8cf358 100644 --- a/src/neon/generator.rs +++ b/src/neon/generator.rs @@ -1,73 +1,48 @@ +use crate::value::generator::ESCAPED; +use std::io; +use crate::neon::intrinsics::*; +use crate::neon::stage1::neon_movemask; -#[macro_export] -macro_rules! process_32_bytes { - () => { - #[inline(always)] - unsafe fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result < () > { - Ok(()) - } - }; -} - -#[macro_export] -macro_rules! 
process_16_bytes { - () => { - #[inline(always)] - unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - #[cfg_attr(not(feature = "no-inline"), inline(always))] - unsafe fn __neon_movemask(input: uint8x16_t) -> u16 { - let bit_mask = uint8x16_t::new( - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - ); - let minput: uint8x16_t = vandq_u8(input, bit_mask); - let tmp: uint8x16_t = vpaddq_u8(minput, minput); - let tmp = vpaddq_u8(tmp, tmp); - let tmp = vpaddq_u8(tmp, tmp); +#[inline(always)] +pub unsafe fn write_str_simd(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write { + // The case where we have a 16+ byte block + // we repeate the same logic as above but with + // only 16 bytes + let zero = vdupq_n_u8(0); + let lower_quote_range = vdupq_n_u8(0x1F); + let quote = vdupq_n_u8(b'"'); + let backslash = vdupq_n_u8(b'\\'); + while *len - *idx > 16 { + // Load 16 bytes of data; + let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx)); + // Test the data against being backslash and quote. + let bs_or_quote = + vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote)); + // Now mask the data with the quote range (0x1F). + let in_quote_range = vandq_u8(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = vxorrq_u8(data, in_quote_range); + let in_range = vceqq_u8(is_unchanged, zero); + let quote_bits = neon_movemask(vorrq_u8(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(writer.write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(writer, "\\u{:04x}", ch)), - vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0) - } - - // The case where we have a 16+ byte block - // we repeate the same logic as above but with - // only 16 bytes - let zero = vdupq_n_u8(0); - let lower_quote_range = vdupq_n_u8(0x1F); - let quote = vdupq_n_u8(b'"'); - let backslash = vdupq_n_u8(b'\\'); - while *len - *idx > 16 { - // Load 16 bytes of data; - let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx)); - // Test the data against being backslash and quote. - let bs_or_quote = - vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote)); - // Now mask the data with the quote range (0x1F). - let in_quote_range = vandq_u8(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = vxorrq_u8(data, in_quote_range); - let in_range = vceqq_u8(is_unchanged, zero); - let quote_bits = __neon_movemask(vorrq_u8(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), - - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 16; - } - } - stry!(self.get_writer().write_all(&string[0..*idx])); - *string = &string[*idx..]; - Ok(()) + escape => stry!(writer.write_all(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; } - }; -} \ No newline at end of file + } + stry!(writer.write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) +} diff --git a/src/neon/stage1.rs b/src/neon/stage1.rs index 0f008574..45322cee 100644 --- a/src/neon/stage1.rs +++ b/src/neon/stage1.rs @@ -18,7 +18,7 @@ macro_rules! 
bit_mask { } #[cfg_attr(not(feature = "no-inline"), inline(always))] -unsafe fn neon_movemask(input: uint8x16_t) -> u16 { +pub(crate) unsafe fn neon_movemask(input: uint8x16_t) -> u16 { let minput: uint8x16_t = vandq_u8(input, bit_mask!()); let tmp: uint8x16_t = vpaddq_u8(minput, minput); let tmp = vpaddq_u8(tmp, tmp); @@ -28,7 +28,7 @@ unsafe fn neon_movemask(input: uint8x16_t) -> u16 { } #[cfg_attr(not(feature = "no-inline"), inline(always))] -unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: uint8x16_t) -> u64 { +pub unsafe fn neon_movemask_bulk(p0: uint8x16_t, p1: uint8x16_t, p2: uint8x16_t, p3: uint8x16_t) -> u64 { let bit_mask = bit_mask!(); let t0 = vandq_u8(p0, bit_mask); diff --git a/src/sse42/generator.rs b/src/sse42/generator.rs index 4b3a90f3..e6636585 100644 --- a/src/sse42/generator.rs +++ b/src/sse42/generator.rs @@ -1,72 +1,51 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; -#[macro_export] -macro_rules! process_32_bytes { - () => { - #[inline(always)] - unsafe fn process_32_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { - Ok(()) - } - } -} +use crate::value::generator::ESCAPED; +use std::io; -#[macro_export] -#[cfg(target_feature = "sse4.2")] -macro_rules! 
process_16_bytes { - () => { - #[inline(always)] - unsafe fn process_16_bytes(&mut self, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> { - // The case where we have a 16+ byte block - // we repeate the same logic as above but with - // only 16 bytes - let zero = _mm_set1_epi8(0); - let lower_quote_range = _mm_set1_epi8(0x1F as i8); - let quote = _mm_set1_epi8(b'"' as i8); - let backslash = _mm_set1_epi8(b'\\' as i8); - while *len - *idx > 16 { - // Load 16 bytes of data; - #[allow(clippy::cast_ptr_alignment)] - let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); - // Test the data against being backslash and quote. - let bs_or_quote = - _mm_or_si128(_mm_cmpeq_epi8(data, backslash), _mm_cmpeq_epi8(data, quote)); - // Now mask the data with the quote range (0x1F). - let in_quote_range = _mm_and_si128(data, lower_quote_range); - // then test of the data is unchanged. aka: xor it with the - // Any field that was inside the quote range it will be zero - // now. 
- let is_unchanged = _mm_xor_si128(data, in_quote_range); - let in_range = _mm_cmpeq_epi8(is_unchanged, zero); - let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); - if quote_bits != 0 { - let quote_dist = quote_bits.trailing_zeros() as usize; - stry!(self.get_writer().write_all(&string[0..*idx + quote_dist])); - let ch = string[*idx + quote_dist]; - match ESCAPED[ch as usize] { - b'u' => stry!(write!(self.get_writer(), "\\u{:04x}", ch)), +#[inline(always)] +pub unsafe fn write_str_simd(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write { + let zero = _mm_set1_epi8(0); + let lower_quote_range = _mm_set1_epi8(0x1F as i8); + let quote = _mm_set1_epi8(b'"' as i8); + let backslash = _mm_set1_epi8(b'\\' as i8); + while *len - *idx > 16 { + // Load 16 bytes of data; + #[allow(clippy::cast_ptr_alignment)] + let data: __m128i = _mm_loadu_si128(string.as_ptr().add(*idx) as *const __m128i); + // Test the data against being backslash and quote. + let bs_or_quote = _mm_or_si128( + _mm_cmpeq_epi8(data, backslash), + _mm_cmpeq_epi8(data, quote) + ); + // Now mask the data with the quote range (0x1F). + let in_quote_range = _mm_and_si128(data, lower_quote_range); + // then test of the data is unchanged. aka: xor it with the + // Any field that was inside the quote range it will be zero + // now. 
+ let is_unchanged = _mm_xor_si128(data, in_quote_range); + let in_range = _mm_cmpeq_epi8(is_unchanged, zero); + let quote_bits = _mm_movemask_epi8(_mm_or_si128(bs_or_quote, in_range)); + if quote_bits != 0 { + let quote_dist = quote_bits.trailing_zeros() as usize; + stry!(writer.write_all(&string[0..*idx + quote_dist])); + let ch = string[*idx + quote_dist]; + match ESCAPED[ch as usize] { + b'u' => stry!(write!(writer, "\\u{:04x}", ch)), - escape => stry!(self.write(&[b'\\', escape])), - }; - *string = &string[*idx + quote_dist + 1..]; - *idx = 0; - *len = string.len(); - } else { - *idx += 16; - } - } - stry!(self.get_writer().write_all(&string[0..*idx])); - *string = &string[*idx..]; - Ok(()) + escape => stry!(writer.write_all(&[b'\\', escape])), + }; + *string = &string[*idx + quote_dist + 1..]; + *idx = 0; + *len = string.len(); + } else { + *idx += 16; } } + stry!(writer.write_all(&string[0..*idx])); + *string = &string[*idx..]; + Ok(()) } - -#[macro_export] -#[cfg(not(target_feature = "sse4.2"))] -macro_rules! process_16_bytes { - () => { - #[inline(always)] - unsafe fn process_16_bytes(&mut self, _string: &mut &[u8], _len: &mut usize, _idx: &mut usize) -> io::Result<()> { - Ok(()) - } - } -} \ No newline at end of file diff --git a/src/value.rs b/src/value.rs index 06245878..c5d5aa87 100644 --- a/src/value.rs +++ b/src/value.rs @@ -11,7 +11,7 @@ /// we do not require prior knowledge sbout string comtent to to take advantage /// of it. 
pub mod borrowed; -mod generator; +pub(crate) mod generator; pub mod owned; pub use self::borrowed::{to_value as to_borrowed_value, Value as BorrowedValue}; diff --git a/src/value/generator.rs b/src/value/generator.rs index b053344f..ebdb0a88 100644 --- a/src/value/generator.rs +++ b/src/value/generator.rs @@ -10,21 +10,16 @@ use std::io::Write; use std::marker::PhantomData; use std::ptr; -#[cfg(target_arch = "x86")] -use std::arch::x86::*; -#[cfg(any(target_feature = "sse4.2", target_feature = "avx2"))] -use std::arch::x86_64::*; - use crate::*; #[cfg(target_feature = "avx2")] -pub use crate::avx2::generator::*; +use crate::avx2::generator::*; #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(target_feature = "avx2")))] -pub use crate::sse42::generator::*; +use crate::sse42::generator::*; #[cfg(target_feature = "neon")] -pub use crate::neon::generator::*; +use crate::neon::generator::*; const QU: u8 = b'"'; const BS: u8 = b'\\'; @@ -37,7 +32,7 @@ const UU: u8 = b'u'; const __: u8 = 0; // Look up table for characters that need escaping in a product string -static ESCAPED: [u8; 256] = [ +pub(crate) static ESCAPED: [u8; 256] = [ // 0 1 2 3 4 5 6 7 8 9 A B C D E F UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0 UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1 @@ -113,8 +108,7 @@ pub trait BaseGenerator { // quote characters that gives us a bitmask of 0x1f for that // region, only quote (`"`) and backslash (`\`) are not in // this range. 
- stry!(self.process_32_bytes(&mut string, &mut len, &mut idx)); - stry!(self.process_32_bytes(&mut string, &mut len, &mut idx)); + stry!(write_str_simd(self.get_writer(), &mut string, &mut len, &mut idx)); } // Legacy code to handle the remainder of the code for (index, ch) in string.iter().enumerate() { @@ -127,12 +121,6 @@ pub trait BaseGenerator { self.write_char(b'"') } - // 32-byte generation implementation - process_32_bytes!(); - - // 16-byte generation implementation - process_16_bytes!(); - #[inline(always)] fn write_float(&mut self, num: f64) -> io::Result<()> { let mut buffer = ryu::Buffer::new();