-
Notifications
You must be signed in to change notification settings - Fork 91
RFC: Neon support (pretty much working) #35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
34 commits
Select commit
Hold shift + click to select a range
fa26882
feat: neon support (mostly broken)
sunnygleason 9e96fab
feat: progress on neon compilation
sunnygleason 05fd4f3
feat: partial implementation of deserializer (broken, needs movemask)
sunnygleason 086e745
feat: re-enable tests (still broken)
sunnygleason b9b3900
feat: remove core_arch unused dependency
sunnygleason 1c989cf
feat: update string parse
sunnygleason 5926070
feat: additional tweaks, not breaks during linking
sunnygleason 62a3f93
feat: fixing intrinsics (maybe)
sunnygleason bca6cba
feat: temp stub replacements for intrinsics (still broken)
sunnygleason 620e697
feat: get code closer to simdjson (still broken)
sunnygleason 68691ad
feat: trying to fix parse_str
sunnygleason be60690
fix: numberparse
sunnygleason e6609ff
fix: endian in flatten_bits
sunnygleason 45cbbd4
fix: utf8 encoding (still broken but closer)
sunnygleason 15c892b
fix: use write instead of intrinsic (for now)
sunnygleason e3872b5
fix: fix string parsing (mostly, thanks to @licenser)
sunnygleason 782b0a6
fix: add_overflow should always output carry
sunnygleason 98b3356
fix: tests PASS, improved comparison operators, thanks @Licenser :)
sunnygleason 19ad13e
fix: update arm64 to use nightly
sunnygleason a97487b
fix: use rust nightly image for drone CI
sunnygleason faea015
fix: drone CI rustup nightly
sunnygleason d66d68a
feat: fix guards, use rust stdlib for bit count operations
sunnygleason 4c51dc4
feat: address review comments, platform handling and misc
sunnygleason bdc82d6
feat: refactor immediate tables into loads
sunnygleason 4fb672a
feat: improving code style and similarity across architectures
sunnygleason ee2bd9b
feat: update generator with neon support, conditional compilation
sunnygleason 75511d3
fix: remove double semicolon
sunnygleason 23191c6
feat: conditional feature enablement for neon only
sunnygleason d514f2d
fix: add conditional compilation for target_feature=-avx2,-sse4.2 on …
sunnygleason 3bdc169
feat: factor arch-specific methods into separate modules (with macros…
sunnygleason 40bb843
fix: does drone need clean?
sunnygleason 78b0aa8
fix: utf8 error checking (maybe)
sunnygleason 5a23b40
fix: utf8 error checking (maybe maybe)
sunnygleason af177f3
feat: fancy generic generator functions, thanks @Licenser
sunnygleason File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#[cfg(target_arch = "x86")] | ||
use std::arch::x86::*; | ||
#[cfg(target_arch = "x86_64")] | ||
use std::arch::x86_64::*; | ||
|
||
use crate::value::generator::ESCAPED; | ||
use std::io; | ||
|
||
#[inline(always)] | ||
pub unsafe fn write_str_simd<W>(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write { | ||
let zero = _mm256_set1_epi8(0); | ||
let lower_quote_range = _mm256_set1_epi8(0x1F as i8); | ||
let quote = _mm256_set1_epi8(b'"' as i8); | ||
let backslash = _mm256_set1_epi8(b'\\' as i8); | ||
while *len - *idx >= 32 { | ||
// Load 32 bytes of data; | ||
#[allow(clippy::cast_ptr_alignment)] | ||
let data: __m256i = _mm256_loadu_si256(string.as_ptr().add(*idx) as *const __m256i); | ||
// Test the data against being backslash and quote. | ||
let bs_or_quote = _mm256_or_si256( | ||
_mm256_cmpeq_epi8(data, backslash), | ||
_mm256_cmpeq_epi8(data, quote), | ||
); | ||
// Now mask the data with the quote range (0x1F). | ||
let in_quote_range = _mm256_and_si256(data, lower_quote_range); | ||
// then test of the data is unchanged. aka: xor it with the | ||
// Any field that was inside the quote range it will be zero | ||
// now. | ||
let is_unchanged = _mm256_xor_si256(data, in_quote_range); | ||
let in_range = _mm256_cmpeq_epi8(is_unchanged, zero); | ||
let quote_bits = _mm256_movemask_epi8(_mm256_or_si256(bs_or_quote, in_range)); | ||
if quote_bits != 0 { | ||
let quote_dist = quote_bits.trailing_zeros() as usize; | ||
stry!(writer.write_all(&string[0..*idx + quote_dist])); | ||
let ch = string[*idx + quote_dist]; | ||
match ESCAPED[ch as usize] { | ||
b'u' => stry!(write!(writer, "\\u{:04x}", ch)), | ||
|
||
escape => stry!(writer.write_all(&[b'\\', escape])), | ||
}; | ||
*string = &string[*idx + quote_dist + 1..]; | ||
*idx = 0; | ||
*len = string.len(); | ||
} else { | ||
*idx += 32; | ||
} | ||
} | ||
stry!(writer.write_all(&string[0..*idx])); | ||
*string = &string[*idx..]; | ||
Ok(()) | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
pub mod deser; | ||
pub mod stage1; | ||
pub mod utf8check; | ||
pub mod utf8check; | ||
pub mod generator; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,199 @@ | ||
|
||
pub use crate::error::{Error, ErrorType}; | ||
pub use crate::Deserializer; | ||
pub use crate::Result; | ||
pub use crate::neon::stage1::*; | ||
pub use crate::neon::utf8check::*; | ||
pub use crate::neon::intrinsics::*; | ||
pub use crate::stringparse::*; | ||
|
||
impl<'de> Deserializer<'de> { | ||
#[cfg_attr(not(feature = "no-inline"), inline(always))] | ||
pub fn parse_str_(&mut self) -> Result<&'de str> { | ||
// Add 1 to skip the initial " | ||
let idx = self.iidx + 1; | ||
let mut padding = [0u8; 32]; | ||
//let mut read: usize = 0; | ||
|
||
// we include the terminal '"' so we know where to end | ||
// This is safe since we check sub's lenght in the range access above and only | ||
// create sub sliced form sub to `sub.len()`. | ||
|
||
let src: &[u8] = unsafe { &self.input.get_unchecked(idx..) }; | ||
let mut src_i: usize = 0; | ||
let mut len = src_i; | ||
loop { | ||
// store to dest unconditionally - we can overwrite the bits we don't like | ||
// later | ||
|
||
let (v0, v1) = if src.len() >= src_i + 32 { | ||
// This is safe since we ensure src is at least 16 wide | ||
#[allow(clippy::cast_ptr_alignment)] | ||
unsafe { | ||
( | ||
vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()), | ||
vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()), | ||
) | ||
} | ||
} else { | ||
unsafe { | ||
padding | ||
.get_unchecked_mut(..src.len() - src_i) | ||
.clone_from_slice(src.get_unchecked(src_i..)); | ||
// This is safe since we ensure src is at least 32 wide | ||
( | ||
vld1q_u8(padding.get_unchecked(0..16).as_ptr()), | ||
vld1q_u8(padding.get_unchecked(16..32).as_ptr()), | ||
) | ||
} | ||
}; | ||
|
||
let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits(v0, v1); | ||
|
||
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { | ||
// we encountered quotes first. Move dst to point to quotes and exit | ||
// find out where the quote is... | ||
let quote_dist: u32 = quote_bits.trailing_zeros(); | ||
|
||
/////////////////////// | ||
// Above, check for overflow in case someone has a crazy string (>=4GB?) | ||
// But only add the overflow check when the document itself exceeds 4GB | ||
// Currently unneeded because we refuse to parse docs larger or equal to 4GB. | ||
//////////////////////// | ||
|
||
// we advance the point, accounting for the fact that we have a NULl termination | ||
|
||
len += quote_dist as usize; | ||
unsafe { | ||
let v = self.input.get_unchecked(idx..idx + len) as *const [u8] as *const str; | ||
return Ok(&*v); | ||
} | ||
|
||
// we compare the pointers since we care if they are 'at the same spot' | ||
// not if they are the same value | ||
} | ||
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { | ||
// Move to the 'bad' character | ||
let bs_dist: u32 = bs_bits.trailing_zeros(); | ||
len += bs_dist as usize; | ||
src_i += bs_dist as usize; | ||
break; | ||
} else { | ||
// they are the same. Since they can't co-occur, it means we encountered | ||
// neither. | ||
src_i += 32; | ||
len += 32; | ||
} | ||
} | ||
|
||
let mut dst_i: usize = 0; | ||
let dst: &mut [u8] = self.strings.as_mut_slice(); | ||
|
||
loop { | ||
let (v0, v1) = if src.len() >= src_i + 32 { | ||
// This is safe since we ensure src is at least 16 wide | ||
#[allow(clippy::cast_ptr_alignment)] | ||
unsafe { | ||
( | ||
vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()), | ||
vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()), | ||
) | ||
} | ||
} else { | ||
unsafe { | ||
padding | ||
.get_unchecked_mut(..src.len() - src_i) | ||
.clone_from_slice(src.get_unchecked(src_i..)); | ||
// This is safe since we ensure src is at least 32 wide | ||
( | ||
vld1q_u8(padding.get_unchecked(0..16).as_ptr()), | ||
vld1q_u8(padding.get_unchecked(16..32).as_ptr()), | ||
) | ||
} | ||
}; | ||
|
||
unsafe { | ||
dst.get_unchecked_mut(dst_i..dst_i + 32).copy_from_slice(src.get_unchecked(src_i..src_i + 32)); | ||
} | ||
|
||
// store to dest unconditionally - we can overwrite the bits we don't like | ||
// later | ||
let ParseStringHelper { bs_bits, quote_bits } = find_bs_bits_and_quote_bits(v0, v1); | ||
|
||
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 { | ||
sunnygleason marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// we encountered quotes first. Move dst to point to quotes and exit | ||
// find out where the quote is... | ||
let quote_dist: u32 = quote_bits.trailing_zeros(); | ||
|
||
/////////////////////// | ||
// Above, check for overflow in case someone has a crazy string (>=4GB?) | ||
// But only add the overflow check when the document itself exceeds 4GB | ||
// Currently unneeded because we refuse to parse docs larger or equal to 4GB. | ||
//////////////////////// | ||
|
||
// we advance the point, accounting for the fact that we have a NULl termination | ||
|
||
dst_i += quote_dist as usize; | ||
unsafe { | ||
self.input | ||
.get_unchecked_mut(idx + len..idx + len + dst_i) | ||
.clone_from_slice(&self.strings.get_unchecked(..dst_i)); | ||
let v = self.input.get_unchecked(idx..idx + len + dst_i) as *const [u8] | ||
as *const str; | ||
self.str_offset += dst_i as usize; | ||
return Ok(&*v); | ||
} | ||
|
||
// we compare the pointers since we care if they are 'at the same spot' | ||
// not if they are the same value | ||
} | ||
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 { | ||
// find out where the backspace is | ||
let bs_dist: u32 = bs_bits.trailing_zeros(); | ||
let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) }; | ||
// we encountered backslash first. Handle backslash | ||
if escape_char == b'u' { | ||
// move src/dst up to the start; they will be further adjusted | ||
// within the unicode codepoint handling code. | ||
src_i += bs_dist as usize; | ||
dst_i += bs_dist as usize; | ||
let (o, s) = if let Ok(r) = handle_unicode_codepoint( | ||
unsafe { src.get_unchecked(src_i..) }, | ||
unsafe { dst.get_unchecked_mut(dst_i..) } | ||
) | ||
{ | ||
r | ||
} else { | ||
return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); | ||
}; | ||
if o == 0 { | ||
return Err(self.error(ErrorType::InvlaidUnicodeCodepoint)); | ||
}; | ||
// We moved o steps forword at the destiation and 6 on the source | ||
src_i += s; | ||
dst_i += o; | ||
} else { | ||
// simple 1:1 conversion. Will eat bs_dist+2 characters in input and | ||
// write bs_dist+1 characters to output | ||
// note this may reach beyond the part of the buffer we've actually | ||
// seen. I think this is ok | ||
let escape_result: u8 = | ||
unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) }; | ||
if escape_result == 0 { | ||
return Err(self.error(ErrorType::InvalidEscape)); | ||
} | ||
unsafe { | ||
*dst.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result; | ||
} | ||
src_i += bs_dist as usize + 2; | ||
dst_i += bs_dist as usize + 1; | ||
} | ||
} else { | ||
// they are the same. Since they can't co-occur, it means we encountered | ||
// neither. | ||
src_i += 32; | ||
dst_i += 32; | ||
} | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
use crate::value::generator::ESCAPED; | ||
use std::io; | ||
use crate::neon::intrinsics::*; | ||
use crate::neon::stage1::neon_movemask; | ||
|
||
#[inline(always)] | ||
pub unsafe fn write_str_simd<W>(writer: &mut W, string: &mut &[u8], len: &mut usize, idx: &mut usize) -> io::Result<()> where W: std::io::Write { | ||
// The case where we have a 16+ byte block | ||
// we repeate the same logic as above but with | ||
// only 16 bytes | ||
let zero = vdupq_n_u8(0); | ||
let lower_quote_range = vdupq_n_u8(0x1F); | ||
let quote = vdupq_n_u8(b'"'); | ||
let backslash = vdupq_n_u8(b'\\'); | ||
while *len - *idx > 16 { | ||
// Load 16 bytes of data; | ||
let data: uint8x16_t = vld1q_u8(string.as_ptr().add(*idx)); | ||
// Test the data against being backslash and quote. | ||
let bs_or_quote = | ||
vorrq_u8(vceqq_u8(data, backslash), vceqq_u8(data, quote)); | ||
// Now mask the data with the quote range (0x1F). | ||
let in_quote_range = vandq_u8(data, lower_quote_range); | ||
// then test of the data is unchanged. aka: xor it with the | ||
// Any field that was inside the quote range it will be zero | ||
// now. | ||
let is_unchanged = vxorrq_u8(data, in_quote_range); | ||
let in_range = vceqq_u8(is_unchanged, zero); | ||
let quote_bits = neon_movemask(vorrq_u8(bs_or_quote, in_range)); | ||
if quote_bits != 0 { | ||
let quote_dist = quote_bits.trailing_zeros() as usize; | ||
stry!(writer.write_all(&string[0..*idx + quote_dist])); | ||
let ch = string[*idx + quote_dist]; | ||
match ESCAPED[ch as usize] { | ||
b'u' => stry!(write!(writer, "\\u{:04x}", ch)), | ||
|
||
escape => stry!(writer.write_all(&[b'\\', escape])), | ||
}; | ||
*string = &string[*idx + quote_dist + 1..]; | ||
*idx = 0; | ||
*len = string.len(); | ||
} else { | ||
*idx += 16; | ||
} | ||
} | ||
stry!(writer.write_all(&string[0..*idx])); | ||
*string = &string[*idx..]; | ||
Ok(()) | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.