Skip to content

Commit a873c47

Browse files
committed
fix: utf8 encoding (still broken but closer)
1 parent e40145b commit a873c47

File tree

3 files changed

+31
-8
lines changed

3 files changed

+31
-8
lines changed

src/neon/deser.rs

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ impl<'de> Deserializer<'de> {
5656
pub fn parse_str_(&mut self) -> Result<&'de str> {
5757
// Add 1 to skip the initial "
5858
let idx = self.iidx + 1;
59-
// let mut padding = [0u8; 32];
59+
let mut padding = [0u8; 32];
6060
//let mut read: usize = 0;
6161

6262
// we include the terminal '"' so we know where to end
@@ -69,7 +69,19 @@ impl<'de> Deserializer<'de> {
6969
loop {
7070
// store to dest unconditionally - we can overwrite the bits we don't like
7171
// later
72-
let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], None) };
72+
73+
let srcx = if src.len() >= src_i + 32 {
74+
&src[src_i..]
75+
} else {
76+
unsafe {
77+
padding
78+
.get_unchecked_mut(..src.len() - src_i)
79+
.clone_from_slice(src.get_unchecked(src_i..));
80+
&padding
81+
}
82+
};
83+
84+
let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx, None) };
7385

7486
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
7587
// we encountered quotes first. Move dst to point to quotes and exit
@@ -111,9 +123,20 @@ impl<'de> Deserializer<'de> {
111123
let dst: &mut [u8] = &mut self.strings;
112124

113125
loop {
126+
let srcx = if src.len() >= src_i + 32 {
127+
&src[src_i..]
128+
} else {
129+
unsafe {
130+
padding
131+
.get_unchecked_mut(..src.len() - src_i)
132+
.clone_from_slice(src.get_unchecked(src_i..));
133+
&padding
134+
}
135+
};
136+
114137
// store to dest unconditionally - we can overwrite the bits we don't like
115138
// later
116-
let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&src[src_i..], Some(&mut dst[dst_i..])) };
139+
let ParseStringHelper { bs_bits, quote_bits } = unsafe { find_bs_bits_and_quote_bits(&srcx, Some(dst)) };
117140

118141
if (bs_bits.wrapping_sub(1) & quote_bits) != 0 {
119142
// we encountered quotes first. Move dst to point to quotes and exit

src/neon/stage1.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,13 @@ unsafe fn check_utf8(
109109
} else {
110110
// it is not ascii so we have to do heavy work
111111
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v0),
112-
&(state.previous), &mut (state.has_error));
112+
&mut (state.previous), &mut (state.has_error));
113113
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v1),
114-
&(state.previous), &mut (state.has_error));
114+
&mut (state.previous), &mut (state.has_error));
115115
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v2),
116-
&(state.previous), &mut (state.has_error));
116+
&mut (state.previous), &mut (state.has_error));
117117
state.previous = check_utf8_bytes(vreinterpretq_s8_u8(input.v3),
118-
&(state.previous), &mut (state.has_error));
118+
&mut (state.previous), &mut (state.has_error));
119119
}
120120
}
121121

src/neon/utf8check.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ unsafe fn count_nibbles(bytes: int8x16_t, answer: &mut ProcessedUtfBytes) {
162162
#[cfg_attr(not(feature = "no-inline"), inline)]
163163
pub fn check_utf8_bytes(
164164
current_bytes: int8x16_t,
165-
previous: &ProcessedUtfBytes,
165+
previous: &mut ProcessedUtfBytes,
166166
has_error: &mut int8x16_t,
167167
) -> ProcessedUtfBytes {
168168
let mut pb = ProcessedUtfBytes::default();

0 commit comments

Comments
 (0)