Skip to content

Commit 1368ee9

Browse files
5225225 authored and Licenser committed
Add bounds checked get_unchecked, use it everywhere.
1 parent 767ed75 commit 1368ee9

File tree

13 files changed

+174
-122
lines changed

13 files changed

+174
-122
lines changed

src/avx2/deser.rs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::arch::x86_64::{
1212
use std::mem;
1313

1414
pub use crate::error::{Error, ErrorType};
15+
use crate::safer_unchecked::GetSaferUnchecked;
1516
use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP};
1617
use crate::Deserializer;
1718
pub use crate::Result;
@@ -44,7 +45,7 @@ impl<'de> Deserializer<'de> {
4445
// This is safe since we check sub's length in the range access above and only
4546
// create sub sliced from sub to `sub.len()`.
4647

47-
let src: &[u8] = unsafe { data.get_unchecked(idx..) };
48+
let src: &[u8] = unsafe { data.get_kinda_unchecked(idx..) };
4849
let mut src_i: usize = 0;
4950
let mut len = src_i;
5051
loop {
@@ -77,7 +78,7 @@ impl<'de> Deserializer<'de> {
7778

7879
len += quote_dist as usize;
7980
unsafe {
80-
let v = input.get_unchecked(idx..idx + len) as *const [u8] as *const str;
81+
let v = input.get_kinda_unchecked(idx..idx + len) as *const [u8] as *const str;
8182
return Ok(&*v);
8283
}
8384

@@ -142,10 +143,10 @@ impl<'de> Deserializer<'de> {
142143
dst_i += quote_dist as usize;
143144
unsafe {
144145
input
145-
.get_unchecked_mut(idx + len..idx + len + dst_i)
146-
.clone_from_slice(buffer.get_unchecked(..dst_i));
146+
.get_kinda_unchecked_mut(idx + len..idx + len + dst_i)
147+
.clone_from_slice(buffer.get_kinda_unchecked(..dst_i));
147148
let v =
148-
input.get_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str;
149+
input.get_kinda_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str;
149150
return Ok(&*v);
150151
}
151152

@@ -155,16 +156,16 @@ impl<'de> Deserializer<'de> {
155156
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 {
156157
// find out where the backslash is
157158
let bs_dist: u32 = bs_bits.trailing_zeros();
158-
let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) };
159+
let escape_char: u8 = unsafe { *src.get_kinda_unchecked(src_i + bs_dist as usize + 1) };
159160
// we encountered backslash first. Handle backslash
160161
if escape_char == b'u' {
161162
// move src/dst up to the start; they will be further adjusted
162163
// within the unicode codepoint handling code.
163164
src_i += bs_dist as usize;
164165
dst_i += bs_dist as usize;
165166
let (o, s) = if let Ok(r) =
166-
handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe {
167-
buffer.get_unchecked_mut(dst_i..)
167+
handle_unicode_codepoint(unsafe { src.get_kinda_unchecked(src_i..) }, unsafe {
168+
buffer.get_kinda_unchecked_mut(dst_i..)
168169
}) {
169170
r
170171
} else {
@@ -182,12 +183,12 @@ impl<'de> Deserializer<'de> {
182183
// note this may reach beyond the part of the buffer we've actually
183184
// seen. I think this is ok
184185
let escape_result: u8 =
185-
unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) };
186+
unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) };
186187
if escape_result == 0 {
187188
return Err(Self::raw_error(src_i, escape_char as char, InvalidEscape));
188189
}
189190
unsafe {
190-
*buffer.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result;
191+
*buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result;
191192
}
192193
src_i += bs_dist as usize + 2;
193194
dst_i += bs_dist as usize + 1;

src/charutils.rs

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use crate::safer_unchecked::GetSaferUnchecked;
2+
13
const STRUCTURAL_OR_WHITESPACE_NEGATED: [u32; 256] = [
24
0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
@@ -22,12 +24,12 @@ const STRUCTURAL_OR_WHITESPACE: [u32; 256] = [
2224

2325
#[cfg_attr(not(feature = "no-inline"), inline(always))]
2426
pub fn is_not_structural_or_whitespace(c: u8) -> u32 {
25-
unsafe { *STRUCTURAL_OR_WHITESPACE_NEGATED.get_unchecked(c as usize) }
27+
unsafe { *STRUCTURAL_OR_WHITESPACE_NEGATED.get_kinda_unchecked(c as usize) }
2628
}
2729

2830
#[cfg_attr(not(feature = "no-inline"), inline(always))]
2931
pub fn is_structural_or_whitespace(c: u8) -> u32 {
30-
unsafe { *STRUCTURAL_OR_WHITESPACE.get_unchecked(c as usize) }
32+
unsafe { *STRUCTURAL_OR_WHITESPACE.get_kinda_unchecked(c as usize) }
3133
}
3234

3335
const DIGITTOVAL: [i8; 256] = [
@@ -55,10 +57,10 @@ pub fn hex_to_u32_nocheck(src: &[u8]) -> u32 {
5557
// invalid value. After the shifts, this will *still* result in the outcome that the high 16 bits of any
5658
// value with any invalid char will be all 1's. We check for this in the caller.
5759
unsafe {
58-
let v1: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(0) as usize));
59-
let v2: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(1) as usize));
60-
let v3: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(2) as usize));
61-
let v4: i32 = i32::from(*DIGITTOVAL.get_unchecked(*src.get_unchecked(3) as usize));
60+
let v1: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(0) as usize));
61+
let v2: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(1) as usize));
62+
let v3: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(2) as usize));
63+
let v4: i32 = i32::from(*DIGITTOVAL.get_kinda_unchecked(*src.get_kinda_unchecked(3) as usize));
6264
(v1 << 12 | v2 << 8 | v3 << 4 | v4) as u32
6365
}
6466
}
@@ -80,27 +82,27 @@ pub fn hex_to_u32_nocheck(src: &[u8]) -> u32 {
8082
pub fn codepoint_to_utf8(cp: u32, c: &mut [u8]) -> usize {
8183
unsafe {
8284
if cp <= 0x7F {
83-
*c.get_unchecked_mut(0) = cp as u8;
85+
*c.get_kinda_unchecked_mut(0) = cp as u8;
8486
return 1; // ascii
8587
}
8688
if cp <= 0x7FF {
87-
*c.get_unchecked_mut(0) = ((cp >> 6) + 192) as u8;
88-
*c.get_unchecked_mut(1) = ((cp & 63) + 128) as u8;
89+
*c.get_kinda_unchecked_mut(0) = ((cp >> 6) + 192) as u8;
90+
*c.get_kinda_unchecked_mut(1) = ((cp & 63) + 128) as u8;
8991
return 2; // universal plane
9092
// Surrogates are treated elsewhere...
9193
//} //else if (0xd800 <= cp && cp <= 0xdfff) {
9294
// return 0; // surrogates // could put assert here
9395
} else if cp <= 0xFFFF {
94-
*c.get_unchecked_mut(0) = ((cp >> 12) + 224) as u8;
95-
*c.get_unchecked_mut(1) = (((cp >> 6) & 63) + 128) as u8;
96-
*c.get_unchecked_mut(2) = ((cp & 63) + 128) as u8;
96+
*c.get_kinda_unchecked_mut(0) = ((cp >> 12) + 224) as u8;
97+
*c.get_kinda_unchecked_mut(1) = (((cp >> 6) & 63) + 128) as u8;
98+
*c.get_kinda_unchecked_mut(2) = ((cp & 63) + 128) as u8;
9799
return 3;
98100
} else if cp <= 0x0010_FFFF {
99101
// if you know you have a valid code point, this is not needed
100-
*c.get_unchecked_mut(0) = ((cp >> 18) + 240) as u8;
101-
*c.get_unchecked_mut(1) = (((cp >> 12) & 63) + 128) as u8;
102-
*c.get_unchecked_mut(2) = (((cp >> 6) & 63) + 128) as u8;
103-
*c.get_unchecked_mut(3) = ((cp & 63) + 128) as u8;
102+
*c.get_kinda_unchecked_mut(0) = ((cp >> 18) + 240) as u8;
103+
*c.get_kinda_unchecked_mut(1) = (((cp >> 12) & 63) + 128) as u8;
104+
*c.get_kinda_unchecked_mut(2) = (((cp >> 6) & 63) + 128) as u8;
105+
*c.get_kinda_unchecked_mut(3) = ((cp & 63) + 128) as u8;
104106
return 4;
105107
}
106108
}

src/lib.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ mod macros;
139139
mod error;
140140
mod numberparse;
141141
mod stringparse;
142+
mod safer_unchecked;
143+
144+
use safer_unchecked::GetSaferUnchecked;
142145

143146
/// Reexport of Cow
144147
pub mod cow;
@@ -481,9 +484,9 @@ impl<'de> Deserializer<'de> {
481484
unsafe {
482485
input_buffer
483486
.as_mut_slice()
484-
.get_unchecked_mut(..len)
487+
.get_kinda_unchecked_mut(..len)
485488
.clone_from_slice(input);
486-
*(input_buffer.get_unchecked_mut(len)) = 0;
489+
*(input_buffer.get_kinda_unchecked_mut(len)) = 0;
487490
input_buffer.set_len(len);
488491
};
489492

@@ -519,7 +522,7 @@ impl<'de> Deserializer<'de> {
519522
#[cfg_attr(not(feature = "no-inline"), inline(always))]
520523
pub unsafe fn next_(&mut self) -> Node<'de> {
521524
self.idx += 1;
522-
*self.tape.get_unchecked(self.idx)
525+
*self.tape.get_kinda_unchecked(self.idx)
523526
}
524527

525528
//#[inline(never)]
@@ -569,7 +572,7 @@ impl<'de> Deserializer<'de> {
569572
__builtin_prefetch(buf + idx + 128);
570573
#endif
571574
*/
572-
let chunk = input.get_unchecked(idx..idx + 64);
575+
let chunk = input.get_kinda_unchecked(idx..idx + 64);
573576
utf8_validator.update_from_chunks(chunk);
574577

575578
let input = SimdInput::new(chunk);

src/neon/deser.rs

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::safer_unchecked::GetSaferUnchecked;
12
use crate::error::ErrorType;
23
use crate::neon::stage1::bit_mask;
34
use crate::stringparse::{handle_unicode_codepoint, ESCAPE_MAP};
@@ -64,14 +65,14 @@ impl<'de> Deserializer<'de> {
6465
// This is safe since we check sub's length in the range access above and only
6566
// create sub sliced from sub to `sub.len()`.
6667

67-
let src: &[u8] = unsafe { data.get_unchecked(idx..) };
68+
let src: &[u8] = unsafe { data.get_kinda_unchecked(idx..) };
6869
let mut src_i: usize = 0;
6970
let mut len = src_i;
7071
loop {
7172
let (v0, v1) = unsafe {
7273
(
73-
vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()),
74-
vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()),
74+
vld1q_u8(src.get_kinda_unchecked(src_i..src_i + 16).as_ptr()),
75+
vld1q_u8(src.get_kinda_unchecked(src_i + 16..src_i + 32).as_ptr()),
7576
)
7677
};
7778

@@ -92,7 +93,7 @@ impl<'de> Deserializer<'de> {
9293

9394
len += quote_dist as usize;
9495
unsafe {
95-
let v = input.get_unchecked(idx..idx + len) as *const [u8] as *const str;
96+
let v = input.get_kinda_unchecked(idx..idx + len) as *const [u8] as *const str;
9697
return Ok(&*v);
9798
}
9899

@@ -119,15 +120,15 @@ impl<'de> Deserializer<'de> {
119120
loop {
120121
let (v0, v1) = unsafe {
121122
(
122-
vld1q_u8(src.get_unchecked(src_i..src_i + 16).as_ptr()),
123-
vld1q_u8(src.get_unchecked(src_i + 16..src_i + 32).as_ptr()),
123+
vld1q_u8(src.get_kinda_unchecked(src_i..src_i + 16).as_ptr()),
124+
vld1q_u8(src.get_kinda_unchecked(src_i + 16..src_i + 32).as_ptr()),
124125
)
125126
};
126127

127128
unsafe {
128129
buffer
129-
.get_unchecked_mut(dst_i..dst_i + 32)
130-
.copy_from_slice(src.get_unchecked(src_i..src_i + 32));
130+
.get_kinda_unchecked_mut(dst_i..dst_i + 32)
131+
.copy_from_slice(src.get_kinda_unchecked(src_i..src_i + 32));
131132
}
132133

133134
// store to dest unconditionally - we can overwrite the bits we don't like
@@ -150,10 +151,10 @@ impl<'de> Deserializer<'de> {
150151
dst_i += quote_dist as usize;
151152
unsafe {
152153
input
153-
.get_unchecked_mut(idx + len..idx + len + dst_i)
154-
.clone_from_slice(buffer.get_unchecked(..dst_i));
154+
.get_kinda_unchecked_mut(idx + len..idx + len + dst_i)
155+
.clone_from_slice(buffer.get_kinda_unchecked(..dst_i));
155156
let v =
156-
input.get_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str;
157+
input.get_kinda_unchecked(idx..idx + len + dst_i) as *const [u8] as *const str;
157158
return Ok(&*v);
158159
}
159160

@@ -163,16 +164,16 @@ impl<'de> Deserializer<'de> {
163164
if (quote_bits.wrapping_sub(1) & bs_bits) != 0 {
164165
// find out where the backslash is
165166
let bs_dist: u32 = bs_bits.trailing_zeros();
166-
let escape_char: u8 = unsafe { *src.get_unchecked(src_i + bs_dist as usize + 1) };
167+
let escape_char: u8 = unsafe { *src.get_kinda_unchecked(src_i + bs_dist as usize + 1) };
167168
// we encountered backslash first. Handle backslash
168169
if escape_char == b'u' {
169170
// move src/dst up to the start; they will be further adjusted
170171
// within the unicode codepoint handling code.
171172
src_i += bs_dist as usize;
172173
dst_i += bs_dist as usize;
173174
let (o, s) = if let Ok(r) =
174-
handle_unicode_codepoint(unsafe { src.get_unchecked(src_i..) }, unsafe {
175-
buffer.get_unchecked_mut(dst_i..)
175+
handle_unicode_codepoint(unsafe { src.get_kinda_unchecked(src_i..) }, unsafe {
176+
buffer.get_kinda_unchecked_mut(dst_i..)
176177
}) {
177178
r
178179
} else {
@@ -190,12 +191,12 @@ impl<'de> Deserializer<'de> {
190191
// note this may reach beyond the part of the buffer we've actually
191192
// seen. I think this is ok
192193
let escape_result: u8 =
193-
unsafe { *ESCAPE_MAP.get_unchecked(escape_char as usize) };
194+
unsafe { *ESCAPE_MAP.get_kinda_unchecked(escape_char as usize) };
194195
if escape_result == 0 {
195196
return Err(Self::raw_error(src_i, escape_char as char, InvalidEscape));
196197
}
197198
unsafe {
198-
*buffer.get_unchecked_mut(dst_i + bs_dist as usize) = escape_result;
199+
*buffer.get_kinda_unchecked_mut(dst_i + bs_dist as usize) = escape_result;
199200
}
200201
src_i += bs_dist as usize + 2;
201202
dst_i += bs_dist as usize + 1;

0 commit comments

Comments
 (0)