Skip to content

Commit a52b36a

Browse files
committed
replace check_raw_common with trait
1 parent ec07712 commit a52b36a

File tree

1 file changed

+85
-43
lines changed

1 file changed

+85
-43
lines changed

src/lib.rs

Lines changed: 85 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Utilities for validating string and char literals and turning them into
22
//! values they represent.
33
4+
use std::ffi::CStr;
45
use std::ops::Range;
56
use std::str::Chars;
67

@@ -138,37 +139,94 @@ pub fn unescape_for_errors(
138139
/// and produces a sequence of characters or errors,
139140
/// which are returned by invoking `callback`.
140141
/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
141-
pub fn check_raw_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
142-
check_raw_common(src, Mode::RawStr, &mut callback)
142+
pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
143+
str::check_raw(src, callback);
143144
}
144145

145146
/// Takes the contents of a raw byte string literal (without quotes)
146147
/// and produces a sequence of bytes or errors,
147148
/// which are returned by invoking `callback`.
148149
/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
149-
pub fn check_raw_byte_str(
150-
src: &str,
151-
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
152-
) {
153-
check_raw_common(src, Mode::RawByteStr, &mut |r, res| {
154-
callback(r, res.map(byte_from_char))
155-
})
150+
pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
151+
<[u8]>::check_raw(src, callback);
156152
}
157153

158154
/// Takes the contents of a raw C string literal (without quotes)
159155
/// and produces a sequence of characters or errors,
160156
/// which are returned by invoking `callback`.
161157
/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
162-
pub fn check_raw_c_str(
163-
src: &str,
164-
mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>),
165-
) {
166-
check_raw_common(src, Mode::RawCStr, &mut |r, mut result| {
167-
if let Ok('\0') = result {
168-
result = Err(EscapeError::NulInCStr);
158+
pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
159+
CStr::check_raw(src, callback);
160+
}
161+
162+
/// Trait for checking the contents of raw string literals.
163+
trait CheckRaw {
164+
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
165+
type RawUnit;
166+
167+
/// Converts a `char` to the unit type of the literal type, or returns an `EscapeError` if the char is not allowed in that literal
168+
fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError>;
169+
170+
/// Takes the contents of a raw literal (without quotes)
171+
/// and produces a sequence of `Result<Self::RawUnit, EscapeError>`
172+
/// which are returned via `callback`.
173+
///
174+
/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
175+
fn check_raw(
176+
src: &str,
177+
mut callback: impl FnMut(Range<usize>, Result<Self::RawUnit, EscapeError>),
178+
) {
179+
let mut chars = src.chars();
180+
while let Some(c) = chars.next() {
181+
let start = src.len() - chars.as_str().len() - c.len_utf8();
182+
let res = match c {
183+
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
184+
_ => Self::char2raw_unit(c),
185+
};
186+
let end = src.len() - chars.as_str().len();
187+
callback(start..end, res);
169188
}
170-
callback(r, result)
171-
})
189+
190+
// It is unclear whether the following equivalent code is slower or faster than the loop above; see bug 141855.
191+
// src.char_indices().for_each(|(pos, c)| {
192+
// callback(
193+
// pos..pos + c.len_utf8(),
194+
// if c == '\r' {
195+
// Err(EscapeError::BareCarriageReturnInRawString)
196+
// } else {
197+
// Self::char2raw_unit(c)
198+
// },
199+
// );
200+
// });
201+
}
202+
}
203+
204+
impl CheckRaw for str {
205+
type RawUnit = char;
206+
207+
fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
208+
Ok(c)
209+
}
210+
}
211+
212+
impl CheckRaw for [u8] {
213+
type RawUnit = u8;
214+
215+
fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
216+
char2byte(c)
217+
}
218+
}
219+
220+
impl CheckRaw for CStr {
221+
type RawUnit = char;
222+
223+
fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> {
224+
if c == '\0' {
225+
Err(EscapeError::NulInCStr)
226+
} else {
227+
Ok(c)
228+
}
229+
}
172230
}
173231

174232
/// Takes the contents of a string literal (without quotes)
@@ -497,34 +555,18 @@ where
497555
*chars = tail.chars();
498556
}
499557

500-
/// Takes a contents of a string literal (without quotes) and produces a
501-
/// sequence of characters or errors.
502-
/// NOTE: Raw strings do not perform any explicit character escaping, here we
503-
/// only produce errors on bare CR.
504-
fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F)
505-
where
506-
F: FnMut(Range<usize>, Result<char, EscapeError>),
507-
{
508-
let mut chars = src.chars();
509-
let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop
510-
511-
// The `start` and `end` computation here matches the one in
512-
// `unescape_non_raw_common` for consistency, even though this function
513-
// doesn't have to worry about skipping any chars.
514-
while let Some(c) = chars.next() {
515-
let start = src.len() - chars.as_str().len() - c.len_utf8();
516-
let res = match c {
517-
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
518-
_ => ascii_check(c, allow_unicode_chars),
519-
};
520-
let end = src.len() - chars.as_str().len();
521-
callback(start..end, res);
522-
}
523-
}
524-
525558
#[inline]
526559
fn byte_from_char(c: char) -> u8 {
527560
let res = c as u32;
528561
debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
529562
res as u8
530563
}
564+
565+
fn char2byte(c: char) -> Result<u8, EscapeError> {
566+
// do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte) -- `u8::try_from(char)` also succeeds for non-ASCII chars in U+0080..=U+00FF
567+
if c.is_ascii() {
568+
Ok(c as u8)
569+
} else {
570+
Err(EscapeError::NonAsciiCharInByte)
571+
}
572+
}

0 commit comments

Comments
 (0)