Skip to content

Commit 6faef8e

Browse files
committed
rename unescape_for_errors -> check_for_errors, and improve docs
1 parent 45a5bf4 commit 6faef8e

File tree

2 files changed

+48
-15
lines changed

2 files changed

+48
-15
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
- Add `check_raw_str`, `check_raw_byte_str`, `check_raw_c_str`,
44
- Add `unescape_str`, `unescape_byte_str`, `unescape_c_str`,
5-
- Add `unescape_for_errors`,
5+
- Add `check_for_errors`,
66
- Remove: `unescape_unicode` and `unescape_mixed`
77

88
# 0.0.2

src/lib.rs

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
//! Utilities for validating string and char literals and turning them into
2-
//! values they represent.
1+
//! Utilities for validating (raw) string, char, and byte literals and
2+
//! turning escape sequences into the values they represent.
33
44
use std::ffi::CStr;
55
use std::ops::Range;
@@ -8,9 +8,9 @@ use std::str::Chars;
88
#[cfg(test)]
99
mod tests;
1010

11-
/// Errors and warnings that can occur during string unescaping. They mostly
12-
/// relate to malformed escape sequences, but there are a few that are about
13-
/// other problems.
11+
/// Errors and warnings that can occur during string, char, and byte unescaping.
12+
///
13+
/// They mostly relate to malformed escape sequences, but a few are about other problems.
1414
#[derive(Debug, PartialEq, Eq)]
1515
pub enum EscapeError {
1616
/// Expected 1 char, but 0 were found.
@@ -58,7 +58,7 @@ pub enum EscapeError {
5858
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
5959
NonAsciiCharInByte,
6060

61-
// `\0` in a C string literal.
61+
/// `\0` in a C string literal.
6262
NulInCStr,
6363

6464
/// After a line ending with '\', the next line contains whitespace
@@ -79,6 +79,8 @@ impl EscapeError {
7979
}
8080
}
8181

82+
/// Check a raw string literal for validity
83+
///
8284
/// Takes the contents of a raw string literal (without quotes)
8385
/// and produces a sequence of characters or errors,
8486
/// which are returned by invoking `callback`.
@@ -87,6 +89,8 @@ pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char,
8789
str::check_raw(src, callback);
8890
}
8991

92+
/// Check a raw byte string literal for validity
93+
///
9094
/// Takes the contents of a raw byte string literal (without quotes)
9195
/// and produces a sequence of bytes or errors,
9296
/// which are returned by invoking `callback`.
@@ -95,6 +99,8 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
9599
<[u8]>::check_raw(src, callback);
96100
}
97101

102+
/// Check a raw C string literal for validity
103+
///
98104
/// Takes the contents of a raw C string literal (without quotes)
99105
/// and produces a sequence of characters or errors,
100106
/// which are returned by invoking `callback`.
@@ -103,7 +109,7 @@ pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char
103109
CStr::check_raw(src, callback);
104110
}
105111

106-
/// trait for checking raw strings
112+
/// Trait for checking raw string literals for validity
107113
trait CheckRaw {
108114
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
109115
type RawUnit;
@@ -149,6 +155,7 @@ impl CheckRaw for [u8] {
149155
}
150156
}
151157

158+
/// Turn an ASCII char into a byte
152159
fn char2byte(c: char) -> Result<u8, EscapeError> {
153160
// do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte)
154161
if c.is_ascii() {
@@ -170,32 +177,42 @@ impl CheckRaw for CStr {
170177
}
171178
}
172179

180+
/// Unescape a char literal
181+
///
173182
/// Takes the contents of a char literal (without quotes),
174183
/// and returns an unescaped char or an error.
175184
pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
176185
str::unescape_single(&mut src.chars())
177186
}
178187

188+
/// Unescape a byte literal
189+
///
179190
/// Takes the contents of a byte literal (without quotes),
180191
/// and returns an unescaped byte or an error.
181192
pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
182193
<[u8]>::unescape_single(&mut src.chars())
183194
}
184195

196+
/// Unescape a string literal
197+
///
185198
/// Takes the contents of a string literal (without quotes)
186199
/// and produces a sequence of unescaped characters or errors,
187200
/// which are returned by invoking `callback`.
188201
pub fn unescape_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
189202
str::unescape(src, callback)
190203
}
191204

205+
/// Unescape a byte string literal
206+
///
192207
/// Takes the contents of a byte string literal (without quotes)
193208
/// and produces a sequence of unescaped bytes or errors,
194209
/// which are returned by invoking `callback`.
195210
pub fn unescape_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) {
196211
<[u8]>::unescape(src, callback)
197212
}
198213

214+
/// Unescape a C string literal
215+
///
199216
/// Takes the contents of a C string literal (without quotes)
200217
/// and produces a sequence of unescaped `MixedUnit`s or errors,
201218
/// which are returned by invoking `callback`.
@@ -206,6 +223,8 @@ pub fn unescape_c_str(
206223
CStr::unescape(src, callback)
207224
}
208225

226+
/// Enum representing either a char or a byte
227+
///
209228
/// Used for mixed utf8 string literals, i.e. those that allow both unicode
210229
/// chars and high bytes.
211230
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -242,7 +261,7 @@ impl From<u8> for MixedUnit {
242261
}
243262
}
244263

245-
/// trait for unescaping escape sequences in strings
264+
/// Trait for unescaping escape sequences in strings
246265
trait Unescape {
247266
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
248267
type Unit: From<u8>;
@@ -295,7 +314,9 @@ trait Unescape {
295314
}
296315
}
297316

298-
/// Takes the contents of a raw literal (without quotes)
317+
/// Unescape a string literal
318+
///
319+
/// Takes the contents of a string literal (without quotes)
299320
/// and produces a sequence of `Result<Self::Unit, EscapeError>`
300321
/// which are returned via `callback`.
301322
fn unescape(
@@ -328,7 +349,9 @@ trait Unescape {
328349
}
329350
}
330351

331-
/// Parse the character of an ASCII escape (except nul) without the leading backslash.
352+
/// Interpret a non-nul ASCII escape
353+
///
354+
/// Parses the character of an ASCII escape (except nul) without the leading backslash.
332355
fn simple_escape(c: char) -> Result<u8, char> {
333356
// Previous character was '\\', unescape what follows.
334357
Ok(match c {
@@ -342,7 +365,9 @@ fn simple_escape(c: char) -> Result<u8, char> {
342365
})
343366
}
344367

345-
/// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
368+
/// Interpret a hexadecimal escape
369+
///
370+
/// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
346371
fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError> {
347372
let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
348373
let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
@@ -353,6 +378,8 @@ fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError>
353378
Ok((hi * 16 + lo) as u8)
354379
}
355380

381+
/// Interpret a unicode escape
382+
///
356383
/// Parse the brace-enclosed part of a unicode escape: hexadecimal characters (and underscores).
357384
/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
358385
fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeError> {
@@ -400,6 +427,8 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
400427
}
401428
}
402429

430+
/// Interpret a [string continuation escape](https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
431+
///
403432
/// Skip ASCII whitespace, except for the formfeed character
404433
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
405434
/// Warns on unescaped newline and following non-ASCII whitespace.
@@ -501,7 +530,7 @@ impl Unescape for CStr {
501530
}
502531
}
503532

504-
/// What kind of literal do we parse.
533+
/// Enum of the different kinds of literal
505534
#[derive(Debug, Clone, Copy, PartialEq)]
506535
pub enum Mode {
507536
Char,
@@ -540,10 +569,14 @@ impl Mode {
540569
}
541570
}
542571

572+
/// Check a literal only for errors
573+
///
543574
/// Takes the contents of a literal (without quotes)
544-
/// and produces a sequence of errors,
575+
/// and produces only a sequence of errors,
545576
/// which are returned by invoking `error_callback`.
546-
pub fn unescape_for_errors(
577+
///
578+
/// NB: Does not produce any output other than errors.
579+
pub fn check_for_errors(
547580
src: &str,
548581
mode: Mode,
549582
mut error_callback: impl FnMut(Range<usize>, EscapeError),

0 commit comments

Comments
 (0)