1
- //! Utilities for validating string and char literals and turning them into
2
- //! values they represent.
1
+ //! Utilities for validating (raw) string, char, and byte literals and
2
+ //! turning escape sequences into the values they represent.
3
3
4
4
use std:: ffi:: CStr ;
5
5
use std:: ops:: Range ;
@@ -8,9 +8,9 @@ use std::str::Chars;
8
8
#[ cfg( test) ]
9
9
mod tests;
10
10
11
- /// Errors and warnings that can occur during string unescaping. They mostly
12
- /// relate to malformed escape sequences, but there are a few that are about
13
- /// other problems.
11
+ /// Errors and warnings that can occur during string, char, and byte unescaping.
12
+ ///
13
+ /// They mostly relate to malformed escape sequences, but a few cover other problems.
14
14
#[ derive( Debug , PartialEq , Eq ) ]
15
15
pub enum EscapeError {
16
16
/// Expected 1 char, but 0 were found.
@@ -58,7 +58,7 @@ pub enum EscapeError {
58
58
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
59
59
NonAsciiCharInByte ,
60
60
61
- // `\0` in a C string literal.
61
+ /// `\0` in a C string literal.
62
62
NulInCStr ,
63
63
64
64
/// After a line ending with '\', the next line contains whitespace
@@ -79,6 +79,8 @@ impl EscapeError {
79
79
}
80
80
}
81
81
82
+ /// Check a raw string literal for validity
83
+ ///
82
84
/// Takes the contents of a raw string literal (without quotes)
83
85
/// and produces a sequence of characters or errors,
84
86
/// which are returned by invoking `callback`.
@@ -87,6 +89,8 @@ pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char,
87
89
str:: check_raw ( src, callback) ;
88
90
}
89
91
92
+ /// Check a raw byte string literal for validity
93
+ ///
90
94
/// Takes the contents of a raw byte string literal (without quotes)
91
95
/// and produces a sequence of bytes or errors,
92
96
/// which are returned by invoking `callback`.
@@ -95,6 +99,8 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
95
99
<[ u8 ] >:: check_raw ( src, callback) ;
96
100
}
97
101
102
+ /// Check a raw C string literal for validity
103
+ ///
98
104
/// Takes the contents of a raw C string literal (without quotes)
99
105
/// and produces a sequence of characters or errors,
100
106
/// which are returned by invoking `callback`.
@@ -103,7 +109,7 @@ pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char
103
109
CStr :: check_raw ( src, callback) ;
104
110
}
105
111
106
- /// trait for checking raw strings
112
+ /// Trait for checking raw string literals for validity
107
113
trait CheckRaw {
108
114
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
109
115
type RawUnit ;
@@ -149,6 +155,7 @@ impl CheckRaw for [u8] {
149
155
}
150
156
}
151
157
158
+ /// Turn an ASCII char into a byte
152
159
fn char2byte ( c : char ) -> Result < u8 , EscapeError > {
153
160
// do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte)
154
161
if c. is_ascii ( ) {
@@ -170,32 +177,42 @@ impl CheckRaw for CStr {
170
177
}
171
178
}
172
179
180
+ /// Unescape a char literal
181
+ ///
173
182
/// Takes the contents of a char literal (without quotes),
174
183
/// and returns an unescaped char or an error.
175
184
pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
// Hands a character iterator over `src` to `str::unescape_single` and returns its result unchanged.
// NOTE(review): `unescape_single` is presumably provided by the `Unescape` trait; its definition is not visible here, so confirm.
176
185
str:: unescape_single ( & mut src. chars ( ) )
177
186
}
178
187
188
+ /// Unescape a byte literal
189
+ ///
179
190
/// Takes the contents of a byte literal (without quotes),
180
191
/// and returns an unescaped byte or an error.
181
192
pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
// Same shape as the char variant, but dispatches on `[u8]` so the result unit is a `u8`.
// The input is still walked as `char`s via `src.chars()`.
181
192
<[ u8 ] >:: unescape_single ( & mut src. chars ( ) )
182
193
}
184
195
196
+ /// Unescape a string literal
197
+ ///
185
198
/// Takes the contents of a string literal (without quotes)
186
199
/// and produces a sequence of unescaped characters or errors,
187
200
/// which are returned by invoking `callback`.
188
201
pub fn unescape_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
// Forwards `src` and `callback` unchanged to the `str` flavour of `unescape`.
// Nothing is returned; every result is reported through `callback`.
189
202
str:: unescape ( src, callback)
190
203
}
191
204
205
+ /// Unescape a byte string literal
206
+ ///
192
207
/// Takes the contents of a byte string literal (without quotes)
193
208
/// and produces a sequence of unescaped bytes or errors,
194
209
/// which are returned by invoking `callback`.
195
210
pub fn unescape_byte_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < u8 , EscapeError > ) ) {
// Forwards `src` and `callback` unchanged to the `[u8]` flavour of `unescape`, so `callback` receives `u8` units.
// Nothing is returned; every result is reported through `callback`.
195
210
<[ u8 ] >:: unescape ( src, callback)
196
211
}
198
213
214
+ /// Unescape a C string literal
215
+ ///
199
216
/// Takes the contents of a C string literal (without quotes)
200
217
/// and produces a sequence of unescaped `MixedUnit`s or errors,
201
218
/// which are returned by invoking `callback`.
@@ -206,6 +223,8 @@ pub fn unescape_c_str(
206
223
CStr :: unescape ( src, callback)
207
224
}
208
225
226
+ /// Enum representing either a char or a byte
227
+ ///
209
228
/// Used for mixed utf8 string literals, i.e. those that allow both unicode
210
229
/// chars and high bytes.
211
230
#[ derive( Copy , Clone , Debug , PartialEq , Eq ) ]
@@ -242,7 +261,7 @@ impl From<u8> for MixedUnit {
242
261
}
243
262
}
244
263
245
- /// trait for unescaping escape sequences in strings
264
+ /// Trait for unescaping escape sequences in strings
246
265
trait Unescape {
247
266
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
248
267
type Unit : From < u8 > ;
@@ -295,7 +314,9 @@ trait Unescape {
295
314
}
296
315
}
297
316
298
- /// Takes the contents of a raw literal (without quotes)
317
+ /// Unescape a string literal
318
+ ///
319
+ /// Takes the contents of a (non-raw) string literal (without quotes)
299
320
/// and produces a sequence of `Result<Self::Unit, EscapeError>`
300
321
/// which are returned via `callback`.
301
322
fn unescape (
@@ -328,7 +349,9 @@ trait Unescape {
328
349
}
329
350
}
330
351
331
- /// Parse the character of an ASCII escape (except nul) without the leading backslash.
352
+ /// Interpret a non-nul ASCII escape
353
+ ///
354
+ /// Parses the character of an ASCII escape (except nul) without the leading backslash.
332
355
fn simple_escape ( c : char ) -> Result < u8 , char > {
333
356
// Previous character was '\\', unescape what follows.
334
357
Ok ( match c {
@@ -342,7 +365,9 @@ fn simple_escape(c: char) -> Result<u8, char> {
342
365
} )
343
366
}
344
367
345
- /// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
368
+ /// Interpret a hexadecimal escape
369
+ ///
370
+ /// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x".
346
371
fn hex_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u8 , EscapeError > {
347
372
let hi = chars. next ( ) . ok_or ( EscapeError :: TooShortHexEscape ) ?;
348
373
let hi = hi. to_digit ( 16 ) . ok_or ( EscapeError :: InvalidCharInHexEscape ) ?;
@@ -353,6 +378,8 @@ fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError>
353
378
Ok ( ( hi * 16 + lo) as u8 )
354
379
}
355
380
381
+ /// Interpret a unicode escape
382
+ ///
356
383
/// Parse the braces with hexadecimal characters (and underscores) part of a unicode escape.
357
384
/// This r"{...}" normally comes after r"\u" and cannot start with an underscore.
358
385
fn unicode_escape ( chars : & mut impl Iterator < Item = char > ) -> Result < u32 , EscapeError > {
@@ -400,6 +427,8 @@ fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeE
400
427
}
401
428
}
402
429
430
+ /// Interpret a [string continuation escape](https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
431
+ ///
403
432
/// Skip ASCII whitespace, except for the formfeed character
404
433
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
405
434
/// Warns on unescaped newline and following non-ASCII whitespace.
@@ -501,7 +530,7 @@ impl Unescape for CStr {
501
530
}
502
531
}
503
532
504
- /// What kind of literal do we parse.
533
+ /// Enum of the different kinds of literal
505
534
#[ derive( Debug , Clone , Copy , PartialEq ) ]
506
535
pub enum Mode {
507
536
Char ,
@@ -540,10 +569,14 @@ impl Mode {
540
569
}
541
570
}
542
571
572
+ /// Check a literal only for errors
573
+ ///
543
574
/// Takes the contents of a literal (without quotes)
544
- /// and produces a sequence of errors,
575
+ /// and produces a sequence of only errors,
545
576
/// which are returned by invoking `error_callback`.
546
- pub fn unescape_for_errors (
577
+ ///
578
+ /// NB Does not produce any output other than errors
579
+ pub fn check_for_errors (
547
580
src : & str ,
548
581
mode : Mode ,
549
582
mut error_callback : impl FnMut ( Range < usize > , EscapeError ) ,
0 commit comments