2
2
//! turning escape sequences into the values they represent.
3
3
4
4
use std:: ffi:: CStr ;
5
+ use std:: num:: NonZero ;
5
6
use std:: ops:: Range ;
6
7
use std:: str:: Chars ;
7
8
@@ -105,7 +106,10 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
105
106
/// and produces a sequence of characters or errors,
106
107
/// which are returned by invoking `callback`.
107
108
/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
108
- pub fn check_raw_c_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
109
+ pub fn check_raw_c_str (
110
+ src : & str ,
111
+ callback : impl FnMut ( Range < usize > , Result < NonZero < char > , EscapeError > ) ,
112
+ ) {
109
113
CStr :: check_raw ( src, callback) ;
110
114
}
111
115
@@ -181,15 +185,11 @@ fn char2byte(c: char) -> Result<u8, EscapeError> {
181
185
}
182
186
183
187
impl CheckRaw for CStr {
184
- type RawUnit = char ;
188
+ type RawUnit = NonZero < char > ;
185
189
186
190
#[ inline]
187
191
fn char2raw_unit ( c : char ) -> Result < Self :: RawUnit , EscapeError > {
188
- if c == '\0' {
189
- Err ( EscapeError :: NulInCStr )
190
- } else {
191
- Ok ( c)
192
- }
192
+ NonZero :: new ( c) . ok_or ( EscapeError :: NulInCStr )
193
193
}
194
194
}
195
195
@@ -253,42 +253,67 @@ pub enum MixedUnit {
253
253
/// For example, if '¥' appears in a string it is represented here as
254
254
/// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte
255
255
/// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]`
256
- Char ( char ) ,
256
+ Char ( NonZero < char > ) ,
257
257
258
258
/// Used for high bytes (`\x80`..`\xff`).
259
259
///
260
260
/// For example, if `\xa5` appears in a string it is represented here as
261
261
/// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant
262
262
/// byte string as the single byte `0xa5`.
263
- HighByte ( u8 ) ,
263
+ HighByte ( NonZero < u8 > ) ,
264
264
}
265
265
266
- impl From < char > for MixedUnit {
266
+ impl From < NonZero < char > > for MixedUnit {
267
267
#[ inline]
268
- fn from ( c : char ) -> Self {
268
+ fn from ( c : NonZero < char > ) -> Self {
269
269
MixedUnit :: Char ( c)
270
270
}
271
271
}
272
272
273
- impl From < u8 > for MixedUnit {
273
+ impl From < NonZero < u8 > > for MixedUnit {
274
274
#[ inline]
275
- fn from ( n : u8 ) -> Self {
276
- if n . is_ascii ( ) {
277
- MixedUnit :: Char ( n as char )
275
+ fn from ( byte : NonZero < u8 > ) -> Self {
276
+ if byte . get ( ) . is_ascii ( ) {
277
+ MixedUnit :: Char ( NonZero :: new ( byte . get ( ) as char ) . unwrap ( ) )
278
278
} else {
279
- MixedUnit :: HighByte ( n )
279
+ MixedUnit :: HighByte ( byte )
280
280
}
281
281
}
282
282
}
283
283
284
+ impl TryFrom < char > for MixedUnit {
285
+ type Error = EscapeError ;
286
+
287
+ #[ inline]
288
+ fn try_from ( c : char ) -> Result < Self , EscapeError > {
289
+ NonZero :: new ( c)
290
+ . map ( MixedUnit :: Char )
291
+ . ok_or ( EscapeError :: NulInCStr )
292
+ }
293
+ }
294
+
295
+ impl TryFrom < u8 > for MixedUnit {
296
+ type Error = EscapeError ;
297
+
298
+ #[ inline]
299
+ fn try_from ( byte : u8 ) -> Result < Self , EscapeError > {
300
+ NonZero :: new ( byte)
301
+ . map ( From :: from)
302
+ . ok_or ( EscapeError :: NulInCStr )
303
+ }
304
+ }
305
+
284
306
/// Trait for unescaping escape sequences in strings
285
307
trait Unescape {
286
308
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
287
- type Unit : From < u8 > ;
309
+ type Unit ;
288
310
289
311
/// Result of unescaping the zero char ('\0')
290
312
const ZERO_RESULT : Result < Self :: Unit , EscapeError > ;
291
313
314
+ /// Converts non-zero bytes to the unit type
315
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit ;
316
+
292
317
/// Converts chars to the unit type
293
318
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > ;
294
319
@@ -319,18 +344,20 @@ trait Unescape {
319
344
if c == '0' {
320
345
Self :: ZERO_RESULT
321
346
} else {
322
- simple_escape ( c) . map ( |b| b. into ( ) ) . or_else ( |c| match c {
323
- 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
324
- 'u' => Self :: unicode2unit ( {
325
- let value = unicode_escape ( chars) ?;
326
- if value > char:: MAX as u32 {
327
- Err ( EscapeError :: OutOfRangeUnicodeEscape )
328
- } else {
329
- char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
330
- }
331
- } ) ,
332
- _ => Err ( EscapeError :: InvalidEscape ) ,
333
- } )
347
+ simple_escape ( c)
348
+ . map ( |b| Self :: nonzero_byte2unit ( b) )
349
+ . or_else ( |c| match c {
350
+ 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
351
+ 'u' => Self :: unicode2unit ( {
352
+ let value = unicode_escape ( chars) ?;
353
+ if value > char:: MAX as u32 {
354
+ Err ( EscapeError :: OutOfRangeUnicodeEscape )
355
+ } else {
356
+ char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
357
+ }
358
+ } ) ,
359
+ _ => Err ( EscapeError :: InvalidEscape ) ,
360
+ } )
334
361
}
335
362
}
336
363
@@ -373,9 +400,9 @@ trait Unescape {
373
400
///
374
401
/// Parses the character of an ASCII escape (except nul) without the leading backslash.
375
402
#[ inline] // single use in Unescape::unescape_1
376
- fn simple_escape ( c : char ) -> Result < u8 , char > {
403
+ fn simple_escape ( c : char ) -> Result < NonZero < u8 > , char > {
377
404
// Previous character was '\\', unescape what follows.
378
- Ok ( match c {
405
+ Ok ( NonZero :: new ( match c {
379
406
'"' => b'"' ,
380
407
'n' => b'\n' ,
381
408
'r' => b'\r' ,
@@ -384,6 +411,7 @@ fn simple_escape(c: char) -> Result<u8, char> {
384
411
'\'' => b'\'' ,
385
412
_ => Err ( c) ?,
386
413
} )
414
+ . unwrap ( ) )
387
415
}
388
416
389
417
/// Interpret a hexadecimal escape
@@ -489,6 +517,11 @@ impl Unescape for str {
489
517
490
518
const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( '\0' ) ;
491
519
520
+ #[ inline]
521
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
522
+ b. get ( ) . into ( )
523
+ }
524
+
492
525
#[ inline]
493
526
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
494
527
Ok ( c)
@@ -514,6 +547,11 @@ impl Unescape for [u8] {
514
547
515
548
const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( b'\0' ) ;
516
549
550
+ #[ inline]
551
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
552
+ b. get ( )
553
+ }
554
+
517
555
#[ inline]
518
556
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
519
557
char2byte ( c)
@@ -535,24 +573,19 @@ impl Unescape for CStr {
535
573
536
574
const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Err ( EscapeError :: NulInCStr ) ;
537
575
576
+ #[ inline]
577
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
578
+ b. into ( )
579
+ }
580
+
538
581
#[ inline]
539
582
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
540
- if c == '\0' {
541
- Err ( EscapeError :: NulInCStr )
542
- } else {
543
- Ok ( MixedUnit :: Char ( c) )
544
- }
583
+ c. try_into ( )
545
584
}
546
585
547
586
#[ inline]
548
587
fn hex2unit ( byte : u8 ) -> Result < Self :: Unit , EscapeError > {
549
- if byte == b'\0' {
550
- Err ( EscapeError :: NulInCStr )
551
- } else if byte. is_ascii ( ) {
552
- Ok ( MixedUnit :: Char ( byte as char ) )
553
- } else {
554
- Ok ( MixedUnit :: HighByte ( byte) )
555
- }
588
+ byte. try_into ( )
556
589
}
557
590
558
591
#[ inline]
0 commit comments