2
2
//! turning escape sequences into the values they represent.
3
3
4
4
use std:: ffi:: CStr ;
5
+ use std:: num:: NonZero ;
5
6
use std:: ops:: Range ;
6
7
use std:: str:: Chars ;
7
8
@@ -105,7 +106,10 @@ pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u
105
106
/// and produces a sequence of characters or errors,
106
107
/// which are returned by invoking `callback`.
107
108
/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
108
- pub fn check_raw_c_str ( src : & str , callback : impl FnMut ( Range < usize > , Result < char , EscapeError > ) ) {
109
+ pub fn check_raw_c_str (
110
+ src : & str ,
111
+ callback : impl FnMut ( Range < usize > , Result < NonZero < char > , EscapeError > ) ,
112
+ ) {
109
113
CStr :: check_raw ( src, callback) ;
110
114
}
111
115
@@ -178,14 +182,10 @@ fn char2byte(c: char) -> Result<u8, EscapeError> {
178
182
}
179
183
180
184
impl CheckRaw for CStr {
181
- type RawUnit = char ;
185
+ type RawUnit = NonZero < char > ;
182
186
183
187
fn char2raw_unit ( c : char ) -> Result < Self :: RawUnit , EscapeError > {
184
- if c == '\0' {
185
- Err ( EscapeError :: NulInCStr )
186
- } else {
187
- Ok ( c)
188
- }
188
+ NonZero :: new ( c) . ok_or ( EscapeError :: NulInCStr )
189
189
}
190
190
}
191
191
@@ -247,40 +247,63 @@ pub enum MixedUnit {
247
247
/// For example, if '¥' appears in a string it is represented here as
248
248
/// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte
249
249
/// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]`
250
- Char ( char ) ,
250
+ Char ( NonZero < char > ) ,
251
251
252
252
/// Used for high bytes (`\x80`..`\xff`).
253
253
///
254
254
/// For example, if `\xa5` appears in a string it is represented here as
255
255
/// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant
256
256
/// byte string as the single byte `0xa5`.
257
- HighByte ( u8 ) ,
257
+ HighByte ( NonZero < u8 > ) ,
258
258
}
259
259
260
- impl From < char > for MixedUnit {
261
- fn from ( c : char ) -> Self {
260
+ impl From < NonZero < char > > for MixedUnit {
261
+ fn from ( c : NonZero < char > ) -> Self {
262
262
MixedUnit :: Char ( c)
263
263
}
264
264
}
265
265
266
- impl From < u8 > for MixedUnit {
267
- fn from ( n : u8 ) -> Self {
268
- if n . is_ascii ( ) {
269
- MixedUnit :: Char ( n as char )
266
+ impl From < NonZero < u8 > > for MixedUnit {
267
+ fn from ( byte : NonZero < u8 > ) -> Self {
268
+ if byte . get ( ) . is_ascii ( ) {
269
+ MixedUnit :: Char ( NonZero :: new ( byte . get ( ) as char ) . unwrap ( ) )
270
270
} else {
271
- MixedUnit :: HighByte ( n )
271
+ MixedUnit :: HighByte ( byte )
272
272
}
273
273
}
274
274
}
275
275
276
+ impl TryFrom < char > for MixedUnit {
277
+ type Error = EscapeError ;
278
+
279
+ fn try_from ( c : char ) -> Result < Self , EscapeError > {
280
+ NonZero :: new ( c)
281
+ . map ( MixedUnit :: Char )
282
+ . ok_or ( EscapeError :: NulInCStr )
283
+ }
284
+ }
285
+
286
+ impl TryFrom < u8 > for MixedUnit {
287
+ type Error = EscapeError ;
288
+
289
+ fn try_from ( byte : u8 ) -> Result < Self , EscapeError > {
290
+ NonZero :: new ( byte)
291
+ . map ( From :: from)
292
+ . ok_or ( EscapeError :: NulInCStr )
293
+ }
294
+ }
295
+
276
296
/// Trait for unescaping escape sequences in strings
277
297
trait Unescape {
278
298
/// Unit type of the implementing string type (`char` for string, `u8` for byte string)
279
- type Unit : From < u8 > ;
299
+ type Unit ;
280
300
281
301
/// Result of unescaping the zero char ('\0')
282
302
const ZERO_RESULT : Result < Self :: Unit , EscapeError > ;
283
303
304
+ /// Converts non-zero bytes to the unit type
305
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit ;
306
+
284
307
/// Converts chars to the unit type
285
308
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > ;
286
309
@@ -311,18 +334,20 @@ trait Unescape {
311
334
if c == '0' {
312
335
Self :: ZERO_RESULT
313
336
} else {
314
- simple_escape ( c) . map ( |b| b. into ( ) ) . or_else ( |c| match c {
315
- 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
316
- 'u' => Self :: unicode2unit ( {
317
- let value = unicode_escape ( chars) ?;
318
- if value > char:: MAX as u32 {
319
- Err ( EscapeError :: OutOfRangeUnicodeEscape )
320
- } else {
321
- char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
322
- }
323
- } ) ,
324
- _ => Err ( EscapeError :: InvalidEscape ) ,
325
- } )
337
+ simple_escape ( c)
338
+ . map ( |b| Self :: nonzero_byte2unit ( b) )
339
+ . or_else ( |c| match c {
340
+ 'x' => Self :: hex2unit ( hex_escape ( chars) ?) ,
341
+ 'u' => Self :: unicode2unit ( {
342
+ let value = unicode_escape ( chars) ?;
343
+ if value > char:: MAX as u32 {
344
+ Err ( EscapeError :: OutOfRangeUnicodeEscape )
345
+ } else {
346
+ char:: from_u32 ( value) . ok_or ( EscapeError :: LoneSurrogateUnicodeEscape )
347
+ }
348
+ } ) ,
349
+ _ => Err ( EscapeError :: InvalidEscape ) ,
350
+ } )
326
351
}
327
352
}
328
353
@@ -364,9 +389,9 @@ trait Unescape {
364
389
/// Interpret a non-nul ASCII escape
365
390
///
366
391
/// Parses the character of an ASCII escape (except nul) without the leading backslash.
367
- fn simple_escape ( c : char ) -> Result < u8 , char > {
392
+ fn simple_escape ( c : char ) -> Result < NonZero < u8 > , char > {
368
393
// Previous character was '\\', unescape what follows.
369
- Ok ( match c {
394
+ Ok ( NonZero :: new ( match c {
370
395
'"' => b'"' ,
371
396
'n' => b'\n' ,
372
397
'r' => b'\r' ,
@@ -375,6 +400,7 @@ fn simple_escape(c: char) -> Result<u8, char> {
375
400
'\'' => b'\'' ,
376
401
_ => Err ( c) ?,
377
402
} )
403
+ . unwrap ( ) )
378
404
}
379
405
380
406
/// Interpret a hexadecimal escape
@@ -476,6 +502,10 @@ impl Unescape for str {
476
502
477
503
const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( '\0' ) ;
478
504
505
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
506
+ b. get ( ) . into ( )
507
+ }
508
+
479
509
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
480
510
Ok ( c)
481
511
}
@@ -499,6 +529,10 @@ impl Unescape for [u8] {
499
529
500
530
const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Ok ( b'\0' ) ;
501
531
532
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
533
+ b. get ( )
534
+ }
535
+
502
536
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
503
537
char2byte ( c)
504
538
}
@@ -518,22 +552,16 @@ impl Unescape for CStr {
518
552
519
553
const ZERO_RESULT : Result < Self :: Unit , EscapeError > = Err ( EscapeError :: NulInCStr ) ;
520
554
555
+ fn nonzero_byte2unit ( b : NonZero < u8 > ) -> Self :: Unit {
556
+ b. into ( )
557
+ }
558
+
521
559
fn char2unit ( c : char ) -> Result < Self :: Unit , EscapeError > {
522
- if c == '\0' {
523
- Err ( EscapeError :: NulInCStr )
524
- } else {
525
- Ok ( MixedUnit :: Char ( c) )
526
- }
560
+ c. try_into ( )
527
561
}
528
562
529
563
fn hex2unit ( byte : u8 ) -> Result < Self :: Unit , EscapeError > {
530
- if byte == b'\0' {
531
- Err ( EscapeError :: NulInCStr )
532
- } else if byte. is_ascii ( ) {
533
- Ok ( MixedUnit :: Char ( byte as char ) )
534
- } else {
535
- Ok ( MixedUnit :: HighByte ( byte) )
536
- }
564
+ byte. try_into ( )
537
565
}
538
566
539
567
/// Converts the result of a unicode escape to the unit type
0 commit comments