@@ -114,7 +114,7 @@ fn decode_utf8_char(bytes: &[u8]) -> Option<(char, usize)> {
114
114
115
115
( ( bits0 << 12 ) | ( bits1 << 6 ) | bits2, 3 )
116
116
} else if ( first_byte & 0b1111_1000 ) == 0b1111_0000 {
117
- // This is a three byte character
117
+ // This is a four byte character
118
118
let bits0 = first_byte & 0b0000_0111 ;
119
119
let bits1 = ( bytes[ 1 ] & 0b0011_1111 ) as u32 ;
120
120
let bits2 = ( bytes[ 2 ] & 0b0011_1111 ) as u32 ;
@@ -290,12 +290,17 @@ mod tests {
290
290
291
291
#[ test]
292
292
fn utf8_char_decoding ( ) {
293
- let chars = vec ! [ ( '\0' , 1 ) , ( 'a' , 1 ) , ( 'Ω' , 2 ) , ( 'Ꜵ' , 3 ) , ( '𝔉' , 4 ) ] ;
294
-
295
- for ( c, len) in chars {
296
- let buffer = & mut [ 0 ; 4 ] ;
297
- c. encode_utf8 ( buffer) ;
298
- assert_eq ! ( Some ( ( c, len) ) , decode_utf8_char( & buffer[ ..] ) ) ;
293
+ use std:: convert:: TryFrom ;
294
+
295
+ // Let's just test all possible codepoints because there are not that
296
+ // many actually.
297
+ for codepoint in 0 ..=0x10FFFFu32 {
298
+ if let Ok ( expected_char) = char:: try_from ( codepoint) {
299
+ let buffer = & mut [ 0 ; 4 ] ;
300
+ let expected_len = expected_char. encode_utf8 ( buffer) . len ( ) ;
301
+ let expected = Some ( ( expected_char, expected_len) ) ;
302
+ assert_eq ! ( expected, decode_utf8_char( & buffer[ ..] ) ) ;
303
+ }
299
304
}
300
305
}
301
306
}
0 commit comments