@@ -279,16 +279,44 @@ mod prim_never {}
279
279
///
280
280
/// The `char` type represents a single character. More specifically, since
281
281
/// 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
282
- /// scalar value]', which is similar to, but not the same as, a '[Unicode code
283
- /// point]'.
284
- ///
285
- /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
286
- /// [Unicode code point]: https://www.unicode.org/glossary/#code_point
282
+ /// scalar value]'.
287
283
///
288
284
/// This documentation describes a number of methods and trait implementations on the
289
285
/// `char` type. For technical reasons, there is additional, separate
290
286
/// documentation in [the `std::char` module](char/index.html) as well.
291
287
///
288
+ /// # Validity
289
+ ///
290
+ /// A `char` is a '[Unicode scalar value]', which is any '[Unicode code point]'
291
+ /// other than a [surrogate code point]. This has a fixed numerical definition:
292
+ /// code points are in the range U+0000 to U+10FFFF (`'\0'` to `char::MAX`), inclusive.
293
+ /// Surrogate code points, used by UTF-16, are in the range U+D800 to U+DFFF.
294
+ ///
295
+ /// No `char` may be constructed, whether as a literal or at runtime, that is not a
296
+ /// Unicode scalar value:
297
+ ///
298
+ /// ```text
299
+ /// let forbidden_chars = [
300
+ /// // Each of these is a compiler error
301
+ /// '\u{D800}', '\u{DFFF}', '\u{110000}',
302
+ ///
303
+ /// // Panics; from_u32 returns None.
304
+ /// char::from_u32(0xDE01).unwrap(),
305
+ ///
306
+ /// // Undefined behavior
307
+ /// unsafe { char::from_u32_unchecked(0x110000) },
308
+ /// ];
309
+ /// ```
310
+ ///
311
+ /// Unicode is regularly updated. Many Unicode scalar values are not currently assigned to a
312
+ /// character, but may be in the future ("reserved"); some will never be a character
313
+ /// ("noncharacters"); and some may be given different meanings by different users
314
+ /// ("private use").
315
+ ///
316
+ /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
317
+ /// [Unicode code point]: https://www.unicode.org/glossary/#code_point
318
+ /// [surrogate code point]: https://www.unicode.org/glossary/#surrogate_code_point
319
+ ///
292
320
/// # Representation
293
321
///
294
322
/// `char` is always four bytes in size. This is a different representation than
0 commit comments