@@ -6,6 +6,46 @@ use crate::iter::FusedIterator;
6
6
use super::from_utf8_unchecked;
7
7
use super::validations::utf8_char_width;
8
8
9
+ impl [u8] {
10
+ /// Creates an iterator over the contiguous valid UTF-8 ranges of this
11
+ /// slice, and the non-UTF-8 fragments in between.
12
+ ///
13
+ /// # Examples
14
+ ///
15
+ /// This function formats arbitrary but mostly-UTF-8 bytes into Rust source
16
+ /// code in the form of a C-string literal (`c"..."`).
17
+ ///
18
+ /// ```
19
+ /// use std::fmt::Write as _;
20
+ ///
21
+ /// pub fn cstr_literal(bytes: &[u8]) -> String {
22
+ /// let mut repr = String::new();
23
+ /// repr.push_str("c\"");
24
+ /// for chunk in bytes.utf8_chunks() {
25
+ /// for ch in chunk.valid().chars() {
26
+ /// // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for non-printable characters.
27
+ /// write!(repr, "{}", ch.escape_debug()).unwrap();
28
+ /// }
29
+ /// for byte in chunk.invalid() {
30
+ /// write!(repr, "\\x{:02X}", byte).unwrap();
31
+ /// }
32
+ /// }
33
+ /// repr.push('"');
34
+ /// repr
35
+ /// }
36
+ ///
37
+ /// fn main() {
38
+ /// let lit = cstr_literal(b"\xferris the \xf0\x9f\xa6\x80\x07");
39
+ /// let expected = stringify!(c"\xFErris the 🦀\u{7}");
40
+ /// assert_eq!(lit, expected);
41
+ /// }
42
+ /// ```
43
+ #[stable(feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION")]
44
+ pub fn utf8_chunks(&self) -> Utf8Chunks<'_> {
45
+ Utf8Chunks { source: self }
46
+ }
47
+ }
48
+
9
49
/// An item returned by the [`Utf8Chunks`] iterator.
10
50
///
11
51
/// A `Utf8Chunk` stores a sequence of [`u8`] up to the first broken character
@@ -14,23 +54,19 @@ use super::validations::utf8_char_width;
14
54
/// # Examples
15
55
///
16
56
/// ```
17
- /// #![feature(utf8_chunks)]
18
- ///
19
- /// use std::str::Utf8Chunks;
20
- ///
21
57
/// // An invalid UTF-8 string
22
58
/// let bytes = b"foo\xF1\x80bar";
23
59
///
24
60
/// // Decode the first `Utf8Chunk`
25
- /// let chunk = Utf8Chunks::new( bytes).next().unwrap();
61
+ /// let chunk = bytes.utf8_chunks().next().unwrap();
26
62
///
27
63
/// // The first three characters are valid UTF-8
28
64
/// assert_eq!("foo", chunk.valid());
29
65
///
30
66
/// // The fourth character is broken
31
67
/// assert_eq!(b"\xF1\x80", chunk.invalid());
32
68
/// ```
33
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
69
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
34
70
#[derive(Clone, Debug, PartialEq, Eq)]
35
71
pub struct Utf8Chunk<'a> {
36
72
valid: &'a str,
@@ -43,7 +79,7 @@ impl<'a> Utf8Chunk<'a> {
43
79
/// This substring can be empty at the start of the string or between
44
80
/// broken UTF-8 characters.
45
81
#[must_use]
46
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
82
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
47
83
pub fn valid(&self) -> &'a str {
48
84
self.valid
49
85
}
@@ -63,7 +99,7 @@ impl<'a> Utf8Chunk<'a> {
63
99
/// [`valid`]: Self::valid
64
100
/// [`U+FFFD REPLACEMENT CHARACTER`]: crate::char::REPLACEMENT_CHARACTER
65
101
#[must_use]
66
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
102
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
67
103
pub fn invalid(&self) -> &'a [u8] {
68
104
self.invalid
69
105
}
@@ -78,7 +114,7 @@ impl fmt::Debug for Debug<'_> {
78
114
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
79
115
f.write_char('"')?;
80
116
81
- for chunk in Utf8Chunks::new( self.0) {
117
+ for chunk in self.0.utf8_chunks( ) {
82
118
// Valid part.
83
119
// Here we partially parse UTF-8 again which is suboptimal.
84
120
{
@@ -123,12 +159,8 @@ impl fmt::Debug for Debug<'_> {
123
159
/// [`String::from_utf8_lossy`] without allocating heap memory:
124
160
///
125
161
/// ```
126
- /// #![feature(utf8_chunks)]
127
- ///
128
- /// use std::str::Utf8Chunks;
129
- ///
130
162
/// fn from_utf8_lossy<F>(input: &[u8], mut push: F) where F: FnMut(&str) {
131
- /// for chunk in Utf8Chunks::new( input) {
163
+ /// for chunk in input.utf8_chunks() {
132
164
/// push(chunk.valid());
133
165
///
134
166
/// if !chunk.invalid().is_empty() {
@@ -140,27 +172,21 @@ impl fmt::Debug for Debug<'_> {
140
172
///
141
173
/// [`String::from_utf8_lossy`]: ../../std/string/struct.String.html#method.from_utf8_lossy
142
174
#[must_use = "iterators are lazy and do nothing unless consumed"]
143
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
175
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
144
176
#[derive(Clone)]
145
177
pub struct Utf8Chunks<'a> {
146
178
source: &'a [u8],
147
179
}
148
180
149
181
impl<'a> Utf8Chunks<'a> {
150
- /// Creates a new iterator to decode the bytes.
151
- #[unstable(feature = "utf8_chunks", issue = "99543")]
152
- pub fn new(bytes: &'a [u8]) -> Self {
153
- Self { source: bytes }
154
- }
155
-
156
182
#[doc(hidden)]
157
183
#[unstable(feature = "str_internals", issue = "none")]
158
184
pub fn debug(&self) -> Debug<'_> {
159
185
Debug(self.source)
160
186
}
161
187
}
162
188
163
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
189
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
164
190
impl<'a> Iterator for Utf8Chunks<'a> {
165
191
type Item = Utf8Chunk<'a>;
166
192
@@ -259,10 +285,10 @@ impl<'a> Iterator for Utf8Chunks<'a> {
259
285
}
260
286
}
261
287
262
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
288
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
263
289
impl FusedIterator for Utf8Chunks<'_> {}
264
290
265
- #[unstable (feature = "utf8_chunks", issue = "99543 ")]
291
+ #[stable (feature = "utf8_chunks", since = "CURRENT_RUSTC_VERSION ")]
266
292
impl fmt::Debug for Utf8Chunks<'_> {
267
293
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
268
294
f.debug_struct("Utf8Chunks").field("source", &self.debug()).finish()
0 commit comments