Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 7dbf2c0

Browse files
committed
Make non-ASCII errors more consistent.
There are three kinds of "byte" literals: byte literals, byte string literals, and raw byte string literals. None are allowed to have non-ASCII chars in them. Two `EscapeError` variants exist for when that constraint is violated: `NonAsciiCharInByte`, used for byte literals and byte string literals, and `NonAsciiCharInByteString`, used for raw byte string literals. As a result, the messages for raw byte string literals use different wording, without good reason. Also, byte string literals are incorrectly described as "byte constants" in some error messages. This commit eliminates `NonAsciiCharInByteString` so the three cases are handled similarly, and described correctly. The `mode` is enough to distinguish them. Note: Some existing error messages mention "byte constants" and some mention "byte literals". I went with the latter here, because it's a more correct name, as used by the Reference.
1 parent 34b32b0 commit 7dbf2c0

15 files changed

+62
-74
lines changed

compiler/rustc_lexer/src/unescape.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,8 @@ pub enum EscapeError {
5252

5353
/// Unicode escape code in byte literal.
5454
UnicodeEscapeInByte,
55-
/// Non-ascii character in byte literal.
55+
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
5656
NonAsciiCharInByte,
57-
/// Non-ascii character in byte string literal.
58-
NonAsciiCharInByteString,
5957

6058
/// After a line ending with '\', the next line contains whitespace
6159
/// characters that are not skipped.
@@ -349,8 +347,7 @@ where
349347
let start = src.len() - chars.as_str().len() - c.len_utf8();
350348
let result = match c {
351349
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
352-
c if is_byte && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString),
353-
c => Ok(c),
350+
_ => ascii_check(c, is_byte),
354351
};
355352
let end = src.len() - chars.as_str().len();
356353
callback(start..end, result);

compiler/rustc_lexer/src/unescape/tests.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,9 +289,6 @@ fn test_unescape_raw_byte_str() {
289289
}
290290

291291
check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
292-
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
293-
check(
294-
"🦀a",
295-
&[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))],
296-
);
292+
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
293+
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok(byte_from_char('a')))]);
297294
}

compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -231,16 +231,23 @@ pub(crate) fn emit_unescape_error(
231231
.emit();
232232
}
233233
EscapeError::NonAsciiCharInByte => {
234-
assert!(mode.is_byte());
235234
let (c, span) = last_char();
236-
let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
235+
let desc = match mode {
236+
Mode::Byte => "byte literal",
237+
Mode::ByteStr => "byte string literal",
238+
Mode::RawByteStr => "raw byte string literal",
239+
_ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
240+
};
241+
let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
237242
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
238243
format!(" but is {:?}", c)
239244
} else {
240245
String::new()
241246
};
242-
err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
243-
if (c as u32) <= 0xFF {
247+
err.span_label(span, &format!("must be ASCII{}", postfix));
248+
// Note: the \\xHH suggestions are not given for raw byte string
249+
// literals, because they are raw and so cannot use any escapes.
250+
if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
244251
err.span_suggestion(
245252
span,
246253
&format!(
@@ -250,9 +257,9 @@ pub(crate) fn emit_unescape_error(
250257
format!("\\x{:X}", c as u32),
251258
Applicability::MaybeIncorrect,
252259
);
253-
} else if matches!(mode, Mode::Byte) {
260+
} else if mode == Mode::Byte {
254261
err.span_label(span, "this multibyte character does not fit into a single byte");
255-
} else if matches!(mode, Mode::ByteStr) {
262+
} else if mode != Mode::RawByteStr {
256263
let mut utf8 = String::new();
257264
utf8.push(c);
258265
err.span_suggestion(
@@ -270,19 +277,6 @@ pub(crate) fn emit_unescape_error(
270277
}
271278
err.emit();
272279
}
273-
EscapeError::NonAsciiCharInByteString => {
274-
assert!(mode.is_byte());
275-
let (c, span) = last_char();
276-
let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
277-
format!(" but is {:?}", c)
278-
} else {
279-
String::new()
280-
};
281-
handler
282-
.struct_span_err(span, "raw byte string must be ASCII")
283-
.span_label(span, &format!("must be ASCII{}", postfix))
284-
.emit();
285-
}
286280
EscapeError::OutOfRangeHexEscape => {
287281
handler
288282
.struct_span_err(span, "out of range hex escape")
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#![feature(rustc_attrs)]
22

3-
#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte constant
3+
#[rustc_dummy = b"ffi.rs"] //~ ERROR non-ASCII character in byte string literal
44
fn main() {}

src/test/ui/attributes/key-value-non-ascii.stderr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
error: non-ASCII character in byte constant
1+
error: non-ASCII character in byte string literal
22
--> $DIR/key-value-non-ascii.rs:3:19
33
|
44
LL | #[rustc_dummy = b"ffi.rs"]
5-
| ^ byte constant must be ASCII
5+
| ^ must be ASCII
66
|
77
help: if you meant to use the UTF-8 encoding of 'ffi', use \xHH escapes
88
|

src/test/ui/parser/byte-literals.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ pub fn main() {
77
b'\x0Z'; //~ ERROR invalid character in numeric character escape: `Z`
88
b' '; //~ ERROR byte constant must be escaped
99
b'''; //~ ERROR byte constant must be escaped
10-
b'é'; //~ ERROR non-ASCII character in byte constant
10+
b'é'; //~ ERROR non-ASCII character in byte literal
1111
b'a //~ ERROR unterminated byte constant [E0763]
1212
}

src/test/ui/parser/byte-literals.stderr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,11 @@ error: byte constant must be escaped: `'`
3232
LL | b''';
3333
| ^ help: escape the character: `\'`
3434

35-
error: non-ASCII character in byte constant
35+
error: non-ASCII character in byte literal
3636
--> $DIR/byte-literals.rs:10:7
3737
|
3838
LL | b'é';
39-
| ^ byte constant must be ASCII
39+
| ^ must be ASCII
4040
|
4141
help: if you meant to use the unicode code point for 'é', use a \xHH escape
4242
|

src/test/ui/parser/byte-string-literals.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape
33
pub fn main() {
44
b"\f"; //~ ERROR unknown byte escape
55
b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z`
6-
b"é"; //~ ERROR non-ASCII character in byte constant
7-
br##"é"##; //~ ERROR raw byte string must be ASCII
6+
b"é"; //~ ERROR non-ASCII character in byte string literal
7+
br##"é"##; //~ ERROR non-ASCII character in raw byte string literal
88
b"a //~ ERROR unterminated double quote byte string
99
}

src/test/ui/parser/byte-string-literals.stderr

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,18 @@ error: invalid character in numeric character escape: `Z`
2020
LL | b"\x0Z";
2121
| ^ invalid character in numeric character escape
2222

23-
error: non-ASCII character in byte constant
23+
error: non-ASCII character in byte string literal
2424
--> $DIR/byte-string-literals.rs:6:7
2525
|
2626
LL | b"é";
27-
| ^ byte constant must be ASCII
27+
| ^ must be ASCII
2828
|
2929
help: if you meant to use the unicode code point for 'é', use a \xHH escape
3030
|
3131
LL | b"\xE9";
3232
| ~~~~
3333

34-
error: raw byte string must be ASCII
34+
error: non-ASCII character in raw byte string literal
3535
--> $DIR/byte-string-literals.rs:7:10
3636
|
3737
LL | br##"é"##;

src/test/ui/parser/raw/raw-byte-string-literals.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22

33
pub fn main() {
44
br"a"; //~ ERROR bare CR not allowed in raw string
5-
br"é"; //~ ERROR raw byte string must be ASCII
5+
br"é"; //~ ERROR non-ASCII character in raw byte string literal
66
br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
77
}

0 commit comments

Comments
 (0)