diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 2e066f0179c3f..e7873c772b7f6 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -158,6 +158,20 @@ pub(crate) fn emit_unescape_error(
                     "this is an isolated carriage return; consider checking your editor and \
                      version control settings",
                 );
+            } else if looks_like_quote(c) {
+                // `c` is a look-alike of `"`; suggest its escaped form `ec` instead.
+                diag.span_suggestion(
+                    err_span,
+                    "if you meant to use a unicode quote; \
+                     consider using its escaped form for clarity",
+                    &ec,
+                    Applicability::MaybeIncorrect,
+                );
+
+                diag.help(format!(
+                    "{ec} is not an ASCII quote, but may look like one in some fonts; \
+                     consider writing it in its escaped form for clarity."
+                ));
             } else {
                 if mode == Mode::Str || mode == Mode::Char {
                     diag.span_suggestion(
@@ -295,3 +309,23 @@ pub(crate) fn escaped_char(c: char) -> String {
         _ => c.escape_default().to_string(),
     }
 }
+
+/// Returns true if `c` may look identical to `"` in some fonts.
+fn looks_like_quote(c: char) -> bool {
+    // list of homoglyphs generated using the following wikidata query:
+    // SELECT ?u WHERE {
+    //   wd:Q87495536 wdt:P2444+ ?c.
+    //   ?c wdt:P4213 ?u.
+    // }
+    matches!(
+        c,
+        '\u{2033}'
+            | '\u{02BA}'
+            | '\u{02DD}'
+            | '\u{030B}'
+            | '\u{030E}'
+            | '\u{05F4}'
+            | '\u{201C}'
+            | '\u{201D}'
+    )
+}
diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs
new file mode 100644
index 0000000000000..023903de4523d
--- /dev/null
+++ b/tests/ui/unicode-quote.rs
@@ -0,0 +1,20 @@
+// Provide a useful error message when attempting to escape a non-ASCII quotation mark.
+//
+fn main() {
+    dbg!("\″"); //U+2033
+    //~^ERROR 4:12: 4:13: unknown character escape: `\u{2033}`
+    dbg!("\ʺ"); //U+02BA
+    //~^ERROR 6:12: 6:13: unknown character escape: `\u{2ba}`
+    dbg!("\˝"); //U+02DD
+    //~^ERROR 8:12: 8:13: unknown character escape: `\u{2dd}`
+    dbg!("\̋"); //U+030B
+    //~^ERROR 10:12: 10:13: unknown character escape: `\u{30b}`
+    dbg!("\̎"); //U+030E
+    //~^ERROR 12:12: 12:13: unknown character escape: `\u{30e}`
+    dbg!("\״"); //U+05F4
+    //~^ERROR 14:12: 14:13: unknown character escape: `\u{5f4}`
+    dbg!("\“"); //U+201C
+    //~^ERROR 16:12: 16:13: unknown character escape: `\u{201c}`
+    dbg!("\”"); //U+201D
+    //~^ERROR 18:12: 18:13: unknown character escape: `\u{201d}`
+}
diff --git a/tests/ui/unicode-quote.stderr b/tests/ui/unicode-quote.stderr
new file mode 100644
index 0000000000000..926f56eb9360f
--- /dev/null
+++ b/tests/ui/unicode-quote.stderr
@@ -0,0 +1,106 @@
+error: unknown character escape: `\u{2033}`
+  --> $DIR/unicode-quote.rs:4:12
+   |
+LL |     dbg!("\″"); //U+2033
+   |            ^ unknown character escape
+   |
+   = help: \u{2033} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\″"); //U+2033
+LL +     dbg!("\u{2033}"); //U+2033
+   |
+
+error: unknown character escape: `\u{2ba}`
+  --> $DIR/unicode-quote.rs:6:12
+   |
+LL |     dbg!("\ʺ"); //U+02BA
+   |            ^ unknown character escape
+   |
+   = help: \u{2ba} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\ʺ"); //U+02BA
+LL +     dbg!("\u{2ba}"); //U+02BA
+   |
+
+error: unknown character escape: `\u{2dd}`
+  --> $DIR/unicode-quote.rs:8:12
+   |
+LL |     dbg!("\˝"); //U+02DD
+   |            ^ unknown character escape
+   |
+   = help: \u{2dd} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\˝"); //U+02DD
+LL +     dbg!("\u{2dd}"); //U+02DD
+   |
+
+error: unknown character escape: `\u{30b}`
+  --> $DIR/unicode-quote.rs:10:12
+   |
+LL |     dbg!("\̋"); //U+030B
+   |            ^ unknown character escape
+   |
+   = help: \u{30b} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\̋"); //U+030B
+LL +     dbg!("\u{30b}"); //U+030B
+   |
+
+error: unknown character escape: `\u{30e}`
+  --> $DIR/unicode-quote.rs:12:12
+   |
+LL |     dbg!("\̎"); //U+030E
+   |            ^ unknown character escape
+   |
+   = help: \u{30e} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\̎"); //U+030E
+LL +     dbg!("\u{30e}"); //U+030E
+   |
+
+error: unknown character escape: `\u{5f4}`
+  --> $DIR/unicode-quote.rs:14:12
+   |
+LL |     dbg!("\״"); //U+05F4
+   |            ^ unknown character escape
+   |
+   = help: \u{5f4} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\״"); //U+05F4
+LL +     dbg!("\u{5f4}"); //U+05F4
+   |
+
+error: unknown character escape: `\u{201c}`
+  --> $DIR/unicode-quote.rs:16:12
+   |
+LL |     dbg!("\“"); //U+201C
+   |            ^ unknown character escape
+   |
+   = help: \u{201c} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\“"); //U+201C
+LL +     dbg!("\u{201c}"); //U+201C
+   |
+
+error: unknown character escape: `\u{201d}`
+  --> $DIR/unicode-quote.rs:18:12
+   |
+LL |     dbg!("\”"); //U+201D
+   |            ^ unknown character escape
+   |
+   = help: \u{201d} is not an ASCII quote, but may look like one in some fonts; consider writing it in its escaped form for clarity.
+help: if you meant to use a unicode quote; consider using its escaped form for clarity
+   |
+LL -     dbg!("\”"); //U+201D
+LL +     dbg!("\u{201d}"); //U+201D
+   |
+
+error: aborting due to 8 previous errors
+