From 73c88ced926bbdbdab4c123c62e79dfed54fdc2f Mon Sep 17 00:00:00 2001 From: binarycat Date: Fri, 9 Aug 2024 17:38:10 -0400 Subject: [PATCH 1/2] Hint on unknown escape of Unicode quotation marks in string literal Fixes #128858 I opted not to produce a suggestion, since it's not obvious what the user meant to do. --- .../src/lexer/unescape_error_reporting.rs | 26 +++++++++++++++++++ tests/ui/unicode-quote.rs | 3 +++ tests/ui/unicode-quote.stderr | 20 ++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 tests/ui/unicode-quote.rs create mode 100644 tests/ui/unicode-quote.stderr diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index efa53f0962b78..15b57e6c76926 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -158,6 +158,12 @@ pub(crate) fn emit_unescape_error( "this is an isolated carriage return; consider checking your editor and \ version control settings", ); + } else if looks_like_quote(c) { + diag.help( + format!("{ec} is not an ascii quote, \ + but may look like one in some fonts.\n\ + consider writing it in its \ + escaped form for clarity.")); } else { if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( @@ -295,3 +301,23 @@ pub(crate) fn escaped_char(c: char) -> String { _ => c.escape_default().to_string(), } } + +/// Returns true if `c` may look identical to `"` in some fonts. +fn looks_like_quote(c: char) -> bool { + // list of homoglyphs generated using the following wikidata query: + // SELECT ?u WHERE { + // wd:Q87495536 wdt:P2444+ ?c. + // ?c wdt:P4213 ?u. + // } + match c { + '\u{2033}' | + '\u{02BA}' | + '\u{02DD}' | + '\u{030B}' | + '\u{030E}' | + '\u{05F4}' | + '\u{201C}' | + '\u{201D}' => true, + _ => false, + } +} diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs new file mode 100644 index 0000000000000..0421fdb85041d --- /dev/null +++ b/tests/ui/unicode-quote.rs @@ -0,0 +1,3 @@ +fn main() { + dbg!("since when is \“THIS\” not allowed in a string literal"); +} diff --git a/tests/ui/unicode-quote.stderr b/tests/ui/unicode-quote.stderr new file mode 100644 index 0000000000000..f418bbb65fa2e --- /dev/null +++ b/tests/ui/unicode-quote.stderr @@ -0,0 +1,20 @@ +error: unknown character escape: `\u{201c}` + --> $DIR/unicode-quote.rs:2:26 + | +LL | dbg!("since when is \“THIS\” not allowed in a string literal"); + | ^ unknown character escape + | + = help: \u{201c} is not an ascii quote, but may look like one in some fonts. + consider writing it in its escaped form for clarity. + +error: unknown character escape: `\u{201d}` + --> $DIR/unicode-quote.rs:2:32 + | +LL | dbg!("since when is \“THIS\” not allowed in a string literal"); + | ^ unknown character escape + | + = help: \u{201d} is not an ascii quote, but may look like one in some fonts. + consider writing it in its escaped form for clarity. + +error: aborting due to 2 previous errors + From 775cb88ad6f7bcd9d80a40140324c2cf44a08015 Mon Sep 17 00:00:00 2001 From: binarycat Date: Fri, 9 Aug 2024 18:01:57 -0400 Subject: [PATCH 2/2] fmt --- .../src/lexer/unescape_error_reporting.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 15b57e6c76926..266cb01d5b534 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -159,11 +159,12 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else if looks_like_quote(c) { - diag.help( - format!("{ec} is not an ascii quote, \ + diag.help(format!( + "{ec} is not an ascii quote, \ but may look like one in some fonts.\n\ consider writing it in its \ - escaped form for clarity.")); + escaped form for clarity." + )); } else { if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( @@ -310,14 +311,8 @@ fn looks_like_quote(c: char) -> bool { // ?c wdt:P4213 ?u. // } match c { - '\u{2033}' | - '\u{02BA}' | - '\u{02DD}' | - '\u{030B}' | - '\u{030E}' | - '\u{05F4}' | - '\u{201C}' | - '\u{201D}' => true, + '\u{2033}' | '\u{02BA}' | '\u{02DD}' | '\u{030B}' | '\u{030E}' | '\u{05F4}' + | '\u{201C}' | '\u{201D}' => true, _ => false, } }