From f3daf639554b8db56c891eead4af0eba447e22c2 Mon Sep 17 00:00:00 2001 From: binarycat Date: Fri, 9 Aug 2024 17:38:10 -0400 Subject: [PATCH 1/6] Hint on unknown escape of Unicode quotation marks in string literal Fixes #128858 I opted not to produce a suggestion, since it's not obvious what the user meant to do. --- .../src/lexer/unescape_error_reporting.rs | 26 +++++++++++++++++++ tests/ui/unicode-quote.rs | 3 +++ tests/ui/unicode-quote.stderr | 20 ++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 tests/ui/unicode-quote.rs create mode 100644 tests/ui/unicode-quote.stderr diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 2e066f0179c3f..6ec6c22bb43bb 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -158,6 +158,12 @@ pub(crate) fn emit_unescape_error( "this is an isolated carriage return; consider checking your editor and \ version control settings", ); + } else if looks_like_quote(c) { + diag.help( + format!("{ec} is not an ascii quote, \ + but may look like one in some fonts.\n\ + consider writing it in its \ + escaped form for clarity.")); } else { if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( @@ -295,3 +301,23 @@ pub(crate) fn escaped_char(c: char) -> String { _ => c.escape_default().to_string(), } } + +/// Returns true if `c` may look identical to `"` in some fonts. +fn looks_like_quote(c: char) -> bool { + // list of homoglyphs generated using the following wikidata query: + // SELECT ?u WHERE { + // wd:Q87495536 wdt:P2444+ ?c. + // ?c wdt:P4213 ?u. + // } + match c { + '\u{2033}' | + '\u{02BA}' | + '\u{02DD}' | + '\u{030B}' | + '\u{030E}' | + '\u{05F4}' | + '\u{201C}' | + '\u{201D}' => true, + _ => false, + } +} diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs new file mode 100644 index 0000000000000..0421fdb85041d --- /dev/null +++ b/tests/ui/unicode-quote.rs @@ -0,0 +1,3 @@ +fn main() { + dbg!("since when is \“THIS\” not allowed in a string literal"); +} diff --git a/tests/ui/unicode-quote.stderr b/tests/ui/unicode-quote.stderr new file mode 100644 index 0000000000000..f418bbb65fa2e --- /dev/null +++ b/tests/ui/unicode-quote.stderr @@ -0,0 +1,20 @@ +error: unknown character escape: `\u{201c}` + --> $DIR/unicode-quote.rs:2:26 + | +LL | dbg!("since when is \“THIS\” not allowed in a string literal"); + | ^ unknown character escape + | + = help: \u{201c} is not an ascii quote, but may look like one in some fonts. + consider writing it in its escaped form for clarity. + +error: unknown character escape: `\u{201d}` + --> $DIR/unicode-quote.rs:2:32 + | +LL | dbg!("since when is \“THIS\” not allowed in a string literal"); + | ^ unknown character escape + | + = help: \u{201d} is not an ascii quote, but may look like one in some fonts. + consider writing it in its escaped form for clarity. + +error: aborting due to 2 previous errors + From c9e0f09841f651e595225e430bdc312d9615414f Mon Sep 17 00:00:00 2001 From: binarycat Date: Fri, 9 Aug 2024 18:01:57 -0400 Subject: [PATCH 2/6] fmt --- .../src/lexer/unescape_error_reporting.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 6ec6c22bb43bb..b7651cac1df12 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -159,11 +159,12 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else if looks_like_quote(c) { - diag.help( - format!("{ec} is not an ascii quote, \ + diag.help(format!( + "{ec} is not an ascii quote, \ but may look like one in some fonts.\n\ consider writing it in its \ - escaped form for clarity.")); + escaped form for clarity." + )); } else { if mode == Mode::Str || mode == Mode::Char { diag.span_suggestion( @@ -310,14 +311,8 @@ fn looks_like_quote(c: char) -> bool { // ?c wdt:P4213 ?u. // } match c { - '\u{2033}' | - '\u{02BA}' | - '\u{02DD}' | - '\u{030B}' | - '\u{030E}' | - '\u{05F4}' | - '\u{201C}' | - '\u{201D}' => true, + '\u{2033}' | '\u{02BA}' | '\u{02DD}' | '\u{030B}' | '\u{030E}' | '\u{05F4}' + | '\u{201C}' | '\u{201D}' => true, _ => false, } } From 206dc78b7c475bab4fde50cdd01eaae468625b49 Mon Sep 17 00:00:00 2001 From: Xelph Date: Mon, 17 Feb 2025 13:46:44 -0700 Subject: [PATCH 3/6] Implement suggested fix and update test --- .../src/lexer/unescape_error_reporting.rs | 15 +++++++--- tests/ui/unicode-quote.rs | 4 ++- tests/ui/unicode-quote.stderr | 30 ++++++++++++------- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index b7651cac1df12..fd1b29daea616 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -159,11 +159,18 @@ pub(crate) fn emit_unescape_error( version control settings", ); } else if looks_like_quote(c) { + diag.span_suggestion( + err_span, + "if you meant to use a unicode quote; \ + consider using its escaped form for clarity", + // lit.replace(c, &ec[1..]), + &ec, + Applicability::MaybeIncorrect, + ); + diag.help(format!( - "{ec} is not an ascii quote, \ - but may look like one in some fonts.\n\ - consider writing it in its \ - escaped form for clarity." + "{ec} is not an ascii quote, but may look like one in some fonts; \ + consider writing it in its escaped form for clarity." )); } else { if mode == Mode::Str || mode == Mode::Char { diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs index 0421fdb85041d..7fbc71d583691 100644 --- a/tests/ui/unicode-quote.rs +++ b/tests/ui/unicode-quote.rs @@ -1,3 +1,5 @@ fn main() { - dbg!("since when is \“THIS\” not allowed in a string literal"); + dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); + //~^ 2:20: 2:21: unknown character escape: `\u{201c}` + //~^^ 2:65: 2:66: unknown character escape: `\u{201c}` } diff --git a/tests/ui/unicode-quote.stderr b/tests/ui/unicode-quote.stderr index f418bbb65fa2e..475145057bd30 100644 --- a/tests/ui/unicode-quote.stderr +++ b/tests/ui/unicode-quote.stderr @@ -1,20 +1,28 @@ error: unknown character escape: `\u{201c}` - --> $DIR/unicode-quote.rs:2:26 + --> $DIR/unicode-quote.rs:2:20 | -LL | dbg!("since when is \“THIS\” not allowed in a string literal"); - | ^ unknown character escape +LL | dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); + | ^ unknown character escape + | + = help: \u{201c} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); +LL + dbg!("Is this \u{201c} a small sized quote or a big sized quote. \“ "); | - = help: \u{201c} is not an ascii quote, but may look like one in some fonts. - consider writing it in its escaped form for clarity. -error: unknown character escape: `\u{201d}` - --> $DIR/unicode-quote.rs:2:32 +error: unknown character escape: `\u{201c}` + --> $DIR/unicode-quote.rs:2:65 + | +LL | dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); + | ^ unknown character escape + | + = help: \u{201c} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity | -LL | dbg!("since when is \“THIS\” not allowed in a string literal"); - | ^ unknown character escape +LL - dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); +LL + dbg!("Is this \“ a small sized quote or a big sized quote. \u{201c} "); | - = help: \u{201d} is not an ascii quote, but may look like one in some fonts. - consider writing it in its escaped form for clarity. error: aborting due to 2 previous errors From 1f9345d2143830891cc6a482e13552d2e2a75a14 Mon Sep 17 00:00:00 2001 From: Xelph Date: Mon, 17 Feb 2025 14:24:33 -0700 Subject: [PATCH 4/6] Add comment to test --- tests/ui/unicode-quote.rs | 6 ++++-- tests/ui/unicode-quote.stderr | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs index 7fbc71d583691..5c7d1458aea9c 100644 --- a/tests/ui/unicode-quote.rs +++ b/tests/ui/unicode-quote.rs @@ -1,5 +1,7 @@ +// Provide a useful error message when attempting to escape a non-ascii quotation mark. +// fn main() { dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); - //~^ 2:20: 2:21: unknown character escape: `\u{201c}` - //~^^ 2:65: 2:66: unknown character escape: `\u{201c}` + //~^ ERROR 4:20: 4:21: unknown character escape: `\u{201c}` + //~^^ ERROR 4:65: 4:66: unknown character escape: `\u{201c}` } diff --git a/tests/ui/unicode-quote.stderr b/tests/ui/unicode-quote.stderr index 475145057bd30..cf1e59be8d1ee 100644 --- a/tests/ui/unicode-quote.stderr +++ b/tests/ui/unicode-quote.stderr @@ -1,5 +1,5 @@ error: unknown character escape: `\u{201c}` - --> $DIR/unicode-quote.rs:2:20 + --> $DIR/unicode-quote.rs:4:20 | LL | dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); | ^ unknown character escape @@ -12,7 +12,7 @@ LL + dbg!("Is this \u{201c} a small sized quote or a big sized quote. \“ " | error: unknown character escape: `\u{201c}` - --> $DIR/unicode-quote.rs:2:65 + --> $DIR/unicode-quote.rs:4:65 | LL | dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); | ^ unknown character escape From 4d80eef3ff73e52061fa1e374f520c9788fe915c Mon Sep 17 00:00:00 2001 From: Xelph Date: Mon, 17 Feb 2025 15:45:09 -0700 Subject: [PATCH 5/6] Rework test --- tests/ui/unicode-quote.rs | 19 ++++++- tests/ui/unicode-quote.stderr | 104 +++++++++++++++++++++++++++++----- 2 files changed, 107 insertions(+), 16 deletions(-) diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs index 5c7d1458aea9c..17b25733c9313 100644 --- a/tests/ui/unicode-quote.rs +++ b/tests/ui/unicode-quote.rs @@ -1,7 +1,20 @@ // Provide a useful error message when attempting to escape a non-ascii quotation mark. // fn main() { - dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); - //~^ ERROR 4:20: 4:21: unknown character escape: `\u{201c}` - //~^^ ERROR 4:65: 4:66: unknown character escape: `\u{201c}` + dbg!("\″"); //U+2033 + //~^ERROR 4:12: 4:13: unknown character escape: `\u{2033}` + dbg!("\ʺ"); //U+02BA + //~^ERROR 6:12: 6:13: unknown character escape: `\u{2ba}` + dbg!("\˝"); //U+02DD + //~^ERROR 8:12: 8:13: unknown character escape: `\u{2dd}` + dbg!("\̋"); //U+030B + //~^ERROR 10:12: 10:13: unknown character escape: `\u{30b}` + dbg!("\̎"); //U+030E + //~^ERROR 12:12: 12:13: unknown character escape: `\u{30e}` + dbg!("\״"); //U+05F4 + //~^ERROR 14:12: 14:13: unknown character escape: `\u{5f4}` + dbg!("\“"); //U+201C + //~^ERROR 16:12: 16:13: unknown character escape: `\u{201c}` + dbg!("\”"); //U+201D + //~^ERROR 18:12: 18:13: unknown character escape: `\u{201d}` } diff --git a/tests/ui/unicode-quote.stderr b/tests/ui/unicode-quote.stderr index cf1e59be8d1ee..926f56eb9360f 100644 --- a/tests/ui/unicode-quote.stderr +++ b/tests/ui/unicode-quote.stderr @@ -1,28 +1,106 @@ -error: unknown character escape: `\u{201c}` - --> $DIR/unicode-quote.rs:4:20 +error: unknown character escape: `\u{2033}` + --> $DIR/unicode-quote.rs:4:12 | -LL | dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); - | ^ unknown character escape +LL | dbg!("\″"); //U+2033 + | ^ unknown character escape | - = help: \u{201c} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. + = help: \u{2033} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("\″"); //U+2033 +LL + dbg!("\u{2033}"); //U+2033 + | + +error: unknown character escape: `\u{2ba}` + --> $DIR/unicode-quote.rs:6:12 + | +LL | dbg!("\ʺ"); //U+02BA + | ^ unknown character escape + | + = help: \u{2ba} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("\ʺ"); //U+02BA +LL + dbg!("\u{2ba}"); //U+02BA + | + +error: unknown character escape: `\u{2dd}` + --> $DIR/unicode-quote.rs:8:12 + | +LL | dbg!("\˝"); //U+02DD + | ^ unknown character escape + | + = help: \u{2dd} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("\˝"); //U+02DD +LL + dbg!("\u{2dd}"); //U+02DD + | + +error: unknown character escape: `\u{30b}` + --> $DIR/unicode-quote.rs:10:12 + | +LL | dbg!("\̋"); //U+030B + | ^ unknown character escape + | + = help: \u{30b} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("\̋"); //U+030B +LL + dbg!("\u{30b}"); //U+030B + | + +error: unknown character escape: `\u{30e}` + --> $DIR/unicode-quote.rs:12:12 + | +LL | dbg!("\̎"); //U+030E + | ^ unknown character escape + | + = help: \u{30e} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. help: if you meant to use a unicode quote; consider using its escaped form for clarity | -LL - dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); -LL + dbg!("Is this \u{201c} a small sized quote or a big sized quote. \“ "); +LL - dbg!("\̎"); //U+030E +LL + dbg!("\u{30e}"); //U+030E + | + +error: unknown character escape: `\u{5f4}` + --> $DIR/unicode-quote.rs:14:12 + | +LL | dbg!("\״"); //U+05F4 + | ^ unknown character escape + | + = help: \u{5f4} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("\״"); //U+05F4 +LL + dbg!("\u{5f4}"); //U+05F4 | error: unknown character escape: `\u{201c}` - --> $DIR/unicode-quote.rs:4:65 + --> $DIR/unicode-quote.rs:16:12 | -LL | dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); - | ^ unknown character escape +LL | dbg!("\“"); //U+201C + | ^ unknown character escape | = help: \u{201c} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. help: if you meant to use a unicode quote; consider using its escaped form for clarity | -LL - dbg!("Is this \“ a small sized quote or a big sized quote. \“ "); -LL + dbg!("Is this \“ a small sized quote or a big sized quote. \u{201c} "); +LL - dbg!("\“"); //U+201C +LL + dbg!("\u{201c}"); //U+201C + | + +error: unknown character escape: `\u{201d}` + --> $DIR/unicode-quote.rs:18:12 + | +LL | dbg!("\”"); //U+201D + | ^ unknown character escape + | + = help: \u{201d} is not an ascii quote, but may look like one in some fonts; consider writing it in its escaped form for clarity. +help: if you meant to use a unicode quote; consider using its escaped form for clarity + | +LL - dbg!("\”"); //U+201D +LL + dbg!("\u{201d}"); //U+201D | -error: aborting due to 2 previous errors +error: aborting due to 8 previous errors From f278b590c1481aa6b5ed58f6164331b491baa425 Mon Sep 17 00:00:00 2001 From: Lisa FS Date: Tue, 25 Mar 2025 10:06:21 -0600 Subject: [PATCH 6/6] Remove error line numbers and fix capitalization. Co-authored-by: nora <48135649+Noratrieb@users.noreply.github.com> --- compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 2 +- tests/ui/unicode-quote.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index fd1b29daea616..e7873c772b7f6 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -169,7 +169,7 @@ pub(crate) fn emit_unescape_error( ); diag.help(format!( - "{ec} is not an ascii quote, but may look like one in some fonts; \ + "{ec} is not an ASCII quote, but may look like one in some fonts; \ consider writing it in its escaped form for clarity." )); } else { diff --git a/tests/ui/unicode-quote.rs b/tests/ui/unicode-quote.rs index 17b25733c9313..023903de4523d 100644 --- a/tests/ui/unicode-quote.rs +++ b/tests/ui/unicode-quote.rs @@ -2,7 +2,7 @@ // fn main() { dbg!("\″"); //U+2033 - //~^ERROR 4:12: 4:13: unknown character escape: `\u{2033}` + //~^ERROR unknown character escape: `\u{2033}` dbg!("\ʺ"); //U+02BA //~^ERROR 6:12: 6:13: unknown character escape: `\u{2ba}` dbg!("\˝"); //U+02DD