From 81a6f189408b71b452a9804818f2cd50d2ed9108 Mon Sep 17 00:00:00 2001 From: Kivooeo Date: Wed, 9 Jul 2025 15:24:28 +0500 Subject: [PATCH] added error for invalid char cast --- compiler/rustc_lint/messages.ftl | 7 ++ compiler/rustc_lint/src/lints.rs | 14 +++ compiler/rustc_lint/src/types/literal.rs | 42 ++++++-- tests/ui/cast/cast-char.rs | 56 +++++++++- tests/ui/cast/cast-char.stderr | 124 +++++++++++++++++++++-- 5 files changed, 222 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index 8d9f2385b710f..5d07afdaf17bd 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -440,6 +440,7 @@ lint_invalid_asm_label_named = avoid using named labels in inline assembly .help = only local labels of the form `:` should be used in inline asm .note = see the asm section of Rust By Example for more information lint_invalid_asm_label_no_span = the label may be declared in the expansion of a macro + lint_invalid_crate_type_value = invalid `crate_type` value .suggestion = did you mean @@ -790,6 +791,9 @@ lint_supertrait_as_deref_target = this `Deref` implementation is covered by an i .label2 = target type is a supertrait of `{$self_ty}` .help = consider removing this implementation or replacing it with a method instead +lint_surrogate_char_cast = surrogate values are not valid for `char` + .note = `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values + lint_suspicious_double_ref_clone = using `.clone()` on a double reference, which returns `{$ty}` instead of cloning the inner type @@ -799,6 +803,9 @@ lint_suspicious_double_ref_deref = lint_symbol_intern_string_literal = using `Symbol::intern` on a string literal .help = consider adding the symbol to `compiler/rustc_span/src/symbol.rs` +lint_too_large_char_cast = value exceeds maximum `char` value + .note = maximum valid `char` value is `0x10FFFF` + lint_trailing_semi_macro = trailing semicolon in macro used in expression position .note1 = macro invocations at the end of a block are treated as expressions .note2 = to ignore the value produced by the macro, add a semicolon after the invocation of `{$name}` diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index 21148833eaf72..19989cbcce680 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -1746,6 +1746,20 @@ pub(crate) struct OverflowingLiteral<'a> { pub lit: String, } +#[derive(LintDiagnostic)] +#[diag(lint_surrogate_char_cast)] +#[note] +pub(crate) struct SurrogateCharCast { + pub literal: u128, +} + +#[derive(LintDiagnostic)] +#[diag(lint_too_large_char_cast)] +#[note] +pub(crate) struct TooLargeCharCast { + pub literal: u128, +} + #[derive(LintDiagnostic)] #[diag(lint_uses_power_alignment)] pub(crate) struct UsesPowerAlignment; diff --git a/compiler/rustc_lint/src/types/literal.rs b/compiler/rustc_lint/src/types/literal.rs index d44f45177bde0..2bac58ba23d07 100644 --- a/compiler/rustc_lint/src/types/literal.rs +++ b/compiler/rustc_lint/src/types/literal.rs @@ -12,7 +12,7 @@ use crate::context::LintContext; use crate::lints::{ OnlyCastu8ToChar, OverflowingBinHex, OverflowingBinHexSign, OverflowingBinHexSignBitSub, OverflowingBinHexSub, OverflowingInt, OverflowingIntHelp, OverflowingLiteral, OverflowingUInt, - RangeEndpointOutOfRange, UseInclusiveRange, + RangeEndpointOutOfRange, SurrogateCharCast, TooLargeCharCast, UseInclusiveRange, }; use crate::types::{OVERFLOWING_LITERALS, TypeLimits}; @@ -38,12 +38,18 @@ fn lint_overflowing_range_endpoint<'tcx>( // We only want to handle exclusive (`..`) ranges, // which are represented as `ExprKind::Struct`. - let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else { return false }; - let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else { return false }; + let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else { + return false; + }; + let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else { + return false; + }; if !is_range_literal(struct_expr) { return false; }; - let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else { return false }; + let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else { + return false; + }; // We can suggest using an inclusive range // (`..=`) instead only if it is the `end` that is @@ -61,7 +67,9 @@ fn lint_overflowing_range_endpoint<'tcx>( }; let sub_sugg = if span.lo() == lit_span.lo() { - let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else { return false }; + let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else { + return false; + }; UseInclusiveRange::WithoutParen { sugg: struct_expr.span.shrink_to_lo().to(lit_span.shrink_to_hi()), start, @@ -316,11 +324,25 @@ fn lint_uint_literal<'tcx>( match par_e.kind { hir::ExprKind::Cast(..) => { if let ty::Char = cx.typeck_results().expr_ty(par_e).kind() { - cx.emit_span_lint( - OVERFLOWING_LITERALS, - par_e.span, - OnlyCastu8ToChar { span: par_e.span, literal: lit_val }, - ); + if lit_val > 0x10FFFF { + cx.emit_span_lint( + OVERFLOWING_LITERALS, + par_e.span, + TooLargeCharCast { literal: lit_val }, + ); + } else if (0xD800..=0xDFFF).contains(&lit_val) { + cx.emit_span_lint( + OVERFLOWING_LITERALS, + par_e.span, + SurrogateCharCast { literal: lit_val }, + ); + } else { + cx.emit_span_lint( + OVERFLOWING_LITERALS, + par_e.span, + OnlyCastu8ToChar { span: par_e.span, literal: lit_val }, + ); + } return; } } diff --git a/tests/ui/cast/cast-char.rs b/tests/ui/cast/cast-char.rs index 9634ed56f7b72..5bf05072253fd 100644 --- a/tests/ui/cast/cast-char.rs +++ b/tests/ui/cast/cast-char.rs @@ -1,10 +1,58 @@ #![deny(overflowing_literals)] fn main() { - const XYZ: char = 0x1F888 as char; + // Valid cases - should suggest char literal + + // u8 range (0-255) + const VALID_U8_1: char = 0x41 as char; // 'A' + const VALID_U8_2: char = 0xFF as char; // maximum u8 + const VALID_U8_3: char = 0x00 as char; // minimum u8 + + // Valid Unicode in lower range [0x0, 0xD7FF] + const VALID_LOW_1: char = 0x1000 as char; // 4096 + //~^ ERROR: only `u8` can be cast into `char` + const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range + //~^ ERROR: only `u8` can be cast into `char` + const VALID_LOW_3: char = 0x0500 as char; // cyrillic range + //~^ ERROR: only `u8` can be cast into `char` + + // Valid Unicode in upper range [0xE000, 0x10FFFF] + const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range + //~^ ERROR only `u8` can be cast into `char` + const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue + //~^ ERROR only `u8` can be cast into `char` + const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode + //~^ ERROR only `u8` can be cast into `char` + const VALID_HIGH_4: char = 0xFFFD as char; // replacement character + //~^ ERROR only `u8` can be cast into `char` + const VALID_HIGH_5: char = 0x1F600 as char; // emoji + //~^ ERROR only `u8` can be cast into `char` + + // Invalid cases - should show InvalidCharCast + + // Surrogate range [0xD800, 0xDFFF] - reserved for UTF-16 + const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate + //~^ ERROR: surrogate values are not valid + const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate + //~^ ERROR: surrogate values are not valid + const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range + //~^ ERROR: surrogate values are not valid + + // Too large values (> 0x10FFFF) + const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum + //~^ ERROR: value exceeds maximum `char` value + const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue + //~^ ERROR: value exceeds maximum `char` value + const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger + //~^ ERROR: value exceeds maximum `char` value + const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum + //~^ ERROR: value exceeds maximum `char` value + + // Boundary cases + const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate + //~^ ERROR only `u8` can be cast into `char` + const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate //~^ ERROR only `u8` can be cast into `char` - const XY: char = 129160 as char; + const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum //~^ ERROR only `u8` can be cast into `char` - const ZYX: char = '\u{01F888}'; - println!("{}", XYZ); } diff --git a/tests/ui/cast/cast-char.stderr b/tests/ui/cast/cast-char.stderr index 211937c9d6faf..a8d0b3b04b0c2 100644 --- a/tests/ui/cast/cast-char.stderr +++ b/tests/ui/cast/cast-char.stderr @@ -1,8 +1,8 @@ error: only `u8` can be cast into `char` - --> $DIR/cast-char.rs:4:23 + --> $DIR/cast-char.rs:12:31 | -LL | const XYZ: char = 0x1F888 as char; - | ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'` +LL | const VALID_LOW_1: char = 0x1000 as char; // 4096 + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1000}'` | note: the lint level is defined here --> $DIR/cast-char.rs:1:9 @@ -11,10 +11,120 @@ LL | #![deny(overflowing_literals)] | ^^^^^^^^^^^^^^^^^^^^ error: only `u8` can be cast into `char` - --> $DIR/cast-char.rs:6:22 + --> $DIR/cast-char.rs:14:31 | -LL | const XY: char = 129160 as char; - | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'` +LL | const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FF}'` -error: aborting due to 2 previous errors +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:16:31 + | +LL | const VALID_LOW_3: char = 0x0500 as char; // cyrillic range + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{500}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:20:32 + | +LL | const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E000}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:22:32 + | +LL | const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue + | ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:24:32 + | +LL | const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode + | ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFF}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:26:32 + | +LL | const VALID_HIGH_4: char = 0xFFFD as char; // replacement character + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{FFFD}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:28:32 + | +LL | const VALID_HIGH_5: char = 0x1F600 as char; // emoji + | ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F600}'` + +error: surrogate values are not valid for `char` + --> $DIR/cast-char.rs:34:39 + | +LL | const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate + | ^^^^^^^^^^^^^^ + | + = note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values + +error: surrogate values are not valid for `char` + --> $DIR/cast-char.rs:36:39 + | +LL | const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate + | ^^^^^^^^^^^^^^ + | + = note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values + +error: surrogate values are not valid for `char` + --> $DIR/cast-char.rs:38:39 + | +LL | const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range + | ^^^^^^^^^^^^^^ + | + = note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values + +error: value exceeds maximum `char` value + --> $DIR/cast-char.rs:42:37 + | +LL | const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum + | ^^^^^^^^^^^^^^^^ + | + = note: maximum valid `char` value is `0x10FFFF` + +error: value exceeds maximum `char` value + --> $DIR/cast-char.rs:44:37 + | +LL | const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue + | ^^^^^^^^^^^^^^^^ + | + = note: maximum valid `char` value is `0x10FFFF` + +error: value exceeds maximum `char` value + --> $DIR/cast-char.rs:46:37 + | +LL | const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger + | ^^^^^^^^^^^^^^^^ + | + = note: maximum valid `char` value is `0x10FFFF` + +error: value exceeds maximum `char` value + --> $DIR/cast-char.rs:48:37 + | +LL | const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum + | ^^^^^^^^^^^^^^^^ + | + = note: maximum valid `char` value is `0x10FFFF` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:52:30 + | +LL | const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FE}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:54:30 + | +LL | const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate + | ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E001}'` + +error: only `u8` can be cast into `char` + --> $DIR/cast-char.rs:56:30 + | +LL | const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum + | ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFE}'` + +error: aborting due to 18 previous errors