From 81a6f189408b71b452a9804818f2cd50d2ed9108 Mon Sep 17 00:00:00 2001
From: Kivooeo <Kivooeo123@gmail.com>
Date: Wed, 9 Jul 2025 15:24:28 +0500
Subject: [PATCH] Add dedicated errors for invalid integer-to-`char` casts

---
 compiler/rustc_lint/messages.ftl         |   7 ++
 compiler/rustc_lint/src/lints.rs         |  14 +++
 compiler/rustc_lint/src/types/literal.rs |  42 ++++++--
 tests/ui/cast/cast-char.rs               |  56 +++++++++-
 tests/ui/cast/cast-char.stderr           | 124 +++++++++++++++++++++--
 5 files changed, 222 insertions(+), 21 deletions(-)
diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl
index 8d9f2385b710f..5d07afdaf17bd 100644
--- a/compiler/rustc_lint/messages.ftl
+++ b/compiler/rustc_lint/messages.ftl
@@ -440,6 +440,7 @@ lint_invalid_asm_label_named = avoid using named labels in inline assembly
     .help = only local labels of the form `<number>:` should be used in inline asm
     .note = see the asm section of Rust By Example <https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels> for more information
 lint_invalid_asm_label_no_span = the label may be declared in the expansion of a macro
+
 lint_invalid_crate_type_value = invalid `crate_type` value
     .suggestion = did you mean
 
@@ -790,6 +791,9 @@ lint_supertrait_as_deref_target = this `Deref` implementation is covered by an i
     .label2 = target type is a supertrait of `{$self_ty}`
     .help = consider removing this implementation or replacing it with a method instead
 
+lint_surrogate_char_cast = surrogate values are not valid for `char`
+    .note = `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values
+
 lint_suspicious_double_ref_clone =
     using `.clone()` on a double reference, which returns `{$ty}` instead of cloning the inner type
 
@@ -799,6 +803,9 @@ lint_suspicious_double_ref_deref =
 lint_symbol_intern_string_literal = using `Symbol::intern` on a string literal
     .help = consider adding the symbol to `compiler/rustc_span/src/symbol.rs`
 
+lint_too_large_char_cast = value exceeds maximum `char` value
+    .note = maximum valid `char` value is `0x10FFFF`
+
 lint_trailing_semi_macro = trailing semicolon in macro used in expression position
     .note1 = macro invocations at the end of a block are treated as expressions
     .note2 = to ignore the value produced by the macro, add a semicolon after the invocation of `{$name}`
diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs
index 21148833eaf72..19989cbcce680 100644
--- a/compiler/rustc_lint/src/lints.rs
+++ b/compiler/rustc_lint/src/lints.rs
@@ -1746,6 +1746,20 @@ pub(crate) struct OverflowingLiteral<'a> {
     pub lit: String,
 }
 
+#[derive(LintDiagnostic)]
+#[diag(lint_surrogate_char_cast)]
+#[note]
+pub(crate) struct SurrogateCharCast {
+    pub literal: u128,
+}
+
+#[derive(LintDiagnostic)]
+#[diag(lint_too_large_char_cast)]
+#[note]
+pub(crate) struct TooLargeCharCast {
+    pub literal: u128,
+}
+
 #[derive(LintDiagnostic)]
 #[diag(lint_uses_power_alignment)]
 pub(crate) struct UsesPowerAlignment;
diff --git a/compiler/rustc_lint/src/types/literal.rs b/compiler/rustc_lint/src/types/literal.rs
index d44f45177bde0..2bac58ba23d07 100644
--- a/compiler/rustc_lint/src/types/literal.rs
+++ b/compiler/rustc_lint/src/types/literal.rs
@@ -12,7 +12,7 @@ use crate::context::LintContext;
 use crate::lints::{
     OnlyCastu8ToChar, OverflowingBinHex, OverflowingBinHexSign, OverflowingBinHexSignBitSub,
     OverflowingBinHexSub, OverflowingInt, OverflowingIntHelp, OverflowingLiteral, OverflowingUInt,
-    RangeEndpointOutOfRange, UseInclusiveRange,
+    RangeEndpointOutOfRange, SurrogateCharCast, TooLargeCharCast, UseInclusiveRange,
 };
 use crate::types::{OVERFLOWING_LITERALS, TypeLimits};
 
@@ -38,12 +38,18 @@ fn lint_overflowing_range_endpoint<'tcx>(
 
     // We only want to handle exclusive (`..`) ranges,
     // which are represented as `ExprKind::Struct`.
-    let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else { return false };
-    let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else { return false };
+    let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else {
+        return false;
+    };
+    let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else {
+        return false;
+    };
     if !is_range_literal(struct_expr) {
         return false;
     };
-    let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else { return false };
+    let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else {
+        return false;
+    };
 
     // We can suggest using an inclusive range
     // (`..=`) instead only if it is the `end` that is
@@ -61,7 +67,9 @@ fn lint_overflowing_range_endpoint<'tcx>(
     };
 
     let sub_sugg = if span.lo() == lit_span.lo() {
-        let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else { return false };
+        let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else {
+            return false;
+        };
         UseInclusiveRange::WithoutParen {
             sugg: struct_expr.span.shrink_to_lo().to(lit_span.shrink_to_hi()),
             start,
@@ -316,11 +324,25 @@ fn lint_uint_literal<'tcx>(
             match par_e.kind {
                 hir::ExprKind::Cast(..) => {
                     if let ty::Char = cx.typeck_results().expr_ty(par_e).kind() {
-                        cx.emit_span_lint(
-                            OVERFLOWING_LITERALS,
-                            par_e.span,
-                            OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
-                        );
+                        if lit_val > 0x10FFFF {
+                            cx.emit_span_lint(
+                                OVERFLOWING_LITERALS,
+                                par_e.span,
+                                TooLargeCharCast { literal: lit_val },
+                            );
+                        } else if (0xD800..=0xDFFF).contains(&lit_val) {
+                            cx.emit_span_lint(
+                                OVERFLOWING_LITERALS,
+                                par_e.span,
+                                SurrogateCharCast { literal: lit_val },
+                            );
+                        } else {
+                            cx.emit_span_lint(
+                                OVERFLOWING_LITERALS,
+                                par_e.span,
+                                OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
+                            );
+                        }
                         return;
                     }
                 }
diff --git a/tests/ui/cast/cast-char.rs b/tests/ui/cast/cast-char.rs
index 9634ed56f7b72..5bf05072253fd 100644
--- a/tests/ui/cast/cast-char.rs
+++ b/tests/ui/cast/cast-char.rs
@@ -1,10 +1,58 @@
 #![deny(overflowing_literals)]
 
 fn main() {
-    const XYZ: char = 0x1F888 as char;
+    // Valid cases - casts above the `u8` range should suggest a `char` literal
+
+    // u8 range (0-255)
+    const VALID_U8_1: char = 0x41 as char; // 'A'
+    const VALID_U8_2: char = 0xFF as char; // maximum u8
+    const VALID_U8_3: char = 0x00 as char; // minimum u8
+
+    // Valid Unicode in lower range [0x0, 0xD7FF]
+    const VALID_LOW_1: char = 0x1000 as char; // 4096
+    //~^ ERROR: only `u8` can be cast into `char`
+    const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
+    //~^ ERROR: only `u8` can be cast into `char`
+    const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
+    //~^ ERROR: only `u8` can be cast into `char`
+
+    // Valid Unicode in upper range [0xE000, 0x10FFFF]
+    const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
+    //~^ ERROR only `u8` can be cast into `char`
+    const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
+    //~^ ERROR only `u8` can be cast into `char`
+    const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
+    //~^ ERROR only `u8` can be cast into `char`
+    const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
+    //~^ ERROR only `u8` can be cast into `char`
+    const VALID_HIGH_5: char = 0x1F600 as char; // emoji
+    //~^ ERROR only `u8` can be cast into `char`
+
+    // Invalid cases - should report the surrogate or too-large `char` cast error
+
+    // Surrogate range [0xD800, 0xDFFF] - reserved for UTF-16
+    const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
+    //~^ ERROR: surrogate values are not valid
+    const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
+    //~^ ERROR: surrogate values are not valid
+    const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
+    //~^ ERROR: surrogate values are not valid
+
+    // Too large values (> 0x10FFFF)
+    const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
+    //~^ ERROR: value exceeds maximum `char` value
+    const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
+    //~^ ERROR: value exceeds maximum `char` value
+    const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
+    //~^ ERROR: value exceeds maximum `char` value
+    const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
+    //~^ ERROR: value exceeds maximum `char` value
+
+    // Boundary cases
+    const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
+    //~^ ERROR only `u8` can be cast into `char`
+    const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
     //~^ ERROR only `u8` can be cast into `char`
-    const XY: char = 129160 as char;
+    const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
     //~^ ERROR only `u8` can be cast into `char`
-    const ZYX: char = '\u{01F888}';
-    println!("{}", XYZ);
 }
diff --git a/tests/ui/cast/cast-char.stderr b/tests/ui/cast/cast-char.stderr
index 211937c9d6faf..a8d0b3b04b0c2 100644
--- a/tests/ui/cast/cast-char.stderr
+++ b/tests/ui/cast/cast-char.stderr
@@ -1,8 +1,8 @@
 error: only `u8` can be cast into `char`
-  --> $DIR/cast-char.rs:4:23
+  --> $DIR/cast-char.rs:12:31
    |
-LL |     const XYZ: char = 0x1F888 as char;
-   |                       ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
+LL |     const VALID_LOW_1: char = 0x1000 as char; // 4096
+   |                               ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1000}'`
    |
 note: the lint level is defined here
   --> $DIR/cast-char.rs:1:9
@@ -11,10 +11,120 @@ LL | #![deny(overflowing_literals)]
    |         ^^^^^^^^^^^^^^^^^^^^
 
 error: only `u8` can be cast into `char`
-  --> $DIR/cast-char.rs:6:22
+  --> $DIR/cast-char.rs:14:31
    |
-LL |     const XY: char = 129160 as char;
-   |                      ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
+LL |     const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
+   |                               ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FF}'`
 
-error: aborting due to 2 previous errors
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:16:31
+   |
+LL |     const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
+   |                               ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{500}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:20:32
+   |
+LL |     const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
+   |                                ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E000}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:22:32
+   |
+LL |     const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
+   |                                ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:24:32
+   |
+LL |     const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
+   |                                ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFF}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:26:32
+   |
+LL |     const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
+   |                                ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{FFFD}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:28:32
+   |
+LL |     const VALID_HIGH_5: char = 0x1F600 as char; // emoji
+   |                                ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F600}'`
+
+error: surrogate values are not valid for `char`
+  --> $DIR/cast-char.rs:34:39
+   |
+LL |     const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
+   |                                       ^^^^^^^^^^^^^^
+   |
+   = note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values
+
+error: surrogate values are not valid for `char`
+  --> $DIR/cast-char.rs:36:39
+   |
+LL |     const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
+   |                                       ^^^^^^^^^^^^^^
+   |
+   = note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values
+
+error: surrogate values are not valid for `char`
+  --> $DIR/cast-char.rs:38:39
+   |
+LL |     const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
+   |                                       ^^^^^^^^^^^^^^
+   |
+   = note: `0xD800..=0xDFFF` are reserved for Unicode surrogates and are not valid `char` values
+
+error: value exceeds maximum `char` value
+  --> $DIR/cast-char.rs:42:37
+   |
+LL |     const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
+   |                                     ^^^^^^^^^^^^^^^^
+   |
+   = note: maximum valid `char` value is `0x10FFFF`
+
+error: value exceeds maximum `char` value
+  --> $DIR/cast-char.rs:44:37
+   |
+LL |     const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
+   |                                     ^^^^^^^^^^^^^^^^
+   |
+   = note: maximum valid `char` value is `0x10FFFF`
+
+error: value exceeds maximum `char` value
+  --> $DIR/cast-char.rs:46:37
+   |
+LL |     const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
+   |                                     ^^^^^^^^^^^^^^^^
+   |
+   = note: maximum valid `char` value is `0x10FFFF`
+
+error: value exceeds maximum `char` value
+  --> $DIR/cast-char.rs:48:37
+   |
+LL |     const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
+   |                                     ^^^^^^^^^^^^^^^^
+   |
+   = note: maximum valid `char` value is `0x10FFFF`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:52:30
+   |
+LL |     const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
+   |                              ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FE}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:54:30
+   |
+LL |     const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
+   |                              ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E001}'`
+
+error: only `u8` can be cast into `char`
+  --> $DIR/cast-char.rs:56:30
+   |
+LL |     const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
+   |                              ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFE}'`
+
+error: aborting due to 18 previous errors