Skip to content

Commit 261e71f

Browse files
committed
added error for invalid char cast
1 parent f838cbc commit 261e71f

File tree

5 files changed

+219
-23
lines changed

5 files changed

+219
-23
lines changed

compiler/rustc_lint/messages.ftl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,11 @@ lint_invalid_asm_label_named = avoid using named labels in inline assembly
440440
.help = only local labels of the form `<number>:` should be used in inline asm
441441
.note = see the asm section of Rust By Example <https://doc.rust-lang.org/nightly/rust-by-example/unsafe/asm.html#labels> for more information
442442
lint_invalid_asm_label_no_span = the label may be declared in the expansion of a macro
443+
444+
lint_invalid_char_cast = cannot cast `{$literal}` to `char`
445+
.note = char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
446+
.note1 = The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
447+
443448
lint_invalid_crate_type_value = invalid `crate_type` value
444449
.suggestion = did you mean
445450

compiler/rustc_lint/src/lints.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,14 @@ pub(crate) struct OverflowingLiteral<'a> {
17461746
pub lit: String,
17471747
}
17481748

1749+
#[derive(LintDiagnostic)]
1750+
#[diag(lint_invalid_char_cast)]
1751+
#[note]
1752+
#[note(lint_note1)]
1753+
pub(crate) struct InvalidCharCast {
1754+
pub literal: u128,
1755+
}
1756+
17491757
#[derive(LintDiagnostic)]
17501758
#[diag(lint_uses_power_alignment)]
17511759
pub(crate) struct UsesPowerAlignment;

compiler/rustc_lint/src/types/literal.rs

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ use {rustc_ast as ast, rustc_attr_data_structures as attrs, rustc_hir as hir};
1010
use crate::LateContext;
1111
use crate::context::LintContext;
1212
use crate::lints::{
13-
OnlyCastu8ToChar, OverflowingBinHex, OverflowingBinHexSign, OverflowingBinHexSignBitSub,
14-
OverflowingBinHexSub, OverflowingInt, OverflowingIntHelp, OverflowingLiteral, OverflowingUInt,
15-
RangeEndpointOutOfRange, UseInclusiveRange,
13+
InvalidCharCast, OnlyCastu8ToChar, OverflowingBinHex, OverflowingBinHexSign,
14+
OverflowingBinHexSignBitSub, OverflowingBinHexSub, OverflowingInt, OverflowingIntHelp,
15+
OverflowingLiteral, OverflowingUInt, RangeEndpointOutOfRange, UseInclusiveRange,
1616
};
1717
use crate::types::{OVERFLOWING_LITERALS, TypeLimits};
1818

@@ -38,12 +38,18 @@ fn lint_overflowing_range_endpoint<'tcx>(
3838

3939
// We only want to handle exclusive (`..`) ranges,
4040
// which are represented as `ExprKind::Struct`.
41-
let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else { return false };
42-
let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else { return false };
41+
let Node::ExprField(field) = cx.tcx.parent_hir_node(hir_id) else {
42+
return false;
43+
};
44+
let Node::Expr(struct_expr) = cx.tcx.parent_hir_node(field.hir_id) else {
45+
return false;
46+
};
4347
if !is_range_literal(struct_expr) {
4448
return false;
4549
};
46-
let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else { return false };
50+
let ExprKind::Struct(_, [start, end], _) = &struct_expr.kind else {
51+
return false;
52+
};
4753

4854
// We can suggest using an inclusive range
4955
// (`..=`) instead only if it is the `end` that is
@@ -61,7 +67,9 @@ fn lint_overflowing_range_endpoint<'tcx>(
6167
};
6268

6369
let sub_sugg = if span.lo() == lit_span.lo() {
64-
let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else { return false };
70+
let Ok(start) = cx.sess().source_map().span_to_snippet(start.span) else {
71+
return false;
72+
};
6573
UseInclusiveRange::WithoutParen {
6674
sugg: struct_expr.span.shrink_to_lo().to(lit_span.shrink_to_hi()),
6775
start,
@@ -316,11 +324,21 @@ fn lint_uint_literal<'tcx>(
316324
match par_e.kind {
317325
hir::ExprKind::Cast(..) => {
318326
if let ty::Char = cx.typeck_results().expr_ty(par_e).kind() {
319-
cx.emit_span_lint(
320-
OVERFLOWING_LITERALS,
321-
par_e.span,
322-
OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
323-
);
327+
if lit_val <= 0xFF
328+
|| (lit_val <= 0x10FFFF && !matches!(lit_val, 0xD800..=0xDFFF))
329+
{
330+
cx.emit_span_lint(
331+
OVERFLOWING_LITERALS,
332+
par_e.span,
333+
OnlyCastu8ToChar { span: par_e.span, literal: lit_val },
334+
);
335+
} else {
336+
cx.emit_span_lint(
337+
OVERFLOWING_LITERALS,
338+
par_e.span,
339+
InvalidCharCast { literal: lit_val },
340+
);
341+
}
324342
return;
325343
}
326344
}

tests/ui/cast/cast-char.rs

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,58 @@
11
#![deny(overflowing_literals)]
22

33
fn main() {
4-
const XYZ: char = 0x1F888 as char;
4+
// Valid Unicode scalar values - literals above the u8 range should suggest a char literal
5+
6+
// u8 range (0-255) - these casts are allowed, so no lint is expected
7+
const VALID_U8_1: char = 0x41 as char; // 'A'
8+
const VALID_U8_2: char = 0xFF as char; // maximum u8
9+
const VALID_U8_3: char = 0x00 as char; // minimum u8
10+
11+
// Valid Unicode in lower range [0x0, 0xD7FF]
12+
const VALID_LOW_1: char = 0x1000 as char; // 4096
13+
//~^ ERROR: only `u8` can be cast into `char`
14+
const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
15+
//~^ ERROR: only `u8` can be cast into `char`
16+
const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
17+
//~^ ERROR: only `u8` can be cast into `char`
18+
19+
// Valid Unicode in upper range [0xE000, 0x10FFFF]
20+
const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
21+
//~^ ERROR only `u8` can be cast into `char`
22+
const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
23+
//~^ ERROR only `u8` can be cast into `char`
24+
const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
25+
//~^ ERROR only `u8` can be cast into `char`
26+
const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
27+
//~^ ERROR only `u8` can be cast into `char`
28+
const VALID_HIGH_5: char = 0x1F600 as char; // emoji
29+
//~^ ERROR only `u8` can be cast into `char`
30+
31+
// Invalid cases - should show InvalidCharCast
32+
33+
// Surrogate range [0xD800, 0xDFFF] - reserved for UTF-16
34+
const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
35+
//~^ ERROR: cannot cast
36+
const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
37+
//~^ ERROR: cannot cast
38+
const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
39+
//~^ ERROR: cannot cast
40+
41+
// Too large values (> 0x10FFFF)
42+
const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
43+
//~^ ERROR: cannot cast
44+
const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
45+
//~^ ERROR: cannot cast
46+
const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
47+
//~^ ERROR: cannot cast
48+
const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
49+
//~^ ERROR: cannot cast
50+
51+
// Boundary cases
52+
const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
53+
//~^ ERROR only `u8` can be cast into `char`
54+
const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
555
//~^ ERROR only `u8` can be cast into `char`
6-
const XY: char = 129160 as char;
56+
const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
757
//~^ ERROR only `u8` can be cast into `char`
8-
const ZYX: char = '\u{01F888}';
9-
println!("{}", XYZ);
1058
}

tests/ui/cast/cast-char.stderr

Lines changed: 124 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
error: only `u8` can be cast into `char`
2-
--> $DIR/cast-char.rs:4:23
2+
--> $DIR/cast-char.rs:12:31
33
|
4-
LL | const XYZ: char = 0x1F888 as char;
5-
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
4+
LL | const VALID_LOW_1: char = 0x1000 as char; // 4096
5+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1000}'`
66
|
77
note: the lint level is defined here
88
--> $DIR/cast-char.rs:1:9
@@ -11,10 +11,127 @@ LL | #![deny(overflowing_literals)]
1111
| ^^^^^^^^^^^^^^^^^^^^
1212

1313
error: only `u8` can be cast into `char`
14-
--> $DIR/cast-char.rs:6:22
14+
--> $DIR/cast-char.rs:14:31
1515
|
16-
LL | const XY: char = 129160 as char;
17-
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
16+
LL | const VALID_LOW_2: char = 0xD7FF as char; // last valid in lower range
17+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FF}'`
1818

19-
error: aborting due to 2 previous errors
19+
error: only `u8` can be cast into `char`
20+
--> $DIR/cast-char.rs:16:31
21+
|
22+
LL | const VALID_LOW_3: char = 0x0500 as char; // cyrillic range
23+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{500}'`
24+
25+
error: only `u8` can be cast into `char`
26+
--> $DIR/cast-char.rs:20:32
27+
|
28+
LL | const VALID_HIGH_1: char = 0xE000 as char; // first valid in upper range
29+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E000}'`
30+
31+
error: only `u8` can be cast into `char`
32+
--> $DIR/cast-char.rs:22:32
33+
|
34+
LL | const VALID_HIGH_2: char = 0x1F888 as char; // 129160 - example from issue
35+
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F888}'`
36+
37+
error: only `u8` can be cast into `char`
38+
--> $DIR/cast-char.rs:24:32
39+
|
40+
LL | const VALID_HIGH_3: char = 0x10FFFF as char; // maximum valid Unicode
41+
| ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFF}'`
42+
43+
error: only `u8` can be cast into `char`
44+
--> $DIR/cast-char.rs:26:32
45+
|
46+
LL | const VALID_HIGH_4: char = 0xFFFD as char; // replacement character
47+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{FFFD}'`
48+
49+
error: only `u8` can be cast into `char`
50+
--> $DIR/cast-char.rs:28:32
51+
|
52+
LL | const VALID_HIGH_5: char = 0x1F600 as char; // emoji
53+
| ^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{1F600}'`
54+
55+
error: cannot cast `55296` to `char`
56+
--> $DIR/cast-char.rs:34:39
57+
|
58+
LL | const INVALID_SURROGATE_1: char = 0xD800 as char; // first surrogate
59+
| ^^^^^^^^^^^^^^
60+
|
61+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
62+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
63+
64+
error: cannot cast `57343` to `char`
65+
--> $DIR/cast-char.rs:36:39
66+
|
67+
LL | const INVALID_SURROGATE_2: char = 0xDFFF as char; // last surrogate
68+
| ^^^^^^^^^^^^^^
69+
|
70+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
71+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
72+
73+
error: cannot cast `56064` to `char`
74+
--> $DIR/cast-char.rs:38:39
75+
|
76+
LL | const INVALID_SURROGATE_3: char = 0xDB00 as char; // middle of surrogate range
77+
| ^^^^^^^^^^^^^^
78+
|
79+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
80+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
81+
82+
error: cannot cast `1114112` to `char`
83+
--> $DIR/cast-char.rs:42:37
84+
|
85+
LL | const INVALID_TOO_BIG_1: char = 0x110000 as char; // one more than maximum
86+
| ^^^^^^^^^^^^^^^^
87+
|
88+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
89+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
90+
91+
error: cannot cast `15698056` to `char`
92+
--> $DIR/cast-char.rs:44:37
93+
|
94+
LL | const INVALID_TOO_BIG_2: char = 0xEF8888 as char; // example from issue
95+
| ^^^^^^^^^^^^^^^^
96+
|
97+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
98+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
99+
100+
error: cannot cast `2097151` to `char`
101+
--> $DIR/cast-char.rs:46:37
102+
|
103+
LL | const INVALID_TOO_BIG_3: char = 0x1FFFFF as char; // much larger
104+
| ^^^^^^^^^^^^^^^^
105+
|
106+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
107+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
108+
109+
error: cannot cast `16777215` to `char`
110+
--> $DIR/cast-char.rs:48:37
111+
|
112+
LL | const INVALID_TOO_BIG_4: char = 0xFFFFFF as char; // 24-bit maximum
113+
| ^^^^^^^^^^^^^^^^
114+
|
115+
= note: char must be a valid Unicode scalar value (code points in ranges [0x0, 0xD7FF] or [0xE000, 0x10FFFF])
116+
= note: The gap [0xD800, 0xDFFF] is reserved for UTF-16 surrogates and is invalid
117+
118+
error: only `u8` can be cast into `char`
119+
--> $DIR/cast-char.rs:52:30
120+
|
121+
LL | const BOUNDARY_1: char = 0xD7FE as char; // valid, before surrogate
122+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{D7FE}'`
123+
124+
error: only `u8` can be cast into `char`
125+
--> $DIR/cast-char.rs:54:30
126+
|
127+
LL | const BOUNDARY_2: char = 0xE001 as char; // valid, after surrogate
128+
| ^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{E001}'`
129+
130+
error: only `u8` can be cast into `char`
131+
--> $DIR/cast-char.rs:56:30
132+
|
133+
LL | const BOUNDARY_3: char = 0x10FFFE as char; // valid, near maximum
134+
| ^^^^^^^^^^^^^^^^ help: use a `char` literal instead: `'\u{10FFFE}'`
135+
136+
error: aborting due to 18 previous errors
20137

0 commit comments

Comments
 (0)