Commit dbd2f30
Rollup merge of #143708 - epage:pretty, r=compiler-errors
fix: Include frontmatter in `-Zunpretty` output

In the implementation (#140035), this was left as an open question for the tracking issue (#136889). My assumption is that the frontmatter should be carried over into the output. The test was carried over from #137193, which was superseded by #140035. Thankfully, `-Zunpretty` is unstable either way, so we can always change this even if we stabilize frontmatter.
2 parents 140f2fa + a11ee56 commit dbd2f30
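
For context, "frontmatter" is the metadata block at the top of a single-file Cargo package ("cargo script"), the feature tracked in #136889. A minimal example of an affected input, mirroring the lexer tests added below (the `clap` dependency is purely illustrative):

---cargo
[dependencies]
clap = "4"
---

fn main() {}

Before this change the block was missing from `-Zunpretty` output; with it, the pretty-printer carries the frontmatter through verbatim.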

File tree

14 files changed: +157 −39 lines changed

compiler/rustc_ast_pretty/src/pprust/state.rs

Lines changed: 9 additions & 1 deletion
@@ -120,7 +120,7 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
         pos += shebang_len;
     }
 
-    for token in rustc_lexer::tokenize(&text[pos..]) {
+    for token in rustc_lexer::tokenize(&text[pos..], rustc_lexer::FrontmatterAllowed::Yes) {
         let token_text = &text[pos..pos + token.len as usize];
         match token.kind {
             rustc_lexer::TokenKind::Whitespace => {
@@ -171,6 +171,14 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
                     })
                 }
             }
+            rustc_lexer::TokenKind::Frontmatter { .. } => {
+                code_to_the_left = false;
+                comments.push(Comment {
+                    style: CommentStyle::Isolated,
+                    lines: vec![token_text.to_string()],
+                    pos: start_bpos + BytePos(pos as u32),
+                });
+            }
             _ => {
                 code_to_the_left = true;
             }
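
The effect of these two hunks: `gather_comments` now lexes with frontmatter enabled and surfaces the whole block as a single `Comment` with `CommentStyle::Isolated` at its original position, so the pretty-printer emits it verbatim rather than losing it. This is the core of the fix; the remaining files are API fallout from the new `tokenize` parameter.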

compiler/rustc_lexer/src/lib.rs

Lines changed: 19 additions & 10 deletions
@@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize>
     if let Some(input_tail) = input.strip_prefix("#!") {
         // Ok, this is a shebang but if the next non-whitespace token is `[`,
         // then it may be valid Rust code, so consider it Rust code.
-        let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| {
-            !matches!(
-                tok,
-                TokenKind::Whitespace
-                    | TokenKind::LineComment { doc_style: None }
-                    | TokenKind::BlockComment { doc_style: None, .. }
-            )
-        });
+        let next_non_whitespace_token =
+            tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
+                !matches!(
+                    tok,
+                    TokenKind::Whitespace
+                        | TokenKind::LineComment { doc_style: None }
+                        | TokenKind::BlockComment { doc_style: None, .. }
+                )
+            });
         if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
             // No other choice than to consider this a shebang.
             return Some(2 + input_tail.lines().next().unwrap_or_default().len());
@@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 }
 
 /// Creates an iterator that produces tokens from the input string.
-pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
-    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
+///
+/// When parsing a full Rust document,
+/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`].
+///
+/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`].
+pub fn tokenize(
+    input: &str,
+    frontmatter_allowed: FrontmatterAllowed,
+) -> impl Iterator<Item = Token> {
+    let mut cursor = Cursor::new(input, frontmatter_allowed);
     std::iter::from_fn(move || {
         let token = cursor.advance_token();
         if token.kind != TokenKind::Eof { Some(token) } else { None }
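
To make the new calling convention concrete, here is a minimal sketch of a caller, assuming a `rustc_lexer` build that includes this change (the `lex_document` helper is hypothetical):

use rustc_lexer::{strip_shebang, tokenize, FrontmatterAllowed};

/// Lex a complete source file the way the doc comment above recommends:
/// strip any shebang first, then allow a frontmatter block at the start
/// of what remains.
fn lex_document(src: &str) {
    let start = strip_shebang(src).unwrap_or(0);
    for token in tokenize(&src[start..], FrontmatterAllowed::Yes) {
        println!("{token:?}");
    }
}

Callers that lex an arbitrary sub-slice of a file pass `FrontmatterAllowed::No` instead, since a frontmatter is only valid at the very top of a document.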

compiler/rustc_lexer/src/tests.rs

Lines changed: 85 additions & 2 deletions
@@ -124,15 +124,17 @@ fn test_valid_shebang() {
     assert_eq!(strip_shebang(input), None);
 }
 
-fn check_lexing(src: &str, expect: Expect) {
-    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
+fn check_lexing(src: &str, frontmatter_allowed: FrontmatterAllowed, expect: Expect) {
+    let actual: String =
+        tokenize(src, frontmatter_allowed).map(|token| format!("{:?}\n", token)).collect();
     expect.assert_eq(&actual)
 }
 
 #[test]
 fn smoke_test() {
     check_lexing(
         "/* my source file */ fn main() { println!(\"zebra\"); }\n",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
             Token { kind: Whitespace, len: 1 }
@@ -171,6 +173,7 @@ fn comment_flavors()
 /** outer doc block */
 /*! inner doc block */
 ",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Whitespace, len: 1 }
             Token { kind: LineComment { doc_style: None }, len: 7 }
@@ -199,6 +202,7 @@ fn comment_flavors()
 fn nested_block_comments() {
     check_lexing(
         "/* /* */ */'a'",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
             Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
@@ -210,6 +214,7 @@ fn nested_block_comments()
 fn characters() {
     check_lexing(
         "'a' ' ' '\\n'",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
             Token { kind: Whitespace, len: 1 }
@@ -224,6 +229,7 @@ fn characters()
 fn lifetime() {
     check_lexing(
         "'abc",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Lifetime { starts_with_number: false }, len: 4 }
         "#]],
@@ -234,6 +240,7 @@ fn lifetime()
 fn raw_string() {
     check_lexing(
         "r###\"\"#a\\b\x00c\"\"###",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
         "#]],
@@ -257,6 +264,7 @@ b"a"
 r###"raw"###suffix
 br###"raw"###suffix
 "####,
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Whitespace, len: 1 }
             Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
@@ -286,3 +294,78 @@ br###"raw"###suffix
         "#]],
     )
 }
+
+#[test]
+fn frontmatter_allowed() {
+    check_lexing(
+        r#"
+---cargo
+[dependencies]
+clap = "4"
+---
+
+fn main() {}
+"#,
+        FrontmatterAllowed::Yes,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Frontmatter { has_invalid_preceding_whitespace: false, invalid_infostring: false }, len: 38 }
+            Token { kind: Whitespace, len: 2 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
+#[test]
+fn frontmatter_disallowed() {
+    check_lexing(
+        r#"
+---cargo
+[dependencies]
+clap = "4"
+---
+
+fn main() {}
+"#,
+        FrontmatterAllowed::No,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Ident, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBracket, len: 1 }
+            Token { kind: Ident, len: 12 }
+            Token { kind: CloseBracket, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Eq, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Whitespace, len: 2 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
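
As a sanity check on the expected `frontmatter_allowed` output: the single `Frontmatter` token spans the whole block, `---cargo\n` (9 bytes) + `[dependencies]\n` (15) + `clap = "4"\n` (11) + `---` (3) = 38 bytes, matching `len: 38`; the raw string's leading newline is the preceding `Whitespace` token of `len: 1`. In `frontmatter_disallowed`, the identical input decomposes into `Minus`, `Ident`, `OpenBracket`, and string-literal tokens because no `Frontmatter` token is ever produced.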

src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@ use clippy_utils::visitors::{Descend, for_each_expr};
 use hir::HirId;
 use rustc_hir as hir;
 use rustc_hir::{Block, BlockCheckMode, ItemKind, Node, UnsafeSource};
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{LateContext, LateLintPass, LintContext};
 use rustc_session::impl_lint_pass;
 use rustc_span::{BytePos, Pos, RelativeBytePos, Span, SyntaxContext};
@@ -746,7 +746,7 @@ fn text_has_safety_comment(src: &str, line_starts: &[RelativeBytePos], start_pos
     loop {
         if line.starts_with("/*") {
             let src = &src[line_start..line_starts.last().unwrap().to_usize()];
-            let mut tokens = tokenize(src);
+            let mut tokens = tokenize(src, FrontmatterAllowed::No);
             return (src[..tokens.next().unwrap().len as usize]
                 .to_ascii_uppercase()
                 .contains("SAFETY:")
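
The same mechanical pattern repeats in each of the remaining clippy and rust-analyzer hunks below: every call site that tokenizes a sub-slice of a file (a snippet, a span's source text, text after an offset) now passes `FrontmatterAllowed::No` explicitly, which matches the behavior `tokenize` had before it grew the parameter, so these changes are behavior-preserving.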

src/tools/clippy/clippy_lints/src/utils/format_args_collector.rs

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@ use clippy_utils::source::SpanRangeExt;
 use itertools::Itertools;
 use rustc_ast::{Crate, Expr, ExprKind, FormatArgs};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{EarlyContext, EarlyLintPass};
 use rustc_session::impl_lint_pass;
 use rustc_span::{Span, hygiene};
@@ -82,7 +82,7 @@ fn has_span_from_proc_macro(cx: &EarlyContext<'_>, args: &FormatArgs) -> bool {
         .all(|sp| {
             sp.check_source_text(cx, |src| {
                 // text should be either `, name` or `, name =`
-                let mut iter = tokenize(src).filter(|t| {
+                let mut iter = tokenize(src, FrontmatterAllowed::No).filter(|t| {
                     !matches!(
                         t.kind,
                         TokenKind::LineComment { .. } | TokenKind::BlockComment { .. } | TokenKind::Whitespace

src/tools/clippy/clippy_utils/src/consts.rs

Lines changed: 4 additions & 8 deletions
@@ -15,7 +15,7 @@ use rustc_hir::def::{DefKind, Res};
 use rustc_hir::{
     BinOpKind, Block, ConstBlock, Expr, ExprKind, HirId, Item, ItemKind, Node, PatExpr, PatExprKind, QPath, UnOp,
 };
-use rustc_lexer::tokenize;
+use rustc_lexer::{FrontmatterAllowed, tokenize};
 use rustc_lint::LateContext;
 use rustc_middle::mir::ConstValue;
 use rustc_middle::mir::interpret::{Scalar, alloc_range};
@@ -304,9 +304,7 @@ pub fn lit_to_mir_constant<'tcx>(lit: &LitKind, ty: Option<Ty<'tcx>>) -> Constan
     match *lit {
         LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
         LitKind::Byte(b) => Constant::Int(u128::from(b)),
-        LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => {
-            Constant::Binary(s.as_byte_str().to_vec())
-        }
+        LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => Constant::Binary(s.as_byte_str().to_vec()),
         LitKind::Char(c) => Constant::Char(c),
         LitKind::Int(n, _) => Constant::Int(n.get()),
         LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
@@ -568,9 +566,7 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
         } else {
             match &lit.node {
                 LitKind::Str(is, _) => Some(is.is_empty()),
-                LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => {
-                    Some(s.as_byte_str().is_empty())
-                }
+                LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => Some(s.as_byte_str().is_empty()),
                 _ => None,
             }
         }
@@ -715,7 +711,7 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
             && let Some(src) = src.as_str()
         {
             use rustc_lexer::TokenKind::{BlockComment, LineComment, OpenBrace, Semi, Whitespace};
-            if !tokenize(src)
+            if !tokenize(src, FrontmatterAllowed::No)
                 .map(|t| t.kind)
                 .filter(|t| !matches!(t, Whitespace | LineComment { .. } | BlockComment { .. } | Semi))
                 .eq([OpenBrace])

src/tools/clippy/clippy_utils/src/hir_utils.rs

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@ use rustc_hir::{
     Pat, PatExpr, PatExprKind, PatField, PatKind, Path, PathSegment, PrimTy, QPath, Stmt, StmtKind, StructTailExpr,
     TraitBoundModifiers, Ty, TyKind, TyPat, TyPatKind,
 };
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::LateContext;
 use rustc_middle::ty::TypeckResults;
 use rustc_span::{BytePos, ExpnKind, MacroKind, Symbol, SyntaxContext, sym};
@@ -686,7 +686,7 @@ fn reduce_exprkind<'hir>(cx: &LateContext<'_>, kind: &'hir ExprKind<'hir>) -> &'
         // `{}` => `()`
         ([], None)
             if block.span.check_source_text(cx, |src| {
-                tokenize(src)
+                tokenize(src, FrontmatterAllowed::No)
                     .map(|t| t.kind)
                     .filter(|t| {
                         !matches!(

src/tools/clippy/clippy_utils/src/lib.rs

Lines changed: 3 additions & 3 deletions
@@ -106,7 +106,7 @@ use rustc_hir::{
     Param, Pat, PatExpr, PatExprKind, PatKind, Path, PathSegment, QPath, Stmt, StmtKind, TraitFn, TraitItem,
     TraitItemKind, TraitRef, TyKind, UnOp, def,
 };
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{LateContext, Level, Lint, LintContext};
 use rustc_middle::hir::nested_filter;
 use rustc_middle::hir::place::PlaceBase;
@@ -2764,7 +2764,7 @@ pub fn expr_use_ctxt<'tcx>(cx: &LateContext<'tcx>, e: &Expr<'tcx>) -> ExprUseCtx
 /// Tokenizes the input while keeping the text associated with each token.
 pub fn tokenize_with_text(s: &str) -> impl Iterator<Item = (TokenKind, &str, InnerSpan)> {
     let mut pos = 0;
-    tokenize(s).map(move |t| {
+    tokenize(s, FrontmatterAllowed::No).map(move |t| {
         let end = pos + t.len;
         let range = pos as usize..end as usize;
         let inner = InnerSpan::new(range.start, range.end);
@@ -2779,7 +2779,7 @@ pub fn span_contains_comment(sm: &SourceMap, span: Span) -> bool {
     let Ok(snippet) = sm.span_to_snippet(span) else {
         return false;
     };
-    return tokenize(&snippet).any(|token| {
+    return tokenize(&snippet, FrontmatterAllowed::No).any(|token| {
         matches!(
             token.kind,
             TokenKind::BlockComment { .. } | TokenKind::LineComment { .. }

src/tools/clippy/clippy_utils/src/source.rs

Lines changed: 4 additions & 3 deletions
@@ -7,7 +7,7 @@ use std::sync::Arc;
 use rustc_ast::{LitKind, StrStyle};
 use rustc_errors::Applicability;
 use rustc_hir::{BlockCheckMode, Expr, ExprKind, UnsafeSource};
-use rustc_lexer::{LiteralKind, TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, LiteralKind, TokenKind, tokenize};
 use rustc_lint::{EarlyContext, LateContext};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::Session;
@@ -277,7 +277,7 @@ fn map_range(
 }
 
 fn ends_with_line_comment_or_broken(text: &str) -> bool {
-    let Some(last) = tokenize(text).last() else {
+    let Some(last) = tokenize(text, FrontmatterAllowed::No).last() else {
         return false;
     };
     match last.kind {
@@ -310,7 +310,8 @@ fn with_leading_whitespace_inner(lines: &[RelativeBytePos], src: &str, range: Ra
         && ends_with_line_comment_or_broken(&start[prev_start..])
         && let next_line = lines.partition_point(|&pos| pos.to_usize() < range.end)
        && let next_start = lines.get(next_line).map_or(src.len(), |&x| x.to_usize())
-        && tokenize(src.get(range.end..next_start)?).any(|t| !matches!(t.kind, TokenKind::Whitespace))
+        && tokenize(src.get(range.end..next_start)?, FrontmatterAllowed::No)
+            .any(|t| !matches!(t.kind, TokenKind::Whitespace))
     {
         Some(range.start)
     } else {

src/tools/rust-analyzer/crates/parser/src/lexed_str.rs

Lines changed: 6 additions & 4 deletions
@@ -11,8 +11,8 @@
 use std::ops;
 
 use rustc_literal_escaper::{
-    EscapeError, Mode, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
-    unescape_str,
+    unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str, EscapeError,
+    Mode,
 };
 
 use crate::{
@@ -44,7 +44,9 @@ impl<'a> LexedStr<'a> {
 
         // Re-create the tokenizer from scratch every token because `GuardedStrPrefix` is one token in the lexer
        // but we want to split it to two in edition <2024.
-        while let Some(token) = rustc_lexer::tokenize(&text[conv.offset..]).next() {
+        while let Some(token) =
+            rustc_lexer::tokenize(&text[conv.offset..], rustc_lexer::FrontmatterAllowed::No).next()
+        {
             let token_text = &text[conv.offset..][..token.len as usize];
 
             conv.extend_token(&token.kind, token_text);
@@ -58,7 +60,7 @@ impl<'a> LexedStr<'a> {
             return None;
         }
 
-        let token = rustc_lexer::tokenize(text).next()?;
+        let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next()?;
         if token.len as usize != text.len() {
             return None;
         }
