diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
index aff98c63bcb41..def0cb74d295b 100644
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -120,7 +120,7 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
         pos += shebang_len;
     }
 
-    for token in rustc_lexer::tokenize(&text[pos..]) {
+    for token in rustc_lexer::tokenize(&text[pos..], rustc_lexer::FrontmatterAllowed::Yes) {
         let token_text = &text[pos..pos + token.len as usize];
         match token.kind {
             rustc_lexer::TokenKind::Whitespace => {
@@ -171,6 +171,14 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
                     })
                 }
             }
+            rustc_lexer::TokenKind::Frontmatter { .. } => {
+                code_to_the_left = false;
+                comments.push(Comment {
+                    style: CommentStyle::Isolated,
+                    lines: vec![token_text.to_string()],
+                    pos: start_bpos + BytePos(pos as u32),
+                });
+            }
             _ => {
                 code_to_the_left = true;
             }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index e30dbe80248d4..e80196ed5679d 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     if let Some(input_tail) = input.strip_prefix("#!") {
         // Ok, this is a shebang but if the next non-whitespace token is `[`,
         // then it may be valid Rust code, so consider it Rust code.
-        let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| {
-            !matches!(
-                tok,
-                TokenKind::Whitespace
-                    | TokenKind::LineComment { doc_style: None }
-                    | TokenKind::BlockComment { doc_style: None, .. }
-            )
-        });
+        let next_non_whitespace_token =
+            tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
+                !matches!(
+                    tok,
+                    TokenKind::Whitespace
+                        | TokenKind::LineComment { doc_style: None }
+                        | TokenKind::BlockComment { doc_style: None, .. }
+                )
+            });
         if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
             // No other choice than to consider this a shebang.
             return Some(2 + input_tail.lines().next().unwrap_or_default().len());
         }
@@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 }
 
 /// Creates an iterator that produces tokens from the input string.
-pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
-    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
+///
+/// When parsing a full Rust document,
+/// first call [`strip_shebang`] and then allow frontmatter with [`FrontmatterAllowed::Yes`].
+///
+/// When tokenizing a slice of a document, be sure to disallow frontmatter with [`FrontmatterAllowed::No`].
+pub fn tokenize(
+    input: &str,
+    frontmatter_allowed: FrontmatterAllowed,
+) -> impl Iterator<Item = Token> {
+    let mut cursor = Cursor::new(input, frontmatter_allowed);
     std::iter::from_fn(move || {
         let token = cursor.advance_token();
         if token.kind != TokenKind::Eof { Some(token) } else { None }
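
The new doc comment above prescribes a two-step calling convention for whole files. A minimal sketch of a conforming caller, assuming only the `strip_shebang`/`tokenize` API shown in this patch (the `lex_document` helper itself is hypothetical):

use rustc_lexer::{strip_shebang, tokenize, FrontmatterAllowed};

// Hypothetical caller: skip a leading shebang first, then lex the rest of
// the document with frontmatter enabled, as the doc comment recommends.
fn lex_document(src: &str) {
    let shebang_len = strip_shebang(src).unwrap_or(0);
    for token in tokenize(&src[shebang_len..], FrontmatterAllowed::Yes) {
        println!("{:?} ({} bytes)", token.kind, token.len);
    }
}
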
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index fc8d9b9d57bc4..a7357ba38c8e4 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -124,8 +124,9 @@ fn test_valid_shebang() {
     assert_eq!(strip_shebang(input), None);
 }
 
-fn check_lexing(src: &str, expect: Expect) {
-    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
+fn check_lexing(src: &str, frontmatter_allowed: FrontmatterAllowed, expect: Expect) {
+    let actual: String =
+        tokenize(src, frontmatter_allowed).map(|token| format!("{:?}\n", token)).collect();
     expect.assert_eq(&actual)
 }
 
@@ -133,6 +134,7 @@ fn smoke_test() {
     check_lexing(
         "/* my source file */ fn main() { println!(\"zebra\"); }\n",
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
            Token { kind: Whitespace, len: 1 }
@@ -171,6 +173,7 @@ fn comment_flavors() {
 /** outer doc block */
 /*! inner doc block */
 ",
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 7 }
@@ -199,6 +202,7 @@ fn comment_flavors() {
 fn nested_block_comments() {
     check_lexing(
         "/* /* */ */'a'",
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
@@ -210,6 +214,7 @@ fn characters() {
     check_lexing(
         "'a' ' ' '\\n'",
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
@@ -224,6 +229,7 @@ fn lifetime() {
     check_lexing(
         "'abc",
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
        "#]],
@@ -234,6 +240,7 @@ fn raw_string() {
     check_lexing(
         "r###\"\"#a\\b\x00c\"\"###",
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
        "#]],
@@ -257,6 +264,7 @@ b"a"
 r###"raw"###suffix
 br###"raw"###suffix
 "####,
+        FrontmatterAllowed::No,
         expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
@@ -286,3 +294,78 @@ br###"raw"###suffix
        "#]],
     )
 }
+
+#[test]
+fn frontmatter_allowed() {
+    check_lexing(
+        r#"
+---cargo
+[dependencies]
+clap = "4"
+---
+
+fn main() {}
+"#,
+        FrontmatterAllowed::Yes,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Frontmatter { has_invalid_preceding_whitespace: false, invalid_infostring: false }, len: 38 }
+            Token { kind: Whitespace, len: 2 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
+#[test]
+fn frontmatter_disallowed() {
+    check_lexing(
+        r#"
+---cargo
+[dependencies]
+clap = "4"
+---
+
+fn main() {}
+"#,
+        FrontmatterAllowed::No,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Ident, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBracket, len: 1 }
+            Token { kind: Ident, len: 12 }
+            Token { kind: CloseBracket, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Eq, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Whitespace, len: 2 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
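
In the `frontmatter_allowed` expectation above, the entire `---cargo ... ---` block is a single `Frontmatter` token of `len: 38`. A sketch of how a consumer can recover that text by accumulating token lengths, the same bookkeeping the pretty-printer hunk does (`frontmatter_text` is a hypothetical helper, not part of the patch):

use rustc_lexer::{tokenize, FrontmatterAllowed, TokenKind};

// Hypothetical helper: slice the frontmatter block back out of the source
// by tracking each token's byte length.
fn frontmatter_text(src: &str) -> Option<&str> {
    let mut pos = 0;
    for token in tokenize(src, FrontmatterAllowed::Yes) {
        let end = pos + token.len as usize;
        if matches!(token.kind, TokenKind::Frontmatter { .. }) {
            return Some(&src[pos..end]);
        }
        pos = end;
    }
    None
}
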
diff --git a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
index 6cc4b589a7207..8906e654c972a 100644
--- a/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
+++ b/src/tools/clippy/clippy_lints/src/undocumented_unsafe_blocks.rs
@@ -9,7 +9,7 @@ use clippy_utils::visitors::{Descend, for_each_expr};
 use hir::HirId;
 use rustc_hir as hir;
 use rustc_hir::{Block, BlockCheckMode, ItemKind, Node, UnsafeSource};
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{LateContext, LateLintPass, LintContext};
 use rustc_session::impl_lint_pass;
 use rustc_span::{BytePos, Pos, RelativeBytePos, Span, SyntaxContext};
@@ -746,7 +746,7 @@ fn text_has_safety_comment(src: &str, line_starts: &[RelativeBytePos], start_pos
     loop {
         if line.starts_with("/*") {
             let src = &src[line_start..line_starts.last().unwrap().to_usize()];
-            let mut tokens = tokenize(src);
+            let mut tokens = tokenize(src, FrontmatterAllowed::No);
             return (src[..tokens.next().unwrap().len as usize]
                 .to_ascii_uppercase()
                 .contains("SAFETY:")
diff --git a/src/tools/clippy/clippy_lints/src/utils/format_args_collector.rs b/src/tools/clippy/clippy_lints/src/utils/format_args_collector.rs
index 8f314ce7a60ce..6629a67f78bd4 100644
--- a/src/tools/clippy/clippy_lints/src/utils/format_args_collector.rs
+++ b/src/tools/clippy/clippy_lints/src/utils/format_args_collector.rs
@@ -3,7 +3,7 @@ use clippy_utils::source::SpanRangeExt;
 use itertools::Itertools;
 use rustc_ast::{Crate, Expr, ExprKind, FormatArgs};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{EarlyContext, EarlyLintPass};
 use rustc_session::impl_lint_pass;
 use rustc_span::{Span, hygiene};
@@ -82,7 +82,7 @@ fn has_span_from_proc_macro(cx: &EarlyContext<'_>, args: &FormatArgs) -> bool {
         .all(|sp| {
             sp.check_source_text(cx, |src| {
                 // text should be either `, name` or `, name =`
-                let mut iter = tokenize(src).filter(|t| {
+                let mut iter = tokenize(src, FrontmatterAllowed::No).filter(|t| {
                     !matches!(
                         t.kind,
                         TokenKind::LineComment { .. } | TokenKind::BlockComment { .. } | TokenKind::Whitespace
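
Both clippy call sites above lex a sub-slice of a file, so they opt out with `FrontmatterAllowed::No`. The `frontmatter_disallowed` test shows the invariant this preserves: a slice that merely starts with `---` lexes as `Minus` tokens instead of being swallowed as frontmatter. An illustrative check (not part of the patch):

use rustc_lexer::{tokenize, FrontmatterAllowed, TokenKind};

// A mid-file slice must not re-trigger frontmatter detection.
fn main() {
    let snippet = "---cargo";
    let kinds: Vec<TokenKind> =
        tokenize(snippet, FrontmatterAllowed::No).map(|t| t.kind).collect();
    assert_eq!(
        kinds,
        [TokenKind::Minus, TokenKind::Minus, TokenKind::Minus, TokenKind::Ident]
    );
}
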
diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs
index ba0376e4d4066..25afa12e95d63 100644
--- a/src/tools/clippy/clippy_utils/src/consts.rs
+++ b/src/tools/clippy/clippy_utils/src/consts.rs
@@ -15,7 +15,7 @@ use rustc_hir::def::{DefKind, Res};
 use rustc_hir::{
     BinOpKind, Block, ConstBlock, Expr, ExprKind, HirId, Item, ItemKind, Node, PatExpr, PatExprKind, QPath, UnOp,
 };
-use rustc_lexer::tokenize;
+use rustc_lexer::{FrontmatterAllowed, tokenize};
 use rustc_lint::LateContext;
 use rustc_middle::mir::ConstValue;
 use rustc_middle::mir::interpret::{Scalar, alloc_range};
@@ -304,9 +304,7 @@ pub fn lit_to_mir_constant<'tcx>(lit: &LitKind, ty: Option<Ty<'tcx>>) -> Constant
     match *lit {
         LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
         LitKind::Byte(b) => Constant::Int(u128::from(b)),
-        LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => {
-            Constant::Binary(s.as_byte_str().to_vec())
-        }
+        LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => Constant::Binary(s.as_byte_str().to_vec()),
         LitKind::Char(c) => Constant::Char(c),
         LitKind::Int(n, _) => Constant::Int(n.get()),
         LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
@@ -568,9 +566,7 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
             } else {
                 match &lit.node {
                     LitKind::Str(is, _) => Some(is.is_empty()),
-                    LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => {
-                        Some(s.as_byte_str().is_empty())
-                    }
+                    LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => Some(s.as_byte_str().is_empty()),
                     _ => None,
                 }
             }
@@ -715,7 +711,7 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
             && let Some(src) = src.as_str()
         {
             use rustc_lexer::TokenKind::{BlockComment, LineComment, OpenBrace, Semi, Whitespace};
-            if !tokenize(src)
+            if !tokenize(src, FrontmatterAllowed::No)
                 .map(|t| t.kind)
                 .filter(|t| !matches!(t, Whitespace | LineComment { .. } | BlockComment { .. } | Semi))
                 .eq([OpenBrace])
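
The last hunk above is clippy's check that a block's source is just `{}` modulo trivia. Extracted as a standalone sketch (a hypothetical free function; the real code runs inside `ConstEvalCtxt` against a span's snippet):

use rustc_lexer::{tokenize, FrontmatterAllowed};

// Sketch: after dropping whitespace, comments, and stray semicolons, the
// snippet's remaining tokens should be exactly the opening brace.
fn is_empty_block_source(src: &str) -> bool {
    use rustc_lexer::TokenKind::{BlockComment, LineComment, OpenBrace, Semi, Whitespace};
    tokenize(src, FrontmatterAllowed::No)
        .map(|t| t.kind)
        .filter(|t| !matches!(t, Whitespace | LineComment { .. } | BlockComment { .. } | Semi))
        .eq([OpenBrace])
}
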
diff --git a/src/tools/clippy/clippy_utils/src/hir_utils.rs b/src/tools/clippy/clippy_utils/src/hir_utils.rs
index 0ca494f16e31d..6e8dccbccd515 100644
--- a/src/tools/clippy/clippy_utils/src/hir_utils.rs
+++ b/src/tools/clippy/clippy_utils/src/hir_utils.rs
@@ -12,7 +12,7 @@ use rustc_hir::{
     Pat, PatExpr, PatExprKind, PatField, PatKind, Path, PathSegment, PrimTy, QPath, Stmt, StmtKind, StructTailExpr,
     TraitBoundModifiers, Ty, TyKind, TyPat, TyPatKind,
 };
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::LateContext;
 use rustc_middle::ty::TypeckResults;
 use rustc_span::{BytePos, ExpnKind, MacroKind, Symbol, SyntaxContext, sym};
@@ -686,7 +686,7 @@ fn reduce_exprkind<'hir>(cx: &LateContext<'_>, kind: &'hir ExprKind<'hir>) -> &'
             // `{}` => `()`
             ([], None)
                 if block.span.check_source_text(cx, |src| {
-                    tokenize(src)
+                    tokenize(src, FrontmatterAllowed::No)
                         .map(|t| t.kind)
                         .filter(|t| {
                             !matches!(
diff --git a/src/tools/clippy/clippy_utils/src/lib.rs b/src/tools/clippy/clippy_utils/src/lib.rs
index c01f0ffaac9a0..98b4c183b1206 100644
--- a/src/tools/clippy/clippy_utils/src/lib.rs
+++ b/src/tools/clippy/clippy_utils/src/lib.rs
@@ -106,7 +106,7 @@ use rustc_hir::{
     Param, Pat, PatExpr, PatExprKind, PatKind, Path, PathSegment, QPath, Stmt, StmtKind, TraitFn, TraitItem,
     TraitItemKind, TraitRef, TyKind, UnOp, def,
 };
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{LateContext, Level, Lint, LintContext};
 use rustc_middle::hir::nested_filter;
 use rustc_middle::hir::place::PlaceBase;
@@ -2764,7 +2764,7 @@ pub fn expr_use_ctxt<'tcx>(cx: &LateContext<'tcx>, e: &Expr<'tcx>) -> ExprUseCtx
 /// Tokenizes the input while keeping the text associated with each token.
 pub fn tokenize_with_text(s: &str) -> impl Iterator<Item = (TokenKind, &str, InnerSpan)> {
     let mut pos = 0;
-    tokenize(s).map(move |t| {
+    tokenize(s, FrontmatterAllowed::No).map(move |t| {
         let end = pos + t.len;
         let range = pos as usize..end as usize;
         let inner = InnerSpan::new(range.start, range.end);
@@ -2779,7 +2779,7 @@ pub fn span_contains_comment(sm: &SourceMap, span: Span) -> bool {
     let Ok(snippet) = sm.span_to_snippet(span) else {
         return false;
     };
-    return tokenize(&snippet).any(|token| {
+    return tokenize(&snippet, FrontmatterAllowed::No).any(|token| {
         matches!(
             token.kind,
             TokenKind::BlockComment { .. } | TokenKind::LineComment { .. }
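
`tokenize_with_text` above pairs each token with its source text and span. A usage sketch, assuming `clippy_utils` is available as a dependency (the snippet literal is illustrative):

use clippy_utils::tokenize_with_text;
use rustc_lexer::TokenKind;

// Print every line comment in a snippet together with its text.
fn main() {
    let src = "let x = 1; // trailing note";
    for (kind, text, _span) in tokenize_with_text(src) {
        if matches!(kind, TokenKind::LineComment { .. }) {
            println!("comment: {text}");
        }
    }
}
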
diff --git a/src/tools/clippy/clippy_utils/src/source.rs b/src/tools/clippy/clippy_utils/src/source.rs
index 7f2bf99daff20..7d21336be1cd8 100644
--- a/src/tools/clippy/clippy_utils/src/source.rs
+++ b/src/tools/clippy/clippy_utils/src/source.rs
@@ -7,7 +7,7 @@ use std::sync::Arc;
 use rustc_ast::{LitKind, StrStyle};
 use rustc_errors::Applicability;
 use rustc_hir::{BlockCheckMode, Expr, ExprKind, UnsafeSource};
-use rustc_lexer::{LiteralKind, TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, LiteralKind, TokenKind, tokenize};
 use rustc_lint::{EarlyContext, LateContext};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::Session;
@@ -277,7 +277,7 @@ fn map_range(
 }
 
 fn ends_with_line_comment_or_broken(text: &str) -> bool {
-    let Some(last) = tokenize(text).last() else {
+    let Some(last) = tokenize(text, FrontmatterAllowed::No).last() else {
         return false;
     };
     match last.kind {
@@ -310,7 +310,8 @@ fn with_leading_whitespace_inner(lines: &[RelativeBytePos], src: &str, range: Ra
         && ends_with_line_comment_or_broken(&start[prev_start..])
         && let next_line = lines.partition_point(|&pos| pos.to_usize() < range.end)
         && let next_start = lines.get(next_line).map_or(src.len(), |&x| x.to_usize())
-        && tokenize(src.get(range.end..next_start)?).any(|t| !matches!(t.kind, TokenKind::Whitespace))
+        && tokenize(src.get(range.end..next_start)?, FrontmatterAllowed::No)
+            .any(|t| !matches!(t.kind, TokenKind::Whitespace))
     {
         Some(range.start)
     } else {
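
`ends_with_line_comment_or_broken` only needs the final token of the slice, which is why plain `FrontmatterAllowed::No` lexing suffices there. A simplified sketch of its shape (the real match also treats unterminated literals as "broken"):

use rustc_lexer::{tokenize, FrontmatterAllowed, TokenKind};

// Simplified: does the text end inside a line comment or an unterminated
// block comment?
fn ends_open(text: &str) -> bool {
    match tokenize(text, FrontmatterAllowed::No).last() {
        Some(last) => matches!(
            last.kind,
            TokenKind::LineComment { .. } | TokenKind::BlockComment { terminated: false, .. }
        ),
        None => false,
    }
}
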
diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
index e6c92dec68107..bff9acd78fa0f 100644
--- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
+++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
@@ -11,8 +11,8 @@
 use std::ops;
 
 use rustc_literal_escaper::{
-    EscapeError, Mode, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
-    unescape_str,
+    unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str, EscapeError,
+    Mode,
 };
 
 use crate::{
@@ -44,7 +44,9 @@ impl<'a> LexedStr<'a> {
         // Re-create the tokenizer from scratch every token because `GuardedStrPrefix` is one token in the lexer
         // but we want to split it to two in edition <2024.
-        while let Some(token) = rustc_lexer::tokenize(&text[conv.offset..]).next() {
+        while let Some(token) =
+            rustc_lexer::tokenize(&text[conv.offset..], rustc_lexer::FrontmatterAllowed::No).next()
+        {
             let token_text = &text[conv.offset..][..token.len as usize];
 
             conv.extend_token(&token.kind, token_text);
@@ -58,7 +60,7 @@ impl<'a> LexedStr<'a> {
             return None;
         }
 
-        let token = rustc_lexer::tokenize(text).next()?;
+        let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next()?;
         if token.len as usize != text.len() {
             return None;
         }
diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server_impl.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server_impl.rs
index dd576f23ae9fd..662f6257642f0 100644
--- a/src/tools/rust-analyzer/crates/proc-macro-srv/src/server_impl.rs
+++ b/src/tools/rust-analyzer/crates/proc-macro-srv/src/server_impl.rs
@@ -121,7 +121,7 @@ pub(super) fn literal_from_str(
     use proc_macro::bridge::LitKind;
     use rustc_lexer::{LiteralKind, Token, TokenKind};
 
-    let mut tokens = rustc_lexer::tokenize(s);
+    let mut tokens = rustc_lexer::tokenize(s, rustc_lexer::FrontmatterAllowed::No);
 
     let minus_or_lit = tokens.next().unwrap_or(Token { kind: TokenKind::Eof, len: 0 });
 
     let lit = if minus_or_lit.kind == TokenKind::Minus {
diff --git a/src/tools/rust-analyzer/crates/tt/src/lib.rs b/src/tools/rust-analyzer/crates/tt/src/lib.rs
index 14574a6456bda..44123385c8cc3 100644
--- a/src/tools/rust-analyzer/crates/tt/src/lib.rs
+++ b/src/tools/rust-analyzer/crates/tt/src/lib.rs
@@ -579,7 +579,7 @@ where
 {
     use rustc_lexer::LiteralKind;
 
-    let token = rustc_lexer::tokenize(text).next_tuple();
+    let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next_tuple();
     let Some((rustc_lexer::Token {
         kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
         ..
diff --git a/tests/ui/unpretty/frontmatter.rs b/tests/ui/unpretty/frontmatter.rs
new file mode 100644
index 0000000000000..1971808e2a866
--- /dev/null
+++ b/tests/ui/unpretty/frontmatter.rs
@@ -0,0 +1,10 @@
+---
+---
+
+//@ compile-flags: -Zunpretty=normal
+//@ check-pass
+
+#![feature(frontmatter)]
+
+fn main() {
+}
diff --git a/tests/ui/unpretty/frontmatter.stdout b/tests/ui/unpretty/frontmatter.stdout
new file mode 100644
index 0000000000000..2ccbb1b258255
--- /dev/null
+++ b/tests/ui/unpretty/frontmatter.stdout
@@ -0,0 +1,9 @@
+---
+---
+
+//@ compile-flags: -Zunpretty=normal
+//@ check-pass
+
+#![feature(frontmatter)]
+
+fn main() {}
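
The rust-analyzer hunks above lex fragments (a literal passed over the proc-macro bridge, a re-lexed sub-token), so they also pass `FrontmatterAllowed::No`. The "exactly one token" validation used by the second lexed_str.rs hunk, as a standalone sketch:

use rustc_lexer::{tokenize, FrontmatterAllowed};

// Sketch: `text` is a single lexer token iff the first token spans the
// entire input.
fn is_single_token(text: &str) -> bool {
    match tokenize(text, FrontmatterAllowed::No).next() {
        Some(token) => token.len as usize == text.len(),
        None => false,
    }
}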