From 02bcb61da35b1f7e4dec0b0a5dcee88698c89982 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 13:32:13 -0500 Subject: [PATCH 1/8] test(embedded): Copy frontmatter tests from rustc I left out those dealing with proc-macros or `include` --- src/cargo/util/toml/embedded.rs | 447 ++++++++++++++++++++++++++++++++ 1 file changed, 447 insertions(+) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 6ed4858b27d..5772ed206f8 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -216,6 +216,453 @@ mod test_expand { } } + #[test] + fn rustc_dot_in_infostring_leading() { + assert_source( + r#"---.toml +//~^ ERROR: invalid infostring for frontmatter +--- + +// infostrings cannot have leading dots + +fn main() {} +"#, + str![[r#" +shebang: None +info: ".toml" +frontmatter: "//~^ ERROR: invalid infostring for frontmatter\n" +content: "\n// infostrings cannot have leading dots\n\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_dot_in_infostring_non_leading() { + assert_err( + ScriptSource::parse( + r#"---Cargo.toml +--- + +// infostrings can contain dots as long as a dot isn't the first character. +//@ check-pass + +fn main() {} +"#, + ), + str!["no closing `---` found for frontmatter"], + ); + } + + #[test] + fn rustc_escape() { + assert_source( + r#"---- + +--- + +---- + +//@ check-pass + +// This test checks that longer dashes for opening and closing can be used to +// escape sequences such as three dashes inside the frontmatter block. 
+ +fn main() {} +"#, + str![[r#" +shebang: None +info: None +frontmatter: "\n---\n\n" +content: "\n//@ check-pass\n\n// This test checks that longer dashes for opening and closing can be used to\n// escape sequences such as three dashes inside the frontmatter block.\n\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_extra_after_end() { + assert_err( + ScriptSource::parse( + r#"--- +---cargo +//~^ ERROR: extra characters after frontmatter close are not allowed + +fn main() {} +"#, + ), + str!["no closing `---` found for frontmatter"], + ); + } + + #[test] + fn rustc_frontmatter_after_tokens() { + assert_source( + r#"#![feature(frontmatter)] + +--- +//~^ ERROR: expected item, found `-` +// FIXME(frontmatter): make this diagnostic better +--- + +// frontmatters must be at the start of a file. This test ensures that. + +fn main() {} +"#, + str![[r##" +shebang: None +info: None +frontmatter: None +content: "#![feature(frontmatter)]\n\n---\n//~^ ERROR: expected item, found `-`\n// FIXME(frontmatter): make this diagnostic better\n---\n\n// frontmatters must be at the start of a file. This test ensures that.\n\nfn main() {}\n" + +"##]], + ); + } + + #[test] + fn rustc_frontmatter_non_lexible_tokens() { + assert_source( + r#"---uwu +🏳️‍⚧️ +--- + +//@ check-pass + +// check that frontmatter blocks can have tokens that are otherwise not accepted by +// the lexer as Rust code. + +fn main() {} +"#, + str![[r#" +shebang: None +info: "uwu" +frontmatter: "🏳\u{fe0f}\u{200d}⚧\u{fe0f}\n" +content: "\n//@ check-pass\n\n// check that frontmatter blocks can have tokens that are otherwise not accepted by\n// the lexer as Rust code.\n\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_frontmatter_whitespace_1() { + assert_source( + r#" --- +//~^ ERROR: invalid preceding whitespace for frontmatter opening + --- +//~^ ERROR: invalid preceding whitespace for frontmatter close + +// check that whitespaces should not precede the frontmatter opening or close. 
+ +fn main() {} +"#, + str![[r#" +shebang: None +info: None +frontmatter: None +content: " ---\n//~^ ERROR: invalid preceding whitespace for frontmatter opening\n ---\n//~^ ERROR: invalid preceding whitespace for frontmatter close\n\n// check that whitespaces should not precede the frontmatter opening or close.\n\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_frontmatter_whitespace_2() { + assert_err( + ScriptSource::parse( + r#"---cargo + +//@ compile-flags: --crate-type lib + +fn foo(x: i32) -> i32 { + ---x + //~^ ERROR: invalid preceding whitespace for frontmatter close + //~| ERROR: extra characters after frontmatter close are not allowed +} +//~^ ERROR: unexpected closing delimiter: `}` + +// this test is for the weird case that valid Rust code can have three dashes +// within them and get treated as a frontmatter close. +"#, + ), + str!["no closing `---` found for frontmatter"], + ); + } + + #[test] + fn rustc_frontmatter_whitespace_3() { + assert_err( + ScriptSource::parse( + r#" + + +---cargo +--- + +// please note the whitespace characters after the first four lines. +// This ensures that we accept whitespaces before the frontmatter, after +// the frontmatter opening and the frontmatter close. + +//@ check-pass +// ignore-tidy-end-whitespace +// ignore-tidy-leading-newlines + +fn main() {} +"#, + ), + str!["no closing `---` found for frontmatter"], + ); + } + + #[test] + fn rustc_frontmatter_whitespace_4() { + assert_err( + ScriptSource::parse( + r#"--- cargo +--- + +//@ check-pass +// A frontmatter infostring can have leading whitespace. + +fn main() {} +"#, + ), + str!["no closing `---` found for frontmatter"], + ); + } + + #[test] + fn rustc_infostring_fail() { + assert_source( + r#" +---cargo,clippy +//~^ ERROR: invalid infostring for frontmatter +--- + +// infostrings can only be a single identifier. 
+ +fn main() {} +"#, + str![[r#" +shebang: None +info: "cargo,clippy" +frontmatter: "//~^ ERROR: invalid infostring for frontmatter\n" +content: "\n// infostrings can only be a single identifier.\n\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_mismatch_1() { + assert_err( + ScriptSource::parse( + r#"---cargo +//~^ ERROR: frontmatter close does not match the opening +---- + +// there must be the same number of dashes for both the opening and the close +// of the frontmatter. + +fn main() {} +"#, + ), + str!["unexpected trailing content on closing fence: `-`"], + ); + } + + #[test] + fn rustc_mismatch_2() { + assert_err( + ScriptSource::parse( + r#"----cargo +//~^ ERROR: frontmatter close does not match the opening +---cargo +//~^ ERROR: extra characters after frontmatter close are not allowed + +fn main() {} +"#, + ), + str!["no closing `----` found for frontmatter"], + ); + } + + #[test] + fn rustc_multifrontmatter_2() { + assert_source( + r#"--- + --- +//~^ ERROR: invalid preceding whitespace for frontmatter close + + --- +//~^ ERROR: expected item, found `-` +// FIXME(frontmatter): make this diagnostic better +--- + +fn main() {} +"#, + str![[r#" +shebang: None +info: None +frontmatter: " ---\n//~^ ERROR: invalid preceding whitespace for frontmatter close\n\n ---\n//~^ ERROR: expected item, found `-`\n// FIXME(frontmatter): make this diagnostic better\n" +content: "\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_multifrontmatter() { + assert_source( + r#"--- +--- + +--- +//~^ ERROR: expected item, found `-` +// FIXME(frontmatter): make this diagnostic better +--- + +// test that we do not parse another frontmatter block after the first one. 
+ +fn main() {} +"#, + str![[r#" +shebang: None +info: None +frontmatter: "---\n\n" +content: "//~^ ERROR: expected item, found `-`\n// FIXME(frontmatter): make this diagnostic better\n---\n\n// test that we do not parse another frontmatter block after the first one.\n\nfn main() {}\n" + +"#]], + ); + } + + #[test] + fn rustc_shebang() { + assert_source( + r#"#!/usr/bin/env -S cargo -Zscript +--- +[dependencies] +clap = "4" +--- + +//@ check-pass + +// Shebangs on a file can precede a frontmatter. + +fn main () {} +"#, + str![[r##" +shebang: "#!/usr/bin/env -S cargo -Zscript\n" +info: None +frontmatter: "[dependencies]\nclap = \"4\"\n" +content: "\n//@ check-pass\n\n// Shebangs on a file can precede a frontmatter.\n\nfn main () {}\n" + +"##]], + ); + } + + #[test] + fn rustc_unclosed_1() { + assert_err( + ScriptSource::parse( + r#"----cargo +//~^ ERROR: unclosed frontmatter + +// This test checks that the #! characters can help us recover a frontmatter +// close. There should not be a "missing `main` function" error as the rest +// are properly parsed. + +fn main() {} +"#, + ), + str!["no closing `----` found for frontmatter"], + ); + } + + #[test] + fn rustc_unclosed_2() { + assert_err( + ScriptSource::parse( + r#"----cargo +//~^ ERROR: unclosed frontmatter +//~| ERROR: frontmatters are experimental + +//@ compile-flags: --crate-type lib + +// Leading whitespace on the feature line prevents recovery. However +// the dashes quoted will not be used for recovery and the entire file +// should be treated as within the frontmatter block. + +fn foo() -> &str { + "----" +} +"#, + ), + str!["no closing `----` found for frontmatter"], + ); + } + + #[test] + fn rustc_unclosed_3() { + assert_err( + ScriptSource::parse( + r#"----cargo +//~^ ERROR: frontmatter close does not match the opening + +//@ compile-flags: --crate-type lib + +// Unfortunate recovery situation. Not really preventable with improving the +// recovery strategy, but this type of code is rare enough already. 
+ +fn foo(x: i32) -> i32 { + ---x + //~^ ERROR: invalid preceding whitespace for frontmatter close + //~| ERROR: extra characters after frontmatter close are not allowed +} +//~^ ERROR: unexpected closing delimiter: `}` +"#, + ), + str!["no closing `----` found for frontmatter"], + ); + } + + #[test] + fn rustc_unclosed_4() { + assert_err( + ScriptSource::parse( + r#" +----cargo +//~^ ERROR: unclosed frontmatter + +//! Similarly, a module-level content should allow for recovery as well (as +//! per unclosed-1.rs) + +fn main() {} +"#, + ), + str!["no closing `----` found for frontmatter"], + ); + } + + #[test] + fn rustc_unclosed_5() { + assert_err( + ScriptSource::parse( + r#"----cargo +//~^ ERROR: unclosed frontmatter +//~| ERROR: frontmatters are experimental + +// Similarly, a use statement should allow for recovery as well (as +// per unclosed-1.rs) + +use std::env; + +fn main() {} +"#, + ), + str!["no closing `----` found for frontmatter"], + ); + } + #[test] fn split_default() { assert_source( From b4c96847f28a276f196f6f0e519e89aa753b563e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 13:56:12 -0500 Subject: [PATCH 2/8] refactor(embedded): Pull in parser from rust-lang/rust#137193 --- src/cargo/util/toml/embedded.rs | 81 +++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 5772ed206f8..4438cebbeb4 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -78,59 +78,70 @@ impl<'s> ScriptSource<'s> { source.content = content; } - const FENCE_CHAR: char = '-'; - let mut rest = source.content; - while !rest.is_empty() { - let without_spaces = rest.trim_start_matches([' ', '\t']); - let without_nl = without_spaces.trim_start_matches(['\r', '\n']); - if without_nl == rest { - // nothing trimmed - break; - } else if without_nl == without_spaces { - // frontmatter must come after a newline + + // Whitespace may 
precede a frontmatter but must end with a newline + const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n']; + let trimmed = rest.trim_start_matches(WHITESPACE); + if trimmed.len() != rest.len() { + let trimmed_len = rest.len() - trimmed.len(); + let last_trimmed_index = trimmed_len - 1; + if rest.as_bytes()[last_trimmed_index] != b'\n' { + // either not a frontmatter or invalid opening return Ok(source); } - rest = without_nl; } - let fence_end = rest + rest = trimmed; + + // Opens with a line that starts with 3 or more `-` followed by an optional identifier + const FENCE_CHAR: char = '-'; + let fence_length = rest .char_indices() .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) - .unwrap_or(source.content.len()); - let (fence_pattern, rest) = match fence_end { + .unwrap_or(rest.len()); + match fence_length { 0 => { return Ok(source); } 1 | 2 => { + // either not a frontmatter or invalid frontmatter opening anyhow::bail!( - "found {fence_end} `{FENCE_CHAR}` in rust frontmatter, expected at least 3" + "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3" ) } - _ => rest.split_at(fence_end), + _ => {} + } + let (fence_pattern, rest) = rest.split_at(fence_length); + let Some(info_end_index) = rest.find('\n') else { + anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - let nl_fence_pattern = format!("\n{fence_pattern}"); - let (info, content) = rest.split_once("\n").unwrap_or((rest, "")); - let info = info.trim(); + let (info, rest) = rest.split_at(info_end_index); + let info = info.trim_matches(WHITESPACE); if !info.is_empty() { source.info = Some(info); } - source.content = content; + let rest = rest + .strip_prefix('\n') + .expect("earlier `found` + `split_at` left us here"); - let Some(frontmatter_nl) = source.content.find(&nl_fence_pattern) else { + // Ends with a line that starts with a matching number of `-` only followed by whitespace + let nl_fence_pattern = format!("\n{fence_pattern}"); + let 
Some(frontmatter_nl) = rest.find(&nl_fence_pattern) else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - source.frontmatter = Some(&source.content[..frontmatter_nl + 1]); - source.content = &source.content[frontmatter_nl + nl_fence_pattern.len()..]; - - let (line, content) = source - .content - .split_once("\n") - .unwrap_or((source.content, "")); - let line = line.trim(); - if !line.is_empty() { - anyhow::bail!("unexpected trailing content on closing fence: `{line}`"); + let frontmatter = &rest[..frontmatter_nl + 1]; + let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; + source.frontmatter = Some(frontmatter); + + let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, "")); + let after_closing_fence = after_closing_fence.trim_matches(WHITESPACE); + if !after_closing_fence.is_empty() { + // extra characters beyond the original fence pattern, even if they are extra `-` + anyhow::bail!("trailing characters found after frontmatter close"); } - source.content = content; + + let frontmatter_len = input.len() - rest.len(); + source.content = &input[frontmatter_len..]; Ok(source) } @@ -466,7 +477,7 @@ content: "\n// infostrings can only be a single identifier.\n\nfn main() {}\n" fn main() {} "#, ), - str!["unexpected trailing content on closing fence: `-`"], + str!["trailing characters found after frontmatter close"], ); } @@ -905,7 +916,7 @@ content: "\nfn main() {}\n" fn main() {} "#, ), - str!["unexpected trailing content on closing fence: `--`"], + str!["trailing characters found after frontmatter close"], ); } @@ -942,7 +953,7 @@ time="0.1.25" fn main() {} "#, ), - str!["unexpected trailing content on closing fence: `-`"], + str!["trailing characters found after frontmatter close"], ); } From 5b6d5df49a03e3df21c322655a670118270c6053 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 13:57:22 -0500 Subject: [PATCH 3/8] test(embedded): Note our lack of infostring validator --- 
src/cargo/util/toml/embedded.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 4438cebbeb4..dc51e97eba3 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -229,6 +229,7 @@ mod test_expand { #[test] fn rustc_dot_in_infostring_leading() { + // We don't validate infostrings besides `info == "cargo"` assert_source( r#"---.toml //~^ ERROR: invalid infostring for frontmatter @@ -443,6 +444,7 @@ fn main() {} #[test] fn rustc_infostring_fail() { + // We don't validate infostrings besides `info == "cargo"` assert_source( r#" ---cargo,clippy From a8d714b403a4f6557ff18516ffe6b1a7ec7f62c8 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 14:01:54 -0500 Subject: [PATCH 4/8] fix(embedded): Allow empty frontmatter --- src/cargo/util/toml/embedded.rs | 56 ++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index dc51e97eba3..9bfa80ff1f0 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -120,9 +120,6 @@ impl<'s> ScriptSource<'s> { if !info.is_empty() { source.info = Some(info); } - let rest = rest - .strip_prefix('\n') - .expect("earlier `found` + `split_at` left us here"); // Ends with a line that starts with a matching number of `-` only followed by whitespace let nl_fence_pattern = format!("\n{fence_pattern}"); @@ -130,8 +127,11 @@ impl<'s> ScriptSource<'s> { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; let frontmatter = &rest[..frontmatter_nl + 1]; - let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; + let frontmatter = frontmatter + .strip_prefix('\n') + .expect("earlier `found` + `split_at` left us here"); source.frontmatter = Some(frontmatter); + let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; let (after_closing_fence, rest) = 
rest.split_once("\n").unwrap_or((rest, "")); let after_closing_fence = after_closing_fence.trim_matches(WHITESPACE); @@ -251,9 +251,8 @@ content: "\n// infostrings cannot have leading dots\n\nfn main() {}\n" #[test] fn rustc_dot_in_infostring_non_leading() { - assert_err( - ScriptSource::parse( - r#"---Cargo.toml + assert_source( + r#"---Cargo.toml --- // infostrings can contain dots as long as a dot isn't the first character. @@ -261,8 +260,13 @@ content: "\n// infostrings cannot have leading dots\n\nfn main() {}\n" fn main() {} "#, - ), - str!["no closing `---` found for frontmatter"], + str![[r#" +shebang: None +info: "Cargo.toml" +frontmatter: "" +content: "\n// infostrings can contain dots as long as a dot isn't the first character.\n//@ check-pass\n\nfn main() {}\n" + +"#]], ); } @@ -303,7 +307,7 @@ content: "\n//@ check-pass\n\n// This test checks that longer dashes for opening fn main() {} "#, ), - str!["no closing `---` found for frontmatter"], + str!["trailing characters found after frontmatter close"], ); } @@ -402,9 +406,8 @@ fn foo(x: i32) -> i32 { #[test] fn rustc_frontmatter_whitespace_3() { - assert_err( - ScriptSource::parse( - r#" + assert_source( + r#" ---cargo @@ -420,16 +423,20 @@ fn foo(x: i32) -> i32 { fn main() {} "#, - ), - str!["no closing `---` found for frontmatter"], + str![[r#" +shebang: None +info: "cargo" +frontmatter: "" +content: "\n// please note the whitespace characters after the first four lines.\n// This ensures that we accept whitespaces before the frontmatter, after\n// the frontmatter opening and the frontmatter close.\n\n//@ check-pass\n// ignore-tidy-end-whitespace\n// ignore-tidy-leading-newlines\n\nfn main() {}\n" + +"#]], ); } #[test] fn rustc_frontmatter_whitespace_4() { - assert_err( - ScriptSource::parse( - r#"--- cargo + assert_source( + r#"--- cargo --- //@ check-pass @@ -437,8 +444,13 @@ fn main() {} fn main() {} "#, - ), - str!["no closing `---` found for frontmatter"], + str![[r#" +shebang: None +info: "cargo" 
+frontmatter: "" +content: "\n//@ check-pass\n// A frontmatter infostring can have leading whitespace.\n\nfn main() {}\n" + +"#]], ); } @@ -541,8 +553,8 @@ fn main() {} str![[r#" shebang: None info: None -frontmatter: "---\n\n" -content: "//~^ ERROR: expected item, found `-`\n// FIXME(frontmatter): make this diagnostic better\n---\n\n// test that we do not parse another frontmatter block after the first one.\n\nfn main() {}\n" +frontmatter: "" +content: "\n---\n//~^ ERROR: expected item, found `-`\n// FIXME(frontmatter): make this diagnostic better\n---\n\n// test that we do not parse another frontmatter block after the first one.\n\nfn main() {}\n" "#]], ); From ff0ed7276db2ac094947ed0295589f8c8bdc03a1 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 14:03:12 -0500 Subject: [PATCH 5/8] test(embedded): Note our lack of rust grammar parsing --- src/cargo/util/toml/embedded.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 9bfa80ff1f0..01b2d0c1669 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -313,6 +313,7 @@ fn main() {} #[test] fn rustc_frontmatter_after_tokens() { + // Deferred to rustc since this requires knowledge of Rust grammar assert_source( r#"#![feature(frontmatter)] From c3a97125248da7bcdc1061232c0c6f2d344afe67 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 14:05:25 -0500 Subject: [PATCH 6/8] test(embedded): Note rustc bug --- src/cargo/util/toml/embedded.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 01b2d0c1669..a88ff4f1d07 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -514,6 +514,7 @@ fn main() {} #[test] fn rustc_multifrontmatter_2() { + // This should be valid, bug on rustc's side, see rust-lang/rust#141367 assert_source( r#"--- --- From 70f09a012d73c565593844a38aaf77f1a6261318 Mon Sep 17 
00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 15:16:01 -0500 Subject: [PATCH 7/8] fix(embedded): Disallow ambiguous multiple frontmatters We could say that we delegate this to rustc but if they add support for multiple frontmatters, we need to update to be able to know which we should read, so it's better to error on our side. --- src/cargo/util/toml/embedded.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index a88ff4f1d07..5f0c244371b 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -143,6 +143,11 @@ impl<'s> ScriptSource<'s> { let frontmatter_len = input.len() - rest.len(); source.content = &input[frontmatter_len..]; + let repeat = Self::parse(source.content)?; + if repeat.frontmatter.is_some() { + anyhow::bail!("only one frontmatter is supported"); + } + Ok(source) } @@ -362,6 +367,7 @@ content: "\n//@ check-pass\n\n// check that frontmatter blocks can have tokens t #[test] fn rustc_frontmatter_whitespace_1() { + // Deferred to rustc since this requires knowledge of Rust grammar assert_source( r#" --- //~^ ERROR: invalid preceding whitespace for frontmatter opening @@ -539,8 +545,9 @@ content: "\nfn main() {}\n" #[test] fn rustc_multifrontmatter() { - assert_source( - r#"--- + assert_err( + ScriptSource::parse( + r#"--- --- --- @@ -552,13 +559,8 @@ content: "\nfn main() {}\n" fn main() {} "#, - str![[r#" -shebang: None -info: None -frontmatter: "" -content: "\n---\n//~^ ERROR: expected item, found `-`\n// FIXME(frontmatter): make this diagnostic better\n---\n\n// test that we do not parse another frontmatter block after the first one.\n\nfn main() {}\n" - -"#]], + ), + str!["only one frontmatter is supported"], ); } From 58c22565886e25621bffd56f0adacb87d3c94477 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 21 May 2025 15:21:32 -0500 Subject: [PATCH 8/8] fix(embedded): Match rustc's whitespace definition --- 
src/cargo/util/toml/embedded.rs | 41 +++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index 5f0c244371b..70638254116 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -81,8 +81,7 @@ impl<'s> ScriptSource<'s> { let mut rest = source.content; // Whitespace may precede a frontmatter but must end with a newline - const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n']; - let trimmed = rest.trim_start_matches(WHITESPACE); + let trimmed = rest.trim_start_matches(is_whitespace); if trimmed.len() != rest.len() { let trimmed_len = rest.len() - trimmed.len(); let last_trimmed_index = trimmed_len - 1; @@ -116,7 +115,7 @@ impl<'s> ScriptSource<'s> { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; let (info, rest) = rest.split_at(info_end_index); - let info = info.trim_matches(WHITESPACE); + let info = info.trim_matches(is_whitespace); if !info.is_empty() { source.info = Some(info); } @@ -134,7 +133,7 @@ impl<'s> ScriptSource<'s> { let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, "")); - let after_closing_fence = after_closing_fence.trim_matches(WHITESPACE); + let after_closing_fence = after_closing_fence.trim_matches(is_whitespace); if !after_closing_fence.is_empty() { // extra characters beyond the original fence pattern, even if they are extra `-` anyhow::bail!("trailing characters found after frontmatter close"); @@ -188,6 +187,40 @@ fn strip_shebang(input: &str) -> Option { None } +/// True if `c` is considered a whitespace according to Rust language definition. +/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html) +/// for definitions of these classes. 
+/// +/// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs `is_whitespace` +fn is_whitespace(c: char) -> bool { + // This is Pattern_White_Space. + // + // Note that this set is stable (ie, it doesn't change with different + // Unicode versions), so it's ok to just hard-code the values. + + matches!( + c, + // Usual ASCII suspects + '\u{0009}' // \t + | '\u{000A}' // \n + | '\u{000B}' // vertical tab + | '\u{000C}' // form feed + | '\u{000D}' // \r + | '\u{0020}' // space + + // NEXT LINE from latin1 + | '\u{0085}' + + // Bidi markers + | '\u{200E}' // LEFT-TO-RIGHT MARK + | '\u{200F}' // RIGHT-TO-LEFT MARK + + // Dedicated whitespace characters from Unicode + | '\u{2028}' // LINE SEPARATOR + | '\u{2029}' // PARAGRAPH SEPARATOR + ) +} + #[cfg(test)] mod test_expand { use snapbox::assert_data_eq;