Skip to content

Commit a9f0a7b

Browse files
authored
Try to hit line lengths limits on windows in testing (#53)
* Try to hit line lengths limits on windows in testing * Initial basic support for splitting arguments * Fix windows build * Fallback to `@-file` when invoking lld
1 parent e00ddf3 commit a9f0a7b

File tree

5 files changed

+364
-71
lines changed

5 files changed

+364
-71
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,10 @@ wat = "1.219.0"
3333
wit-component = "0.219.0"
3434
wit-parser = "0.219.0"
3535
wasi-preview1-component-adapter-provider = "24.0.0"
36+
37+
[target.'cfg(unix)'.dependencies]
38+
libc = "0.2"
39+
40+
[target.'cfg(windows)'.dependencies]
41+
winsplit = "0.1"
42+
windows-sys = { version = "0.59", features = ['Win32_Foundation'] }

src/argfile.rs

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
use anyhow::{Context, Result};
2+
use std::ffi::{OsStr, OsString};
3+
4+
pub fn expand() -> Result<Vec<OsString>> {
5+
let mut expander = Expander::default();
6+
for arg in std::env::args_os() {
7+
expander.push(arg)?;
8+
}
9+
Ok(expander.args)
10+
}
11+
12+
#[derive(Default)]
13+
struct Expander {
14+
args: Vec<OsString>,
15+
}
16+
17+
impl Expander {
18+
fn push(&mut self, arg: OsString) -> Result<()> {
19+
let bytes = arg.as_encoded_bytes();
20+
match bytes.split_first() {
21+
Some((b'@', rest)) => {
22+
self.push_file(unsafe { OsStr::from_encoded_bytes_unchecked(rest) })
23+
}
24+
_ => {
25+
self.args.push(arg);
26+
Ok(())
27+
}
28+
}
29+
}
30+
31+
fn push_file(&mut self, file: &OsStr) -> Result<()> {
32+
let contents =
33+
std::fs::read_to_string(file).with_context(|| format!("failed to read {file:?}"))?;
34+
35+
for part in imp::split(&contents) {
36+
self.push(part.into())?;
37+
}
38+
Ok(())
39+
}
40+
}
41+
42+
#[cfg(not(windows))]
43+
use gnu as imp;
44+
#[cfg(not(windows))]
45+
mod gnu {
46+
pub fn split(s: &str) -> impl Iterator<Item = String> + '_ {
47+
Split { iter: s.chars() }
48+
}
49+
50+
struct Split<'a> {
51+
iter: std::str::Chars<'a>,
52+
}
53+
54+
impl<'a> Iterator for Split<'a> {
55+
type Item = String;
56+
57+
fn next(&mut self) -> Option<String> {
58+
loop {
59+
match self.iter.next()? {
60+
c if c.is_whitespace() => {}
61+
'"' => break Some(self.quoted('"')),
62+
'\'' => break Some(self.quoted('\'')),
63+
c => {
64+
let mut ret = String::new();
65+
self.push(&mut ret, c);
66+
while let Some(next) = self.iter.next() {
67+
if next.is_whitespace() {
68+
break;
69+
}
70+
self.push(&mut ret, next);
71+
}
72+
break Some(ret);
73+
}
74+
}
75+
}
76+
}
77+
}
78+
79+
impl Split<'_> {
80+
fn quoted(&mut self, end: char) -> String {
81+
let mut part = String::new();
82+
while let Some(next) = self.iter.next() {
83+
if next == end {
84+
break;
85+
}
86+
self.push(&mut part, next);
87+
}
88+
part
89+
}
90+
91+
fn push(&mut self, dst: &mut String, ch: char) {
92+
if ch == '\\' {
93+
if let Some(ch) = self.iter.next() {
94+
dst.push(ch);
95+
return;
96+
}
97+
}
98+
dst.push(ch);
99+
}
100+
}
101+
102+
#[test]
103+
fn tests() {
104+
assert_eq!(split("x").collect::<Vec<_>>(), ["x"]);
105+
assert_eq!(split("\\x").collect::<Vec<_>>(), ["x"]);
106+
assert_eq!(split("'x'").collect::<Vec<_>>(), ["x"]);
107+
assert_eq!(split("\"x\"").collect::<Vec<_>>(), ["x"]);
108+
109+
assert_eq!(split("x y").collect::<Vec<_>>(), ["x", "y"]);
110+
assert_eq!(split("x\ny").collect::<Vec<_>>(), ["x", "y"]);
111+
assert_eq!(split("\\x y").collect::<Vec<_>>(), ["x", "y"]);
112+
assert_eq!(split("'x y'").collect::<Vec<_>>(), ["x y"]);
113+
assert_eq!(split("\"x y\"").collect::<Vec<_>>(), ["x y"]);
114+
assert_eq!(split("\"x 'y'\"\n'y'").collect::<Vec<_>>(), ["x 'y'", "y"]);
115+
assert_eq!(
116+
split(
117+
r#"
118+
a\ \\b
119+
z
120+
"x y \\z"
121+
"#
122+
)
123+
.collect::<Vec<_>>(),
124+
["a \\b", "z", "x y \\z"]
125+
);
126+
}
127+
}
128+
129+
#[cfg(windows)]
130+
use windows as imp;
131+
#[cfg(windows)]
132+
mod windows {
133+
pub fn split(s: &str) -> impl Iterator<Item = String> {
134+
winsplit::split(s).into_iter()
135+
}
136+
}

src/lib.rs

Lines changed: 129 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@ use lexopt::Arg;
44
use std::env;
55
use std::ffi::OsString;
66
use std::path::{Path, PathBuf};
7-
use std::process::Command;
7+
use std::process::{Command, ExitStatus};
88
use std::str::FromStr;
99
use wasmparser::Payload;
1010
use wit_component::StringEncoding;
1111
use wit_parser::{Resolve, WorldId};
1212

13+
mod argfile;
14+
1315
/// Representation of a flag passed to `wasm-ld`
1416
///
1517
/// Note that the parsing of flags in `wasm-ld` is not as uniform as parsing
@@ -391,7 +393,7 @@ impl App {
391393
/// in fact `lexopt` is used to filter out `wasm-ld` arguments and `clap`
392394
/// only parses arguments specific to `wasm-component-ld`.
393395
fn parse() -> Result<App> {
394-
let mut args = env::args_os().collect::<Vec<_>>();
396+
let mut args = argfile::expand().context("failed to expand @-response files")?;
395397

396398
// First remove `-flavor wasm` in case this is invoked as a generic LLD
397399
// driver. We can safely ignore that going forward.
@@ -526,8 +528,7 @@ impl App {
526528
}
527529

528530
fn run(&mut self) -> Result<()> {
529-
let mut cmd = self.lld();
530-
let linker = cmd.get_program().to_owned();
531+
let mut lld = self.lld();
531532

532533
// If a temporary output is needed make sure it has the same file name
533534
// as the output of our command itself since LLD will embed this file
@@ -549,16 +550,14 @@ impl App {
549550
// temporary location for wit-component to read and then the real output
550551
// is created after wit-component runs.
551552
if self.skip_wit_component() {
552-
cmd.arg("-o").arg(&self.component.output);
553+
lld.output(&self.component.output);
553554
} else {
554-
cmd.arg("-o").arg(&temp_output);
555+
lld.output(&temp_output);
555556
}
556557

557-
if self.component.verbose {
558-
eprintln!("running LLD: {cmd:?}");
559-
}
560-
let status = cmd
561-
.status()
558+
let linker = &lld.exe;
559+
let status = lld
560+
.status(&temp_dir, &self.lld_args)
562561
.with_context(|| format!("failed to spawn {linker:?}"))?;
563562
if !status.success() {
564563
bail!("failed to invoke LLD: {status}");
@@ -676,38 +675,147 @@ impl App {
676675
|| self.shared
677676
}
678677

679-
fn lld(&self) -> Command {
678+
fn lld(&self) -> Lld {
680679
let mut lld = self.find_lld();
681-
lld.args(&self.lld_args);
682680
if self.component.verbose {
683-
lld.arg("--verbose");
681+
lld.verbose = true
684682
}
685683
lld
686684
}
687685

688-
fn find_lld(&self) -> Command {
686+
fn find_lld(&self) -> Lld {
689687
if let Some(path) = &self.component.wasm_ld_path {
690-
return Command::new(path);
688+
return Lld::new(path);
691689
}
692690

693691
// Search for the first of `wasm-ld` or `rust-lld` in `$PATH`
694692
let wasm_ld = format!("wasm-ld{}", env::consts::EXE_SUFFIX);
695693
let rust_lld = format!("rust-lld{}", env::consts::EXE_SUFFIX);
696694
for entry in env::split_paths(&env::var_os("PATH").unwrap_or_default()) {
697695
if entry.join(&wasm_ld).is_file() {
698-
return Command::new(wasm_ld);
696+
return Lld::new(wasm_ld);
699697
}
700698
if entry.join(&rust_lld).is_file() {
701-
let mut ret = Command::new(rust_lld);
702-
ret.arg("-flavor").arg("wasm");
703-
return ret;
699+
let mut lld = Lld::new(rust_lld);
700+
lld.needs_flavor = true;
701+
return lld;
704702
}
705703
}
706704

707705
// Fall back to `wasm-ld` if the search failed to get an error message
708706
// that indicates that `wasm-ld` was attempted to be found but couldn't
709707
// be found.
710-
Command::new("wasm-ld")
708+
Lld::new("wasm-ld")
709+
}
710+
}
711+
712+
/// Helper structure representing an `lld` invocation.
713+
struct Lld {
714+
exe: PathBuf,
715+
needs_flavor: bool,
716+
verbose: bool,
717+
output: Option<PathBuf>,
718+
}
719+
720+
impl Lld {
721+
fn new(exe: impl Into<PathBuf>) -> Lld {
722+
Lld {
723+
exe: exe.into(),
724+
needs_flavor: false,
725+
verbose: false,
726+
output: None,
727+
}
728+
}
729+
730+
fn output(&mut self, dst: impl Into<PathBuf>) {
731+
self.output = Some(dst.into());
732+
}
733+
734+
fn status(&self, tmpdir: &tempfile::TempDir, args: &[OsString]) -> Result<ExitStatus> {
735+
// If we can probably pass `args` natively, try to do so. In some cases
736+
// though just skip this entirely and go straight to below.
737+
if !self.probably_too_big(args) {
738+
match self.run(args) {
739+
// If this subprocess failed to spawn because the arguments
740+
// were too large, fall through to below.
741+
Err(ref e) if self.command_line_too_big(e) => {
742+
if self.verbose {
743+
eprintln!("command line was too large, trying again...");
744+
}
745+
}
746+
other => return Ok(other?),
747+
}
748+
} else if self.verbose {
749+
eprintln!("arguments probably too large {args:?}");
750+
}
751+
752+
// The `args` are too big to be passed via the command line itself so
753+
// encode the mall using "posix quoting" into an "argfile". This gets
754+
// passed as `@foo` to lld and we also pass `--rsp-quoting=posix` to
755+
// ensure that LLD always uses posix quoting. That means that we don't
756+
// have to implement the dual nature of both posix and windows encoding
757+
// here.
758+
let mut argfile = Vec::new();
759+
for arg in args {
760+
for byte in arg.as_encoded_bytes() {
761+
if *byte == b'\\' || *byte == b' ' {
762+
argfile.push(b'\\');
763+
}
764+
argfile.push(*byte);
765+
}
766+
argfile.push(b'\n');
767+
}
768+
let path = tmpdir.path().join("argfile_tmp");
769+
std::fs::write(&path, &argfile).with_context(|| format!("failed to write {path:?}"))?;
770+
let mut argfile_arg = OsString::from("@");
771+
argfile_arg.push(&path);
772+
let status = self.run(&["--rsp-quoting=posix".into(), argfile_arg.into()])?;
773+
Ok(status)
774+
}
775+
776+
/// Tests whether the `args` array is too large to execute natively.
777+
///
778+
/// Windows `cmd.exe` has a very small limit of around 8k so perform a
779+
/// guess up to 6k. This isn't 100% accurate.
780+
fn probably_too_big(&self, args: &[OsString]) -> bool {
781+
let args_size = args
782+
.iter()
783+
.map(|s| s.as_encoded_bytes().len())
784+
.sum::<usize>();
785+
cfg!(windows) && args_size > 6 * 1024
786+
}
787+
788+
/// Test if the OS failed to spawn a process because the arguments were too
789+
/// long.
790+
fn command_line_too_big(&self, err: &std::io::Error) -> bool {
791+
#[cfg(unix)]
792+
return err.raw_os_error() == Some(libc::E2BIG);
793+
#[cfg(windows)]
794+
return err.raw_os_error()
795+
== Some(windows_sys::Win32::Foundation::ERROR_FILENAME_EXCED_RANGE as i32);
796+
#[cfg(not(any(unix, windows)))]
797+
{
798+
let _ = err;
799+
return false;
800+
}
801+
}
802+
803+
fn run(&self, args: &[OsString]) -> std::io::Result<ExitStatus> {
804+
let mut cmd = Command::new(&self.exe);
805+
if self.needs_flavor {
806+
cmd.arg("-flavor").arg("wasm");
807+
}
808+
cmd.args(args);
809+
if self.verbose {
810+
cmd.arg("--verbose");
811+
}
812+
if let Some(output) = &self.output {
813+
cmd.arg("-o").arg(output);
814+
}
815+
if self.verbose {
816+
eprintln!("running {cmd:?}");
817+
}
818+
cmd.status()
711819
}
712820
}
713821

0 commit comments

Comments
 (0)