Skip to content

Commit dfe3737

Browse files
committed
Optimize performance of cargo package.
This is achieved by restricting `gix status` to the package root, and running it a second time only on the few files of interest outside of the package root. This saves a lot of time compared to the previous implementation, which ran the status on the entire repository.
1 parent d3b85cd commit dfe3737

File tree

1 file changed

+80
-63
lines changed
  • src/cargo/ops/cargo_package

1 file changed

+80
-63
lines changed

src/cargo/ops/cargo_package/vcs.rs

Lines changed: 80 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//! Helpers to gather the VCS information for `cargo package`.
2+
23
use crate::core::{Package, Workspace};
34
use crate::ops::PackageOpts;
45
use crate::sources::PathEntry;
@@ -7,11 +8,11 @@ use anyhow::Context;
78
use cargo_util::paths;
89
use gix::bstr::ByteSlice;
910
use gix::dir::walk::EmissionMode;
11+
use gix::dirwalk::Options;
1012
use gix::index::entry::Mode;
1113
use gix::status::tree_index::TrackRenames;
1214
use gix::worktree::stack::state::ignore::Source;
1315
use serde::Serialize;
14-
use std::collections::HashSet;
1516
use std::path::{Path, PathBuf};
1617
use tracing::debug;
1718

@@ -47,7 +48,7 @@ pub fn check_repo_state(
4748
opts: &PackageOpts<'_>,
4849
) -> CargoResult<Option<VcsInfo>> {
4950
let gctx = ws.gctx();
50-
let Ok(repo) = gix::discover(p.root()) else {
51+
let Ok(mut repo) = gix::discover(p.root()) else {
5152
gctx.shell().verbose(|shell| {
5253
shell.warn(format_args!(
5354
"no (git) VCS found for `{}`",
@@ -115,7 +116,7 @@ pub fn check_repo_state(
115116
path.display(),
116117
workdir.display(),
117118
);
118-
let Some(git) = git(ws, p, src_files, &repo, &opts)? else {
119+
let Some(git) = git(ws, p, src_files, &mut repo, &opts)? else {
119120
// If the git repo lacks an essential field like `sha1`, and since this field has existed from the beginning,
120121
// then don't generate the corresponding file in order to maintain consistency with past behavior.
121122
return Ok(None);
@@ -181,31 +182,32 @@ fn git(
181182
ws: &Workspace<'_>,
182183
pkg: &Package,
183184
src_files: &[PathEntry],
184-
repo: &gix::Repository,
185+
repo: &mut gix::Repository,
185186
opts: &PackageOpts<'_>,
186187
) -> CargoResult<Option<GitVcsInfo>> {
188+
{
189+
let mut config = repo.config_snapshot_mut();
190+
// Threaded index loading currently is only a very minor speedup for the biggest repositories,
190+
// but might trigger creating many threads, so it is turned off here.
192+
config.set_value(&gix::config::tree::Index::THREADS, "false")?;
193+
}
187194
// This is a collection of any dirty or untracked files. This covers:
188195
// - new/modified/deleted/renamed/type change (index or worktree)
189196
// - untracked files (which are "new" worktree files)
190197
// - ignored (in case the user has an `include` directive that
191198
// conflicts with .gitignore).
192-
let (mut dirty_files, mut dirty_files_outside_package_root) = (Vec::new(), Vec::new());
199+
let mut dirty_files = Vec::new();
193200
let workdir = repo.workdir().unwrap();
194201
collect_statuses(
195202
repo,
196203
workdir,
197204
relative_package_root(repo, pkg.root()).as_deref(),
198205
&mut dirty_files,
199-
&mut dirty_files_outside_package_root,
200206
)?;
201207

202208
// Include each submodule so that the error message can provide
203209
// specifically *which* files in a submodule are modified.
204-
status_submodules(
205-
repo,
206-
&mut dirty_files,
207-
&mut dirty_files_outside_package_root,
208-
)?;
210+
status_submodules(repo, &mut dirty_files)?;
209211

210212
// Find the intersection of dirty in git, and the src_files that would
211213
// be packaged. This is a lazy n^2 check, but seems fine with
@@ -230,10 +232,7 @@ fn git(
230232
}
231233
})
232234
.map(|p| p.as_ref())
233-
.chain(
234-
dirty_files_outside_pkg_root(ws, pkg, &dirty_files_outside_package_root, src_files)?
235-
.iter(),
236-
)
235+
.chain(dirty_files_outside_pkg_root(ws, pkg, repo, src_files)?.iter())
237236
.map(|path| {
238237
pathdiff::diff_paths(path, cwd)
239238
.as_ref()
@@ -271,25 +270,17 @@ fn collect_statuses(
271270
workdir: &Path,
272271
relative_package_root: Option<&Path>,
273272
dirty_files: &mut Vec<PathBuf>,
274-
dirty_files_outside_package_root: &mut Vec<PathBuf>,
275273
) -> CargoResult<()> {
276274
let statuses = repo
277275
.status(gix::progress::Discard)?
278-
.dirwalk_options(|opts| {
279-
opts.emit_untracked(gix::dir::walk::EmissionMode::Matching)
280-
// Also pick up ignored files or whole directories
281-
// to specifically catch overzealously ignored source files.
282-
// Later we will match these dirs by prefix, which is why collapsing
283-
// them is desirable here.
284-
.emit_ignored(Some(EmissionMode::CollapseDirectory))
285-
.emit_tracked(false)
286-
.recurse_repositories(false)
287-
.symlinks_to_directories_are_ignored_like_directories(true)
288-
.emit_empty_directories(false)
289-
})
276+
.dirwalk_options(configure_dirwalk)
290277
.tree_index_track_renames(TrackRenames::Disabled)
291278
.index_worktree_submodules(None)
292-
.into_iter(None /* pathspec patterns */)
279+
.into_iter(
280+
relative_package_root.map(|rela_pkg_root| {
281+
gix::path::into_bstr(rela_pkg_root).into_owned()
282+
}), /* pathspec patterns */
283+
)
293284
.with_context(|| {
294285
format!(
295286
"failed to begin git status for repo {}",
@@ -307,11 +298,6 @@ fn collect_statuses(
307298

308299
let rel_path = gix::path::from_bstr(status.location());
309300
let path = workdir.join(&rel_path);
310-
if relative_package_root.is_some_and(|pkg_root| !rel_path.starts_with(pkg_root)) {
311-
dirty_files_outside_package_root.push(path);
312-
continue;
313-
}
314-
315301
// It is OK to include Cargo.lock even if it is ignored.
316302
if path.ends_with("Cargo.lock")
317303
&& matches!(
@@ -330,11 +316,7 @@ fn collect_statuses(
330316
}
331317

332318
/// Helper to collect dirty statuses while recursing into submodules.
333-
fn status_submodules(
334-
repo: &gix::Repository,
335-
dirty_files: &mut Vec<PathBuf>,
336-
dirty_files_outside_package_root: &mut Vec<PathBuf>,
337-
) -> CargoResult<()> {
319+
fn status_submodules(repo: &gix::Repository, dirty_files: &mut Vec<PathBuf>) -> CargoResult<()> {
338320
let Some(submodules) = repo.submodules()? else {
339321
return Ok(());
340322
};
@@ -345,14 +327,8 @@ fn status_submodules(
345327
let Some(workdir) = sub_repo.workdir() else {
346328
continue;
347329
};
348-
status_submodules(&sub_repo, dirty_files, dirty_files_outside_package_root)?;
349-
collect_statuses(
350-
&sub_repo,
351-
workdir,
352-
None,
353-
dirty_files,
354-
dirty_files_outside_package_root,
355-
)?;
330+
status_submodules(&sub_repo, dirty_files)?;
331+
collect_statuses(&sub_repo, workdir, None, dirty_files)?;
356332
}
357333
}
358334
Ok(())
@@ -374,7 +350,7 @@ fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<Path
374350
/// This currently looks at
375351
///
376352
/// * `package.readme` and `package.license-file` pointing to paths outside package root
377-
/// * symlinks targets reside outside package root
353+
/// * symlinks targets residing outside package root
378354
/// * Any change in the root workspace manifest, regardless of what has changed.
379355
///
380356
/// This is required because those paths may link to a file outside the
@@ -383,18 +359,20 @@ fn relative_package_root(repo: &gix::Repository, pkg_root: &Path) -> Option<Path
383359
fn dirty_files_outside_pkg_root(
384360
ws: &Workspace<'_>,
385361
pkg: &Package,
386-
dirty_files_outside_of_package_root: &[PathBuf],
362+
repo: &gix::Repository,
387363
src_files: &[PathEntry],
388-
) -> CargoResult<HashSet<PathBuf>> {
364+
) -> CargoResult<Vec<PathBuf>> {
389365
let pkg_root = pkg.root();
366+
let workdir = repo.workdir().unwrap();
367+
390368
let meta = pkg.manifest().metadata();
391369
let metadata_paths: Vec<_> = [&meta.license_file, &meta.readme]
392370
.into_iter()
393371
.filter_map(|p| p.as_deref())
394372
.map(|path| paths::normalize_path(&pkg_root.join(path)))
395373
.collect();
396374

397-
let dirty_files = src_files
375+
let linked_files_outside_package_root: Vec<_> = src_files
398376
.iter()
399377
.filter(|p| p.is_symlink_or_under_symlink())
400378
.map(|p| p.as_ref().as_path())
@@ -403,19 +381,58 @@ fn dirty_files_outside_pkg_root(
403381
// If inside package root. Don't bother checking git status.
404382
.filter(|p| paths::strip_prefix_canonical(p, pkg_root).is_err())
405383
// Handle files outside package root but under git workdir,
406-
.filter_map(|src_file| {
407-
let canon_src_path = gix::path::realpath_opts(
408-
src_file,
409-
ws.gctx().cwd(),
410-
gix::path::realpath::MAX_SYMLINKS,
384+
.filter_map(|p| paths::strip_prefix_canonical(p, workdir).ok())
385+
.collect();
386+
387+
if linked_files_outside_package_root.is_empty() {
388+
return Ok(Vec::new());
389+
}
390+
391+
let statuses = repo
392+
.status(gix::progress::Discard)?
393+
.dirwalk_options(configure_dirwalk)
394+
// Limit the number of threads used for the worktree status, as the pathspec will
395+
// prevent most paths from being visited anyway, so there is not much work.
396+
.index_worktree_options_mut(|opts| opts.thread_limit = Some(1))
397+
.tree_index_track_renames(TrackRenames::Disabled)
398+
.index_worktree_submodules(None)
399+
.into_iter(
400+
linked_files_outside_package_root
401+
.into_iter()
402+
.map(|p| gix::path::into_bstr(p).into_owned()),
403+
)
404+
.with_context(|| {
405+
format!(
406+
"failed to begin git status for outfor repo {}",
407+
repo.path().display()
411408
)
412-
.unwrap_or_else(|_| src_file.to_owned());
409+
})?;
413410

414-
dirty_files_outside_of_package_root
415-
.iter()
416-
.any(|p| canon_src_path.starts_with(p))
417-
.then_some(canon_src_path)
418-
})
419-
.collect();
411+
let mut dirty_files = Vec::new();
412+
for status in statuses {
413+
let status = status.with_context(|| {
414+
format!(
415+
"failed to retrieve git status from repo {}",
416+
repo.path().display()
417+
)
418+
})?;
419+
420+
let rel_path = gix::path::from_bstr(status.location());
421+
let path = workdir.join(&rel_path);
422+
dirty_files.push(path);
423+
}
420424
Ok(dirty_files)
421425
}
426+
427+
fn configure_dirwalk(opts: Options) -> Options {
428+
opts.emit_untracked(gix::dir::walk::EmissionMode::Matching)
429+
// Also pick up ignored files or whole directories
430+
// to specifically catch overzealously ignored source files.
431+
// Later we will match these dirs by prefix, which is why collapsing
432+
// them is desirable here.
433+
.emit_ignored(Some(EmissionMode::CollapseDirectory))
434+
.emit_tracked(false)
435+
.recurse_repositories(false)
436+
.symlinks_to_directories_are_ignored_like_directories(true)
437+
.emit_empty_directories(false)
438+
}

0 commit comments

Comments
 (0)