Skip to content

Commit a58b0c5

Browse files
committed
Remove some doc collisions.
There are some cases where `cargo doc` will try to document two things with the same crate_name. This attempts to automatically remove some of those duplicates based on some rules:

- Prefers dependencies for the target over dependencies for the host (such as proc-macros).
- Prefers the "newest" version if it comes from the same source.

There are still plenty of situations where there can be collisions, but I'm uncertain on the best way to handle those.
1 parent d4b3a1d commit a58b0c5

File tree

2 files changed

+384
-1
lines changed

2 files changed

+384
-1
lines changed

src/cargo/ops/cargo_compile.rs

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,11 @@ use crate::core::profiles::{Profiles, UnitFor};
3636
use crate::core::resolver::features::{self, FeaturesFor, RequestedFeatures};
3737
use crate::core::resolver::{HasDevUnits, Resolve, ResolveOpts};
3838
use crate::core::{FeatureValue, Package, PackageSet, Shell, Summary, Target};
39-
use crate::core::{PackageId, PackageIdSpec, TargetKind, Workspace};
39+
use crate::core::{PackageId, PackageIdSpec, SourceId, TargetKind, Workspace};
4040
use crate::ops;
4141
use crate::ops::resolve::WorkspaceResolve;
4242
use crate::util::config::Config;
43+
use crate::util::interning::InternedString;
4344
use crate::util::restricted_names::is_glob_pattern;
4445
use crate::util::{closest_msg, profile, CargoResult, StableHasher};
4546

@@ -503,6 +504,12 @@ pub fn create_bcx<'a, 'cfg>(
503504
interner,
504505
)?;
505506

507+
// TODO: In theory, Cargo should also dedupe the roots, but I'm uncertain
508+
// what heuristics to use in that case.
509+
if build_config.mode == (CompileMode::Doc { deps: true }) {
510+
remove_duplicate_doc(build_config, &mut unit_graph);
511+
}
512+
506513
if build_config
507514
.requested_kinds
508515
.iter()
@@ -1455,3 +1462,108 @@ fn opt_patterns_and_names(
14551462
}
14561463
Ok((opt_patterns, opt_names))
14571464
}
1465+
1466+
/// Removes duplicate CompileMode::Doc units that would cause problems with
1467+
/// filename collisions.
1468+
///
1469+
/// Rustdoc only separates units by crate name in the file directory
1470+
/// structure. If any two units with the same crate name exist, this would
1471+
/// cause a filename collision, causing different rustdoc invocations to stomp
1472+
/// on one another's files.
1473+
///
1474+
/// Unfortunately this does not remove all duplicates, as some of them are
1475+
/// either user error, or difficult to remove. Cases that I can think of:
1476+
///
1477+
/// - Same target name in different packages. See the `collision_doc` test.
1478+
/// - Different sources. See `collision_doc_sources` test.
1479+
///
1480+
/// Ideally this would not be necessary.
1481+
fn remove_duplicate_doc(build_config: &BuildConfig, unit_graph: &mut UnitGraph) {
1482+
// NOTE: There is some risk that this can introduce problems because it
1483+
// may create orphans in the unit graph (parts of the tree get detached
1484+
// from the roots). I currently can't think of any ways this will cause a
1485+
// problem because all other parts of Cargo traverse the graph starting
1486+
// from the roots. Perhaps this should scan for detached units and remove
1487+
// them too?
1488+
//
1489+
// First, create a mapping of crate_name -> Unit so we can see where the
1490+
// duplicates are.
1491+
let mut all_docs: HashMap<String, Vec<Unit>> = HashMap::new();
1492+
for unit in unit_graph.keys() {
1493+
if unit.mode.is_doc() {
1494+
all_docs
1495+
.entry(unit.target.crate_name())
1496+
.or_default()
1497+
.push(unit.clone());
1498+
}
1499+
}
1500+
let mut remove = |units: Vec<Unit>, reason: &str| {
1501+
for unit in &units {
1502+
log::debug!(
1503+
"removing duplicate doc due to {} for package {} target `{}`",
1504+
reason,
1505+
unit.pkg,
1506+
unit.target.name()
1507+
);
1508+
unit_graph.remove(unit);
1509+
}
1510+
for unit_deps in unit_graph.values_mut() {
1511+
unit_deps.retain(|unit_dep| !units.iter().any(|unit| *unit == unit_dep.unit));
1512+
}
1513+
};
1514+
// Iterate over the duplicates and try to remove them from unit_graph.
1515+
for (_crate_name, mut units) in all_docs {
1516+
if units.len() == 1 {
1517+
continue;
1518+
}
1519+
// Prefer target over host if --target was not specified.
1520+
if build_config
1521+
.requested_kinds
1522+
.iter()
1523+
.all(CompileKind::is_host)
1524+
{
1525+
let (to_remove, remaining_units): (Vec<Unit>, Vec<Unit>) =
1526+
units.into_iter().partition(|unit| unit.kind.is_host());
1527+
// Note these duplicates may not be real duplicates, since they
1528+
// might get merged in rebuild_unit_graph_shared. Either way, it
1529+
// shouldn't hurt to remove them early (although the report in the
1530+
// log might be confusing).
1531+
remove(to_remove, "host/target merger");
1532+
units = remaining_units;
1533+
if units.len() == 1 {
1534+
continue;
1535+
}
1536+
}
1537+
// Prefer newer versions over older.
1538+
let mut source_map: HashMap<(InternedString, SourceId, CompileKind), Vec<Unit>> =
1539+
HashMap::new();
1540+
for unit in units {
1541+
let pkg_id = unit.pkg.package_id();
1542+
// Note, this does not detect duplicates from different sources.
1543+
source_map
1544+
.entry((pkg_id.name(), pkg_id.source_id(), unit.kind))
1545+
.or_default()
1546+
.push(unit);
1547+
}
1548+
let mut remaining_units = Vec::new();
1549+
for (_key, mut units) in source_map {
1550+
if units.len() > 1 {
1551+
units.sort_by(|a, b| a.pkg.version().partial_cmp(b.pkg.version()).unwrap());
1552+
// Remove any entries with version < newest.
1553+
let newest_version = units.last().unwrap().pkg.version().clone();
1554+
let (to_remove, keep_units): (Vec<Unit>, Vec<Unit>) = units
1555+
.into_iter()
1556+
.partition(|unit| unit.pkg.version() < &newest_version);
1557+
remove(to_remove, "older version");
1558+
remaining_units.extend(keep_units);
1559+
} else {
1560+
remaining_units.extend(units);
1561+
}
1562+
}
1563+
if remaining_units.len() == 1 {
1564+
continue;
1565+
}
1566+
// Are there other heuristics to remove duplicates that would make
1567+
// sense? Maybe prefer path sources over all others?
1568+
}
1569+
}

0 commit comments

Comments
 (0)