Skip to content

Commit e454446

Browse files
committed
Intern units for fast hashing
This commit starts to intern `Unit` structures for a few reasons:

* This primarily makes equality and hashing much faster. We have tons of hash lookups with units, and they were showing up quite high in profiles. It turns out `Unit` hashes a *ton* of data, and most of it is always redundant. To handle this, each unit's data is now hashed only once, and hashing/equality of a `Unit` are just pointer checks.
* The size of `Unit` is now drastically reduced to just one pointer, so movement of units throughout the backend should be much more efficient.
1 parent bd8253f commit e454446

File tree

10 files changed

+218
-130
lines changed

10 files changed

+218
-130
lines changed

src/cargo/core/compiler/build_context/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use crate::core::{Dependency, Workspace};
99
use crate::core::{PackageId, PackageSet, Resolve};
1010
use crate::util::errors::CargoResult;
1111
use crate::util::{profile, Cfg, Config, Rustc};
12-
13-
use super::{BuildConfig, BuildOutput, Kind, Unit};
12+
use crate::core::compiler::{Unit, Kind, BuildConfig, BuildOutput};
13+
use crate::core::compiler::unit::UnitInterner;
1414

1515
mod target_info;
1616
pub use self::target_info::{FileFlavor, TargetInfo};
@@ -37,6 +37,7 @@ pub struct BuildContext<'a, 'cfg: 'a> {
3737
pub target_config: TargetConfig,
3838
pub target_info: TargetInfo,
3939
pub host_info: TargetInfo,
40+
pub units: &'a UnitInterner<'a>,
4041
}
4142

4243
impl<'a, 'cfg> BuildContext<'a, 'cfg> {
@@ -47,6 +48,7 @@ impl<'a, 'cfg> BuildContext<'a, 'cfg> {
4748
config: &'cfg Config,
4849
build_config: &'a BuildConfig,
4950
profiles: &'a Profiles,
51+
units: &'a UnitInterner<'a>,
5052
extra_compiler_args: HashMap<Unit<'a>, Vec<String>>,
5153
) -> CargoResult<BuildContext<'a, 'cfg>> {
5254
let mut rustc = config.load_global_rustc(Some(ws))?;
@@ -82,6 +84,7 @@ impl<'a, 'cfg> BuildContext<'a, 'cfg> {
8284
build_config,
8385
profiles,
8486
extra_compiler_args,
87+
units,
8588
})
8689
}
8790

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::env;
44
use std::path::PathBuf;
55
use std::str::{self, FromStr};
66

7-
use super::Kind;
7+
use crate::core::compiler::Kind;
88
use crate::core::TargetKind;
99
use crate::util::CfgExpr;
1010
use crate::util::{CargoResult, CargoResultExt, Cfg, Config, ProcessBuilder, Rustc};

src/cargo/core/compiler/build_plan.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ impl BuildPlan {
109109
}
110110
}
111111

112-
pub fn add(&mut self, cx: &Context<'_, '_>, unit: &Unit<'_>) -> CargoResult<()> {
112+
pub fn add<'a>(&mut self, cx: &Context<'a, '_>, unit: &Unit<'a>) -> CargoResult<()> {
113113
let id = self.plan.invocations.len();
114114
self.invocation_map.insert(unit.buildkey(), id);
115115
let deps = cx

src/cargo/core/compiler/context/compilation_files.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ use std::sync::Arc;
88
use lazycell::LazyCell;
99
use log::info;
1010

11-
use super::{BuildContext, Context, FileFlavor, Kind, Layout, Unit};
11+
use super::{BuildContext, Context, FileFlavor, Kind, Layout};
12+
use crate::core::compiler::Unit;
1213
use crate::core::{TargetKind, Workspace};
1314
use crate::util::{self, CargoResult};
1415

src/cargo/core/compiler/context/mod.rs

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ use std::sync::Arc;
88
use jobserver::Client;
99

1010
use crate::core::compiler::compilation;
11-
use crate::core::profiles::Profile;
12-
use crate::core::{Package, PackageId, Resolve, Target};
11+
use crate::core::compiler::Unit;
12+
use crate::core::{Package, PackageId, Resolve};
1313
use crate::util::errors::{CargoResult, CargoResultExt};
14-
use crate::util::{internal, profile, short_hash, Config};
14+
use crate::util::{internal, profile, Config};
1515

1616
use super::build_plan::BuildPlan;
1717
use super::custom_build::{self, BuildDeps, BuildScripts, BuildState};
@@ -27,49 +27,6 @@ mod compilation_files;
2727
use self::compilation_files::CompilationFiles;
2828
pub use self::compilation_files::{Metadata, OutputFile};
2929

30-
/// All information needed to define a unit.
31-
///
32-
/// A unit is an object that has enough information so that cargo knows how to build it.
33-
/// For example, if your package has dependencies, then every dependency will be built as a library
34-
/// unit. If your package is a library, then it will be built as a library unit as well, or if it
35-
/// is a binary with `main.rs`, then a binary will be output. There are also separate unit types
36-
/// for `test`ing and `check`ing, amongst others.
37-
///
38-
/// The unit also holds information about all possible metadata about the package in `pkg`.
39-
///
40-
/// A unit needs to know extra information in addition to the type and root source file. For
41-
/// example, it needs to know the target architecture (OS, chip arch etc.) and it needs to know
42-
/// whether you want a debug or release build. There is enough information in this struct to figure
43-
/// all that out.
44-
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)]
45-
pub struct Unit<'a> {
46-
/// Information about available targets, which files to include/exclude, etc. Basically stuff in
47-
/// `Cargo.toml`.
48-
pub pkg: &'a Package,
49-
/// Information about the specific target to build, out of the possible targets in `pkg`. Not
50-
/// to be confused with *target-triple* (or *target architecture* ...), the target arch for a
51-
/// build.
52-
pub target: &'a Target,
53-
/// The profile contains information about *how* the build should be run, including debug
54-
/// level, etc.
55-
pub profile: Profile,
56-
/// Whether this compilation unit is for the host or target architecture.
57-
///
58-
/// For example, when
59-
/// cross compiling and using a custom build script, the build script needs to be compiled for
60-
/// the host architecture so the host rustc can use it (when compiling to the target
61-
/// architecture).
62-
pub kind: Kind,
63-
/// The "mode" this unit is being compiled for. See [`CompileMode`] for more details.
64-
pub mode: CompileMode,
65-
}
66-
67-
impl<'a> Unit<'a> {
68-
pub fn buildkey(&self) -> String {
69-
format!("{}-{}", self.pkg.name(), short_hash(self))
70-
}
71-
}
72-
7330
pub struct Context<'a, 'cfg: 'a> {
7431
pub bcx: &'a BuildContext<'a, 'cfg>,
7532
pub compilation: Compilation<'cfg>,

src/cargo/core/compiler/context/unit_dependencies.rs

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ use std::collections::{HashMap, HashSet};
2020

2121
use log::trace;
2222

23-
use super::{BuildContext, CompileMode, Kind, Unit};
23+
use super::{BuildContext, CompileMode, Kind};
24+
use crate::core::compiler::Unit;
2425
use crate::core::dependency::Kind as DepKind;
2526
use crate::core::package::Downloads;
2627
use crate::core::profiles::UnitFor;
@@ -350,7 +351,7 @@ fn compute_deps_doc<'a, 'cfg, 'tmp>(
350351

351352
fn maybe_lib<'a>(
352353
unit: &Unit<'a>,
353-
bcx: &BuildContext<'_, '_>,
354+
bcx: &BuildContext<'a, '_>,
354355
unit_for: UnitFor,
355356
) -> Option<(Unit<'a>, UnitFor)> {
356357
unit.pkg.targets().iter().find(|t| t.linkable()).map(|t| {
@@ -369,7 +370,7 @@ fn maybe_lib<'a>(
369370
/// build script.
370371
fn dep_build_script<'a>(
371372
unit: &Unit<'a>,
372-
bcx: &BuildContext<'_, '_>,
373+
bcx: &BuildContext<'a, '_>,
373374
) -> Option<(Unit<'a>, UnitFor)> {
374375
unit.pkg
375376
.targets()
@@ -378,16 +379,15 @@ fn dep_build_script<'a>(
378379
.map(|t| {
379380
// The profile stored in the Unit is the profile for the thing
380381
// the custom build script is running for.
381-
(
382-
Unit {
383-
pkg: unit.pkg,
384-
target: t,
385-
profile: bcx.profiles.get_profile_run_custom_build(&unit.profile),
386-
kind: unit.kind,
387-
mode: CompileMode::RunCustomBuild,
388-
},
389-
UnitFor::new_build(),
390-
)
382+
let unit = bcx.units.intern(
383+
unit.pkg,
384+
t,
385+
bcx.profiles.get_profile_run_custom_build(&unit.profile),
386+
unit.kind,
387+
CompileMode::RunCustomBuild,
388+
);
389+
390+
(unit, UnitFor::new_build())
391391
})
392392
}
393393

@@ -410,7 +410,7 @@ fn check_or_build_mode(mode: CompileMode, target: &Target) -> CompileMode {
410410
}
411411

412412
fn new_unit<'a>(
413-
bcx: &BuildContext<'_, '_>,
413+
bcx: &BuildContext<'a, '_>,
414414
pkg: &'a Package,
415415
target: &'a Target,
416416
unit_for: UnitFor,
@@ -424,13 +424,8 @@ fn new_unit<'a>(
424424
mode,
425425
bcx.build_config.release,
426426
);
427-
Unit {
428-
pkg,
429-
target,
430-
profile,
431-
kind,
432-
mode,
433-
}
427+
428+
bcx.units.intern(pkg, target, profile, kind, mode)
434429
}
435430

436431
/// Fill in missing dependencies for units of the `RunCustomBuild`

src/cargo/core/compiler/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ mod job;
99
mod job_queue;
1010
mod layout;
1111
mod output_depinfo;
12+
mod unit;
1213

1314
use std::env;
1415
use std::ffi::{OsStr, OsString};
@@ -26,13 +27,14 @@ pub use self::build_config::{BuildConfig, CompileMode, MessageFormat};
2627
pub use self::build_context::{BuildContext, FileFlavor, TargetConfig, TargetInfo};
2728
use self::build_plan::BuildPlan;
2829
pub use self::compilation::{Compilation, Doctest};
29-
pub use self::context::{Context, Unit};
30+
pub use self::context::Context;
3031
pub use self::custom_build::{BuildMap, BuildOutput, BuildScripts};
31-
use self::job::{Job, Work};
3232
pub use self::job::Freshness;
33+
use self::job::{Job, Work};
3334
use self::job_queue::JobQueue;
3435
pub use self::layout::is_bad_artifact_name;
3536
use self::output_depinfo::output_depinfo;
37+
pub use crate::core::compiler::unit::{Unit, UnitInterner};
3638
use crate::core::manifest::TargetSourcePath;
3739
use crate::core::profiles::{Lto, PanicStrategy, Profile};
3840
use crate::core::{PackageId, Target};

src/cargo/core/compiler/unit.rs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
use crate::core::compiler::{CompileMode, Kind};
2+
use crate::core::{profiles::Profile, Package, Target};
3+
use crate::util::hex::short_hash;
4+
use std::cell::RefCell;
5+
use std::collections::HashSet;
6+
use std::fmt;
7+
use std::hash::{Hash, Hasher};
8+
use std::ops::Deref;
9+
10+
/// All information needed to define a unit.
11+
///
12+
/// A unit is an object that has enough information so that cargo knows how to build it.
13+
/// For example, if your package has dependencies, then every dependency will be built as a library
14+
/// unit. If your package is a library, then it will be built as a library unit as well, or if it
15+
/// is a binary with `main.rs`, then a binary will be output. There are also separate unit types
16+
/// for `test`ing and `check`ing, amongst others.
17+
///
18+
/// The unit also holds information about all possible metadata about the package in `pkg`.
19+
///
20+
/// A unit needs to know extra information in addition to the type and root source file. For
21+
/// example, it needs to know the target architecture (OS, chip arch etc.) and it needs to know
22+
/// whether you want a debug or release build. There is enough information in this struct to figure
23+
/// all that out.
24+
// Only cloning and ordering are derived here. `Hash` and `PartialEq`/`Eq` are
// written by hand elsewhere in this file and work on the address of `inner`.
#[derive(Clone, Copy, PartialOrd, Ord)]
25+
pub struct Unit<'a> {
26+
/// Reference to the field data of this unit; `Unit` dereferences to it.
inner: &'a UnitInner<'a>,
27+
}
28+
29+
/// Internal fields of `Unit` which `Unit` will dereference to.
///
/// Unlike `Unit`, hashing, equality and ordering are all derived for this type, so they go
/// through every field rather than through an address.
30+
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
31+
pub struct UnitInner<'a> {
32+
/// Information about available targets, which files to include/exclude, etc. Basically stuff in
33+
/// `Cargo.toml`.
34+
pub pkg: &'a Package,
35+
/// Information about the specific target to build, out of the possible targets in `pkg`. Not
36+
/// to be confused with *target-triple* (or *target architecture* ...), the target arch for a
37+
/// build.
38+
pub target: &'a Target,
39+
/// The profile contains information about *how* the build should be run, including debug
40+
/// level, etc.
41+
pub profile: Profile,
42+
/// Whether this compilation unit is for the host or target architecture.
43+
///
44+
/// For example, when
45+
/// cross compiling and using a custom build script, the build script needs to be compiled for
46+
/// the host architecture so the host rustc can use it (when compiling to the target
47+
/// architecture).
48+
pub kind: Kind,
49+
/// The "mode" this unit is being compiled for. See [`CompileMode`] for more details.
50+
pub mode: CompileMode,
51+
}
52+
53+
impl<'a> Unit<'a> {
54+
pub fn buildkey(&self) -> String {
55+
format!("{}-{}", self.pkg.name(), short_hash(self))
56+
}
57+
}
58+
59+
// Just hash the pointer for fast hashing
60+
impl<'a> Hash for Unit<'a> {
61+
fn hash<H: Hasher>(&self, hasher: &mut H) {
62+
(self.inner as *const UnitInner<'a>).hash(hasher)
63+
}
64+
}
65+
66+
// Just equate the pointer since these are interned
67+
impl<'a> PartialEq for Unit<'a> {
68+
fn eq(&self, other: &Unit<'a>) -> bool {
69+
self.inner as *const UnitInner<'a> == other.inner as *const UnitInner<'a>
70+
}
71+
}
72+
73+
impl<'a> Eq for Unit<'a> {}
74+
75+
impl<'a> Deref for Unit<'a> {
76+
type Target = UnitInner<'a>;
77+
78+
fn deref(&self) -> &UnitInner<'a> {
79+
self.inner
80+
}
81+
}
82+
83+
impl<'a> fmt::Debug for Unit<'a> {
84+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
85+
f.debug_struct("Unit")
86+
.field("pkg", &self.pkg)
87+
.field("target", &self.target)
88+
.field("profile", &self.profile)
89+
.field("kind", &self.kind)
90+
.field("mode", &self.mode)
91+
.finish()
92+
}
93+
}
94+
95+
/// Hands out `Unit` values backed by a cache of `UnitInner` data.
///
/// The cache sits behind a `RefCell`, so it can be updated through a shared
/// reference to the interner.
95+
pub struct UnitInterner<'a> {
96+
/// Mutable interner state (the cache of interned values).
state: RefCell<InternerState<'a>>,
97+
}
98+
99+
/// Mutable part of `UnitInterner`.
99+
struct InternerState<'a> {
100+
/// Set of interned values; each one is individually boxed.
cache: HashSet<Box<UnitInner<'a>>>,
101+
}
102+
103+
impl<'a> UnitInterner<'a> {
104+
pub fn new() -> UnitInterner<'a> {
105+
UnitInterner {
106+
state: RefCell::new(InternerState {
107+
cache: HashSet::new(),
108+
}),
109+
}
110+
}
111+
112+
pub fn intern(
113+
&'a self,
114+
pkg: &'a Package,
115+
target: &'a Target,
116+
profile: Profile,
117+
kind: Kind,
118+
mode: CompileMode,
119+
) -> Unit<'a> {
120+
let inner = self.intern_inner(&UnitInner {
121+
pkg,
122+
target,
123+
profile,
124+
kind,
125+
mode,
126+
});
127+
Unit { inner }
128+
}
129+
130+
fn intern_inner(&'a self, item: &UnitInner<'a>) -> &'a UnitInner<'a> {
131+
let mut me = self.state.borrow_mut();
132+
if let Some(item) = me.cache.get(item) {
133+
return unsafe { &*(&**item as *const UnitInner<'a>) };
134+
}
135+
me.cache.insert(Box::new(item.clone()));
136+
let item = me.cache.get(item).unwrap();
137+
return unsafe { &*(&**item as *const UnitInner<'a>) };
138+
}
139+
}

0 commit comments

Comments
 (0)