Skip to content

Commit df16720

Browse files
committed
v2: Take advantage of rustc's precalculated src hashes.
1 parent 68c94e8 commit df16720

File tree

3 files changed

+129
-34
lines changed

3 files changed

+129
-34
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ lazycell = "1.2.0"
4646
libc = "0.2"
4747
log = "0.4.6"
4848
libgit2-sys = "0.12.14"
49+
md-5 = "0.8"
4950
memchr = "2.1.3"
5051
num_cpus = "1.0"
5152
opener = "0.4"
@@ -56,6 +57,7 @@ semver = { version = "0.10", features = ["serde"] }
5657
serde = { version = "1.0.82", features = ["derive"] }
5758
serde_ignored = "0.1.0"
5859
serde_json = { version = "1.0.30", features = ["raw_value"] }
60+
sha-1 = "0.8"
5961
shell-escape = "0.1.4"
6062
strip-ansi-escapes = "0.1.0"
6163
tar = { version = "0.4.26", default-features = false }

src/cargo/core/compiler/context/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ use std::sync::{Arc, Mutex};
55
use filetime::FileTime;
66
use jobserver::Client;
77

8-
use crate::core::compiler::{self, compilation, Unit};
8+
use crate::core::compiler::{
9+
self, compilation, fingerprint::FileHash, fingerprint::FileSize, Unit,
10+
};
911
use crate::core::PackageId;
1012
use crate::util::errors::{CargoResult, CargoResultExt};
1113
use crate::util::profile;
@@ -38,7 +40,7 @@ pub struct Context<'a, 'cfg> {
3840
/// Fingerprints used to detect if a unit is out-of-date.
3941
pub fingerprints: HashMap<Unit, Arc<Fingerprint>>,
4042
/// Cache of file mtimes to reduce filesystem hits.
41-
pub mtime_cache: HashMap<PathBuf, (FileTime, u32, u64)>,
43+
pub mtime_cache: HashMap<PathBuf, (FileTime, FileSize, FileHash)>,
4244
/// A set used to track which units have been compiled.
4345
/// A unit may appear in the job graph multiple times as a dependency of
4446
/// multiple packages, but it only needs to run once.

src/cargo/core/compiler/fingerprint.rs

Lines changed: 123 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -318,17 +318,18 @@ use std::fs;
318318
use std::hash::{self, Hasher};
319319
use std::io::{self, Read};
320320
use std::path::{Path, PathBuf};
321-
use std::str;
321+
use std::str::{self, FromStr};
322322
use std::sync::{Arc, Mutex};
323323
use std::time::SystemTime;
324324

325325
use anyhow::{bail, format_err};
326326
use filetime::FileTime;
327-
use fxhash::FxHasher;
328327
use log::{debug, info};
328+
use md5::{Digest, Md5};
329329
use serde::de;
330330
use serde::ser;
331331
use serde::{Deserialize, Serialize};
332+
use sha1::Sha1;
332333

333334
use crate::core::compiler::unit_graph::UnitDep;
334335
use crate::core::Package;
@@ -344,8 +345,14 @@ use super::job::{
344345
};
345346
use super::{BuildContext, Context, FileFlavor, Unit};
346347

347-
type FileSize = u32;
348-
type FileHash = u64;
348+
// While source files can't currently be > 4 GiB, binary files could be.
349+
pub type FileSize = u64;
350+
351+
/// Hash of a file's contents, paired with the algorithm that produced it.
#[derive(Clone)]
352+
pub struct FileHash {
353+
/// Algorithm used to compute `hash`.
kind: SourceFileHashAlgorithm,
354+
/// Textual form of the hash. Left empty when no hash is available; code
/// in this file checks `hash.len() > 0` before comparing hashes.
hash: String,
355+
}
349356

350357
/// Determines if a `unit` is up-to-date, and if not prepares necessary work to
351358
/// update the persisted fingerprint.
@@ -751,13 +758,22 @@ impl LocalFingerprint {
751758
LocalFingerprint::RerunIfChanged { output, paths } => {
752759
let c: Vec<_> = paths
753760
.iter()
754-
.map(|p| (pkg_root.join(p), 0u32, 0u64))
761+
.map(|p| {
762+
(
763+
pkg_root.join(p),
764+
0u64,
765+
FileHash {
766+
kind: SourceFileHashAlgorithm::Md5,
767+
hash: String::new(),
768+
},
769+
)
770+
})
755771
.collect();
756772
Ok(find_stale_file(
757773
config,
758774
mtime_cache,
759775
&target_root.join(output),
760-
&c,
776+
c.as_slice(),
761777
))
762778
}
763779

@@ -1706,7 +1722,7 @@ fn find_stale_file(
17061722
config: &Config,
17071723
mtime_cache: &mut HashMap<PathBuf, (FileTime, FileSize, FileHash)>,
17081724
reference: &Path,
1709-
paths: &[(PathBuf, u32, u64)],
1725+
paths: &[(PathBuf, FileSize, FileHash)],
17101726
) -> Option<StaleItem> {
17111727
let reference_mtime = match paths::mtime(reference) {
17121728
Ok(mtime) => mtime,
@@ -1716,7 +1732,7 @@ fn find_stale_file(
17161732
for (path, reference_size, reference_hash) in paths {
17171733
let path = &path;
17181734
let (path_mtime, path_size, path_hash) = match mtime_cache.entry(path.to_path_buf()) {
1719-
Entry::Occupied(o) => *o.get(),
1735+
Entry::Occupied(o) => o.get().clone(), //FIXME? do we need to clone here?
17201736
Entry::Vacant(v) => {
17211737
let mtime = match paths::mtime(path) {
17221738
Ok(mtime) => mtime,
@@ -1725,13 +1741,21 @@ fn find_stale_file(
17251741
let current_size = if config.cli_unstable().hash_tracking {
17261742
match std::fs::metadata(path) {
17271743
// For file difference checking, record the full file size.
1728-
Ok(metadata) => metadata.len() as u32,
1744+
Ok(metadata) => metadata.len(),
17291745
Err(..) => return Some(StaleItem::MissingFile(path.to_path_buf())), //todo
17301746
}
17311747
} else {
17321748
0
17331749
};
1734-
*v.insert((mtime, current_size, 0u64)) // Hash calculated only if needed later.
1750+
v.insert((
1751+
mtime,
1752+
current_size,
1753+
FileHash {
1754+
kind: SourceFileHashAlgorithm::Md5,
1755+
hash: String::new(),
1756+
},
1757+
))
1758+
.clone() // Hash calculated only if needed later.
17351759
}
17361760
};
17371761

@@ -1773,21 +1797,40 @@ fn find_stale_file(
17731797

17741798
// Same size but mtime is different. Probably there's no change...
17751799
// compute hash and compare to prevent change cascade...
1776-
if config.cli_unstable().hash_tracking && *reference_hash > 0 {
1800+
if config.cli_unstable().hash_tracking && reference_hash.hash.len() > 0 {
1801+
// FIXME? We could fail a little faster by checking for size discrepancies on _any_ file before checking hashes,
1802+
// but not sure it's worth the additional complexity.
17771803
//FIXME put the result in the mtime_cache rather than hashing each time!
17781804
let mut reader = io::BufReader::new(fs::File::open(&path).unwrap()); //FIXME
1779-
let mut hasher = FxHasher::default();
1780-
let mut buffer = [0; 1024];
1781-
loop {
1782-
let count = reader.read(&mut buffer).unwrap(); //FIXME
1783-
if count == 0 {
1784-
break;
1805+
1806+
let hash = match reference_hash.kind {
1807+
SourceFileHashAlgorithm::Md5 => {
1808+
let mut hasher = Md5::new();
1809+
let mut buffer = [0; 1024];
1810+
loop {
1811+
let count = reader.read(&mut buffer).unwrap(); //FIXME
1812+
if count == 0 {
1813+
break;
1814+
}
1815+
hasher.input(&buffer[..count]);
1816+
}
1817+
format!("{:?}", hasher.result())
17851818
}
1786-
hasher.write(&buffer[..count]);
1787-
}
1788-
let hash = hasher.finish();
1819+
SourceFileHashAlgorithm::Sha1 => {
1820+
let mut hasher = Sha1::new();
1821+
let mut buffer = [0; 1024];
1822+
loop {
1823+
let count = reader.read(&mut buffer).unwrap(); //FIXME
1824+
if count == 0 {
1825+
break;
1826+
}
1827+
hasher.input(&buffer[..count]);
1828+
}
1829+
format!("{:?}", hasher.result())
1830+
}
1831+
};
17891832

1790-
if hash == *reference_hash {
1833+
if hash == reference_hash.hash {
17911834
continue;
17921835
}
17931836
}
@@ -1807,6 +1850,24 @@ fn find_stale_file(
18071850
None
18081851
}
18091852

1853+
/// Hash algorithm used to fingerprint the contents of a source file.
///
/// The textual names understood by the [`FromStr`] implementation are
/// `"md5"` and `"sha1"`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SourceFileHashAlgorithm {
    /// MD5, written `"md5"` in textual form.
    Md5,
    /// SHA-1, written `"sha1"` in textual form.
    Sha1,
}

impl FromStr for SourceFileHashAlgorithm {
    /// No detail is reported on failure; the only failure mode is an
    /// unrecognised algorithm name.
    type Err = ();

    /// Parses an algorithm name.
    ///
    /// Matching is exact: the name must be lowercase and must not carry any
    /// surrounding whitespace.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` for any string other than `"md5"` or `"sha1"`.
    fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
        match s {
            "md5" => Ok(SourceFileHashAlgorithm::Md5),
            "sha1" => Ok(SourceFileHashAlgorithm::Sha1),
            _ => Err(()),
        }
    }
}
1870+
18101871
enum DepInfoPathType {
18111872
// src/, e.g. src/lib.rs
18121873
PackageRootRelative,
@@ -1914,7 +1975,7 @@ pub fn translate_dep_info(
19141975
pub struct RustcDepInfo {
19151976
/// The list of files that the main target in the dep-info file depends on.
19161977
/// Each entry also carries the file's size and hash (a size of 0 and an empty hash if not recorded).
1917-
pub files: Vec<(PathBuf, u32, u64)>, //FIXME use Option<NonZeroU32> instead?
1978+
pub files: Vec<(PathBuf, FileSize, FileHash)>, //FIXME use Option<NonZero> instead?
19181979
/// The list of environment variables we found that the rustc compilation
19191980
/// depends on.
19201981
///
@@ -1939,21 +2000,34 @@ struct EncodedDepInfo {
19392000
impl EncodedDepInfo {
19402001
fn parse(mut bytes: &[u8]) -> Option<EncodedDepInfo> {
19412002
let bytes = &mut bytes;
1942-
let nfiles = read_usize(bytes)?;
2003+
let nfiles = read_usize(bytes).unwrap();
19432004
let mut files = Vec::with_capacity(nfiles as usize);
19442005
for _ in 0..nfiles {
19452006
//FIXME: backward compatibility!!!
1946-
let size = read_usize(bytes)? as FileSize;
2007+
let size = read_u64(bytes)? as FileSize;
19472008
//debug!("read size as {}", size);
1948-
let hash = read_u64(bytes)?;
2009+
let hash_buf = read_bytes(bytes)?;
2010+
2011+
let hash = String::from_utf8(hash_buf.to_vec()).unwrap();
2012+
19492013
//debug!("read hash as {}", hash);
2014+
let kind = match read_u8(bytes)? {
2015+
0 => SourceFileHashAlgorithm::Md5,
2016+
1 => SourceFileHashAlgorithm::Sha1,
2017+
_ => return None,
2018+
};
19502019
let ty = match read_u8(bytes)? {
19512020
0 => DepInfoPathType::PackageRootRelative,
19522021
1 => DepInfoPathType::TargetRootRelative,
19532022
_ => return None,
19542023
};
19552024
let bytes = read_bytes(bytes)?;
1956-
files.push((size, hash, ty, util::bytes2path(bytes).ok()?));
2025+
files.push((
2026+
size,
2027+
FileHash { kind, hash },
2028+
ty,
2029+
util::bytes2path(bytes).ok()?,
2030+
));
19572031
}
19582032

19592033
let nenv = read_usize(bytes)?;
@@ -2015,9 +2089,14 @@ impl EncodedDepInfo {
20152089
write_usize(dst, self.files.len());
20162090
for (size, hash, ty, file) in self.files.iter() {
20172091
//debug!("writing depinfo size as {} ", *size as usize);
2018-
write_usize(dst, *size as usize);
2019-
//debug!("writing depinfo hash as {} ", *hash);
2020-
write_u64(dst, *hash);
2092+
write_u64(dst, *size);
2093+
//debug!("writing depinfo hash as {} ", hash.hash.len());
2094+
write_bytes(dst, hash.hash.as_bytes());
2095+
//write(dst, hash.hash);
2096+
match hash.kind {
2097+
SourceFileHashAlgorithm::Md5 => dst.push(0),
2098+
SourceFileHashAlgorithm::Sha1 => dst.push(1),
2099+
}
20212100
match ty {
20222101
DepInfoPathType::PackageRootRelative => dst.push(0),
20232102
DepInfoPathType::TargetRootRelative => dst.push(1),
@@ -2094,8 +2173,13 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
20942173
if ret.files[i].0.to_string_lossy() == file {
20952174
let parts: Vec<_> = line["# size:".len()..].split(" ").collect();
20962175
ret.files[i].1 = parts[0].trim().parse()?; //FIXME do we need trims?
2097-
let hash = &parts[1]["hash:".len()..].trim();
2098-
ret.files[i].2 = hash.parse()?;
2176+
let kind_hash: Vec<_> = parts[1].split(":").collect();
2177+
let hash = kind_hash[1];
2178+
ret.files[i].2 = FileHash {
2179+
kind: SourceFileHashAlgorithm::from_str(kind_hash[0])
2180+
.expect("unknown hashing algo"),
2181+
hash: hash.to_string(),
2182+
};
20992183
break;
21002184
}
21012185
}
@@ -2117,7 +2201,14 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
21172201
internal("malformed dep-info format, trailing \\".to_string())
21182202
})?);
21192203
}
2120-
ret.files.push((file.into(), 0, 0));
2204+
ret.files.push((
2205+
file.into(),
2206+
0,
2207+
FileHash {
2208+
kind: SourceFileHashAlgorithm::Md5,
2209+
hash: String::new(),
2210+
},
2211+
));
21212212
}
21222213
} else {
21232214
prev_line = Some(line);

0 commit comments

Comments
 (0)