Skip to content

Commit 1daff03

Browse files
committed
Avoid using mtime information for reusing cache files
Using mtime information is pretty finicky across platforms, so instead take a different strategy where we embed the sha that a cache file was generated from into the cache file itself. If the registry's sha has changed then we regenerate the cache file, otherwise we can reuse the cache file. This should make cache file generation more robust (any command can generate a cache file to get used at any time) as well as work better across platforms (doesn't run into issues with coarse mtime systems and the like).
1 parent 783f22b commit 1daff03

File tree

4 files changed

+59
-59
lines changed

4 files changed

+59
-59
lines changed

src/cargo/sources/registry/index.rs

Lines changed: 45 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,10 @@
6767
//! hopefully those are more obvious inline in the code itself.
6868
6969
use std::collections::{HashMap, HashSet};
70-
use std::fs::{self, File};
71-
use std::io::Read;
70+
use std::fs;
7271
use std::path::Path;
7372
use std::str;
7473

75-
use filetime::FileTime;
7674
use log::info;
7775
use semver::{Version, VersionReq};
7876

@@ -316,7 +314,7 @@ impl<'cfg> RegistryIndex<'cfg> {
316314
// let root = self.config.assert_package_cache_locked(&self.path);
317315
let root = load.assert_index_locked(&self.path);
318316
let cache_root = root.join(".cache");
319-
let last_index_update = load.last_modified();;
317+
let index_version = load.current_version();
320318

321319
// See module comment in `registry/mod.rs` for why this is structured
322320
// the way it is.
@@ -338,7 +336,7 @@ impl<'cfg> RegistryIndex<'cfg> {
338336
// along the way produce helpful "did you mean?" suggestions.
339337
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
340338
let summaries = Summaries::parse(
341-
last_index_update,
339+
index_version.as_ref().map(|s| &**s),
342340
&root,
343341
&cache_root,
344342
path.as_ref(),
@@ -471,7 +469,7 @@ impl Summaries {
471469
/// * `load` - the actual index implementation which may be very slow to
472470
/// call. We avoid this if we can.
473471
pub fn parse(
474-
last_index_update: Option<FileTime>,
472+
index_version: Option<&str>,
475473
root: &Path,
476474
cache_root: &Path,
477475
relative: &Path,
@@ -483,24 +481,18 @@ impl Summaries {
483481
// of reasons, but consider all of them non-fatal and just log their
484482
// occurrence in case anyone is debugging anything.
485483
let cache_path = cache_root.join(relative);
486-
if let Some(last_index_update) = last_index_update {
487-
match File::open(&cache_path) {
488-
Ok(file) => {
489-
let metadata = file.metadata()?;
490-
let cache_mtime = FileTime::from_last_modification_time(&metadata);
491-
if cache_mtime > last_index_update {
492-
log::debug!("cache for {:?} is fresh", relative);
493-
match Summaries::parse_cache(&file, &metadata) {
494-
Ok(s) => return Ok(Some(s)),
495-
Err(e) => {
496-
log::debug!("failed to parse {:?} cache: {}", relative, e);
497-
}
498-
}
499-
} else {
500-
log::debug!("cache for {:?} is out of date", relative);
484+
if let Some(index_version) = index_version {
485+
match fs::read(&cache_path) {
486+
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
487+
Ok(s) => {
488+
log::debug!("fast path for registry cache of {:?}", relative);
489+
return Ok(Some(s))
501490
}
502-
}
503-
Err(e) => log::debug!("cache for {:?} error: {}", relative, e),
491+
Err(e) => {
492+
log::debug!("failed to parse {:?} cache: {}", relative, e);
493+
}
494+
},
495+
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
504496
}
505497
}
506498

@@ -510,7 +502,7 @@ impl Summaries {
510502
log::debug!("slow path for {:?}", relative);
511503
let mut ret = Summaries::default();
512504
let mut hit_closure = false;
513-
let mut cache_bytes = Vec::new();
505+
let mut cache_bytes = None;
514506
let err = load.load(root, relative, &mut |contents| {
515507
ret.raw_data = contents.to_vec();
516508
let mut cache = SummariesCache::default();
@@ -535,7 +527,9 @@ impl Summaries {
535527
ret.versions.insert(version, summary.into());
536528
start = end + 1;
537529
}
538-
cache_bytes = cache.serialize();
530+
if let Some(index_version) = index_version {
531+
cache_bytes = Some(cache.serialize(index_version));
532+
}
539533
Ok(())
540534
});
541535

@@ -553,26 +547,22 @@ impl Summaries {
553547
//
554548
// This is opportunistic so we ignore failure here but are sure to log
555549
// something in case of error.
556-
//
557-
// Note that we also skip this when `last_index_update` is `None` because it
558-
// means we can't handle the cache anyway.
559-
if last_index_update.is_some() && fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
560-
let path = Filesystem::new(cache_path.clone());
561-
config.assert_package_cache_locked(&path);
562-
if let Err(e) = fs::write(cache_path, cache_bytes) {
563-
log::info!("failed to write cache: {}", e);
550+
if let Some(cache_bytes) = cache_bytes {
551+
if fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
552+
let path = Filesystem::new(cache_path.clone());
553+
config.assert_package_cache_locked(&path);
554+
if let Err(e) = fs::write(cache_path, cache_bytes) {
555+
log::info!("failed to write cache: {}", e);
556+
}
564557
}
565558
}
566559
Ok(Some(ret))
567560
}
568561

569562
/// Parses an open `File` which represents information previously cached by
570563
/// Cargo.
571-
pub fn parse_cache(mut file: &File, meta: &fs::Metadata) -> CargoResult<Summaries> {
572-
let mut contents = Vec::new();
573-
contents.reserve(meta.len() as usize + 1);
574-
file.read_to_end(&mut contents)?;
575-
let cache = SummariesCache::parse(&contents)?;
564+
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
565+
let cache = SummariesCache::parse(&contents, last_index_update)?;
576566
let mut ret = Summaries::default();
577567
for (version, summary) in cache.versions {
578568
let (start, end) = subslice_bounds(&contents, summary);
@@ -614,7 +604,7 @@ impl Summaries {
614604
const CURRENT_CACHE_VERSION: u8 = 1;
615605

616606
impl<'a> SummariesCache<'a> {
617-
fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> {
607+
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
618608
// NB: keep this method in sync with `serialize` below
619609
let (first_byte, rest) = data
620610
.split_first()
@@ -624,6 +614,19 @@ impl<'a> SummariesCache<'a> {
624614
}
625615
let mut iter = memchr::Memchr::new(0, rest);
626616
let mut start = 0;
617+
if let Some(end) = iter.next() {
618+
let update = &rest[start..end];
619+
if update != last_index_update.as_bytes() {
620+
failure::bail!(
621+
"cache out of date: current index ({}) != cache ({})",
622+
last_index_update,
623+
str::from_utf8(update)?,
624+
)
625+
}
626+
start = end + 1;
627+
} else {
628+
failure::bail!("malformed file");
629+
}
627630
let mut ret = SummariesCache::default();
628631
while let Some(version_end) = iter.next() {
629632
let version = &rest[start..version_end];
@@ -637,7 +640,7 @@ impl<'a> SummariesCache<'a> {
637640
Ok(ret)
638641
}
639642

640-
fn serialize(&self) -> Vec<u8> {
643+
fn serialize(&self, index_version: &str) -> Vec<u8> {
641644
// NB: keep this method in sync with `parse` above
642645
let size = self
643646
.versions
@@ -646,6 +649,8 @@ impl<'a> SummariesCache<'a> {
646649
.sum();
647650
let mut contents = Vec::with_capacity(size);
648651
contents.push(CURRENT_CACHE_VERSION);
652+
contents.extend_from_slice(index_version.as_bytes());
653+
contents.push(0);
649654
for (version, data) in self.versions.iter() {
650655
contents.extend_from_slice(version.to_string().as_bytes());
651656
contents.push(0);

src/cargo/sources/registry/local.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
use crate::core::PackageId;
1+
use crate::core::{PackageId, InternedString};
22
use crate::sources::registry::{MaybeLock, RegistryConfig, RegistryData};
33
use crate::util::errors::{CargoResult, CargoResultExt};
44
use crate::util::paths;
55
use crate::util::{Config, Filesystem, Sha256};
6-
use filetime::FileTime;
76
use hex;
87
use std::fs::File;
98
use std::io::prelude::*;
@@ -43,7 +42,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
4342
path.as_path_unlocked()
4443
}
4544

46-
fn last_modified(&self) -> Option<FileTime> {
45+
fn current_version(&self) -> Option<InternedString> {
4746
None
4847
}
4948

src/cargo/sources/registry/mod.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ use std::fs::{File, OpenOptions};
165165
use std::io::Write;
166166
use std::path::{Path, PathBuf};
167167

168-
use filetime::FileTime;
169168
use flate2::read::GzDecoder;
170169
use log::debug;
171170
use semver::{Version, VersionReq};
@@ -174,7 +173,7 @@ use tar::Archive;
174173

175174
use crate::core::dependency::{Dependency, Kind};
176175
use crate::core::source::MaybePackage;
177-
use crate::core::{Package, PackageId, Source, SourceId, Summary};
176+
use crate::core::{Package, PackageId, Source, SourceId, Summary, InternedString};
178177
use crate::sources::PathSource;
179178
use crate::util::errors::CargoResultExt;
180179
use crate::util::hex;
@@ -372,7 +371,7 @@ pub trait RegistryData {
372371
true
373372
}
374373
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;
375-
fn last_modified(&self) -> Option<FileTime>;
374+
fn current_version(&self) -> Option<InternedString>;
376375
}
377376

378377
pub enum MaybeLock {

src/cargo/sources/registry/remote.rs

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
use crate::core::{PackageId, SourceId};
1+
use crate::core::{PackageId, SourceId, InternedString};
22
use crate::sources::git;
33
use crate::sources::registry::MaybeLock;
44
use crate::sources::registry::{RegistryConfig, RegistryData, CRATE_TEMPLATE, VERSION_TEMPLATE};
55
use crate::util::errors::{CargoResult, CargoResultExt};
66
use crate::util::{Config, Filesystem, Sha256};
7-
use crate::util::paths;
8-
use filetime::FileTime;
97
use lazycell::LazyCell;
108
use log::{debug, trace};
119
use std::cell::{Cell, Ref, RefCell};
@@ -25,7 +23,7 @@ pub struct RemoteRegistry<'cfg> {
2523
tree: RefCell<Option<git2::Tree<'static>>>,
2624
repo: LazyCell<git2::Repository>,
2725
head: Cell<Option<git2::Oid>>,
28-
last_updated: Cell<Option<FileTime>>,
26+
current_sha: Cell<Option<InternedString>>,
2927
}
3028

3129
impl<'cfg> RemoteRegistry<'cfg> {
@@ -38,7 +36,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
3836
tree: RefCell::new(None),
3937
repo: LazyCell::new(),
4038
head: Cell::new(None),
41-
last_updated: Cell::new(None),
39+
current_sha: Cell::new(None),
4240
}
4341
}
4442

@@ -141,14 +139,13 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
141139
self.config.assert_package_cache_locked(path)
142140
}
143141

144-
fn last_modified(&self) -> Option<FileTime> {
145-
if let Some(time) = self.last_updated.get() {
146-
return Some(time);
142+
fn current_version(&self) -> Option<InternedString> {
143+
if let Some(sha) = self.current_sha.get() {
144+
return Some(sha);
147145
}
148-
let path = self.config.assert_package_cache_locked(&self.index_path);
149-
let mtime = paths::mtime(&path.join(LAST_UPDATED_FILE)).ok();
150-
self.last_updated.set(mtime);
151-
self.last_updated.get()
146+
let sha = InternedString::new(&self.head().ok()?.to_string());
147+
self.current_sha.set(Some(sha));
148+
Some(sha)
152149
}
153150

154151
fn load(
@@ -223,7 +220,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
223220
self.prepare()?;
224221
self.head.set(None);
225222
*self.tree.borrow_mut() = None;
226-
self.last_updated.set(None);
223+
self.current_sha.set(None);
227224
let path = self.config.assert_package_cache_locked(&self.index_path);
228225
self.config
229226
.shell()

0 commit comments

Comments
 (0)