Skip to content

Commit c559add

Browse files
committed
fetch publisher data from crates.io, start build artifact caching
1 parent 49e5d7f commit c559add

File tree

10 files changed

+377
-87
lines changed

10 files changed

+377
-87
lines changed

docker-compose.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ services:
1616
- "/var/run/docker.sock:/var/run/docker.sock"
1717
- ".rustwide-docker:/opt/docsrs/rustwide"
1818
- "cratesio-index:/opt/docsrs/prefix/crates.io-index"
19+
- "artifact-cache:/opt/docsrs/prefix/artifact_cache"
1920
environment:
2021
DOCSRS_RUSTWIDE_WORKSPACE: /opt/docsrs/rustwide
2122
DOCSRS_DATABASE_URL: postgresql://cratesfyi:password@db
2223
DOCSRS_STORAGE_BACKEND: s3
2324
S3_ENDPOINT: http://s3:9000
2425
AWS_ACCESS_KEY_ID: cratesfyi
2526
AWS_SECRET_ACCESS_KEY: secret_key
27+
DOCSRS_PREFIX: /opt/docsrs/prefix
2628
env_file:
2729
- .env
2830
healthcheck:
@@ -94,3 +96,4 @@ volumes:
9496
postgres-data: {}
9597
minio-data: {}
9698
cratesio-index: {}
99+
artifact-cache: {}

src/db/add_package.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::{
22
db::types::Feature,
33
docbuilder::{BuildResult, DocCoverage},
44
error::Result,
5-
index::api::{CrateData, CrateOwner, ReleaseData},
5+
index::api::{CrateData, GithubUser, ReleaseData},
66
storage::CompressionAlgorithm,
77
utils::MetadataPackage,
88
web::crate_details::CrateDetails,
@@ -371,7 +371,7 @@ pub fn update_crate_data_in_database(
371371
/// Adds owners into database
372372
fn update_owners_in_database(
373373
conn: &mut Client,
374-
owners: &[CrateOwner],
374+
owners: &[GithubUser],
375375
crate_id: i32,
376376
) -> Result<()> {
377377
// Update any existing owner data since it is mutable and could have changed since last
@@ -562,9 +562,10 @@ mod test {
562562
},
563563
)?;
564564

565-
let owner1 = CrateOwner {
565+
let owner1 = GithubUser {
566566
avatar: "avatar".into(),
567567
login: "login".into(),
568+
..Default::default()
568569
};
569570

570571
update_owners_in_database(&mut conn, &[owner1.clone()], crate_id)?;
@@ -600,16 +601,18 @@ mod test {
600601
// set initial owner details
601602
update_owners_in_database(
602603
&mut conn,
603-
&[CrateOwner {
604+
&[GithubUser {
604605
login: "login".into(),
605606
avatar: "avatar".into(),
607+
..Default::default()
606608
}],
607609
crate_id,
608610
)?;
609611

610-
let updated_owner = CrateOwner {
612+
let updated_owner = GithubUser {
611613
login: "login".into(),
612614
avatar: "avatar2".into(),
615+
..Default::default()
613616
};
614617
update_owners_in_database(&mut conn, &[updated_owner.clone()], crate_id)?;
615618

@@ -645,17 +648,19 @@ mod test {
645648
// set initial owner details
646649
update_owners_in_database(
647650
&mut conn,
648-
&[CrateOwner {
651+
&[GithubUser {
649652
login: "login".into(),
650653
avatar: "avatar".into(),
654+
..Default::default()
651655
}],
652656
crate_id,
653657
)?;
654658

655-
let new_owners: Vec<CrateOwner> = (1..5)
656-
.map(|i| CrateOwner {
659+
let new_owners: Vec<GithubUser> = (1..5)
660+
.map(|i| GithubUser {
657661
login: format!("login{i}"),
658662
avatar: format!("avatar{i}"),
663+
..Default::default()
659664
})
660665
.collect();
661666

src/db/delete.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ fn delete_crate_from_database(conn: &mut Client, name: &str, crate_id: i32) -> R
187187
#[cfg(test)]
188188
mod tests {
189189
use super::*;
190-
use crate::index::api::CrateOwner;
190+
use crate::index::api::GithubUser;
191191
use crate::test::{assert_success, wrapper};
192192
use postgres::Client;
193193
use test_case::test_case;
@@ -313,9 +313,10 @@ mod tests {
313313
.name("a")
314314
.version("1.0.0")
315315
.archive_storage(archive_storage)
316-
.add_owner(CrateOwner {
316+
.add_owner(GithubUser {
317317
login: "malicious actor".into(),
318318
avatar: "https://example.org/malicious".into(),
319+
..Default::default()
319320
})
320321
.create()?;
321322
assert!(release_exists(&mut db.conn(), v1)?);
@@ -342,9 +343,10 @@ mod tests {
342343
.name("a")
343344
.version("2.0.0")
344345
.archive_storage(archive_storage)
345-
.add_owner(CrateOwner {
346+
.add_owner(GithubUser {
346347
login: "Peter Rabbit".into(),
347348
avatar: "https://example.org/peter".into(),
349+
..Default::default()
348350
})
349351
.create()?;
350352
assert!(release_exists(&mut db.conn(), v2)?);

src/docbuilder/caching.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
use crate::utils::copy_dir_all;
2+
use anyhow::{Context as _, Result};
3+
use std::{
4+
fs, io,
5+
path::{Path, PathBuf},
6+
};
7+
use tracing::{debug, instrument, warn};
8+
9+
/// move cache folder to target, falling back to copy + delete on error.
10+
fn move_or_copy<P: AsRef<Path> + std::fmt::Debug, Q: AsRef<Path> + std::fmt::Debug>(
11+
source: P,
12+
dest: Q,
13+
) -> io::Result<()> {
14+
if let Some(parent) = dest.as_ref().parent() {
15+
fs::create_dir_all(parent)?;
16+
}
17+
if let Err(err) = fs::rename(&source, &dest) {
18+
warn!(
19+
?err,
20+
?source,
21+
?dest,
22+
"could not move target directory, fall back to copy"
23+
);
24+
copy_dir_all(&source, &dest)?;
25+
fs::remove_dir_all(&source)?;
26+
}
27+
Ok(())
28+
}
29+
30+
/// artifact caching with cleanup
31+
#[derive(Debug)]
32+
pub(crate) struct ArtifactCache {
33+
cache_dir: PathBuf,
34+
}
35+
36+
impl ArtifactCache {
37+
pub(crate) fn new(cache_dir: PathBuf) -> Result<Self> {
38+
Ok(Self { cache_dir })
39+
}
40+
41+
pub(crate) fn purge(&self) -> Result<()> {
42+
fs::remove_dir_all(&self.cache_dir)?;
43+
Ok(())
44+
}
45+
46+
/// clean up a target directory.
47+
///
48+
/// Should delete all things that shouldn't leak between
49+
/// builds, so:
50+
/// - doc-output
51+
/// - ...?
52+
#[instrument(skip(self))]
53+
fn cleanup(&self, target_dir: &Path) -> Result<()> {
54+
for item in fs::read_dir(target_dir)? {
55+
// the first level of directories are the targets in most cases,
56+
// delete their doc-directories
57+
let item = item?;
58+
let doc_dir = item.path().join("doc");
59+
if doc_dir.is_dir() {
60+
debug!(?doc_dir, "cache dir cleanup");
61+
fs::remove_dir_all(doc_dir)?;
62+
}
63+
}
64+
Ok(())
65+
}
66+
67+
/// restore a cached target directory.
68+
///
69+
/// Will just move the cache folder into the rustwide
70+
/// target path. If that fails, will use `copy_dir_all`.
71+
#[instrument(skip(self))]
72+
pub(crate) fn restore_to<P: AsRef<Path> + std::fmt::Debug>(
73+
&self,
74+
cache_key: &str,
75+
target_dir: P,
76+
) -> Result<()> {
77+
let target_dir = target_dir.as_ref();
78+
if target_dir.exists() {
79+
// to be safe, while most of the time the dir doesn't exist,
80+
// or is empty.
81+
fs::remove_dir_all(target_dir).context("could not clean target directory")?;
82+
}
83+
84+
let cache_dir = self.cache_dir.join(cache_key);
85+
if !cache_dir.exists() {
86+
// when there is no existing cache dir,
87+
// we can just create an empty target.
88+
fs::create_dir_all(target_dir).context("could not create target directory")?;
89+
return Ok(());
90+
}
91+
92+
move_or_copy(cache_dir, target_dir).context("could not move cache directory to target")?;
93+
Ok(())
94+
}
95+
96+
#[instrument(skip(self))]
97+
pub(crate) fn save<P: AsRef<Path> + std::fmt::Debug>(
98+
&self,
99+
cache_key: &str,
100+
target_dir: P,
101+
) -> Result<()> {
102+
let cache_dir = self.cache_dir.join(cache_key);
103+
if !cache_dir.exists() {
104+
fs::create_dir_all(&cache_dir)?;
105+
}
106+
107+
move_or_copy(&target_dir, &cache_dir)
108+
.context("could not move target directory to cache")?;
109+
self.cleanup(&cache_dir)?;
110+
Ok(())
111+
}
112+
}

src/docbuilder/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod caching;
12
mod crates;
23
mod limits;
34
mod rustwide_builder;

src/docbuilder/rustwide_builder.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use super::caching::ArtifactCache;
12
use crate::db::file::add_path_into_database;
23
use crate::db::{
34
add_build_into_database, add_doc_coverage, add_package_into_database,
@@ -27,13 +28,14 @@ use rustwide::{AlternativeRegistry, Build, Crate, Toolchain, Workspace, Workspac
2728
use std::collections::{HashMap, HashSet};
2829
use std::path::Path;
2930
use std::sync::Arc;
30-
use tracing::{debug, info, warn};
31+
use tracing::{debug, info, instrument, warn};
3132

3233
const USER_AGENT: &str = "docs.rs builder (https://github.com/rust-lang/docs.rs)";
3334
const COMPONENTS: &[&str] = &["llvm-tools-preview", "rustc-dev", "rustfmt"];
3435
const DUMMY_CRATE_NAME: &str = "empty-library";
3536
const DUMMY_CRATE_VERSION: &str = "1.0.0";
3637

38+
#[derive(Debug)]
3739
pub enum PackageKind<'a> {
3840
Local(&'a Path),
3941
CratesIo,
@@ -48,6 +50,7 @@ pub struct RustwideBuilder {
4850
storage: Arc<Storage>,
4951
metrics: Arc<Metrics>,
5052
index: Arc<Index>,
53+
artifact_cache: ArtifactCache,
5154
rustc_version: String,
5255
repository_stats_updater: Arc<RepositoryStatsUpdater>,
5356
skip_build_if_exists: bool,
@@ -90,6 +93,7 @@ impl RustwideBuilder {
9093
Ok(RustwideBuilder {
9194
workspace,
9295
toolchain,
96+
artifact_cache: ArtifactCache::new(config.prefix.join("artifact_cache"))?,
9397
config,
9498
db: context.pool()?,
9599
storage: context.storage()?,
@@ -200,6 +204,7 @@ impl RustwideBuilder {
200204

201205
let has_changed = old_version.as_deref() != Some(&self.rustc_version);
202206
if has_changed {
207+
self.artifact_cache.purge()?;
203208
self.add_essential_files()?;
204209
}
205210
Ok(has_changed)
@@ -322,6 +327,7 @@ impl RustwideBuilder {
322327
self.build_package(&package.name, &package.version, PackageKind::Local(path))
323328
}
324329

330+
#[instrument(skip(self))]
325331
pub fn build_package(
326332
&mut self,
327333
name: &str,
@@ -386,6 +392,34 @@ impl RustwideBuilder {
386392
(|| -> Result<bool> {
387393
use docsrs_metadata::BuildTargets;
388394

395+
let release_data = match self
396+
.index
397+
.api()
398+
.get_release_data(name, version)
399+
.context("error fetching release data from crates.io")
400+
{
401+
Ok(data) => data,
402+
Err(err) => {
403+
warn!("{:#?}", err);
404+
ReleaseData::default()
405+
}
406+
};
407+
408+
if let Some(ref published_by) = release_data.published_by {
409+
info!(
410+
host_target_dir=?build.host_target_dir(),
411+
published_by_id=published_by.id,
412+
published_by_login=published_by.login,
413+
"restoring artifact cache",
414+
);
415+
if let Err(err) = self
416+
.artifact_cache
417+
.restore_to(&published_by.id.to_string(), build.host_target_dir())
418+
{
419+
warn!(?err, "could not restore artifact cache");
420+
}
421+
}
422+
389423
let mut has_docs = false;
390424
let mut successful_targets = Vec::new();
391425
let metadata = Metadata::from_crate_root(build.host_source_dir())?;
@@ -537,6 +571,22 @@ impl RustwideBuilder {
537571
Err(err) => warn!("{:#?}", err),
538572
}
539573

574+
if let Some(ref published_by) = release_data.published_by {
575+
info!(
576+
host_target_dir=?build.host_target_dir(),
577+
published_by_id=published_by.id,
578+
published_by_login=published_by.login,
579+
"saving artifact cache",
580+
);
581+
if let Err(err) = self
582+
.artifact_cache
583+
.save(&published_by.id.to_string(), build.host_target_dir())
584+
.context("error giving back artifact cache")
585+
{
586+
warn!(?err, "could not give back artifact cache");
587+
};
588+
}
589+
540590
if res.result.successful {
541591
// delete eventually existing files from pre-archive storage.
542592
// we're doing this in the end so eventual problems in the build

0 commit comments

Comments
 (0)