Skip to content

Commit 4d65dd8

Browse files
committed
fetch publisher data from crates.io, start build artifact caching
1 parent a87743c commit 4d65dd8

File tree

10 files changed

+377
-87
lines changed

10 files changed

+377
-87
lines changed

docker-compose.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@ services:
1616
- "/var/run/docker.sock:/var/run/docker.sock"
1717
- ".rustwide-docker:/opt/docsrs/rustwide"
1818
- "cratesio-index:/opt/docsrs/prefix/crates.io-index"
19+
- "artifact-cache:/opt/docsrs/prefix/artifact_cache"
1920
environment:
2021
DOCSRS_RUSTWIDE_WORKSPACE: /opt/docsrs/rustwide
2122
DOCSRS_DATABASE_URL: postgresql://cratesfyi:password@db
2223
DOCSRS_STORAGE_BACKEND: s3
2324
S3_ENDPOINT: http://s3:9000
2425
AWS_ACCESS_KEY_ID: cratesfyi
2526
AWS_SECRET_ACCESS_KEY: secret_key
27+
DOCSRS_PREFIX: /opt/docsrs/prefix
2628
env_file:
2729
- .env
2830
healthcheck:
@@ -94,3 +96,4 @@ volumes:
9496
postgres-data: {}
9597
minio-data: {}
9698
cratesio-index: {}
99+
artifact-cache: {}

src/db/add_package.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::{
22
db::types::Feature,
33
docbuilder::{BuildResult, DocCoverage},
44
error::Result,
5-
index::api::{CrateData, CrateOwner, ReleaseData},
5+
index::api::{CrateData, GithubUser, ReleaseData},
66
storage::CompressionAlgorithm,
77
utils::MetadataPackage,
88
web::crate_details::CrateDetails,
@@ -371,7 +371,7 @@ pub fn update_crate_data_in_database(
371371
/// Adds owners into database
372372
fn update_owners_in_database(
373373
conn: &mut Client,
374-
owners: &[CrateOwner],
374+
owners: &[GithubUser],
375375
crate_id: i32,
376376
) -> Result<()> {
377377
// Update any existing owner data since it is mutable and could have changed since last
@@ -562,9 +562,10 @@ mod test {
562562
},
563563
)?;
564564

565-
let owner1 = CrateOwner {
565+
let owner1 = GithubUser {
566566
avatar: "avatar".into(),
567567
login: "login".into(),
568+
..Default::default()
568569
};
569570

570571
update_owners_in_database(&mut conn, &[owner1.clone()], crate_id)?;
@@ -600,16 +601,18 @@ mod test {
600601
// set initial owner details
601602
update_owners_in_database(
602603
&mut conn,
603-
&[CrateOwner {
604+
&[GithubUser {
604605
login: "login".into(),
605606
avatar: "avatar".into(),
607+
..Default::default()
606608
}],
607609
crate_id,
608610
)?;
609611

610-
let updated_owner = CrateOwner {
612+
let updated_owner = GithubUser {
611613
login: "login".into(),
612614
avatar: "avatar2".into(),
615+
..Default::default()
613616
};
614617
update_owners_in_database(&mut conn, &[updated_owner.clone()], crate_id)?;
615618

@@ -645,17 +648,19 @@ mod test {
645648
// set initial owner details
646649
update_owners_in_database(
647650
&mut conn,
648-
&[CrateOwner {
651+
&[GithubUser {
649652
login: "login".into(),
650653
avatar: "avatar".into(),
654+
..Default::default()
651655
}],
652656
crate_id,
653657
)?;
654658

655-
let new_owners: Vec<CrateOwner> = (1..5)
656-
.map(|i| CrateOwner {
659+
let new_owners: Vec<GithubUser> = (1..5)
660+
.map(|i| GithubUser {
657661
login: format!("login{i}"),
658662
avatar: format!("avatar{i}"),
663+
..Default::default()
659664
})
660665
.collect();
661666

src/db/delete.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ fn delete_crate_from_database(conn: &mut Client, name: &str, crate_id: i32) -> R
187187
#[cfg(test)]
188188
mod tests {
189189
use super::*;
190-
use crate::index::api::CrateOwner;
190+
use crate::index::api::GithubUser;
191191
use crate::test::{assert_success, wrapper};
192192
use postgres::Client;
193193
use test_case::test_case;
@@ -313,9 +313,10 @@ mod tests {
313313
.name("a")
314314
.version("1.0.0")
315315
.archive_storage(archive_storage)
316-
.add_owner(CrateOwner {
316+
.add_owner(GithubUser {
317317
login: "malicious actor".into(),
318318
avatar: "https://example.org/malicious".into(),
319+
..Default::default()
319320
})
320321
.create()?;
321322
assert!(release_exists(&mut db.conn(), v1)?);
@@ -342,9 +343,10 @@ mod tests {
342343
.name("a")
343344
.version("2.0.0")
344345
.archive_storage(archive_storage)
345-
.add_owner(CrateOwner {
346+
.add_owner(GithubUser {
346347
login: "Peter Rabbit".into(),
347348
avatar: "https://example.org/peter".into(),
349+
..Default::default()
348350
})
349351
.create()?;
350352
assert!(release_exists(&mut db.conn(), v2)?);

src/docbuilder/caching.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
use crate::utils::copy_dir_all;
2+
use anyhow::{Context as _, Result};
3+
use std::{
4+
fs, io,
5+
path::{Path, PathBuf},
6+
};
7+
use tracing::{debug, instrument, warn};
8+
9+
/// move cache folder to target, falling back to copy + delete on error.
10+
fn move_or_copy<P: AsRef<Path> + std::fmt::Debug, Q: AsRef<Path> + std::fmt::Debug>(
11+
source: P,
12+
dest: Q,
13+
) -> io::Result<()> {
14+
if let Some(parent) = dest.as_ref().parent() {
15+
fs::create_dir_all(parent)?;
16+
}
17+
if let Err(err) = fs::rename(&source, &dest) {
18+
warn!(
19+
?err,
20+
?source,
21+
?dest,
22+
"could not move target directory, fall back to copy"
23+
);
24+
copy_dir_all(&source, &dest)?;
25+
fs::remove_dir_all(&source)?;
26+
}
27+
Ok(())
28+
}
29+
30+
/// artifact caching with cleanup
31+
#[derive(Debug)]
32+
pub(crate) struct ArtifactCache {
33+
cache_dir: PathBuf,
34+
}
35+
36+
impl ArtifactCache {
37+
pub(crate) fn new(cache_dir: PathBuf) -> Result<Self> {
38+
Ok(Self { cache_dir })
39+
}
40+
41+
pub(crate) fn purge(&self) -> Result<()> {
42+
fs::remove_dir_all(&self.cache_dir)?;
43+
Ok(())
44+
}
45+
46+
/// clean up a target directory.
47+
///
48+
/// Should delete all things that shouldn't leak between
49+
/// builds, so:
50+
/// - doc-output
51+
/// - ...?
52+
#[instrument(skip(self))]
53+
fn cleanup(&self, target_dir: &Path) -> Result<()> {
54+
for item in fs::read_dir(target_dir)? {
55+
// the first level of directories are the targets in most cases,
56+
// delete their doc-directories
57+
let item = item?;
58+
let doc_dir = item.path().join("doc");
59+
if doc_dir.is_dir() {
60+
debug!(?doc_dir, "cache dir cleanup");
61+
fs::remove_dir_all(doc_dir)?;
62+
}
63+
}
64+
Ok(())
65+
}
66+
67+
/// restore a cached target directory.
68+
///
69+
/// Will just move the cache folder into the rustwide
70+
/// target path. If that fails, will use `copy_dir_all`.
71+
#[instrument(skip(self))]
72+
pub(crate) fn restore_to<P: AsRef<Path> + std::fmt::Debug>(
73+
&self,
74+
cache_key: &str,
75+
target_dir: P,
76+
) -> Result<()> {
77+
let target_dir = target_dir.as_ref();
78+
if target_dir.exists() {
79+
// to be safe, while most of the time the dir doesn't exist,
80+
// or is empty.
81+
fs::remove_dir_all(target_dir).context("could not clean target directory")?;
82+
}
83+
84+
let cache_dir = self.cache_dir.join(cache_key);
85+
if !cache_dir.exists() {
86+
// when there is no existing cache dir,
87+
// we can just create an empty target.
88+
fs::create_dir_all(target_dir).context("could not create target directory")?;
89+
return Ok(());
90+
}
91+
92+
move_or_copy(cache_dir, target_dir).context("could not move cache directory to target")?;
93+
Ok(())
94+
}
95+
96+
#[instrument(skip(self))]
97+
pub(crate) fn save<P: AsRef<Path> + std::fmt::Debug>(
98+
&self,
99+
cache_key: &str,
100+
target_dir: P,
101+
) -> Result<()> {
102+
let cache_dir = self.cache_dir.join(cache_key);
103+
if !cache_dir.exists() {
104+
fs::create_dir_all(&cache_dir)?;
105+
}
106+
107+
move_or_copy(&target_dir, &cache_dir)
108+
.context("could not move target directory to cache")?;
109+
self.cleanup(&cache_dir)?;
110+
Ok(())
111+
}
112+
}

src/docbuilder/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod caching;
12
mod crates;
23
mod limits;
34
mod rustwide_builder;

src/docbuilder/rustwide_builder.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use super::caching::ArtifactCache;
12
use crate::db::file::add_path_into_database;
23
use crate::db::{
34
add_build_into_database, add_doc_coverage, add_package_into_database,
@@ -27,12 +28,13 @@ use rustwide::{AlternativeRegistry, Build, Crate, Toolchain, Workspace, Workspac
2728
use std::collections::{HashMap, HashSet};
2829
use std::path::Path;
2930
use std::sync::Arc;
30-
use tracing::{debug, info, warn};
31+
use tracing::{debug, info, instrument, warn};
3132

3233
const USER_AGENT: &str = "docs.rs builder (https://github.com/rust-lang/docs.rs)";
3334
const DUMMY_CRATE_NAME: &str = "empty-library";
3435
const DUMMY_CRATE_VERSION: &str = "1.0.0";
3536

37+
#[derive(Debug)]
3638
pub enum PackageKind<'a> {
3739
Local(&'a Path),
3840
CratesIo,
@@ -47,6 +49,7 @@ pub struct RustwideBuilder {
4749
storage: Arc<Storage>,
4850
metrics: Arc<Metrics>,
4951
index: Arc<Index>,
52+
artifact_cache: ArtifactCache,
5053
rustc_version: String,
5154
repository_stats_updater: Arc<RepositoryStatsUpdater>,
5255
skip_build_if_exists: bool,
@@ -89,6 +92,7 @@ impl RustwideBuilder {
8992
Ok(RustwideBuilder {
9093
workspace,
9194
toolchain,
95+
artifact_cache: ArtifactCache::new(config.prefix.join("artifact_cache"))?,
9296
config,
9397
db: context.pool()?,
9498
storage: context.storage()?,
@@ -197,6 +201,7 @@ impl RustwideBuilder {
197201

198202
let has_changed = old_version.as_deref() != Some(&self.rustc_version);
199203
if has_changed {
204+
self.artifact_cache.purge()?;
200205
self.add_essential_files()?;
201206
}
202207
Ok(has_changed)
@@ -319,6 +324,7 @@ impl RustwideBuilder {
319324
self.build_package(&package.name, &package.version, PackageKind::Local(path))
320325
}
321326

327+
#[instrument(skip(self))]
322328
pub fn build_package(
323329
&mut self,
324330
name: &str,
@@ -383,6 +389,34 @@ impl RustwideBuilder {
383389
(|| -> Result<bool> {
384390
use docsrs_metadata::BuildTargets;
385391

392+
let release_data = match self
393+
.index
394+
.api()
395+
.get_release_data(name, version)
396+
.context("error fetching release data from crates.io")
397+
{
398+
Ok(data) => data,
399+
Err(err) => {
400+
warn!("{:#?}", err);
401+
ReleaseData::default()
402+
}
403+
};
404+
405+
if let Some(ref published_by) = release_data.published_by {
406+
info!(
407+
host_target_dir=?build.host_target_dir(),
408+
published_by_id=published_by.id,
409+
published_by_login=published_by.login,
410+
"restoring artifact cache",
411+
);
412+
if let Err(err) = self
413+
.artifact_cache
414+
.restore_to(&published_by.id.to_string(), build.host_target_dir())
415+
{
416+
warn!(?err, "could not restore artifact cache");
417+
}
418+
}
419+
386420
let mut has_docs = false;
387421
let mut successful_targets = Vec::new();
388422
let metadata = Metadata::from_crate_root(build.host_source_dir())?;
@@ -534,6 +568,22 @@ impl RustwideBuilder {
534568
Err(err) => warn!("{:#?}", err),
535569
}
536570

571+
if let Some(ref published_by) = release_data.published_by {
572+
info!(
573+
host_target_dir=?build.host_target_dir(),
574+
published_by_id=published_by.id,
575+
published_by_login=published_by.login,
576+
"saving artifact cache",
577+
);
578+
if let Err(err) = self
579+
.artifact_cache
580+
.save(&published_by.id.to_string(), build.host_target_dir())
581+
.context("error giving back artifact cache")
582+
{
583+
warn!(?err, "could not give back artifact cache");
584+
};
585+
}
586+
537587
if res.result.successful {
538588
// delete eventually existing files from pre-archive storage.
539589
// we're doing this in the end so eventual problems in the build

0 commit comments

Comments
 (0)