Skip to content

Commit a0b0756

Browse files
committed
add temporary command to fix broken archive indexes
1 parent 7ad8c62 commit a0b0756

5 files changed

+119
-31
lines changed

.sqlx/query-648ce6ae0bfbdc28ad7f4099f8141380bd83a93829e8a89d083c263bf621ed5f.json

Lines changed: 0 additions & 23 deletions
This file was deleted.

.sqlx/query-79ea7db0f92c01d358679d0ea4e936b8d4645f6265126aff46a3b5c9cabba64b.json

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/bin/cratesfyi.rs

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
1-
use std::env;
21
use std::fmt::Write;
32
use std::net::SocketAddr;
43
use std::path::PathBuf;
54
use std::str::FromStr;
65
use std::sync::Arc;
6+
use std::{env, fs};
77

88
use anyhow::{anyhow, Context as _, Error, Result};
99
use axum::async_trait;
1010
use clap::{Parser, Subcommand, ValueEnum};
1111
use docs_rs::cdn::CdnBackend;
1212
use docs_rs::db::{self, add_path_into_database, Overrides, Pool, PoolClient};
1313
use docs_rs::repositories::RepositoryStatsUpdater;
14+
use docs_rs::storage::{rustdoc_archive_path, source_archive_path, PathNotFoundError};
1415
use docs_rs::utils::{
1516
get_config, get_crate_pattern_and_priority, list_crate_priorities, queue_builder,
16-
remove_crate_priority, set_config, set_crate_priority, ConfigName,
17+
remove_crate_priority, set_config, set_crate_priority, spawn_blocking, ConfigName,
1718
};
1819
use docs_rs::{
1920
start_background_metrics_webserver, start_web_server, AsyncStorage, BuildQueue, Config,
@@ -23,6 +24,7 @@ use docs_rs::{
2324
use futures_util::StreamExt;
2425
use humantime::Duration;
2526
use once_cell::sync::OnceCell;
27+
use rusqlite::{Connection, OpenFlags};
2628
use sentry::TransactionContext;
2729
use tokio::runtime::{Builder, Runtime};
2830
use tracing_log::LogTracer;
@@ -509,6 +511,9 @@ enum DatabaseSubcommand {
509511
/// temporary command to update the `crates.latest_version_id` field
510512
UpdateLatestVersionId,
511513

514+
/// temporary command to rebuild a subset of the archive indexes
515+
FixBrokenArchiveIndexes,
516+
512517
/// Updates Github/Gitlab stats for crates.
513518
UpdateRepositoryFields,
514519

@@ -567,6 +572,80 @@ impl DatabaseSubcommand {
567572
.context("Failed to run database migrations")?
568573
}
569574

575+
Self::FixBrokenArchiveIndexes => {
576+
let pool = ctx.pool()?;
577+
let build_queue = ctx.build_queue()?;
578+
ctx.runtime()?
579+
.block_on(async {
580+
let storage = ctx.async_storage().await?;
581+
let mut conn = pool.get_async().await?;
582+
let mut result_stream = sqlx::query!(
583+
"
584+
SELECT c.name, r.version, r.release_time
585+
FROM crates c, releases r
586+
WHERE c.id = r.crate_id
587+
ORDER BY r.id
588+
"
589+
)
590+
.fetch(&mut *conn);
591+
592+
while let Some(row) = result_stream.next().await {
593+
let row = row?;
594+
595+
println!(
596+
"checking index for {} {} ({:?})",
597+
row.name, row.version, row.release_time
598+
);
599+
600+
for path in &[
601+
rustdoc_archive_path(&row.name, &row.version),
602+
source_archive_path(&row.name, &row.version),
603+
] {
604+
let local_archive_index_filename = match storage
605+
.download_archive_index(path, 42)
606+
.await
607+
{
608+
Ok(path) => path,
609+
Err(err)
610+
if err.downcast_ref::<PathNotFoundError>().is_some() =>
611+
{
612+
continue
613+
}
614+
Err(err) => return Err(err),
615+
};
616+
617+
let count = {
618+
let connection = Connection::open_with_flags(
619+
&local_archive_index_filename,
620+
OpenFlags::SQLITE_OPEN_READ_ONLY
621+
| OpenFlags::SQLITE_OPEN_NO_MUTEX,
622+
)?;
623+
let mut stmt =
624+
connection.prepare("SELECT count(*) FROM files")?;
625+
626+
stmt.query_row([], |row| Ok(row.get::<_, usize>(0)))??
627+
};
628+
629+
fs::remove_file(&local_archive_index_filename)?;
630+
631+
if count >= 65000 {
632+
println!("...big index, queueing rebuild");
633+
spawn_blocking({
634+
let build_queue = build_queue.clone();
635+
let name = row.name.clone();
636+
let version = row.version.clone();
637+
move || build_queue.add_crate(&name, &version, 5, None)
638+
})
639+
.await?;
640+
}
641+
}
642+
}
643+
644+
Ok::<(), anyhow::Error>(())
645+
})
646+
.context("Failed to queue rebuilds for big documentation sizes")?
647+
}
648+
570649
Self::UpdateLatestVersionId => {
571650
let pool = ctx.pool()?;
572651
ctx.runtime()?
@@ -581,7 +660,7 @@ impl DatabaseSubcommand {
581660
while let Some(row) = result_stream.next().await {
582661
let row = row?;
583662

584-
println!("handling crate {}", row.name);
663+
println!("handling crate {} ", row.name);
585664

586665
db::update_latest_version_id(&mut update_conn, row.id).await?;
587666
}

src/storage/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type FileRange = RangeInclusive<u64>;
2828

2929
#[derive(Debug, thiserror::Error)]
3030
#[error("path not found")]
31-
pub(crate) struct PathNotFoundError;
31+
pub struct PathNotFoundError;
3232

3333
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
3434
pub(crate) struct Blob {
@@ -304,7 +304,7 @@ impl AsyncStorage {
304304
}
305305

306306
#[instrument]
307-
pub(super) async fn download_archive_index(
307+
pub async fn download_archive_index(
308308
&self,
309309
archive_path: &str,
310310
latest_build_id: i32,
@@ -823,11 +823,11 @@ fn detect_mime(file_path: impl AsRef<Path>) -> &'static str {
823823
}
824824
}
825825

826-
pub(crate) fn rustdoc_archive_path(name: &str, version: &str) -> String {
826+
pub fn rustdoc_archive_path(name: &str, version: &str) -> String {
827827
format!("rustdoc/{name}/{version}.zip")
828828
}
829829

830-
pub(crate) fn source_archive_path(name: &str, version: &str) -> String {
830+
pub fn source_archive_path(name: &str, version: &str) -> String {
831831
format!("sources/{name}/{version}.zip")
832832
}
833833

src/utils/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ where
110110
/// })
111111
/// .await?
112112
/// ```
113-
pub(crate) async fn spawn_blocking<F, R>(f: F) -> Result<R>
113+
pub async fn spawn_blocking<F, R>(f: F) -> Result<R>
114114
where
115115
F: FnOnce() -> Result<R> + Send + 'static,
116116
R: Send + 'static,

0 commit comments

Comments
 (0)