Skip to content

Commit 455c9af

Browse files
authored
Merge pull request #11481 from Turbo87/og-image-integration
Integrate `crates_io_og_image` into the application
2 parents 3858d89 + d5771ca commit 455c9af

17 files changed

+508
-5
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,7 @@ jobs:
206206
- run: cargo test --workspace
207207
env:
208208
# Set the path to the Fira Sans font for Typst.
209-
# The path is relative to the `crates_io_og_image` crate root.
210-
TYPST_FONT_PATH: ../../Fira-4.202/otf
209+
TYPST_FONT_PATH: ${{ github.workspace }}/Fira-4.202/otf
211210

212211
frontend-lint:
213212
name: Frontend / Lint

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ crates_io_env_vars = { path = "crates/crates_io_env_vars" }
7171
crates_io_github = { path = "crates/crates_io_github" }
7272
crates_io_index = { path = "crates/crates_io_index" }
7373
crates_io_markdown = { path = "crates/crates_io_markdown" }
74+
crates_io_og_image = { path = "crates/crates_io_og_image" }
7475
crates_io_pagerduty = { path = "crates/crates_io_pagerduty" }
7576
crates_io_session = { path = "crates/crates_io_session" }
7677
crates_io_tarball = { path = "crates/crates_io_tarball" }

src/bin/background-worker.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use crates_io::{Emails, config};
2424
use crates_io_docs_rs::RealDocsRsClient;
2525
use crates_io_env_vars::var;
2626
use crates_io_index::RepositoryConfig;
27+
use crates_io_og_image::OgImageGenerator;
2728
use crates_io_team_repo::TeamRepoImpl;
2829
use crates_io_worker::Runner;
2930
use object_store::prefix::PrefixStore;
@@ -102,6 +103,7 @@ fn main() -> anyhow::Result<()> {
102103
.emails(emails)
103104
.maybe_docs_rs(docs_rs)
104105
.team_repo(Box::new(team_repo))
106+
.og_image_generator(OgImageGenerator::from_environment()?)
105107
.build();
106108

107109
let environment = Arc::new(environment);
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
use anyhow::Result;
2+
use crates_io::db;
3+
use crates_io::schema::{background_jobs, crates};
4+
use crates_io::worker::jobs::GenerateOgImage;
5+
use crates_io_worker::BackgroundJob;
6+
use diesel::prelude::*;
7+
use diesel_async::RunQueryDsl;
8+
use tracing::{info, warn};
9+
10+
#[derive(clap::Parser, Debug)]
11+
#[command(
12+
name = "backfill-og-images",
13+
about = "Enqueue OG image generation jobs for existing crates"
14+
)]
15+
pub struct Opts {
16+
#[arg(long, default_value = "1000")]
17+
/// Batch size for enqueueing crates (default: 1000)
18+
batch_size: usize,
19+
20+
#[arg(long)]
21+
/// Only generate OG images for crates with names starting with this prefix
22+
prefix: Option<String>,
23+
24+
#[arg(long)]
25+
/// Offset to start enqueueing from (useful for resuming)
26+
offset: Option<i64>,
27+
}
28+
29+
pub async fn run(opts: Opts) -> Result<()> {
30+
let mut conn = db::oneoff_connection().await?;
31+
32+
info!("Starting OG image backfill with options: {opts:?}");
33+
34+
// Helper function to build query
35+
let build_query = |offset: i64| {
36+
let mut query = crates::table
37+
.select(crates::name)
38+
.order(crates::name)
39+
.into_boxed();
40+
41+
if let Some(prefix) = &opts.prefix {
42+
query = query.filter(crates::name.like(format!("{prefix}%")));
43+
}
44+
45+
query.offset(offset)
46+
};
47+
48+
// Count total crates to process
49+
let mut count_query = crates::table.into_boxed();
50+
if let Some(prefix) = &opts.prefix {
51+
count_query = count_query.filter(crates::name.like(format!("{prefix}%")));
52+
}
53+
let total_crates: i64 = count_query.count().get_result(&mut conn).await?;
54+
55+
info!("Total crates to enqueue: {total_crates}");
56+
57+
let mut offset = opts.offset.unwrap_or(0);
58+
let mut enqueued = 0;
59+
let mut errors = 0;
60+
61+
loop {
62+
// Fetch batch of crate names
63+
let crate_names: Vec<String> = build_query(offset)
64+
.limit(opts.batch_size as i64)
65+
.load(&mut conn)
66+
.await?;
67+
68+
if crate_names.is_empty() {
69+
break;
70+
}
71+
72+
let batch_size = crate_names.len();
73+
info!(
74+
"Enqueueing batch {}-{} of {total_crates}",
75+
offset + 1,
76+
offset + batch_size as i64
77+
);
78+
79+
// Create batch of jobs
80+
let jobs = crate_names
81+
.into_iter()
82+
.map(GenerateOgImage::new)
83+
.map(|job| {
84+
Ok((
85+
background_jobs::job_type.eq(GenerateOgImage::JOB_NAME),
86+
background_jobs::data.eq(serde_json::to_value(job)?),
87+
background_jobs::priority.eq(-10),
88+
))
89+
})
90+
.collect::<serde_json::Result<Vec<_>>>()?;
91+
92+
// Batch insert all jobs
93+
let result = diesel::insert_into(background_jobs::table)
94+
.values(jobs)
95+
.execute(&mut conn)
96+
.await;
97+
98+
match result {
99+
Ok(inserted_count) => {
100+
enqueued += inserted_count;
101+
info!("Enqueued {enqueued} jobs so far...");
102+
}
103+
Err(e) => {
104+
errors += batch_size;
105+
warn!("Failed to enqueue batch of OG image jobs: {e}");
106+
}
107+
}
108+
109+
// Break if we've processed fewer than batch_size (last batch)
110+
if batch_size < opts.batch_size {
111+
break;
112+
}
113+
114+
offset += opts.batch_size as i64;
115+
}
116+
117+
info!("Jobs enqueued: {enqueued}");
118+
if errors > 0 {
119+
warn!("{errors} jobs failed to enqueue. Check logs above for details.");
120+
}
121+
122+
Ok(())
123+
}

src/bin/crates-admin/enqueue_job.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ pub enum Command {
3434
#[arg()]
3535
name: String,
3636
},
37+
/// Generate OpenGraph images for the specified crates
38+
GenerateOgImage {
39+
/// Crate names to generate OpenGraph images for
40+
#[arg(required = true)]
41+
names: Vec<String>,
42+
},
3743
ProcessCdnLogQueue(jobs::ProcessCdnLogQueue),
3844
SyncAdmins {
3945
/// Force a sync even if one is already in progress
@@ -143,6 +149,11 @@ pub async fn run(command: Command) -> Result<()> {
143149

144150
jobs::CheckTyposquat::new(&name).enqueue(&mut conn).await?;
145151
}
152+
Command::GenerateOgImage { names } => {
153+
for name in names {
154+
jobs::GenerateOgImage::new(name).enqueue(&mut conn).await?;
155+
}
156+
}
146157
Command::SendTokenExpiryNotifications => {
147158
jobs::SendTokenExpiryNotifications
148159
.enqueue(&mut conn)

src/bin/crates-admin/main.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#[macro_use]
22
extern crate tracing;
33

4+
mod backfill_og_images;
45
mod default_versions;
56
mod delete_crate;
67
mod delete_version;
@@ -17,6 +18,7 @@ mod yank_version;
1718
#[derive(clap::Parser, Debug)]
1819
#[command(name = "crates-admin")]
1920
enum Command {
21+
BackfillOgImages(backfill_og_images::Opts),
2022
DeleteCrate(delete_crate::Opts),
2123
DeleteVersion(delete_version::Opts),
2224
Populate(populate::Opts),
@@ -46,6 +48,7 @@ async fn main() -> anyhow::Result<()> {
4648
span.record("command", tracing::field::debug(&command));
4749

4850
match command {
51+
Command::BackfillOgImages(opts) => backfill_og_images::run(opts).await,
4952
Command::DeleteCrate(opts) => delete_crate::run(opts).await,
5053
Command::DeleteVersion(opts) => delete_version::run(opts).await,
5154
Command::Populate(opts) => populate::run(opts).await,

src/controllers/krate/publish.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use crate::app::AppState;
44
use crate::auth::{AuthCheck, AuthHeader, Authentication};
55
use crate::worker::jobs::{
6-
self, CheckTyposquat, SendPublishNotificationsJob, UpdateDefaultVersion,
6+
self, CheckTyposquat, GenerateOgImage, SendPublishNotificationsJob, UpdateDefaultVersion,
77
};
88
use axum::Json;
99
use axum::body::{Body, Bytes};
@@ -549,14 +549,14 @@ pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<Go
549549
// Compared to only using a background job, this prevents us from getting into a
550550
// situation where a crate exists in the `crates` table but doesn't have a default
551551
// version in the `default_versions` table.
552-
if let Some((existing_default_version, _)) = existing_default_version {
552+
if let Some((existing_default_version, _)) = &existing_default_version {
553553
let published_default_version = DefaultVersion {
554554
id: version.id,
555555
num: semver,
556556
yanked: false,
557557
};
558558

559-
if existing_default_version < published_default_version {
559+
if existing_default_version < &published_default_version {
560560
diesel::update(default_versions::table)
561561
.filter(default_versions::crate_id.eq(krate.id))
562562
.set(default_versions::version_id.eq(version.id))
@@ -631,6 +631,14 @@ pub async fn publish(app: AppState, req: Parts, body: Body) -> AppResult<Json<Go
631631
}),
632632
)?;
633633

634+
// Enqueue OG image generation job if not handled by UpdateDefaultVersion
635+
if existing_default_version.is_none() {
636+
let og_image_job = GenerateOgImage::new(krate.name.clone());
637+
if let Err(error) = og_image_job.enqueue(conn).await {
638+
error!("Failed to enqueue `GenerateOgImage` job: {error}");
639+
}
640+
};
641+
634642
// Experiment: check new crates for potential typosquatting.
635643
if existing_crate.is_none() {
636644
let crates_feed_job = jobs::rss::SyncCratesFeed;

src/storage.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,18 @@ use tracing::{instrument, warn};
1919

2020
const PREFIX_CRATES: &str = "crates";
2121
const PREFIX_READMES: &str = "readmes";
22+
const PREFIX_OG_IMAGES: &str = "og-images";
2223
const DEFAULT_REGION: &str = "us-west-1";
2324
const CONTENT_TYPE_CRATE: &str = "application/gzip";
2425
const CONTENT_TYPE_GZIP: &str = "application/gzip";
2526
const CONTENT_TYPE_ZIP: &str = "application/zip";
2627
const CONTENT_TYPE_INDEX: &str = "text/plain";
2728
const CONTENT_TYPE_README: &str = "text/html";
29+
const CONTENT_TYPE_OG_IMAGE: &str = "image/png";
2830
const CACHE_CONTROL_IMMUTABLE: &str = "public,max-age=31536000,immutable";
2931
const CACHE_CONTROL_INDEX: &str = "public,max-age=600";
3032
const CACHE_CONTROL_README: &str = "public,max-age=604800";
33+
const CACHE_CONTROL_OG_IMAGE: &str = "public,max-age=86400";
3134

3235
type StdPath = std::path::Path;
3336

@@ -209,6 +212,13 @@ impl Storage {
209212
apply_cdn_prefix(&self.cdn_prefix, &readme_path(name, version)).replace('+', "%2B")
210213
}
211214

215+
/// Returns the URL of an uploaded crate's Open Graph image.
216+
///
217+
/// The function doesn't check for the existence of the file.
218+
pub fn og_image_location(&self, name: &str) -> String {
219+
apply_cdn_prefix(&self.cdn_prefix, &og_image_path(name))
220+
}
221+
212222
/// Returns the URL of an uploaded RSS feed.
213223
pub fn feed_url(&self, feed_id: &FeedId<'_>) -> String {
214224
apply_cdn_prefix(&self.cdn_prefix, &feed_id.into()).replace('+', "%2B")
@@ -240,6 +250,13 @@ impl Storage {
240250
self.store.delete(&path).await
241251
}
242252

253+
/// Deletes the Open Graph image for the given crate.
254+
#[instrument(skip(self))]
255+
pub async fn delete_og_image(&self, name: &str) -> Result<()> {
256+
let path = og_image_path(name);
257+
self.store.delete(&path).await
258+
}
259+
243260
#[instrument(skip(self))]
244261
pub async fn delete_feed(&self, feed_id: &FeedId<'_>) -> Result<()> {
245262
let path = feed_id.into();
@@ -270,6 +287,19 @@ impl Storage {
270287
Ok(())
271288
}
272289

290+
/// Uploads an Open Graph image for the given crate.
291+
#[instrument(skip(self, bytes))]
292+
pub async fn upload_og_image(&self, name: &str, bytes: Bytes) -> Result<()> {
293+
let path = og_image_path(name);
294+
let attributes = self.attrs([
295+
(Attribute::ContentType, CONTENT_TYPE_OG_IMAGE),
296+
(Attribute::CacheControl, CACHE_CONTROL_OG_IMAGE),
297+
]);
298+
let opts = attributes.into();
299+
self.store.put_opts(&path, bytes.into(), opts).await?;
300+
Ok(())
301+
}
302+
273303
#[instrument(skip(self, channel))]
274304
pub async fn upload_feed(
275305
&self,
@@ -385,6 +415,10 @@ fn readme_path(name: &str, version: &str) -> Path {
385415
format!("{PREFIX_READMES}/{name}/{name}-{version}.html").into()
386416
}
387417

418+
fn og_image_path(name: &str) -> Path {
419+
format!("{PREFIX_OG_IMAGES}/{name}.png").into()
420+
}
421+
388422
fn apply_cdn_prefix(cdn_prefix: &Option<String>, path: &Path) -> String {
389423
match cdn_prefix {
390424
Some(cdn_prefix) if !cdn_prefix.starts_with("https://") => {
@@ -484,6 +518,17 @@ mod tests {
484518
for (name, version, expected) in readme_tests {
485519
assert_eq!(storage.readme_location(name, version), expected);
486520
}
521+
522+
let og_image_tests = vec![
523+
("foo", "https://static.crates.io/og-images/foo.png"),
524+
(
525+
"some-long-crate-name",
526+
"https://static.crates.io/og-images/some-long-crate-name.png",
527+
),
528+
];
529+
for (name, expected) in og_image_tests {
530+
assert_eq!(storage.og_image_location(name), expected);
531+
}
487532
}
488533

489534
#[test]
@@ -661,4 +706,39 @@ mod tests {
661706
let expected_files = vec![target];
662707
assert_eq!(stored_files(&s.store).await, expected_files);
663708
}
709+
710+
#[tokio::test]
711+
async fn upload_og_image() {
712+
let s = Storage::from_config(&StorageConfig::in_memory());
713+
714+
let bytes = Bytes::from_static(b"fake png data");
715+
s.upload_og_image("foo", bytes.clone()).await.unwrap();
716+
717+
let expected_files = vec!["og-images/foo.png"];
718+
assert_eq!(stored_files(&s.store).await, expected_files);
719+
720+
s.upload_og_image("some-long-crate-name", bytes)
721+
.await
722+
.unwrap();
723+
724+
let expected_files = vec!["og-images/foo.png", "og-images/some-long-crate-name.png"];
725+
assert_eq!(stored_files(&s.store).await, expected_files);
726+
}
727+
728+
#[tokio::test]
729+
async fn delete_og_image() {
730+
let s = Storage::from_config(&StorageConfig::in_memory());
731+
732+
let bytes = Bytes::from_static(b"fake png data");
733+
s.upload_og_image("foo", bytes.clone()).await.unwrap();
734+
s.upload_og_image("bar", bytes).await.unwrap();
735+
736+
let expected_files = vec!["og-images/bar.png", "og-images/foo.png"];
737+
assert_eq!(stored_files(&s.store).await, expected_files);
738+
739+
s.delete_og_image("foo").await.unwrap();
740+
741+
let expected_files = vec!["og-images/bar.png"];
742+
assert_eq!(stored_files(&s.store).await, expected_files);
743+
}
664744
}

0 commit comments

Comments
 (0)