diff --git a/src/env.ts b/src/env.ts
index 0b9ada3..95af60e 100644
--- a/src/env.ts
+++ b/src/env.ts
@@ -50,4 +50,6 @@ export interface Env {
    * Host for the www/Digital Ocean/origin server
    */
   FALLBACK_HOST: string;
+
+  ORIGIN_HOST: string;
 }
diff --git a/src/providers/originProvider.ts b/src/providers/originProvider.ts
new file mode 100644
index 0000000..4ec2514
--- /dev/null
+++ b/src/providers/originProvider.ts
@@ -0,0 +1,105 @@
+import { CACHE_HEADERS } from '../constants/cache';
+import { Context } from '../context';
+import {
+  GetFileOptions,
+  GetFileResult,
+  HeadFileResult,
+  HttpResponseHeaders,
+  Provider,
+  ReadDirectoryResult,
+} from './provider';
+
+type OriginProviderCtorOptions = {
+  ctx: Context;
+};
+
+/**
+ * Serves assets from origin.nodejs.org, used as a fallback for if R2 fails.
+ */
+export class OriginProvider implements Provider {
+  private ctx: Context;
+
+  constructor({ ctx }: OriginProviderCtorOptions) {
+    this.ctx = ctx;
+  }
+
+  async headFile(path: string): Promise<HeadFileResult | undefined> {
+    const res = await fetch(this.ctx.env.ORIGIN_HOST + path, {
+      method: 'HEAD',
+      headers: {
+        'user-agent': 'release-cloudflare-worker',
+      },
+    });
+
+    if (res.status === 404) {
+      return undefined;
+    }
+
+    return {
+      httpStatusCode: res.status,
+      httpHeaders: originHeadersToOurHeadersObject(res.headers),
+    };
+  }
+
+  async getFile(
+    path: string,
+    options?: GetFileOptions | undefined
+  ): Promise<GetFileResult | undefined> {
+    const res = await fetch(this.ctx.env.ORIGIN_HOST + path, {
+      headers: {
+        'user-agent': 'release-cloudflare-worker',
+        'if-match': options?.conditionalHeaders?.ifMatch ?? '',
+        'if-none-match': options?.conditionalHeaders?.ifMatch ?? '',
+        'if-modified-since':
+          options?.conditionalHeaders?.ifModifiedSince?.toUTCString() ?? '',
+        'if-unmodified-since':
+          options?.conditionalHeaders?.ifUnmodifiedSince?.toUTCString() ?? '',
+        range: options?.rangeHeader ?? '',
+      },
+    });
+
+    if (res.status === 404) {
+      return undefined;
+    }
+
+    return {
+      contents: res.body,
+      httpStatusCode: res.status,
+      httpHeaders: originHeadersToOurHeadersObject(res.headers),
+    };
+  }
+
+  async readDirectory(path: string): Promise<ReadDirectoryResult | undefined> {
+    const res = await fetch(this.ctx.env.ORIGIN_HOST + path, {
+      headers: {
+        'user-agent': 'release-cloudflare-worker',
+      },
+    });
+
+    if (res.status === 404) {
+      return undefined;
+    }
+
+    return {
+      body: res.body,
+      httpStatusCode: res.status,
+      httpHeaders: originHeadersToOurHeadersObject(res.headers),
+    };
+  }
+}
+
+function originHeadersToOurHeadersObject(
+  headers: Headers
+): HttpResponseHeaders {
+  return {
+    etag: headers.get('etag') ?? '',
+    'accept-range': headers.get('accept-range') ?? 'bytes',
+    'access-control-allow-origin':
+      headers.get('access-control-allow-origin') ?? '',
+    'cache-control': CACHE_HEADERS.failure, // We don't want to cache these responses
+    'last-modified': headers.get('last-modified') ?? '',
+    'content-language': headers.get('content-language') ?? '',
+    'content-disposition': headers.get('content-disposition') ?? '',
+    'content-length': headers.get('content-length') ?? '0',
+  };
+}
diff --git a/src/providers/provider.ts b/src/providers/provider.ts
index 92b88a4..4bc2e66 100644
--- a/src/providers/provider.ts
+++ b/src/providers/provider.ts
@@ -32,6 +32,10 @@ export type HttpResponseHeaders = {
 };
 
 export type HeadFileResult = {
+  /**
+   * Status code to send the client
+   */
+  httpStatusCode: number;
   /**
    * Headers to send the client
    */
@@ -72,6 +76,7 @@ export type File = {
 
 export type R2ReadDirectoryResult = {
   subdirectories: string[];
+  hasIndexHtmlFile: boolean;
   files: File[];
 };
 
diff --git a/src/providers/r2Provider.ts b/src/providers/r2Provider.ts
index a14d7f3..df95dd8 100644
--- a/src/providers/r2Provider.ts
+++ b/src/providers/r2Provider.ts
@@ -12,9 +12,11 @@ import {
   Provider,
   ReadDirectoryResult,
 } from './provider';
+import { S3Provider } from './s3Provider';
 
 type R2ProviderCtorOptions = {
   ctx: Context;
+  fallbackProvider?: Provider;
 };
 
 export class R2Provider implements Provider {
@@ -41,6 +43,7 @@ export class R2Provider implements Provider {
     }
 
     return {
+      httpStatusCode: 200,
       httpHeaders: r2MetadataToHeaders(object, 200),
     };
   }
@@ -83,9 +86,12 @@ export class R2Provider implements Provider {
     };
   }
 
-  readDirectory(_: string): Promise<ReadDirectoryResult | undefined> {
-    // We will use the S3Provider here
-    throw new Error('Method not implemented.');
+  readDirectory(path: string): Promise<ReadDirectoryResult | undefined> {
+    const s3Provider = new S3Provider({
+      ctx: this.ctx,
+      fallbackProvider: this.fallbackProvider,
+    });
+    return s3Provider.readDirectory(path);
   }
 }
diff --git a/src/providers/s3Provider.ts b/src/providers/s3Provider.ts
new file mode 100644
index 0000000..a685355
--- /dev/null
+++ b/src/providers/s3Provider.ts
@@ -0,0 +1,107 @@
+import { ListObjectsV2Command, S3Client } from '@aws-sdk/client-s3';
+import { Context } from '../context';
+import {
+  File,
+  GetFileOptions,
+  GetFileResult,
+  HeadFileResult,
+  Provider,
+  ReadDirectoryResult,
+} from './provider';
+import { retryWrapper } from '../utils/provider';
+import { R2_RETRY_LIMIT, S3_MAX_KEYS } from '../constants/limits';
+
+type S3ProviderCtorOptions = {
+  ctx: Context;
+};
+
+/**
+ * This provides assets from an S3-compatible data source. In our case, it's
+ * still R2. We use this only for directory listing. In R2's bindings api,
+ * there's some internal response size limit that makes us need to send
+ * an absurd amount of requests in order to list the full contents of some
+ * directories. Using the S3 api was the recommended fix from the R2 team.
+ */
+export class S3Provider implements Provider {
+  private ctx: Context;
+  private client: S3Client;
+
+  constructor({ ctx }: S3ProviderCtorOptions) {
+    this.ctx = ctx;
+
+    this.client = new S3Client({
+      region: 'auto',
+      endpoint: ctx.env.S3_ENDPOINT,
+      credentials: {
+        accessKeyId: ctx.env.S3_ACCESS_KEY_ID,
+        secretAccessKey: ctx.env.S3_ACCESS_KEY_SECRET,
+      },
+    });
+  }
+
+  headFile(_: string): Promise<HeadFileResult | undefined> {
+    throw new Error('Method not implemented.');
+  }
+
+  getFile(
+    _: string,
+    _2?: GetFileOptions | undefined
+  ): Promise<GetFileResult | undefined> {
+    throw new Error('Method not implemented.');
+  }
+
+  async readDirectory(path: string): Promise<ReadDirectoryResult | undefined> {
+    const directories = new Set<string>();
+    let hasIndexHtmlFile = false;
+    const files: File[] = [];
+
+    let isTruncated = true;
+    let cursor: string | undefined;
+    while (isTruncated) {
+      const result = await retryWrapper(
+        async () => {
+          return this.client.send(
+            new ListObjectsV2Command({
+              Bucket: this.ctx.env.BUCKET_NAME,
+              Prefix: path,
+              Delimiter: '/',
+              MaxKeys: S3_MAX_KEYS,
+              ContinuationToken: cursor,
+            })
+          );
+        },
+        R2_RETRY_LIMIT,
+        this.ctx.sentry
+      );
+
+      result.CommonPrefixes?.forEach(directory => {
+        directories.add(directory.Prefix!.substring(path.length));
+      });
+
+      result.Contents?.forEach(object => {
+        if (object.Key!.endsWith('index.html')) {
+          hasIndexHtmlFile = true;
+        }
+
+        files.push({
+          name: object.Key!.substring(path.length),
+          size: object.Size!,
+          lastModified: object.LastModified!,
+        });
+      });
+
+      isTruncated = result.IsTruncated ?? false;
+      cursor = result.NextContinuationToken;
+    }
+
+    if (directories.size === 0 && files.length === 0) {
+      return undefined;
+    }
+
+    return {
+      subdirectories: Array.from(directories),
+      hasIndexHtmlFile,
+      files,
+    };
+  }
+}
diff --git a/wrangler.toml b/wrangler.toml
index 31b103f..3b61b80 100644
--- a/wrangler.toml
+++ b/wrangler.toml
@@ -15,6 +15,7 @@ DIRECTORY_CACHE_CONTROL = 'public, max-age=3600, s-maxage=14400'
 BUCKET_NAME = 'dist-prod'
 USE_FALLBACK_WHEN_R2_FAILS = false
 FALLBACK_HOST = 'https://origin.nodejs.org'
+ORIGIN_HOST = 'https://origin.nodejs.org'
 
 [[r2_buckets]]
 binding = 'R2_BUCKET'
@@ -33,6 +34,7 @@ DIRECTORY_CACHE_CONTROL = 'public, max-age=3600, s-maxage=14400'
 BUCKET_NAME = 'dist-prod'
 USE_FALLBACK_WHEN_R2_FAILS = true
 FALLBACK_HOST = 'https://origin.nodejs.org'
+ORIGIN_HOST = 'https://origin.nodejs.org'
 
 [[env.staging.r2_buckets]]
 binding = 'R2_BUCKET'
@@ -51,6 +53,7 @@ DIRECTORY_CACHE_CONTROL = 'public, max-age=3600, s-maxage=14400'
 BUCKET_NAME='dist-prod'
 USE_FALLBACK_WHEN_R2_FAILS = true
 FALLBACK_HOST = 'https://origin.nodejs.org'
+ORIGIN_HOST = 'https://origin.nodejs.org'
 
 [[env.prod.r2_buckets]]
 binding = 'R2_BUCKET'
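For context, a minimal sketch of how these pieces presumably fit together in the worker (the handleDirectoryRequest helper, its import paths, and the response handling are illustrative assumptions, not part of this diff): OriginProvider is handed to R2Provider as its optional fallbackProvider, and R2Provider.readDirectory now delegates to S3Provider, which pages through ListObjectsV2 results until IsTruncated is false.

// Hypothetical wiring, for illustration only -- not part of this diff.
import { Context } from './context';
import { OriginProvider } from './providers/originProvider';
import { R2Provider } from './providers/r2Provider';

async function handleDirectoryRequest(
  ctx: Context,
  path: string
): Promise<Response> {
  // OriginProvider fetches from ORIGIN_HOST directly; R2Provider presumably
  // consults it only when an R2 call fails and USE_FALLBACK_WHEN_R2_FAILS is on.
  const fallbackProvider = new OriginProvider({ ctx });
  const provider = new R2Provider({ ctx, fallbackProvider });

  // readDirectory() is delegated to S3Provider, which lists the prefix via the
  // S3 API and aggregates subdirectories, files, and hasIndexHtmlFile.
  const listing = await provider.readDirectory(path);
  if (listing === undefined) {
    return new Response('Directory not found', { status: 404 });
  }

  // A real handler would render the listing (or serve the directory's
  // index.html when hasIndexHtmlFile is set); this sketch only confirms
  // that the lookup succeeded.
  return new Response(`Listing for ${path} resolved`, { status: 200 });
}

The new hasIndexHtmlFile flag presumably lets the directory middleware serve the directory's own index.html instead of a generated listing, and ORIGIN_HOST points the origin provider at the same host already configured as FALLBACK_HOST.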