diff --git a/Cargo.lock b/Cargo.lock index ec9723308..54ed2c41d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,6 +85,17 @@ dependencies = [ "aes", ] +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check 0.9.4", +] + [[package]] name = "ahash" version = "0.8.8" @@ -639,6 +650,28 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2 1.0.78", + "quote 1.0.35", + "syn 1.0.109", +] + [[package]] name = "bytemuck" version = "1.14.3" @@ -1997,8 +2030,7 @@ checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" [[package]] name = "eszip" version = "0.68.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e478354842a7ee02c685221da61b7ecee919283e65818167d0f8ce60cca70798" +source = "git+https://github.com/supabase/eszip?branch=fix-pub-vis-0-68-2#7c3dd25d990339a25515bab84bd1edbbd4f40cad" dependencies = [ "anyhow", "base64 0.21.7", @@ -2058,6 +2090,12 @@ dependencies = [ "serde", ] +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + [[package]] name = "fastwebsockets" version = "0.4.4" @@ -2560,6 +2598,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] name = "hashbrown" @@ -2567,7 +2608,7 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash", + "ahash 0.8.8", ] [[package]] @@ -2576,7 +2617,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash", + "ahash 0.8.8", "allocator-api2", ] @@ -4267,6 +4308,26 @@ dependencies = [ "cc", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2 1.0.78", + "quote 1.0.35", + "syn 1.0.109", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -4461,6 +4522,15 @@ version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" version = "0.11.20" @@ -4548,6 +4618,35 @@ dependencies = [ "digest", ] +[[package]] +name = "rkyv" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +dependencies = [ + "proc-macro2 1.0.78", + "quote 1.0.35", + "syn 1.0.109", +] + [[package]] name = "ron" version = "0.8.1" @@ -4856,6 +4955,13 @@ dependencies = [ "sb_node", ] +[[package]] +name = "sb_eszip_shared" +version = "0.1.0" +dependencies = [ + "eszip", +] + [[package]] name = "sb_fs" version = "0.1.0" @@ -4869,15 +4975,19 @@ dependencies = [ "deno_npm", "deno_semver", "eszip", + "futures", "import_map 0.18.0", "log", "once_cell", + "rkyv", "sb_core", + "sb_eszip_shared", "sb_node", "sb_npm", "serde", "thiserror", "tokio", + "url", ] [[package]] @@ -4894,15 +5004,24 @@ dependencies = [ "deno_semver", "deno_web", "eszip", + "futures", "glob", + "hashlink", "import_map 0.18.0", "log", "once_cell", + "pathdiff", + "rkyv", "sb_core", + "sb_eszip_shared", "sb_fs", "sb_node", "sb_npm", + "scopeguard", "serde", + "sha2", + "tempfile", + "thiserror", "tokio", "urlencoding", ] @@ -4920,11 +5039,13 @@ dependencies = [ "deno_semver", "deno_tls", "eszip", + "futures-util", "import_map 0.18.0", "log", "monch", "once_cell", "sb_core", + "sb_eszip_shared", "sb_fs", "sb_graph", "sb_node", @@ -5110,6 +5231,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "sec1" version = "0.7.3" @@ -5633,7 +5760,7 @@ version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83406221c501860fce9c27444f44125eafe9e598b8b81be7563d7036784cd05c" dependencies = [ - "ahash", + "ahash 0.8.8", "anyhow", "dashmap", "once_cell", @@ -6065,6 +6192,18 @@ dependencies = [ "xattr", ] +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if 1.0.0", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "term_size" version = "0.3.2" @@ -6458,7 +6597,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "rand", "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 963824d06..af987db05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,10 +81,14 @@ http = "0.2" faster-hex = "0.9.0" tracing = "0.1" tracing-subscriber = "0.3" +sha2 = "0.10" +rkyv = "0.7" +tempfile = "3" [patch.crates-io] # TODO(Nyannyacha): Patch below is temporary. Clean the line in the Deno 1.44 update. deno_core = { git = "https://github.com/supabase/deno_core", branch = "278-supabase" } +eszip = { git = "https://github.com/supabase/eszip", branch = "fix-pub-vis-0-68-2" } [profile.dind] inherits = "dev" diff --git a/crates/base/src/deno_runtime.rs b/crates/base/src/deno_runtime.rs index 5006394bf..7281cd83f 100644 --- a/crates/base/src/deno_runtime.rs +++ b/crates/base/src/deno_runtime.rs @@ -28,6 +28,7 @@ use sb_core::conn_sync::DenoRuntimeDropToken; use sb_core::http::sb_core_http; use sb_core::http_start::sb_core_http_start; use sb_core::util::sync::AtomicFlag; +use sb_fs::static_fs::StaticFs; use serde::Serialize; use std::borrow::Cow; use std::collections::HashMap; @@ -57,9 +58,7 @@ use sb_env::sb_env as sb_env_op; use sb_fs::file_system::DenoCompileFileSystem; use sb_graph::emitter::EmitterFactory; use sb_graph::import_map::load_import_map; -use sb_graph::{ - generate_binary_eszip, include_glob_patterns_in_eszip, EszipPayloadKind, STATIC_FS_PREFIX, -}; +use sb_graph::{generate_binary_eszip, include_glob_patterns_in_eszip, EszipPayloadKind}; use sb_module_loader::standalone::create_module_loader_for_standalone_from_eszip_kind; use sb_module_loader::RuntimeProviders; use sb_node::deno_node; @@ -243,6 +242,8 @@ where .. } = opts; + // TODO(Nyannyacha): Make sure `service_path` is an absolute path first. + let drop_token = CancellationToken::default(); let base_dir_path = std::env::current_dir().map(|p| p.join(&service_path))?; @@ -324,9 +325,9 @@ where include_glob_patterns_in_eszip( static_patterns.iter().map(|s| s.as_str()).collect(), &mut eszip, - Some(STATIC_FS_PREFIX.to_string()), + &base_dir_path, ) - .await; + .await?; EszipPayloadKind::Eszip(eszip) }; @@ -345,6 +346,7 @@ where ) })() .unwrap_or_else(|| vec!["mozilla".to_string()]); + for store in ca_stores.iter() { match store.as_str() { "mozilla" => { @@ -371,6 +373,7 @@ where Arc::new(ValueRootCertStoreProvider::new(root_cert_store.clone())); let mut stdio = Some(Default::default()); + if is_user_worker { stdio = Some(deno_io::Stdio { stdin: deno_io::StdioPipe::file(std::fs::File::create("/dev/null")?), @@ -381,6 +384,7 @@ where let rt_provider = create_module_loader_for_standalone_from_eszip_kind( eszip, + base_dir_path.clone(), maybe_arc_import_map, import_map_path, maybe_inspector.is_some(), @@ -399,8 +403,9 @@ where let op_fs = { if is_user_worker { - Arc::new(sb_fs::static_fs::StaticFs::new( + Arc::new(StaticFs::new( static_files, + base_dir_path, vfs_path, vfs, npm_snapshot, @@ -943,6 +948,7 @@ extern "C" fn mem_check_gc_prologue_callback_fn( mod test { use crate::deno_runtime::DenoRuntime; use crate::rt_worker::worker::DuplexStreamEntry; + use anyhow::Context; use deno_config::JsxImportSourceConfig; use deno_core::error::AnyError; use deno_core::{serde_json, serde_v8, v8, FastString, ModuleCodeString, PollEventLoopOptions}; @@ -959,7 +965,8 @@ mod test { use std::fs; use std::fs::File; use std::io::Write; - use std::path::PathBuf; + use std::marker::PhantomData; + use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; use tokio::sync::mpsc; @@ -979,6 +986,150 @@ mod test { } } + #[derive(Debug, Default)] + struct RuntimeBuilder { + path: Option, + eszip: Option, + env_vars: Option>, + worker_runtime_conf: Option, + static_patterns: Vec, + jsx_import_source_config: Option, + _phantom_context: PhantomData, + } + + impl RuntimeBuilder { + fn new() -> Self { + Self::default() + } + } + + impl RuntimeBuilder { + fn set_context(self) -> RuntimeBuilder + where + C2: GetRuntimeContext, + { + RuntimeBuilder { + path: self.path, + eszip: self.eszip, + env_vars: self.env_vars, + worker_runtime_conf: self.worker_runtime_conf, + static_patterns: self.static_patterns, + jsx_import_source_config: self.jsx_import_source_config, + _phantom_context: PhantomData, + } + } + } + + impl RuntimeBuilder + where + C: GetRuntimeContext, + { + async fn build(self) -> DenoRuntime { + let RuntimeBuilder { + path, + eszip, + env_vars, + worker_runtime_conf, + static_patterns, + jsx_import_source_config, + _phantom_context, + } = self; + + let (worker_pool_tx, _) = mpsc::unbounded_channel::(); + + DenoRuntime::new( + WorkerContextInitOpts { + maybe_eszip: eszip, + service_path: path + .map(PathBuf::from) + .unwrap_or(PathBuf::from("./test_cases/main")), + + conf: { + if let Some(conf) = worker_runtime_conf { + conf + } else { + WorkerRuntimeOpts::MainWorker(MainWorkerRuntimeOpts { + worker_pool_tx, + shared_metric_src: None, + event_worker_metric_src: None, + }) + } + }, + + maybe_entrypoint: None, + maybe_decorator: None, + maybe_module_code: None, + + no_module_cache: false, + env_vars: env_vars.unwrap_or_default(), + + static_patterns, + maybe_jsx_import_source_config: jsx_import_source_config, + + events_rx: None, + timing: None, + + import_map_path: None, + }, + None, + ) + .await + .unwrap() + } + } + + impl RuntimeBuilder { + fn set_path(mut self, path: &str) -> Self { + let _ = self.path.insert(path.to_string()); + self + } + + async fn set_eszip

(mut self, path: P) -> Result + where + P: AsRef, + { + let _ = self.eszip.insert(EszipPayloadKind::VecKind( + tokio::fs::read(path) + .await + .context("cannot read eszip binary")?, + )); + + Ok(self) + } + + fn set_env_vars(mut self, vars: HashMap) -> Self { + let _ = self.env_vars.insert(vars); + self + } + + fn set_std_env(self) -> Self { + self.set_env_vars(std::env::vars().collect()) + } + + fn set_worker_runtime_conf(mut self, conf: WorkerRuntimeOpts) -> Self { + let _ = self.worker_runtime_conf.insert(conf); + self + } + + fn set_jsx_import_source_config(mut self, config: JsxImportSourceConfig) -> Self { + let _ = self.jsx_import_source_config.insert(config); + self + } + + fn add_static_pattern(mut self, pat: &str) -> Self { + self.static_patterns.push(pat.to_string()); + self + } + + fn extend_static_patterns(mut self, iter: I) -> Self + where + I: IntoIterator, + { + self.static_patterns.extend(iter); + self + } + } + #[tokio::test] #[serial] async fn test_module_code_no_eszip() { @@ -1145,58 +1296,11 @@ mod test { std::mem::drop(main_mod_ev); } - async fn create_runtime( - path: Option<&str>, - env_vars: Option>, - user_conf: Option, - static_patterns: Vec, - maybe_jsx_import_source_config: Option, - ) -> DenoRuntime - where - C: GetRuntimeContext, - { - let (worker_pool_tx, _) = mpsc::unbounded_channel::(); - - DenoRuntime::new( - WorkerContextInitOpts { - service_path: path - .map(PathBuf::from) - .unwrap_or(PathBuf::from("./test_cases/main")), - - no_module_cache: false, - import_map_path: None, - env_vars: env_vars.unwrap_or_default(), - events_rx: None, - timing: None, - maybe_eszip: None, - maybe_entrypoint: None, - maybe_decorator: None, - maybe_module_code: None, - conf: { - if let Some(uc) = user_conf { - uc - } else { - WorkerRuntimeOpts::MainWorker(MainWorkerRuntimeOpts { - worker_pool_tx, - shared_metric_src: None, - event_worker_metric_src: None, - }) - } - }, - static_patterns, - maybe_jsx_import_source_config, - }, - None, - ) - .await - .unwrap() - } - // Main Runtime should have access to `EdgeRuntime` #[tokio::test] #[serial] async fn test_main_runtime_creation() { - let mut runtime = create_runtime::<()>(None, None, None, vec![], None).await; + let mut runtime = RuntimeBuilder::new().build().await; { let scope = &mut runtime.js_runtime.handle_scope(); @@ -1216,14 +1320,10 @@ mod test { #[tokio::test] #[serial] async fn test_user_runtime_creation() { - let mut runtime = create_runtime::<()>( - None, - None, - Some(WorkerRuntimeOpts::UserWorker(Default::default())), - vec![], - None, - ) - .await; + let mut runtime = RuntimeBuilder::new() + .set_worker_runtime_conf(WorkerRuntimeOpts::UserWorker(Default::default())) + .build() + .await; { let scope = &mut runtime.js_runtime.handle_scope(); @@ -1242,8 +1342,7 @@ mod test { #[tokio::test] #[serial] async fn test_main_rt_fs() { - let mut main_rt = - create_runtime::<()>(None, Some(std::env::vars().collect()), None, vec![], None).await; + let mut main_rt = RuntimeBuilder::new().set_std_env().build().await; let global_value_deno_read_file_script = main_rt .js_runtime @@ -1257,6 +1356,7 @@ mod test { ), ) .unwrap(); + let fs_read_result = main_rt.to_value_mut::(&global_value_deno_read_file_script); assert_eq!( @@ -1268,19 +1368,17 @@ mod test { #[tokio::test] #[serial] async fn test_jsx_import_source() { - let mut main_rt = create_runtime::<()>( - Some("./test_cases/jsx-preact"), - Some(std::env::vars().collect()), - None, - vec![], - Some(JsxImportSourceConfig { + let mut main_rt = RuntimeBuilder::new() + .set_std_env() + .set_path("./test_cases/jsx-preact") + .set_jsx_import_source_config(JsxImportSourceConfig { default_specifier: Some("https://esm.sh/preact".to_string()), default_types_specifier: None, module: "jsx-runtime".to_string(), base_url: Url::from_file_path(std::env::current_dir().unwrap()).unwrap(), - }), - ) - .await; + }) + .build() + .await; let _main_mod_ev = main_rt.js_runtime.mod_evaluate(main_rt.main_module_id); let _ = main_rt @@ -1337,21 +1435,19 @@ mod test { #[tokio::test] #[serial] async fn test_static_fs() { - let mut user_rt = create_runtime::<()>( - None, - None, - Some(WorkerRuntimeOpts::UserWorker(Default::default())), - vec![String::from("./test_cases/**/*.md")], - None, - ) - .await; + let mut user_rt = RuntimeBuilder::new() + .set_worker_runtime_conf(WorkerRuntimeOpts::UserWorker(Default::default())) + .add_static_pattern("./test_cases/**/*.md") + .build() + .await; let user_rt_execute_scripts = user_rt .js_runtime .execute_script( "", ModuleCodeString::from( - r#"Deno.readTextFileSync("./mnt/data/test_cases/content.md")"#.to_string(), + // NOTE: Base path is `./test_cases/main`. + r#"Deno.readTextFileSync("content.md")"#.to_string(), ), ) .unwrap(); @@ -1368,14 +1464,10 @@ mod test { #[tokio::test] #[serial] async fn test_os_ops() { - let mut user_rt = create_runtime::<()>( - None, - None, - Some(WorkerRuntimeOpts::UserWorker(Default::default())), - vec![], - None, - ) - .await; + let mut user_rt = RuntimeBuilder::new() + .set_worker_runtime_conf(WorkerRuntimeOpts::UserWorker(Default::default())) + .build() + .await; let user_rt_execute_scripts = user_rt .js_runtime @@ -1494,17 +1586,13 @@ mod test { #[serial] async fn test_os_env_vars() { std::env::set_var("Supa_Test", "Supa_Value"); - let mut main_rt = - create_runtime::<()>(None, Some(std::env::vars().collect()), None, vec![], None).await; - let mut user_rt = create_runtime::<()>( - None, - None, - Some(WorkerRuntimeOpts::UserWorker(Default::default())), - vec![], - None, - ) - .await; + let mut main_rt = RuntimeBuilder::new().set_std_env().build().await; + let mut user_rt = RuntimeBuilder::new() + .set_worker_runtime_conf(WorkerRuntimeOpts::UserWorker(Default::default())) + .build() + .await; + assert!(!main_rt.env_vars.is_empty()); assert!(user_rt.env_vars.is_empty()); @@ -1563,14 +1651,13 @@ mod test { assert!(user_serde_deno_env.unwrap().is_null()); } - async fn create_basic_user_runtime( + fn create_basic_user_runtime_builder( path: &str, memory_limit_mb: T, worker_timeout_ms: U, static_patterns: &[&str], - ) -> DenoRuntime + ) -> RuntimeBuilder where - C: GetRuntimeContext, T: Into>, U: Into>, { @@ -1582,28 +1669,26 @@ mod test { .into() .unwrap_or(default_opt.worker_timeout_ms); - create_runtime::( - Some(path), - None, - Some(WorkerRuntimeOpts::UserWorker(UserWorkerRuntimeOpts { + RuntimeBuilder::new() + .set_path(path) + .set_worker_runtime_conf(WorkerRuntimeOpts::UserWorker(UserWorkerRuntimeOpts { memory_limit_mb, worker_timeout_ms, cpu_time_soft_limit_ms: 100, cpu_time_hard_limit_ms: 200, force_create: true, ..default_opt - })), - static_patterns.iter().map(|it| String::from(*it)).collect(), - None, - ) - .await + })) + .extend_static_patterns(static_patterns.iter().map(|it| String::from(*it))) } #[tokio::test] #[serial] async fn test_array_buffer_allocation_below_limit() { - let mut user_rt: DenoRuntime = - create_basic_user_runtime("./test_cases/array_buffers", 20, 1000, &[]).await; + let mut user_rt = + create_basic_user_runtime_builder("./test_cases/array_buffers", 20, 1000, &[]) + .build() + .await; let (_tx, duplex_stream_rx) = mpsc::unbounded_channel::(); let (result, _) = user_rt.run(duplex_stream_rx, None, None).await; @@ -1617,8 +1702,10 @@ mod test { #[tokio::test] #[serial] async fn test_array_buffer_allocation_above_limit() { - let mut user_rt: DenoRuntime = - create_basic_user_runtime("./test_cases/array_buffers", 15, 1000, &[]).await; + let mut user_rt = + create_basic_user_runtime_builder("./test_cases/array_buffers", 15, 1000, &[]) + .build() + .await; let (_tx, duplex_stream_rx) = mpsc::unbounded_channel::(); let (result, _) = user_rt.run(duplex_stream_rx, None, None).await; @@ -1641,9 +1728,14 @@ mod test { ) { let (_duplex_stream_tx, duplex_stream_rx) = mpsc::unbounded_channel::(); let (callback_tx, mut callback_rx) = mpsc::unbounded_channel::<()>(); - let mut user_rt: DenoRuntime = - create_basic_user_runtime(path, memory_limit_mb, worker_timeout_ms, static_patterns) - .await; + let mut user_rt = create_basic_user_runtime_builder( + path, + memory_limit_mb, + worker_timeout_ms, + static_patterns, + ) + .build() + .await; let waker = user_rt.js_runtime.op_state().borrow().waker.clone(); let handle = user_rt.js_runtime.v8_isolate().thread_safe_handle(); @@ -1745,16 +1837,36 @@ mod test { } } - let mut user_rt: DenoRuntime = create_basic_user_runtime( + let mut user_rt = create_basic_user_runtime_builder( "./test_cases/user-worker-san-check", None, None, &["./test_cases/user-worker-san-check/.blocklisted"], ) + .set_context::() + .build() .await; let (_tx, duplex_stream_rx) = mpsc::unbounded_channel(); user_rt.run(duplex_stream_rx, None, None).await.0.unwrap(); } + + #[tokio::test] + #[serial] + #[should_panic] + async fn test_load_corrupted_eszip_v1() { + let mut user_rt = RuntimeBuilder::new() + .set_path("./test_cases/eszip-migration/npm-supabase-js") + .set_eszip("./test_cases/eszip-migration/npm-supabase-js/v1_corrupted.eszip") + .await + .unwrap() + .set_worker_runtime_conf(WorkerRuntimeOpts::UserWorker(Default::default())) + .build() + .await; + + let (_tx, duplex_stream_rx) = mpsc::unbounded_channel(); + + user_rt.run(duplex_stream_rx, None, None).await.0.unwrap(); + } } diff --git a/crates/base/test_cases/eszip-migration/npm-supabase-js/index.ts b/crates/base/test_cases/eszip-migration/npm-supabase-js/index.ts new file mode 100644 index 000000000..f18cc1833 --- /dev/null +++ b/crates/base/test_cases/eszip-migration/npm-supabase-js/index.ts @@ -0,0 +1,3 @@ +import { createClient } from "npm:@supabase/supabase-js@2.42.0"; +console.log(typeof createClient); +Deno.serve((_req) => new Response("Hello, world")); diff --git a/crates/base/test_cases/eszip-migration/npm-supabase-js/v0.eszip b/crates/base/test_cases/eszip-migration/npm-supabase-js/v0.eszip new file mode 100644 index 000000000..299c39a4b Binary files /dev/null and b/crates/base/test_cases/eszip-migration/npm-supabase-js/v0.eszip differ diff --git a/crates/base/test_cases/eszip-migration/npm-supabase-js/v0_corrupted.eszip b/crates/base/test_cases/eszip-migration/npm-supabase-js/v0_corrupted.eszip new file mode 100644 index 000000000..610712b1a Binary files /dev/null and b/crates/base/test_cases/eszip-migration/npm-supabase-js/v0_corrupted.eszip differ diff --git a/crates/base/test_cases/eszip-migration/npm-supabase-js/v1.eszip b/crates/base/test_cases/eszip-migration/npm-supabase-js/v1.eszip new file mode 100644 index 000000000..e74f9e1f1 Binary files /dev/null and b/crates/base/test_cases/eszip-migration/npm-supabase-js/v1.eszip differ diff --git a/crates/base/test_cases/eszip-migration/npm-supabase-js/v1_corrupted.eszip b/crates/base/test_cases/eszip-migration/npm-supabase-js/v1_corrupted.eszip new file mode 100644 index 000000000..f14a41012 Binary files /dev/null and b/crates/base/test_cases/eszip-migration/npm-supabase-js/v1_corrupted.eszip differ diff --git a/crates/base/test_cases/content.md b/crates/base/test_cases/main/content.md similarity index 100% rename from crates/base/test_cases/content.md rename to crates/base/test_cases/main/content.md diff --git a/crates/base/test_cases/meow_20mib.bin b/crates/base/test_cases/read_file_sync_20mib/20mib.bin similarity index 100% rename from crates/base/test_cases/meow_20mib.bin rename to crates/base/test_cases/read_file_sync_20mib/20mib.bin diff --git a/crates/base/test_cases/read_file_sync_20mib/index.ts b/crates/base/test_cases/read_file_sync_20mib/index.ts index fc8c66ed5..bdeaa5074 100644 --- a/crates/base/test_cases/read_file_sync_20mib/index.ts +++ b/crates/base/test_cases/read_file_sync_20mib/index.ts @@ -1,5 +1,5 @@ // NOTE: Just defined to prevent the JsRuntime leave from the event loop Deno.serve(() => { /* do nothing */ }); -let buf_in_ext_mem = Deno.readFileSync("./mnt/data/test_cases/meow_20mib.bin") as Uint8Array; +let buf_in_ext_mem = Deno.readFileSync("20mib.bin") as Uint8Array; console.log(buf_in_ext_mem.length); // to prevent optimization \ No newline at end of file diff --git a/crates/base/test_cases/user-worker-san-check/index.ts b/crates/base/test_cases/user-worker-san-check/index.ts index 0c58ba990..1da7a4dfc 100644 --- a/crates/base/test_cases/user-worker-san-check/index.ts +++ b/crates/base/test_cases/user-worker-san-check/index.ts @@ -1,4 +1,4 @@ -let blocklist: string[] = Deno.readTextFileSync("./mnt/data/test_cases/user-worker-san-check/.blocklisted") +let blocklist: string[] = Deno.readTextFileSync(".blocklisted") .trim() .split("\n"); diff --git a/crates/base/test_cases/grow.wasm b/crates/base/test_cases/wasm/grow_standalone/grow.wasm similarity index 100% rename from crates/base/test_cases/grow.wasm rename to crates/base/test_cases/wasm/grow_standalone/grow.wasm diff --git a/crates/base/test_cases/wasm/grow_standalone/index.ts b/crates/base/test_cases/wasm/grow_standalone/index.ts index 783a5387e..d52ec5517 100644 --- a/crates/base/test_cases/wasm/grow_standalone/index.ts +++ b/crates/base/test_cases/wasm/grow_standalone/index.ts @@ -15,7 +15,7 @@ WAT sample */ let wasm; -let buf = Deno.readFileSync("./mnt/data/test_cases/grow.wasm") as Uint8Array; +let buf = Deno.readFileSync("grow.wasm") as Uint8Array; let mod = new WebAssembly.Module(buf); let imports = { wbg: {} }; let instance = new WebAssembly.Instance(mod, imports); diff --git a/crates/base/test_cases/meow_20mib.wasm b/crates/base/test_cases/wasm/shared/20mib.wasm similarity index 100% rename from crates/base/test_cases/meow_20mib.wasm rename to crates/base/test_cases/wasm/shared/20mib.wasm diff --git a/crates/base/test_cases/wasm/shared/index.ts b/crates/base/test_cases/wasm/shared/index.ts index eff0f0a6a..4fe04a090 100644 --- a/crates/base/test_cases/wasm/shared/index.ts +++ b/crates/base/test_cases/wasm/shared/index.ts @@ -101,11 +101,10 @@ export function use_grow() { export default function init() { // 20985134 bytes ~= 21M (in file system) - let buf = Deno.readFileSync("./mnt/data/test_cases/meow_20mib.wasm") as Uint8Array; + let buf = Deno.readFileSync(import.meta.dirname + "/20mib.wasm") as Uint8Array; let mod = new WebAssembly.Module(buf); let imports = { wbg: {} }; let instance = new WebAssembly.Instance(mod, imports); wasm = instance.exports; } - diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index c0a199d04..2c5895c5a 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -17,9 +17,7 @@ use flags::get_cli; use log::warn; use sb_graph::emitter::EmitterFactory; use sb_graph::import_map::load_import_map; -use sb_graph::{ - extract_from_file, generate_binary_eszip, include_glob_patterns_in_eszip, STATIC_FS_PREFIX, -}; +use sb_graph::{extract_from_file, generate_binary_eszip, include_glob_patterns_in_eszip}; use std::fs::File; use std::io::Write; use std::net::SocketAddr; @@ -244,16 +242,22 @@ fn main() -> Result<(), anyhow::Error> { vec![] }; - let entry_point_path = sub_matches + let entrypoint_script_path = sub_matches .get_one::("entrypoint") .cloned() .unwrap(); - let path = PathBuf::from(entry_point_path.as_str()); - if !path.exists() { - bail!("entrypoint path does not exist ({})", path.display()); + let entrypoint_script_path = PathBuf::from(entrypoint_script_path.as_str()); + if !entrypoint_script_path.is_file() { + bail!( + "entrypoint path does not exist ({})", + entrypoint_script_path.display() + ); } + let entrypoint_script_path = entrypoint_script_path.canonicalize().unwrap(); + let entrypoint_dir_path = entrypoint_script_path.parent().unwrap(); + let mut emitter_factory = EmitterFactory::new(); let maybe_import_map = load_import_map(import_map_path.clone()) .map_err(|e| anyhow!("import map path is invalid ({})", e))?; @@ -272,19 +276,15 @@ fn main() -> Result<(), anyhow::Error> { emitter_factory.set_import_map(maybe_import_map.clone()); let mut eszip = generate_binary_eszip( - path.canonicalize().unwrap(), + &entrypoint_script_path, Arc::new(emitter_factory), None, maybe_import_map_url, ) .await?; - include_glob_patterns_in_eszip( - static_patterns, - &mut eszip, - Some(STATIC_FS_PREFIX.to_string()), - ) - .await; + include_glob_patterns_in_eszip(static_patterns, &mut eszip, entrypoint_dir_path) + .await?; let bin = eszip.into_bytes(); @@ -305,12 +305,12 @@ fn main() -> Result<(), anyhow::Error> { let output_path = PathBuf::from(output_path.as_str()); let eszip_path = PathBuf::from(eszip_path.as_str()); - extract_from_file(eszip_path, output_path.clone()).await; - - println!( - "Eszip extracted successfully inside path {}", - output_path.to_str().unwrap() - ); + if extract_from_file(eszip_path, output_path.clone()).await { + println!( + "Eszip extracted successfully inside path {}", + output_path.to_str().unwrap() + ); + } } _ => { // unrecognized command diff --git a/crates/node/Cargo.toml b/crates/node/Cargo.toml index bb2a51765..ea8018e19 100644 --- a/crates/node/Cargo.toml +++ b/crates/node/Cargo.toml @@ -65,7 +65,7 @@ scrypt = "0.11.0" sec1 = "0.7" serde = "1.0.149" sha-1 = "0.10.0" -sha2 = { version = "0.10.8", features = ["oid"] } +sha2 = { workspace = true, features = ["oid"] } signature = "2.1" simd-json = "0.13.4" spki = "0.7.2" diff --git a/crates/sb_eszip_shared/Cargo.toml b/crates/sb_eszip_shared/Cargo.toml new file mode 100644 index 000000000..b6ddcfee8 --- /dev/null +++ b/crates/sb_eszip_shared/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "sb_eszip_shared" +version = "0.1.0" +authors = ["Supabase "] +edition = "2021" +resolver = "2" +license = "MIT" + +[lib] +path = "lib.rs" + +[dependencies] +eszip.workspace = true \ No newline at end of file diff --git a/crates/sb_eszip_shared/lib.rs b/crates/sb_eszip_shared/lib.rs new file mode 100644 index 000000000..4c3ea483d --- /dev/null +++ b/crates/sb_eszip_shared/lib.rs @@ -0,0 +1,13 @@ +use eszip::Module; + +pub static SUPABASE_ESZIP_VERSION: &[u8] = b"1"; + +pub static SUPABASE_ESZIP_VERSION_KEY: &str = "---SUPABASE-ESZIP-VERSION-ESZIP---"; +pub static VFS_ESZIP_KEY: &str = "---SUPABASE-VFS-DATA-ESZIP---"; +pub static SOURCE_CODE_ESZIP_KEY: &str = "---SUPABASE-SOURCE-CODE-ESZIP---"; +pub static STATIC_FILES_ESZIP_KEY: &str = "---SUPABASE-STATIC-FILES-ESZIP---"; + +pub trait AsyncEszipDataRead: std::fmt::Debug + Send + Sync { + fn ensure_module(&self, specifier: &str) -> Option; + fn ensure_import_map(&self, specifier: &str) -> Option; +} diff --git a/crates/sb_fs/Cargo.toml b/crates/sb_fs/Cargo.toml index 21fe5a1df..e3edfdc59 100644 --- a/crates/sb_fs/Cargo.toml +++ b/crates/sb_fs/Cargo.toml @@ -13,18 +13,22 @@ path = "lib.rs" sb_core = { version = "0.1.0", path = "../sb_core" } sb_node = { version = "0.1.0", path = "../node" } sb_npm = { version = "0.1.0", path = "../npm" } +sb_eszip_shared = { version = "0.1.0", path = "../sb_eszip_shared" } deno_semver.workspace = true anyhow.workspace = true deno_core.workspace = true eszip.workspace = true import_map.workspace = true -log = { workspace = true } +log.workspace = true serde.workspace = true tokio.workspace = true +futures.workspace = true deno_ast.workspace = true deno_fs.workspace = true deno_npm.workspace = true once_cell.workspace = true deno_io.workspace = true thiserror.workspace = true -async-trait.workspace = true \ No newline at end of file +async-trait.workspace = true +url.workspace = true +rkyv = { workspace = true, features = ["validation"] } diff --git a/crates/sb_fs/file_system.rs b/crates/sb_fs/file_system.rs index 7b4a425a3..aa98da9be 100644 --- a/crates/sb_fs/file_system.rs +++ b/crates/sb_fs/file_system.rs @@ -7,6 +7,8 @@ use std::path::PathBuf; use std::rc::Rc; use std::sync::Arc; +use crate::rt::SYNC_IO_RT; + use super::virtual_fs::FileBackedVfs; #[derive(Debug, Clone)] @@ -33,23 +35,26 @@ impl DenoCompileFileSystem { } } - fn copy_to_real_path(&self, oldpath: &Path, newpath: &Path) -> FsResult<()> { + async fn copy_to_real_path_async(&self, oldpath: &Path, newpath: &Path) -> FsResult<()> { let old_file = self.0.file_entry(oldpath)?; - let old_file_bytes = self.0.read_file_all(old_file)?; - RealFs.write_file_sync( - newpath, - OpenOptions { - read: false, - write: true, - create: true, - truncate: true, - append: false, - create_new: false, - mode: None, - }, - None, - &old_file_bytes, - ) + let old_file_bytes = self.0.read_file_all(old_file).await?; + + RealFs + .write_file_async( + newpath.to_path_buf(), + OpenOptions { + read: false, + write: true, + create: true, + truncate: true, + append: false, + create_new: false, + mode: None, + }, + None, + old_file_bytes, + ) + .await } } @@ -136,7 +141,17 @@ impl FileSystem for DenoCompileFileSystem { fn copy_file_sync(&self, oldpath: &Path, newpath: &Path) -> FsResult<()> { self.error_if_in_vfs(newpath)?; if self.0.is_path_within(oldpath) { - self.copy_to_real_path(oldpath, newpath) + std::thread::scope(|s| { + let this = self.clone(); + + s.spawn(move || { + SYNC_IO_RT.block_on(async move { + this.copy_to_real_path_async(oldpath, newpath).await + }) + }) + .join() + .unwrap() + }) } else { RealFs.copy_file_sync(oldpath, newpath) } @@ -145,7 +160,7 @@ impl FileSystem for DenoCompileFileSystem { self.error_if_in_vfs(&newpath)?; if self.0.is_path_within(&oldpath) { let fs = self.clone(); - tokio::task::spawn_blocking(move || fs.copy_to_real_path(&oldpath, &newpath)).await? + fs.copy_to_real_path_async(&oldpath, &newpath).await } else { RealFs.copy_file_async(oldpath, newpath).await } diff --git a/crates/sb_fs/lib.rs b/crates/sb_fs/lib.rs index f16393e71..643acf332 100644 --- a/crates/sb_fs/lib.rs +++ b/crates/sb_fs/lib.rs @@ -1,17 +1,21 @@ use crate::virtual_fs::{FileBackedVfs, VfsBuilder, VfsRoot, VirtualDirectory}; -use deno_core::error::AnyError; -use deno_core::{normalize_path, serde_json}; +use anyhow::{bail, Context}; +use deno_core::normalize_path; use deno_npm::NpmSystemInfo; use eszip::EszipV2; +use log::warn; +use sb_eszip_shared::{AsyncEszipDataRead, STATIC_FILES_ESZIP_KEY}; use sb_npm::cache::NpmCache; use sb_npm::registry::CliNpmRegistryApi; use sb_npm::resolution::NpmResolution; use sb_npm::{CliNpmResolver, InnerCliNpmResolverRef}; use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; +use url::Url; pub mod file_system; +mod rt; pub mod static_fs; pub mod virtual_fs; @@ -22,41 +26,74 @@ pub struct VfsOpts { pub npm_resolution: Arc, } -pub type EszipStaticFiles = HashMap>; - -pub async fn extract_static_files_from_eszip(eszip: &EszipV2) -> EszipStaticFiles { - let key = String::from("---SUPABASE-STATIC-FILES-ESZIP---"); - let mut files: EszipStaticFiles = HashMap::new(); - - if eszip.specifiers().contains(&key) { - let eszip_static_files = eszip.get_module(key.as_str()).unwrap(); - let data = eszip_static_files.take_source().await.unwrap(); - let data = data.to_vec(); - let data: Vec = serde_json::from_slice(data.as_slice()).unwrap(); - for static_specifier in data { - let file_mod = eszip.get_module(static_specifier.as_str()).unwrap(); - files.insert( - normalize_path(PathBuf::from(static_specifier)) - .to_str() - .unwrap() - .to_string(), - file_mod.take_source().await.unwrap().to_vec(), - ); +pub type EszipStaticFiles = HashMap; + +pub trait LazyEszipV2: std::ops::Deref + AsyncEszipDataRead {} + +impl LazyEszipV2 for T where T: std::ops::Deref + AsyncEszipDataRead {} + +pub async fn extract_static_files_from_eszip

( + eszip: &dyn LazyEszipV2, + mapped_base_dir_path: P, +) -> EszipStaticFiles +where + P: AsRef, +{ + let mut files = EszipStaticFiles::default(); + + let Some(eszip_static_files) = eszip.ensure_module(STATIC_FILES_ESZIP_KEY) else { + return files; + }; + + let data = eszip_static_files.source().await.unwrap(); + let archived = match rkyv::check_archived_root::>(&data) { + Ok(vec) => vec, + Err(err) => { + warn!("failed to deserialize specifiers for static files: {}", err); + return files; } + }; + + for specifier in archived.as_ref() { + let specifier = specifier.as_str(); + let path = match Url::parse(specifier) { + Ok(v) => PathBuf::from(v.path()), + Err(err) => { + warn!("could not parse the specifier for static file: {}", err); + continue; + } + }; + + files.insert( + normalize_path(mapped_base_dir_path.as_ref().join(path)), + specifier.to_string(), + ); } files } pub fn load_npm_vfs( + eszip: Arc, root_dir_path: PathBuf, - vfs_data: Option<&[u8]>, -) -> Result { - let dir: Option = if let Some(vfs_data) = vfs_data { - serde_json::from_slice(vfs_data)? - } else { - None - }; + vfs_data_slice: Option<&[u8]>, +) -> Result { + let dir = match vfs_data_slice + .map(rkyv::check_archived_root::>) + .transpose() + { + Ok(Some(archived)) => Some( + < as rkyv::Archive>::Archived as rkyv::Deserialize< + Option, + rkyv::Infallible, + >>::deserialize(archived, &mut rkyv::Infallible) + .with_context(|| "cannot deserialize vfs data")?, + ), + + Ok(None) => None, + Err(err) => bail!("cannot load npm vfs: {}", err), + } + .flatten(); let fs_root: VfsRoot = if let Some(mut dir) = dir { // align the name of the directory with the root dir @@ -80,14 +117,22 @@ pub fn load_npm_vfs( } }; - Ok(FileBackedVfs::new(fs_root)) + Ok(FileBackedVfs::new(eszip, fs_root)) } -pub fn build_vfs(opts: VfsOpts) -> Result { +pub fn build_vfs<'scope, F>( + opts: VfsOpts, + add_content_callback_fn: F, +) -> Result, anyhow::Error> +where + F: (for<'r> FnMut(&'r Path, &'r str, Vec) -> String) + 'scope, +{ match opts.npm_resolver.as_inner() { InnerCliNpmResolverRef::Managed(npm_resolver) => { if let Some(node_modules_path) = npm_resolver.root_node_modules_path() { - let mut builder = VfsBuilder::new(node_modules_path.clone())?; + let mut builder = + VfsBuilder::new(node_modules_path.clone(), add_content_callback_fn)?; + builder.add_dir_recursive(node_modules_path)?; Ok(builder) } else { @@ -95,7 +140,7 @@ pub fn build_vfs(opts: VfsOpts) -> Result { // but also don't make this dependent on the registry url let registry_url = npm_resolver.registry_base_url(); let root_path = npm_resolver.registry_folder_in_global_cache(registry_url); - let mut builder = VfsBuilder::new(root_path)?; + let mut builder = VfsBuilder::new(root_path, add_content_callback_fn)?; for package in npm_resolver.all_system_packages(&NpmSystemInfo::default()) { let folder = npm_resolver.resolve_pkg_folder_from_pkg_id(&package.id)?; builder.add_dir_recursive(&folder)?; @@ -106,7 +151,7 @@ pub fn build_vfs(opts: VfsOpts) -> Result { } } _ => { - panic!("Unreachable"); + unreachable!(); } } } diff --git a/crates/sb_fs/rt.rs b/crates/sb_fs/rt.rs new file mode 100644 index 000000000..15efb2e02 --- /dev/null +++ b/crates/sb_fs/rt.rs @@ -0,0 +1,9 @@ +use once_cell::sync::Lazy; + +pub(crate) static SYNC_IO_RT: Lazy = Lazy::new(|| { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .thread_name("sb-virtualfs-io") + .build() + .unwrap() +}); diff --git a/crates/sb_fs/static_fs.rs b/crates/sb_fs/static_fs.rs index 25e5bf7e0..137f66dc6 100644 --- a/crates/sb_fs/static_fs.rs +++ b/crates/sb_fs/static_fs.rs @@ -1,3 +1,4 @@ +use crate::rt::SYNC_IO_RT; use crate::{EszipStaticFiles, FileBackedVfs}; use deno_core::normalize_path; use deno_fs::{AccessCheckCb, FsDirEntry, FsFileType, OpenOptions}; @@ -10,7 +11,8 @@ use std::sync::Arc; #[derive(Debug, Clone)] pub struct StaticFs { - files: EszipStaticFiles, + static_files: EszipStaticFiles, + base_dir_path: PathBuf, vfs_path: PathBuf, snapshot: Option, vfs: Arc, @@ -19,13 +21,15 @@ pub struct StaticFs { impl StaticFs { pub fn new( static_files: EszipStaticFiles, + base_dir_path: PathBuf, vfs_path: PathBuf, vfs: Arc, snapshot: Option, ) -> Self { Self { vfs, - files: static_files, + static_files, + base_dir_path, vfs_path, snapshot, } @@ -294,16 +298,37 @@ impl deno_fs::FileSystem for StaticFs { let buf = file.read_all_sync()?; Ok(buf) } else { - let normalize_path = normalize_path(path); - let path = normalize_path.to_str().unwrap(); - let is_file_in_vfs = self.files.contains_key(path); - if is_file_in_vfs { - let res = self.files.get(path).unwrap().to_vec(); - Ok(res) + let eszip = self.vfs.eszip.as_ref(); + let path = if path.is_relative() { + self.base_dir_path.join(path) + } else { + path.to_path_buf() + }; + + let normalized = normalize_path(path); + + if let Some(file) = self + .static_files + .get(&normalized) + .and_then(|it| eszip.ensure_module(it)) + { + let Some(res) = std::thread::scope(|s| { + s.spawn(move || SYNC_IO_RT.block_on(async move { file.source().await })) + .join() + .unwrap() + }) else { + return Err(std::io::Error::new( + std::io::ErrorKind::NotFound, + "No content available", + ) + .into()); + }; + + Ok(res.to_vec()) } else { Err(std::io::Error::new( std::io::ErrorKind::NotFound, - format!("path not found {}", path), + format!("path not found: {}", normalized.to_string_lossy()), ) .into()) } diff --git a/crates/sb_fs/virtual_fs.rs b/crates/sb_fs/virtual_fs.rs index a533b3087..3dac406a2 100644 --- a/crates/sb_fs/virtual_fs.rs +++ b/crates/sb_fs/virtual_fs.rs @@ -21,31 +21,43 @@ use deno_io; use deno_io::fs::FsError; use deno_io::fs::FsResult; use deno_io::fs::FsStat; +use futures::future::OptionFuture; +use rkyv::Archive; +use rkyv::Deserialize; +use rkyv::Serialize; use sb_core::util::checksum; use sb_core::util::fs::canonicalize_path; -use serde::Deserialize; -use serde::Serialize; +use sb_eszip_shared::AsyncEszipDataRead; use thiserror::Error; +use crate::rt::SYNC_IO_RT; + #[derive(Error, Debug)] #[error( -"Failed to strip prefix '{}' from '{}'", root_path.display(), target.display() + "Failed to strip prefix '{}' from '{}'", root_path.display(), target.display() )] pub struct StripRootError { root_path: PathBuf, target: PathBuf, } -pub struct VfsBuilder { +type AddContentCallback<'scope> = + Arc FnMut(&'r Path, &'r str, Vec) -> String) + 'scope>>; + +pub struct VfsBuilder<'scope> { root_path: PathBuf, root_dir: VirtualDirectory, files: Vec>, current_offset: u64, file_offsets: HashMap, + add_content_callback_fn: AddContentCallback<'scope>, } -impl VfsBuilder { - pub fn new(root_path: PathBuf) -> Result { +impl<'scope> VfsBuilder<'scope> { + pub fn new(root_path: PathBuf, add_content_callback_fn: F) -> Result + where + F: (for<'r> FnMut(&'r Path, &'r str, Vec) -> String) + 'scope, + { let root_path = canonicalize_path(&root_path)?; log::debug!("Building vfs with root '{}'", root_path.display()); Ok(Self { @@ -61,6 +73,8 @@ impl VfsBuilder { files: Vec::new(), current_offset: 0, file_offsets: Default::default(), + add_content_callback_fn: Arc::from(std::sync::Mutex::new(add_content_callback_fn)) + as AddContentCallback<'scope>, }) } @@ -165,23 +179,27 @@ impl VfsBuilder { self.current_offset }; + let add_content_callback_fn = self.add_content_callback_fn.clone(); let dir = self.add_dir(path.parent().unwrap())?; let name = path.file_name().unwrap().to_string_lossy(); let data_len = data.len(); - match dir.entries.binary_search_by(|e| e.name().cmp(&name)) { + let insert_index = match dir.entries.binary_search_by(|e| e.name().cmp(&name)) { + Err(insert_index) => insert_index, Ok(_) => unreachable!(), - Err(insert_index) => { - dir.entries.insert( - insert_index, - VfsEntry::File(VirtualFile { - name: name.to_string(), - offset, - len: data.len() as u64, - content: Some(data), - }), - ); - } - } + }; + + let len = data.len(); + let key = (add_content_callback_fn.lock().unwrap())(path, &name, data); + + dir.entries.insert( + insert_index, + VfsEntry::File(VirtualFile { + key, + name: name.to_string(), + offset, + len: len as u64, + }), + ); // new file, update the list of files if self.current_offset == offset { @@ -314,8 +332,8 @@ impl<'a> VfsEntryRef<'a> { } } -// todo(dsherret): we should store this more efficiently in the binary -#[derive(Debug, Serialize, Deserialize)] +#[derive(Archive, Serialize, Deserialize, Debug)] +#[archive(check_bytes)] pub enum VfsEntry { Dir(VirtualDirectory), File(VirtualFile), @@ -340,38 +358,60 @@ impl VfsEntry { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Archive, Serialize, Deserialize, Debug)] +#[archive( + check_bytes, + bound(serialize = "__S: rkyv::ser::ScratchSpace + rkyv::ser::Serializer") +)] +#[archive_attr(check_bytes( + bound = "__C: rkyv::validation::ArchiveContext, <__C as rkyv::Fallible>::Error: std::error::Error" +))] pub struct VirtualDirectory { pub name: String, // should be sorted by name + #[omit_bounds] + #[archive_attr(omit_bounds)] pub entries: Vec, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Archive, Serialize, Deserialize, Debug, Clone)] +#[archive(check_bytes)] pub struct VirtualFile { + pub key: String, pub name: String, pub offset: u64, pub len: u64, - pub content: Option>, // Not Deno Original, but it's the best way to store it in the ESZIP. } impl VirtualFile { - pub fn read_file(&self, _pos: u64, buf: &mut [u8]) -> std::io::Result { - match &self.content { - Some(content) => { - let read_length = buf.len().min(content.len()); - buf[..read_length].copy_from_slice(&content[..read_length]); - Ok(read_length) - } - None => Err(io::Error::new( + pub async fn read_file( + &self, + eszip: &dyn AsyncEszipDataRead, + _pos: u64, + buf: &mut [u8], + ) -> std::io::Result { + let content: OptionFuture<_> = eszip + .ensure_module(self.key.as_str()) + .map(|it| async move { it.source().await }) + .into(); + + let Some(Some(content)) = content.await else { + return Err(io::Error::new( io::ErrorKind::NotFound, "No content available", - )), - } + )); + }; + + let read_length = buf.len().min(content.len()); + + buf[..read_length].copy_from_slice(&content[..read_length]); + + Ok(read_length) } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Archive, Serialize, Deserialize, Debug)] +#[archive(check_bytes)] pub struct VirtualSymlink { pub name: String, pub dest_parts: Vec, @@ -540,7 +580,7 @@ impl FileBackedVfsFile { } } - fn read_to_buf(&self, buf: &mut [u8]) -> FsResult { + async fn read_to_buf(&self, buf: &mut [u8]) -> FsResult { let pos = { let mut pos = self.pos.lock(); let read_pos = *pos; @@ -548,12 +588,14 @@ impl FileBackedVfsFile { *pos = std::cmp::min(self.file.len, *pos + buf.len() as u64); read_pos }; + self.vfs .read_file(&self.file, pos, buf) + .await .map_err(|err| err.into()) } - fn read_to_end(&self) -> FsResult> { + async fn read_to_end(&self) -> FsResult> { let pos = { let mut pos = self.pos.lock(); let read_pos = *pos; @@ -569,7 +611,7 @@ impl FileBackedVfsFile { } let size = (self.file.len - pos) as usize; let mut buf = vec![0; size]; - self.vfs.read_file(&self.file, pos, &mut buf)?; + self.vfs.read_file(&self.file, pos, &mut buf).await?; Ok(buf) } } @@ -577,15 +619,19 @@ impl FileBackedVfsFile { #[async_trait::async_trait(?Send)] impl deno_io::fs::File for FileBackedVfsFile { fn read_sync(self: Rc, buf: &mut [u8]) -> FsResult { - self.read_to_buf(buf) + std::thread::scope(|s| { + let inner = (*self).clone(); + + s.spawn(move || SYNC_IO_RT.block_on(inner.read_to_buf(buf))) + .join() + .unwrap() + }) } async fn read_byob(self: Rc, mut buf: BufMutView) -> FsResult<(usize, BufMutView)> { let inner = (*self).clone(); - tokio::task::spawn(async move { - let nread = inner.read_to_buf(&mut buf)?; - Ok((nread, buf)) - }) - .await? + let nread = inner.read_to_buf(&mut buf).await?; + + Ok((nread, buf)) } fn write_sync(self: Rc, _buf: &[u8]) -> FsResult { @@ -603,11 +649,17 @@ impl deno_io::fs::File for FileBackedVfsFile { } fn read_all_sync(self: Rc) -> FsResult> { - self.read_to_end() + std::thread::scope(|s| { + let inner = (*self).clone(); + + s.spawn(move || SYNC_IO_RT.block_on(inner.read_to_end())) + .join() + .unwrap() + }) } async fn read_all_async(self: Rc) -> FsResult> { let inner = (*self).clone(); - tokio::task::spawn_blocking(move || inner.read_to_end()).await? + inner.read_to_end().await } fn chmod_sync(self: Rc, _pathmode: u32) -> FsResult<()> { @@ -699,12 +751,13 @@ impl deno_io::fs::File for FileBackedVfsFile { #[derive(Debug)] pub struct FileBackedVfs { - fs_root: VfsRoot, + pub(crate) eszip: Arc, + pub(crate) fs_root: VfsRoot, } impl FileBackedVfs { - pub fn new(fs_root: VfsRoot) -> Self { - Self { fs_root } + pub fn new(eszip: Arc, fs_root: VfsRoot) -> Self { + Self { eszip, fs_root } } pub fn root(&self) -> &Path { @@ -766,19 +819,19 @@ impl FileBackedVfs { Ok(path) } - pub fn read_file_all(&self, file: &VirtualFile) -> std::io::Result> { + pub async fn read_file_all(&self, file: &VirtualFile) -> std::io::Result> { let mut buf = vec![0; file.len as usize]; - self.read_file(file, 0, &mut buf)?; + self.read_file(file, 0, &mut buf).await?; Ok(buf) } - pub fn read_file( + pub async fn read_file( &self, file: &VirtualFile, pos: u64, buf: &mut [u8], ) -> std::io::Result { - file.read_file(pos, buf) + file.read_file(self.eszip.as_ref(), pos, buf).await } pub fn dir_entry(&self, path: &Path) -> std::io::Result<&VirtualDirectory> { diff --git a/crates/sb_graph/Cargo.toml b/crates/sb_graph/Cargo.toml index 52f1a3119..e52bdd310 100644 --- a/crates/sb_graph/Cargo.toml +++ b/crates/sb_graph/Cargo.toml @@ -14,12 +14,13 @@ sb_core = { version = "0.1.0", path = "../sb_core" } sb_node = { version = "0.1.0", path = "../node" } sb_npm = { version = "0.1.0", path = "../npm" } sb_fs = { version = "0.1.0", path = "../sb_fs" } +sb_eszip_shared = { version = "0.1.0", path = "../sb_eszip_shared" } deno_semver.workspace = true anyhow.workspace = true deno_core.workspace = true eszip.workspace = true import_map.workspace = true -log = { workspace = true } +log.workspace = true serde.workspace = true tokio.workspace = true deno_ast.workspace = true @@ -31,3 +32,13 @@ urlencoding.workspace = true deno_lockfile.workspace = true deno_config.workspace = true glob.workspace = true +futures.workspace = true +sha2.workspace = true +scopeguard.workspace = true +thiserror.workspace = true +rkyv = { workspace = true, features = ["validation"] } +hashlink = { version = "0.8" } +pathdiff = { version = "0.2" } + +[dev-dependencies] +tempfile.workspace = true \ No newline at end of file diff --git a/crates/sb_graph/errors.rs b/crates/sb_graph/errors.rs new file mode 100644 index 000000000..c3bf8df14 --- /dev/null +++ b/crates/sb_graph/errors.rs @@ -0,0 +1,10 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum EszipError { + #[error("unsupported supabase eszip version (expected {expected:?}, found {found:?})")] + UnsupportedVersion { + expected: &'static [u8], + found: Option>, + }, +} diff --git a/crates/sb_graph/eszip_migrate.rs b/crates/sb_graph/eszip_migrate.rs new file mode 100644 index 000000000..d91faa40f --- /dev/null +++ b/crates/sb_graph/eszip_migrate.rs @@ -0,0 +1,318 @@ +use log::{error, warn}; + +use crate::{errors::EszipError, LazyLoadableEszip}; + +pub async fn try_migrate_if_needed( + mut eszip: LazyLoadableEszip, +) -> Result { + if let Err(err) = eszip.ensure_version().await { + match err.downcast_ref::() { + Some(err) => { + warn!("{}: will attempt migration", err); + let result = match err { + EszipError::UnsupportedVersion { expected, found } => match (expected, found) { + (&b"1", &None) => v0::try_migrate_v0_v1(&mut eszip).await, + _ => unreachable!(), + }, + }; + + match result { + Ok(migrated) => Ok(migrated), + Err(err) => { + error!("{:?}", err); + Err(eszip) + } + } + } + + None => { + error!("failed to migrate (found unexpected error)"); + Err(eszip) + } + } + } else { + Ok(eszip) + } +} + +mod v0 { + use std::{collections::HashSet, sync::Arc}; + + use anyhow::Context; + use deno_core::serde_json; + use eszip::{v2::EszipV2Modules, EszipV2}; + use futures::future::OptionFuture; + use once_cell::sync::Lazy; + use sb_eszip_shared::{AsyncEszipDataRead, SUPABASE_ESZIP_VERSION_KEY, VFS_ESZIP_KEY}; + use serde::{Deserialize, Serialize}; + + use crate::{eszip_migrate::v1, LazyLoadableEszip}; + + #[derive(Serialize, Deserialize, Debug)] + pub struct Directory { + pub name: String, + pub entries: Vec, + } + + #[derive(Serialize, Deserialize, Debug, Clone)] + pub struct File { + pub name: String, + pub offset: u64, + pub len: u64, + pub content: Option>, + } + + #[derive(Serialize, Deserialize, Debug)] + pub struct Symlink { + pub name: String, + pub dest_parts: Vec, + } + + #[derive(Serialize, Deserialize, Debug)] + pub enum Entry { + Dir(Directory), + File(File), + Symlink(Symlink), + } + + pub async fn try_migrate_v0_v1( + v0_eszip: &mut LazyLoadableEszip, + ) -> Result { + let mut v1_eszip = LazyLoadableEszip::new( + EszipV2 { + modules: EszipV2Modules::default(), + npm_snapshot: v0_eszip.npm_snapshot.take(), + }, + None, + ); + + v0_eszip + .ensure_read_all() + .await + .with_context(|| "failed to load v0 eszip data")?; + + let vfs_mod_data = OptionFuture::<_>::from( + v0_eszip + .ensure_module(VFS_ESZIP_KEY) + .map(|it| async move { it.source().await }), + ) + .await + .flatten(); + + //STATIC_FILES_ESZIP_KEY + + let v1_dir = if let Some(data) = vfs_mod_data { + let mut count = 0; + let v0_dir = serde_json::from_slice::>(data.as_ref()) + .with_context(|| "failed to parse v0 structure")?; + + fn migrate_dir_v0_v1( + v0_dir: self::Directory, + v1_eszip: &mut LazyLoadableEszip, + count: &mut i32, + ) -> v1::Directory { + let mut v1_dir = v1::Directory { + name: v0_dir.name.clone(), + entries: vec![], + }; + + let v1_dir_entries = &mut v1_dir.entries; + + for entry in v0_dir.entries.into_iter() { + match entry { + Entry::Dir(v0_sub_dir) => { + v1_dir_entries.push(v1::Entry::Dir(migrate_dir_v0_v1( + v0_sub_dir, v1_eszip, count, + ))); + } + + Entry::File(v0_sub_file) => { + let key = format!("vfs://{}", *count); + let data = v0_sub_file.content; + + *count += 1; + v1_dir_entries.push(v1::Entry::File(v1::File { + key: key.clone(), + name: v0_sub_file.name, + offset: v0_sub_file.offset, + len: v0_sub_file.len, + })); + + if let Some(data) = data { + v1_eszip.add_opaque_data(key, data.into()); + } + } + + Entry::Symlink(v0_sub_symlink) => { + v1_dir_entries.push(v1::Entry::Symlink(v1::Symlink { + name: v0_sub_symlink.name, + dest_parts: v0_sub_symlink.dest_parts, + })); + } + } + } + + v1_dir + } + + v0_dir.map(|it| migrate_dir_v0_v1(it, &mut v1_eszip, &mut count)) + } else { + None + }; + + let v1_vfs_data = rkyv::to_bytes::<_, 1024>(&v1_dir) + .with_context(|| "failed to serialize v1 vfs data")?; + + v1_eszip.add_opaque_data( + String::from(SUPABASE_ESZIP_VERSION_KEY), + Arc::from(b"1" as &[u8]), + ); + + v1_eszip.add_opaque_data( + String::from(VFS_ESZIP_KEY), + Arc::from(v1_vfs_data.into_boxed_slice()), + ); + + static BLOCKLIST: Lazy> = + Lazy::new(|| HashSet::from([SUPABASE_ESZIP_VERSION_KEY, VFS_ESZIP_KEY])); + + let specifiers = v0_eszip.specifiers(); + let mut v0_modules = v0_eszip.modules.0.lock().unwrap(); + let mut v1_modules = v1_eszip.modules.0.lock().unwrap(); + + for specifier in specifiers { + if BLOCKLIST.contains(specifier.as_str()) { + continue; + } + + let module = v0_modules.remove(&specifier).unwrap(); + + v1_modules.insert(specifier, module); + } + + drop(v1_modules); + + Ok(v1_eszip) + } +} + +mod v1 { + use rkyv::{Archive, Deserialize, Serialize}; + + #[derive(Archive, Serialize, Deserialize, Debug)] + #[archive( + check_bytes, + bound(serialize = "__S: rkyv::ser::ScratchSpace + rkyv::ser::Serializer") + )] + #[archive_attr(check_bytes( + bound = "__C: rkyv::validation::ArchiveContext, <__C as rkyv::Fallible>::Error: std::error::Error" + ))] + pub struct Directory { + pub name: String, + // should be sorted by name + #[omit_bounds] + #[archive_attr(omit_bounds)] + pub entries: Vec, + } + + #[derive(Archive, Serialize, Deserialize, Debug, Clone)] + #[archive(check_bytes)] + pub struct File { + pub key: String, + pub name: String, + pub offset: u64, + pub len: u64, + } + + #[derive(Archive, Serialize, Deserialize, Debug)] + #[archive(check_bytes)] + pub struct Symlink { + pub name: String, + pub dest_parts: Vec, + } + + #[derive(Archive, Serialize, Deserialize, Debug)] + #[archive(check_bytes)] + pub enum Entry { + Dir(Directory), + File(File), + Symlink(Symlink), + } +} + +#[cfg(test)] +mod test { + use std::path::PathBuf; + use tokio::fs; + + use crate::{extract_eszip, EszipPayloadKind, ExtractEszipPayload}; + + const MIGRATE_TEST_DIR: &str = "../base/test_cases/eszip-migration"; + + async fn test_extract_eszip(orig: PathBuf, target: PathBuf) { + let tmp_dir = tempfile::tempdir().unwrap(); + let (_orig_buf, target_buf) = { + ( + fs::read(orig).await.unwrap(), + fs::read(target).await.unwrap(), + ) + }; + + let payload = ExtractEszipPayload { + data: EszipPayloadKind::VecKind(target_buf), + folder: tmp_dir.path().to_path_buf(), + }; + + assert!(extract_eszip(payload).await); + + // TODO(Nyannyacha): It seems to be returning a buffer for the transpiled source rather than + // the original source. Fix that issue and uncomment below. + + // let tmp_file_buf = fs::read(tmp_dir.path().join("index.ts")).await.unwrap(); + // assert_eq!(orig_buf, tmp_file_buf); + } + + #[tokio::test] + async fn test_extract_v0() { + test_extract_eszip( + PathBuf::from(format!("{}/npm-supabase-js/index.ts", MIGRATE_TEST_DIR)), + PathBuf::from(format!("{}/npm-supabase-js/v0.eszip", MIGRATE_TEST_DIR)), + ) + .await; + } + + #[tokio::test] + async fn test_extract_v1() { + test_extract_eszip( + PathBuf::from(format!("{}/npm-supabase-js/index.ts", MIGRATE_TEST_DIR)), + PathBuf::from(format!("{}/npm-supabase-js/v1.eszip", MIGRATE_TEST_DIR)), + ) + .await; + } + + #[tokio::test] + #[should_panic] + async fn test_extract_v0_corrupted() { + test_extract_eszip( + PathBuf::from(format!("{}/npm-supabase-js/index.ts", MIGRATE_TEST_DIR)), + PathBuf::from(format!( + "{}/npm-supabase-js/v0_corrupted.eszip", + MIGRATE_TEST_DIR + )), + ) + .await; + } + + #[tokio::test] + #[should_panic] + async fn test_extract_v1_corrupted() { + test_extract_eszip( + PathBuf::from(format!("{}/npm-supabase-js/index.ts", MIGRATE_TEST_DIR)), + PathBuf::from(format!( + "{}/npm-supabase-js/v1_corrupted.eszip", + MIGRATE_TEST_DIR + )), + ) + .await; + } +} diff --git a/crates/sb_graph/eszip_parse.rs b/crates/sb_graph/eszip_parse.rs new file mode 100644 index 000000000..26867ae55 --- /dev/null +++ b/crates/sb_graph/eszip_parse.rs @@ -0,0 +1,129 @@ +// Below is roughly originated from eszip@0.60.0/src/v2.rs + +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, +}; + +use eszip::{ + v2::{ + read_npm_section, EszipNpmPackageIndex, EszipV2Module, EszipV2Modules, EszipV2SourceSlot, + HashedSection, + }, + EszipV2, ModuleKind, ParseError, +}; +use futures::{io::BufReader, AsyncRead, AsyncReadExt}; +use hashlink::LinkedHashMap; + +const ESZIP_V2_1_MAGIC: &[u8; 8] = b"ESZIP2.1"; + +pub async fn parse_v2_header( + reader: &mut BufReader, +) -> Result { + let mut magic = [0u8; 8]; + reader.read_exact(&mut magic).await?; + + if !EszipV2::has_magic(&magic) { + return Err(ParseError::InvalidV2); + } + + let is_v3 = magic == *ESZIP_V2_1_MAGIC; + let header = HashedSection::read(reader).await?; + if !header.hash_valid() { + return Err(ParseError::InvalidV2HeaderHash); + } + + let mut modules = LinkedHashMap::::new(); + let mut npm_specifiers = HashMap::new(); + + let mut read = 0; + + // This macro reads n number of bytes from the header section. If the header + // section is not long enough, this function will be early exited with an + // error. + macro_rules! read { + ($n:expr, $err:expr) => {{ + if read + $n > header.len() { + return Err(ParseError::InvalidV2Header($err)); + } + let start = read; + read += $n; + &header.bytes()[start..read] + }}; + } + + while read < header.len() { + let specifier_len = + u32::from_be_bytes(read!(4, "specifier len").try_into().unwrap()) as usize; + let specifier = String::from_utf8(read!(specifier_len, "specifier").to_vec()) + .map_err(|_| ParseError::InvalidV2Specifier(read))?; + + let entry_kind = read!(1, "entry kind")[0]; + match entry_kind { + 0 => { + let source_offset = + u32::from_be_bytes(read!(4, "source offset").try_into().unwrap()); + let source_len = u32::from_be_bytes(read!(4, "source len").try_into().unwrap()); + let source_map_offset = + u32::from_be_bytes(read!(4, "source map offset").try_into().unwrap()); + let source_map_len = + u32::from_be_bytes(read!(4, "source map len").try_into().unwrap()); + let kind = match read!(1, "module kind")[0] { + 0 => ModuleKind::JavaScript, + 1 => ModuleKind::Json, + 2 => ModuleKind::Jsonc, + 3 => ModuleKind::OpaqueData, + n => return Err(ParseError::InvalidV2ModuleKind(n, read)), + }; + let source = if source_offset == 0 && source_len == 0 { + EszipV2SourceSlot::Ready(Arc::new([])) + } else { + EszipV2SourceSlot::Pending { + offset: source_offset as usize, + length: source_len as usize, + wakers: vec![], + } + }; + let source_map = if source_map_offset == 0 && source_map_len == 0 { + EszipV2SourceSlot::Ready(Arc::new([])) + } else { + EszipV2SourceSlot::Pending { + offset: source_map_offset as usize, + length: source_map_len as usize, + wakers: vec![], + } + }; + let module = EszipV2Module::Module { + kind, + source, + source_map, + }; + modules.insert(specifier, module); + } + 1 => { + let target_len = + u32::from_be_bytes(read!(4, "target len").try_into().unwrap()) as usize; + let target = String::from_utf8(read!(target_len, "target").to_vec()) + .map_err(|_| ParseError::InvalidV2Specifier(read))?; + modules.insert(specifier, EszipV2Module::Redirect { target }); + } + 2 if is_v3 => { + // npm specifier + let pkg_id = u32::from_be_bytes(read!(4, "npm package id").try_into().unwrap()); + npm_specifiers.insert(specifier, EszipNpmPackageIndex(pkg_id)); + } + n => return Err(ParseError::InvalidV2EntryKind(n, read)), + }; + } + + let npm_snapshot = if is_v3 { + read_npm_section(reader, npm_specifiers).await? + } else { + None + }; + + Ok(EszipV2 { + modules: EszipV2Modules(Arc::new(Mutex::new(modules))), + npm_snapshot, + }) +} diff --git a/crates/sb_graph/lib.rs b/crates/sb_graph/lib.rs index eb0867f15..c182272c1 100644 --- a/crates/sb_graph/lib.rs +++ b/crates/sb_graph/lib.rs @@ -1,26 +1,42 @@ use crate::emitter::EmitterFactory; +use crate::errors::EszipError; use crate::graph_util::{create_eszip_from_graph_raw, create_graph}; +use anyhow::{bail, Context}; use deno_ast::MediaType; -use deno_core::error::AnyError; use deno_core::futures::io::{AllowStdIo, BufReader}; use deno_core::url::Url; -use deno_core::{serde_json, FastString, JsBuffer, ModuleSpecifier}; +use deno_core::{FastString, JsBuffer, ModuleSpecifier}; use deno_fs::{FileSystem, RealFs}; use deno_npm::NpmSystemInfo; -use eszip::{EszipV2, ModuleKind}; +use eszip::v2::{EszipV2Module, EszipV2Modules, EszipV2SourceSlot}; +use eszip::{EszipV2, Module, ModuleKind, ParseError}; +use futures::future::OptionFuture; +use futures::{AsyncReadExt, AsyncSeekExt}; use glob::glob; use log::error; +use sb_eszip_shared::{ + AsyncEszipDataRead, SOURCE_CODE_ESZIP_KEY, STATIC_FILES_ESZIP_KEY, SUPABASE_ESZIP_VERSION, + SUPABASE_ESZIP_VERSION_KEY, VFS_ESZIP_KEY, +}; use sb_fs::{build_vfs, VfsOpts}; use sb_npm::InnerCliNpmResolverRef; +use scopeguard::ScopeGuard; use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; use std::borrow::Cow; +use std::collections::HashMap; use std::fs; use std::fs::{create_dir_all, File}; -use std::io::Write; +use std::io::{Cursor, SeekFrom, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; +use tokio::sync::Mutex; + +mod eszip_parse; pub mod emitter; +pub mod errors; +pub mod eszip_migrate; pub mod graph_fs; pub mod graph_resolver; pub mod graph_util; @@ -28,11 +44,6 @@ pub mod import_map; pub mod jsr; pub mod jsx_util; -pub const VFS_ESZIP_KEY: &str = "---SUPABASE-VFS-DATA-ESZIP---"; -pub const SOURCE_CODE_ESZIP_KEY: &str = "---SUPABASE-SOURCE-CODE-ESZIP---"; -pub const STATIC_FILES_ESZIP_KEY: &str = "---SUPABASE-STATIC-FILES-ESZIP---"; -pub const STATIC_FS_PREFIX: &str = "mnt/data"; - #[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum DecoratorType { @@ -71,143 +82,709 @@ pub enum EszipPayloadKind { Eszip(EszipV2), } -pub async fn payload_to_eszip(eszip_payload_kind: EszipPayloadKind) -> EszipV2 { +async fn read_u32(reader: &mut R) -> Result { + let mut buf = [0u8; 4]; + reader.read_exact(&mut buf).await?; + Ok(u32::from_be_bytes(buf)) +} + +#[derive(Debug)] +pub struct LazyLoadableEszip { + eszip: EszipV2, + maybe_data_section: Option>, +} + +impl std::ops::Deref for LazyLoadableEszip { + type Target = EszipV2; + + fn deref(&self) -> &Self::Target { + &self.eszip + } +} + +impl std::ops::DerefMut for LazyLoadableEszip { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.eszip + } +} + +impl Clone for LazyLoadableEszip { + fn clone(&self) -> Self { + Self { + eszip: EszipV2 { + modules: self.eszip.modules.clone(), + npm_snapshot: None, + }, + maybe_data_section: self.maybe_data_section.clone(), + } + } +} + +impl AsyncEszipDataRead for LazyLoadableEszip { + fn ensure_module(&self, specifier: &str) -> Option { + let module = self.ensure_data(specifier)?; + + if module.kind == ModuleKind::Jsonc { + return None; + } + + Some(module) + } + + fn ensure_import_map(&self, specifier: &str) -> Option { + let module = self.ensure_data(specifier)?; + + if module.kind == ModuleKind::JavaScript { + return None; + } + + Some(module) + } +} + +impl LazyLoadableEszip { + fn new(eszip: EszipV2, maybe_data_section: Option>) -> Self { + Self { + eszip, + maybe_data_section, + } + } + + pub fn ensure_data(&self, specifier: &str) -> Option { + let module = self + .get_module(specifier) + .or_else(|| self.get_import_map(specifier))?; + + if let Some(section) = self.maybe_data_section.clone() { + let specifier = module.specifier.clone(); + + drop(tokio::spawn(async move { + match section.read_data_section_by_specifier(&specifier).await { + Ok(_) => {} + Err(err) => { + error!("failed to read module data from the data section: {}", err); + } + } + })); + } + + Some(module) + } + + pub async fn ensure_read_all(&mut self) -> Result<(), ParseError> { + if let Some(section) = self.maybe_data_section.take() { + section.read_data_section_all().await + } else { + Ok(()) + } + } + + pub async fn ensure_version(&self) -> Result<(), anyhow::Error> { + let version = OptionFuture::<_>::from( + self.ensure_module(SUPABASE_ESZIP_VERSION_KEY) + .map(|it| async move { it.source().await }), + ) + .await + .flatten(); + + if !matches!(version, Some(ref v) if v.as_ref() == SUPABASE_ESZIP_VERSION) { + bail!(EszipError::UnsupportedVersion { + expected: SUPABASE_ESZIP_VERSION, + found: version.as_deref().map(<[u8]>::to_vec) + }); + } + + Ok(()) + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct EszipDataLoc { + source_offset: usize, + source_length: usize, + source_map_offset: usize, + source_map_length: usize, +} + +#[derive(Debug, Clone)] +pub enum EszipDataSectionMetadata { + HasLocation(EszipDataLoc), + PendingOrAlreadyLoaded, +} + +#[derive(Debug, Clone)] +pub struct EszipDataSection { + inner: Arc>>>, + modules: EszipV2Modules, + initial_offset: u64, + sources_len: Arc>>, + locs_by_specifier: Arc>>>, + loaded_locs_by_specifier: Arc>>, +} + +impl EszipDataSection { + pub fn new(inner: Cursor>, initial_offset: u64, modules: EszipV2Modules) -> Self { + Self { + inner: Arc::new(Mutex::new(inner)), + modules, + initial_offset, + sources_len: Arc::default(), + locs_by_specifier: Arc::default(), + loaded_locs_by_specifier: Arc::default(), + } + } + + pub async fn read_data_section_by_specifier( + &self, + specifier: &str, + ) -> Result<(), anyhow::Error> { + let mut locs_guard = self.locs_by_specifier.lock().await; + let locs = locs_guard.get_or_insert_with(|| { + self.modules + .0 + .lock() + .unwrap() + .iter() + .filter_map(|(specifier, m)| { + let mut loc = EszipDataLoc::default(); + let (source_slot, source_map_slot) = match m { + EszipV2Module::Module { + source, source_map, .. + } => (source, source_map), + EszipV2Module::Redirect { .. } => return None, + }; + + match source_slot { + EszipV2SourceSlot::Pending { offset, length, .. } => { + loc.source_offset = *offset; + loc.source_length = *length; + } + + EszipV2SourceSlot::Ready(_) | EszipV2SourceSlot::Taken => { + loc.source_length = 0; + loc.source_offset = 0; + } + } + + if let EszipV2SourceSlot::Pending { offset, length, .. } = source_map_slot { + loc.source_map_offset = *offset; + loc.source_map_length = *length; + } else if loc.source_length == 0 && loc.source_offset == 0 { + return Some(( + specifier.clone(), + EszipDataSectionMetadata::PendingOrAlreadyLoaded, + )); + } + + Some(( + specifier.clone(), + EszipDataSectionMetadata::HasLocation(loc), + )) + }) + .collect::>() + }); + + let Some(metadata) = locs.get_mut(specifier) else { + bail!("given specifier does not exist in the eszip header") + }; + + let loc = match metadata { + &mut EszipDataSectionMetadata::HasLocation(loc) => { + self.loaded_locs_by_specifier + .lock() + .await + .insert(String::from(specifier), loc); + + *metadata = EszipDataSectionMetadata::PendingOrAlreadyLoaded; + loc + } + + _ => return Ok(()), + }; + + drop(locs_guard); + + let mut inner = self.inner.lock().await; + let mut io = AllowStdIo::new({ + // NOTE: 4 byte offset in the middle represents the full source length. + inner.set_position(self.initial_offset + 4 + loc.source_offset as u64); + inner.by_ref() + }); + + let source_bytes = 'scope: { + if loc.source_length == 0 { + break 'scope None::>; + } + + let wake_guard = scopeguard::guard(&self.modules, |modules| { + Self::wake_source_slot(modules, specifier, || EszipV2SourceSlot::Taken); + }); + + let mut source_bytes = vec![0u8; loc.source_length]; + io.read_exact(&mut source_bytes).await?; + + let expected_hash = &mut [0u8; 32]; + io.read_exact(expected_hash).await?; + + let mut hasher = Sha256::new(); + hasher.update(&source_bytes); + + let actual_hash = hasher.finalize(); + + if &*actual_hash != expected_hash { + return Err(ParseError::InvalidV2SourceHash(specifier.to_string())) + .context("invalid source hash"); + } + + let _ = ScopeGuard::into_inner(wake_guard); + + Some(source_bytes) + }; + + if let Some(bytes) = source_bytes { + Self::wake_source_slot(&self.modules, specifier, move || { + EszipV2SourceSlot::Ready(Arc::from(bytes)) + }); + } + + let source_map_bytes = 'scope: { + if loc.source_map_length == 0 { + break 'scope None::>; + } + + let sources_len = { + let mut guard = self.sources_len.lock().await; + + match &mut *guard { + Some(len) => *len, + opt @ None => { + let mut io = AllowStdIo::new({ + inner.set_position(self.initial_offset); + inner.by_ref() + }); + + let sources_len = read_u32(&mut io).await? as usize; + + *opt = Some(sources_len as u64); + sources_len as u64 + } + } + }; + + let mut io = AllowStdIo::new({ + // NOTE: 4 byte offset in the middle represents the full source / source map length. + inner.set_position( + self.initial_offset + 4 + sources_len + 4 + loc.source_map_offset as u64, + ); + inner.by_ref() + }); + + let wake_guard = scopeguard::guard(&self.modules, |modules| { + Self::wake_source_map_slot(modules, specifier, || EszipV2SourceSlot::Taken); + }); + + let mut source_map_bytes = vec![0u8; loc.source_map_length]; + io.read_exact(&mut source_map_bytes).await?; + + let expected_hash = &mut [0u8; 32]; + io.read_exact(expected_hash).await?; + + let mut hasher = Sha256::new(); + hasher.update(&source_map_bytes); + + let actual_hash = hasher.finalize(); + + if &*actual_hash != expected_hash { + return Err(ParseError::InvalidV2SourceHash(specifier.to_string())) + .context("invalid source hash"); + } + + let _ = ScopeGuard::into_inner(wake_guard); + + Some(source_map_bytes) + }; + + if let Some(bytes) = source_map_bytes { + Self::wake_source_map_slot(&self.modules, specifier, move || { + EszipV2SourceSlot::Ready(Arc::from(bytes)) + }); + } + + Ok(()) + } + + pub async fn read_data_section_all(self: Arc) -> Result<(), ParseError> { + // NOTE: Below codes is roughly originated from eszip@0.60.0/src/v2.rs + + let this = Arc::into_inner(self).unwrap(); + let modules = this.modules; + let mut loaded_locs = Arc::into_inner(this.loaded_locs_by_specifier) + .unwrap() + .into_inner(); + + let mut inner = this.inner.try_lock_owned().unwrap(); + let mut io = AllowStdIo::new({ + inner.set_position(this.initial_offset); + inner.by_ref() + }); + + let sources_len = read_u32(&mut io).await? as usize; + let mut read = 0; + + let mut source_offsets = modules + .0 + .lock() + .unwrap() + .iter() + .filter_map(|(specifier, m)| { + if let EszipV2Module::Module { + source: EszipV2SourceSlot::Pending { offset, length, .. }, + .. + } = m + { + Some((*offset, (*length, specifier.clone(), true))) + } else { + loaded_locs.remove(specifier.as_str()).map(|loc| { + ( + loc.source_offset, + (loc.source_length, specifier.clone(), false), + ) + }) + } + }) + .collect::>(); + + let mut source_map_offsets = modules + .0 + .lock() + .unwrap() + .iter() + .filter_map(|(specifier, m)| { + if let EszipV2Module::Module { + source_map: EszipV2SourceSlot::Pending { offset, length, .. }, + .. + } = m + { + Some((*offset, (*length, specifier.clone(), true))) + } else { + loaded_locs.remove(specifier.as_str()).map(|loc| { + ( + loc.source_map_offset, + (loc.source_map_length, specifier.clone(), false), + ) + }) + } + }) + .collect::>(); + + while read < sources_len { + let (length, specifier, need_load) = source_offsets + .remove(&read) + .ok_or(ParseError::InvalidV2SourceOffset(read))?; + + if !need_load { + read += length + 32; + + io.seek(SeekFrom::Current((length + 32) as i64)) + .await + .unwrap(); + + continue; + } + + let mut source_bytes = vec![0u8; length]; + io.read_exact(&mut source_bytes).await?; + + let expected_hash = &mut [0u8; 32]; + io.read_exact(expected_hash).await?; + + let mut hasher = Sha256::new(); + hasher.update(&source_bytes); + + let actual_hash = hasher.finalize(); + + if &*actual_hash != expected_hash { + return Err(ParseError::InvalidV2SourceHash(specifier)); + } + + read += length + 32; + + Self::wake_source_slot(&modules, &specifier, move || { + EszipV2SourceSlot::Ready(Arc::from(source_bytes)) + }); + } + + let sources_maps_len = read_u32(&mut io).await? as usize; + let mut read = 0; + + while read < sources_maps_len { + let (length, specifier, need_load) = source_map_offsets + .remove(&read) + .ok_or(ParseError::InvalidV2SourceOffset(read))?; + + if !need_load { + read += length + 32; + + io.seek(SeekFrom::Current((length + 32) as i64)) + .await + .unwrap(); + + continue; + } + + let mut source_map_bytes = vec![0u8; length]; + io.read_exact(&mut source_map_bytes).await?; + + let expected_hash = &mut [0u8; 32]; + io.read_exact(expected_hash).await?; + + let mut hasher = Sha256::new(); + hasher.update(&source_map_bytes); + + let actual_hash = hasher.finalize(); + + if &*actual_hash != expected_hash { + return Err(ParseError::InvalidV2SourceHash(specifier)); + } + + read += length + 32; + + Self::wake_source_map_slot(&modules, &specifier, move || { + EszipV2SourceSlot::Ready(Arc::from(source_map_bytes)) + }); + } + + Ok(()) + } + + fn wake_module_with_slot( + modules: &EszipV2Modules, + specifier: &str, + select_slot_fn: F, + new_slot_fn: G, + ) where + F: for<'r> FnOnce(&'r mut EszipV2Module) -> &'r mut EszipV2SourceSlot, + G: FnOnce() -> EszipV2SourceSlot, + { + let wakers = { + let mut modules = modules.0.lock().unwrap(); + let module = modules.get_mut(specifier).expect("module not found"); + let slot = select_slot_fn(module); + + let old_slot = std::mem::replace(slot, new_slot_fn()); + + match old_slot { + EszipV2SourceSlot::Pending { wakers, .. } => wakers, + _ => panic!("already populated source slot"), + } + }; + + for w in wakers { + w.wake(); + } + } + + fn wake_source_slot(modules: &EszipV2Modules, specifier: &str, new_slot_fn: F) + where + F: FnOnce() -> EszipV2SourceSlot, + { + Self::wake_module_with_slot( + modules, + specifier, + |module| match module { + EszipV2Module::Module { ref mut source, .. } => source, + _ => panic!("invalid module type"), + }, + new_slot_fn, + ) + } + + fn wake_source_map_slot(modules: &EszipV2Modules, specifier: &str, new_slot_fn: F) + where + F: FnOnce() -> EszipV2SourceSlot, + { + Self::wake_module_with_slot( + modules, + specifier, + |module| match module { + EszipV2Module::Module { + ref mut source_map, .. + } => source_map, + _ => panic!("invalid module type"), + }, + new_slot_fn, + ) + } +} + +pub async fn payload_to_eszip(eszip_payload_kind: EszipPayloadKind) -> LazyLoadableEszip { match eszip_payload_kind { - EszipPayloadKind::Eszip(data) => data, + EszipPayloadKind::Eszip(eszip) => LazyLoadableEszip::new(eszip, None), _ => { let bytes = match eszip_payload_kind { EszipPayloadKind::JsBufferKind(js_buffer) => Vec::from(&*js_buffer), EszipPayloadKind::VecKind(vec) => vec, - _ => panic!("It should not get here"), + _ => unreachable!(), }; - let bufreader = BufReader::new(AllowStdIo::new(bytes.as_slice())); - let (eszip, loader) = eszip::EszipV2::parse(bufreader).await.unwrap(); + let mut io = AllowStdIo::new(Cursor::new(bytes)); + let mut bufreader = BufReader::new(&mut io); - loader.await.unwrap(); + let eszip = eszip_parse::parse_v2_header(&mut bufreader).await.unwrap(); - eszip + let initial_offset = bufreader.stream_position().await.unwrap(); + let data_section = + EszipDataSection::new(io.into_inner(), initial_offset, eszip.modules.clone()); + + LazyLoadableEszip::new(eszip, Some(Arc::new(data_section))) } } } -pub async fn generate_binary_eszip( - file: PathBuf, +pub async fn generate_binary_eszip

( + file: P, emitter_factory: Arc, maybe_module_code: Option, maybe_import_map_url: Option, -) -> Result { - let graph = create_graph(file.clone(), emitter_factory.clone(), &maybe_module_code).await; - let eszip = create_eszip_from_graph_raw(graph, Some(emitter_factory.clone())).await; - - if let Ok(mut eszip) = eszip { - let fs_path = file.clone(); - let source_code: Arc = if let Some(code) = maybe_module_code { - code.as_str().into() - } else { - let entry_content = RealFs - .read_file_sync(fs_path.clone().as_path(), None) - .unwrap(); - String::from_utf8(entry_content.clone())?.into() - }; - let emit_source = emitter_factory.emitter().unwrap().emit_parsed_source( - &ModuleSpecifier::parse( - &Url::from_file_path(&fs_path) - .map(|it| Cow::Owned(it.to_string())) - .ok() - .unwrap_or("http://localhost".into()), - ) - .unwrap(), - MediaType::from_path(fs_path.clone().as_path()), - &source_code, - )?; - - let bin_code: Arc<[u8]> = emit_source.as_bytes().into(); - - let npm_res = emitter_factory.npm_resolution().await; - let resolver = emitter_factory.npm_resolver().await; - - let (npm_vfs, _npm_files) = match resolver.clone().as_inner() { - InnerCliNpmResolverRef::Managed(managed) => { - let snapshot = - managed.serialized_valid_snapshot_for_system(&NpmSystemInfo::default()); - if !snapshot.as_serialized().packages.is_empty() { - let (root_dir, files) = build_vfs(VfsOpts { +) -> Result +where + P: AsRef, +{ + let file = file.as_ref(); + let graph = create_graph( + file.to_path_buf(), + emitter_factory.clone(), + &maybe_module_code, + ) + .await; + + let mut eszip = create_eszip_from_graph_raw(graph, Some(emitter_factory.clone())).await?; + + let source_code: Arc = if let Some(code) = maybe_module_code { + code.as_str().into() + } else { + String::from_utf8(RealFs.read_file_sync(file, None)?)?.into() + }; + + let emit_source = emitter_factory.emitter().unwrap().emit_parsed_source( + &ModuleSpecifier::parse( + &Url::from_file_path(file) + .map(|it| Cow::Owned(it.to_string())) + .ok() + .unwrap_or("http://localhost".into()), + ) + .unwrap(), + MediaType::from_path(file), + &source_code, + )?; + + let bin_code: Arc<[u8]> = emit_source.as_bytes().into(); + let npm_res = emitter_factory.npm_resolution().await; + let resolver = emitter_factory.npm_resolver().await; + + let (npm_vfs, _npm_files) = match resolver.clone().as_inner() { + InnerCliNpmResolverRef::Managed(managed) => { + let snapshot = managed.serialized_valid_snapshot_for_system(&NpmSystemInfo::default()); + if !snapshot.as_serialized().packages.is_empty() { + let mut count = 0; + let (root_dir, files) = build_vfs( + VfsOpts { npm_resolver: resolver.clone(), npm_registry_api: emitter_factory.npm_api().await.clone(), npm_cache: emitter_factory.npm_cache().await.clone(), npm_resolution: emitter_factory.npm_resolution().await.clone(), - })? - .into_dir_and_files(); + }, + |_path, _key, content| { + let key = format!("vfs://{}", count); - let snapshot = - npm_res.serialized_valid_snapshot_for_system(&NpmSystemInfo::default()); - eszip.add_npm_snapshot(snapshot); - (Some(root_dir), files) - } else { - (None, Vec::new()) - } + count += 1; + eszip.add_opaque_data(key.clone(), content.into()); + key + }, + )? + .into_dir_and_files(); + + let snapshot = + npm_res.serialized_valid_snapshot_for_system(&NpmSystemInfo::default()); + + eszip.add_npm_snapshot(snapshot); + + (Some(root_dir), files) + } else { + (None, Vec::new()) } - InnerCliNpmResolverRef::Byonm(_) => unreachable!(), - }; + } + InnerCliNpmResolverRef::Byonm(_) => unreachable!(), + }; - let npm_vfs = serde_json::to_vec(&npm_vfs).unwrap().to_vec(); - let boxed_slice = npm_vfs.into_boxed_slice(); - - eszip.add_opaque_data(String::from(VFS_ESZIP_KEY), Arc::from(boxed_slice)); - eszip.add_opaque_data(String::from(SOURCE_CODE_ESZIP_KEY), bin_code); - - // add import map - if emitter_factory.maybe_import_map.is_some() { - eszip.add_import_map( - ModuleKind::Json, - maybe_import_map_url.unwrap(), - Arc::from( - emitter_factory - .maybe_import_map - .as_ref() - .unwrap() - .to_json() - .as_bytes(), - ), - ); - }; + let npm_vfs = + rkyv::to_bytes::<_, 1024>(&npm_vfs).with_context(|| "cannot serialize vfs data")?; + + eszip.add_opaque_data( + String::from(SUPABASE_ESZIP_VERSION_KEY), + Arc::from(SUPABASE_ESZIP_VERSION), + ); + + eszip.add_opaque_data( + String::from(VFS_ESZIP_KEY), + Arc::from(npm_vfs.into_boxed_slice()), + ); + + eszip.add_opaque_data(String::from(SOURCE_CODE_ESZIP_KEY), bin_code); + + // add import map + if emitter_factory.maybe_import_map.is_some() { + eszip.add_import_map( + ModuleKind::Json, + maybe_import_map_url.unwrap(), + Arc::from( + emitter_factory + .maybe_import_map + .as_ref() + .unwrap() + .to_json() + .as_bytes(), + ), + ); + }; - Ok(eszip) - } else { - eszip - } + Ok(eszip) } -pub async fn include_glob_patterns_in_eszip( +pub async fn include_glob_patterns_in_eszip

( patterns: Vec<&str>, eszip: &mut EszipV2, - prefix: Option, -) { - let mut static_files: Vec = vec![]; + base_dir: P, +) -> Result<(), anyhow::Error> +where + P: AsRef, +{ + let cwd = std::env::current_dir(); + let base_dir = base_dir.as_ref(); + let mut specifiers: Vec = vec![]; + for pattern in patterns { for entry in glob(pattern).expect("Failed to read pattern") { match entry { Ok(path) => { - let mod_path = path.to_str().unwrap().to_string(); - let mod_path = if let Some(file_prefix) = prefix.clone() { - PathBuf::from(file_prefix) - .join(PathBuf::from(mod_path)) - .to_str() - .unwrap() - .to_string() - } else { - mod_path + let path = cwd.as_ref().unwrap().join(path); + let (path, rel) = match pathdiff::diff_paths(&path, base_dir) { + Some(rel) => (path, rel.to_string_lossy().to_string()), + None => (path.clone(), path.to_string_lossy().to_string()), }; if path.exists() { - let content = std::fs::read(path).unwrap(); - let arc_slice: Arc<[u8]> = Arc::from(content.into_boxed_slice()); - eszip.add_opaque_data(mod_path.clone(), arc_slice); - } + let specifier = format!("static:{}", rel.as_str()); - static_files.push(mod_path); + eszip.add_opaque_data( + specifier.clone(), + Arc::from(std::fs::read(path).unwrap().into_boxed_slice()), + ); + + specifiers.push(specifier); + } } + Err(_) => { error!("Error reading pattern {} for static files", pattern) } @@ -215,11 +792,18 @@ pub async fn include_glob_patterns_in_eszip( } } - if !static_files.is_empty() { - let file_specifiers_as_bytes = serde_json::to_vec(&static_files).unwrap(); - let arc_slice: Arc<[u8]> = Arc::from(file_specifiers_as_bytes.into_boxed_slice()); - eszip.add_opaque_data(String::from(STATIC_FILES_ESZIP_KEY), arc_slice); + if !specifiers.is_empty() { + eszip.add_opaque_data( + String::from(STATIC_FILES_ESZIP_KEY), + Arc::from( + rkyv::to_bytes::<_, 1024>(&specifiers) + .with_context(|| "cannot serialize accessible paths for static files")? + .into_boxed_slice(), + ), + ); } + + Ok(()) } fn extract_file_specifiers(eszip: &EszipV2) -> Vec { @@ -288,9 +872,18 @@ async fn extract_modules( } } -pub async fn extract_eszip(payload: ExtractEszipPayload) { - let eszip = payload_to_eszip(payload.data).await; +pub async fn extract_eszip(payload: ExtractEszipPayload) -> bool { let output_folder = payload.folder; + let mut eszip = + match eszip_migrate::try_migrate_if_needed(payload_to_eszip(payload.data).await).await { + Ok(v) => v, + Err(_old) => { + error!("eszip migration failed (give up extract job)"); + return false; + } + }; + + eszip.ensure_read_all().await.unwrap(); if !output_folder.exists() { create_dir_all(&output_folder).unwrap(); @@ -299,18 +892,20 @@ pub async fn extract_eszip(payload: ExtractEszipPayload) { let file_specifiers = extract_file_specifiers(&eszip); if let Some(lowest_path) = sb_core::util::path::find_lowest_path(&file_specifiers) { extract_modules(&eszip, &file_specifiers, &lowest_path, &output_folder).await; + true } else { panic!("Path seems to be invalid"); } } -pub async fn extract_from_file(eszip_file: PathBuf, output_path: PathBuf) { +pub async fn extract_from_file(eszip_file: PathBuf, output_path: PathBuf) -> bool { let eszip_content = fs::read(eszip_file).expect("File does not exist"); + extract_eszip(ExtractEszipPayload { data: EszipPayloadKind::VecKind(eszip_content), folder: output_path, }) - .await; + .await } #[cfg(test)] @@ -332,12 +927,16 @@ mod test { None, ) .await; + let eszip = eszip.unwrap(); - extract_eszip(ExtractEszipPayload { - data: EszipPayloadKind::Eszip(eszip), - folder: PathBuf::from("../base/test_cases/extracted-npm/"), - }) - .await; + + assert!( + extract_eszip(ExtractEszipPayload { + data: EszipPayloadKind::Eszip(eszip), + folder: PathBuf::from("../base/test_cases/extracted-npm/"), + }) + .await + ); assert!(PathBuf::from("../base/test_cases/extracted-npm/hello.js").exists()); remove_dir_all(PathBuf::from("../base/test_cases/extracted-npm/")).unwrap(); diff --git a/crates/sb_module_loader/Cargo.toml b/crates/sb_module_loader/Cargo.toml index 22217538a..339857373 100644 --- a/crates/sb_module_loader/Cargo.toml +++ b/crates/sb_module_loader/Cargo.toml @@ -10,12 +10,18 @@ license = "MIT" path = "lib.rs" [dependencies] +sb_core = { version = "0.1.0", path = "../sb_core" } +sb_node = { version = "0.1.0", path = "../node" } +sb_npm = { version = "0.1.0", path = "../npm" } +sb_graph = { version = "0.1.0", path = "../sb_graph" } +sb_fs = { version = "0.1.0", path = "../sb_fs" } +sb_eszip_shared = { version = "0.1.0", path = "../sb_eszip_shared" } deno_semver.workspace = true anyhow.workspace = true deno_core.workspace = true eszip.workspace = true import_map.workspace = true -log = { workspace = true } +log.workspace = true serde.workspace = true tokio.workspace = true deno_ast.workspace = true @@ -26,8 +32,4 @@ deno_tls.workspace = true monch.workspace = true base64.workspace = true tracing.workspace = true -sb_core = { version = "0.1.0", path = "../sb_core" } -sb_node = { version = "0.1.0", path = "../node" } -sb_npm = { version = "0.1.0", path = "../npm" } -sb_graph = { version = "0.1.0", path = "../sb_graph" } -sb_fs = { version = "0.1.0", path = "../sb_fs" } \ No newline at end of file +futures-util.workspace = true diff --git a/crates/sb_module_loader/standalone/mod.rs b/crates/sb_module_loader/standalone/mod.rs index 0ef4dfc15..6a90619b4 100644 --- a/crates/sb_module_loader/standalone/mod.rs +++ b/crates/sb_module_loader/standalone/mod.rs @@ -4,12 +4,13 @@ use crate::node::cli_node_resolver::CliNodeResolver; use crate::node::node_module_loader::{CjsResolutionStore, NpmModuleLoader}; use crate::standalone::standalone_module_loader::{EmbeddedModuleLoader, SharedModuleLoaderState}; use crate::RuntimeProviders; -use anyhow::Context; +use anyhow::{bail, Context}; use deno_core::error::AnyError; use deno_core::url::Url; use deno_core::{FastString, ModuleSpecifier}; use deno_tls::rustls::RootCertStore; use deno_tls::RootCertStoreProvider; +use futures_util::future::OptionFuture; use import_map::{parse_from_json, ImportMap}; use sb_core::cache::caches::Caches; use sb_core::cache::deno_dir::DenoDirProvider; @@ -17,10 +18,11 @@ use sb_core::cache::node::NodeAnalysisCache; use sb_core::cache::CacheSetting; use sb_core::cert::{get_root_cert_store, CaData}; use sb_core::util::http_util::HttpClient; +use sb_eszip_shared::{AsyncEszipDataRead, SOURCE_CODE_ESZIP_KEY, VFS_ESZIP_KEY}; use sb_fs::file_system::DenoCompileFileSystem; use sb_fs::{extract_static_files_from_eszip, load_npm_vfs}; use sb_graph::graph_resolver::MappedSpecifierResolver; -use sb_graph::{payload_to_eszip, EszipPayloadKind, SOURCE_CODE_ESZIP_KEY, VFS_ESZIP_KEY}; +use sb_graph::{eszip_migrate, payload_to_eszip, EszipPayloadKind, LazyLoadableEszip}; use sb_node::analyze::NodeCodeTranslator; use sb_node::NodeResolver; use sb_npm::cache_dir::NpmCacheDir; @@ -29,6 +31,7 @@ use sb_npm::{ create_managed_npm_resolver, CliNpmResolverManagedCreateOptions, CliNpmResolverManagedPackageJsonInstallerOption, CliNpmResolverManagedSnapshotOption, }; +use std::path::Path; use std::rc::Rc; use std::sync::Arc; @@ -53,13 +56,16 @@ impl RootCertStoreProvider for StandaloneRootCertStoreProvider { } } -pub async fn create_module_loader_for_eszip( - mut eszip: eszip::EszipV2, +pub async fn create_module_loader_for_eszip

( + mut eszip: LazyLoadableEszip, + base_dir_path: P, metadata: Metadata, maybe_import_map: Option, include_source_map: bool, -) -> Result { - // let main_module = &metadata.entrypoint; +) -> Result +where + P: AsRef, +{ let current_exe_path = std::env::current_exe().unwrap(); let current_exe_name = current_exe_path.file_name().unwrap().to_string_lossy(); let deno_dir_provider = Arc::new(DenoDirProvider::new(None)); @@ -68,6 +74,7 @@ pub async fn create_module_loader_for_eszip( ca_data: metadata.ca_data.map(CaData::Bytes), cell: Default::default(), }); + let http_client = Arc::new(HttpClient::new( Some(root_cert_store_provider.clone()), metadata.unsafely_ignore_certificate_errors.clone(), @@ -78,47 +85,39 @@ pub async fn create_module_loader_for_eszip( let root_path = std::env::temp_dir() .join(format!("sb-compile-{}", current_exe_name)) .join("node_modules"); + let npm_cache_dir = NpmCacheDir::new(root_path.clone()); let npm_global_cache_dir = npm_cache_dir.get_cache_location(); - let code_fs = if let Some(module) = eszip.get_module(SOURCE_CODE_ESZIP_KEY) { - if let Some(code) = module.take_source().await { - Some(FastString::from(String::from_utf8(code.to_vec())?)) - } else { - None - } - } else { - None - }; + let entry_module_source = OptionFuture::<_>::from( + eszip + .ensure_module(SOURCE_CODE_ESZIP_KEY) + .map(|it| async move { it.take_source().await }), + ) + .await + .flatten() + .map(|it| String::from_utf8_lossy(it.as_ref()).into_owned()) + .map(FastString::from); let snapshot = eszip.take_npm_snapshot(); - let static_files = extract_static_files_from_eszip(&eszip).await; + let static_files = extract_static_files_from_eszip(&eszip, base_dir_path).await; let vfs_root_dir_path = npm_cache_dir.registry_folder(&npm_registry_url); let (fs, vfs) = { - let key = String::from(VFS_ESZIP_KEY); - let vfs_data: Option> = if eszip.specifiers().contains(&key) { - Some( - eszip - .get_module(VFS_ESZIP_KEY) - .unwrap() - .take_source() - .await - .unwrap() - .to_vec(), - ) - } else { - None - }; - - let vfs_data: Option<&[u8]> = if let Some(data) = &vfs_data { - Some(data) - } else { - None - }; - - let vfs = - load_npm_vfs(vfs_root_dir_path.clone(), vfs_data).context("Failed to load npm vfs.")?; + let vfs_data = OptionFuture::<_>::from( + eszip + .ensure_module(VFS_ESZIP_KEY) + .map(|it| async move { it.source().await }), + ) + .await + .flatten(); + + let vfs = load_npm_vfs( + Arc::new(eszip.clone()), + vfs_root_dir_path.clone(), + vfs_data.as_deref(), + ) + .context("Failed to load npm vfs.")?; let fs = DenoCompileFileSystem::new(vfs); let fs_backed_vfs = fs.file_backed_vfs().clone(); @@ -152,6 +151,7 @@ pub async fn create_module_loader_for_eszip( fs.clone(), npm_resolver.clone().into_npm_resolver(), )); + let cjs_resolutions = Arc::new(CjsResolutionStore::default()); let cache_db = Caches::new(deno_dir_provider.clone()); let node_analysis_cache = NodeAnalysisCache::new(cache_db.node_analysis_db()); @@ -162,6 +162,7 @@ pub async fn create_module_loader_for_eszip( node_resolver.clone(), npm_resolver.clone().into_npm_resolver(), )); + let maybe_import_map = maybe_import_map .map(|import_map| Some(Arc::new(import_map))) .unwrap_or_else(|| None); @@ -196,29 +197,41 @@ pub async fn create_module_loader_for_eszip( }), npm_resolver: npm_resolver.into_npm_resolver(), vfs, - module_code: code_fs, + module_code: entry_module_source, static_files, npm_snapshot: snapshot, vfs_path: vfs_root_dir_path, }) } -pub async fn create_module_loader_for_standalone_from_eszip_kind( +pub async fn create_module_loader_for_standalone_from_eszip_kind

( eszip_payload_kind: EszipPayloadKind, + base_dir_path: P, maybe_import_map_arc: Option>, maybe_import_map_path: Option, include_source_map: bool, -) -> Result { - let eszip = payload_to_eszip(eszip_payload_kind).await; - - let mut maybe_import_map: Option = None; +) -> Result +where + P: AsRef, +{ + let mut maybe_import_map = None; + let eszip = match eszip_migrate::try_migrate_if_needed( + payload_to_eszip(eszip_payload_kind).await, + ) + .await + { + Ok(v) => v, + Err(_old) => { + bail!("eszip migration failed"); + } + }; if let Some(import_map) = maybe_import_map_arc { let clone_import_map = (*import_map).clone(); maybe_import_map = Some(clone_import_map); } else if let Some(import_map_path) = maybe_import_map_path { let import_map_url = Url::parse(import_map_path.as_str())?; - if let Some(import_map_module) = eszip.get_import_map(import_map_url.as_str()) { + if let Some(import_map_module) = eszip.ensure_import_map(import_map_url.as_str()) { if let Some(source) = import_map_module.source().await { let source = std::str::from_utf8(&source)?.to_string(); let result = parse_from_json(&import_map_url, &source)?; @@ -229,6 +242,7 @@ pub async fn create_module_loader_for_standalone_from_eszip_kind( create_module_loader_for_eszip( eszip, + base_dir_path, Metadata { ca_stores: None, ca_data: None, diff --git a/crates/sb_module_loader/standalone/standalone_module_loader.rs b/crates/sb_module_loader/standalone/standalone_module_loader.rs index abafe87fd..006aa32cc 100644 --- a/crates/sb_module_loader/standalone/standalone_module_loader.rs +++ b/crates/sb_module_loader/standalone/standalone_module_loader.rs @@ -13,6 +13,8 @@ use deno_core::{ModuleLoader, ModuleSourceCode}; use deno_core::{ModuleSpecifier, RequestedModuleType}; use deno_semver::npm::NpmPackageReqReference; use eszip::deno_graph; +use sb_eszip_shared::AsyncEszipDataRead; +use sb_graph::LazyLoadableEszip; use std::sync::Arc; use tracing::instrument; @@ -21,7 +23,7 @@ use crate::util::arc_u8_to_arc_str; use sb_graph::graph_resolver::MappedSpecifierResolver; pub struct SharedModuleLoaderState { - pub(crate) eszip: eszip::EszipV2, + pub(crate) eszip: LazyLoadableEszip, pub(crate) mapped_specifier_resolver: MappedSpecifierResolver, pub(crate) npm_module_loader: Arc, pub(crate) node_resolver: Arc, @@ -90,7 +92,7 @@ impl ModuleLoader for EmbeddedModuleLoader { }; if specifier.scheme() == "jsr" { - if let Some(module) = self.shared.eszip.get_module(specifier.as_str()) { + if let Some(module) = self.shared.eszip.ensure_module(specifier.as_str()) { return Ok(ModuleSpecifier::parse(&module.specifier).unwrap()); } } @@ -151,12 +153,13 @@ impl ModuleLoader for EmbeddedModuleLoader { }; } - let Some(module) = self.shared.eszip.get_module(original_specifier.as_str()) else { + let Some(module) = self.shared.eszip.ensure_module(original_specifier.as_str()) else { return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( "Module not found: {}", original_specifier )))); }; + let original_specifier = original_specifier.clone(); let found_specifier = ModuleSpecifier::parse(&module.specifier).expect("invalid url in eszip"); diff --git a/examples/import-meta-dirname/index.ts b/examples/import-meta-dirname/index.ts new file mode 100644 index 000000000..834570edd --- /dev/null +++ b/examples/import-meta-dirname/index.ts @@ -0,0 +1,7 @@ +// --static "examples/**/*.txt" + +import { join } from "https://deno.land/std/path/mod.ts"; + +Deno.serve((_req) => { + return new Response(Deno.readTextFileSync(join(import.meta.dirname, "meow.txt"))); +}); diff --git a/examples/import-meta-dirname/meow.txt b/examples/import-meta-dirname/meow.txt new file mode 100644 index 000000000..b126ba4b5 --- /dev/null +++ b/examples/import-meta-dirname/meow.txt @@ -0,0 +1 @@ +meowmeow \ No newline at end of file diff --git a/examples/main-empty/index.ts b/examples/main-empty/index.ts index 29310da70..5e367108f 100644 --- a/examples/main-empty/index.ts +++ b/examples/main-empty/index.ts @@ -1,3 +1,5 @@ +// --static "examples/**/*.md" + import isEven from "npm:is-even"; import { sum } from "./some-import.ts"; @@ -7,7 +9,7 @@ globalThis.isTenEven = isEven(10); console.log(Deno.version); let val = sum(1, 2); console.log(Deno.cwd()) -console.log(Deno.readFileSync('mnt/data/examples/postgres-on-the-edge/README.md')); +console.log(Deno.readFileSync('../postgres-on-the-edge/README.md')); Deno.serve(async () => { return new Response(