From b343cdccb2dbcd7aa6ed8f5c65851af8c26d51c0 Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 18 Jun 2025 17:16:01 -0400 Subject: [PATCH 1/4] Adjust allocation strategy for TinyGo-derived modules so they don't immediately crash. You can run the TinyGo empty project in Viceroy now (once Viceroy is updated to depend on the new wit-component herein); it immediately crashed before. It serves a request and returns a 0 exit code, somewhat obscured by a backtrace because an empty project doesn't implement a Reactor. The crux of this update is that the GC phase of adapter application now takes a `ReallocScheme` arg to its `encode()` method. This represents slightly richer advice on how to find or construct a `realloc()` function for the adapter to use to allocate its state. Before, it took only an `Option`: `None` meant "use `memory.grow`", and `Some(such_and_such)` meant "use a realloc function called `such_and_such`, provided in the module being adapted". Now we can also say "construct a realloc routine using the given malloc routine found in the module being adapted". This lets us communicate to TinyGo's GC that we have reserved some RAM, so it doesn't stomp on us later. --- crates/wasmparser/src/readers/core/custom.rs | 2 +- crates/wit-component/src/encoding.rs | 26 ++++ crates/wit-component/src/encoding/world.rs | 44 ++++-- crates/wit-component/src/gc.rs | 142 ++++++++++++++----- 4 files changed, 166 insertions(+), 48 deletions(-) diff --git a/crates/wasmparser/src/readers/core/custom.rs b/crates/wasmparser/src/readers/core/custom.rs index b682f016f0..33410cfe16 100644 --- a/crates/wasmparser/src/readers/core/custom.rs +++ b/crates/wasmparser/src/readers/core/custom.rs @@ -103,7 +103,7 @@ impl<'a> CustomSectionReader<'a> { /// Return value of [`CustomSectionReader::as_known`]. /// /// Note that this is `#[non_exhaustive]` because depending on crate features -/// this enumeration will different entries. +/// this enumeration will contain different entries. #[allow(missing_docs)] #[non_exhaustive] pub enum KnownCustom<'a> { diff --git a/crates/wit-component/src/encoding.rs b/crates/wit-component/src/encoding.rs index 92f04bdad9..fcf5d06733 100644 --- a/crates/wit-component/src/encoding.rs +++ b/crates/wit-component/src/encoding.rs @@ -81,6 +81,8 @@ use std::collections::HashMap; use std::hash::Hash; use std::mem; use wasm_encoder::*; +use wasmparser::KnownCustom::Producers; +use wasmparser::Payload::CustomSection; use wasmparser::{Validator, WasmFeatures}; use wit_parser::{ Function, FunctionKind, InterfaceId, LiveTypes, Resolve, Stability, Type, TypeDefKind, TypeId, @@ -2670,11 +2672,34 @@ pub(super) struct Adapter { library_info: Option, } +/// Returns whether the module appears to have been generated by TinyGo. Absent +/// any evidence, returns false. +fn is_produced_by_tiny_go(module: &[u8]) -> Result { + for section in wasmparser::Parser::new(0).parse_all(module) { + if let CustomSection(custom_reader) = section? { + if let Producers(producers_reader) = custom_reader.as_known() { + for field in producers_reader { + let field = field?; + if field.name == "processed-by" { + for value in field.values.into_iter() { + if value?.name == "TinyGo" { + return Ok(true); + } + } + } + } + return Ok(false); + } + } + } + return Ok(false); +} /// An encoder of components based on `wit` interface definitions. #[derive(Default)] pub struct ComponentEncoder { module: Vec, module_import_map: Option, + module_is_produced_by_tiny_go: bool, pub(super) metadata: Bindgen, validate: bool, pub(super) main_module_exports: IndexSet, @@ -2693,6 +2718,7 @@ impl ComponentEncoder { /// core module. pub fn module(mut self, module: &[u8]) -> Result { let (wasm, metadata) = self.decode(module.as_ref())?; + self.module_is_produced_by_tiny_go = is_produced_by_tiny_go(&module)?; let (wasm, module_import_map) = ModuleImportMap::new(wasm)?; let exports = self .merge_metadata(metadata) diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs index fbfe7df842..f1c8486005 100644 --- a/crates/wit-component/src/encoding/world.rs +++ b/crates/wit-component/src/encoding/world.rs @@ -1,4 +1,5 @@ use super::{Adapter, ComponentEncoder, LibraryInfo, RequiredOptions}; +use crate::gc::ReallocScheme; use crate::validation::{ Import, ImportMap, ValidatedModule, validate_adapter_module, validate_module, }; @@ -24,7 +25,7 @@ pub struct WorldAdapter<'a> { /// `EncodingState` as this information doesn't change throughout the encoding /// process. pub struct ComponentWorld<'a> { - /// Encoder configuration with modules, the document ,etc. + /// Encoder configuration with modules, the document, etc. pub encoder: &'a ComponentEncoder, /// Validation information of the input module, or `None` in `--types-only` /// mode. @@ -83,6 +84,23 @@ impl<'a> ComponentWorld<'a> { Ok(ret) } + /// Given that there is no realloc function exported from the main module, + /// returns the realloc scheme we should use. + fn fallback_realloc_scheme(&self) -> ReallocScheme { + if self.encoder.module_is_produced_by_tiny_go { + // TODO: Also check for cabi_realloc and bail if it's there. + // If it appears the module was emitted by TinyGo, we delegate to + // its `malloc()` function. (TinyGo assumes its GC has rein over the + // whole memory and quickly overwrites the adapter's + // `memory.grow`-allocated State struct unless we inform TinyGo's GC + // of the memory we use.) + ReallocScheme::Malloc("malloc") + } else { + // If it's not TinyGo, use `memory.grow` instead. + ReallocScheme::MemoryGrow + } + } + /// Process adapters which are required here. Iterate over all /// adapters and figure out what functions are required from the /// adapter itself, either because the functions are imported by the @@ -121,8 +139,8 @@ impl<'a> ComponentWorld<'a> { } else { // Without `library_info` this means that this is an adapter. // The goal of the adapter is to provide a suite of symbols that - // can be imported, but not all symbols may be imported. Here - // the module is trimmed down to only what's needed by the + // can be imported, but perhaps not all symbols are imported. + // Here the module is trimmed down to only what's needed by the // original main module. // // The main module requires `required_by_import` above, but @@ -163,17 +181,17 @@ impl<'a> ComponentWorld<'a> { required.insert(name.to_string()); } + let realloc = if self.encoder.realloc_via_memory_grow { + self.fallback_realloc_scheme() + } else { + match self.info.exports.realloc_to_import_into_adapter() { + Some(name) => ReallocScheme::Realloc(name), + None => self.fallback_realloc_scheme(), + } + }; Cow::Owned( - crate::gc::run( - wasm, - &required, - if self.encoder.realloc_via_memory_grow { - None - } else { - self.info.exports.realloc_to_import_into_adapter() - }, - ) - .context("failed to reduce input adapter module to its minimal size")?, + crate::gc::run(wasm, &required, realloc) + .context("failed to reduce input adapter module to its minimal size")?, ) }; let info = validate_adapter_module( diff --git a/crates/wit-component/src/gc.rs b/crates/wit-component/src/gc.rs index 71a58e4d26..ba43375180 100644 --- a/crates/wit-component/src/gc.rs +++ b/crates/wit-component/src/gc.rs @@ -13,7 +13,22 @@ use wasmparser::*; const PAGE_SIZE: i32 = 64 * 1024; -/// This function will reduce the input core `wasm` module to only the set of +/// The primitive allocation method we use to implement a realloc function, +/// based on what the main module provides +pub enum ReallocScheme<'a> { + /// The main module exports a ready-made realloc function with the given + /// name. + Realloc(&'a str), + /// The main module exports a `malloc(size: u32) -> u32` function with the + /// given name. We can wrap this in a thin shell to make realloc. + Malloc(&'a str), + /// The main module exports no realloc function or anything to make one out + /// of, so we should implement realloc in terms of `memory.grow`. + MemoryGrow, +} +use ReallocScheme::*; + +/// This function will reduce the input core `wasm` module (the adapter, I believe) to only the set of /// exports `required`. /// /// This internally performs a "gc" pass after removing exports to ensure that @@ -21,7 +36,7 @@ const PAGE_SIZE: i32 = 64 * 1024; pub fn run( wasm: &[u8], required: &IndexSet, - main_module_realloc: Option<&str>, + realloc_scheme: ReallocScheme, ) -> Result> { assert!(!required.is_empty()); @@ -46,7 +61,7 @@ pub fn run( } assert!(!module.exports.is_empty()); module.liveness()?; - module.encode(main_module_realloc) + module.encode(realloc_scheme) } /// This function generates a Wasm function body which implements `cabi_realloc` in terms of `memory.grow`. It @@ -500,9 +515,21 @@ impl<'a> Module<'a> { live_iter(&self.live_tables, self.tables.iter()) } + /// Returns a new Wasm function body which implements `cabi_realloc` to call + /// a `malloc` that's exported from the main module. Pass in the index of + /// the `malloc` function within the adapter. + fn realloc_via_malloc(&self, malloc_index: u32) -> Result { + let mut func = wasm_encoder::Function::new([]); + func.instructions() + .local_get(3) // desired new size + .call(malloc_index) + .end(); + Ok(func) + } + /// Encodes this `Module` to a new wasm module which is gc'd and only /// contains the items that are live as calculated by the `liveness` pass. - fn encode(&mut self, main_module_realloc: Option<&str>) -> Result> { + fn encode(&mut self, realloc_scheme: ReallocScheme) -> Result> { // Data structure used to track the mapping of old index to new index // for all live items. let mut map = Encoder::default(); @@ -584,11 +611,38 @@ impl<'a> Module<'a> { let is_realloc = |m, n| m == "__main_module__" && matches!(n, "canonical_abi_realloc" | "cabi_realloc"); + let add_malloc_type = |types: &mut wasm_encoder::TypeSection| { + let type_index = types.len(); + types + .ty() + .function([wasm_encoder::ValType::I32], [wasm_encoder::ValType::I32]); + type_index + }; + + let mut malloc_index = None; + let mut func_names = Vec::new(); + + // Import malloc before we start keeping track of the realloc_index so + // we don't have to add num_func_imports every time we read it. + if let Malloc(malloc_name) = realloc_scheme { + imports.import( + "__main_module__", + malloc_name, + EntityType::Function(add_malloc_type(&mut types)), + ); + malloc_index = Some(num_func_imports); + map.funcs.reserve(); + func_names.push((num_func_imports, malloc_name)); + num_func_imports += 1; + } + let (imported, local) = self.live_funcs() .partition::, _>(|(_, func)| match &func.def { Definition::Import(m, n) => { - !is_realloc(*m, *n) || main_module_realloc.is_some() + // Keep realloc function around iff we're going to use + // it. Always keep other functions around. + !is_realloc(*m, *n) || matches!(realloc_scheme, Realloc(_)) } Definition::Local(_) => false, }); @@ -603,7 +657,10 @@ impl<'a> Module<'a> { // exports that function, but possibly using a different name // (e.g. `canonical_abi_realloc`). Update the name to match if necessary. realloc_index = Some(num_func_imports); - main_module_realloc.unwrap_or(n) + match realloc_scheme { + Realloc(name) => name, + _ => n, + } } else { n }; @@ -637,22 +694,17 @@ impl<'a> Module<'a> { let sp = self.find_mut_i32_global("__stack_pointer")?; let allocation_state = self.find_mut_i32_global("allocation_state")?; - let mut func_names = Vec::new(); - - if let (Some(realloc), Some(_), None) = (main_module_realloc, sp, realloc_index) { + if let (Realloc(realloc_name), Some(_), None) = (&realloc_scheme, sp, realloc_index) { // The main module exports a realloc function, and although the adapter doesn't import it, we're going // to add a function which calls it to allocate some stack space, so let's add an import now. - - // Tell the function remapper we're reserving a slot for our extra import: - map.funcs.next += 1; - + map.funcs.reserve(); realloc_index = Some(num_func_imports); imports.import( "__main_module__", - realloc, + realloc_name, EntityType::Function(add_realloc_type(&mut types)), ); - func_names.push((num_func_imports, realloc)); + func_names.push((num_func_imports, realloc_name)); num_func_imports += 1; } @@ -665,7 +717,16 @@ impl<'a> Module<'a> { // exporting it. In this case, we need to define a local function it can call instead. realloc_index = Some(num_func_imports + funcs.len()); funcs.function(ty); - code.function(&realloc_via_memory_grow()); + let realloc_func = match realloc_scheme { + Malloc(_) => self.realloc_via_malloc( + malloc_index.expect("this was set above when the enum was Malloc"), + )?, + MemoryGrow => realloc_via_memory_grow(), + Realloc(_) => bail!( + "shouldn't get here, as we already know the main module doesn't export a realloc function" + ), + }; + code.function(&realloc_func); } Definition::Local(_) => { funcs.function(ty); @@ -673,22 +734,18 @@ impl<'a> Module<'a> { } } - let lazy_stack_init_index = - if sp.is_some() && allocation_state.is_some() && main_module_realloc.is_some() { - // We have a stack pointer, a `cabi_realloc` function from the main module, and a global variable for + let lazy_stack_init_index = match (&realloc_scheme, sp, allocation_state) { + (Realloc(_), Some(_), Some(_)) => { + // We have a `cabi_realloc` function from the main module, a stack pointer, and a global variable for // keeping track of (and short-circuiting) reentrance. That means we can (and should) do lazy stack // allocation. let index = num_func_imports + funcs.len(); - - // Tell the function remapper we're reserving a slot for our extra function: - map.funcs.next += 1; - + map.funcs.reserve(); funcs.function(add_empty_type(&mut types)); - Some(index) - } else { - None - }; + } + _ => None, + }; let exported_funcs = self .exports @@ -733,10 +790,16 @@ impl<'a> Module<'a> { if sp.is_some() && (realloc_index.is_none() || allocation_state.is_none()) { // Either the main module does _not_ export a realloc function, or it is not safe to use for stack - // allocation because we have no way to short-circuit reentrance, so we'll use `memory.grow` instead. + // allocation because we have no way to short-circuit reentrance, so we'll provide our own realloc + // function instead. realloc_index = Some(num_func_imports + funcs.len()); funcs.function(add_realloc_type(&mut types)); - code.function(&realloc_via_memory_grow()); + let realloc_func = if let Some(index) = malloc_index { + self.realloc_via_malloc(index)? + } else { + realloc_via_memory_grow() + }; + code.function(&realloc_func); } // Inject a start function to initialize the stack pointer which will be local to this module. This only @@ -879,11 +942,14 @@ impl<'a> Module<'a> { section.push(code); subsection.encode(&mut section); }; - if let (Some(realloc_index), true) = ( - realloc_index, - main_module_realloc.is_none() || allocation_state.is_none(), - ) { - func_names.push((realloc_index, "realloc_via_memory_grow")); + if let Some(realloc_index) = realloc_index { + match (realloc_scheme, allocation_state) { + (MemoryGrow, _) | (_, None) => { + func_names.push((realloc_index, "realloc_via_memory_grow")) + } + (Malloc(_), _) => func_names.push((realloc_index, "realloc_via_malloc")), + (Realloc(_), _) => (), // The realloc routine is in another module. + } } if let Some(lazy_stack_init_index) = lazy_stack_init_index { func_names.push((lazy_stack_init_index, "allocate_stack")); @@ -1118,6 +1184,14 @@ impl Remap { self.next += 1; } + /// Reserves the next "new index" for an item you are adding. + /// + /// For example, call this when you add a new function to a module and need + /// to avoid other functions getting remapped to its index. + fn reserve(&mut self) { + self.next += 1; + } + /// Returns the new index corresponding to an old index. /// /// Panics if the `old` index was not added via `push` above. From 40bf3aa285e0a23d76f342c3931599a211e928dd Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 9 Jul 2025 13:02:00 -0400 Subject: [PATCH 2/4] Make sure we use `cabi_realloc()` in favor of a `malloc()`-based realloc if the former is provided. This allows TinyGo programs to take control over reallocation if they desire, elevating an explicit API over the heuristic identification of `malloc()`. It turns out it was already doing this, through the call to `realloc_to_import_into_adapter()`, which returns an (even more specific) `cabi_realloc_adapter()` if there is one and otherwise `cabi_realloc`. --- crates/wit-component/src/encoding/world.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs index f1c8486005..bc53f83e3f 100644 --- a/crates/wit-component/src/encoding/world.rs +++ b/crates/wit-component/src/encoding/world.rs @@ -88,12 +88,11 @@ impl<'a> ComponentWorld<'a> { /// returns the realloc scheme we should use. fn fallback_realloc_scheme(&self) -> ReallocScheme { if self.encoder.module_is_produced_by_tiny_go { - // TODO: Also check for cabi_realloc and bail if it's there. // If it appears the module was emitted by TinyGo, we delegate to // its `malloc()` function. (TinyGo assumes its GC has rein over the // whole memory and quickly overwrites the adapter's - // `memory.grow`-allocated State struct unless we inform TinyGo's GC - // of the memory we use.) + // `memory.grow`-allocated State struct, causing a crash. So we use + // `malloc()` to inform TinyGo's GC of the memory we use.) ReallocScheme::Malloc("malloc") } else { // If it's not TinyGo, use `memory.grow` instead. @@ -182,6 +181,8 @@ impl<'a> ComponentWorld<'a> { } let realloc = if self.encoder.realloc_via_memory_grow { + // User explicitly requested memory-grow-based realloc. We + // give them that unless it would definitely crash. self.fallback_realloc_scheme() } else { match self.info.exports.realloc_to_import_into_adapter() { From 35e9d09ef827c573a4c64ee2ab78a14b4bf59b9c Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Wed, 9 Jul 2025 15:29:33 -0400 Subject: [PATCH 3/4] Complain if a TinyGo module has a missing or malformed `malloc()`. In this case, we have no non-crashing way of allocating memory for the adapter state. --- crates/wit-component/src/encoding/world.rs | 25 ++++++++++++++++------ crates/wit-component/src/validation.rs | 6 ++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs index bc53f83e3f..e73f869cce 100644 --- a/crates/wit-component/src/encoding/world.rs +++ b/crates/wit-component/src/encoding/world.rs @@ -3,10 +3,11 @@ use crate::gc::ReallocScheme; use crate::validation::{ Import, ImportMap, ValidatedModule, validate_adapter_module, validate_module, }; -use anyhow::{Context, Result}; +use anyhow::{Context, Result, bail}; use indexmap::{IndexMap, IndexSet}; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; +use wasmparser::{FuncType, ValType}; use wit_parser::{ Function, InterfaceId, LiveTypes, Resolve, TypeDefKind, TypeId, TypeOwner, WorldId, WorldItem, WorldKey, @@ -86,17 +87,29 @@ impl<'a> ComponentWorld<'a> { /// Given that there is no realloc function exported from the main module, /// returns the realloc scheme we should use. - fn fallback_realloc_scheme(&self) -> ReallocScheme { + /// + /// Return an error if we need to use an exported malloc() and it wasn't + /// there. + fn fallback_realloc_scheme(&self) -> Result { if self.encoder.module_is_produced_by_tiny_go { // If it appears the module was emitted by TinyGo, we delegate to // its `malloc()` function. (TinyGo assumes its GC has rein over the // whole memory and quickly overwrites the adapter's // `memory.grow`-allocated State struct, causing a crash. So we use // `malloc()` to inform TinyGo's GC of the memory we use.) - ReallocScheme::Malloc("malloc") + let malloc_type = FuncType::new([ValType::I32], [ValType::I32]); + match self.info.exports.get_func_type("malloc") { + Some(func_type) if *func_type == malloc_type => Ok(ReallocScheme::Malloc("malloc")), + Some(_) => bail!( + "TinyGo-derived wasm had a malloc() export, but it lacked the expected type of malloc(i32) -> i32" + ), + None => bail!( + "TinyGo-derived wasm lacked a malloc() export; we don't know how else to reserve space for the adapter's state from its GC" + ), + } } else { // If it's not TinyGo, use `memory.grow` instead. - ReallocScheme::MemoryGrow + Ok(ReallocScheme::MemoryGrow) } } @@ -183,11 +196,11 @@ impl<'a> ComponentWorld<'a> { let realloc = if self.encoder.realloc_via_memory_grow { // User explicitly requested memory-grow-based realloc. We // give them that unless it would definitely crash. - self.fallback_realloc_scheme() + self.fallback_realloc_scheme()? } else { match self.info.exports.realloc_to_import_into_adapter() { Some(name) => ReallocScheme::Realloc(name), - None => self.fallback_realloc_scheme(), + None => self.fallback_realloc_scheme()?, } }; Cow::Owned( diff --git a/crates/wit-component/src/validation.rs b/crates/wit-component/src/validation.rs index 435c5dbd2c..e64d646184 100644 --- a/crates/wit-component/src/validation.rs +++ b/crates/wit-component/src/validation.rs @@ -1103,6 +1103,12 @@ impl ExportMap { Ok(()) } + /// Returns the type of the given exported function, None if an exported + /// function of that name does not exist. + pub(crate) fn get_func_type<'a>(&'a self, name: &str) -> Option<&'a FuncType> { + self.raw_exports.get(name) + } + fn classify( &mut self, export: wasmparser::Export<'_>, From be6af98feb66af7381bc89766d28932df02ed34a Mon Sep 17 00:00:00 2001 From: Erik Rose Date: Thu, 10 Jul 2025 10:18:37 -0400 Subject: [PATCH 4/4] Tweak lifetime syntax to dodge a warning in Rust 1.90 nightly. --- crates/wit-component/src/encoding/world.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/wit-component/src/encoding/world.rs b/crates/wit-component/src/encoding/world.rs index e73f869cce..6c57f6e190 100644 --- a/crates/wit-component/src/encoding/world.rs +++ b/crates/wit-component/src/encoding/world.rs @@ -90,7 +90,7 @@ impl<'a> ComponentWorld<'a> { /// /// Return an error if we need to use an exported malloc() and it wasn't /// there. - fn fallback_realloc_scheme(&self) -> Result { + fn fallback_realloc_scheme(&self) -> Result> { if self.encoder.module_is_produced_by_tiny_go { // If it appears the module was emitted by TinyGo, we delegate to // its `malloc()` function. (TinyGo assumes its GC has rein over the