diff --git a/Cargo.lock b/Cargo.lock index c0e30e7c5..02a32e182 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -132,6 +132,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arrayvec" version = "0.5.2" @@ -717,6 +726,17 @@ dependencies = [ "syn", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive_more" version = "0.99.20" @@ -1027,6 +1047,14 @@ dependencies = [ "slab", ] +[[package]] +name = "fuzz" +version = "0.0.0" +dependencies = [ + "hugr-model", + "libfuzzer-sys", +] + [[package]] name = "fxhash" version = "0.2.1" @@ -1303,6 +1331,7 @@ dependencies = [ name = "hugr-model" version = "0.20.2" dependencies = [ + "arbitrary", "base64", "bumpalo", "capnp", @@ -1796,6 +1825,16 @@ version = "0.2.173" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8cfeafaffdbc32176b64fb251369d52ea9f0a8fbc6f8759edffef7b525d64bb" +[[package]] +name = "libfuzzer-sys" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" +dependencies = [ + "arbitrary", + "cc", +] + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -2006,6 +2045,7 @@ version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" dependencies = [ + "arbitrary", "num-traits", "rand 0.8.5", "serde", @@ -2912,6 +2952,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "9676b89cd56310a87b93dec47b11af744f34d5fc9f367b829474eec0a891350d" dependencies = [ + "arbitrary", "borsh", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index b1ffa4de0..3aaa91880 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,14 +4,15 @@ lto = "thin" [workspace] resolver = "2" members = [ - "hugr", - "hugr-core", - "hugr-passes", - "hugr-cli", - "hugr-model", - "hugr-llvm", - "hugr-py", - "hugr-persistent", + "hugr", + "hugr-core", + "hugr-passes", + "hugr-cli", + "hugr-model", + "hugr-llvm", + "hugr-py", + "hugr-persistent", + "fuzz", ] default-members = ["hugr", "hugr-core", "hugr-passes", "hugr-cli", "hugr-model"] @@ -25,10 +26,10 @@ license = "Apache-2.0" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ - # Set by our CI - 'cfg(ci_run)', - # Set by codecov - 'cfg(coverage,coverage_nightly)', + # Set by our CI + 'cfg(ci_run)', + # Set by codecov + 'cfg(coverage,coverage_nightly)', ] } missing_docs = "warn" diff --git a/devenv.lock b/devenv.lock index 9b13193f5..92626f0c5 100644 --- a/devenv.lock +++ b/devenv.lock @@ -51,31 +51,10 @@ "type": "github" } }, - "git-hooks": { - "inputs": { - "flake-compat": "flake-compat", - "gitignore": "gitignore", - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1747372754, - "owner": "cachix", - "repo": "git-hooks.nix", - "rev": "80479b6ec16fefd9c1db3ea13aeb038c60530f46", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "git-hooks.nix", - "type": "github" - } - }, "gitignore": { "inputs": { "nixpkgs": [ - "git-hooks", + "pre-commit-hooks", "nixpkgs" ] }, @@ -107,15 +86,33 @@ "type": "github" } }, + "pre-commit-hooks": { + "inputs": { + "flake-compat": "flake-compat", + "gitignore": "gitignore", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1747372754, + "owner": "cachix", + "repo": "pre-commit-hooks.nix", + "rev": "80479b6ec16fefd9c1db3ea13aeb038c60530f46", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": 
"pre-commit-hooks.nix", + "type": "github" + } + }, "root": { "inputs": { "devenv": "devenv", "fenix": "fenix", - "git-hooks": "git-hooks", "nixpkgs": "nixpkgs", - "pre-commit-hooks": [ - "git-hooks" - ] + "pre-commit-hooks": "pre-commit-hooks" } }, "rust-analyzer-src": { diff --git a/devenv.nix b/devenv.nix index 8664a43d3..d17c3b483 100644 --- a/devenv.nix +++ b/devenv.nix @@ -48,7 +48,7 @@ in # https://devenv.sh/languages/ # https://devenv.sh/reference/options/#languagesrustversion languages.rust = { - channel = "stable"; + channel = "nightly"; enable = true; components = [ "rustc" "cargo" "clippy" "rustfmt" "rust-analyzer" ]; }; diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 000000000..524b25fce --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +hugr-model = { path = "../hugr-model/", features = ["arbitrary"] } + +# [dependencies.hugr] +# path = ".." + +[[bin]] +name = "fuzz_random" +path = "fuzz_targets/fuzz_random.rs" +test = false +doc = false +bench = false + +[[bin]] +name = "fuzz_structure" +path = "fuzz_targets/fuzz_structure.rs" +test = false +doc = false +bench = false diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 000000000..68b9fc1c4 --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,64 @@ +# Fuzz testing + +This project uses `cargo-fuzz` for doing fuzz testing for hugr. + +## Requisites + +1. Install `cargo-fuzz` with: `cargo install cargo-fuzz` +2. Build with `cargo fuzz build` + +> [!NOTE] +> The `libFuzzer` used by `cargo-fuzz` needs **nightly**. 
+ +## Fuzz targets + +You can list the fuzzing targets with: +`cargo fuzz list` + +### Model: Random + +The [fuzz_random](./fuzz_targets/fuzz_random.rs) target uses the coverage-guided +`libFuzzer` fuzzing engine to generate random bytes that we then try to +convert to a package with `hugr_model::v0::ast::Package::from_str()`. + +To run this target: +`cargo fuzz run fuzz_random` + +It is recommended to provide `libFuzzer` with a corpus to speed up the +generation of test inputs. For this we can use the fixtures in +`hugr/hugr-model/tests/fixtures`: +`cargo fuzz run fuzz_random ../hugr-model/tests/fixtures` + +If you want `libFuzzer` to mutate the examples with ASCII characters only: +`cargo fuzz run fuzz_random -- -only_ascii=1` + +### Model: Structure + +The [fuzz_structure](./fuzz_targets/fuzz_structure.rs) target uses `libFuzzer` to do +[structure-aware](https://rust-fuzz.github.io/book/cargo-fuzz/structure-aware-fuzzing.html) +modifications of the `hugr_model::v0::ast::Package` and its members. + +To run this target: +`cargo fuzz run fuzz_structure` + +> [!NOTE] +> This target needs some slight modifications to the `hugr-model` source +> code so the structs and enums can derive the `Arbitrary` implementations +> needed by `libFuzzer`. +> The `arbitrary` features for `ordered-float` and `smol_str` are also needed. + +## Results + +The fuzzing process will be terminated once a crash is detected, and the offending input +will be saved to the `artifacts/` directory. You can reproduce the crash by doing: +`cargo fuzz run fuzz_structure artifacts/fuzz_structure/crash-XXXXXX` + +If you want to keep the fuzzing process running even after a crash has been detected, +you can provide the options `-fork=1` and `-ignore_crashes=1`. + +## Providing options to `libFuzzer` + +You can provide lots of options to `libFuzzer` by doing `cargo fuzz run TARGET -- -flag1=val1 -flag2=val2`. 
+ +To see all the available options: +`cargo fuzz run TARGET -- -help=1` diff --git a/fuzz/fuzz_targets/fuzz_random.rs b/fuzz/fuzz_targets/fuzz_random.rs new file mode 100644 index 000000000..866e5405f --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_random.rs @@ -0,0 +1,14 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use hugr_model::v0 as model; +use std::str::FromStr; + +// Feeds the raw bytes produced by libFuzzer to the text parser. Input that is +// not valid UTF-8 is skipped and the parse result is discarded, so this target +// only surfaces panics or crashes raised while parsing. +fuzz_target!(|data: &[u8]| { + if let Ok(s) = std::str::from_utf8(data) { + let _package_ast = model::ast::Package::from_str(s); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_structure.rs b/fuzz/fuzz_targets/fuzz_structure.rs new file mode 100644 index 000000000..ec0882c56 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_structure.rs @@ -0,0 +1,21 @@ +#![no_main] + +use hugr_model::v0 as model; +use libfuzzer_sys::fuzz_target; +use model::bumpalo::Bump; +use hugr_model::v0::ast::Package; + +// Round-trip check: resolve an arbitrary AST package, write it to the binary +// format, read it back and require the result to equal the resolved package. +fuzz_target!(|package: Package| { + let bump = Bump::new(); + // A generated AST may fail to resolve; treat that as a rejected input + // instead of panicking, so the fuzzer keeps going to the round trip. + let Ok(package) = package.resolve(&bump) else { + return; + }; + let bytes = model::binary::write_to_vec(&package); + let deserialized_package = model::binary::read_from_slice(&bytes, &bump) + .expect("a package written by `write_to_vec` should be readable"); + assert_eq!(package, deserialized_package); +}); diff --git a/hugr-model/Cargo.toml b/hugr-model/Cargo.toml index 24c1f9e42..11863687b 100644 --- a/hugr-model/Cargo.toml +++ b/hugr-model/Cargo.toml @@ -23,13 +23,14 @@ derive_more = { workspace = true, features = ["display", "error", "from"] } fxhash.workspace = true indexmap.workspace = true itertools.workspace = true -ordered-float = { workspace = true } +ordered-float = { workspace = true, features = ["arbitrary"] } pest = { workspace = true } pest_derive = { workspace = true } pretty = { workspace = true } -smol_str = { workspace = true, features = ["serde"] } +smol_str = { workspace = true, features = ["serde", "arbitrary"] } thiserror.workspace = true pyo3 = { workspace = true, optional = true, features = ["extension-module"] } +arbitrary = { version = "1", optional = true, features = ["derive"] } [features] pyo3 = 
["dep:pyo3"] diff --git a/hugr-model/src/v0/ast/mod.rs b/hugr-model/src/v0/ast/mod.rs index faee6f827..1b44c02ba 100644 --- a/hugr-model/src/v0/ast/mod.rs +++ b/hugr-model/src/v0/ast/mod.rs @@ -43,6 +43,7 @@ pub use resolve::ResolveError; /// /// [`table::Package`]: crate::v0::table::Package #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Package { /// The sequence of modules in the package. pub modules: Vec, @@ -70,6 +71,7 @@ impl Package { /// /// [`table::Module`]: crate::v0::table::Module #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Module { /// The root region of the module. /// @@ -103,6 +105,7 @@ impl Module { /// /// [`table::Node`]: crate::v0::table::Node #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Node { /// The operation that the node performs. pub operation: Operation, @@ -129,6 +132,7 @@ pub struct Node { /// /// [`table::Operation`]: crate::v0::table::Operation #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum Operation { /// Invalid operation to be used as a placeholder. #[default] @@ -193,6 +197,7 @@ impl Operation { /// /// [`table::Symbol`]: crate::v0::table::Symbol #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Symbol { /// The name of the symbol. pub name: SymbolName, @@ -210,6 +215,7 @@ pub struct Symbol { /// /// [`table::Param`]: crate::v0::table::Param #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Param { /// The name of the parameter. 
pub name: VarName, @@ -223,6 +229,7 @@ pub struct Param { /// /// [`table::Region`]: crate::v0::table::Region #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct Region { /// The kind of the region. See [`RegionKind`] for details. pub kind: RegionKind, @@ -247,6 +254,7 @@ pub struct Region { /// /// [`table::Term`]: crate::v0::table::Term #[derive(Debug, Clone, PartialEq, Eq, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum Term { /// Standin for any term. #[default] @@ -277,6 +285,7 @@ impl From for Term { /// /// [`table::SeqPart`]: crate::v0::table::SeqPart #[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum SeqPart { /// An individual item in the sequence. Item(Term), diff --git a/hugr-model/src/v0/mod.rs b/hugr-model/src/v0/mod.rs index 27e6605ae..59d9f946c 100644 --- a/hugr-model/src/v0/mod.rs +++ b/hugr-model/src/v0/mod.rs @@ -343,6 +343,7 @@ impl<'py> pyo3::IntoPyObject<'py> for ScopeClosure { /// The kind of a region. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum RegionKind { /// Data flow region. #[default] @@ -388,6 +389,7 @@ impl<'py> pyo3::IntoPyObject<'py> for RegionKind { /// The name of a variable. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct VarName(SmolStr); impl VarName { @@ -424,6 +426,7 @@ impl<'py> pyo3::IntoPyObject<'py> for &VarName { /// The name of a symbol. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct SymbolName(SmolStr); impl SymbolName { @@ -449,6 +452,7 @@ impl<'py> pyo3::FromPyObject<'py> for SymbolName { /// The name of a link. 
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub struct LinkName(SmolStr); impl LinkName { @@ -496,6 +500,7 @@ impl<'py> pyo3::IntoPyObject<'py> for &LinkName { /// sequences of arbitrary length. To enable cheap cloning and sharing, /// strings and byte sequences use reference counting. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature="arbitrary", derive(arbitrary::Arbitrary))] pub enum Literal { /// String literal. Str(SmolStr),