Skip to content

Support VariableList longer than 2**31 on 32-bit architectures #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/test-suite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,28 @@ jobs:
run: rustup update stable
- name: Run tests
run: cargo test --release
cross-test-i686:
name: cross test i686-unknown-linux-gnu
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install cross
run: cargo install cross --git https://github.com/cross-rs/cross
- name: Add i686-unknown-linux-gnu target
run: rustup target add i686-unknown-linux-gnu
- name: Run cross test for i686-unknown-linux-gnu
run: cross test --target i686-unknown-linux-gnu
cross-test-i686-overflow:
name: cross test i686-unknown-linux-gnu (typenum overflow feature)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install cross
run: cargo install cross --git https://github.com/cross-rs/cross
- name: Add i686-unknown-linux-gnu target
run: rustup target add i686-unknown-linux-gnu
- name: Run cross test for i686-unknown-linux-gnu with cap-typenum-to-usize-overflow
run: cross test --target i686-unknown-linux-gnu --features cap-typenum-to-usize-overflow
coverage:
name: cargo-tarpaulin
runs-on: ubuntu-latest
Expand Down
9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,16 @@ typenum = "1.12.0"
smallvec = "1.8.0"
arbitrary = { version = "1.0", features = ["derive"], optional = true }
itertools = "0.13.0"
ethereum_hashing = {version = "0.7.0", optional = true}

[dev-dependencies]
serde_json = "1.0.0"
tree_hash_derive = "0.10.0"
ethereum_hashing = {version = "0.7.0"}

[target.i686-unknown-linux-gnu]
rustflags = ["-C", "target-feature=+sse2"]

[features]
# Use with great care: see the comment on `to_usize` in src/typenum_helpers.rs
cap-typenum-to-usize-overflow=["dep:ethereum_hashing"]
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
mod fixed_vector;
pub mod serde_utils;
mod tree_hash;
mod typenum_helpers;
mod variable_list;

pub use fixed_vector::FixedVector;
Expand Down
118 changes: 92 additions & 26 deletions src/tree_hash.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,107 @@
use crate::typenum_helpers::to_usize;
use tree_hash::{Hash256, MerkleHasher, TreeHash, TreeHashType};
use typenum::Unsigned;

/// A helper function providing common functionality between the `TreeHash` implementations for
/// `FixedVector` and `VariableList`.
pub fn vec_tree_hash_root<T, N>(vec: &[T]) -> Hash256
where
T: TreeHash,
N: Unsigned,
{
pub fn packing_factor<T: TreeHash>() -> usize {
match T::tree_hash_type() {
TreeHashType::Basic => {
let mut hasher = MerkleHasher::with_leaves(
(N::to_usize() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(),
);
TreeHashType::Basic => T::tree_hash_packing_factor(),
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => 1,
}
}

mod default_impl {
use super::*;
/// A helper function providing common functionality between the `TreeHash` implementations for
/// `FixedVector` and `VariableList`.
pub fn vec_tree_hash_root<T, N>(vec: &[T]) -> Hash256
where
T: TreeHash,
N: Unsigned,
{
match T::tree_hash_type() {
TreeHashType::Basic => {
let mut hasher = MerkleHasher::with_leaves(
(to_usize::<N>() + T::tree_hash_packing_factor() - 1)
/ T::tree_hash_packing_factor(),
);

for item in vec {
hasher
.write(&item.tree_hash_packed_encoding())
.expect("ssz_types variable vec should not contain more elements than max");
}

for item in vec {
hasher
.write(&item.tree_hash_packed_encoding())
.expect("ssz_types variable vec should not contain more elements than max");
.finish()
.expect("ssz_types variable vec should not have a remaining buffer")
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let mut hasher = MerkleHasher::with_leaves(N::to_usize());

hasher
.finish()
.expect("ssz_types variable vec should not have a remaining buffer")
}
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
let mut hasher = MerkleHasher::with_leaves(N::to_usize());
for item in vec {
hasher
.write(item.tree_hash_root().as_slice())
.expect("ssz_types vec should not contain more elements than max");
}

for item in vec {
hasher
.write(item.tree_hash_root().as_slice())
.expect("ssz_types vec should not contain more elements than max");
.finish()
.expect("ssz_types vec should not have a remaining buffer")
}
}
}
}

#[cfg(feature = "cap-typenum-to-usize-overflow")]
mod arch_32x_workaround {
use super::*;
use ethereum_hashing::{hash32_concat, ZERO_HASHES};
use tree_hash::{Hash256, TreeHash};
use typenum::Unsigned;

type MaxDepth = typenum::U536870912;

hasher
.finish()
.expect("ssz_types vec should not have a remaining buffer")
/// Lifts a Merkle root from a tree of `current_depth` levels to one of `target_depth` levels.
///
/// For every level in `current_depth..target_depth`, in ascending order, the running hash is
/// passed as the first argument to `hash32_concat` together with `ZERO_HASHES[level]`
/// (presumably the root of an all-zero subtree of that depth; confirm against
/// `ethereum_hashing`). When `current_depth >= target_depth` the range is empty and `hash`
/// is returned unchanged.
///
/// The `Current` and `Target` type parameters are not read by the body; they only label the
/// call site.
///
/// # Panics
///
/// Panics if a visited level is out of bounds for `ZERO_HASHES`.
fn pad_to_depth<Current: Unsigned, Target: Unsigned>(
    hash: Hash256,
    target_depth: usize,
    current_depth: usize,
) -> Hash256 {
    let padded: [u8; 32] = (current_depth..target_depth).fold(hash.0, |node, level| {
        hash32_concat(&node, ZERO_HASHES[level].as_slice())
    });
    padded.into()
}

/// Returns the depth of the Merkle tree needed for up to `N` values of type `T`.
///
/// The depth is `log2` of `N` rounded up to a power of two, minus `log2` of
/// `packing_factor::<T>()` rounded up to a power of two (the packing factor accounts for
/// several values sharing one leaf).
///
/// The subtraction saturates at zero: when `N` is smaller than the packing factor all
/// values fit in a single leaf, so the depth is `0` rather than an underflow (which would
/// panic in debug builds and wrap to a huge value in release builds).
fn target_tree_depth<T: TreeHash, N: Unsigned>() -> usize {
    // `ilog2` returns `u32`; the values here are at most 64, so widening to `usize` is lossless.
    let packing_depth = packing_factor::<T>().next_power_of_two().ilog2() as usize;
    let element_depth = N::to_u64().next_power_of_two().ilog2() as usize;
    element_depth.saturating_sub(packing_depth)
}

/// Computes the tree hash root of `vec` for a declared maximum length `N` that may exceed
/// `MaxDepth` (a leaf-count cap of 2**29, despite its name).
///
/// When `N` is within the cap this simply forwards to `default_impl::vec_tree_hash_root`.
/// Otherwise the root is first computed as if the maximum length were `MaxDepth`, and the
/// result is then extended with `pad_to_depth` from the depth of that capped tree up to the
/// depth required for `N`.
pub fn vec_tree_hash_root<T: TreeHash, N: Unsigned>(vec: &[T]) -> Hash256 {
    if N::to_u64() > MaxDepth::to_u64() {
        // Hash within the capped tree, then lift the root to the full target depth.
        let capped_root = default_impl::vec_tree_hash_root::<T, MaxDepth>(vec);
        let full_depth = target_tree_depth::<T, N>();
        let capped_depth = target_tree_depth::<T, MaxDepth>();
        return pad_to_depth::<MaxDepth, N>(capped_root, full_depth, capped_depth);
    }

    default_impl::vec_tree_hash_root::<T, N>(vec)
}
}

#[cfg(any(
target_pointer_width = "64",
not(feature = "cap-typenum-to-usize-overflow")
))]
pub use default_impl::vec_tree_hash_root;

#[cfg(all(
not(target_pointer_width = "64"),
feature = "cap-typenum-to-usize-overflow"
))]
pub use arch_32x_workaround::vec_tree_hash_root;
42 changes: 42 additions & 0 deletions src/typenum_helpers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use typenum::Unsigned;

// On x64, all typenums always fit usize
#[cfg(target_pointer_width = "64")]
pub fn to_usize<N: Unsigned>() -> usize {
N::to_usize()
}

/// Returns the value of the type-level integer `N` as a `usize`, detecting overflow.
///
/// This variant is compiled for targets whose pointers are not 64 bits wide, where a
/// typenum may be too large for `usize` (on a 32-bit target, anything from 2**32 upwards).
/// Overflow is detected by comparing the `usize` and `u64` renderings of `N`: if they agree,
/// nothing was lost and that value is returned.
///
/// On overflow the outcome depends on the `cap-typenum-to-usize-overflow` feature:
///
/// * **disabled** (default): panic loudly with as informative a message as possible;
/// * **enabled**: return `usize::MAX`. This allows working with a `VariableList` that is
///   "virtually larger" than `usize`, provided the actual number of elements never exceeds
///   `usize::MAX`. One example is the Ethereum BeaconChain `validators` field, which is a
///   `VariableList<..., 2**40>` while the real number of validators is far below 2**32.
///
/// Capping is considered sound because a program actually holding more than `usize::MAX`
/// elements would run out of memory or otherwise crash, which is nearly equivalent to the
/// panic. It is still a double-edged sword: enable the feature only if you can guarantee
/// that no `VariableList` in your program ever holds more than `usize::MAX` elements on the
/// architecture with the smallest `usize` it will run on.
///
/// # Panics
///
/// Panics when `N` does not fit in `usize` and `cap-typenum-to-usize-overflow` is disabled.
#[cfg(not(target_pointer_width = "64"))]
pub fn to_usize<N: Unsigned>() -> usize {
    let narrow = N::to_usize();
    let wide = N::to_u64();

    if narrow as u64 == wide {
        // Both renderings agree, so the value fits in `usize`.
        narrow
    } else if cfg!(feature = "cap-typenum-to-usize-overflow") {
        usize::MAX
    } else {
        panic!(
            "Overflow converting typenum U{} to usize (usize::MAX={})",
            wide,
            usize::MAX
        )
    }
}
Loading