Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,13 @@ impl AccountStorageEntry {
self.alive_bytes.load(Ordering::Acquire)
}

/// Returns the accounts that were marked obsolete as of the passed in slot
/// or earlier. Returned data includes the slots that the accounts were marked
/// obsolete at
pub fn obsolete_accounts_at_slot(&self, slot: Slot) -> ObsoleteAccounts {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had to create a new API for serialization that also returns the slots that accounts are marked obsolete at (other use cases don't include the slot).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not saying you have to do this here, but when creating one-off pub functions like this, I've found that naming who is calling the fn, to be helpful.

For here, that'd be something like obsolete_accounts_for_snapshots().

The main purpose being to inform others they should not call this function.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will update

self.obsolete_accounts_read_lock()
.obsolete_accounts_at_slot(slot)
}
/// Locks obsolete accounts with a read lock and returns the the accounts with the guard
pub(crate) fn obsolete_accounts_read_lock(&self) -> RwLockReadGuard<ObsoleteAccounts> {
self.obsolete_accounts.read().unwrap()
Expand Down
55 changes: 53 additions & 2 deletions accounts-db/src/obsolete_accounts.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use {crate::account_info::Offset, solana_clock::Slot};

#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct ObsoleteAccountItem {
/// Offset of the account in the account storage entry
offset: Offset,
Expand All @@ -10,7 +10,7 @@ struct ObsoleteAccountItem {
slot: Slot,
}

#[derive(Debug, Clone, PartialEq, Default)]
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ObsoleteAccounts {
accounts: Vec<ObsoleteAccountItem>,
}
Expand Down Expand Up @@ -45,6 +45,22 @@ impl ObsoleteAccounts {
.filter(move |obsolete_account| slot.is_none_or(|s| obsolete_account.slot <= s))
.map(|obsolete_account| (obsolete_account.offset, obsolete_account.data_len))
}

/// Returns the accounts that were marked obsolete as of the passed in slot
/// or earlier. Returned data includes the slots that the accounts were marked
/// obsolete at
pub fn obsolete_accounts_at_slot(&self, slot: Slot) -> ObsoleteAccounts {
let filtered_accounts = self
.accounts
.iter()
.filter(|account| account.slot <= slot)
.cloned()
.collect();

ObsoleteAccounts {
accounts: filtered_accounts,
}
}
}
#[cfg(test)]
mod tests {
Expand Down Expand Up @@ -99,4 +115,39 @@ mod tests {

assert_eq!(filtered_accounts, vec![(10, 100), (20, 200), (30, 300)]);
}

#[test]
fn test_obsolete_accounts_at_slot() {
let mut obsolete_accounts = ObsoleteAccounts::default();
let new_accounts = vec![(10, 100, 40), (20, 200, 42), (30, 300, 44)]
.into_iter()
.map(|(offset, data_len, slot)| ObsoleteAccountItem {
offset,
data_len,
slot,
})
.collect::<Vec<_>>();

// Mark accounts obsolete with different slots
new_accounts.iter().for_each(|item| {
obsolete_accounts
.mark_accounts_obsolete([(item.offset, item.data_len)].into_iter(), item.slot)
});

// Filter accounts obsolete as of slot 42
let obsolete_accounts_at_slot = obsolete_accounts.obsolete_accounts_at_slot(42);

let expected_accounts: Vec<_> = new_accounts
.iter()
.filter(|account| account.slot <= 42)
.cloned()
.collect();

assert_eq!(obsolete_accounts_at_slot.accounts, expected_accounts);

// Filter accounts obsolete passing in no slot (i.e., all obsolete accounts)
let obsolete_accounts_at_slot = obsolete_accounts.obsolete_accounts_at_slot(100);

assert_eq!(obsolete_accounts_at_slot.accounts, new_accounts);
}
}
15 changes: 14 additions & 1 deletion runtime/src/serde_snapshot/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl SerializableStorage for SerializableAccountStorageEntry {
impl solana_frozen_abi::abi_example::TransparentAsHelper for SerializableAccountStorageEntry {}

/// This structure handles the load/store of obsolete accounts during snapshot restoration.
#[derive(Debug, Default)]
#[derive(Debug, Default, Serialize, Deserialize)]
pub(crate) struct SerdeObsoleteAccounts {
/// The ID of the associated account file. Used for verification to ensure the restored
/// obsolete accounts correspond to the correct account file
Expand All @@ -63,3 +63,16 @@ pub(crate) struct SerdeObsoleteAccounts {
/// A list of accounts that are obsolete in the storage being restored.
pub accounts: ObsoleteAccounts,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use Vec<(Offset, usize, slot)> directly here and remove Serialize/Deserialize from ObsoleteAccounts.

}

impl SerdeObsoleteAccounts {
pub fn new_from_storage_entry_at_slot(
storage: &AccountStorageEntry,
snapshot_slot: Slot,
) -> Self {
SerdeObsoleteAccounts {
id: storage.id() as SerializedAccountsFileId,
bytes: storage.get_obsolete_bytes(Some(snapshot_slot)) as u64,
accounts: storage.obsolete_accounts_at_slot(snapshot_slot),
}
}
}
148 changes: 145 additions & 3 deletions runtime/src/snapshot_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use {
crate::{
bank::{BankFieldsToDeserialize, BankFieldsToSerialize, BankHashStats, BankSlotDelta},
serde_snapshot::{
self, AccountsDbFields, ExtraFieldsToSerialize, SerializableAccountStorageEntry,
SerializedAccountsFileId, SnapshotAccountsDbFields, SnapshotBankFields,
SnapshotStreams,
self, AccountsDbFields, ExtraFieldsToSerialize, SerdeObsoleteAccounts,
SerializableAccountStorageEntry, SerializedAccountsFileId, SnapshotAccountsDbFields,
SnapshotBankFields, SnapshotStreams,
},
snapshot_archive_info::{
FullSnapshotArchiveInfo, IncrementalSnapshotArchiveInfo, SnapshotArchiveInfo,
Expand All @@ -20,6 +20,7 @@ use {
},
},
crossbeam_channel::{Receiver, Sender},
dashmap::DashMap,
log::*,
regex::Regex,
semver::Version,
Expand Down Expand Up @@ -66,6 +67,7 @@ pub const SNAPSHOT_STATE_COMPLETE_FILENAME: &str = "state_complete";
pub const SNAPSHOT_STORAGES_FLUSHED_FILENAME: &str = "storages_flushed";
pub const SNAPSHOT_ACCOUNTS_HARDLINKS: &str = "accounts_hardlinks";
pub const SNAPSHOT_ARCHIVE_DOWNLOAD_DIR: &str = "remote";
pub const SNAPSHOT_OBSOLETE_ACCOUNTS_FILENAME: &str = "obsolete_accounts";
/// No longer checked in version v3.1. Can be removed in v3.2
pub const SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME: &str = "full_snapshot_slot";
/// When a snapshot is taken of a bank, the state is serialized under this directory.
Expand Down Expand Up @@ -1291,6 +1293,66 @@ fn do_get_highest_bank_snapshot(
bank_snapshots.into_iter().next_back()
}

pub fn write_obsolete_accounts_to_snapshot(
Copy link
Author

@roryharr roryharr Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might look a little bit strange at first: The accounts are serialized with write_obsolete_accounts_to_snapshot and deserialized with deserialize_obsolete_accounts.

This is because the translation they are handling is as follows:
AccountStorageEntry -> SerdesObsoleteAccounts -> file -> SerdesObsoleteAccounts

serialize_obsolete_accounts/deserialize_obsolete_accounts handles SerdesObsoleteAccounts -> file -> SerdesObsoleteAccounts, which are matching functions

Another approach is separating collect_obsolete_accounts and serialize at the caller, but that bled the internals of SerdesObsoleteAccounts into core which I want to avoid.

Combining write_obsolete_accounts_to_snapshot and serialize_obsolete_accounts and just calling it serialize_obsolete_accounts made for difficult unit test writing.

bank_snapshot_dir: &Path,
snapshot_storages: &[Arc<AccountStorageEntry>],
snapshot_slot: Slot,
) -> Result<u64> {
let obsolete_accounts = collect_obsolete_accounts(snapshot_storages, snapshot_slot);
serialize_obsolete_accounts(bank_snapshot_dir, &obsolete_accounts)
}

fn collect_obsolete_accounts(
snapshot_storages: &[Arc<AccountStorageEntry>],
snapshot_slot: Slot,
) -> HashMap<Slot, SerdeObsoleteAccounts> {
snapshot_storages
.iter()
.map(|storage| {
let obsolete_accounts =
SerdeObsoleteAccounts::new_from_storage_entry_at_slot(storage, snapshot_slot);
(storage.slot(), obsolete_accounts)
})
.collect()
}

fn serialize_obsolete_accounts(
bank_snapshot_dir: &Path,
obsolete_accounts_map: &HashMap<Slot, SerdeObsoleteAccounts>,
) -> Result<u64> {
let obsolete_accounts_path = bank_snapshot_dir.join(SNAPSHOT_OBSOLETE_ACCOUNTS_FILENAME);
let obsolete_accounts_file = fs::File::create(&obsolete_accounts_path)?;
let mut file_stream = BufWriter::new(obsolete_accounts_file);

for (slot, obsolete_accounts) in obsolete_accounts_map {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hashmap has non-deterministic order.
how about sort before serializing to ensure deterministic snapshot files?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not recommend this. We have many hashmaps in the snapshot already, and we don't sort those.

But we should not be serializing item by item. There are serde helpers for serializing the whole hashmap. See solana_runtime::serde_snapshot::utils::serialize_iter_as_seq() et al.

bincode::serialize_into(&mut file_stream, &(slot, obsolete_accounts))?;
}
file_stream.flush()?;

let consumed_size = file_stream.stream_position()?;
Ok(consumed_size)
Comment on lines 1331 to 1339

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to check the size and set some limit, similar to serialize_snapshot_data_file_capped().

Likely the same on the deserialize side too (arguably more important on the deserialize side).

}

#[allow(dead_code)]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function will be used in the next PR.

fn deserialize_obsolete_accounts(
bank_snapshot_dir: &Path,
) -> Result<DashMap<Slot, SerdeObsoleteAccounts>> {
let obsolete_accounts_path = bank_snapshot_dir.join(SNAPSHOT_OBSOLETE_ACCOUNTS_FILENAME);
let obsolete_accounts_file = fs::File::open(obsolete_accounts_path)?;
let mut data_file_stream = BufReader::new(obsolete_accounts_file);

let obsolete_accounts = DashMap::default();
while let Ok((slot, accounts)) = {
bincode::deserialize_from::<&mut BufReader<fs::File>, (Slot, SerdeObsoleteAccounts)>(
&mut data_file_stream,
)
} {
obsolete_accounts.insert(slot, accounts);
}

Ok(obsolete_accounts)
}

pub fn serialize_snapshot_data_file<F>(data_file_path: &Path, serializer: F) -> Result<u64>
where
F: FnOnce(&mut BufWriter<std::fs::File>) -> Result<()>,
Expand Down Expand Up @@ -2574,8 +2636,10 @@ mod tests {
super::*,
assert_matches::assert_matches,
bincode::{deserialize_from, serialize_into},
solana_accounts_db::{accounts_file::AccountsFileProvider, ObsoleteAccounts},
std::{convert::TryFrom, mem::size_of},
tempfile::NamedTempFile,
test_case::test_case,
};

#[test]
Expand Down Expand Up @@ -3549,4 +3613,82 @@ mod tests {
.starts_with("invalid full snapshot slot file size"));
}
}

#[test_case(0)]
#[test_case(1)]
#[test_case(10)]
fn test_serialize_deserialize_account_storage_entries(num_storages: u64) {
let temp_dir = tempfile::tempdir().unwrap();
let bank_snapshot_dir = temp_dir.path();
let snapshot_slot = num_storages + 1 as Slot;

// Create AccountStorageEntries
let mut snapshot_storages = Vec::new();
for i in 0..num_storages {
let storage = Arc::new(AccountStorageEntry::new(
&PathBuf::new(),
i, // Incrementing slot
i as u32, // Incrementing id
1024,
AccountsFileProvider::AppendVec,
StorageAccess::File,
));
snapshot_storages.push(storage);
}
Comment on lines +3641 to +3652

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let snapshot_storages: Vec<_> = (0..num_storages).map(|i| {
    // create storage
})
.collect();


// write obsolete accounts to snapshot
write_obsolete_accounts_to_snapshot(bank_snapshot_dir, &snapshot_storages, snapshot_slot)
.unwrap();

// Deserialize
let deserialized_accounts = deserialize_obsolete_accounts(bank_snapshot_dir).unwrap();

// Verify
assert_eq!(deserialized_accounts.len(), num_storages as usize);
for storage in &snapshot_storages {
assert!(deserialized_accounts.contains_key(&storage.slot()));
assert!(deserialized_accounts.get(&storage.slot()).unwrap().bytes == 0);
}
}

#[test_case(0, 0)]
#[test_case(1, 0)]
#[test_case(10, 15)]
fn test_serialize_and_deserialize_obsolete_accounts(
num_storages: u64,
num_accounts_per_storage: usize,
) {
let temp_dir = TempDir::new().unwrap();
let bank_snapshot_dir = temp_dir.path();

// Create a sample obsolete accounts map
let mut obsolete_accounts_map = HashMap::new();
for slot in 1..=num_storages {
let mut obsolete_accounts = ObsoleteAccounts::default();
let accounts = (0..num_accounts_per_storage).map(|j| (j, j * 10));
obsolete_accounts.mark_accounts_obsolete(accounts, slot + 1);

obsolete_accounts_map.insert(
slot,
SerdeObsoleteAccounts {
bytes: num_accounts_per_storage as u64 * 1000,
id: slot as usize,
accounts: obsolete_accounts,
},
);
}

// Serialize the obsolete accounts
serialize_obsolete_accounts(bank_snapshot_dir, &obsolete_accounts_map).unwrap();

// Deserialize the obsolete accounts
let deserialized_accounts = deserialize_obsolete_accounts(bank_snapshot_dir).unwrap();

// Verify the deserialized data matches the original
assert_eq!(deserialized_accounts.len(), obsolete_accounts_map.len());
for (slot, accounts) in obsolete_accounts_map {
let deserialized_accounts = deserialized_accounts.get(&slot).unwrap();
assert_eq!(accounts.accounts, deserialized_accounts.accounts);
}
}
}
Loading