Skip to content

Commit de95953

Browse files
fjahr, aureleoules, and TheCharlatan
committed
rpc: Optimize serialization disk space of dumptxoutset
Co-authored-by: Aurèle Oulès <aurele@oules.com>
Co-authored-by: TheCharlatan <seb.kung@gmail.com>
1 parent eb0bdbd commit de95953

File tree

5 files changed

+105
-63
lines changed

5 files changed

+105
-63
lines changed

src/rpc/blockchain.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <rpc/server_util.h>
3535
#include <rpc/util.h>
3636
#include <script/descriptor.h>
37+
#include <serialize.h>
3738
#include <streams.h>
3839
#include <sync.h>
3940
#include <txdb.h>
@@ -2695,24 +2696,48 @@ UniValue CreateUTXOSnapshot(
26952696
afile << metadata;
26962697

26972698
COutPoint key;
2699+
Txid last_hash;
26982700
Coin coin;
26992701
unsigned int iter{0};
2702+
size_t written_coins_count{0};
2703+
std::vector<std::pair<uint32_t, Coin>> coins;
2704+
2705+
auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
2706+
afile << last_hash;
2707+
WriteCompactSize(afile, coins.size());
2708+
for (const auto& [n, coin] : coins) {
2709+
WriteCompactSize(afile, n);
2710+
afile << coin;
2711+
++written_coins_count;
2712+
}
2713+
};
27002714

2715+
pcursor->GetKey(key);
2716+
last_hash = key.hash;
27012717
while (pcursor->Valid()) {
27022718
if (iter % 5000 == 0) node.rpc_interruption_point();
27032719
++iter;
27042720
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
2705-
afile << key;
2706-
afile << coin;
2721+
if (key.hash != last_hash) {
2722+
write_coins_to_file(afile, last_hash, coins, written_coins_count);
2723+
last_hash = key.hash;
2724+
coins.clear();
2725+
}
2726+
coins.emplace_back(key.n, coin);
27072727
}
2708-
27092728
pcursor->Next();
27102729
}
27112730

2731+
if (!coins.empty()) {
2732+
write_coins_to_file(afile, last_hash, coins, written_coins_count);
2733+
}
2734+
2735+
CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);
2736+
27122737
afile.fclose();
27132738

27142739
UniValue result(UniValue::VOBJ);
2715-
result.pushKV("coins_written", maybe_stats->coins_count);
2740+
result.pushKV("coins_written", written_coins_count);
27162741
result.pushKV("base_hash", tip->GetBlockHash().ToString());
27172742
result.pushKV("base_height", tip->nHeight);
27182743
result.pushKV("path", path.utf8string());

src/test/validation_chainstatemanager_tests.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,10 +226,13 @@ struct SnapshotTestSetup : TestChain100Setup {
226226
// A UTXO is missing but count is correct
227227
metadata.m_coins_count -= 1;
228228

229-
COutPoint outpoint;
229+
Txid txid;
230+
auto_infile >> txid;
231+
// coins size
232+
(void)ReadCompactSize(auto_infile);
233+
// vout index
234+
(void)ReadCompactSize(auto_infile);
230235
Coin coin;
231-
232-
auto_infile >> outpoint;
233236
auto_infile >> coin;
234237
}));
235238

src/validation.cpp

Lines changed: 62 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5660,69 +5660,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
56605660
return false;
56615661
}
56625662

5663-
COutPoint outpoint;
5664-
Coin coin;
56655663
const uint64_t coins_count = metadata.m_coins_count;
56665664
uint64_t coins_left = metadata.m_coins_count;
56675665

5668-
LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString());
5666+
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString());
56695667
int64_t coins_processed{0};
56705668

56715669
while (coins_left > 0) {
56725670
try {
5673-
coins_file >> outpoint;
5674-
coins_file >> coin;
5675-
} catch (const std::ios_base::failure&) {
5676-
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
5677-
coins_count - coins_left);
5678-
return false;
5679-
}
5680-
if (coin.nHeight > base_height ||
5681-
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
5682-
) {
5683-
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
5684-
coins_count - coins_left);
5685-
return false;
5686-
}
5687-
if (!MoneyRange(coin.out.nValue)) {
5688-
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
5689-
coins_count - coins_left);
5690-
return false;
5691-
}
5671+
Txid txid;
5672+
coins_file >> txid;
5673+
size_t coins_per_txid{0};
5674+
coins_per_txid = ReadCompactSize(coins_file);
5675+
5676+
if (coins_per_txid > coins_left) {
5677+
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n");
5678+
return false;
5679+
}
56925680

5693-
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
5681+
for (size_t i = 0; i < coins_per_txid; i++) {
5682+
COutPoint outpoint;
5683+
Coin coin;
5684+
outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file));
5685+
outpoint.hash = txid;
5686+
coins_file >> coin;
5687+
if (coin.nHeight > base_height ||
5688+
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
5689+
) {
5690+
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
5691+
coins_count - coins_left);
5692+
return false;
5693+
}
5694+
if (!MoneyRange(coin.out.nValue)) {
5695+
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
5696+
coins_count - coins_left);
5697+
return false;
5698+
}
5699+
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
56945700

5695-
--coins_left;
5696-
++coins_processed;
5701+
--coins_left;
5702+
++coins_processed;
56975703

5698-
if (coins_processed % 1000000 == 0) {
5699-
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
5700-
coins_processed,
5701-
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
5702-
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
5703-
}
5704+
if (coins_processed % 1000000 == 0) {
5705+
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
5706+
coins_processed,
5707+
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
5708+
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
5709+
}
57045710

5705-
// Batch write and flush (if we need to) every so often.
5706-
//
5707-
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
5708-
// means <5MB of memory imprecision.
5709-
if (coins_processed % 120000 == 0) {
5710-
if (m_interrupt) {
5711-
return false;
5712-
}
5711+
// Batch write and flush (if we need to) every so often.
5712+
//
5713+
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
5714+
// means <5MB of memory imprecision.
5715+
if (coins_processed % 120000 == 0) {
5716+
if (m_interrupt) {
5717+
return false;
5718+
}
57135719

5714-
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
5715-
return snapshot_chainstate.GetCoinsCacheSizeState());
5720+
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
5721+
return snapshot_chainstate.GetCoinsCacheSizeState());
57165722

5717-
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
5718-
// This is a hack - we don't know what the actual best block is, but that
5719-
// doesn't matter for the purposes of flushing the cache here. We'll set this
5720-
// to its correct value (`base_blockhash`) below after the coins are loaded.
5721-
coins_cache.SetBestBlock(GetRandHash());
5723+
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
5724+
// This is a hack - we don't know what the actual best block is, but that
5725+
// doesn't matter for the purposes of flushing the cache here. We'll set this
5726+
// to its correct value (`base_blockhash`) below after the coins are loaded.
5727+
coins_cache.SetBestBlock(GetRandHash());
57225728

5723-
// No need to acquire cs_main since this chainstate isn't being used yet.
5724-
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
5729+
// No need to acquire cs_main since this chainstate isn't being used yet.
5730+
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
5731+
}
5732+
}
57255733
}
5734+
} catch (const std::ios_base::failure&) {
5735+
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
5736+
coins_processed);
5737+
return false;
57265738
}
57275739
}
57285740

@@ -5735,7 +5747,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
57355747

57365748
bool out_of_coins{false};
57375749
try {
5738-
coins_file >> outpoint;
5750+
Txid txid;
5751+
coins_file >> txid;
57395752
} catch (const std::ios_base::failure&) {
57405753
// We expect an exception since we should be out of coins.
57415754
out_of_coins = true;

test/functional/feature_assumeutxo.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,16 @@ def expected_error(log_msg="", rpc_details=""):
9494
f.write(valid_snapshot_contents[32 + 8:])
9595
expected_error(log_msg=f"bad snapshot - coins left over after deserializing 298 coins" if off == -1 else f"bad snapshot format or truncated snapshot after deserializing 299 coins")
9696

97-
self.log.info(" - snapshot file with alternated UTXO data")
97+
98+
self.log.info(" - snapshot file with alternated but parsable UTXO data results in different hash")
9899
cases = [
99100
# (content, offset, wrong_hash, custom_message)
100101
[b"\xff" * 32, 0, "7d52155c9a9fdc4525b637ef6170568e5dad6fabd0b1fdbb9432010b8453095b", None], # wrong outpoint hash
101-
[(1).to_bytes(4, "little"), 32, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad", None], # wrong outpoint index
102-
[b"\x81", 36, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8", None], # wrong coin code VARINT
103-
[b"\x80", 36, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5", None], # another wrong coin code
104-
[b"\x84\x58", 36, None, "[snapshot] bad snapshot data after deserializing 0 coins"], # wrong coin case with height 364 and coinbase 0
105-
[b"\xCA\xD2\x8F\x5A", 41, None, "[snapshot] bad snapshot data after deserializing 0 coins - bad tx out value"], # Amount exceeds MAX_MONEY
102+
[b"\x01", 33, "9f4d897031ab8547665b4153317ae2fdbf0130c7840b66427ebc48b881cb80ad", None], # wrong outpoint index
103+
[b"\x81", 34, "3da966ba9826fb6d2604260e01607b55ba44e1a5de298606b08704bc62570ea8", None], # wrong coin code VARINT
104+
[b"\x80", 34, "091e893b3ccb4334378709578025356c8bcb0a623f37c7c4e493133c988648e5", None], # another wrong coin code
105+
[b"\x84\x58", 34, None, "[snapshot] bad snapshot data after deserializing 0 coins"], # wrong coin case with height 364 and coinbase 0
106+
[b"\xCA\xD2\x8F\x5A", 39, None, "[snapshot] bad snapshot data after deserializing 0 coins - bad tx out value"], # Amount exceeds MAX_MONEY
106107
]
107108

108109
for content, offset, wrong_hash, custom_message in cases:

test/functional/rpc_dumptxoutset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def run_test(self):
4343
# UTXO snapshot hash should be deterministic based on mocked time.
4444
assert_equal(
4545
sha256sum_file(str(expected_path)).hex(),
46-
'b1bacb602eacf5fbc9a7c2ef6eeb0d229c04e98bdf0c2ea5929012cd0eae3830')
46+
'3263fc0311ea46415b85513a59ad8fe67806b3cdce66147175ecb9da768d4a99')
4747

4848
assert_equal(
4949
out['txoutset_hash'], 'a0b7baa3bf5ccbd3279728f230d7ca0c44a76e9923fca8f32dbfd08d65ea496a')

0 commit comments

Comments
 (0)