Skip to content

Commit 413844f

Browse files
committed
Merge bitcoin/bitcoin#29612: rpc: Optimize serialization and enhance metadata of dumptxoutset output
542e13b rpc: Enhance metadata of the dumptxoutset output (Fabian Jahr)
4d8e5ed assumeutxo: Add documentation on dumptxoutset serialization format (Fabian Jahr)
c14ed7f assumeutxo: Add test for changed coin size value (Fabian Jahr)
de95953 rpc: Optimize serialization disk space of dumptxoutset (Fabian Jahr)

Pull request description:

The second attempt at implementing the `dumptxoutset` space optimization as suggested in #25675. Closes #25675.

This builds on the work done in #26045, addresses open feedback, adds some further improvements (most importantly usage of compact size), documentation, and an additional test.

The [original snapshot at height 830,000](bitcoin/bitcoin#29551) came in at 10.82 GB. With this change, the same snapshot is 8.94 GB, a reduction of 17.4%.

This also enhances the metadata of the output file and adds the following data to allow for better error handling and make future upgrades easier:
- A newly introduced utxo set magic
- A version number
- The network magic
- The block height

ACKs for top commit:
achow101: ACK 542e13b
TheCharlatan: Re-ACK 542e13b
theStack: ACK 542e13b

Tree-SHA512: 0825d30e5c3c364062db3c6cbca4e3c680e6e6d3e259fa70c0c2b2a7020f24a47406a623582040988d5c7745b08649c31110df4c10656aa25f3f27eb35843d99
2 parents 915d727 + 542e13b commit 413844f

File tree

9 files changed

+260
-75
lines changed

9 files changed

+260
-75
lines changed

src/kernel/chainparams.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,33 @@ std::unique_ptr<const CChainParams> CChainParams::TestNet()
542542
{
543543
return std::make_unique<const CTestNetParams>();
544544
}
545+
546+
// Returns the `height` of every entry in m_assumeutxo_data, in the
// container's iteration order. The result is a fresh vector returned by
// value; it is empty when m_assumeutxo_data is empty.
std::vector<int> CChainParams::GetAvailableSnapshotHeights() const
547+
{
548+
std::vector<int> heights;
549+
// Exactly one element is appended per entry below, so size the buffer once.
heights.reserve(m_assumeutxo_data.size());
550+
551+
for (const auto& data : m_assumeutxo_data) {
552+
heights.emplace_back(data.height);
553+
}
554+
return heights;
555+
}
556+
557+
// Looks up which known chain a network magic value belongs to.
//
// `message` is compared against the MessageStart() value of the main,
// testnet, regtest and signet chain parameters, in that order. The ChainType
// of the first match is returned; std::nullopt is returned when none match.
//
// NOTE(review): `message` is only read here (begin()/end()), so the parameter
// could be a const reference; the matching declaration in
// kernel/chainparams.h would have to change at the same time.
std::optional<ChainType> GetNetworkForMagic(MessageStartChars& message)
558+
{
559+
// Each call constructs a fresh parameter object per chain and copies its
// magic value out of it.
const auto mainnet_msg = CChainParams::Main()->MessageStart();
560+
const auto testnet_msg = CChainParams::TestNet()->MessageStart();
561+
// RegTest and SigNet are built from default-constructed options, so only the
// magic resulting from those defaults is recognized here.
// NOTE(review): presumably a signet with a custom challenge has a different
// magic and falls through to std::nullopt — confirm.
const auto regtest_msg = CChainParams::RegTest({})->MessageStart();
562+
const auto signet_msg = CChainParams::SigNet({})->MessageStart();
563+
564+
// Three-iterator std::equal: compares [message.begin(), message.end())
// element-wise with the sequence starting at the candidate's data().
if (std::equal(message.begin(), message.end(), mainnet_msg.data())) {
565+
return ChainType::MAIN;
566+
} else if (std::equal(message.begin(), message.end(), testnet_msg.data())) {
567+
return ChainType::TESTNET;
568+
} else if (std::equal(message.begin(), message.end(), regtest_msg.data())) {
569+
return ChainType::REGTEST;
570+
} else if (std::equal(message.begin(), message.end(), signet_msg.data())) {
571+
return ChainType::SIGNET;
572+
}
573+
// No known chain uses this magic value.
return std::nullopt;
574+
}

src/kernel/chainparams.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class CChainParams
9393
const Consensus::Params& GetConsensus() const { return consensus; }
9494
const MessageStartChars& MessageStart() const { return pchMessageStart; }
9595
uint16_t GetDefaultPort() const { return nDefaultPort; }
96+
std::vector<int> GetAvailableSnapshotHeights() const;
9697

9798
const CBlock& GenesisBlock() const { return genesis; }
9899
/** Default value for -checkmempool and -checkblockindex argument */
@@ -183,4 +184,6 @@ class CChainParams
183184
ChainTxData chainTxData;
184185
};
185186

187+
std::optional<ChainType> GetNetworkForMagic(MessageStartChars& pchMessageStart);
188+
186189
#endif // BITCOIN_KERNEL_CHAINPARAMS_H

src/node/utxo_snapshot.h

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,37 @@
66
#ifndef BITCOIN_NODE_UTXO_SNAPSHOT_H
77
#define BITCOIN_NODE_UTXO_SNAPSHOT_H
88

9+
#include <chainparams.h>
10+
#include <kernel/chainparams.h>
911
#include <kernel/cs_main.h>
1012
#include <serialize.h>
1113
#include <sync.h>
1214
#include <uint256.h>
15+
#include <util/chaintype.h>
1316
#include <util/fs.h>
1417

1518
#include <cstdint>
1619
#include <optional>
1720
#include <string_view>
1821

22+
// UTXO set snapshot magic bytes
23+
static constexpr std::array<uint8_t, 5> SNAPSHOT_MAGIC_BYTES = {'u', 't', 'x', 'o', 0xff};
24+
1925
class Chainstate;
2026

2127
namespace node {
2228
//! Metadata describing a serialized version of a UTXO set from which an
2329
//! assumeutxo Chainstate can be constructed.
2430
class SnapshotMetadata
2531
{
32+
const uint16_t m_version{1};
33+
const std::set<uint16_t> m_supported_versions{1};
2634
public:
2735
//! The hash of the block that reflects the tip of the chain for the
2836
//! UTXO set contained in this snapshot.
2937
uint256 m_base_blockhash;
38+
uint32_t m_base_blockheight;
39+
3040

3141
//! The number of coins in the UTXO set contained in this snapshot. Used
3242
//! during snapshot load to estimate progress of UTXO set reconstruction.
@@ -35,11 +45,55 @@ class SnapshotMetadata
3545
SnapshotMetadata() { }
3646
SnapshotMetadata(
3747
const uint256& base_blockhash,
48+
const int base_blockheight,
3849
uint64_t coins_count) :
3950
m_base_blockhash(base_blockhash),
51+
m_base_blockheight(base_blockheight),
4052
m_coins_count(coins_count) { }
4153

42-
SERIALIZE_METHODS(SnapshotMetadata, obj) { READWRITE(obj.m_base_blockhash, obj.m_coins_count); }
54+
// Writes the snapshot metadata header to `s`. Field order:
//   1. SNAPSHOT_MAGIC_BYTES
//   2. m_version
//   3. network magic of the currently selected chain (Params().MessageStart())
//   4. m_base_blockheight
//   5. m_base_blockhash
//   6. m_coins_count
// Unserialize() reads the fields back in this same order.
template <typename Stream>
55+
inline void Serialize(Stream& s) const {
56+
s << SNAPSHOT_MAGIC_BYTES;
57+
s << m_version;
58+
// The network magic is not a member; it is taken from the global chain
// params at the time of serialization.
s << Params().MessageStart();
59+
s << m_base_blockheight;
60+
s << m_base_blockhash;
61+
s << m_coins_count;
62+
}
63+
64+
// Reads and validates a snapshot metadata header from `s`.
//
// The snapshot magic, version and network magic are read and checked in that
// order; m_base_blockheight, m_base_blockhash and m_coins_count are read only
// after all three checks have passed.
//
// Throws std::ios_base::failure when:
//   - the leading bytes differ from SNAPSHOT_MAGIC_BYTES,
//   - the version is not contained in m_supported_versions,
//   - the network magic differs from Params().MessageStart().
template <typename Stream>
65+
inline void Unserialize(Stream& s) {
66+
// Read the snapshot magic bytes
67+
std::array<uint8_t, SNAPSHOT_MAGIC_BYTES.size()> snapshot_magic;
68+
s >> snapshot_magic;
69+
if (snapshot_magic != SNAPSHOT_MAGIC_BYTES) {
70+
throw std::ios_base::failure("Invalid UTXO set snapshot magic bytes. Please check if this is indeed a snapshot file or if you are using an outdated snapshot format.");
71+
}
72+
73+
// Read the version
74+
uint16_t version;
75+
s >> version;
76+
if (m_supported_versions.find(version) == m_supported_versions.end()) {
77+
throw std::ios_base::failure(strprintf("Version of snapshot %s does not match any of the supported versions.", version));
78+
}
79+
80+
// Read the network magic (pchMessageStart)
81+
MessageStartChars message;
82+
s >> message;
83+
if (!std::equal(message.begin(), message.end(), Params().MessageStart().data())) {
84+
// Mismatch: try to name the network the snapshot was made for so the
// error message can show both sides.
auto metadata_network = GetNetworkForMagic(message);
85+
if (metadata_network) {
86+
std::string network_string{ChainTypeToString(metadata_network.value())};
87+
throw std::ios_base::failure(strprintf("The network of the snapshot (%s) does not match the network of this node (%s).", network_string, Params().GetChainTypeString()));
88+
} else {
89+
throw std::ios_base::failure("This snapshot has been created for an unrecognized network. This could be a custom signet, a new testnet or possibly caused by data corruption.");
90+
}
91+
}
92+
93+
// Header accepted: read the remaining fields in the order Serialize() wrote
// them.
s >> m_base_blockheight;
94+
s >> m_base_blockhash;
95+
s >> m_coins_count;
96+
}
4397
};
4498

4599
//! The file in the snapshot chainstate dir which stores the base blockhash. This is

src/rpc/blockchain.cpp

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <rpc/server_util.h>
3535
#include <rpc/util.h>
3636
#include <script/descriptor.h>
37+
#include <serialize.h>
3738
#include <streams.h>
3839
#include <sync.h>
3940
#include <txdb.h>
@@ -2696,29 +2697,60 @@ UniValue CreateUTXOSnapshot(
26962697
tip->nHeight, tip->GetBlockHash().ToString(),
26972698
fs::PathToString(path), fs::PathToString(temppath)));
26982699

2699-
SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count};
2700+
SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count};
27002701

27012702
afile << metadata;
27022703

27032704
COutPoint key;
2705+
Txid last_hash;
27042706
Coin coin;
27052707
unsigned int iter{0};
2708+
size_t written_coins_count{0};
2709+
std::vector<std::pair<uint32_t, Coin>> coins;
2710+
2711+
// To reduce space the serialization format of the snapshot avoids
2712+
// duplication of tx hashes. The code takes advantage of the guarantee by
2713+
// leveldb that keys are lexicographically sorted.
2714+
// In the coins vector we collect all coins that belong to a certain tx hash
2715+
// (key.hash) and when we have them all (key.hash != last_hash) we write
2716+
// them to file using the below lambda function.
2717+
// See also https://github.com/bitcoin/bitcoin/issues/25675
2718+
// Writes one per-txid group to `afile`:
//   txid, compact-size number of coins, then for each coin its compact-size
//   output index followed by the serialized Coin.
// `written_coins_count` is incremented once per coin written.
// NOTE(review): every name used in the body is a parameter or a structured
// binding (the parameters shadow the enclosing locals of the same names), so
// the [&] capture appears to be unused.
auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
2719+
afile << last_hash;
2720+
WriteCompactSize(afile, coins.size());
2721+
for (const auto& [n, coin] : coins) {
2722+
WriteCompactSize(afile, n);
2723+
afile << coin;
2724+
++written_coins_count;
2725+
}
2726+
};
27062727

2728+
pcursor->GetKey(key);
2729+
last_hash = key.hash;
27072730
while (pcursor->Valid()) {
27082731
if (iter % 5000 == 0) node.rpc_interruption_point();
27092732
++iter;
27102733
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
2711-
afile << key;
2712-
afile << coin;
2734+
if (key.hash != last_hash) {
2735+
write_coins_to_file(afile, last_hash, coins, written_coins_count);
2736+
last_hash = key.hash;
2737+
coins.clear();
2738+
}
2739+
coins.emplace_back(key.n, coin);
27132740
}
2714-
27152741
pcursor->Next();
27162742
}
27172743

2744+
if (!coins.empty()) {
2745+
write_coins_to_file(afile, last_hash, coins, written_coins_count);
2746+
}
2747+
2748+
CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);
2749+
27182750
afile.fclose();
27192751

27202752
UniValue result(UniValue::VOBJ);
2721-
result.pushKV("coins_written", maybe_stats->coins_count);
2753+
result.pushKV("coins_written", written_coins_count);
27222754
result.pushKV("base_hash", tip->GetBlockHash().ToString());
27232755
result.pushKV("base_height", tip->nHeight);
27242756
result.pushKV("path", path.utf8string());
@@ -2778,12 +2810,22 @@ static RPCHelpMan loadtxoutset()
27782810
}
27792811

27802812
SnapshotMetadata metadata;
2781-
afile >> metadata;
2813+
try {
2814+
afile >> metadata;
2815+
} catch (const std::ios_base::failure& e) {
2816+
throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what()));
2817+
}
27822818

27832819
uint256 base_blockhash = metadata.m_base_blockhash;
2820+
int base_blockheight = metadata.m_base_blockheight;
27842821
if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) {
2822+
auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights();
2823+
std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); });
27852824
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, "
2786-
"assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString()));
2825+
"assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.",
2826+
base_blockhash.ToString(),
2827+
base_blockheight,
2828+
heights_formatted));
27872829
}
27882830
CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main,
27892831
return chainman.m_blockman.LookupBlockIndex(base_blockhash));

src/test/validation_chainstatemanager_tests.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -226,10 +226,13 @@ struct SnapshotTestSetup : TestChain100Setup {
226226
// A UTXO is missing but count is correct
227227
metadata.m_coins_count -= 1;
228228

229-
COutPoint outpoint;
229+
Txid txid;
230+
auto_infile >> txid;
231+
// coins size
232+
(void)ReadCompactSize(auto_infile);
233+
// vout index
234+
(void)ReadCompactSize(auto_infile);
230235
Coin coin;
231-
232-
auto_infile >> outpoint;
233236
auto_infile >> coin;
234237
}));
235238

src/validation.cpp

Lines changed: 62 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5666,69 +5666,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
56665666
return false;
56675667
}
56685668

5669-
COutPoint outpoint;
5670-
Coin coin;
56715669
const uint64_t coins_count = metadata.m_coins_count;
56725670
uint64_t coins_left = metadata.m_coins_count;
56735671

5674-
LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString());
5672+
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString());
56755673
int64_t coins_processed{0};
56765674

56775675
while (coins_left > 0) {
56785676
try {
5679-
coins_file >> outpoint;
5680-
coins_file >> coin;
5681-
} catch (const std::ios_base::failure&) {
5682-
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
5683-
coins_count - coins_left);
5684-
return false;
5685-
}
5686-
if (coin.nHeight > base_height ||
5687-
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
5688-
) {
5689-
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
5690-
coins_count - coins_left);
5691-
return false;
5692-
}
5693-
if (!MoneyRange(coin.out.nValue)) {
5694-
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
5695-
coins_count - coins_left);
5696-
return false;
5697-
}
5677+
Txid txid;
5678+
coins_file >> txid;
5679+
size_t coins_per_txid{0};
5680+
coins_per_txid = ReadCompactSize(coins_file);
5681+
5682+
if (coins_per_txid > coins_left) {
5683+
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n");
5684+
return false;
5685+
}
56985686

5699-
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
5687+
for (size_t i = 0; i < coins_per_txid; i++) {
5688+
COutPoint outpoint;
5689+
Coin coin;
5690+
outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file));
5691+
outpoint.hash = txid;
5692+
coins_file >> coin;
5693+
if (coin.nHeight > base_height ||
5694+
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
5695+
) {
5696+
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
5697+
coins_count - coins_left);
5698+
return false;
5699+
}
5700+
if (!MoneyRange(coin.out.nValue)) {
5701+
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
5702+
coins_count - coins_left);
5703+
return false;
5704+
}
5705+
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
57005706

5701-
--coins_left;
5702-
++coins_processed;
5707+
--coins_left;
5708+
++coins_processed;
57035709

5704-
if (coins_processed % 1000000 == 0) {
5705-
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
5706-
coins_processed,
5707-
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
5708-
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
5709-
}
5710+
if (coins_processed % 1000000 == 0) {
5711+
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
5712+
coins_processed,
5713+
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
5714+
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
5715+
}
57105716

5711-
// Batch write and flush (if we need to) every so often.
5712-
//
5713-
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
5714-
// means <5MB of memory imprecision.
5715-
if (coins_processed % 120000 == 0) {
5716-
if (m_interrupt) {
5717-
return false;
5718-
}
5717+
// Batch write and flush (if we need to) every so often.
5718+
//
5719+
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
5720+
// means <5MB of memory imprecision.
5721+
if (coins_processed % 120000 == 0) {
5722+
if (m_interrupt) {
5723+
return false;
5724+
}
57195725

5720-
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
5721-
return snapshot_chainstate.GetCoinsCacheSizeState());
5726+
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
5727+
return snapshot_chainstate.GetCoinsCacheSizeState());
57225728

5723-
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
5724-
// This is a hack - we don't know what the actual best block is, but that
5725-
// doesn't matter for the purposes of flushing the cache here. We'll set this
5726-
// to its correct value (`base_blockhash`) below after the coins are loaded.
5727-
coins_cache.SetBestBlock(GetRandHash());
5729+
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
5730+
// This is a hack - we don't know what the actual best block is, but that
5731+
// doesn't matter for the purposes of flushing the cache here. We'll set this
5732+
// to its correct value (`base_blockhash`) below after the coins are loaded.
5733+
coins_cache.SetBestBlock(GetRandHash());
57285734

5729-
// No need to acquire cs_main since this chainstate isn't being used yet.
5730-
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
5735+
// No need to acquire cs_main since this chainstate isn't being used yet.
5736+
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
5737+
}
5738+
}
57315739
}
5740+
} catch (const std::ios_base::failure&) {
5741+
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
5742+
coins_processed);
5743+
return false;
57325744
}
57335745
}
57345746

@@ -5741,7 +5753,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
57415753

57425754
bool out_of_coins{false};
57435755
try {
5744-
coins_file >> outpoint;
5756+
Txid txid;
5757+
coins_file >> txid;
57455758
} catch (const std::ios_base::failure&) {
57465759
// We expect an exception since we should be out of coins.
57475760
out_of_coins = true;

0 commit comments

Comments
 (0)