Skip to content

Commit db92989

Browse files
committed
Faster -reindex by initially deserializing only headers
When a block is initially read from a blk*.dat file during reindexing, it can be added to the block index only if all of its ancestor blocks have already been added, which is rare. If the block's ancestors have not been added, the block must be re-read from disk later, once it can be added. This commit changes the initial block read to deserialize only the block's header, rather than the entire block, since the header is sufficient to determine whether its parent (and thus all of its ancestors) has been added. This is a performance improvement.
1 parent c72de99 commit db92989

File tree

2 files changed

+78
-18
lines changed

2 files changed

+78
-18
lines changed

src/validation.cpp

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4393,6 +4393,8 @@ void Chainstate::LoadExternalBlockFile(
43934393
try {
43944394
// This takes over fileIn and calls fclose() on it in the CBufferedFile destructor
43954395
CBufferedFile blkdat(fileIn, 2*MAX_BLOCK_SERIALIZED_SIZE, MAX_BLOCK_SERIALIZED_SIZE+8, SER_DISK, CLIENT_VERSION);
4396+
// nRewind indicates where to resume scanning in case something goes wrong,
4397+
// such as when a block fails to deserialize.
43964398
uint64_t nRewind = blkdat.GetPos();
43974399
while (!blkdat.eof()) {
43984400
if (ShutdownRequested()) return;
@@ -4416,42 +4418,50 @@ void Chainstate::LoadExternalBlockFile(
44164418
continue;
44174419
} catch (const std::exception&) {
44184420
// no valid block header found; don't complain
4421+
// (this happens at the end of every blk.dat file)
44194422
break;
44204423
}
44214424
try {
4422-
// read block
4423-
uint64_t nBlockPos = blkdat.GetPos();
4425+
// read block header
4426+
const uint64_t nBlockPos{blkdat.GetPos()};
44244427
if (dbp)
44254428
dbp->nPos = nBlockPos;
44264429
blkdat.SetLimit(nBlockPos + nSize);
4427-
std::shared_ptr<CBlock> pblock = std::make_shared<CBlock>();
4428-
CBlock& block = *pblock;
4429-
blkdat >> block;
4430-
nRewind = blkdat.GetPos();
4431-
4432-
uint256 hash = block.GetHash();
4430+
CBlockHeader header;
4431+
blkdat >> header;
4432+
const uint256 hash{header.GetHash()};
4433+
// Skip the rest of this block (this may read from disk into memory); position to the marker before the
4434+
// next block, but it's still possible to rewind to the start of the current block (without a disk read).
4435+
nRewind = nBlockPos + nSize;
4436+
blkdat.SkipTo(nRewind);
44334437
{
44344438
LOCK(cs_main);
44354439
// detect out of order blocks, and store them for later
4436-
if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(block.hashPrevBlock)) {
4440+
if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(header.hashPrevBlock)) {
44374441
LogPrint(BCLog::REINDEX, "%s: Out of order block %s, parent %s not known\n", __func__, hash.ToString(),
4438-
block.hashPrevBlock.ToString());
4442+
header.hashPrevBlock.ToString());
44394443
if (dbp && blocks_with_unknown_parent) {
4440-
blocks_with_unknown_parent->emplace(block.hashPrevBlock, *dbp);
4444+
blocks_with_unknown_parent->emplace(header.hashPrevBlock, *dbp);
44414445
}
44424446
continue;
44434447
}
44444448

44454449
// process in case the block isn't known yet
44464450
const CBlockIndex* pindex = m_blockman.LookupBlockIndex(hash);
44474451
if (!pindex || (pindex->nStatus & BLOCK_HAVE_DATA) == 0) {
4448-
BlockValidationState state;
4449-
if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
4450-
nLoaded++;
4451-
}
4452-
if (state.IsError()) {
4453-
break;
4454-
}
4452+
// This block can be processed immediately; rewind to its start, read and deserialize it.
4453+
blkdat.SetPos(nBlockPos);
4454+
std::shared_ptr<CBlock> pblock{std::make_shared<CBlock>()};
4455+
blkdat >> *pblock;
4456+
nRewind = blkdat.GetPos();
4457+
4458+
BlockValidationState state;
4459+
if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
4460+
nLoaded++;
4461+
}
4462+
if (state.IsError()) {
4463+
break;
4464+
}
44554465
} else if (hash != params.GetConsensus().hashGenesisBlock && pindex->nHeight % 1000 == 0) {
44564466
LogPrint(BCLog::REINDEX, "Block Import: already had block %s at height %d\n", hash.ToString(), pindex->nHeight);
44574467
}

test/functional/feature_reindex.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77
- Start a single node and generate 3 blocks.
88
- Stop the node and restart it with -reindex. Verify that the node has reindexed up to block 3.
99
- Stop the node and restart it with -reindex-chainstate. Verify that the node has reindexed up to block 3.
10+
- Verify that out-of-order blocks are correctly processed, see LoadExternalBlockFile()
1011
"""
1112

13+
import os
1214
from test_framework.test_framework import BitcoinTestFramework
15+
from test_framework.p2p import MAGIC_BYTES
1316
from test_framework.util import assert_equal
1417

1518

@@ -27,11 +30,58 @@ def reindex(self, justchainstate=False):
2730
assert_equal(self.nodes[0].getblockcount(), blockcount) # start_node is blocking on reindex
2831
self.log.info("Success")
2932

33+
# Check that blocks can be processed out of order
34+
def out_of_order(self):
35+
# The previous test created 12 blocks
36+
assert_equal(self.nodes[0].getblockcount(), 12)
37+
self.stop_nodes()
38+
39+
# In this test environment, blocks will always be in order (since
40+
# we're generating them rather than getting them from peers), so to
41+
# test out-of-order handling, swap the second and third blocks (heights 1 and 2) on disk.
42+
blk0 = os.path.join(self.nodes[0].datadir, self.nodes[0].chain, 'blocks', 'blk00000.dat')
43+
with open(blk0, 'r+b') as bf:
44+
# Read at least the first few blocks (including genesis)
45+
b = bf.read(2000)
46+
47+
# Find the offsets of blocks 2, 3, and 4 (the first 3 blocks beyond genesis)
48+
# by searching for the regtest marker bytes (see pchMessageStart).
49+
def find_block(b, start):
50+
return b.find(MAGIC_BYTES["regtest"], start)+4
51+
52+
genesis_start = find_block(b, 0)
53+
assert_equal(genesis_start, 4)
54+
b2_start = find_block(b, genesis_start)
55+
b3_start = find_block(b, b2_start)
56+
b4_start = find_block(b, b3_start)
57+
58+
# Blocks 2 and 3 should be the same size.
59+
assert_equal(b3_start-b2_start, b4_start-b3_start)
60+
61+
# Swap the second and third blocks (don't disturb the genesis block).
62+
bf.seek(b2_start)
63+
bf.write(b[b3_start:b4_start])
64+
bf.write(b[b2_start:b3_start])
65+
66+
# The reindexing code should detect and accommodate out of order blocks.
67+
with self.nodes[0].assert_debug_log([
68+
'LoadExternalBlockFile: Out of order block',
69+
'LoadExternalBlockFile: Processing out of order child',
70+
]):
71+
extra_args = [["-reindex"]]
72+
self.start_nodes(extra_args)
73+
74+
# All blocks should be accepted and processed.
75+
assert_equal(self.nodes[0].getblockcount(), 12)
76+
3077
def run_test(self):
3178
self.reindex(False)
3279
self.reindex(True)
3380
self.reindex(False)
3481
self.reindex(True)
3582

83+
self.out_of_order()
84+
85+
3686
if __name__ == '__main__':
3787
ReindexTest().main()

0 commit comments

Comments
 (0)