Skip to content

Commit e444823

Browse files
s1na and rjl493456442
authored
core: fix sync reset in pruned nodes (#31638)
This is an attempt at fixing #31601. I think what happens is that the startup logic (`bc.loadLastState`) tries to get the full block body and fails because the genesis block has been pruned from the freezer. This causes it to keep repeating the reset logic, resulting in a deadlock. This can happen when, due to an unsuccessful sync, we don't fully have the state for the head (or any other state) and try to redo the snap sync. --------- Co-authored-by: Gary Rong <garyrong0905@gmail.com>
1 parent cb21177 commit e444823

File tree

3 files changed

+60
-23
lines changed

3 files changed

+60
-23
lines changed

core/blockchain.go

Lines changed: 36 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -514,19 +514,33 @@ func (bc *BlockChain) loadLastState() error {
514514
log.Warn("Empty database, resetting chain")
515515
return bc.Reset()
516516
}
517-
// Make sure the entire head block is available
518-
headBlock := bc.GetBlockByHash(head)
517+
headHeader := bc.GetHeaderByHash(head)
518+
if headHeader == nil {
519+
// Corrupt or empty database, init from scratch
520+
log.Warn("Head header missing, resetting chain", "hash", head)
521+
return bc.Reset()
522+
}
523+
524+
var headBlock *types.Block
525+
if cmp := headHeader.Number.Cmp(new(big.Int)); cmp == 1 {
526+
// Make sure the entire head block is available.
527+
headBlock = bc.GetBlockByHash(head)
528+
} else if cmp == 0 {
529+
// On a pruned node the block body might not be available. But a pruned
530+
// block should never be the head block. The only exception is when, as
531+
// a last resort, chain is reset to genesis.
532+
headBlock = bc.genesisBlock
533+
}
519534
if headBlock == nil {
520535
// Corrupt or empty database, init from scratch
521536
log.Warn("Head block missing, resetting chain", "hash", head)
522537
return bc.Reset()
523538
}
524539
// Everything seems to be fine, set as the head block
525-
bc.currentBlock.Store(headBlock.Header())
540+
bc.currentBlock.Store(headHeader)
526541
headBlockGauge.Update(int64(headBlock.NumberU64()))
527542

528543
// Restore the last known head header
529-
headHeader := headBlock.Header()
530544
if head := rawdb.ReadHeadHeaderHash(bc.db); head != (common.Hash{}) {
531545
if header := bc.GetHeaderByHash(head); header != nil {
532546
headHeader = header
@@ -642,11 +656,15 @@ func (bc *BlockChain) SetHead(head uint64) error {
642656
// Send chain head event to update the transaction pool
643657
header := bc.CurrentBlock()
644658
if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil {
645-
// This should never happen. In practice, previously currentBlock
646-
// contained the entire block whereas now only a "marker", so there
647-
// is an ever so slight chance for a race we should handle.
648-
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
649-
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
659+
// In a pruned node the genesis block will not exist in the freezer.
660+
// It should not happen that we set head to any other pruned block.
661+
if header.Number.Uint64() > 0 {
662+
// This should never happen. In practice, previously currentBlock
663+
// contained the entire block whereas now only a "marker", so there
664+
// is an ever so slight chance for a race we should handle.
665+
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
666+
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
667+
}
650668
}
651669
bc.chainHeadFeed.Send(ChainHeadEvent{Header: header})
652670
return nil
@@ -663,11 +681,15 @@ func (bc *BlockChain) SetHeadWithTimestamp(timestamp uint64) error {
663681
// Send chain head event to update the transaction pool
664682
header := bc.CurrentBlock()
665683
if block := bc.GetBlock(header.Hash(), header.Number.Uint64()); block == nil {
666-
// This should never happen. In practice, previously currentBlock
667-
// contained the entire block whereas now only a "marker", so there
668-
// is an ever so slight chance for a race we should handle.
669-
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
670-
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
684+
// In a pruned node the genesis block will not exist in the freezer.
685+
// It should not happen that we set head to any other pruned block.
686+
if header.Number.Uint64() > 0 {
687+
// This should never happen. In practice, previously currentBlock
688+
// contained the entire block whereas now only a "marker", so there
689+
// is an ever so slight chance for a race we should handle.
690+
log.Error("Current block not found in database", "block", header.Number, "hash", header.Hash())
691+
return fmt.Errorf("current block missing: #%d [%x..]", header.Number, header.Hash().Bytes()[:4])
692+
}
671693
}
672694
bc.chainHeadFeed.Send(ChainHeadEvent{Header: header})
673695
return nil

core/txindexer.go

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -196,16 +196,29 @@ func (indexer *txIndexer) repair(head uint64) {
196196
}
197197
}
198198

199+
// resolveHead resolves the block number of the current chain head.
//
// It reads the head block hash from indexer.db and then the header number
// stored for that hash, and returns that number. It returns 0 if the hash
// read back is the zero hash or if no header number is returned for it.
// NOTE(review): this presumably avoids loading the full head block, which
// may be unavailable on a pruned node — confirm against rawdb.
200+
func (indexer *txIndexer) resolveHead() uint64 {
201+
headBlockHash := rawdb.ReadHeadBlockHash(indexer.db)
202+
// A zero hash is treated as "no head recorded"; report block 0.
if headBlockHash == (common.Hash{}) {
203+
return 0
204+
}
205+
headBlockNumber := rawdb.ReadHeaderNumber(indexer.db, headBlockHash)
206+
// No number was returned for the head hash; report block 0 rather than
// dereferencing a nil pointer.
if headBlockNumber == nil {
207+
return 0
208+
}
209+
return *headBlockNumber
210+
}
211+
199212
// loop is the scheduler of the indexer, assigning indexing/unindexing tasks depending
200213
// on the received chain event.
201214
func (indexer *txIndexer) loop(chain *BlockChain) {
202215
defer close(indexer.closed)
203216

204217
// Listening to chain events and manipulate the transaction indexes.
205218
var (
206-
stop chan struct{} // Non-nil if background routine is active
207-
done chan struct{} // Non-nil if background routine is active
208-
head = rawdb.ReadHeadBlock(indexer.db).NumberU64() // The latest announced chain head
219+
stop chan struct{} // Non-nil if background routine is active
220+
done chan struct{} // Non-nil if background routine is active
221+
head = indexer.resolveHead() // The latest announced chain head
209222

210223
headCh = make(chan ChainHeadEvent)
211224
sub = chain.SubscribeChainHeadEvent(headCh)

core/txpool/txpool.go

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -186,13 +186,15 @@ func (p *TxPool) loop(head *types.Header) {
186186
// Try to inject a busy marker and start a reset if successful
187187
select {
188188
case resetBusy <- struct{}{}:
189-
statedb, err := p.chain.StateAt(newHead.Root)
190-
if err != nil {
191-
log.Crit("Failed to reset txpool state", "err", err)
189+
// Updates the statedb with the new chain head. The head state may be
190+
// unavailable if the initial state sync has not yet completed.
191+
if statedb, err := p.chain.StateAt(newHead.Root); err != nil {
192+
log.Error("Failed to reset txpool state", "err", err)
193+
} else {
194+
p.stateLock.Lock()
195+
p.state = statedb
196+
p.stateLock.Unlock()
192197
}
193-
p.stateLock.Lock()
194-
p.state = statedb
195-
p.stateLock.Unlock()
196198

197199
// Busy marker injected, start a new subpool reset
198200
go func(oldHead, newHead *types.Header) {

0 commit comments

Comments
 (0)