triedb/pathdb: polish code

rjl493456442 · rjl493456442 · commit e4ac9f49cdb4 · 2025-05-28T13:47:24.000+08:00
diff --git a/triedb/pathdb/generate.go b/triedb/pathdb/generate.go
@@ -738,7 +738,7 @@ func (g *generator) generateAccounts(ctx *generatorContext, accMarker []byte) er
 				return err
 			}
 		}
-		// Some account counter, unmark the marker
+		// Some account processed, unmark the marker
 		accMarker = nil
 		return nil
 	}
@@ -784,7 +784,7 @@ func (g *generator) generate(ctx *generatorContext) {
 	// Even if the snapshot data is updated during the interruption (before
 	// or at the marker), the assumption is still held.
 	// For the account or storage slot at the interruption, they will be
-	// counter twice by the generator(they are already counter in the
+	// processed twice by the generator(they are already processed in the
 	// last run) but it's fine.
 	var (
 		accMarker, _ = splitMarker(g.progress)
diff --git a/triedb/pathdb/history_index.go b/triedb/pathdb/history_index.go
@@ -122,7 +122,7 @@ func (r *indexReader) refresh() error {
 }
 
 // readGreaterThan locates the first element that is greater than the specified
-// value. If no such element is found, MaxUint64 is returned.
+// id. If no such element is found, MaxUint64 is returned.
 func (r *indexReader) readGreaterThan(id uint64) (uint64, error) {
 	index := sort.Search(len(r.descList), func(i int) bool {
 		return id < r.descList[i].max
diff --git a/triedb/pathdb/history_index_block.go b/triedb/pathdb/history_index_block.go
@@ -98,7 +98,17 @@ func (d *indexBlockDesc) decode(blob []byte) {
 // Each chunk begins with the full value of the first integer, followed by
 // subsequent integers representing the differences between the current value
 // and the preceding one. Integers are encoded with variable-size for best
-// storage efficiency.
+// storage efficiency. Each chunk can be illustrated as below.
+//
+//	  Restart ---> +----------------+
+//	               |  Full integer  |
+//	               +----------------+
+//	               | Diff with prev |
+//	               +----------------+
+//	               |      ...       |
+//	               +----------------+
+//	               | Diff with prev |
+//	               +----------------+
 //
 // Empty index block is regarded as invalid.
 func parseIndexBlock(blob []byte) ([]uint32, []byte, error) {
@@ -118,6 +128,8 @@ func parseIndexBlock(blob []byte) ([]uint32, []byte, error) {
 		restart := binary.BigEndian.Uint32(blob[len(blob)-int(i+1)*4:])
 		restarts = append(restarts, restart)
 	}
+	// Validate that restart points are strictly ordered and within the valid
+	// data range.
 	var prev uint32
 	for i := 0; i < len(restarts); i++ {
 		if i != 0 {
@@ -198,18 +210,21 @@ func (br *blockReader) readGreaterThan(id uint64) (uint64, error) {
 		}
 		pos += n
 	}
+	// No element greater than the specified id was found.
 	if index == len(br.restarts) {
 		return math.MaxUint64, nil
 	}
+	// The element which is the first one greater than the specified id
+	// is exactly the one located at the restart point.
 	item, _ := binary.Uvarint(br.data[br.restarts[index]:])
 	return item, nil
 }
 
 type blockWriter struct {
-	desc     *indexBlockDesc
-	restarts []uint32
-	scratch  []byte
-	data     []byte
+	desc     *indexBlockDesc // Descriptor of the block
+	restarts []uint32        // Offsets into the data slice, marking the start of each section
+	scratch  []byte          // Buffer used for encoding full integers or value differences
+	data     []byte          // Aggregated encoded data slice
 }
 
 func newBlockWriter(blob []byte, desc *indexBlockDesc) (*blockWriter, error) {
@@ -234,16 +249,18 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc) (*blockWriter, error) {
 }
 
 // append adds a new element to the block. The new element must be greater than
-// the previously one.
+// the previous one. The provided ID is assumed to always be greater than 0.
 func (b *blockWriter) append(id uint64) error {
 	if id == 0 {
 		return errors.New("invalid zero id")
 	}
 	if id <= b.desc.max {
 		return fmt.Errorf("append element out of order, last: %d, this: %d", b.desc.max, id)
 	}
+	// Rotate the current restart section if it's full
 	if b.desc.entries%indexBlockRestartLen == 0 {
-		// Rotate the current restart range if it's full
+		// Save the offset within the data slice as the restart point
+		// for the next section.
 		b.restarts = append(b.restarts, uint32(len(b.data)))
 
 		// The restart point item can either be encoded in variable
@@ -256,8 +273,10 @@ func (b *blockWriter) append(id uint64) error {
 		n := binary.PutUvarint(b.scratch[0:], id)
 		b.data = append(b.data, b.scratch[:n]...)
 	} else {
-		// Non-head elements within a restart range are encoded using
-		// their difference from the preceding element.
+		// The current section is not full, append the element.
+		// The element which is not the first one in the section
+		// is encoded using the value difference from the preceding
+		// element.
 		n := binary.PutUvarint(b.scratch[0:], id-b.desc.max)
 		b.data = append(b.data, b.scratch[:n]...)
 	}
@@ -310,16 +329,17 @@ func (b *blockWriter) sectionLast(section int) uint64 {
 
 // sectionSearch looks up the specified value in the given section,
 // the position and the preceding value will be returned if found.
-func (b *blockWriter) sectionSearch(section int, n uint64) (prev uint64, pos int) {
+func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int) {
 	b.scanSection(section, func(v uint64, p int) bool {
 		if n == v {
 			pos = p
-			return true
+			found = true
+			return true // terminate iteration
 		}
 		prev = v
-		return false
+		return false // continue iteration
 	})
-	return prev, pos
+	return found, prev, pos
 }
 
 // pop removes the last element from the block. The assumption is held that block
@@ -351,7 +371,10 @@ func (b *blockWriter) pop(id uint64) error {
 	}
 	// Look up the element preceding the one to be popped, in order to update
 	// the maximum element in the block.
-	prev, pos := b.sectionSearch(len(b.restarts)-1, id)
+	found, prev, pos := b.sectionSearch(len(b.restarts)-1, id)
+	if !found {
+		return fmt.Errorf("pop element is not found, last: %d, this: %d", b.desc.max, id)
+	}
 	b.desc.max = prev
 	b.data = b.data[:pos]
 	b.desc.entries -= 1
diff --git a/triedb/pathdb/history_indexer.go b/triedb/pathdb/history_indexer.go
@@ -57,7 +57,6 @@ func newBatchIndexer(db ethdb.KeyValueStore, delete bool) *batchIndexer {
 // process iterates through the accounts and their associated storage slots in the
 // state history, tracking the mapping between state and history IDs.
 func (b *batchIndexer) process(h *history, historyID uint64) error {
-	buf := crypto.NewKeccakState()
 	for _, address := range h.accountList {
 		b.counter += 1
 		b.accounts[address] = append(b.accounts[address], historyID)
@@ -72,7 +71,7 @@ func (b *batchIndexer) process(h *history, historyID uint64) error {
 			// conversion from storage key to hash is necessary for non-v0 histories.
 			slotHash := slotKey
 			if h.meta.version != stateHistoryV0 {
-				slotHash = crypto.HashData(buf, slotKey.Bytes())
+				slotHash = crypto.Keccak256Hash(slotKey.Bytes())
 			}
 			b.storages[address][slotHash] = append(b.storages[address][slotHash], historyID)
 		}
@@ -225,6 +224,14 @@ type interruptSignal struct {
 	result    chan error
 }
 
+// indexIniter is responsible for completing the indexing of remaining state
+// histories in batch. It runs as a one-time background thread and terminates
+// once all available state histories are indexed.
+//
+// Afterward, new state histories should be indexed synchronously alongside
+// the state data itself, ensuring both the history and its index are available.
+// If a state history is removed due to a rollback, the associated indexes should
+// be unmarked accordingly.
 type indexIniter struct {
 	disk      ethdb.KeyValueStore
 	freezer   ethdb.AncientStore
@@ -422,7 +429,7 @@ func (i *indexIniter) index(done chan struct{}, interrupt *atomic.Int32, lastID
 				)
 				// Override the ETA if larger than the largest until now
 				eta := time.Duration(left/speed) * time.Millisecond
-				log.Info("Indexing state history", "counter", done, "left", left, "eta", common.PrettyDuration(eta))
+				log.Info("Indexing state history", "processed", done, "left", left, "eta", common.PrettyDuration(eta))
 			}
 		}
 		// Check interruption signal and abort process if it's fired
@@ -450,13 +457,16 @@ func (i *indexIniter) index(done chan struct{}, interrupt *atomic.Int32, lastID
 // process is finished, all state histories are marked as fully indexed,
 // enabling handling of requests for historical states. Thereafter, any new
 // state histories must be indexed or unindexed synchronously, ensuring that
-// the history index is created or removed with the corresponding state history.
+// the history index is created or removed along with the corresponding
+// state history.
 type historyIndexer struct {
 	initer  *indexIniter
 	disk    ethdb.KeyValueStore
 	freezer ethdb.AncientStore
 }
 
+// newHistoryIndexer constructs the history indexer and launches the background
+// initer to complete the indexing of any remaining state histories.
 func newHistoryIndexer(disk ethdb.KeyValueStore, freezer ethdb.AncientStore, lastHistoryID uint64) *historyIndexer {
 	return &historyIndexer{
 		initer:  newIndexIniter(disk, freezer, lastHistoryID),
diff --git a/triedb/pathdb/history_reader.go b/triedb/pathdb/history_reader.go
@@ -32,9 +32,16 @@ import (
 // stateIdent represents the identifier of a state element, which can be
 // either an account or a storage slot.
 type stateIdent struct {
-	account     bool
-	address     common.Address
-	storageHash common.Hash // null if account is true, the hash of the raw storage slot key
+	account bool
+	address common.Address
+
+	// The hash of the storage slot key. This is used instead of the raw slot key
+	// because, in legacy state histories (prior to the Cancun fork), the slot
+	// identifier is the hash of the key, and the original key (preimage) cannot
+	// be recovered. To maintain backward compatibility, the key hash is used.
+	//
+	// This field is null if the identifier refers to account data.
+	storageHash common.Hash
 }
 
 // String returns the string format state identifier.
@@ -122,7 +129,7 @@ func newIndexReaderWithLimitTag(db ethdb.KeyValueReader, state stateIdent) (*ind
 }
 
 // readGreaterThan locates the first element that is greater than the specified
-// value. If no such element is found, MaxUint64 is returned.
+// id. If no such element is found, MaxUint64 is returned.
 //
 // Note: It is possible that additional histories have been indexed since the
 // reader was created. The reader should be refreshed as needed to load the

Original file line number	Diff line number	Diff line change
`@@ -738,7 +738,7 @@ func (g *generator) generateAccounts(ctx *generatorContext, accMarker []byte) er`
`738`	`738`	`return err`
`739`	`739`	`}`
`740`	`740`	`}`
`741`		`- // Some account counter, unmark the marker`
	`741`	`+ // Some account processed, unmark the marker`
`742`	`742`	`accMarker = nil`
`743`	`743`	`return nil`
`744`	`744`	`}`
`@@ -784,7 +784,7 @@ func (g *generator) generate(ctx *generatorContext) {`
`784`	`784`	`// Even if the snapshot data is updated during the interruption (before`
`785`	`785`	`// or at the marker), the assumption is still held.`
`786`	`786`	`// For the account or storage slot at the interruption, they will be`
`787`		`- // counter twice by the generator(they are already counter in the`
	`787`	`+ // processed twice by the generator(they are already processed in the`
`788`	`788`	`// last run) but it's fine.`
`789`	`789`	`var (`
`790`	`790`	`accMarker, _ = splitMarker(g.progress)`
Original file line number	Diff line number	Diff line change
`@@ -122,7 +122,7 @@ func (r *indexReader) refresh() error {`
`122`	`122`	`}`
`123`	`123`
`124`	`124`	`// readGreaterThan locates the first element that is greater than the specified`
`125`		`-// value. If no such element is found, MaxUint64 is returned.`
	`125`	`+// id. If no such element is found, MaxUint64 is returned.`
`126`	`126`	`func (r *indexReader) readGreaterThan(id uint64) (uint64, error) {`
`127`	`127`	`index := sort.Search(len(r.descList), func(i int) bool {`
`128`	`128`	`return id < r.descList[i].max`