Skip to content

Commit b369a85

Browse files
eth/protocols/snap: add healing and syncing metrics (#32258)
Adds the state heal time and snap sync time metrics so they can be displayed in Grafana.

Co-authored-by: Gary Rong <garyrong0905@gmail.com>
1 parent 16117eb commit b369a85

File tree

2 files changed

+26
-3
lines changed

2 files changed

+26
-3
lines changed

eth/protocols/snap/metrics.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,7 @@ var (
6666
// discarded during the snap sync.
6767
largeStorageDiscardGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/discard", nil)
6868
largeStorageResumedGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/storage/chunk/resume", nil)
69+
70+
stateSyncTimeGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/time/statesync", nil)
71+
stateHealTimeGauge = metrics.NewRegisteredGauge("eth/protocols/snap/sync/time/stateheal", nil)
6972
)

eth/protocols/snap/sync.go

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -502,8 +502,10 @@ type Syncer struct {
502502
storageHealed uint64 // Number of storage slots downloaded during the healing stage
503503
storageHealedBytes common.StorageSize // Number of raw storage bytes persisted to disk during the healing stage
504504

505-
startTime time.Time // Time instance when snapshot sync started
506-
logTime time.Time // Time instance when status was last reported
505+
startTime time.Time // Time instance when snapshot sync started
506+
healStartTime time.Time // Time instance when the state healing started
507+
syncTimeOnce sync.Once // Ensure that the state sync time is uploaded only once
508+
logTime time.Time // Time instance when status was last reported
507509

508510
pend sync.WaitGroup // Tracks network request goroutines for graceful shutdown
509511
lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root)
@@ -685,6 +687,14 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
685687
s.cleanStorageTasks()
686688
s.cleanAccountTasks()
687689
if len(s.tasks) == 0 && s.healer.scheduler.Pending() == 0 {
690+
// State healing phase completed, record the elapsed time in metrics.
691+
// Note: healing may be rerun in subsequent cycles to fill gaps between
692+
// pivot states (e.g., if chain sync takes longer).
693+
if !s.healStartTime.IsZero() {
694+
stateHealTimeGauge.Inc(int64(time.Since(s.healStartTime)))
695+
log.Info("State healing phase is completed", "elapsed", common.PrettyDuration(time.Since(s.healStartTime)))
696+
s.healStartTime = time.Time{}
697+
}
688698
return nil
689699
}
690700
// Assign all the data retrieval tasks to any free peers
@@ -693,7 +703,17 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error {
693703
s.assignStorageTasks(storageResps, storageReqFails, cancel)
694704

695705
if len(s.tasks) == 0 {
696-
// Sync phase done, run heal phase
706+
// State sync phase completed, record the elapsed time in metrics.
707+
// Note: the initial state sync runs only once, regardless of whether
708+
// a new cycle is started later. Any state differences in subsequent
709+
// cycles will be handled by the state healer.
710+
s.syncTimeOnce.Do(func() {
711+
stateSyncTimeGauge.Update(int64(time.Since(s.startTime)))
712+
log.Info("State sync phase is completed", "elapsed", common.PrettyDuration(time.Since(s.startTime)))
713+
})
714+
if s.healStartTime.IsZero() {
715+
s.healStartTime = time.Now()
716+
}
697717
s.assignTrienodeHealTasks(trienodeHealResps, trienodeHealReqFails, cancel)
698718
s.assignBytecodeHealTasks(bytecodeHealResps, bytecodeHealReqFails, cancel)
699719
}

0 commit comments

Comments
 (0)