Skip to content

Commit 4594d84

Browse files
authored
Merge pull request #1585 from Roasbeef/retry-n-block
chain_bridge+fn: add retry logic for RPC calls to improve reliability in load-balanced environments
2 parents 360a253 + e901b98 commit 4594d84

File tree

3 files changed

+446
-30
lines changed

3 files changed

+446
-30
lines changed

chain_bridge.go

Lines changed: 72 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"github.com/lightninglabs/lndclient"
1212
"github.com/lightninglabs/neutrino/cache/lru"
1313
"github.com/lightninglabs/taproot-assets/asset"
14+
"github.com/lightninglabs/taproot-assets/fn"
1415
"github.com/lightninglabs/taproot-assets/proof"
1516
"github.com/lightninglabs/taproot-assets/rfq"
1617
"github.com/lightninglabs/taproot-assets/tapchannel"
@@ -49,6 +50,7 @@ type LndRpcChainBridge struct {
4950
lnd *lndclient.LndServices
5051

5152
blockTimestampCache *lru.Cache[uint32, cacheableTimestamp]
53+
retryConfig fn.RetryConfig
5254

5355
assetStore *tapdb.AssetStore
5456
}
@@ -63,7 +65,8 @@ func NewLndRpcChainBridge(lnd *lndclient.LndServices,
6365
blockTimestampCache: lru.NewCache[uint32, cacheableTimestamp](
6466
maxNumBlocksInCache,
6567
),
66-
assetStore: assetStore,
68+
retryConfig: fn.DefaultRetryConfig(),
69+
assetStore: assetStore,
6770
}
6871
}
6972

@@ -112,39 +115,56 @@ func (l *LndRpcChainBridge) RegisterBlockEpochNtfn(
112115
func (l *LndRpcChainBridge) GetBlock(ctx context.Context,
113116
hash chainhash.Hash) (*wire.MsgBlock, error) {
114117

115-
block, err := l.lnd.ChainKit.GetBlock(ctx, hash)
116-
if err != nil {
117-
return nil, fmt.Errorf("unable to retrieve block: %w", err)
118-
}
119-
120-
return block, nil
118+
return fn.RetryFuncN(
119+
ctx, l.retryConfig, func() (*wire.MsgBlock, error) {
120+
block, err := l.lnd.ChainKit.GetBlock(ctx, hash)
121+
if err != nil {
122+
return nil, fmt.Errorf(
123+
"unable to retrieve block: %w", err,
124+
)
125+
}
126+
return block, nil
127+
},
128+
)
121129
}
122130

123131
// GetBlockHeader returns a block header given its hash.
124132
func (l *LndRpcChainBridge) GetBlockHeader(ctx context.Context,
125133
hash chainhash.Hash) (*wire.BlockHeader, error) {
126134

127-
header, err := l.lnd.ChainKit.GetBlockHeader(ctx, hash)
128-
if err != nil {
129-
return nil, fmt.Errorf("unable to retrieve block header: %w",
130-
err)
131-
}
132-
133-
return header, nil
135+
return fn.RetryFuncN(
136+
ctx, l.retryConfig, func() (*wire.BlockHeader, error) {
137+
header, err := l.lnd.ChainKit.GetBlockHeader(ctx, hash)
138+
if err != nil {
139+
return nil, fmt.Errorf(
140+
"unable to retrieve block "+
141+
"header: %w", err,
142+
)
143+
}
144+
return header, nil
145+
},
146+
)
134147
}
135148

136149
// GetBlockHash returns the hash of the block in the best blockchain at the
137150
// given height.
138151
func (l *LndRpcChainBridge) GetBlockHash(ctx context.Context,
139152
blockHeight int64) (chainhash.Hash, error) {
140153

141-
blockHash, err := l.lnd.ChainKit.GetBlockHash(ctx, blockHeight)
142-
if err != nil {
143-
return chainhash.Hash{}, fmt.Errorf("unable to retrieve "+
144-
"block hash: %w", err)
145-
}
146-
147-
return blockHash, nil
154+
return fn.RetryFuncN(
155+
ctx, l.retryConfig, func() (chainhash.Hash, error) {
156+
blockHash, err := l.lnd.ChainKit.GetBlockHash(
157+
ctx, blockHeight,
158+
)
159+
if err != nil {
160+
return chainhash.Hash{}, fmt.Errorf(
161+
"unable to retrieve block hash: %w",
162+
err,
163+
)
164+
}
165+
return blockHash, nil
166+
},
167+
)
148168
}
149169

150170
// VerifyBlock returns an error if a block (with given header and height) is not
@@ -184,12 +204,17 @@ func (l *LndRpcChainBridge) VerifyBlock(ctx context.Context,
184204

185205
// CurrentHeight return the current height of the main chain.
186206
func (l *LndRpcChainBridge) CurrentHeight(ctx context.Context) (uint32, error) {
187-
_, bestHeight, err := l.lnd.ChainKit.GetBestBlock(ctx)
188-
if err != nil {
189-
return 0, fmt.Errorf("unable to grab block height: %w", err)
190-
}
191-
192-
return uint32(bestHeight), nil
207+
return fn.RetryFuncN(
208+
ctx, l.retryConfig, func() (uint32, error) {
209+
_, bestHeight, err := l.lnd.ChainKit.GetBestBlock(ctx)
210+
if err != nil {
211+
return 0, fmt.Errorf(
212+
"unable to grab block height: %w", err,
213+
)
214+
}
215+
return uint32(bestHeight), nil
216+
},
217+
)
193218
}
194219

195220
// GetBlockTimestamp returns the timestamp of the block at the given height.
@@ -207,7 +232,11 @@ func (l *LndRpcChainBridge) GetBlockTimestamp(ctx context.Context,
207232
return int64(cacheTS)
208233
}
209234

210-
hash, err := l.lnd.ChainKit.GetBlockHash(ctx, int64(height))
235+
hash, err := fn.RetryFuncN(
236+
ctx, l.retryConfig, func() (chainhash.Hash, error) {
237+
return l.lnd.ChainKit.GetBlockHash(ctx, int64(height))
238+
},
239+
)
211240
if err != nil {
212241
return 0
213242
}
@@ -229,14 +258,27 @@ func (l *LndRpcChainBridge) GetBlockTimestamp(ctx context.Context,
229258
func (l *LndRpcChainBridge) PublishTransaction(ctx context.Context,
230259
tx *wire.MsgTx, label string) error {
231260

232-
return l.lnd.WalletKit.PublishTransaction(ctx, tx, label)
261+
_, err := fn.RetryFuncN(
262+
ctx, l.retryConfig, func() (struct{}, error) {
263+
return struct{}{}, l.lnd.WalletKit.PublishTransaction(
264+
ctx, tx, label,
265+
)
266+
},
267+
)
268+
return err
233269
}
234270

235271
// EstimateFee returns a fee estimate for the confirmation target.
236272
func (l *LndRpcChainBridge) EstimateFee(ctx context.Context,
237273
confTarget uint32) (chainfee.SatPerKWeight, error) {
238274

239-
return l.lnd.WalletKit.EstimateFeeRate(ctx, int32(confTarget))
275+
return fn.RetryFuncN(
276+
ctx, l.retryConfig, func() (chainfee.SatPerKWeight, error) {
277+
return l.lnd.WalletKit.EstimateFeeRate(
278+
ctx, int32(confTarget),
279+
)
280+
},
281+
)
240282
}
241283

242284
// GenFileChainLookup generates a chain lookup interface for the given

fn/retry.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package fn
2+
3+
import (
4+
"context"
5+
"time"
6+
)
7+
8+
// RetryConfig defines the parameters for exponential backoff retry behavior.
9+
type RetryConfig struct {
10+
// MaxRetries specifies how many times to retry after the initial
11+
// attempt fails.
12+
MaxRetries int
13+
14+
// InitialBackoff sets the delay before the first retry attempt.
15+
InitialBackoff time.Duration
16+
17+
// BackoffMultiplier determines the exponential growth rate of the
18+
// backoff duration between successive retries.
19+
BackoffMultiplier float64
20+
21+
// MaxBackoff caps the delay between retries to prevent excessive
22+
// wait times.
23+
MaxBackoff time.Duration
24+
}
25+
26+
// DefaultRetryConfig provides sensible defaults for retrying RPC calls in
27+
// load-balanced environments where transient failures are expected.
28+
func DefaultRetryConfig() RetryConfig {
29+
return RetryConfig{
30+
MaxRetries: 10,
31+
InitialBackoff: 100 * time.Millisecond,
32+
BackoffMultiplier: 2.0,
33+
MaxBackoff: 5 * time.Second,
34+
}
35+
}
36+
37+
// RetryFuncN executes the provided function with exponential backoff retry
38+
// logic. This is particularly useful for RPC calls in load-balanced
39+
// environments where nodes may temporarily return inconsistent results. The
40+
// function respects context cancellation and returns immediately if the context
41+
// is cancelled.
42+
func RetryFuncN[T any](ctx context.Context,
43+
config RetryConfig, fn func() (T, error)) (T, error) {
44+
45+
var (
46+
result T
47+
err error
48+
)
49+
50+
backoff := config.InitialBackoff
51+
52+
// We'll retry the function up to MaxRetries times, backing off each
53+
// time until it succeeds.
54+
for attempt := 0; attempt <= config.MaxRetries; attempt++ {
55+
result, err = fn()
56+
if err == nil {
57+
return result, nil
58+
}
59+
60+
if attempt == config.MaxRetries {
61+
return result, err
62+
}
63+
64+
// Cap the backoff at the configured maximum to prevent
65+
// excessive delays.
66+
if backoff > config.MaxBackoff {
67+
backoff = config.MaxBackoff
68+
}
69+
70+
// Wait for the backoff duration or until the context is
71+
// cancelled, whichever comes first.
72+
select {
73+
case <-ctx.Done():
74+
return result, ctx.Err()
75+
76+
case <-time.After(backoff):
77+
// Apply the multiplier to implement exponential
78+
// backoff.
79+
backoff = time.Duration(
80+
float64(backoff) * config.BackoffMultiplier,
81+
)
82+
}
83+
}
84+
85+
return result, err
86+
}

0 commit comments

Comments
 (0)