Skip to content

Commit 1756f40

Browse files
authored
Merge pull request #9929 from yyforyongyu/fix-decalylog
htlcswitch: remove the bucket `batchReplayBucket`
2 parents 7857d2c + 2defb11 commit 1756f40

File tree

5 files changed

+72
-82
lines changed

5 files changed

+72
-82
lines changed

docs/release-notes/release-notes-0.19.2.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@
5151

5252
## Performance Improvements
5353

54+
- The replay protection is
55+
[optimized](https://github.com/lightningnetwork/lnd/pull/9929) to use less disk
56+
space such that the `sphinxreplay.db` or the `decayedlogdb_kv` table will grow
57+
much more slowly.
58+
5459
## Deprecations
5560

5661
# Technical and Architectural Updates

htlcswitch/decayedlog.go

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package htlcswitch
22

33
import (
4-
"bytes"
54
"encoding/binary"
65
"errors"
76
"fmt"
@@ -24,11 +23,6 @@ var (
2423
// bytes of a received HTLC's hashed shared secret as the key and the HTLC's
2524
// CLTV expiry as the value.
2625
sharedHashBucket = []byte("shared-hash")
27-
28-
// batchReplayBucket is a bucket that maps batch identifiers to
29-
// serialized ReplaySets. This is used to give idempotency in the event
30-
// that a batch is processed more than once.
31-
batchReplayBucket = []byte("batch-replay")
3226
)
3327

3428
var (
@@ -138,11 +132,6 @@ func (d *DecayedLog) initBuckets() error {
138132
return ErrDecayedLogInit
139133
}
140134

141-
_, err = tx.CreateTopLevelBucket(batchReplayBucket)
142-
if err != nil {
143-
return ErrDecayedLogInit
144-
}
145-
146135
return nil
147136
}, func() {})
148137
}
@@ -329,11 +318,8 @@ func (d *DecayedLog) Put(hash *sphinx.HashPrefix, cltv uint32) error {
329318
// PutBatch accepts a pending batch of hashed secret entries to write to disk.
330319
// Each hashed secret is inserted with a corresponding time value, dictating
331320
// when the entry will be evicted from the log.
332-
// NOTE: This method enforces idempotency by writing the replay set obtained
333-
// from the first attempt for a particular batch ID, and decoding the return
334-
// value to subsequent calls. For the indices of the replay set to be aligned
335-
// properly, the batch MUST be constructed identically to the first attempt,
336-
// pruning will cause the indices to become invalid.
321+
//
322+
// TODO(yy): remove this method and use `Put` instead.
337323
func (d *DecayedLog) PutBatch(b *sphinx.Batch) (*sphinx.ReplaySet, error) {
338324
// Since batched boltdb txns may be executed multiple times before
339325
// succeeding, we will create a new replay set for each invocation to
@@ -348,25 +334,6 @@ func (d *DecayedLog) PutBatch(b *sphinx.Batch) (*sphinx.ReplaySet, error) {
348334
return ErrDecayedLogCorrupted
349335
}
350336

351-
// Load the batch replay bucket, which will be used to either
352-
// retrieve the result of previously processing this batch, or
353-
// to write the result of this operation.
354-
batchReplayBkt := tx.ReadWriteBucket(batchReplayBucket)
355-
if batchReplayBkt == nil {
356-
return ErrDecayedLogCorrupted
357-
}
358-
359-
// Check for the existence of this batch's id in the replay
360-
// bucket. If a non-nil value is found, this indicates that we
361-
// have already processed this batch before. We deserialize the
362-
// resulting and return it to ensure calls to put batch are
363-
// idempotent.
364-
replayBytes := batchReplayBkt.Get(b.ID)
365-
if replayBytes != nil {
366-
replays = sphinx.NewReplaySet()
367-
return replays.Decode(bytes.NewReader(replayBytes))
368-
}
369-
370337
// The CLTV will be stored into scratch and then stored into the
371338
// sharedHashBucket.
372339
var scratch [4]byte
@@ -394,17 +361,7 @@ func (d *DecayedLog) PutBatch(b *sphinx.Batch) (*sphinx.ReplaySet, error) {
394361
// batch's construction.
395362
replays.Merge(b.ReplaySet)
396363

397-
// Write the replay set under the batch identifier to the batch
398-
// replays bucket. This can be used during recovery to test (1)
399-
// that a particular batch was successfully processed and (2)
400-
// recover the indexes of the adds that were rejected as
401-
// replays.
402-
var replayBuf bytes.Buffer
403-
if err := replays.Encode(&replayBuf); err != nil {
404-
return err
405-
}
406-
407-
return batchReplayBkt.Put(b.ID, replayBuf.Bytes())
364+
return nil
408365
}); err != nil {
409366
return nil, err
410367
}

htlcswitch/hop/iterator.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -745,7 +745,8 @@ func (r *DecodeHopIteratorResponse) Result() (Iterator, lnwire.FailCode) {
745745
// the presented readers and rhashes *NEVER* deviate across invocations for the
746746
// same id.
747747
func (p *OnionProcessor) DecodeHopIterators(id []byte,
748-
reqs []DecodeHopIteratorRequest) ([]DecodeHopIteratorResponse, error) {
748+
reqs []DecodeHopIteratorRequest,
749+
reforward bool) ([]DecodeHopIteratorResponse, error) {
749750

750751
var (
751752
batchSize = len(reqs)
@@ -783,6 +784,8 @@ func (p *OnionProcessor) DecodeHopIterators(id []byte,
783784
b.Val,
784785
))
785786
})
787+
788+
// TODO(yy): use `p.router.ProcessOnionPacket` instead.
786789
err = tx.ProcessOnionPacket(
787790
seqNum, onionPkt, req.RHash, req.IncomingCltv, opts...,
788791
)
@@ -864,11 +867,12 @@ func (p *OnionProcessor) DecodeHopIterators(id []byte,
864867
continue
865868
}
866869

867-
// If this index is contained in the replay set, mark it with a
868-
// temporary channel failure error code. We infer that the
869-
// offending error was due to a replayed packet because this
870-
// index was found in the replay set.
871-
if replays.Contains(uint16(i)) {
870+
// If this index is contained in the replay set, and it is not a
871+
// reforwarding on startup, mark it with a permanent channel
872+
// failure error code. We infer that the offending error was due
873+
// to a replayed packet because this index was found in the
874+
// replay set.
875+
if !reforward && replays.Contains(uint16(i)) {
872876
log.Errorf("unable to process onion packet: %v",
873877
sphinx.ErrReplayedPacket)
874878

htlcswitch/link.go

Lines changed: 53 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,10 @@ type ChannelLinkConfig struct {
108108
// blobs, which are then used to inform how to forward an HTLC.
109109
//
110110
// NOTE: This function assumes the same set of readers and preimages
111-
// are always presented for the same identifier.
112-
DecodeHopIterators func([]byte, []hop.DecodeHopIteratorRequest) (
111+
// are always presented for the same identifier. The last boolean is
112+
// used to decide whether this is a reforwarding or not - when it's
113+
// reforwarding, we skip the replay check enforced in our decay log.
114+
DecodeHopIterators func([]byte, []hop.DecodeHopIteratorRequest, bool) (
113115
[]hop.DecodeHopIteratorResponse, error)
114116

115117
// ExtractErrorEncrypter function is responsible for decoding HTLC
@@ -3630,6 +3632,13 @@ func (l *channelLink) processRemoteSettleFails(fwdPkg *channeldb.FwdPkg) {
36303632
return
36313633
}
36323634

3635+
// Exit early if the fwdPkg is already processed.
3636+
if fwdPkg.State == channeldb.FwdStateCompleted {
3637+
l.log.Debugf("skipped processing completed fwdPkg %v", fwdPkg)
3638+
3639+
return
3640+
}
3641+
36333642
l.log.Debugf("settle-fail-filter: %v", fwdPkg.SettleFailFilter)
36343643

36353644
var switchPackets []*htlcPacket
@@ -3738,13 +3747,36 @@ func (l *channelLink) processRemoteAdds(fwdPkg *channeldb.FwdPkg) {
37383747
return
37393748
}
37403749

3750+
// Exit early if the fwdPkg is already processed.
3751+
if fwdPkg.State == channeldb.FwdStateCompleted {
3752+
l.log.Debugf("skipped processing completed fwdPkg %v", fwdPkg)
3753+
3754+
return
3755+
}
3756+
37413757
l.log.Tracef("processing %d remote adds for height %d",
37423758
len(fwdPkg.Adds), fwdPkg.Height)
37433759

3744-
decodeReqs := make(
3745-
[]hop.DecodeHopIteratorRequest, 0, len(fwdPkg.Adds),
3746-
)
3747-
for _, update := range fwdPkg.Adds {
3760+
// decodeReqs is a list of requests sent to the onion decoder. We expect
3761+
// the same length of responses to be returned.
3762+
decodeReqs := make([]hop.DecodeHopIteratorRequest, 0, len(fwdPkg.Adds))
3763+
3764+
// unackedAdds is a list of ADDs that's waiting for the remote's
3765+
// settle/fail update.
3766+
unackedAdds := make([]*lnwire.UpdateAddHTLC, 0, len(fwdPkg.Adds))
3767+
3768+
for i, update := range fwdPkg.Adds {
3769+
// If this index is already found in the ack filter, the
3770+
// response to this forwarding decision has already been
3771+
// committed by one of our commitment txns. ADDs in this state
3772+
// are waiting for the rest of the fwding package to get acked
3773+
// before being garbage collected.
3774+
if fwdPkg.State == channeldb.FwdStateProcessed &&
3775+
fwdPkg.AckFilter.Contains(uint16(i)) {
3776+
3777+
continue
3778+
}
3779+
37483780
if msg, ok := update.UpdateMsg.(*lnwire.UpdateAddHTLC); ok {
37493781
// Before adding the new htlc to the state machine,
37503782
// parse the onion object in order to obtain the
@@ -3761,15 +3793,20 @@ func (l *channelLink) processRemoteAdds(fwdPkg *channeldb.FwdPkg) {
37613793
}
37623794

37633795
decodeReqs = append(decodeReqs, req)
3796+
unackedAdds = append(unackedAdds, msg)
37643797
}
37653798
}
37663799

3800+
// If the fwdPkg has already been processed, it means we are
3801+
// reforwarding the packets again, which happens only on a restart.
3802+
reforward := fwdPkg.State == channeldb.FwdStateProcessed
3803+
37673804
// Atomically decode the incoming htlcs, simultaneously checking for
37683805
// replay attempts. A particular index in the returned, spare list of
37693806
// channel iterators should only be used if the failure code at the
37703807
// same index is lnwire.FailCodeNone.
37713808
decodeResps, sphinxErr := l.cfg.DecodeHopIterators(
3772-
fwdPkg.ID(), decodeReqs,
3809+
fwdPkg.ID(), decodeReqs, reforward,
37733810
)
37743811
if sphinxErr != nil {
37753812
l.failf(LinkFailureError{code: ErrInternalError},
@@ -3779,23 +3816,10 @@ func (l *channelLink) processRemoteAdds(fwdPkg *channeldb.FwdPkg) {
37793816

37803817
var switchPackets []*htlcPacket
37813818

3782-
for i, update := range fwdPkg.Adds {
3819+
for i, update := range unackedAdds {
37833820
idx := uint16(i)
3784-
3785-
//nolint:forcetypeassert
3786-
add := *update.UpdateMsg.(*lnwire.UpdateAddHTLC)
37873821
sourceRef := fwdPkg.SourceRef(idx)
3788-
3789-
if fwdPkg.State == channeldb.FwdStateProcessed &&
3790-
fwdPkg.AckFilter.Contains(idx) {
3791-
3792-
// If this index is already found in the ack filter,
3793-
// the response to this forwarding decision has already
3794-
// been committed by one of our commitment txns. ADDs
3795-
// in this state are waiting for the rest of the fwding
3796-
// package to get acked before being garbage collected.
3797-
continue
3798-
}
3822+
add := *update
37993823

38003824
// An incoming HTLC add has been full-locked in. As a result we
38013825
// can now examine the forwarding details of the HTLC, and the
@@ -3815,8 +3839,10 @@ func (l *channelLink) processRemoteAdds(fwdPkg *channeldb.FwdPkg) {
38153839
add.ID, failureCode, add.OnionBlob, &sourceRef,
38163840
)
38173841

3818-
l.log.Errorf("unable to decode onion hop "+
3819-
"iterator: %v", failureCode)
3842+
l.log.Errorf("unable to decode onion hop iterator "+
3843+
"for htlc(id=%v, hash=%x): %v", add.ID,
3844+
add.PaymentHash, failureCode)
3845+
38203846
continue
38213847
}
38223848

@@ -4120,17 +4146,15 @@ func (l *channelLink) processRemoteAdds(fwdPkg *channeldb.FwdPkg) {
41204146
return
41214147
}
41224148

4123-
replay := fwdPkg.State != channeldb.FwdStateLockedIn
4124-
4125-
l.log.Debugf("forwarding %d packets to switch: replay=%v",
4126-
len(switchPackets), replay)
4149+
l.log.Debugf("forwarding %d packets to switch: reforward=%v",
4150+
len(switchPackets), reforward)
41274151

41284152
// NOTE: This call is made synchronous so that we ensure all circuits
41294153
// are committed in the exact order that they are processed in the link.
41304154
// Failing to do this could cause reorderings/gaps in the range of
41314155
// opened circuits, which violates assumptions made by the circuit
41324156
// trimming.
4133-
l.forwardBatch(replay, switchPackets...)
4157+
l.forwardBatch(reforward, switchPackets...)
41344158
}
41354159

41364160
// experimentalEndorsement returns the value to set for our outgoing

htlcswitch/mock.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ func (p *mockIteratorDecoder) DecodeHopIterator(r io.Reader, rHash []byte,
522522
}
523523

524524
func (p *mockIteratorDecoder) DecodeHopIterators(id []byte,
525-
reqs []hop.DecodeHopIteratorRequest) (
525+
reqs []hop.DecodeHopIteratorRequest, _ bool) (
526526
[]hop.DecodeHopIteratorResponse, error) {
527527

528528
idHash := sha256.Sum256(id)

0 commit comments

Comments
 (0)