Skip to content

Commit b90c0c3

Browse files
committed
Make re-org strat more cautious and add more config (#4151)
## Proposed Changes This change attempts to prevent failed re-orgs by: 1. Lowering the re-org cutoff from 2s to 1s. This is informed by a failed re-org attempted by @yorickdowne's node. The failed block was requested in the 1.5-2s window due to a Vouch failure, and failed to propagate to the majority of the network before the attestation deadline at 4s. 2. Allowing users to adjust their re-org cutoff depending on observed network conditions and their risk profile. The static 2-second cutoff was too rigid. 3. Adding a `--proposer-reorg-disallowed-offsets` flag which can be used to prohibit reorgs at certain slots. This is intended to help work around an issue whereby reorging blocks at slot 1 are currently taking ~1.6s to propagate on gossip rather than ~500ms. This is suspected to be due to a cache miss in current versions of Prysm, which should be fixed in their next release. ## Additional Info I'm of two minds about removing the `shuffling_stable` check which checks for blocks at slot 0 in the epoch. If we removed it, users would be able to configure Lighthouse to try reorging at slot 0, which likely wouldn't work very well due to interactions with the proposer index cache. I think we could leave it for now and revisit it later.
1 parent 00cf5fc commit b90c0c3

File tree

12 files changed

+218
-18
lines changed

12 files changed

+218
-18
lines changed

beacon_node/beacon_chain/src/beacon_chain.rs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ use task_executor::{ShutdownReason, TaskExecutor};
106106
use tokio_stream::Stream;
107107
use tree_hash::TreeHash;
108108
use types::beacon_state::CloneConfig;
109-
use types::consts::merge::INTERVALS_PER_SLOT;
110109
use types::*;
111110

112111
pub type ForkChoiceError = fork_choice::Error<crate::ForkChoiceStoreError>;
@@ -128,12 +127,6 @@ pub const VALIDATOR_PUBKEY_CACHE_LOCK_TIMEOUT: Duration = Duration::from_secs(1)
128127
/// The timeout for the eth1 finalization cache
129128
pub const ETH1_FINALIZATION_CACHE_LOCK_TIMEOUT: Duration = Duration::from_millis(200);
130129

131-
/// The latest delay from the start of the slot at which to attempt a 1-slot re-org.
132-
fn max_re_org_slot_delay(seconds_per_slot: u64) -> Duration {
133-
// Allow at least half of the attestation deadline for the block to propagate.
134-
Duration::from_secs(seconds_per_slot) / INTERVALS_PER_SLOT as u32 / 2
135-
}
136-
137130
// These keys are all zero because they get stored in different columns, see `DBColumn` type.
138131
pub const BEACON_CHAIN_DB_KEY: Hash256 = Hash256::zero();
139132
pub const OP_POOL_DB_KEY: Hash256 = Hash256::zero();
@@ -3761,7 +3754,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
37613754
// 1. It seems we have time to propagate and still receive the proposer boost.
37623755
// 2. The current head block was seen late.
37633756
// 3. The `get_proposer_head` conditions from fork choice pass.
3764-
let proposing_on_time = slot_delay < max_re_org_slot_delay(self.spec.seconds_per_slot);
3757+
let proposing_on_time = slot_delay < self.config.re_org_cutoff(self.spec.seconds_per_slot);
37653758
if !proposing_on_time {
37663759
debug!(
37673760
self.log,
@@ -3791,6 +3784,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
37913784
slot,
37923785
canonical_head,
37933786
re_org_threshold,
3787+
&self.config.re_org_disallowed_offsets,
37943788
self.config.re_org_max_epochs_since_finalization,
37953789
)
37963790
.map_err(|e| match e {
@@ -4069,6 +4063,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
40694063
.get_preliminary_proposer_head(
40704064
head_block_root,
40714065
re_org_threshold,
4066+
&self.config.re_org_disallowed_offsets,
40724067
self.config.re_org_max_epochs_since_finalization,
40734068
)
40744069
.map_err(|e| e.map_inner_error(Error::ProposerHeadForkChoiceError))?;
@@ -4079,7 +4074,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
40794074
let re_org_block_slot = head_slot + 1;
40804075
let fork_choice_slot = info.current_slot;
40814076

4082-
// If a re-orging proposal isn't made by the `max_re_org_slot_delay` then we give up
4077+
// If a re-orging proposal isn't made by the `re_org_cutoff` then we give up
40834078
// and allow the fork choice update for the canonical head through so that we may attest
40844079
// correctly.
40854080
let current_slot_ok = if head_slot == fork_choice_slot {
@@ -4090,7 +4085,7 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
40904085
.and_then(|slot_start| {
40914086
let now = self.slot_clock.now_duration()?;
40924087
let slot_delay = now.saturating_sub(slot_start);
4093-
Some(slot_delay <= max_re_org_slot_delay(self.spec.seconds_per_slot))
4088+
Some(slot_delay <= self.config.re_org_cutoff(self.spec.seconds_per_slot))
40944089
})
40954090
.unwrap_or(false)
40964091
} else {

beacon_node/beacon_chain/src/builder.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use fork_choice::{CountUnrealized, ForkChoice, ResetPayloadStatuses};
2222
use futures::channel::mpsc::Sender;
2323
use operation_pool::{OperationPool, PersistedOperationPool};
2424
use parking_lot::RwLock;
25-
use proto_array::ReOrgThreshold;
25+
use proto_array::{DisallowedReOrgOffsets, ReOrgThreshold};
2626
use slasher::Slasher;
2727
use slog::{crit, error, info, Logger};
2828
use slot_clock::{SlotClock, TestingSlotClock};
@@ -175,6 +175,15 @@ where
175175
self
176176
}
177177

178+
/// Sets the proposer re-org disallowed offsets list.
179+
pub fn proposer_re_org_disallowed_offsets(
180+
mut self,
181+
disallowed_offsets: DisallowedReOrgOffsets,
182+
) -> Self {
183+
self.chain_config.re_org_disallowed_offsets = disallowed_offsets;
184+
self
185+
}
186+
178187
/// Sets the store (database).
179188
///
180189
/// Should generally be called early in the build chain.

beacon_node/beacon_chain/src/chain_config.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
pub use proto_array::ReOrgThreshold;
1+
pub use proto_array::{DisallowedReOrgOffsets, ReOrgThreshold};
22
use serde_derive::{Deserialize, Serialize};
33
use std::time::Duration;
44
use types::{Checkpoint, Epoch};
55

66
pub const DEFAULT_RE_ORG_THRESHOLD: ReOrgThreshold = ReOrgThreshold(20);
77
pub const DEFAULT_RE_ORG_MAX_EPOCHS_SINCE_FINALIZATION: Epoch = Epoch::new(2);
8+
/// Default to 1/12th of the slot, which is 1 second on mainnet.
9+
pub const DEFAULT_RE_ORG_CUTOFF_DENOMINATOR: u32 = 12;
810
pub const DEFAULT_FORK_CHOICE_BEFORE_PROPOSAL_TIMEOUT: u64 = 250;
911

1012
/// Default fraction of a slot lookahead for payload preparation (12/3 = 4 seconds on mainnet).
@@ -34,6 +36,13 @@ pub struct ChainConfig {
3436
pub re_org_threshold: Option<ReOrgThreshold>,
3537
/// Maximum number of epochs since finalization for attempting a proposer re-org.
3638
pub re_org_max_epochs_since_finalization: Epoch,
39+
/// Maximum delay after the start of the slot at which to propose a reorging block.
40+
pub re_org_cutoff_millis: Option<u64>,
41+
/// Additional epoch offsets at which re-orging block proposals are not permitted.
42+
///
43+
/// By default this list is empty, but it can be useful for reacting to network conditions, e.g.
44+
/// slow gossip of re-org blocks at slot 1 in the epoch.
45+
pub re_org_disallowed_offsets: DisallowedReOrgOffsets,
3746
/// Number of milliseconds to wait for fork choice before proposing a block.
3847
///
3948
/// If set to 0 then block proposal will not wait for fork choice at all.
@@ -82,6 +91,8 @@ impl Default for ChainConfig {
8291
max_network_size: 10 * 1_048_576, // 10M
8392
re_org_threshold: Some(DEFAULT_RE_ORG_THRESHOLD),
8493
re_org_max_epochs_since_finalization: DEFAULT_RE_ORG_MAX_EPOCHS_SINCE_FINALIZATION,
94+
re_org_cutoff_millis: None,
95+
re_org_disallowed_offsets: DisallowedReOrgOffsets::default(),
8596
fork_choice_before_proposal_timeout_ms: DEFAULT_FORK_CHOICE_BEFORE_PROPOSAL_TIMEOUT,
8697
// Builder fallback configs that are set in `clap` will override these.
8798
builder_fallback_skips: 3,
@@ -100,3 +111,14 @@ impl Default for ChainConfig {
100111
}
101112
}
102113
}
114+
115+
impl ChainConfig {
116+
/// The latest delay from the start of the slot at which to attempt a 1-slot re-org.
117+
pub fn re_org_cutoff(&self, seconds_per_slot: u64) -> Duration {
118+
self.re_org_cutoff_millis
119+
.map(Duration::from_millis)
120+
.unwrap_or_else(|| {
121+
Duration::from_secs(seconds_per_slot) / DEFAULT_RE_ORG_CUTOFF_DENOMINATOR
122+
})
123+
}
124+
}

beacon_node/http_api/tests/interactive_tests.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Generic tests that make use of the (newer) `InteractiveApiTester`
22
use beacon_chain::{
3-
chain_config::ReOrgThreshold,
3+
chain_config::{DisallowedReOrgOffsets, ReOrgThreshold},
44
test_utils::{AttestationStrategy, BlockStrategy, SyncCommitteeStrategy},
55
};
66
use eth2::types::DepositContractData;
@@ -110,6 +110,8 @@ pub struct ReOrgTest {
110110
misprediction: bool,
111111
/// Whether to expect withdrawals to change on epoch boundaries.
112112
expect_withdrawals_change_on_epoch: bool,
113+
/// Epoch offsets to avoid proposing reorg blocks at.
114+
disallowed_offsets: Vec<u64>,
113115
}
114116

115117
impl Default for ReOrgTest {
@@ -127,6 +129,7 @@ impl Default for ReOrgTest {
127129
should_re_org: true,
128130
misprediction: false,
129131
expect_withdrawals_change_on_epoch: false,
132+
disallowed_offsets: vec![],
130133
}
131134
}
132135
}
@@ -238,6 +241,32 @@ pub async fn proposer_boost_re_org_head_distance() {
238241
.await;
239242
}
240243

244+
// Check that a re-org at a disallowed offset fails.
245+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
246+
pub async fn proposer_boost_re_org_disallowed_offset() {
247+
let offset = 4;
248+
proposer_boost_re_org_test(ReOrgTest {
249+
head_slot: Slot::new(E::slots_per_epoch() + offset - 1),
250+
disallowed_offsets: vec![offset],
251+
should_re_org: false,
252+
..Default::default()
253+
})
254+
.await;
255+
}
256+
257+
// Check that a re-org at the *only* allowed offset succeeds.
258+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
259+
pub async fn proposer_boost_re_org_disallowed_offset_exact() {
260+
let offset = 4;
261+
let disallowed_offsets = (0..E::slots_per_epoch()).filter(|o| *o != offset).collect();
262+
proposer_boost_re_org_test(ReOrgTest {
263+
head_slot: Slot::new(E::slots_per_epoch() + offset - 1),
264+
disallowed_offsets,
265+
..Default::default()
266+
})
267+
.await;
268+
}
269+
241270
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
242271
pub async fn proposer_boost_re_org_very_unhealthy() {
243272
proposer_boost_re_org_test(ReOrgTest {
@@ -286,6 +315,7 @@ pub async fn proposer_boost_re_org_test(
286315
should_re_org,
287316
misprediction,
288317
expect_withdrawals_change_on_epoch,
318+
disallowed_offsets,
289319
}: ReOrgTest,
290320
) {
291321
assert!(head_slot > 0);
@@ -320,6 +350,9 @@ pub async fn proposer_boost_re_org_test(
320350
.proposer_re_org_max_epochs_since_finalization(Epoch::new(
321351
max_epochs_since_finalization,
322352
))
353+
.proposer_re_org_disallowed_offsets(
354+
DisallowedReOrgOffsets::new::<E>(disallowed_offsets).unwrap(),
355+
)
323356
})),
324357
)
325358
.await;

beacon_node/src/cli.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,28 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> {
885885
allowed. Default: 2")
886886
.conflicts_with("disable-proposer-reorgs")
887887
)
888+
.arg(
889+
Arg::with_name("proposer-reorg-cutoff")
890+
.long("proposer-reorg-cutoff")
891+
.value_name("MILLISECONDS")
892+
.help("Maximum delay after the start of the slot at which to propose a reorging \
893+
block. Lower values can prevent failed reorgs by ensuring the block has \
894+
ample time to propagate and be processed by the network. The default is \
895+
1/12th of a slot (1 second on mainnet)")
896+
.conflicts_with("disable-proposer-reorgs")
897+
)
898+
.arg(
899+
Arg::with_name("proposer-reorg-disallowed-offsets")
900+
.long("proposer-reorg-disallowed-offsets")
901+
.value_name("N1,N2,...")
902+
.help("Comma-separated list of integer offsets which can be used to avoid \
903+
proposing reorging blocks at certain slots. An offset of N means that \
904+
reorging proposals will not be attempted at any slot such that \
905+
`slot % SLOTS_PER_EPOCH == N`. By default only re-orgs at offset 0 will be \
906+
avoided. Any offsets supplied with this flag will impose additional \
907+
restrictions.")
908+
.conflicts_with("disable-proposer-reorgs")
909+
)
888910
.arg(
889911
Arg::with_name("prepare-payload-lookahead")
890912
.long("prepare-payload-lookahead")

beacon_node/src/config.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use beacon_chain::chain_config::{
2-
ReOrgThreshold, DEFAULT_PREPARE_PAYLOAD_LOOKAHEAD_FACTOR,
2+
DisallowedReOrgOffsets, ReOrgThreshold, DEFAULT_PREPARE_PAYLOAD_LOOKAHEAD_FACTOR,
33
DEFAULT_RE_ORG_MAX_EPOCHS_SINCE_FINALIZATION, DEFAULT_RE_ORG_THRESHOLD,
44
};
55
use clap::ArgMatches;
@@ -686,6 +686,23 @@ pub fn get_config<E: EthSpec>(
686686
client_config.chain.re_org_max_epochs_since_finalization =
687687
clap_utils::parse_optional(cli_args, "proposer-reorg-epochs-since-finalization")?
688688
.unwrap_or(DEFAULT_RE_ORG_MAX_EPOCHS_SINCE_FINALIZATION);
689+
client_config.chain.re_org_cutoff_millis =
690+
clap_utils::parse_optional(cli_args, "proposer-reorg-cutoff")?;
691+
692+
if let Some(disallowed_offsets_str) =
693+
clap_utils::parse_optional::<String>(cli_args, "proposer-reorg-disallowed-offsets")?
694+
{
695+
let disallowed_offsets = disallowed_offsets_str
696+
.split(',')
697+
.map(|s| {
698+
s.parse()
699+
.map_err(|e| format!("invalid disallowed-offsets: {e:?}"))
700+
})
701+
.collect::<Result<Vec<u64>, _>>()?;
702+
client_config.chain.re_org_disallowed_offsets =
703+
DisallowedReOrgOffsets::new::<E>(disallowed_offsets)
704+
.map_err(|e| format!("invalid disallowed-offsets: {e:?}"))?;
705+
}
689706
}
690707

691708
// Note: This overrides any previous flags that enable this option.

book/src/late-block-re-orgs.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@ There are three flags which control the re-orging behaviour:
1414
* `--proposer-reorg-threshold N`: attempt to orphan blocks with less than N% of the committee vote. If this parameter isn't set then N defaults to 20% when the feature is enabled.
1515
* `--proposer-reorg-epochs-since-finalization N`: only attempt to re-org late blocks when the number of epochs since finalization is less than or equal to N. The default is 2 epochs,
1616
meaning re-orgs will only be attempted when the chain is finalizing optimally.
17+
* `--proposer-reorg-cutoff T`: only attempt to re-org late blocks when the proposal is being made
18+
before T milliseconds into the slot. Delays between the validator client and the beacon node can
19+
cause some blocks to be requested later than the start of the slot, which makes them more likely
20+
to fail. The default cutoff is 1000ms on mainnet, which gives blocks 3000ms to be signed and
21+
propagated before the attestation deadline at 4000ms.
22+
* `--proposer-reorg-disallowed-offsets N1,N2,N3...`: prohibit Lighthouse from attempting to reorg at
23+
specific offsets in each epoch. A disallowed offset `N` prevents reorging blocks from being
24+
proposed at any `slot` such that `slot % SLOTS_PER_EPOCH == N`. The value of this flag is a
25+
comma-separated list of integer offsets.
1726

1827
All flags should be applied to `lighthouse bn`. The default configuration is recommended as it
1928
balances the chance of the re-org succeeding against the chance of failure due to attestations

consensus/fork_choice/src/fork_choice.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::{ForkChoiceStore, InvalidationOperation};
22
use proto_array::{
3-
Block as ProtoBlock, ExecutionStatus, ProposerHeadError, ProposerHeadInfo,
4-
ProtoArrayForkChoice, ReOrgThreshold,
3+
Block as ProtoBlock, DisallowedReOrgOffsets, ExecutionStatus, ProposerHeadError,
4+
ProposerHeadInfo, ProtoArrayForkChoice, ReOrgThreshold,
55
};
66
use slog::{crit, debug, warn, Logger};
77
use ssz_derive::{Decode, Encode};
@@ -533,6 +533,7 @@ where
533533
current_slot: Slot,
534534
canonical_head: Hash256,
535535
re_org_threshold: ReOrgThreshold,
536+
disallowed_offsets: &DisallowedReOrgOffsets,
536537
max_epochs_since_finalization: Epoch,
537538
) -> Result<ProposerHeadInfo, ProposerHeadError<Error<proto_array::Error>>> {
538539
// Ensure that fork choice has already been updated for the current slot. This prevents
@@ -564,6 +565,7 @@ where
564565
canonical_head,
565566
self.fc_store.justified_balances(),
566567
re_org_threshold,
568+
disallowed_offsets,
567569
max_epochs_since_finalization,
568570
)
569571
.map_err(ProposerHeadError::convert_inner_error)
@@ -573,6 +575,7 @@ where
573575
&self,
574576
canonical_head: Hash256,
575577
re_org_threshold: ReOrgThreshold,
578+
disallowed_offsets: &DisallowedReOrgOffsets,
576579
max_epochs_since_finalization: Epoch,
577580
) -> Result<ProposerHeadInfo, ProposerHeadError<Error<proto_array::Error>>> {
578581
let current_slot = self.fc_store.get_current_slot();
@@ -582,6 +585,7 @@ where
582585
canonical_head,
583586
self.fc_store.justified_balances(),
584587
re_org_threshold,
588+
disallowed_offsets,
585589
max_epochs_since_finalization,
586590
)
587591
.map_err(ProposerHeadError::convert_inner_error)

consensus/proto_array/src/error.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pub enum Error {
5050
block_root: Hash256,
5151
parent_root: Hash256,
5252
},
53+
InvalidEpochOffset(u64),
5354
Arith(ArithError),
5455
}
5556

consensus/proto_array/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ mod ssz_container;
88
pub use crate::justified_balances::JustifiedBalances;
99
pub use crate::proto_array::{calculate_committee_fraction, InvalidationOperation};
1010
pub use crate::proto_array_fork_choice::{
11-
Block, DoNotReOrg, ExecutionStatus, ProposerHeadError, ProposerHeadInfo, ProtoArrayForkChoice,
12-
ReOrgThreshold,
11+
Block, DisallowedReOrgOffsets, DoNotReOrg, ExecutionStatus, ProposerHeadError,
12+
ProposerHeadInfo, ProtoArrayForkChoice, ReOrgThreshold,
1313
};
1414
pub use error::Error;
1515

0 commit comments

Comments
 (0)