diff --git a/code/crates/config/src/lib.rs b/code/crates/config/src/lib.rs index 3d6ed9459..5d055b38e 100644 --- a/code/crates/config/src/lib.rs +++ b/code/crates/config/src/lib.rs @@ -539,6 +539,11 @@ pub struct TimeoutConfig { /// the vote synchronization protocol. #[serde(with = "humantime_serde")] pub timeout_step: Duration, + + /// How long we wait after entering a round before starting + /// the rebroadcast liveness protocol + #[serde(with = "humantime_serde")] + pub timeout_rebroadcast: Duration, } impl TimeoutConfig { @@ -550,7 +555,9 @@ impl TimeoutConfig { TimeoutKind::PrevoteTimeLimit => self.timeout_step, TimeoutKind::PrecommitTimeLimit => self.timeout_step, // TODO - clarify the rebroadcast timeout duration - TimeoutKind::Rebroadcast => self.timeout_prevote, + TimeoutKind::Rebroadcast => { + self.timeout_propose + self.timeout_prevote + self.timeout_precommit + } } } @@ -568,14 +575,21 @@ impl TimeoutConfig { impl Default for TimeoutConfig { fn default() -> Self { + let timeout_propose = Duration::from_secs(3); + let timeout_prevote = Duration::from_secs(1); + let timeout_precommit = Duration::from_secs(1); + let timeout_step = Duration::from_secs(2); + let timeout_rebroadcast = timeout_propose + timeout_prevote + timeout_precommit; + Self { - timeout_propose: Duration::from_secs(3), + timeout_propose, timeout_propose_delta: Duration::from_millis(500), - timeout_prevote: Duration::from_secs(1), + timeout_prevote, timeout_prevote_delta: Duration::from_millis(500), - timeout_precommit: Duration::from_secs(1), + timeout_precommit, timeout_precommit_delta: Duration::from_millis(500), - timeout_step: Duration::from_secs(2), + timeout_step, + timeout_rebroadcast, } } } diff --git a/code/crates/core-consensus/src/handle/driver.rs b/code/crates/core-consensus/src/handle/driver.rs index 7288a1766..9cfe5fe05 100644 --- a/code/crates/core-consensus/src/handle/driver.rs +++ b/code/crates/core-consensus/src/handle/driver.rs @@ -65,6 +65,15 @@ where co, Effect::StartRound(*height, *round, proposer.clone(), Default::default()) ); + + #[cfg(feature = "metrics")] + metrics.rebroadcast_timeouts.inc(); + + // Schedule rebroadcast timer if necessary + if state.params.vote_sync_mode == VoteSyncMode::Rebroadcast { + let timeout = Timeout::rebroadcast(*round); + perform!(co, Effect::ScheduleTimeout(timeout, Default::default())); + } } DriverInput::ProposeValue(round, _) => { @@ -396,13 +405,6 @@ where ); state.set_last_vote(signed_vote); - - // Schedule rebroadcast timer if necessary - if state.params.vote_sync_mode == VoteSyncMode::Rebroadcast { - let timeout = Timeout::rebroadcast(state.driver.round()); - - perform!(co, Effect::ScheduleTimeout(timeout, Default::default())); - } } Ok(()) diff --git a/code/crates/core-consensus/src/handle/liveness.rs b/code/crates/core-consensus/src/handle/liveness.rs index 81735afcb..1c7348db1 100644 --- a/code/crates/core-consensus/src/handle/liveness.rs +++ b/code/crates/core-consensus/src/handle/liveness.rs @@ -98,7 +98,11 @@ pub async fn on_round_certificate( where Ctx: Context, { - info!(%certificate.height, %certificate.round, "Received round certificate"); + info!( + %certificate.height, + %certificate.round, + "Received round certificate" + ); if certificate.height != state.height() { warn!( @@ -139,5 +143,13 @@ where } apply_driver_input(co, state, metrics, DriverInput::Vote(vote)).await?; } + + // Cancel rebroadcast timer + // TODO: Should do only if the round certificate is well formed, i.e. either PrecommitAny or SkipRound + perform!( + co, + Effect::CancelTimeout(Timeout::rebroadcast(certificate.round), Default::default()) + ); + Ok(()) } diff --git a/code/crates/core-consensus/src/handle/rebroadcast_timeout.rs b/code/crates/core-consensus/src/handle/rebroadcast_timeout.rs index a9bf8f3aa..60f34ae2c 100644 --- a/code/crates/core-consensus/src/handle/rebroadcast_timeout.rs +++ b/code/crates/core-consensus/src/handle/rebroadcast_timeout.rs @@ -5,7 +5,6 @@ pub async fn on_rebroadcast_timeout( co: &Co, state: &mut State, metrics: &Metrics, - timeout: Timeout, ) -> Result<(), Error> where Ctx: Context, @@ -19,8 +18,8 @@ where if let Some(vote) = state.last_signed_prevote.as_ref() { warn!( %height, %round, vote_height = %vote.height(), vote_round = %vote.round(), - "Rebroadcasting vote at {:?} step after {:?} timeout", - state.driver.step(), timeout.kind, + "Rebroadcasting vote at {:?} step", + state.driver.step() ); perform!( @@ -32,8 +31,8 @@ where if let Some(vote) = state.last_signed_precommit.as_ref() { warn!( %height, %round, vote_height = %vote.height(), vote_round = %vote.round(), - "Rebroadcasting vote at {:?} step after {:?} timeout", - state.driver.step(), timeout.kind, + "Rebroadcasting vote at {:?} step", + state.driver.step() ); perform!( co, @@ -60,6 +59,7 @@ where #[cfg(feature = "metrics")] metrics.rebroadcast_timeouts.inc(); + let timeout = Timeout::rebroadcast(round); perform!(co, Effect::ScheduleTimeout(timeout, Default::default())); Ok(()) diff --git a/code/crates/core-consensus/src/handle/timeout.rs b/code/crates/core-consensus/src/handle/timeout.rs index 4b2c744f1..79df7d0e0 100644 --- a/code/crates/core-consensus/src/handle/timeout.rs +++ b/code/crates/core-consensus/src/handle/timeout.rs @@ -50,7 +50,7 @@ where apply_driver_input(co, state, metrics, DriverInput::TimeoutElapsed(timeout)).await?; match timeout.kind { - TimeoutKind::Rebroadcast => on_rebroadcast_timeout(co, state, metrics, timeout).await, + TimeoutKind::Rebroadcast => on_rebroadcast_timeout(co, state, metrics).await, TimeoutKind::PrevoteTimeLimit | TimeoutKind::PrecommitTimeLimit => { on_step_limit_timeout(co, state, metrics, timeout.round).await } diff --git a/code/crates/engine/src/consensus.rs b/code/crates/engine/src/consensus.rs index 2f34c969e..f4ee11913 100644 --- a/code/crates/engine/src/consensus.rs +++ b/code/crates/engine/src/consensus.rs @@ -190,7 +190,11 @@ impl Timeouts { TimeoutKind::Precommit => self.config.timeout_precommit, TimeoutKind::PrevoteTimeLimit => self.config.timeout_step, TimeoutKind::PrecommitTimeLimit => self.config.timeout_step, - TimeoutKind::Rebroadcast => self.config.timeout_prevote, + TimeoutKind::Rebroadcast => { + self.config.timeout_propose + + self.config.timeout_prevote + + self.config.timeout_precommit + } } } @@ -202,7 +206,10 @@ impl Timeouts { TimeoutKind::Precommit => c.timeout_precommit += c.timeout_precommit_delta, TimeoutKind::PrevoteTimeLimit => (), TimeoutKind::PrecommitTimeLimit => (), - TimeoutKind::Rebroadcast => (), + TimeoutKind::Rebroadcast => { + c.timeout_rebroadcast += + c.timeout_propose_delta + c.timeout_prevote_delta + c.timeout_precommit_delta + } }; } } diff --git a/code/crates/test/app/config.toml b/code/crates/test/app/config.toml index bc6c70c4a..2a44109c9 100644 --- a/code/crates/test/app/config.toml +++ b/code/crates/test/app/config.toml @@ -62,6 +62,10 @@ timeout_commit = "0s" # Override with MALACHITE__CONSENSUS__TIMEOUT_STEP env variable timeout_step = "2s" +# How long we wait after entering a round before starting the rebroadcast liveness protocol +# Override with MALACHITE__CONSENSUS__TIMEOUT_REBROADCAST env variable +timeout_rebroadcast = "5s" + # The message(s) required to carry the value payload. # Available options are: # - "parts-only": Full value is included in the proposal parts and there is no explicit Proposal message (default) diff --git a/code/examples/channel/config.toml b/code/examples/channel/config.toml index 9e7c472e1..50912f687 100644 --- a/code/examples/channel/config.toml +++ b/code/examples/channel/config.toml @@ -62,6 +62,10 @@ timeout_commit = "0s" # Override with MALACHITE__CONSENSUS__TIMEOUT_STEP env variable timeout_step = "2s" +# How long we wait after entering a round before starting the rebroadcast liveness protocol +# Override with MALACHITE__CONSENSUS__TIMEOUT_REBROADCAST env variable +timeout_rebroadcast = "5s" + # The message(s) required to carry the value payload. # Available options are: # - "parts-only": Full value is included in the proposal parts and there is no explicit Proposal message (default) diff --git a/code/scripts/spawn.bash b/code/scripts/spawn.bash index fdb062e9c..95230ca8f 100755 --- a/code/scripts/spawn.bash +++ b/code/scripts/spawn.bash @@ -38,11 +38,12 @@ fi # Environment variables export MALACHITE__CONSENSUS__P2P__PROTOCOL__TYPE="gossipsub" -export MALACHITE__CONSENSUS__TIMEOUT_PROPOSE="2s" -export MALACHITE__CONSENSUS__TIMEOUT_PROPOSE_DELTA="1s" +export MALACHITE__CONSENSUS__TIMEOUT_PROPOSE="3s" +export MALACHITE__CONSENSUS__TIMEOUT_PROPOSE_DELTA="500ms" export MALACHITE__CONSENSUS__TIMEOUT_PREVOTE="1s" +export MALACHITE__CONSENSUS__TIMEOUT_PREVOTE_DELTA="500ms" export MALACHITE__CONSENSUS__TIMEOUT_PRECOMMIT="1s" -export MALACHITE__CONSENSUS__TIMEOUT_COMMIT="0s" +export MALACHITE__CONSENSUS__TIMEOUT_PRECOMMIT_DELTA="500ms" # Set the timeout step to 2 seconds to trigger the vote sync and polka certificate faster export MALACHITE__CONSENSUS__TIMEOUT_STEP="2s" # Set to request-response to be able to sync polka certificates, "broadcast" does not yet send the certificates