Skip to content

feat: add failsafe to transaction replay #6212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
983f9ce
feat: add failsafe to transaction replay
hstove Jun 19, 2025
4fa3499
fix: clippy
hstove Jun 20, 2025
d422eae
feat: wait for +2 blocks after previous fork tip to reset
hstove Jun 21, 2025
772798b
fix: use pending burn block in bitcoin_block_arrival
hstove Jun 25, 2025
bbea1c7
wip: update tx replay tests to work with failsafe
hstove Jun 25, 2025
2ac624a
Merge remote-tracking branch 'core/develop' into feat/tx-replay-failsafe
hstove Jun 25, 2025
991f010
fix: build warnings in test commands
hstove Jun 25, 2025
de8b6e6
fix: tx_replay_disagreement
hstove Jun 25, 2025
8f790dc
fix: btc_on_stx test
hstove Jun 26, 2025
d4d3917
fix: revert logic for setting `expected_burn_height`
hstove Jun 26, 2025
c6ca6b9
fix: dont rely on node burn block to be processed
hstove Jun 27, 2025
795b4a7
Revert "fix: dont rely on node burn block to be processed"
hstove Jun 30, 2025
4cc0758
fix: better descendency check
hstove Jun 30, 2025
d70d554
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jun 30, 2025
fbce54a
fix: off-by-one in failsafe descendency check
hstove Jul 1, 2025
bdfed91
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jul 1, 2025
66bfc13
fix: prevent panic in test setup
hstove Jul 1, 2025
78ac7cc
crc: review comments
hstove Jul 2, 2025
2499c16
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jul 3, 2025
13a71b2
crc: code improvements from feedback
hstove Jul 3, 2025
3130e83
fix: return `bool` instead of `Result<bool>`
hstove Jul 3, 2025
8f4d08e
fix: use `DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS` in tests
hstove Jul 7, 2025
89920fe
fix: incorrect block wait logic, test logic ordering
hstove Jul 7, 2025
d63fb5d
fix: rename integration test name
hstove Jul 7, 2025
a9c9efa
feat: changelog for failsafe
hstove Jul 7, 2025
03ba279
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jul 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions stacks-signer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to the versioning scheme outlined in the [README.md](README.md).

## Unreleased

### Added

- When a transaction replay set has been active for a configurable number of burn blocks (defaulting to `2`) without being cleared, it is automatically cleared. This acts as a "failsafe" to ensure chain liveness while transaction replay is rolled out.

## [3.1.0.0.13.0]

### Changed
Expand Down
4 changes: 4 additions & 0 deletions stacks-signer/src/chainstate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ pub struct ProposalEvalConfig {
pub reorg_attempts_activity_timeout: Duration,
/// Time to wait before submitting a block proposal to the stacks-node
pub proposal_wait_for_parent_time: Duration,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: u64,
}

impl From<&SignerConfig> for ProposalEvalConfig {
Expand All @@ -155,6 +158,7 @@ impl From<&SignerConfig> for ProposalEvalConfig {
reorg_attempts_activity_timeout: value.reorg_attempts_activity_timeout,
tenure_idle_timeout_buffer: value.tenure_idle_timeout_buffer,
proposal_wait_for_parent_time: value.proposal_wait_for_parent_time,
reset_replay_set_after_fork_blocks: value.reset_replay_set_after_fork_blocks,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions stacks-signer/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ pub(crate) mod tests {
reorg_attempts_activity_timeout: config.reorg_attempts_activity_timeout,
proposal_wait_for_parent_time: config.proposal_wait_for_parent_time,
validate_with_replay_tx: config.validate_with_replay_tx,
reset_replay_set_after_fork_blocks: config.reset_replay_set_after_fork_blocks,
}
}

Expand Down
19 changes: 19 additions & 0 deletions stacks-signer/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ const DEFAULT_TENURE_IDLE_TIMEOUT_BUFFER_SECS: u64 = 2;
/// cannot determine that our stacks-node has processed the parent
/// block
const DEFAULT_PROPOSAL_WAIT_TIME_FOR_PARENT_SECS: u64 = 15;
/// Default number of blocks after a fork to reset the replay set,
/// as a failsafe mechanism
pub const DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS: u64 = 2;

#[derive(thiserror::Error, Debug)]
/// An error occurred parsing the provided configuration
Expand Down Expand Up @@ -184,6 +187,9 @@ pub struct SignerConfig {
pub proposal_wait_for_parent_time: Duration,
/// Whether or not to validate blocks with replay transactions
pub validate_with_replay_tx: bool,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: u64,
}

/// The parsed configuration for the signer
Expand Down Expand Up @@ -237,6 +243,9 @@ pub struct GlobalConfig {
pub dry_run: bool,
/// Whether or not to validate blocks with replay transactions
pub validate_with_replay_tx: bool,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: u64,
}

/// Internal struct for loading up the config file
Expand Down Expand Up @@ -288,6 +297,9 @@ struct RawConfigFile {
pub dry_run: Option<bool>,
/// Whether or not to validate blocks with replay transactions
pub validate_with_replay_tx: Option<bool>,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: Option<u64>,
}

impl RawConfigFile {
Expand Down Expand Up @@ -413,6 +425,10 @@ impl TryFrom<RawConfigFile> for GlobalConfig {
// https://github.com/stacks-network/stacks-core/issues/6087
let validate_with_replay_tx = raw_data.validate_with_replay_tx.unwrap_or(false);

let reset_replay_set_after_fork_blocks = raw_data
.reset_replay_set_after_fork_blocks
.unwrap_or(DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS);

Ok(Self {
node_host: raw_data.node_host,
endpoint,
Expand All @@ -435,6 +451,7 @@ impl TryFrom<RawConfigFile> for GlobalConfig {
tenure_idle_timeout_buffer,
proposal_wait_for_parent_time,
validate_with_replay_tx,
reset_replay_set_after_fork_blocks,
})
}
}
Expand Down Expand Up @@ -714,12 +731,14 @@ network = "mainnet"
auth_password = "abcd"
db_path = ":memory:"
validate_with_replay_tx = true
reset_replay_set_after_fork_blocks = 100
"#
);
let config = GlobalConfig::load_from_str(&config_toml).unwrap();
assert_eq!(config.stacks_address.to_string(), expected_addr);
assert_eq!(config.to_chain_id(), CHAIN_ID_MAINNET);
assert!(config.validate_with_replay_tx);
assert_eq!(config.reset_replay_set_after_fork_blocks, 100);
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions stacks-signer/src/runloop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ impl<Signer: SignerTrait<T>, T: StacksMessageCodec + Clone + Send + Debug> RunLo
reorg_attempts_activity_timeout: self.config.reorg_attempts_activity_timeout,
proposal_wait_for_parent_time: self.config.proposal_wait_for_parent_time,
validate_with_replay_tx: self.config.validate_with_replay_tx,
reset_replay_set_after_fork_blocks: self.config.reset_replay_set_after_fork_blocks,
}))
}

Expand Down
2 changes: 2 additions & 0 deletions stacks-signer/src/tests/chainstate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use stacks_common::util::secp256k1::MessageSignature;
use crate::chainstate::{ProposalEvalConfig, SortitionMinerStatus, SortitionState, SortitionsView};
use crate::client::tests::MockServerClient;
use crate::client::StacksClient;
use crate::config::DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS;
use crate::signerdb::{BlockInfo, SignerDb};

fn setup_test_environment(
Expand Down Expand Up @@ -92,6 +93,7 @@ fn setup_test_environment(
tenure_idle_timeout_buffer: Duration::from_secs(2),
reorg_attempts_activity_timeout: Duration::from_secs(3),
proposal_wait_for_parent_time: Duration::from_secs(0),
reset_replay_set_after_fork_blocks: DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS,
},
};

Expand Down
3 changes: 3 additions & 0 deletions stacks-signer/src/v0/signer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ pub struct Signer {
pub validate_with_replay_tx: bool,
/// Scope of Tx Replay in terms of Burn block boundaries
pub tx_replay_scope: ReplayScopeOpt,
/// The number of blocks after the past tip to reset the replay set
pub reset_replay_set_after_fork_blocks: u64,
}

impl std::fmt::Display for SignerMode {
Expand Down Expand Up @@ -244,6 +246,7 @@ impl SignerTrait<SignerMessage> for Signer {
global_state_evaluator,
validate_with_replay_tx: signer_config.validate_with_replay_tx,
tx_replay_scope: None,
reset_replay_set_after_fork_blocks: signer_config.reset_replay_set_after_fork_blocks,
}
}

Expand Down
122 changes: 115 additions & 7 deletions stacks-signer/src/v0/signer_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::time::{Duration, UNIX_EPOCH};
use blockstack_lib::chainstate::burn::ConsensusHashExtensions;
use blockstack_lib::chainstate::nakamoto::{NakamotoBlock, NakamotoBlockHeader};
use blockstack_lib::chainstate::stacks::{StacksTransaction, TransactionPayload};
use blockstack_lib::net::api::get_tenures_fork_info::TenureForkingInfo;
use blockstack_lib::net::api::postblock_proposal::NakamotoBlockProposal;
use clarity::types::chainstate::StacksAddress;
#[cfg(any(test, feature = "testing"))]
Expand Down Expand Up @@ -594,9 +595,11 @@ impl LocalStateMachine {
&& next_burn_block_hash != expected_burn_block.consensus_hash;
if node_behind_expected || node_on_equal_fork {
let err_msg = format!(
"Node has not processed the next burn block yet. Expected height = {}, Expected consensus hash = {}",
"Node has not processed the next burn block yet. Expected height = {}, Expected consensus hash = {}, Node height = {}, Node consensus hash = {}",
expected_burn_block.burn_block_height,
expected_burn_block.consensus_hash,
next_burn_block_height,
next_burn_block_hash,
);
*self = Self::Pending {
update: StateMachineUpdate::BurnBlock(expected_burn_block),
Expand All @@ -620,7 +623,7 @@ impl LocalStateMachine {
client,
&expected_burn_block,
&prior_state_machine,
replay_state,
&replay_state,
)? {
match new_replay_state {
ReplayState::Unset => {
Expand All @@ -632,6 +635,17 @@ impl LocalStateMachine {
*tx_replay_scope = Some(new_scope);
}
}
} else if Self::handle_possible_replay_failsafe(
&replay_state,
&expected_burn_block,
proposal_config.reset_replay_set_after_fork_blocks,
) {
info!(
"Signer state: replay set is stalled after {} tenures. Clearing the replay set.",
proposal_config.reset_replay_set_after_fork_blocks
);
tx_replay_set = ReplayTransactionSet::none();
*tx_replay_scope = None;
}
}

Expand Down Expand Up @@ -981,11 +995,24 @@ impl LocalStateMachine {
client: &StacksClient,
expected_burn_block: &NewBurnBlock,
prior_state_machine: &SignerStateMachine,
replay_state: ReplayState,
replay_state: &ReplayState,
) -> Result<Option<ReplayState>, SignerChainstateError> {
if expected_burn_block.burn_block_height > prior_state_machine.burn_block_height {
// no bitcoin fork, because we're advancing the burn block height
return Ok(None);
if Self::new_burn_block_fork_descendency_check(
db,
expected_burn_block,
prior_state_machine.burn_block_height,
prior_state_machine.burn_block,
)? {
info!("Detected bitcoin fork - prior tip is not parent of new tip.";
"new_tip.burn_block_height" => expected_burn_block.burn_block_height,
"new_tip.consensus_hash" => %expected_burn_block.consensus_hash,
"prior_tip.burn_block_height" => prior_state_machine.burn_block_height,
"prior_tip.consensus_hash" => %prior_state_machine.burn_block,
);
} else {
return Ok(None);
}
}
if expected_burn_block.consensus_hash == prior_state_machine.burn_block {
// no bitcoin fork, because we're at the same burn block hash as before
Expand Down Expand Up @@ -1088,7 +1115,7 @@ impl LocalStateMachine {
client: &StacksClient,
expected_burn_block: &NewBurnBlock,
prior_state_machine: &SignerStateMachine,
scope: ReplayScope,
scope: &ReplayScope,
) -> Result<Option<ReplayState>, SignerChainstateError> {
info!("Tx Replay: detected bitcoin fork while in replay mode. Tryng to handle the fork";
"expected_burn_block.height" => expected_burn_block.burn_block_height,
Expand Down Expand Up @@ -1182,6 +1209,10 @@ impl LocalStateMachine {
return Ok(None);
}

Ok(Some(Self::get_forked_txs_from_fork_info(&fork_info)))
}

fn get_forked_txs_from_fork_info(fork_info: &[TenureForkingInfo]) -> Vec<StacksTransaction> {
// Collect transactions to be replayed across the forked blocks
let mut forked_blocks = fork_info
.iter()
Expand All @@ -1201,6 +1232,83 @@ impl LocalStateMachine {
))
.cloned()
.collect::<Vec<_>>();
Ok(Some(forked_txs))
forked_txs
}

/// Failsafe check: decide whether a stalled replay set should be abandoned.
///
/// Returns `true` only when a replay is in progress (`ReplayState::InProgress`)
/// and `new_burn_block` is more than `reset_replay_set_after_fork_blocks` burn
/// blocks past the replay scope's `past_tip`; otherwise `false`.
fn handle_possible_replay_failsafe(
    replay_state: &ReplayState,
    new_burn_block: &NewBurnBlock,
    reset_replay_set_after_fork_blocks: u64,
) -> bool {
    let ReplayState::InProgress(_, replay_scope) = replay_state else {
        // Not in replay mode, so there is nothing to reset.
        return false;
    };
    // Height beyond which the replay set is considered stalled.
    let failsafe_height =
        replay_scope.past_tip.burn_block_height + reset_replay_set_after_fork_blocks;
    new_burn_block.burn_block_height > failsafe_height
}

/// Check if the new burn block is a fork, by checking if the new burn block
/// is a descendant of the prior burn block.
///
/// Walks the parent chain of `new_burn_block` (via the signer DB) back down to
/// `prior_burn_block_height`, and returns `Ok(true)` only when the ancestor
/// found at that height carries a different consensus hash than
/// `prior_burn_block_ch` (i.e. the prior tip is NOT an ancestor of the new
/// tip). This is a best-effort check: a non-advancing height, a gap larger
/// than `max_height_delta`, or any DB lookup failure yields `Ok(false)`
/// ("no fork detected") rather than an error.
fn new_burn_block_fork_descendency_check(
    db: &SignerDb,
    new_burn_block: &NewBurnBlock,
    prior_burn_block_height: u64,
    prior_burn_block_ch: ConsensusHash,
) -> Result<bool, SignerChainstateError> {
    // Bound the ancestor walk: if the new tip is more than this many blocks
    // past the prior tip, skip the check instead of scanning a long chain.
    let max_height_delta = 10;
    let height_delta = match new_burn_block
        .burn_block_height
        .checked_sub(prior_burn_block_height)
    {
        None | Some(0) => return Ok(false), // same height or older
        Some(d) if d > max_height_delta => return Ok(false), // too far apart
        Some(d) => d,
    };

    // Start the walk from the *parent* of the new tip: resolve the new block
    // by consensus hash, then fetch its parent by burn block hash. A missing
    // DB entry is logged and treated as "no fork", not surfaced as an error.
    let mut parent_burn_block_info = match db
        .get_burn_block_by_ch(&new_burn_block.consensus_hash)
        .and_then(|burn_block_info| {
            db.get_burn_block_by_hash(&burn_block_info.parent_burn_block_hash)
        }) {
        Ok(info) => info,
        Err(e) => {
            warn!(
                "Failed to get parent burn block info for {}",
                new_burn_block.consensus_hash;
                "error" => ?e,
            );
            return Ok(false);
        }
    };

    // Walk ancestors until we reach the prior tip's height. `height_delta`
    // iterations suffice because we already stepped back one block above.
    for _ in 0..height_delta {
        if parent_burn_block_info.block_height == prior_burn_block_height {
            // Fork iff the ancestor at the prior height is a different block.
            return Ok(parent_burn_block_info.consensus_hash != prior_burn_block_ch);
        }

        parent_burn_block_info =
            match db.get_burn_block_by_hash(&parent_burn_block_info.parent_burn_block_hash) {
                Ok(bi) => bi,
                Err(e) => {
                    warn!(
                        "Failed to get parent burn block info for {}. Error: {e}",
                        parent_burn_block_info.parent_burn_block_hash
                    );
                    return Ok(false);
                }
            };
    }

    // Never reached the prior height within the bounded walk — assume no fork.
    Ok(false)
}
}
23 changes: 23 additions & 0 deletions stackslib/src/net/api/postblock_proposal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ pub static TEST_REPLAY_TRANSACTIONS: LazyLock<
TestFlag<std::collections::VecDeque<StacksTransaction>>,
> = LazyLock::new(TestFlag::default);

#[cfg(any(test, feature = "testing"))]
/// Whether to reject any transaction while we're in a replay set.
pub static TEST_REJECT_REPLAY_TXS: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);

// This enum is used to supply a `reason_code` for validation
// rejection responses. This is serialized as an enum with string
// type (in jsonschema terminology).
Expand Down Expand Up @@ -200,6 +204,24 @@ fn fault_injection_validation_delay() {
#[cfg(not(any(test, feature = "testing")))]
fn fault_injection_validation_delay() {}

/// Test-only fault injection hook: when the `TEST_REJECT_REPLAY_TXS` flag is
/// set, fail with an `InvalidTransactionReplay` rejection; otherwise succeed.
#[cfg(any(test, feature = "testing"))]
fn fault_injection_reject_replay_txs() -> Result<(), BlockValidateRejectReason> {
    if !TEST_REJECT_REPLAY_TXS.get() {
        return Ok(());
    }
    Err(BlockValidateRejectReason {
        reason_code: ValidateRejectCode::InvalidTransactionReplay,
        reason: "Rejected by test flag".into(),
    })
}

#[cfg(not(any(test, feature = "testing")))]
/// Production counterpart of the test-only fault injection hook: never rejects.
fn fault_injection_reject_replay_txs() -> Result<(), BlockValidateRejectReason> {
    Ok(())
}

/// Represents a block proposed to the `v3/block_proposal` endpoint for validation
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NakamotoBlockProposal {
Expand Down Expand Up @@ -723,6 +745,7 @@ impl NakamotoBlockProposal {
// Allow this to happen, tenure extend checks happen elsewhere.
break;
}
fault_injection_reject_replay_txs()?;
let Some(replay_tx) = replay_txs.pop_front() else {
// During transaction replay, we expect that the block only
// contains transactions from the replay set. Thus, if we're here,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2816,6 +2816,17 @@ impl BitcoinRPCRequest {
BitcoinRPCRequest::send(config, payload)
}

/// Query the bitcoin node for all known chain tips via the `getchaintips`
/// JSON-RPC method (which takes no parameters), returning the raw JSON
/// response from the node.
pub fn get_chain_tips(config: &Config) -> RPCResult<serde_json::Value> {
    let request = BitcoinRPCRequest {
        jsonrpc: "2.0".to_string(),
        id: "stacks".to_string(),
        method: "getchaintips".to_string(),
        params: vec![],
    };
    BitcoinRPCRequest::send(config, request)
}

pub fn send(config: &Config, payload: BitcoinRPCRequest) -> RPCResult<serde_json::Value> {
let request = BitcoinRPCRequest::build_rpc_request(config, &payload);
let timeout = Duration::from_secs(u64::from(config.burnchain.timeout));
Expand Down
Loading
Loading