Skip to content

feat: add failsafe to transaction replay #6212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
983f9ce
feat: add failsafe to transaction replay
hstove Jun 19, 2025
4fa3499
fix: clippy
hstove Jun 20, 2025
d422eae
feat: wait for +2 blocks after previous fork tip to reset
hstove Jun 21, 2025
772798b
fix: use pending burn block in bitcoin_block_arrival
hstove Jun 25, 2025
bbea1c7
wip: update tx replay tests to work with failsafe
hstove Jun 25, 2025
2ac624a
Merge remote-tracking branch 'core/develop' into feat/tx-replay-failsafe
hstove Jun 25, 2025
991f010
fix: build warnings in test commands
hstove Jun 25, 2025
de8b6e6
fix: tx_replay_disagreement
hstove Jun 25, 2025
8f790dc
fix: btc_on_stx test
hstove Jun 26, 2025
d4d3917
fix: revert logic for setting `expected_burn_height`
hstove Jun 26, 2025
c6ca6b9
fix: dont rely on node burn block to be processed
hstove Jun 27, 2025
795b4a7
Revert "fix: dont rely on node burn block to be processed"
hstove Jun 30, 2025
4cc0758
fix: better descendency check
hstove Jun 30, 2025
d70d554
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jun 30, 2025
fbce54a
fix: off-by-one in failsafe descendency check
hstove Jul 1, 2025
bdfed91
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jul 1, 2025
66bfc13
fix: prevent panic in test setup
hstove Jul 1, 2025
78ac7cc
crc: review comments
hstove Jul 2, 2025
2499c16
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jul 3, 2025
13a71b2
crc: code improvements from feedback
hstove Jul 3, 2025
3130e83
fix: return `bool` instead of `Result<bool>`
hstove Jul 3, 2025
8f4d08e
fix: use `DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS` in tests
hstove Jul 7, 2025
89920fe
fix: incorrect block wait logic, test logic ordering
hstove Jul 7, 2025
d63fb5d
fix: rename integration test name
hstove Jul 7, 2025
a9c9efa
feat: changelog for failsafe
hstove Jul 7, 2025
03ba279
Merge branch 'develop' into feat/tx-replay-failsafe
hstove Jul 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions stacks-signer/src/chainstate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ pub struct ProposalEvalConfig {
pub reorg_attempts_activity_timeout: Duration,
/// Time to wait before submitting a block proposal to the stacks-node
pub proposal_wait_for_parent_time: Duration,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism?
pub reset_replay_set_after_fork_blocks: u64,
}

impl From<&SignerConfig> for ProposalEvalConfig {
Expand All @@ -155,6 +158,7 @@ impl From<&SignerConfig> for ProposalEvalConfig {
reorg_attempts_activity_timeout: value.reorg_attempts_activity_timeout,
tenure_idle_timeout_buffer: value.tenure_idle_timeout_buffer,
proposal_wait_for_parent_time: value.proposal_wait_for_parent_time,
reset_replay_set_after_fork_blocks: value.reset_replay_set_after_fork_blocks,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions stacks-signer/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ pub(crate) mod tests {
reorg_attempts_activity_timeout: config.reorg_attempts_activity_timeout,
proposal_wait_for_parent_time: config.proposal_wait_for_parent_time,
validate_with_replay_tx: config.validate_with_replay_tx,
reset_replay_set_after_fork_blocks: config.reset_replay_set_after_fork_blocks,
}
}

Expand Down
19 changes: 19 additions & 0 deletions stacks-signer/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ const DEFAULT_TENURE_IDLE_TIMEOUT_BUFFER_SECS: u64 = 2;
/// cannot determine that our stacks-node has processed the parent
/// block
const DEFAULT_PROPOSAL_WAIT_TIME_FOR_PARENT_SECS: u64 = 15;
/// Default number of blocks after a fork to reset the replay set,
/// as a failsafe mechanism
const DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS: u64 = 2;

#[derive(thiserror::Error, Debug)]
/// An error occurred parsing the provided configuration
Expand Down Expand Up @@ -184,6 +187,9 @@ pub struct SignerConfig {
pub proposal_wait_for_parent_time: Duration,
/// Whether or not to validate blocks with replay transactions
pub validate_with_replay_tx: bool,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: u64,
}

/// The parsed configuration for the signer
Expand Down Expand Up @@ -237,6 +243,9 @@ pub struct GlobalConfig {
pub dry_run: bool,
/// Whether or not to validate blocks with replay transactions
pub validate_with_replay_tx: bool,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: u64,
}

/// Internal struct for loading up the config file
Expand Down Expand Up @@ -288,6 +297,9 @@ struct RawConfigFile {
pub dry_run: Option<bool>,
/// Whether or not to validate blocks with replay transactions
pub validate_with_replay_tx: Option<bool>,
/// How many blocks after a fork should we reset the replay set,
/// as a failsafe mechanism
pub reset_replay_set_after_fork_blocks: Option<u64>,
}

impl RawConfigFile {
Expand Down Expand Up @@ -413,6 +425,10 @@ impl TryFrom<RawConfigFile> for GlobalConfig {
// https://github.com/stacks-network/stacks-core/issues/6087
let validate_with_replay_tx = raw_data.validate_with_replay_tx.unwrap_or(false);

let reset_replay_set_after_fork_blocks = raw_data
.reset_replay_set_after_fork_blocks
.unwrap_or(DEFAULT_RESET_REPLAY_SET_AFTER_FORK_BLOCKS);

Ok(Self {
node_host: raw_data.node_host,
endpoint,
Expand All @@ -435,6 +451,7 @@ impl TryFrom<RawConfigFile> for GlobalConfig {
tenure_idle_timeout_buffer,
proposal_wait_for_parent_time,
validate_with_replay_tx,
reset_replay_set_after_fork_blocks,
})
}
}
Expand Down Expand Up @@ -714,12 +731,14 @@ network = "mainnet"
auth_password = "abcd"
db_path = ":memory:"
validate_with_replay_tx = true
reset_replay_set_after_fork_blocks = 100
"#
);
let config = GlobalConfig::load_from_str(&config_toml).unwrap();
assert_eq!(config.stacks_address.to_string(), expected_addr);
assert_eq!(config.to_chain_id(), CHAIN_ID_MAINNET);
assert!(config.validate_with_replay_tx);
assert_eq!(config.reset_replay_set_after_fork_blocks, 100);
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions stacks-signer/src/runloop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ impl<Signer: SignerTrait<T>, T: StacksMessageCodec + Clone + Send + Debug> RunLo
reorg_attempts_activity_timeout: self.config.reorg_attempts_activity_timeout,
proposal_wait_for_parent_time: self.config.proposal_wait_for_parent_time,
validate_with_replay_tx: self.config.validate_with_replay_tx,
reset_replay_set_after_fork_blocks: self.config.reset_replay_set_after_fork_blocks,
}))
}

Expand Down
1 change: 1 addition & 0 deletions stacks-signer/src/tests/chainstate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ fn setup_test_environment(
tenure_idle_timeout_buffer: Duration::from_secs(2),
reorg_attempts_activity_timeout: Duration::from_secs(3),
proposal_wait_for_parent_time: Duration::from_secs(0),
reset_replay_set_after_fork_blocks: 2,
},
};

Expand Down
3 changes: 3 additions & 0 deletions stacks-signer/src/v0/signer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ pub struct Signer {
pub validate_with_replay_tx: bool,
/// Scope of Tx Replay in terms of Burn block boundaries
pub tx_replay_scope: ReplayScopeOpt,
/// Failsafe threshold: how many burn blocks past the replay scope's past tip the chain may advance before the replay set is reset
pub reset_replay_set_after_fork_blocks: u64,
}

impl std::fmt::Display for SignerMode {
Expand Down Expand Up @@ -244,6 +246,7 @@ impl SignerTrait<SignerMessage> for Signer {
global_state_evaluator,
validate_with_replay_tx: signer_config.validate_with_replay_tx,
tx_replay_scope: None,
reset_replay_set_after_fork_blocks: signer_config.reset_replay_set_after_fork_blocks,
}
}

Expand Down
121 changes: 114 additions & 7 deletions stacks-signer/src/v0/signer_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
use blockstack_lib::chainstate::burn::ConsensusHashExtensions;
use blockstack_lib::chainstate::nakamoto::{NakamotoBlock, NakamotoBlockHeader};
use blockstack_lib::chainstate::stacks::{StacksTransaction, TransactionPayload};
use blockstack_lib::net::api::get_tenures_fork_info::TenureForkingInfo;
use blockstack_lib::net::api::postblock_proposal::NakamotoBlockProposal;
use clarity::types::chainstate::StacksAddress;
#[cfg(any(test, feature = "testing"))]
Expand Down Expand Up @@ -594,9 +595,11 @@
&& next_burn_block_hash != expected_burn_block.consensus_hash;
if node_behind_expected || node_on_equal_fork {
let err_msg = format!(
"Node has not processed the next burn block yet. Expected height = {}, Expected consensus hash = {}",
"Node has not processed the next burn block yet. Expected height = {}, Expected consensus hash = {}, Node height = {}, Node consensus hash = {}",
expected_burn_block.burn_block_height,
expected_burn_block.consensus_hash,
next_burn_block_height,
next_burn_block_hash,
);
*self = Self::Pending {
update: StateMachineUpdate::BurnBlock(expected_burn_block),
Expand All @@ -620,7 +623,7 @@
client,
&expected_burn_block,
&prior_state_machine,
replay_state,
&replay_state,
)? {
match new_replay_state {
ReplayState::Unset => {
Expand All @@ -632,6 +635,16 @@
*tx_replay_scope = Some(new_scope);
}
}
} else if Self::handle_possible_replay_failsafe(
&replay_state,
&expected_burn_block,
proposal_config.reset_replay_set_after_fork_blocks,
)? {
info!(
"Signer state: replay set is stalled after 2 tenures. Clearing the replay set."
);
tx_replay_set = ReplayTransactionSet::none();
*tx_replay_scope = None;
}
}

Expand Down Expand Up @@ -981,11 +994,24 @@
client: &StacksClient,
expected_burn_block: &NewBurnBlock,
prior_state_machine: &SignerStateMachine,
replay_state: ReplayState,
replay_state: &ReplayState,
) -> Result<Option<ReplayState>, SignerChainstateError> {
if expected_burn_block.burn_block_height > prior_state_machine.burn_block_height {
// no bitcoin fork, because we're advancing the burn block height
return Ok(None);
if Self::new_burn_block_fork_descendency_check(
db,
expected_burn_block,
prior_state_machine.burn_block_height,
prior_state_machine.burn_block,
)? {
info!("Detected bitcoin fork - prior tip is not parent of new tip.";
"new_tip.burn_block_height" => expected_burn_block.burn_block_height,
"new_tip.consensus_hash" => %expected_burn_block.consensus_hash,
"prior_tip.burn_block_height" => prior_state_machine.burn_block_height,
"prior_tip.consensus_hash" => %prior_state_machine.burn_block,
);
} else {
return Ok(None);
}
}
if expected_burn_block.consensus_hash == prior_state_machine.burn_block {
// no bitcoin fork, because we're at the same burn block hash as before
Expand Down Expand Up @@ -1088,7 +1114,7 @@
client: &StacksClient,
expected_burn_block: &NewBurnBlock,
prior_state_machine: &SignerStateMachine,
scope: ReplayScope,
scope: &ReplayScope,
) -> Result<Option<ReplayState>, SignerChainstateError> {
info!("Tx Replay: detected bitcoin fork while in replay mode. Tryng to handle the fork";
"expected_burn_block.height" => expected_burn_block.burn_block_height,
Expand Down Expand Up @@ -1182,6 +1208,10 @@
return Ok(None);
}

Ok(Some(Self::get_forked_txs_from_fork_info(&fork_info)))
}

fn get_forked_txs_from_fork_info(fork_info: &[TenureForkingInfo]) -> Vec<StacksTransaction> {
// Collect transactions to be replayed across the forked blocks
let mut forked_blocks = fork_info
.iter()
Expand All @@ -1201,6 +1231,83 @@
))
.cloned()
.collect::<Vec<_>>();
Ok(Some(forked_txs))
forked_txs
}

/// If it has been `reset_replay_set_after_fork_blocks` burn blocks since the origin of our replay set, and
/// we haven't produced any replay blocks since then, we should reset our replay set
///
/// Returns a `bool` indicating whether the replay set should be reset.
fn handle_possible_replay_failsafe(
replay_state: &ReplayState,
new_burn_block: &NewBurnBlock,
reset_replay_set_after_fork_blocks: u64,
) -> Result<bool, SignerChainstateError> {
match replay_state {
ReplayState::Unset => {
// not in replay - skip
return Ok(false);

Check failure on line 1249 in stacks-signer/src/v0/signer_state.rs

View workflow job for this annotation

GitHub Actions / Clippy Check

unneeded `return` statement
}
ReplayState::InProgress(_, replay_scope) => {
let failsafe_height =
replay_scope.past_tip.burn_block_height + reset_replay_set_after_fork_blocks;
Ok(new_burn_block.burn_block_height > failsafe_height)
}
}
}

/// Decide whether `new_burn_block` lies on a different bitcoin fork than the
/// prior burn block, by walking the new block's ancestry (as recorded in the
/// signer DB) back down to `prior_burn_block_height`.
///
/// Returns `Ok(true)` only when the ancestor found at `prior_burn_block_height`
/// has a consensus hash different from `prior_burn_block_ch`, i.e. the prior
/// tip is *not* an ancestor of the new tip.
///
/// Returns `Ok(false)` when:
/// * the new block is not strictly higher than the prior block,
/// * the two heights are more than 10 blocks apart,
/// * any required burn block lookup fails (a warning is logged), or
/// * the ancestor at the prior height matches `prior_burn_block_ch`.
fn new_burn_block_fork_descendency_check(
    db: &SignerDb,
    new_burn_block: &NewBurnBlock,
    prior_burn_block_height: u64,
    prior_burn_block_ch: ConsensusHash,
) -> Result<bool, SignerChainstateError> {
    let max_height_delta = 10;

    // A saturated result of 0 covers both "same height" and "new block is older".
    let height_delta = new_burn_block
        .burn_block_height
        .saturating_sub(prior_burn_block_height);
    if height_delta == 0 || height_delta > max_height_delta {
        return Ok(false);
    }

    // Start the walk at the parent of the new burn block.
    let new_tip_parent = db
        .get_burn_block_by_ch(&new_burn_block.consensus_hash)
        .and_then(|tip_info| db.get_burn_block_by_hash(&tip_info.parent_burn_block_hash));
    let mut ancestor = match new_tip_parent {
        Ok(found) => found,
        Err(e) => {
            warn!(
                "Failed to get parent burn block info for {}",
                new_burn_block.consensus_hash;
                "error" => ?e,
            );
            return Ok(false);
        }
    };

    // Take at most `height_delta` steps down the ancestry.
    let mut steps_left = height_delta;
    while steps_left > 0 {
        if ancestor.block_height == prior_burn_block_height {
            let is_fork = ancestor.consensus_hash != prior_burn_block_ch;
            return Ok(is_fork);
        }

        ancestor = match db.get_burn_block_by_hash(&ancestor.parent_burn_block_hash) {
            Ok(parent) => parent,
            Err(e) => {
                warn!(
                    "Failed to get parent burn block info for {}. Error: {e}",
                    ancestor.parent_burn_block_hash
                );
                return Ok(false);
            }
        };
        steps_left -= 1;
    }

    // No ancestor was found at the prior height within the allowed steps.
    Ok(false)
}
}
23 changes: 23 additions & 0 deletions stackslib/src/net/api/postblock_proposal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ pub static TEST_REPLAY_TRANSACTIONS: LazyLock<
TestFlag<std::collections::VecDeque<StacksTransaction>>,
> = LazyLock::new(TestFlag::default);

#[cfg(any(test, feature = "testing"))]
/// Whether to reject any transaction while we're in a replay set.
///
/// Test-only flag: when it reads `true`, `fault_injection_reject_replay_txs`
/// returns an `InvalidTransactionReplay` rejection instead of `Ok(())`.
pub static TEST_REJECT_REPLAY_TXS: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);

// This enum is used to supply a `reason_code` for validation
// rejection responses. This is serialized as an enum with string
// type (in jsonschema terminology).
Expand Down Expand Up @@ -200,6 +204,24 @@ fn fault_injection_validation_delay() {
#[cfg(not(any(test, feature = "testing")))]
fn fault_injection_validation_delay() {}

#[cfg(any(test, feature = "testing"))]
/// Fault-injection hook for tests: consults `TEST_REJECT_REPLAY_TXS` and, when
/// the flag is set, fails with an `InvalidTransactionReplay` rejection.
/// Returns `Ok(())` when the flag is not set.
fn fault_injection_reject_replay_txs() -> Result<(), BlockValidateRejectReason> {
    if !TEST_REJECT_REPLAY_TXS.get() {
        return Ok(());
    }

    Err(BlockValidateRejectReason {
        reason: "Rejected by test flag".into(),
        reason_code: ValidateRejectCode::InvalidTransactionReplay,
    })
}

#[cfg(not(any(test, feature = "testing")))]
/// Variant compiled when neither `test` nor the `testing` feature is enabled:
/// performs no check and always returns `Ok(())`.
fn fault_injection_reject_replay_txs() -> Result<(), BlockValidateRejectReason> {
Ok(())
}

/// Represents a block proposed to the `v3/block_proposal` endpoint for validation
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NakamotoBlockProposal {
Expand Down Expand Up @@ -723,6 +745,7 @@ impl NakamotoBlockProposal {
// Allow this to happen, tenure extend checks happen elsewhere.
break;
}
fault_injection_reject_replay_txs()?;
let Some(replay_tx) = replay_txs.pop_front() else {
// During transaction replay, we expect that the block only
// contains transactions from the replay set. Thus, if we're here,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2833,6 +2833,17 @@ impl BitcoinRPCRequest {
BitcoinRPCRequest::send(config, payload)
}

/// Send a parameterless `getchaintips` JSON-RPC request (id `"stacks"`,
/// JSON-RPC version `"2.0"`) through `BitcoinRPCRequest::send` and return
/// its result unchanged.
pub fn get_chain_tips(config: &Config) -> RPCResult<serde_json::Value> {
    BitcoinRPCRequest::send(
        config,
        BitcoinRPCRequest {
            method: String::from("getchaintips"),
            params: Vec::new(),
            id: String::from("stacks"),
            jsonrpc: String::from("2.0"),
        },
    )
}

pub fn send(config: &Config, payload: BitcoinRPCRequest) -> RPCResult<serde_json::Value> {
let request = BitcoinRPCRequest::build_rpc_request(config, &payload);
let timeout = Duration::from_secs(u64::from(config.burnchain.timeout));
Expand Down
3 changes: 3 additions & 0 deletions testnet/stacks-node/src/tests/nakamoto_integrations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6589,6 +6589,7 @@ fn signer_chainstate() {
tenure_idle_timeout: Duration::from_secs(300),
tenure_idle_timeout_buffer: Duration::from_secs(2),
reorg_attempts_activity_timeout: Duration::from_secs(30),
reset_replay_set_after_fork_blocks: 2,
};
let mut sortitions_view =
SortitionsView::fetch_view(proposal_conf, &signer_client).unwrap();
Expand Down Expand Up @@ -6716,6 +6717,7 @@ fn signer_chainstate() {
tenure_idle_timeout: Duration::from_secs(300),
tenure_idle_timeout_buffer: Duration::from_secs(2),
reorg_attempts_activity_timeout: Duration::from_secs(30),
reset_replay_set_after_fork_blocks: 2,
};
let burn_block_height = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn())
.unwrap()
Expand Down Expand Up @@ -6794,6 +6796,7 @@ fn signer_chainstate() {
tenure_idle_timeout: Duration::from_secs(300),
tenure_idle_timeout_buffer: Duration::from_secs(2),
reorg_attempts_activity_timeout: Duration::from_secs(30),
reset_replay_set_after_fork_blocks: 2,
};
let mut sortitions_view = SortitionsView::fetch_view(proposal_conf, &signer_client).unwrap();
sortitions_view
Expand Down
Loading
Loading