Skip to content

Commit a7c1178

Browse files
authored
Merge pull request #5801 from stacks-network/retry-mining-on-tip-change
feat: stop waiting for signatures if the chain tip advances
2 parents bffb1aa + 5308a99 commit a7c1178

File tree

6 files changed

+548
-37
lines changed

6 files changed

+548
-37
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
1313

1414
### Changed
1515

16+
- Miner will stop waiting for signatures on a block if the Stacks tip advances (causing the block it had proposed to be invalid).
17+
1618
### Fixed
1719

1820
- Error responses to /v2/transactions/fees are once again expressed as JSON ([#4145](https://github.com/stacks-network/stacks-core/issues/4145)).

stacks-signer/src/v0/signer.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ impl SignerTrait<SignerMessage> for Signer {
207207
"block_height" => b.header.chain_length,
208208
"signer_sighash" => %b.header.signer_signature_hash(),
209209
);
210+
#[cfg(any(test, feature = "testing"))]
211+
if self.test_skip_block_broadcast(b) {
212+
return;
213+
}
210214
stacks_client.post_block_until_ok(self, b);
211215
}
212216
SignerMessage::MockProposal(mock_proposal) => {

testnet/stacks-node/src/nakamoto_node/miner.rs

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,20 @@ use crate::run_loop::RegisteredKey;
6565
pub static TEST_MINE_STALL: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
6666
#[cfg(test)]
6767
/// Test flag to stall block proposal broadcasting
68-
pub static TEST_BROADCAST_STALL: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
68+
pub static TEST_BROADCAST_PROPOSAL_STALL: LazyLock<TestFlag<bool>> =
69+
LazyLock::new(TestFlag::default);
6970
#[cfg(test)]
71+
// Test flag to stall the miner from announcing a block while this flag is true
7072
pub static TEST_BLOCK_ANNOUNCE_STALL: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
7173
#[cfg(test)]
72-
pub static TEST_SKIP_P2P_BROADCAST: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
74+
// Test flag to skip broadcasting blocks over the p2p network
75+
pub static TEST_P2P_BROADCAST_SKIP: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
76+
#[cfg(test)]
77+
// Test flag to stall broadcasting blocks over the p2p network
78+
pub static TEST_P2P_BROADCAST_STALL: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
79+
#[cfg(test)]
80+
// Test flag to skip pushing blocks to the signers
81+
pub static TEST_BLOCK_PUSH_SKIP: LazyLock<TestFlag<bool>> = LazyLock::new(TestFlag::default);
7382

7483
/// If the miner was interrupted while mining a block, how long should the
7584
/// miner thread sleep before trying again?
@@ -252,19 +261,19 @@ impl BlockMinerThread {
252261
}
253262

254263
#[cfg(test)]
255-
fn fault_injection_block_broadcast_stall(new_block: &NakamotoBlock) {
256-
if TEST_BROADCAST_STALL.get() {
264+
fn fault_injection_block_proposal_stall(new_block: &NakamotoBlock) {
265+
if TEST_BROADCAST_PROPOSAL_STALL.get() {
257266
// Do an extra check just so we don't log EVERY time.
258-
warn!("Fault injection: Broadcasting is stalled due to testing directive.";
267+
warn!("Fault injection: Block proposal broadcast is stalled due to testing directive.";
259268
"stacks_block_id" => %new_block.block_id(),
260269
"stacks_block_hash" => %new_block.header.block_hash(),
261270
"height" => new_block.header.chain_length,
262271
"consensus_hash" => %new_block.header.consensus_hash
263272
);
264-
while TEST_BROADCAST_STALL.get() {
273+
while TEST_BROADCAST_PROPOSAL_STALL.get() {
265274
std::thread::sleep(std::time::Duration::from_millis(10));
266275
}
267-
info!("Fault injection: Broadcasting is no longer stalled due to testing directive.";
276+
info!("Fault injection: Block proposal broadcast is no longer stalled due to testing directive.";
268277
"block_id" => %new_block.block_id(),
269278
"height" => new_block.header.chain_length,
270279
"consensus_hash" => %new_block.header.consensus_hash
@@ -273,7 +282,7 @@ impl BlockMinerThread {
273282
}
274283

275284
#[cfg(not(test))]
276-
fn fault_injection_block_broadcast_stall(_ignored: &NakamotoBlock) {}
285+
fn fault_injection_block_proposal_stall(_ignored: &NakamotoBlock) {}
277286

278287
#[cfg(test)]
279288
fn fault_injection_block_announce_stall(new_block: &NakamotoBlock) {
@@ -301,17 +310,48 @@ impl BlockMinerThread {
301310

302311
#[cfg(test)]
303312
fn fault_injection_skip_block_broadcast() -> bool {
304-
if TEST_SKIP_P2P_BROADCAST.get() {
305-
return true;
306-
}
307-
false
313+
TEST_P2P_BROADCAST_SKIP.get()
308314
}
309315

310316
#[cfg(not(test))]
311317
fn fault_injection_skip_block_broadcast() -> bool {
312318
false
313319
}
314320

321+
#[cfg(test)]
322+
fn fault_injection_block_broadcast_stall(new_block: &NakamotoBlock) {
323+
if TEST_P2P_BROADCAST_STALL.get() {
324+
// Do an extra check just so we don't log EVERY time.
325+
warn!("Fault injection: P2P block broadcast is stalled due to testing directive.";
326+
"stacks_block_id" => %new_block.block_id(),
327+
"stacks_block_hash" => %new_block.header.block_hash(),
328+
"height" => new_block.header.chain_length,
329+
"consensus_hash" => %new_block.header.consensus_hash
330+
);
331+
while TEST_P2P_BROADCAST_STALL.get() {
332+
std::thread::sleep(std::time::Duration::from_millis(10));
333+
}
334+
info!("Fault injection: P2P block broadcast is no longer stalled due to testing directive.";
335+
"block_id" => %new_block.block_id(),
336+
"height" => new_block.header.chain_length,
337+
"consensus_hash" => %new_block.header.consensus_hash
338+
);
339+
}
340+
}
341+
342+
#[cfg(not(test))]
343+
fn fault_injection_block_broadcast_stall(_ignored: &NakamotoBlock) {}
344+
345+
#[cfg(test)]
346+
fn fault_injection_skip_block_push() -> bool {
347+
TEST_BLOCK_PUSH_SKIP.get()
348+
}
349+
350+
#[cfg(not(test))]
351+
fn fault_injection_skip_block_push() -> bool {
352+
false
353+
}
354+
315355
/// Stop a miner tenure by blocking the miner and then joining the tenure thread
316356
#[cfg(test)]
317357
fn fault_injection_stall_miner() {
@@ -516,7 +556,7 @@ impl BlockMinerThread {
516556
};
517557

518558
if let Some(mut new_block) = new_block {
519-
Self::fault_injection_block_broadcast_stall(&new_block);
559+
Self::fault_injection_block_proposal_stall(&new_block);
520560

521561
let signer_signature = match self.propose_block(
522562
coordinator,
@@ -532,7 +572,7 @@ impl BlockMinerThread {
532572
"block_height" => new_block.header.chain_length,
533573
"consensus_hash" => %new_block.header.consensus_hash,
534574
);
535-
return Err(e);
575+
return Ok(());
536576
}
537577
NakamotoNodeError::BurnchainTipChanged => {
538578
info!("Burnchain tip changed while waiting for signatures";
@@ -739,6 +779,7 @@ impl BlockMinerThread {
739779
);
740780
return Ok(());
741781
}
782+
Self::fault_injection_block_broadcast_stall(block);
742783

743784
let parent_block_info =
744785
NakamotoChainState::get_block_header(chain_state.db(), &block.header.parent_block_id)?
@@ -834,6 +875,14 @@ impl BlockMinerThread {
834875
let miners_contract_id = boot_code_id(MINERS_NAME, chain_state.mainnet);
835876
let mut miners_session = StackerDBSession::new(&rpc_socket.to_string(), miners_contract_id);
836877

878+
if Self::fault_injection_skip_block_push() {
879+
warn!(
880+
"Fault injection: Skipping block push for {}",
881+
block.block_id()
882+
);
883+
return Ok(());
884+
}
885+
837886
SignerCoordinator::send_miners_message(
838887
miner_privkey,
839888
&sort_db,

testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ impl SignerCoordinator {
288288
self.get_block_status(
289289
&block.header.signer_signature_hash(),
290290
&block.block_id(),
291+
block.header.parent_block_id,
291292
chain_state,
292293
sortdb,
293294
counters,
@@ -303,6 +304,7 @@ impl SignerCoordinator {
303304
&self,
304305
block_signer_sighash: &Sha512Trunc256Sum,
305306
block_id: &StacksBlockId,
307+
parent_block_id: StacksBlockId,
306308
chain_state: &mut StacksChainState,
307309
sortdb: &SortitionDB,
308310
counters: &Counters,
@@ -319,6 +321,10 @@ impl SignerCoordinator {
319321
)
320322
})?;
321323

324+
let parent_tenure_header =
325+
NakamotoChainState::get_block_header(chain_state.db(), &parent_block_id)?
326+
.ok_or(NakamotoNodeError::UnexpectedChainState)?;
327+
322328
// this is used to track the start of the waiting cycle
323329
let rejections_timer = Instant::now();
324330
loop {
@@ -384,6 +390,18 @@ impl SignerCoordinator {
384390
));
385391
}
386392

393+
// Check if a new Stacks block has arrived in the parent tenure
394+
let highest_in_tenure =
395+
NakamotoChainState::get_highest_known_block_header_in_tenure(
396+
&mut chain_state.index_conn(),
397+
&parent_tenure_header.consensus_hash,
398+
)?
399+
.ok_or(NakamotoNodeError::UnexpectedChainState)?;
400+
if highest_in_tenure.index_block_hash() != parent_block_id {
401+
debug!("SignCoordinator: Exiting due to new stacks tip");
402+
return Err(NakamotoNodeError::StacksTipChanged);
403+
}
404+
387405
continue;
388406
}
389407
};

testnet/stacks-node/src/tests/nakamoto_integrations.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ use stacks_signer::v0::SpawnedSigner;
9898

9999
use super::bitcoin_regtest::BitcoinCoreController;
100100
use crate::nakamoto_node::miner::{
101-
TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST,
101+
TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_PROPOSAL_STALL, TEST_MINE_STALL,
102+
TEST_P2P_BROADCAST_SKIP,
102103
};
103104
use crate::nakamoto_node::relayer::{RelayerThread, TEST_MINER_THREAD_STALL};
104105
use crate::neon::{Counters, RunLoopCounter};
@@ -5188,7 +5189,7 @@ fn forked_tenure_is_ignored() {
51885189

51895190
// For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted.
51905191
// Stall the miner thread; only wait until the number of submitted commits increases.
5191-
TEST_BROADCAST_STALL.set(true);
5192+
TEST_BROADCAST_PROPOSAL_STALL.set(true);
51925193
TEST_BLOCK_ANNOUNCE_STALL.set(true);
51935194

51945195
let blocks_before = mined_blocks.load(Ordering::SeqCst);
@@ -5207,7 +5208,7 @@ fn forked_tenure_is_ignored() {
52075208
// Unpause the broadcast of Tenure B's block, do not submit commits, and do not allow blocks to
52085209
// be processed
52095210
test_skip_commit_op.set(true);
5210-
TEST_BROADCAST_STALL.set(false);
5211+
TEST_BROADCAST_PROPOSAL_STALL.set(false);
52115212

52125213
// Wait for a stacks block to be broadcasted.
52135214
// However, it will not be processed.
@@ -9881,7 +9882,7 @@ fn skip_mining_long_tx() {
98819882
})
98829883
.unwrap();
98839884

9884-
TEST_SKIP_P2P_BROADCAST.set(true);
9885+
TEST_P2P_BROADCAST_SKIP.set(true);
98859886
let tx = make_contract_publish(
98869887
&sender_2_sk,
98879888
0,
@@ -9908,7 +9909,7 @@ fn skip_mining_long_tx() {
99089909
})
99099910
.unwrap();
99109911

9911-
TEST_SKIP_P2P_BROADCAST.set(false);
9912+
TEST_P2P_BROADCAST_SKIP.set(false);
99129913
} else {
99139914
let transfer_tx = make_stacks_transfer(
99149915
&sender_1_sk,

0 commit comments

Comments
 (0)