Skip to content

Commit ec00df2

Browse files
authored
Merge pull request #5877 from stacks-network/feat/resend-proposal
feat: on timeout, re-propose the same block
2 parents acd148a + 346e823 commit ec00df2

File tree

8 files changed

+416
-109
lines changed

8 files changed

+416
-109
lines changed

CHANGELOG.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,17 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
77

88
## [Unreleased]
99

10+
### Changed
11+
12+
- When a miner times out waiting for signatures, it will re-propose the same block instead of building a new block ([#5877](https://github.com/stacks-network/stacks-core/pull/5877))
13+
1014
## [3.1.0.0.7]
1115

12-
## Added
16+
### Added
1317

1418
- Add `disable_retries` mode for events_observer disabling automatic retry on error
1519

16-
## Changed
20+
### Changed
1721

1822
- Implement faster cost tracker for default cost functions in Clarity
1923
- By default, miners will wait for a new tenure to start for a configurable amount of time after receiving a burn block before
@@ -23,7 +27,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
2327

2428
## [3.1.0.0.6]
2529

26-
## Added
30+
### Added
2731

2832
- The `BlockProposal` StackerDB message serialization struct now includes a `server_version` string, which represents the version of the node that the miner is using. ([#5803](https://github.com/stacks-network/stacks-core/pull/5803))
2933
- Add `vrf_seed` to the `/v3/sortitions` rpc endpoint

stacks-signer/src/chainstate.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ impl SortitionsView {
402402
false,
403403
);
404404
let epoch_time = get_epoch_time_secs();
405-
let enough_time_passed = epoch_time > extend_timestamp;
405+
let enough_time_passed = epoch_time >= extend_timestamp;
406406
if !changed_burn_view && !enough_time_passed {
407407
warn!(
408408
"Miner block proposal contains a tenure extend, but the burnchain view has not changed and enough time has not passed to refresh the block limit. Considering proposal invalid.";

stacks-signer/src/v0/signer.rs

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
use std::collections::HashMap;
1616
use std::fmt::Debug;
1717
use std::sync::mpsc::Sender;
18+
#[cfg(any(test, feature = "testing"))]
19+
use std::sync::LazyLock;
1820
use std::time::{Duration, Instant};
1921

2022
use blockstack_lib::chainstate::nakamoto::{NakamotoBlock, NakamotoBlockHeader};
@@ -23,9 +25,15 @@ use blockstack_lib::net::api::postblock_proposal::{
2325
};
2426
use blockstack_lib::util_lib::db::Error as DBError;
2527
use clarity::types::chainstate::StacksPrivateKey;
28+
#[cfg(any(test, feature = "testing"))]
29+
use clarity::types::chainstate::StacksPublicKey;
2630
use clarity::types::{PrivateKey, StacksEpochId};
2731
use clarity::util::hash::{MerkleHashFunc, Sha512Trunc256Sum};
2832
use clarity::util::secp256k1::Secp256k1PublicKey;
33+
#[cfg(any(test, feature = "testing"))]
34+
use clarity::util::sleep_ms;
35+
#[cfg(any(test, feature = "testing"))]
36+
use clarity::util::tests::TestFlag;
2937
use libsigner::v0::messages::{
3038
BlockAccepted, BlockRejection, BlockResponse, MessageSlotID, MockProposal, MockSignature,
3139
RejectReason, RejectReasonPrefix, SignerMessage,
@@ -44,6 +52,12 @@ use crate::runloop::SignerResult;
4452
use crate::signerdb::{BlockInfo, BlockState, SignerDb};
4553
use crate::Signer as SignerTrait;
4654

55+
/// A global variable that can be used to make signers repeat their proposal
56+
/// response if their public key is in the provided list
57+
#[cfg(any(test, feature = "testing"))]
58+
pub static TEST_REPEAT_PROPOSAL_RESPONSE: LazyLock<TestFlag<Vec<StacksPublicKey>>> =
59+
LazyLock::new(TestFlag::default);
60+
4761
/// Signer running mode (whether dry-run or real)
4862
#[derive(Debug)]
4963
pub enum SignerMode {
@@ -464,6 +478,45 @@ impl Signer {
464478
}
465479
}
466480

481+
/// The actual `send_block_response` implementation. Declared so that we do
482+
/// not need to duplicate in testing.
483+
fn impl_send_block_response(&mut self, block_response: BlockResponse) {
484+
let res = self
485+
.stackerdb
486+
.send_message_with_retry::<SignerMessage>(block_response.clone().into());
487+
match res {
488+
Err(e) => warn!("{self}: Failed to send block rejection to stacker-db: {e:?}"),
489+
Ok(ack) if !ack.accepted => warn!(
490+
"{self}: Block rejection not accepted by stacker-db: {:?}",
491+
ack.reason
492+
),
493+
Ok(_) => debug!("{self}: Block rejection accepted by stacker-db"),
494+
}
495+
}
496+
497+
#[cfg(any(test, feature = "testing"))]
498+
fn send_block_response(&mut self, block_response: BlockResponse) {
499+
const NUM_REPEATS: usize = 1;
500+
let mut count = 0;
501+
let public_keys = TEST_REPEAT_PROPOSAL_RESPONSE.get();
502+
if !public_keys.contains(
503+
&stacks_common::types::chainstate::StacksPublicKey::from_private(&self.private_key),
504+
) {
505+
count = NUM_REPEATS;
506+
}
507+
while count <= NUM_REPEATS {
508+
self.impl_send_block_response(block_response.clone());
509+
510+
count += 1;
511+
sleep_ms(1000);
512+
}
513+
}
514+
515+
#[cfg(not(any(test, feature = "testing")))]
516+
fn send_block_response(&mut self, block_response: BlockResponse) {
517+
self.impl_send_block_response(block_response)
518+
}
519+
467520
/// Handle block proposal messages submitted to signers stackerdb
468521
fn handle_block_proposal(
469522
&mut self,
@@ -575,18 +628,7 @@ impl Signer {
575628
if let Some(block_response) = block_response {
576629
// We know proposal is invalid. Send rejection message, do not do further validation and do not store it.
577630
debug!("{self}: Broadcasting a block response to stacks node: {block_response:?}");
578-
let res = self
579-
.stackerdb
580-
.send_message_with_retry::<SignerMessage>(block_response.into());
581-
582-
match res {
583-
Err(e) => warn!("{self}: Failed to send block rejection to stacker-db: {e:?}"),
584-
Ok(ack) if !ack.accepted => warn!(
585-
"{self}: Block rejection not accepted by stacker-db: {:?}",
586-
ack.reason
587-
),
588-
Ok(_) => debug!("{self}: Block rejection accepted by stacker-db"),
589-
}
631+
self.send_block_response(block_response);
590632
} else {
591633
// Just in case check if the last block validation submission timed out.
592634
self.check_submitted_block_proposal();

testnet/stacks-node/src/nakamoto_node.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ pub enum Error {
143143
/// NetError wrapper
144144
#[error("NetError: {0}")]
145145
NetError(#[from] NetError),
146+
#[error("Timed out waiting for signatures")]
147+
SignatureTimeout,
146148
}
147149

148150
impl StacksNode {

testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs

Lines changed: 65 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -254,46 +254,54 @@ impl SignerCoordinator {
254254
};
255255

256256
let block_proposal_message = SignerMessageV0::BlockProposal(block_proposal);
257-
debug!("Sending block proposal message to signers";
258-
"signer_signature_hash" => %block.header.signer_signature_hash(),
259-
);
260-
Self::send_miners_message::<SignerMessageV0>(
261-
&self.message_key,
262-
sortdb,
263-
election_sortition,
264-
stackerdbs,
265-
block_proposal_message,
266-
MinerSlotID::BlockProposal,
267-
self.is_mainnet,
268-
&mut self.miners_session,
269-
&election_sortition.consensus_hash,
270-
)?;
271-
counters.bump_naka_proposed_blocks();
272257

273-
#[cfg(test)]
274-
{
275-
info!(
276-
"SignerCoordinator: sent block proposal to .miners, waiting for test signing channel"
258+
loop {
259+
debug!("Sending block proposal message to signers";
260+
"signer_signature_hash" => %block.header.signer_signature_hash(),
277261
);
278-
// In test mode, short-circuit waiting for the signers if the TEST_SIGNING
279-
// channel has been created. This allows integration tests for the stacks-node
280-
// independent of the stacks-signer.
281-
if let Some(signatures) =
282-
crate::tests::nakamoto_integrations::TestSigningChannel::get_signature()
262+
Self::send_miners_message::<SignerMessageV0>(
263+
&self.message_key,
264+
sortdb,
265+
election_sortition,
266+
stackerdbs,
267+
block_proposal_message.clone(),
268+
MinerSlotID::BlockProposal,
269+
self.is_mainnet,
270+
&mut self.miners_session,
271+
&election_sortition.consensus_hash,
272+
)?;
273+
counters.bump_naka_proposed_blocks();
274+
275+
#[cfg(test)]
283276
{
284-
debug!("Short-circuiting waiting for signers, using test signature");
285-
return Ok(signatures);
277+
info!(
278+
"SignerCoordinator: sent block proposal to .miners, waiting for test signing channel"
279+
);
280+
// In test mode, short-circuit waiting for the signers if the TEST_SIGNING
281+
// channel has been created. This allows integration tests for the stacks-node
282+
// independent of the stacks-signer.
283+
if let Some(signatures) =
284+
crate::tests::nakamoto_integrations::TestSigningChannel::get_signature()
285+
{
286+
debug!("Short-circuiting waiting for signers, using test signature");
287+
return Ok(signatures);
288+
}
286289
}
287-
}
288290

289-
self.get_block_status(
290-
&block.header.signer_signature_hash(),
291-
&block.block_id(),
292-
block.header.parent_block_id,
293-
chain_state,
294-
sortdb,
295-
counters,
296-
)
291+
let res = self.get_block_status(
292+
&block.header.signer_signature_hash(),
293+
&block.block_id(),
294+
block.header.parent_block_id,
295+
chain_state,
296+
sortdb,
297+
counters,
298+
);
299+
300+
match res {
301+
Err(NakamotoNodeError::SignatureTimeout) => continue,
302+
_ => return res,
303+
}
304+
}
297305
}
298306

299307
/// Get the block status for a given block hash.
@@ -340,7 +348,7 @@ impl SignerCoordinator {
340348
if rejections_timer.elapsed() > *rejections_timeout {
341349
return false;
342350
}
343-
// number or rejections changed?
351+
// number of rejections changed?
344352
if status.total_weight_rejected != rejections {
345353
return false;
346354
}
@@ -353,7 +361,7 @@ impl SignerCoordinator {
353361
// If we just received a timeout, we should check if the burnchain
354362
// tip has changed or if we received this signed block already in
355363
// the staging db.
356-
debug!("SignerCoordinator: Timeout waiting for block signatures");
364+
debug!("SignerCoordinator: Intermediate timeout waiting for block status");
357365

358366
// Look in the nakamoto staging db -- a block can only get stored there
359367
// if it has enough signing weight to clear the threshold.
@@ -380,15 +388,17 @@ impl SignerCoordinator {
380388
}
381389

382390
if rejections_timer.elapsed() > *rejections_timeout {
383-
warn!("Timed out while waiting for responses from signers";
384-
"elapsed" => rejections_timer.elapsed().as_secs(),
385-
"rejections_timeout" => rejections_timeout.as_secs(),
386-
"rejections" => rejections,
387-
"rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)
391+
warn!("Timed out while waiting for responses from signers, resending proposal";
392+
"elapsed" => rejections_timer.elapsed().as_secs(),
393+
"rejections_timeout" => rejections_timeout.as_secs(),
394+
"rejections" => rejections,
395+
"rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)
388396
);
389-
return Err(NakamotoNodeError::SigningCoordinatorFailure(
390-
"Timed out while waiting for signatures".into(),
391-
));
397+
398+
// Reset the rejections in the stackerdb listener
399+
self.stackerdb_comms.reset_rejections(block_signer_sighash);
400+
401+
return Err(NakamotoNodeError::SignatureTimeout);
392402
}
393403

394404
// Check if a new Stacks block has arrived in the parent tenure
@@ -399,7 +409,7 @@ impl SignerCoordinator {
399409
)?
400410
.ok_or(NakamotoNodeError::UnexpectedChainState)?;
401411
if highest_in_tenure.index_block_hash() != parent_block_id {
402-
debug!("SignCoordinator: Exiting due to new stacks tip");
412+
info!("SignCoordinator: Exiting due to new stacks tip");
403413
return Err(NakamotoNodeError::StacksTipChanged);
404414
}
405415

@@ -448,14 +458,16 @@ impl SignerCoordinator {
448458
return Ok(block_status.gathered_signatures.values().cloned().collect());
449459
} else if rejections_timer.elapsed() > *rejections_timeout {
450460
warn!("Timed out while waiting for responses from signers";
451-
"elapsed" => rejections_timer.elapsed().as_secs(),
452-
"rejections_timeout" => rejections_timeout.as_secs(),
453-
"rejections" => rejections,
454-
"rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)
461+
"elapsed" => rejections_timer.elapsed().as_secs(),
462+
"rejections_timeout" => rejections_timeout.as_secs(),
463+
"rejections" => rejections,
464+
"rejections_threshold" => self.total_weight.saturating_sub(self.weight_threshold)
455465
);
456-
return Err(NakamotoNodeError::SigningCoordinatorFailure(
457-
"Timed out while waiting for signatures".into(),
458-
));
466+
467+
// Reset the rejections in the stackerdb listener
468+
self.stackerdb_comms.reset_rejections(block_signer_sighash);
469+
470+
return Err(NakamotoNodeError::SignatureTimeout);
459471
} else {
460472
continue;
461473
}

testnet/stacks-node/src/nakamoto_node/stackerdb_listener.rs

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,13 @@ pub static EVENT_RECEIVER_POLL: Duration = Duration::from_millis(500);
5252

5353
#[derive(Debug, Clone)]
5454
pub struct BlockStatus {
55-
pub responded_signers: HashSet<StacksPublicKey>,
55+
/// Set of the slot ids of signers who have responded
56+
pub responded_signers: HashSet<u32>,
57+
/// Map of the slot id of signers who have signed the block and their signature
5658
pub gathered_signatures: BTreeMap<u32, MessageSignature>,
59+
/// Total weight of signers who have signed the block
5760
pub total_weight_approved: u32,
61+
/// Total weight of signers who have rejected the block
5862
pub total_weight_rejected: u32,
5963
}
6064

@@ -342,7 +346,7 @@ impl StackerDBListener {
342346
"server_version" => metadata.server_version,
343347
);
344348
block.gathered_signatures.insert(slot_id, signature);
345-
block.responded_signers.insert(signer_pubkey);
349+
block.responded_signers.insert(slot_id);
346350

347351
if block.total_weight_approved >= self.weight_threshold {
348352
// Signal to anyone waiting on this block that we have enough signatures
@@ -384,11 +388,13 @@ impl StackerDBListener {
384388
continue;
385389
}
386390
};
387-
block.responded_signers.insert(rejected_pubkey);
388-
block.total_weight_rejected = block
389-
.total_weight_rejected
390-
.checked_add(signer_entry.weight)
391-
.expect("FATAL: total weight rejected exceeds u32::MAX");
391+
392+
if block.responded_signers.insert(slot_id) {
393+
block.total_weight_rejected = block
394+
.total_weight_rejected
395+
.checked_add(signer_entry.weight)
396+
.expect("FATAL: total weight rejected exceeds u32::MAX");
397+
}
392398

393399
info!("StackerDBListener: Signer rejected block";
394400
"block_signer_sighash" => %rejected_data.signer_signature_hash,
@@ -496,6 +502,25 @@ impl StackerDBListenerComms {
496502
blocks.insert(block.signer_signature_hash(), block_status);
497503
}
498504

505+
/// Reset rejections for a block proposal.
506+
/// This is used when a block proposal times out and we need to retry it by
507+
/// clearing the block's rejections. Block approvals cannot be cleared
508+
/// because an old approval could always be used to make a block reach
509+
/// the approval threshold.
510+
pub fn reset_rejections(&self, signer_sighash: &Sha512Trunc256Sum) {
511+
let (lock, _cvar) = &*self.blocks;
512+
let mut blocks = lock.lock().expect("FATAL: failed to lock block status");
513+
if let Some(block) = blocks.get_mut(signer_sighash) {
514+
block.responded_signers.clear();
515+
block.total_weight_rejected = 0;
516+
517+
// Add approving signers back to the responded signers set
518+
for (slot_id, _) in block.gathered_signatures.iter() {
519+
block.responded_signers.insert(*slot_id);
520+
}
521+
}
522+
}
523+
499524
/// Get the status for `block` from the Stacker DB listener.
500525
/// If the block is not found in the map, return an error.
501526
/// If the block is found, call `condition` to check if the block status

0 commit comments

Comments
 (0)