Skip to content

Commit e963f87

Browse files
authored
Evict oldest vote on vote refresh after restart (#327)
1 parent 5f16932 commit e963f87

File tree

1 file changed

+124
-40
lines changed

1 file changed

+124
-40
lines changed

gossip/src/cluster_info.rs

Lines changed: 124 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,31 @@ impl ClusterInfo {
10451045
}
10461046
}
10471047

1048+
fn find_vote_index_to_evict(&self, should_evict_vote: impl Fn(&Vote) -> bool) -> u8 {
1049+
let self_pubkey = self.id();
1050+
let mut num_crds_votes = 0;
1051+
let vote_index = {
1052+
let gossip_crds =
1053+
self.time_gossip_read_lock("gossip_read_push_vote", &self.stats.push_vote_read);
1054+
(0..MAX_LOCKOUT_HISTORY as u8)
1055+
.filter_map(|ix| {
1056+
let vote = CrdsValueLabel::Vote(ix, self_pubkey);
1057+
let vote: &CrdsData = gossip_crds.get(&vote)?;
1058+
num_crds_votes += 1;
1059+
match &vote {
1060+
CrdsData::Vote(_, vote) if should_evict_vote(vote) => {
1061+
Some((vote.wallclock, ix))
1062+
}
1063+
CrdsData::Vote(_, _) => None,
1064+
_ => panic!("this should not happen!"),
1065+
}
1066+
})
1067+
.min() // Boot the oldest evicted vote by wallclock.
1068+
.map(|(_ /*wallclock*/, ix)| ix)
1069+
};
1070+
vote_index.unwrap_or(num_crds_votes)
1071+
}
1072+
10481073
pub fn push_vote(&self, tower: &[Slot], vote: Transaction) {
10491074
debug_assert!(tower.iter().tuple_windows().all(|(a, b)| a < b));
10501075
// Find a crds vote which is evicted from the tower, and recycle its
@@ -1057,8 +1082,7 @@ impl ClusterInfo {
10571082
// gossip.
10581083
// TODO: When there are more than one vote evicted from the tower, only
10591084
// one crds vote is overwritten here. Decide what to do with the rest.
1060-
let mut num_crds_votes = 0;
1061-
let self_pubkey = self.id();
1085+
10621086
// Returns true if the tower does not contain the vote.slot.
10631087
let should_evict_vote = |vote: &Vote| -> bool {
10641088
match vote.slot() {
@@ -1069,26 +1093,7 @@ impl ClusterInfo {
10691093
}
10701094
}
10711095
};
1072-
let vote_index = {
1073-
let gossip_crds =
1074-
self.time_gossip_read_lock("gossip_read_push_vote", &self.stats.push_vote_read);
1075-
(0..MAX_LOCKOUT_HISTORY as u8)
1076-
.filter_map(|ix| {
1077-
let vote = CrdsValueLabel::Vote(ix, self_pubkey);
1078-
let vote: &CrdsData = gossip_crds.get(&vote)?;
1079-
num_crds_votes += 1;
1080-
match &vote {
1081-
CrdsData::Vote(_, vote) if should_evict_vote(vote) => {
1082-
Some((vote.wallclock, ix))
1083-
}
1084-
CrdsData::Vote(_, _) => None,
1085-
_ => panic!("this should not happen!"),
1086-
}
1087-
})
1088-
.min() // Boot the oldest evicted vote by wallclock.
1089-
.map(|(_ /*wallclock*/, ix)| ix)
1090-
};
1091-
let vote_index = vote_index.unwrap_or(num_crds_votes);
1096+
let vote_index = self.find_vote_index_to_evict(should_evict_vote);
10921097
if (vote_index as usize) >= MAX_LOCKOUT_HISTORY {
10931098
let (_, vote, hash, _) = vote_parser::parse_vote_transaction(&vote).unwrap();
10941099
panic!(
@@ -1102,7 +1107,7 @@ impl ClusterInfo {
11021107
self.push_vote_at_index(vote, vote_index);
11031108
}
11041109

1105-
pub fn refresh_vote(&self, vote: Transaction, vote_slot: Slot) {
1110+
pub fn refresh_vote(&self, refresh_vote: Transaction, refresh_vote_slot: Slot) {
11061111
let vote_index = {
11071112
let self_pubkey = self.id();
11081113
let gossip_crds =
@@ -1116,7 +1121,7 @@ impl ClusterInfo {
11161121
panic!("this should not happen!");
11171122
};
11181123
match prev_vote.slot() {
1119-
Some(prev_vote_slot) => prev_vote_slot == vote_slot,
1124+
Some(prev_vote_slot) => prev_vote_slot == refresh_vote_slot,
11201125
None => {
11211126
error!("crds vote with no slots!");
11221127
false
@@ -1125,13 +1130,27 @@ impl ClusterInfo {
11251130
})
11261131
};
11271132

1128-
// If you don't see a vote with the same slot yet, this means you probably
1129-
// restarted, and need to wait for your oldest vote to propagate back to you.
1130-
//
11311133
// We don't write to an arbitrary index, because it may replace one of this validator's
11321134
// existing votes on the network.
11331135
if let Some(vote_index) = vote_index {
1134-
self.push_vote_at_index(vote, vote_index);
1136+
self.push_vote_at_index(refresh_vote, vote_index);
1137+
} else {
1138+
// If you don't see a vote with the same slot yet, this means you probably
1139+
// restarted, and need to repush and evict the oldest vote
1140+
let should_evict_vote = |vote: &Vote| -> bool {
1141+
vote.slot()
1142+
.map(|slot| refresh_vote_slot > slot)
1143+
.unwrap_or(true)
1144+
};
1145+
let vote_index = self.find_vote_index_to_evict(should_evict_vote);
1146+
if (vote_index as usize) >= MAX_LOCKOUT_HISTORY {
1147+
warn!(
1148+
"trying to refresh slot {} but all votes in gossip table are for newer slots",
1149+
refresh_vote_slot,
1150+
);
1151+
return;
1152+
}
1153+
self.push_vote_at_index(refresh_vote, vote_index);
11351154
}
11361155
}
11371156

@@ -3673,6 +3692,77 @@ mod tests {
36733692
.unwrap();
36743693
}
36753694

3695+
#[test]
3696+
fn test_refresh_vote_eviction() {
3697+
let keypair = Arc::new(Keypair::new());
3698+
let contact_info = ContactInfo::new_localhost(&keypair.pubkey(), 0);
3699+
let cluster_info = ClusterInfo::new(contact_info, keypair, SocketAddrSpace::Unspecified);
3700+
3701+
// Push MAX_LOCKOUT_HISTORY votes into gossip, one for each slot between
3702+
// [lowest_vote_slot, lowest_vote_slot + MAX_LOCKOUT_HISTORY)
3703+
let lowest_vote_slot = 1;
3704+
let max_vote_slot = lowest_vote_slot + MAX_LOCKOUT_HISTORY as Slot;
3705+
let mut first_vote = None;
3706+
let mut prev_votes = vec![];
3707+
for slot in 1..max_vote_slot {
3708+
prev_votes.push(slot);
3709+
let unrefresh_vote = Vote::new(vec![slot], Hash::new_unique());
3710+
let vote_ix = vote_instruction::vote(
3711+
&Pubkey::new_unique(), // vote_pubkey
3712+
&Pubkey::new_unique(), // authorized_voter_pubkey
3713+
unrefresh_vote,
3714+
);
3715+
let vote_tx = Transaction::new_with_payer(
3716+
&[vote_ix], // instructions
3717+
None, // payer
3718+
);
3719+
if first_vote.is_none() {
3720+
first_vote = Some(vote_tx.clone());
3721+
}
3722+
cluster_info.push_vote(&prev_votes, vote_tx);
3723+
}
3724+
3725+
let initial_votes = cluster_info.get_votes(&mut Cursor::default());
3726+
assert_eq!(initial_votes.len(), MAX_LOCKOUT_HISTORY);
3727+
3728+
// Trying to refresh a vote less than all votes in gossip should do nothing
3729+
let refresh_slot = lowest_vote_slot - 1;
3730+
let refresh_vote = Vote::new(vec![refresh_slot], Hash::new_unique());
3731+
let refresh_ix = vote_instruction::vote(
3732+
&Pubkey::new_unique(), // vote_pubkey
3733+
&Pubkey::new_unique(), // authorized_voter_pubkey
3734+
refresh_vote.clone(),
3735+
);
3736+
let refresh_tx = Transaction::new_with_payer(
3737+
&[refresh_ix], // instructions
3738+
None, // payer
3739+
);
3740+
cluster_info.refresh_vote(refresh_tx.clone(), refresh_slot);
3741+
let current_votes = cluster_info.get_votes(&mut Cursor::default());
3742+
assert_eq!(initial_votes, current_votes);
3743+
assert!(!current_votes.contains(&refresh_tx));
3744+
3745+
// Trying to refresh a vote should evict the first slot less than the refreshed vote slot
3746+
let refresh_slot = max_vote_slot + 1;
3747+
let refresh_vote = Vote::new(vec![refresh_slot], Hash::new_unique());
3748+
let refresh_ix = vote_instruction::vote(
3749+
&Pubkey::new_unique(), // vote_pubkey
3750+
&Pubkey::new_unique(), // authorized_voter_pubkey
3751+
refresh_vote.clone(),
3752+
);
3753+
let refresh_tx = Transaction::new_with_payer(
3754+
&[refresh_ix], // instructions
3755+
None, // payer
3756+
);
3757+
cluster_info.refresh_vote(refresh_tx.clone(), refresh_slot);
3758+
3759+
// This should evict the latest vote since it's for a slot less than refresh_slot
3760+
let votes = cluster_info.get_votes(&mut Cursor::default());
3761+
assert_eq!(votes.len(), MAX_LOCKOUT_HISTORY);
3762+
assert!(votes.contains(&refresh_tx));
3763+
assert!(!votes.contains(&first_vote.unwrap()));
3764+
}
3765+
36763766
#[test]
36773767
fn test_refresh_vote() {
36783768
let keypair = Arc::new(Keypair::new());
@@ -3697,8 +3787,9 @@ mod tests {
36973787
let votes = cluster_info.get_votes(&mut cursor);
36983788
assert_eq!(votes, vec![unrefresh_tx.clone()]);
36993789

3700-
// Now construct vote for the slot to be refreshed later
3701-
let refresh_slot = 7;
3790+
// Now construct vote for the slot to be refreshed later. Has to be less than the `unrefresh_slot`,
3791+
// otherwise it will evict that slot
3792+
let refresh_slot = unrefresh_slot - 1;
37023793
let refresh_tower = vec![1, 3, unrefresh_slot, refresh_slot];
37033794
let refresh_vote = Vote::new(refresh_tower.clone(), Hash::new_unique());
37043795
let refresh_ix = vote_instruction::vote(
@@ -3712,19 +3803,12 @@ mod tests {
37123803
);
37133804

37143805
// Trying to refresh vote when it doesn't yet exist in gossip
3715-
// shouldn't add the vote
3806+
// should add the vote without eviction if there is room in the gossip table.
37163807
cluster_info.refresh_vote(refresh_tx.clone(), refresh_slot);
3717-
let votes = cluster_info.get_votes(&mut cursor);
3718-
assert_eq!(votes, vec![]);
3719-
let votes = cluster_info.get_votes(&mut Cursor::default());
3720-
assert_eq!(votes.len(), 1);
3721-
assert!(votes.contains(&unrefresh_tx));
3722-
3723-
// Push the new vote for `refresh_slot`
3724-
cluster_info.push_vote(&refresh_tower, refresh_tx.clone());
37253808

37263809
// Should be two votes in gossip
3727-
let votes = cluster_info.get_votes(&mut Cursor::default());
3810+
cursor = Cursor::default();
3811+
let votes = cluster_info.get_votes(&mut cursor);
37283812
assert_eq!(votes.len(), 2);
37293813
assert!(votes.contains(&unrefresh_tx));
37303814
assert!(votes.contains(&refresh_tx));

0 commit comments

Comments
 (0)