Skip to content

Commit fadbe42

Browse files
committed
refactor(meta-service): add RaftWriteError and RaftChangeMembershipError
- Meta-service replace returning error `MetaError` with `RaftChangeMembershipError` and `RaftWriteError` for `join()`, `leave()` and `write()`. These three methods do not need to all the sub errors in MetaError. - Also remove unnecessary error checking before changing membership. The error will be returned by raft. - Use RAII to report metrics of pending proposals.
1 parent 8337678 commit fadbe42

File tree

7 files changed

+145
-127
lines changed

7 files changed

+145
-127
lines changed

src/meta/service/src/meta_service/meta_leader.rs

Lines changed: 45 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,26 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use std::collections::BTreeSet;
16-
1715
use common_meta_api::KVApi;
18-
use common_meta_sled_store::openraft::error::ChangeMembershipError;
19-
use common_meta_sled_store::openraft::error::ClientWriteError;
20-
use common_meta_sled_store::openraft::error::InProgress;
2116
use common_meta_types::AppliedState;
2217
use common_meta_types::Cmd;
2318
use common_meta_types::ForwardRequest;
2419
use common_meta_types::ForwardResponse;
25-
use common_meta_types::ForwardToLeader;
2620
use common_meta_types::LogEntry;
2721
use common_meta_types::MetaError;
28-
use common_meta_types::MetaRaftError;
2922
use common_meta_types::Node;
30-
use common_meta_types::NodeId;
23+
use common_meta_types::RaftChangeMembershipError;
24+
use common_meta_types::RaftWriteError;
3125
use common_meta_types::SeqV;
26+
use common_metrics::counter::WithCount;
3227
use tracing::debug;
3328
use tracing::info;
3429

3530
use crate::meta_service::ForwardRequestBody;
3631
use crate::meta_service::JoinRequest;
3732
use crate::meta_service::LeaveRequest;
3833
use crate::meta_service::MetaNode;
34+
use crate::metrics::ProposalPending;
3935

4036
/// The container of APIs of a metasrv leader in a metasrv cluster.
4137
///
@@ -98,7 +94,7 @@ impl<'a> MetaLeader<'a> {
9894
///
9995
/// If the node is already in cluster membership, it still returns Ok.
10096
#[tracing::instrument(level = "debug", skip(self))]
101-
pub async fn join(&self, req: JoinRequest) -> Result<(), MetaError> {
97+
pub async fn join(&self, req: JoinRequest) -> Result<(), RaftChangeMembershipError> {
10298
let node_id = req.node_id;
10399
let endpoint = req.endpoint;
104100
let grpc_api_addr = req.grpc_api_addr;
@@ -109,35 +105,28 @@ impl<'a> MetaLeader<'a> {
109105
return Ok(());
110106
}
111107

112-
// deal with joint config,If the cluster is in joint config,
113-
// we need to return an Inprogress error with membership log id.
114-
match membership.get_ith_config(1) {
115-
Some(_membership) => Err(MetaRaftError::ChangeMembershipError(
116-
ChangeMembershipError::InProgress(InProgress {
117-
membership_log_id: metrics.membership_config.log_id,
118-
}),
119-
)
120-
.into()),
121-
None => {
122-
// safe unwrap: if the first config is None, panic is the expected behavior here.
123-
let mut membership = membership.get_ith_config(0).unwrap().clone();
124-
membership.insert(node_id);
125-
let ent = LogEntry {
126-
txid: None,
127-
time_ms: None,
128-
cmd: Cmd::AddNode {
129-
node_id,
130-
node: Node {
131-
name: node_id.to_string(),
132-
endpoint,
133-
grpc_api_addr: Some(grpc_api_addr),
134-
},
135-
},
136-
};
137-
self.write(ent).await?;
138-
self.change_membership(membership).await
139-
}
140-
}
108+
// safe unwrap: if the first config is None, panic is the expected behavior here.
109+
let mut membership = membership.get_ith_config(0).unwrap().clone();
110+
membership.insert(node_id);
111+
let ent = LogEntry {
112+
txid: None,
113+
time_ms: None,
114+
cmd: Cmd::AddNode {
115+
node_id,
116+
node: Node {
117+
name: node_id.to_string(),
118+
endpoint,
119+
grpc_api_addr: Some(grpc_api_addr),
120+
},
121+
},
122+
};
123+
self.write(ent).await?;
124+
125+
self.meta_node
126+
.raft
127+
.change_membership(membership, true)
128+
.await?;
129+
Ok(())
141130
}
142131

143132
/// A node leave the cluster.
@@ -147,7 +136,7 @@ impl<'a> MetaLeader<'a> {
147136
///
148137
/// If the node is not in cluster membership, it still returns Ok.
149138
#[tracing::instrument(level = "debug", skip(self))]
150-
pub async fn leave(&self, req: LeaveRequest) -> Result<(), MetaError> {
139+
pub async fn leave(&self, req: LeaveRequest) -> Result<(), RaftChangeMembershipError> {
151140
let node_id = req.node_id;
152141
let metrics = self.meta_node.raft.metrics().borrow().clone();
153142
let membership = metrics.membership_config.membership.clone();
@@ -156,93 +145,42 @@ impl<'a> MetaLeader<'a> {
156145
return Ok(());
157146
}
158147

159-
// deal with joint config,If the cluster is in joint config,
160-
// we need to return an Inprogress error with membership log id.
161-
match membership.get_ith_config(1) {
162-
Some(_membership) => Err(MetaRaftError::ChangeMembershipError(
163-
ChangeMembershipError::InProgress(InProgress {
164-
membership_log_id: metrics.membership_config.log_id,
165-
}),
166-
)
167-
.into()),
168-
None => {
169-
// safe unwrap: if the first config is None, panic is the expected behavior here.
170-
let mut membership = membership.get_ith_config(0).unwrap().clone();
171-
membership.remove(&node_id);
172-
173-
self.change_membership(membership).await?;
174-
let ent = LogEntry {
175-
txid: None,
176-
time_ms: None,
177-
cmd: Cmd::RemoveNode { node_id },
178-
};
179-
self.write(ent).await?;
180-
Ok(())
181-
}
182-
}
183-
}
148+
// safe unwrap: if the first config is None, panic is the expected behavior here.
149+
let mut membership = membership.get_ith_config(0).unwrap().clone();
150+
membership.remove(&node_id);
184151

185-
#[tracing::instrument(level = "debug", skip(self))]
186-
pub async fn change_membership(&self, membership: BTreeSet<NodeId>) -> Result<(), MetaError> {
187-
let res = self
188-
.meta_node
152+
self.meta_node
189153
.raft
190154
.change_membership(membership, true)
191-
.await;
155+
.await?;
192156

193-
let err = match res {
194-
Ok(_) => return Ok(()),
195-
Err(e) => e,
157+
let ent = LogEntry {
158+
txid: None,
159+
time_ms: None,
160+
cmd: Cmd::RemoveNode { node_id },
196161
};
197-
198-
match err {
199-
ClientWriteError::ChangeMembershipError(e) => {
200-
Err(MetaRaftError::ChangeMembershipError(e).into())
201-
}
202-
// TODO(xp): enable MetaNode::RaftError when RaftError impl Serialized
203-
ClientWriteError::Fatal(fatal) => Err(MetaRaftError::RaftFatal(fatal).into()),
204-
ClientWriteError::ForwardToLeader(to_leader) => {
205-
Err(MetaRaftError::ForwardToLeader(ForwardToLeader {
206-
leader_id: to_leader.leader_id,
207-
})
208-
.into())
209-
}
210-
}
162+
self.write(ent).await?;
163+
Ok(())
211164
}
212165

213166
/// Write a log through local raft node and return the states before and after applying the log.
214167
///
215168
/// If the raft node is not a leader, it returns MetaRaftError::ForwardToLeader.
216169
#[tracing::instrument(level = "debug", skip(self, entry))]
217-
pub async fn write(&self, mut entry: LogEntry) -> Result<AppliedState, MetaError> {
170+
pub async fn write(&self, mut entry: LogEntry) -> Result<AppliedState, RaftWriteError> {
218171
// Add consistent clock time to log entry.
219172
entry.time_ms = Some(SeqV::<()>::now_ms());
220173

174+
// report metrics
175+
let _guard = WithCount::new((), ProposalPending);
176+
221177
info!("write LogEntry: {:?}", entry);
222178
let write_rst = self.meta_node.raft.client_write(entry).await;
223-
224179
info!("raft.client_write rst: {:?}", write_rst);
225180

226181
match write_rst {
227-
Ok(resp) => {
228-
let data = resp.data;
229-
Ok(data)
230-
}
231-
232-
Err(cli_write_err) => match cli_write_err {
233-
// fatal error
234-
ClientWriteError::Fatal(fatal) => Err(MetaRaftError::RaftFatal(fatal).into()),
235-
// retryable error
236-
ClientWriteError::ForwardToLeader(to_leader) => {
237-
Err(MetaRaftError::ForwardToLeader(ForwardToLeader {
238-
leader_id: to_leader.leader_id,
239-
})
240-
.into())
241-
}
242-
ClientWriteError::ChangeMembershipError(_) => {
243-
unreachable!("there should not be a ChangeMembershipError for client_write")
244-
}
245-
},
182+
Ok(resp) => Ok(resp.data),
183+
Err(cli_write_err) => Err(RaftWriteError::from_raft_err(cli_write_err)),
246184
}
247185
}
248186
}

src/meta/service/src/meta_service/meta_service_impl.rs

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ use tonic::codegen::futures_core::Stream;
3333
use crate::meta_service::ForwardRequestBody;
3434
use crate::meta_service::MetaNode;
3535
use crate::metrics::incr_meta_metrics_proposals_failed;
36-
use crate::metrics::incr_meta_metrics_proposals_pending;
3736
use crate::metrics::incr_meta_metrics_recv_bytes_from_peer;
3837
use crate::metrics::incr_meta_metrics_snapshot_recv_failure_from_peer;
3938
use crate::metrics::incr_meta_metrics_snapshot_recv_inflights_from_peer;
@@ -72,12 +71,9 @@ impl RaftService for RaftServiceImpl {
7271
) -> Result<tonic::Response<RaftReply>, tonic::Status> {
7372
common_tracing::extract_remote_span_as_parent(&request);
7473

75-
let mes = request.into_inner();
76-
let ent: LogEntry = mes.try_into()?;
74+
let raft_req = request.into_inner();
75+
let ent: LogEntry = raft_req.try_into()?;
7776

78-
incr_meta_metrics_proposals_pending(1);
79-
80-
// TODO(xp): call meta_node.write()
8177
let res = self
8278
.meta_node
8379
.handle_forwardable_request(ForwardRequest {
@@ -86,8 +82,6 @@ impl RaftService for RaftServiceImpl {
8682
})
8783
.await;
8884

89-
incr_meta_metrics_proposals_pending(-1);
90-
9185
let res = match res {
9286
Ok(r) => {
9387
let a: Result<AppliedState, MetaError> = r.try_into().map_err(|e: &str| {
@@ -115,10 +109,10 @@ impl RaftService for RaftServiceImpl {
115109

116110
let req = request.into_inner();
117111

118-
let admin_req: ForwardRequest = serde_json::from_str(&req.data)
112+
let forward_req: ForwardRequest = serde_json::from_str(&req.data)
119113
.map_err(|x| tonic::Status::invalid_argument(x.to_string()))?;
120114

121-
let res = self.meta_node.handle_forwardable_request(admin_req).await;
115+
let res = self.meta_node.handle_forwardable_request(forward_req).await;
122116

123117
let raft_mes: RaftReply = res.into();
124118

src/meta/service/src/metrics/meta_metrics.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,15 @@ impl counter::Count for RequestInFlight {
714714
}
715715
}
716716

717+
/// RAII metrics counter of pending raft proposals
718+
pub(crate) struct ProposalPending;
719+
720+
impl counter::Count for ProposalPending {
721+
fn incr_count(&mut self, n: i64) {
722+
PROPOSALS_PENDING.add(n);
723+
}
724+
}
725+
717726
/// Encode metrics as prometheus format string
718727
pub fn meta_metrics_to_prometheus_string() -> String {
719728
use prometheus::Encoder;

src/meta/service/src/metrics/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,5 @@ pub use meta_metrics::set_meta_metrics_last_log_index;
4747
pub use meta_metrics::set_meta_metrics_last_seq;
4848
pub use meta_metrics::set_meta_metrics_node_is_health;
4949
pub use meta_metrics::set_meta_metrics_proposals_applied;
50+
pub(crate) use meta_metrics::ProposalPending;
5051
pub(crate) use meta_metrics::RequestInFlight;

src/meta/service/tests/it/meta_node/meta_node_request_forwarding.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ use common_base::base::tokio;
1818
use common_meta_types::Cmd;
1919
use common_meta_types::ForwardToLeader;
2020
use common_meta_types::LogEntry;
21-
use common_meta_types::MetaError;
22-
use common_meta_types::MetaRaftError;
21+
use common_meta_types::RaftWriteError;
2322
use common_meta_types::UpsertKV;
2423
use databend_meta::meta_service::meta_leader::MetaLeader;
2524
use databend_meta::meta_service::MetaNode;
@@ -59,9 +58,9 @@ async fn test_meta_node_forward_to_leader() -> anyhow::Result<()> {
5958
assert!(rst.is_err());
6059
let e = rst.unwrap_err();
6160
match e {
62-
MetaError::MetaRaftError(MetaRaftError::ForwardToLeader(ForwardToLeader {
61+
RaftWriteError::ForwardToLeader(ForwardToLeader {
6362
leader_id: forward_leader_id,
64-
})) => {
63+
}) => {
6564
assert_eq!(Some(leader_id), forward_leader_id);
6665
}
6766
_ => {

src/meta/types/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ pub use meta_raft_errors::Fatal;
131131
pub use meta_raft_errors::ForwardToLeader;
132132
pub use meta_raft_errors::InitializeError;
133133
pub use meta_raft_errors::MetaRaftError;
134+
pub use meta_raft_errors::RaftChangeMembershipError;
135+
pub use meta_raft_errors::RaftWriteError;
134136
pub use meta_raft_errors::RetryableError;
135137
pub use meta_storage_errors::MetaStorageError;
136138
pub use meta_storage_errors::MetaStorageResult;

0 commit comments

Comments
 (0)