Skip to content

Commit e83c286

Browse files
Merge pull request #13643 from rabbitmq/su_aws/try_to_leave_cluster_before_joining
Allow a previously reset node to rejoin its original cluster
2 parents c8c7bfb + e6bc6a4 commit e83c286

File tree

2 files changed

+32
-13
lines changed

2 files changed

+32
-13
lines changed

deps/rabbit/src/rabbit_db_cluster.erl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ ensure_feature_flags_are_in_sync(Nodes, NodeIsVirgin) ->
5050
RemoteNode :: node(),
5151
Ret :: Ok | Error,
5252
Ok :: {ok, [node()]} | {ok, already_member},
53-
Error :: {error, {inconsistent_cluster, string()}}.
53+
Error :: {error, {inconsistent_cluster, string()} | {error, {erpc, noconnection}}}.
5454

5555
can_join(RemoteNode) ->
5656
?LOG_INFO(
@@ -82,7 +82,7 @@ can_join_using_khepri(RemoteNode) ->
8282
NodeType :: node_type(),
8383
Ret :: Ok | Error,
8484
Ok :: ok | {ok, already_member},
85-
Error :: {error, {inconsistent_cluster, string()}}.
85+
Error :: {error, {inconsistent_cluster, string()} | {error, {erpc, noconnection}}}.
8686
%% @doc Adds this node to a cluster using `RemoteNode' to reach it.
8787

8888
join(ThisNode, _NodeType) when ThisNode =:= node() ->
@@ -214,6 +214,22 @@ join(RemoteNode, NodeType)
214214
end;
215215
{ok, already_member} ->
216216
{ok, already_member};
217+
{error, {inconsistent_cluster, _Msg}} = Error ->
218+
case rabbit_khepri:is_enabled() of
219+
true ->
220+
Error;
221+
false ->
222+
%% rabbit_mnesia:can_join_cluster/1 notices inconsistent_cluster,
223+
%% as RemoteNode thinks this node is already in the cluster.
224+
%% Attempt to leave the RemoteNode cluster (the discovery cluster),
225+
%% and simply retry the operation.
226+
rabbit_log:info("Mnesia: node ~tp thinks it's clustered "
227+
"with node ~tp, but ~tp disagrees. ~tp will ask "
228+
"to leave the cluster and try again.",
229+
[RemoteNode, node(), node(), node()]),
230+
ok = rabbit_mnesia:leave_then_rediscover_cluster(RemoteNode),
231+
join(RemoteNode, NodeType)
232+
end;
217233
{error, _} = Error ->
218234
Error
219235
end.

deps/rabbit/src/rabbit_mnesia.erl

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
-export([node_info/0, remove_node_if_mnesia_running/1]).
7474

7575
%% Used internally in `rabbit_db_cluster'.
76-
-export([members/0]).
76+
-export([members/0, leave_then_rediscover_cluster/1]).
7777

7878
%% Used internally in `rabbit_khepri'.
7979
-export([mnesia_and_msg_store_files/0]).
@@ -155,7 +155,7 @@ init() ->
155155
%% we cluster to its cluster.
156156

157157
-spec can_join_cluster(node())
158-
-> {ok, [node()]} | {ok, already_member} | {error, {inconsistent_cluster, string()}}.
158+
-> {ok, [node()]} | {ok, already_member} | {error, {inconsistent_cluster, string()} | {error, {erpc, noconnection}}}.
159159

160160
can_join_cluster(DiscoveryNode) ->
161161
ensure_mnesia_dir(),
@@ -179,7 +179,6 @@ can_join_cluster(DiscoveryNode) ->
179179
{ok, already_member};
180180
false ->
181181
Msg = format_inconsistent_cluster_message(DiscoveryNode, node()),
182-
rabbit_log:error(Msg),
183182
{error, {inconsistent_cluster, Msg}}
184183
end
185184
end.
@@ -923,15 +922,19 @@ remove_node_if_mnesia_running(Node) ->
923922
end
924923
end.
925924

926-
leave_cluster() ->
927-
case rabbit_nodes:nodes_excl_me(cluster_nodes(all)) of
928-
[] -> ok;
929-
AllNodes -> case lists:any(fun leave_cluster/1, AllNodes) of
930-
true -> ok;
931-
false -> e(no_running_cluster_nodes)
932-
end
933-
end.
925+
leave_then_rediscover_cluster(DiscoveryNode) ->
926+
{ClusterNodes, _, _} = discover_cluster([DiscoveryNode]),
927+
leave_cluster(rabbit_nodes:nodes_excl_me(ClusterNodes)).
934928

929+
leave_cluster() ->
930+
leave_cluster(rabbit_nodes:nodes_excl_me(cluster_nodes(all))).
931+
leave_cluster([]) ->
932+
ok;
933+
leave_cluster(Nodes) when is_list(Nodes) ->
934+
case lists:any(fun leave_cluster/1, Nodes) of
935+
true -> ok;
936+
false -> e(no_running_cluster_nodes)
937+
end;
935938
leave_cluster(Node) ->
936939
case rpc:call(Node,
937940
rabbit_mnesia, remove_node_if_mnesia_running, [node()]) of

0 commit comments

Comments
 (0)