Skip to content

Commit 5f1bef6

Browse files
Merge pull request #13548 from rabbitmq/rabbitmq-server-13175-mk
For 4.1.x, by @aaron-seo: introduce a command that would force QQs to take a checkpoint and truncate its segments
2 parents bf8fd69 + 0c2b6a1 commit 5f1bef6

File tree

4 files changed

+282
-0
lines changed

4 files changed

+282
-0
lines changed

deps/rabbit/src/rabbit_quorum_queue.erl

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
queue_vm_stats_sups/0,
8585
queue_vm_ets/0]).
8686

87+
-export([force_checkpoint/2, force_checkpoint_on_queue/1]).
88+
8789
%% for backwards compatibility
8890
-export([file_handle_leader_reservation/1,
8991
file_handle_other_reservation/0,
@@ -157,6 +159,7 @@
157159
-define(RPC_TIMEOUT, 1000).
158160
-define(START_CLUSTER_TIMEOUT, 5000).
159161
-define(START_CLUSTER_RPC_TIMEOUT, 60_000). %% needs to be longer than START_CLUSTER_TIMEOUT
162+
-define(FORCE_CHECKPOINT_RPC_TIMEOUT, 15_000).
160163
-define(TICK_INTERVAL, 5000). %% the ra server tick time
161164
-define(DELETE_TIMEOUT, 5000).
162165
-define(MEMBER_CHANGE_TIMEOUT, 20_000).
@@ -2115,6 +2118,40 @@ force_all_queues_shrink_member_to_current_member(ListQQFun) when is_function(Lis
21152118
rabbit_log:warning("Shrinking finished"),
21162119
ok.
21172120

2121+
%% Sends the `force_checkpoint' aux command to every member of a quorum queue.
%%
%% QName is looked up with rabbit_db_queue:get_durable/1. For a quorum queue
%% the command is cast to the Ra server `{RaName, Node}' on each node returned
%% by amqqueue:get_nodes/1; the results of the casts are discarded, so `ok'
%% only tells the caller that the commands were sent.
%%
%% Returns:
%%   ok                                     - commands were cast to all members
%%   {error, classic_queue_not_supported}   - QName is a classic queue
%%   {error, not_quorum_queue}              - QName is some other queue type
%%   {error, _}                             - the lookup error, passed through
force_checkpoint_on_queue(QName) ->
    Printable = rabbit_misc:rs(QName),
    case rabbit_db_queue:get_durable(QName) of
        {error, _} = LookupError ->
            LookupError;
        {ok, Queue} when ?amqqueue_is_classic(Queue) ->
            {error, classic_queue_not_supported};
        {ok, Queue} when ?amqqueue_is_quorum(Queue) ->
            {RaName, _} = amqqueue:get_pid(Queue),
            rabbit_log:debug("Sending command to force ~ts to take a checkpoint", [Printable]),
            lists:foreach(
              fun(Node) ->
                      ra:cast_aux_command({RaName, Node}, force_checkpoint)
              end,
              amqqueue:get_nodes(Queue)),
            ok;
        {ok, _OtherType} ->
            {error, not_quorum_queue}
    end.
2138+
2139+
%% Forces a checkpoint on every durable queue of this module's type whose
%% virtual host matches VhostSpec and whose resource name matches QueueSpec
%% (both are handed to is_match/2; presumably regular expressions - confirm
%% against is_match/2).
%%
%% Returns one `{QName, Result}' pair per matching queue, where Result is
%% `{ok}' on success or `{error, Reason}' on failure. Failures are also
%% logged as warnings.
force_checkpoint(VhostSpec, QueueSpec) ->
    Selected =
        fun(Q) ->
                is_match(amqqueue:get_vhost(Q), VhostSpec)
                    andalso is_match(get_resource_name(amqqueue:get_name(Q)), QueueSpec)
        end,
    Checkpoint =
        fun(QName) ->
                case force_checkpoint_on_queue(QName) of
                    ok ->
                        {QName, {ok}};
                    {error, Reason} ->
                        rabbit_log:warning("~ts: failed to force checkpoint, error: ~w",
                                           [rabbit_misc:rs(QName), Reason]),
                        {QName, {error, Reason}}
                end
        end,
    %% Kept as a single comprehension so each queue is filtered and then
    %% processed before the next one is looked at.
    [Checkpoint(amqqueue:get_name(Q))
     || Q <- rabbit_db_queue:get_all_durable_by_type(?MODULE),
        Selected(Q)].
2154+
21182155
is_minority(All, Up) ->
21192156
MinQuorum = length(All) div 2 + 1,
21202157
length(Up) < MinQuorum.

deps/rabbit/test/quorum_queue_SUITE.erl

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
-include_lib("eunit/include/eunit.hrl").
1111
-include_lib("amqp_client/include/amqp_client.hrl").
1212
-include_lib("rabbitmq_ct_helpers/include/rabbit_assert.hrl").
13+
-include_lib("rabbit/src/rabbit_fifo.hrl").
1314

1415
-import(queue_utils, [wait_for_messages_ready/3,
1516
wait_for_messages_pending_ack/3,
@@ -98,6 +99,8 @@ groups() ->
9899
force_shrink_member_to_current_member,
99100
force_all_queues_shrink_member_to_current_member,
100101
force_vhost_queues_shrink_member_to_current_member,
102+
force_checkpoint_on_queue,
103+
force_checkpoint,
101104
policy_repair,
102105
gh_12635,
103106
replica_states
@@ -1339,6 +1342,96 @@ force_vhost_queues_shrink_member_to_current_member(Config) ->
13391342
?assertEqual(3, length(Nodes0))
13401343
end || Q <- QQs, VHost <- VHosts].
13411344

1345+
%% Checks that rabbit_quorum_queue:force_checkpoint_on_queue/1 makes every
%% member of a quorum queue take a checkpoint.
%%
%% Steps:
%%   1. declare a quorum queue and publish N messages to it;
%%   2. wait until each of the three nodes reports no checkpoint at all;
%%   3. force a checkpoint and assert that the call itself returns `ok';
%%   4. wait until each node reports a latest checkpoint index of at least N.
force_checkpoint_on_queue(Config) ->
    [Server0, _Server1, _Server2] = Servers =
        rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
    Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
    QQ = ?config(queue_name, Config),
    RaName = ra_name(QQ),
    QName = rabbit_misc:r(<<"/">>, queue, QQ),

    ?assertEqual({'queue.declare_ok', QQ, 0, 0},
                 declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),

    N = 20_000,
    rabbit_ct_client_helpers:publish(Ch, QQ, N),
    wait_for_messages_ready([Server0], RaName, N),

    %% Fetches the Ra member overview of the queue on Server and extracts the
    %% latest checkpoint index from its log section.
    CheckpointIndex =
        fun(Server) ->
                {ok, Overview, _} = rpc:call(Server, ra, member_overview,
                                             [{RaName, Server}]),
                #{log := #{latest_checkpoint_index := LCI}} = Overview,
                {LCI, Overview}
        end,

    %% The state before any checkpoints
    lists:foreach(
      fun(Server) ->
              rabbit_ct_helpers:await_condition(
                fun() ->
                        {LCI, _Overview} = CheckpointIndex(Server),
                        LCI =:= undefined
                end)
      end, Servers),

    {ok, State0, _} = rpc:call(Server0, ra, member_overview, [{RaName, Server0}]),
    ct:pal("Ra server state before forcing a checkpoint: ~tp~n", [State0]),

    %% wait for longer than ?CHECK_MIN_INTERVAL_MS ms
    timer:sleep(?CHECK_MIN_INTERVAL_MS + 1000),
    %% The call must succeed; a discarded {error, _} here would only show up
    %% later as an unexplained timeout in the polling below.
    ?assertEqual(ok,
                 rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_quorum_queue,
                                              force_checkpoint_on_queue, [QName])),

    %% Wait for initial checkpoint and make sure it's not 0
    lists:foreach(
      fun(Server) ->
              rabbit_ct_helpers:await_condition(
                fun() ->
                        {LCI, Overview} = CheckpointIndex(Server),
                        ct:pal("Ra server state post forced checkpoint: ~tp~n", [Overview]),
                        %% The explicit undefined check is required: in Erlang
                        %% term order any atom compares greater than any number,
                        %% so `undefined >= N' alone would be true.
                        (LCI =/= undefined) andalso (LCI >= N)
                end)
      end, Servers).
1410+
1411+
%% Checks that rabbit_quorum_queue:force_checkpoint/2, called with catch-all
%% patterns, reports a result for the quorum queue only: a classic queue
%% declared alongside it must not appear in the returned list.
force_checkpoint(Config) ->
    [Node0, _, _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
    Chan = rabbit_ct_client_helpers:open_channel(Config, Node0),
    QuorumQ = ?config(queue_name, Config),
    ClassicQ = <<"force_checkpoint_cq">>,

    %% Declare one queue of each type; both declarations must succeed.
    lists:foreach(
      fun({Name, Type}) ->
              ?assertEqual({'queue.declare_ok', Name, 0, 0},
                           declare(Chan, Name, [{<<"x-queue-type">>, longstr, Type}]))
      end,
      [{QuorumQ, <<"quorum">>},
       {ClassicQ, <<"classic">>}]),

    rabbit_ct_client_helpers:publish(Chan, QuorumQ, 3),
    wait_for_messages_ready([Node0], ra_name(QuorumQ), 3),

    Actual = rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_quorum_queue,
                                          force_checkpoint, [<<".*">>, <<".*">>]),

    % Result should only have quorum queue
    ?assertEqual([{rabbit_misc:r(<<"/">>, queue, QuorumQ), {ok}}], Actual).
13421435

13431436
% Tests that, if the process of a QQ is dead in the moment of declaring a policy
13441437
% that affects such queue, when the process is made available again, the policy
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
## This Source Code Form is subject to the terms of the Mozilla Public
2+
## License, v. 2.0. If a copy of the MPL was not distributed with this
3+
## file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
##
5+
## Copyright (c) 2007-2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
6+
7+
defmodule RabbitMQ.CLI.Queues.Commands.ForceCheckpointCommand do
  # CLI command that asks the target node to force a checkpoint on every
  # quorum queue whose virtual host and name match the given patterns.
  #
  # The work is done by `rabbit_quorum_queue:force_checkpoint/2` on the target
  # node. It returns a list of `{queue_resource, result}` pairs where `result`
  # is `{:ok}` on success or `{:error, reason}` on failure.
  alias RabbitMQ.CLI.Core.{DocGuide}

  @behaviour RabbitMQ.CLI.CommandBehaviour

  # Match every virtual host and every queue, and report all results,
  # unless the user says otherwise.
  defp default_opts,
    do: %{vhost_pattern: ".*", queue_pattern: ".*", errors_only: false}

  def switches(),
    do: [
      vhost_pattern: :string,
      queue_pattern: :string,
      errors_only: :boolean
    ]

  # User-provided options take precedence over the defaults.
  def merge_defaults(args, opts) do
    {args, Map.merge(default_opts(), opts)}
  end

  use RabbitMQ.CLI.Core.RequiresRabbitAppRunning
  use RabbitMQ.CLI.Core.AcceptsNoPositionalArguments

  # Runs the command against `node_name`.
  #
  # Returns `{:badrpc, reason}` unchanged when the RPC fails. Otherwise returns
  # one keyword list (`vhost`, `name`, `result`) per reported queue; with
  # `errors_only: true` only the queues whose result is an error are kept.
  def run([], %{
        node: node_name,
        vhost_pattern: vhost_pat,
        queue_pattern: queue_pat,
        errors_only: errors_only
      }) do
    args = [vhost_pat, queue_pat]

    case :rabbit_misc.rpc_call(node_name, :rabbit_quorum_queue, :force_checkpoint, args) do
      {:badrpc, _} = error ->
        error

      results when errors_only ->
        # Errors are 2-tuples, `{:error, reason}`. A generator pattern that
        # does not match silently drops the element, so the pattern must have
        # exactly this shape or every failure would be filtered out.
        for {{:resource, vhost, _kind, name}, {:error, _} = res} <- results,
            do: [
              {:vhost, vhost},
              {:name, name},
              {:result, res}
            ]

      results ->
        for {{:resource, vhost, _kind, name}, res} <- results,
            do: [
              {:vhost, vhost},
              {:name, name},
              {:result, res}
            ]
    end
  end

  use RabbitMQ.CLI.DefaultOutput

  def formatter(), do: RabbitMQ.CLI.Formatters.Table

  def usage,
    do: "force_checkpoint [--vhost-pattern <pattern>] [--queue-pattern <pattern>] [--errors-only]"

  def usage_additional do
    [
      ["--queue-pattern <pattern>", "regular expression to match queue names"],
      ["--vhost-pattern <pattern>", "regular expression to match virtual host names"],
      ["--errors-only", "only list queues which reported an error"]
    ]
  end

  def usage_doc_guides() do
    [
      DocGuide.quorum_queues()
    ]
  end

  def help_section, do: :replication

  def description,
    do: "Forces checkpoints for all matching quorum queues"

  def banner([], _) do
    "Forcing checkpoint for all matching quorum queues..."
  end
end
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
## This Source Code Form is subject to the terms of the Mozilla Public
2+
## License, v. 2.0. If a copy of the MPL was not distributed with this
3+
## file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
##
5+
## Copyright (c) 2007-2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved.
6+
7+
defmodule RabbitMQ.CLI.Queues.Commands.ForceCheckpointCommandTest do
  # Unit tests for the force_checkpoint CLI command: option defaults,
  # positional-argument validation, and behaviour against an unreachable node.
  use ExUnit.Case, async: false
  import TestHelper

  @command RabbitMQ.CLI.Queues.Commands.ForceCheckpointCommand

  # Option values the command falls back to when none are given.
  @default_opts %{vhost_pattern: ".*", queue_pattern: ".*", errors_only: false}

  setup_all do
    RabbitMQ.CLI.Core.Distribution.start()

    :ok
  end

  # Every test gets the default options plus the target node and a timeout,
  # which a test can override with `@tag test_timeout: ...`.
  setup context do
    opts =
      Map.merge(@default_opts, %{
        node: get_rabbit_hostname(),
        timeout: context[:test_timeout] || 30000
      })

    {:ok, opts: opts}
  end

  test "merge_defaults: defaults to reporting complete results" do
    assert @command.merge_defaults([], %{}) == {[], @default_opts}
  end

  test "validate: accepts no positional arguments" do
    assert @command.validate([], %{}) == :ok
  end

  test "validate: any positional arguments fail validation" do
    for args <- [
          ["quorum-queue-a"],
          ["quorum-queue-a", "two"],
          ["quorum-queue-a", "two", "three"]
        ] do
      assert @command.validate(args, %{}) == {:validation_failure, :too_many_args}
    end
  end

  @tag test_timeout: 3000
  test "run: targeting an unreachable node throws a badrpc", context do
    unreachable = Map.merge(context[:opts], %{node: :jake@thedog})

    assert {:badrpc, _} = @command.run([], unreachable)
  end
end

0 commit comments

Comments
 (0)