Skip to content

Commit

Permalink
rabbitmq_ct_broker_helpers: Use node 2 as the cluster seed node
Browse files Browse the repository at this point in the history
[Why]
When running mixed-version tests, nodes 1/3/5/... are using the primary
umbrella, so usually the newest version. Nodes 2/4/6/... are using the
secondary umbrella, thus the old version.

When clustering, we used to use node 1 (running a new version) as the
seed node, meaning other nodes would join it.

This complicates things with feature flags because we have to make sure
that we start node 1 with new stable feature flags disabled to allow old
nodes to join.

This is also a problem with Khepri machine versions because the cluster
would start with the latest version, which old nodes might not have.

[How]
This patch changes the logic to use a node running the secondary
umbrella as the seed node instead. If there is no node running it, we
pick the first node as before.

V2: Revert part of "rabbitmq_ct_helpers: Fix how we set
    `$RABBITMQ_FEATURE_FLAGS` in tests" (commit
    57ed962). These changes are no
    longer needed with the new logic.

V3: The check that verifies that the correct metadata store is used has
    a special case for nodes that use the secondary umbrella: if Khepri
    is supposed to be used but it's not, the feature flag is enabled.
    The reason is that the `v4.0.x` branch doesn't know about the `rel`
    configuration of `forced_feature_flags_on_init`. The nodes will
    have ignored this parameter and booted with the stable feature
    flags only.

    Many testsuites are adapted to the new clustering order. If they
    manage which node joins which node, either the order is changed in
    the testcases, or nodes are started with only required feature
    flags. For testsuites that rely on peer discovery where the order is
    unknown, nodes are started with only required feature flags.
  • Loading branch information
dumbbell committed Jan 27, 2025
1 parent 28602be commit f549425
Show file tree
Hide file tree
Showing 13 changed files with 225 additions and 128 deletions.
4 changes: 3 additions & 1 deletion deps/rabbit/test/cluster_minority_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,9 @@ init_per_group(Group, Config0) ->
{rmq_nodes_clustered, false},
{tcp_ports_base},
{net_ticktime, 5}]),
rabbit_ct_helpers:run_steps(Config,
Config1 = rabbit_ct_helpers:merge_app_env(
Config, {rabbit, [{forced_feature_flags_on_init, []}]}),
rabbit_ct_helpers:run_steps(Config1,
rabbit_ct_broker_helpers:setup_steps() ++
rabbit_ct_client_helpers:setup_steps()).

Expand Down
8 changes: 4 additions & 4 deletions deps/rabbit/test/clustering_events_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ configure_cluster_essentials(Config, Group, Clustered) ->

node_added_event(Config) ->
[Server1, Server2, _Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
ok = event_recorder:start(Config),
join_cluster(Server2, Server1),
E = event_recorder:get_events(Config),
ok = event_recorder:stop(Config),
ok = event_recorder:start(Config, Server2),
join_cluster(Server1, Server2),
E = event_recorder:get_events(Config, Server2),
ok = event_recorder:stop(Config, Server2),
?assert(lists:any(fun(#event{type = node_added}) ->
true;
(_) ->
Expand Down
10 changes: 8 additions & 2 deletions deps/rabbit/test/clustering_management_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,15 @@ init_per_group(mnesia_store, Config) ->
Config
end;
init_per_group(unclustered_2_nodes, Config) ->
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
Config1 = rabbit_ct_helpers:set_config(
Config, [{rmq_nodes_clustered, false}]),
rabbit_ct_helpers:merge_app_env(
Config1, {rabbit, [{forced_feature_flags_on_init, []}]});
init_per_group(unclustered_3_nodes, Config) ->
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]);
Config1 = rabbit_ct_helpers:set_config(
Config, [{rmq_nodes_clustered, false}]),
rabbit_ct_helpers:merge_app_env(
Config1, {rabbit, [{forced_feature_flags_on_init, []}]});
init_per_group(clustered_2_nodes, Config) ->
rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, true}]);
init_per_group(clustered_3_nodes, Config) ->
Expand Down
32 changes: 16 additions & 16 deletions deps/rabbit/test/direct_exchange_routing_v2_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ join_cluster(Config) ->
Servers0 = [Server1, Server2] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
Servers = lists:sort(Servers0),

{_Conn1, Ch1} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server1),
{_Conn1, Ch1} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server2),
DirectX = <<"amq.direct">>,
Q = <<"q">>,
RKey = <<"k">>,
Expand All @@ -346,35 +346,35 @@ join_cluster(Config) ->
bind_queue(Ch1, Q, DirectX, RKey),

%% Server1 and Server2 are not clustered yet.
%% Hence, every node has their own table (copy) and only Server1's table contains the binding.
?assertEqual([Server1], index_table_ram_copies(Config, Server1)),
%% Hence, every node has their own table (copy) and only Server2's table contains the binding.
?assertEqual([Server2], index_table_ram_copies(Config, Server2)),
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server1)),
?assertEqual(0, table_size(Config, ?INDEX_TABLE_NAME, Server2)),
?assertEqual([Server1], index_table_ram_copies(Config, Server1)),
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server2)),
?assertEqual(0, table_size(Config, ?INDEX_TABLE_NAME, Server1)),

ok = rabbit_control_helper:command(stop_app, Server2),
%% For the purpose of this test it shouldn't matter whether Server2 is reset. Both should work.
ok = rabbit_control_helper:command(stop_app, Server1),
%% For the purpose of this test it shouldn't matter whether Server1 is reset. Both should work.
case erlang:system_time() rem 2 of
0 ->
ok = rabbit_control_helper:command(reset, Server2);
ok = rabbit_control_helper:command(reset, Server1);
1 ->
ok
end,
ok = rabbit_control_helper:command(join_cluster, Server2, [atom_to_list(Server1)], []),
ok = rabbit_control_helper:command(start_app, Server2),
ok = rabbit_control_helper:command(join_cluster, Server1, [atom_to_list(Server2)], []),
ok = rabbit_control_helper:command(start_app, Server1),

%% After Server2 joined Server1, the table should be clustered.
?assertEqual(Servers, index_table_ram_copies(Config, Server2)),
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server2)),
%% After Server1 joined Server2, the table should be clustered.
?assertEqual(Servers, index_table_ram_copies(Config, Server1)),
?assertEqual(1, table_size(Config, ?INDEX_TABLE_NAME, Server1)),

%% Publishing via Server1 via "direct exchange routing v2" should work.
%% Publishing via Server2 via "direct exchange routing v2" should work.
amqp_channel:call(Ch1, #'confirm.select'{}),
amqp_channel:register_confirm_handler(Ch1, self()),
publish(Ch1, DirectX, RKey),
assert_confirm(),

%% Publishing via Server2 via "direct exchange routing v2" should work.
{_Conn2, Ch2} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server2),
%% Publishing via Server1 via "direct exchange routing v2" should work.
{_Conn2, Ch2} = rabbit_ct_client_helpers:open_connection_and_channel(Config, Server1),
amqp_channel:call(Ch2, #'confirm.select'{}),
amqp_channel:register_confirm_handler(Ch2, self()),
publish(Ch2, DirectX, RKey),
Expand Down
27 changes: 18 additions & 9 deletions deps/rabbit/test/event_recorder.erl
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
handle_event/2,
handle_call/2]).
%% client API
-export([start/1,
stop/1,
get_events/1]).
-export([start/1, start/2,
stop/1, stop/2,
get_events/1, get_events/2]).
-export([assert_event_type/2,
assert_event_prop/2]).

Expand All @@ -42,22 +42,31 @@ handle_call(take_state, State) ->
{ok, lists:reverse(State), ?INIT_STATE}.

start(Config) ->
start(Config, 0).

start(Config, Node) ->
ok = rabbit_ct_broker_helpers:add_code_path_to_all_nodes(Config, ?MODULE),
ok = gen_event:add_handler(event_manager_ref(Config), ?MODULE, []).
ok = gen_event:add_handler(event_manager_ref(Config, Node), ?MODULE, []).

stop(Config) ->
ok = gen_event:delete_handler(event_manager_ref(Config), ?MODULE, []).
stop(Config, 0).

stop(Config, Node) ->
ok = gen_event:delete_handler(event_manager_ref(Config, Node), ?MODULE, []).

get_events(Config) ->
get_events(Config, 0).

get_events(Config, Node) ->
%% events are sent and processed asynchronously
timer:sleep(500),
Result = gen_event:call(event_manager_ref(Config), ?MODULE, take_state),
Result = gen_event:call(event_manager_ref(Config, Node), ?MODULE, take_state),
?assert(is_list(Result)),
Result.

event_manager_ref(Config) ->
Node = get_node_config(Config, 0, nodename),
{rabbit_event, Node}.
event_manager_ref(Config, Node) ->
Node1 = get_node_config(Config, Node, nodename),
{rabbit_event, Node1}.

assert_event_type(ExpectedType, #event{type = ActualType}) ->
?assertEqual(ExpectedType, ActualType).
Expand Down
24 changes: 17 additions & 7 deletions deps/rabbit/test/feature_flags_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,7 @@ groups() ->

init_per_suite(Config) ->
rabbit_ct_helpers:log_environment(),
Config1 = rabbit_ct_helpers:set_config(
Config, {skip_metadata_store_configuration, true}),
rabbit_ct_helpers:run_setup_steps(Config1, [
rabbit_ct_helpers:run_setup_steps(Config, [
fun rabbit_ct_broker_helpers:configure_dist_proxy/1
]).

Expand Down Expand Up @@ -198,7 +196,9 @@ init_per_group(clustering, Config) ->
[{rmq_nodes_count, 2},
{rmq_nodes_clustered, false},
{start_rmq_with_plugins_disabled, true}]),
rabbit_ct_helpers:run_setup_steps(Config1, [fun prepare_my_plugin/1]);
Config2 = rabbit_ct_helpers:merge_app_env(
Config1, {rabbit, [{forced_feature_flags_on_init, []}]}),
rabbit_ct_helpers:run_setup_steps(Config2, [fun prepare_my_plugin/1]);
init_per_group(activating_plugin, Config) ->
Config1 = rabbit_ct_helpers:set_config(
Config,
Expand All @@ -212,7 +212,17 @@ init_per_group(_, Config) ->
end_per_group(_, Config) ->
Config.

init_per_testcase(enable_feature_flag_when_ff_file_is_unwritable = Testcase, Config) ->
case erlang:system_info(otp_release) of
"26" ->
{skip, "Hits a crash in Mnesia fairly frequently"};
_ ->
do_init_per_testcase(Testcase, Config)
end;
init_per_testcase(Testcase, Config) ->
do_init_per_testcase(Testcase, Config).

do_init_per_testcase(Testcase, Config) ->
rabbit_ct_helpers:testcase_started(Config, Testcase),
TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase),
Config1 = case Testcase of
Expand Down Expand Up @@ -891,7 +901,7 @@ clustering_ok_with_ff_enabled_on_some_nodes(Config) ->
ok
end,

?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config)),
?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config, 0)),

log_feature_flags_of_all_nodes(Config),
case FFSubsysOk of
Expand Down Expand Up @@ -987,7 +997,7 @@ clustering_denied_with_new_ff_enabled(Config) ->
false -> ok
end,

?assertMatch({skip, _}, rabbit_ct_broker_helpers:cluster_nodes(Config)),
?assertMatch({skip, _}, rabbit_ct_broker_helpers:cluster_nodes(Config, 0)),

log_feature_flags_of_all_nodes(Config),
case FFSubsysOk of
Expand Down Expand Up @@ -1049,7 +1059,7 @@ clustering_ok_with_new_ff_enabled_from_plugin_on_some_nodes(Config) ->
false -> ok
end,

?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config)),
?assertEqual(Config, rabbit_ct_broker_helpers:cluster_nodes(Config, 0)),

log_feature_flags_of_all_nodes(Config),
case FFSubsysOk of
Expand Down
3 changes: 3 additions & 0 deletions deps/rabbit/test/peer_discovery_classic_config_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ init_per_testcase(successful_discovery = Testcase, Config) ->
NodeNamesWithHostname = [rabbit_nodes:make({Name, "localhost"}) || Name <- NodeNames],
Config3 = rabbit_ct_helpers:merge_app_env(Config2,
{rabbit, [
{forced_feature_flags_on_init, []},
{cluster_nodes, {NodeNamesWithHostname, disc}},
{cluster_formation, [
{internal_lock_retries, 10}
Expand Down Expand Up @@ -124,6 +125,7 @@ init_per_testcase(successful_discovery_with_a_subset_of_nodes_coming_online = Te
%% unreachable nodes vs ~6min without them
Config3 = rabbit_ct_helpers:merge_app_env(Config2,
{rabbit, [
{forced_feature_flags_on_init, []},
{cluster_nodes, {NodeNamesWithHostname, disc}},
{cluster_formation, [
{internal_lock_retries, 10}
Expand All @@ -141,6 +143,7 @@ init_per_testcase(no_nodes_configured = Testcase, Config) ->
]),
Config3 = rabbit_ct_helpers:merge_app_env(Config2,
{rabbit, [
{forced_feature_flags_on_init, []},
{cluster_nodes, {[], disc}},
{cluster_formation, [
{internal_lock_retries, 10}
Expand Down
46 changes: 23 additions & 23 deletions deps/rabbit/test/quorum_queue_member_reconciliation_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ merge_app_env(Config) ->
end_per_testcase(Testcase, Config) ->
[Server0, Server1, Server2] =
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
reset_nodes([Server1, Server2], Server0),
reset_nodes([Server2, Server0], Server1),
Config1 = rabbit_ct_helpers:run_steps(
Config,
rabbit_ct_client_helpers:teardown_steps()),
Expand All @@ -107,83 +107,83 @@ reset_nodes([Node| Nodes], Leader) ->
auto_grow(Config) ->
[Server0, Server1, Server2] =
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),

QQ = ?config(queue_name, Config),
?assertEqual({'queue.declare_ok', QQ, 0, 0},
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),

%% There is only one node in the cluster at the moment
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
?assertEqual(1, length(Members)),

add_server_to_cluster(Server1, Server0),
add_server_to_cluster(Server0, Server1),
%% With 2 nodes in the cluster, target group size is not reached, so no
%% new members should be available. We sleep a while so the periodic check
%% runs
timer:sleep(4000),
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
?assertEqual(1, length(Members)),

add_server_to_cluster(Server2, Server0),
add_server_to_cluster(Server2, Server1),
%% With 3 nodes in the cluster, target size is met so eventually it should
%% be 3 members
wait_until(fun() ->
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
3 =:= length(M)
end).

auto_grow_drained_node(Config) ->
[Server0, Server1, Server2] =
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),

QQ = ?config(queue_name, Config),
?assertEqual({'queue.declare_ok', QQ, 0, 0},
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),

%% There is only one node in the cluster at the moment
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
{ok, Members, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
?assertEqual(1, length(Members)),

add_server_to_cluster(Server1, Server0),
%% mark server1 as drained, which should mean the node is not a candiate
add_server_to_cluster(Server0, Server1),
%% mark Server0 as drained, which should mean the node is not a candidate
%% for qq membership
rabbit_ct_broker_helpers:mark_as_being_drained(Config, Server1),
rabbit_ct_broker_helpers:mark_as_being_drained(Config, Server0),
rabbit_ct_helpers:await_condition(
fun () -> rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server1) end,
fun () -> rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server0) end,
10000),
add_server_to_cluster(Server2, Server0),
add_server_to_cluster(Server2, Server1),
timer:sleep(5000),
%% We have 3 nodes, but one is drained, so it will not be considered.
{ok, Members1, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
{ok, Members1, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
?assertEqual(1, length(Members1)),

rabbit_ct_broker_helpers:unmark_as_being_drained(Config, Server1),
rabbit_ct_broker_helpers:unmark_as_being_drained(Config, Server0),
rabbit_ct_helpers:await_condition(
fun () -> not rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server1) end,
fun () -> not rabbit_ct_broker_helpers:is_being_drained_local_read(Config, Server0) end,
10000),
%% We have 3 nodes, none is being drained, so we should grow membership to 3
wait_until(fun() ->
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server0}),
{ok, M, _} = ra:members({queue_utils:ra_name(QQ), Server1}),
3 =:= length(M)
end).


auto_shrink(Config) ->
[Server0, Server1, Server2] =
rabbit_ct_broker_helpers:get_node_configs(Config, nodename),
Ch = rabbit_ct_client_helpers:open_channel(Config, Server0),
add_server_to_cluster(Server1, Server0),
add_server_to_cluster(Server2, Server0),
Ch = rabbit_ct_client_helpers:open_channel(Config, Server1),
add_server_to_cluster(Server0, Server1),
add_server_to_cluster(Server2, Server1),

QQ = ?config(queue_name, Config),
?assertEqual({'queue.declare_ok', QQ, 0, 0},
declare(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}])),

wait_until(fun() ->
{ok, M, _} = ra:members({queue_utils:ra_name(QQ),
Server0}),
Server1}),
3 =:= length(M)
end),
ok = rabbit_control_helper:command(stop_app, Server2),
Expand All @@ -192,7 +192,7 @@ auto_shrink(Config) ->
%% with one node 'forgotten', eventually the membership will shrink to 2
wait_until(fun() ->
{ok, M, _} = ra:members({queue_utils:ra_name(QQ),
Server0}),
Server1}),
2 =:= length(M)
end).

Expand Down
4 changes: 3 additions & 1 deletion deps/rabbit/test/rabbit_stream_queue_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ init_per_group1(Group, Config) ->
_ ->
Config1
end,
Ret = rabbit_ct_helpers:run_steps(Config1b,
Config1c = rabbit_ct_helpers:merge_app_env(
Config1b, {rabbit, [{forced_feature_flags_on_init, []}]}),
Ret = rabbit_ct_helpers:run_steps(Config1c,
[fun merge_app_env/1 ] ++
rabbit_ct_broker_helpers:setup_steps()),
case Ret of
Expand Down
Loading

0 comments on commit f549425

Please sign in to comment.