Przeglądaj źródła

refactor(ds): Extract DS replication layer to a separate application

ieQu1 1 rok temu
rodzic
commit
a8ea0ae4e5
25 zmienionych plików z 347 dodań i 177 usunięć
  1. 94 0
      apps/emqx_ds_builtin_raft/BSL.txt
  2. 3 0
      apps/emqx_ds_builtin_raft/README.md
  3. 5 0
      apps/emqx_ds_builtin_raft/rebar.config
  4. 11 0
      apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src
  5. 11 0
      apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl
  6. 3 15
      apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl
  7. 3 16
      apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl
  8. 10 17
      apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl
  9. 0 12
      apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl
  10. 0 12
      apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl
  11. 1 13
      apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl
  12. 0 12
      apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl
  13. 3 15
      apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl
  14. 2 2
      apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl
  15. 0 12
      apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl
  16. 0 12
      apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl
  17. 0 12
      apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl
  18. 0 12
      apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl
  19. 152 9
      apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl
  20. 34 2
      apps/emqx_durable_storage/README.md
  21. 1 2
      apps/emqx_durable_storage/src/emqx_ds.erl
  22. 2 1
      apps/emqx_machine/priv/reboot_lists.eterm
  23. 7 0
      changes/ce/breaking-13248.en.md
  24. 4 1
      mix.exs
  25. 1 0
      rebar.config.erl

+ 94 - 0
apps/emqx_ds_builtin_raft/BSL.txt

@@ -0,0 +1,94 @@
+Business Source License 1.1
+
+Licensor:             Hangzhou EMQ Technologies Co., Ltd.
+Licensed Work:        EMQX Enterprise Edition
+                      The Licensed Work is (c) 2024
+                      Hangzhou EMQ Technologies Co., Ltd.
+Additional Use Grant: Students and educators are granted right to copy,
+                      modify, and create derivative work for research
+                      or education.
+Change Date:          2028-06-13
+Change License:       Apache License, Version 2.0
+
+For information about alternative licensing arrangements for the Software,
+please contact Licensor: https://www.emqx.com/en/contact
+
+Notice
+
+The Business Source License (this document, or the “License”) is not an Open
+Source license. However, the Licensed Work will eventually be made available
+under an Open Source License, as stated in this License.
+
+License text copyright (c) 2017, 2024 MariaDB Corporation Ab, All Rights Reserved.
+“Business Source License” is a trademark of MariaDB Corporation Ab.
+
+-----------------------------------------------------------------------------
+
+Business Source License 1.1
+
+Terms
+
+The Licensor hereby grants you the right to copy, modify, create derivative
+works, redistribute, and make non-production use of the Licensed Work. The
+Licensor may make an Additional Use Grant, above, permitting limited
+production use.
+
+Effective on the Change Date, or the fourth anniversary of the first publicly
+available distribution of a specific version of the Licensed Work under this
+License, whichever comes first, the Licensor hereby grants you rights under
+the terms of the Change License, and the rights granted in the paragraph
+above terminate.
+
+If your use of the Licensed Work does not comply with the requirements
+currently in effect as described in this License, you must purchase a
+commercial license from the Licensor, its affiliated entities, or authorized
+resellers, or you must refrain from using the Licensed Work.
+
+All copies of the original and modified Licensed Work, and derivative works
+of the Licensed Work, are subject to this License. This License applies
+separately for each version of the Licensed Work and the Change Date may vary
+for each version of the Licensed Work released by Licensor.
+
+You must conspicuously display this License on each original or modified copy
+of the Licensed Work. If you receive the Licensed Work in original or
+modified form from a third party, the terms and conditions set forth in this
+License apply to your use of that work.
+
+Any use of the Licensed Work in violation of this License will automatically
+terminate your rights under this License for the current and all other
+versions of the Licensed Work.
+
+This License does not grant you any right in any trademark or logo of
+Licensor or its affiliates (provided that you may use a trademark or logo of
+Licensor as expressly required by this License).
+
+TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
+AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
+EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
+TITLE.
+
+MariaDB hereby grants you permission to use this License’s text to license
+your works, and to refer to it using the trademark “Business Source License”,
+as long as you comply with the Covenants of Licensor below.
+
+Covenants of Licensor
+
+In consideration of the right to use this License’s text and the “Business
+Source License” name and trademark, Licensor covenants to MariaDB, and to all
+other recipients of the licensed work to be provided by Licensor:
+
+1. To specify as the Change License the GPL Version 2.0 or any later version,
+   or a license that is compatible with GPL Version 2.0 or a later version,
+   where “compatible” means that software provided under the Change License can
+   be included in a program with software provided under GPL Version 2.0 or a
+   later version. Licensor may specify additional Change Licenses without
+   limitation.
+
+2. To either: (a) specify an additional grant of rights to use that does not
+   impose any additional restriction on the right granted in this License, as
+   the Additional Use Grant; or (b) insert the text “None”.
+
+3. To specify a Change Date.
+
+4. Not to modify this License in any other way.

+ 3 - 0
apps/emqx_ds_builtin_raft/README.md

@@ -0,0 +1,3 @@
+# `emqx_ds_builtin_raft`
+
+Replication layer for the builtin EMQX durable storage backend that uses Raft algorithm.

+ 5 - 0
apps/emqx_ds_builtin_raft/rebar.config

@@ -0,0 +1,5 @@
+%% -*- mode:erlang -*-
+
+{deps, [
+    {emqx_durable_storage, {path, "../emqx_durable_storage"}}
+]}.

+ 11 - 0
apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft.app.src

@@ -0,0 +1,11 @@
+%% -*- mode: erlang -*-
+{application, emqx_ds_builtin_raft, [
+    {description, "Raft replication layer for the durable storage"},
+    % strict semver, bump manually!
+    {vsn, "0.1.0"},
+    {modules, []},
+    {registered, []},
+    {applications, [kernel, stdlib, gproc, mria, ra, emqx_durable_storage]},
+    {mod, {emqx_ds_builtin_raft_app, []}},
+    {env, []}
+]}.

+ 11 - 0
apps/emqx_ds_builtin_raft/src/emqx_ds_builtin_raft_app.erl

@@ -0,0 +1,11 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2020-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%--------------------------------------------------------------------
+
+-module(emqx_ds_builtin_raft_app).
+
+-export([start/2]).
+
+start(_Type, _Args) ->
+    emqx_ds:register_backend(builtin_raft, emqx_ds_replication_layer),
+    {ok, self()}.

+ 3 - 15
apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl

@@ -1,22 +1,10 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 %% @doc Supervisor that contains all the processes that belong to a
 %% given builtin DS database.
--module(emqx_ds_builtin_db_sup).
+-module(emqx_ds_builtin_raft_db_sup).
 
 -behaviour(supervisor).
 
@@ -150,7 +138,7 @@ get_shard_workers(DB) ->
 init({#?db_sup{db = DB}, DefaultOpts}) ->
     %% Spec for the top-level supervisor for the database:
     logger:notice("Starting DS DB ~p", [DB]),
-    emqx_ds_builtin_sup:clean_gvars(DB),
+    emqx_ds_builtin_raft_sup:clean_gvars(DB),
     emqx_ds_builtin_metrics:init_for_db(DB),
     Opts = emqx_ds_replication_layer_meta:open_db(DB, DefaultOpts),
     ok = start_ra_system(DB, Opts),
@@ -197,7 +185,7 @@ init({#?shard_sup{db = DB, shard = Shard}, _}) ->
     {ok, {SupFlags, Children}}.
 
 start_ra_system(DB, #{replication_options := ReplicationOpts}) ->
-    DataDir = filename:join([emqx_ds:base_dir(), DB, dsrepl]),
+    DataDir = filename:join([emqx_ds_storage_layer:base_dir(), DB, dsrepl]),
     Config = lists:foldr(fun maps:merge/2, #{}, [
         ra_system:default_config(),
         #{

+ 3 - 16
apps/emqx_durable_storage/src/emqx_ds_builtin_sup.erl

@@ -1,23 +1,11 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 %% @doc This supervisor manages the global worker processes needed for
 %% the functioning of builtin databases, and all builtin database
 %% attach to it.
--module(emqx_ds_builtin_sup).
+-module(emqx_ds_builtin_raft_sup).
 
 -behaviour(supervisor).
 
@@ -39,7 +27,6 @@
 
 -define(top, ?MODULE).
 -define(databases, emqx_ds_builtin_databases_sup).
-
 -define(gvar_tab, emqx_ds_builtin_gvar).
 
 -record(gvar, {
@@ -57,7 +44,7 @@ start_db(DB, Opts) ->
     ensure_top(),
     ChildSpec = #{
         id => DB,
-        start => {emqx_ds_builtin_db_sup, start_db, [DB, Opts]},
+        start => {emqx_ds_builtin_raft_db_sup, start_db, [DB, Opts]},
         type => supervisor,
         shutdown => infinity
     },
@@ -158,5 +145,5 @@ start_databases_sup() ->
 %%================================================================================
 
 ensure_top() ->
-    {ok, _} = emqx_ds_sup:attach_backend(builtin, {?MODULE, start_top, []}),
+    {ok, _} = emqx_ds_sup:attach_backend(builtin_raft, {?MODULE, start_top, []}),
     ok.

+ 10 - 17
apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl

@@ -1,28 +1,17 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 %% @doc Replication layer for DS backends that don't support
 %% replication on their own.
 -module(emqx_ds_replication_layer).
 
--behaviour(emqx_ds).
+%-behaviour(emqx_ds).
 
 -export([
     list_shards/1,
     open_db/2,
+    close_db/1,
     add_generation/1,
     update_db_config/2,
     list_generations_with_lifetimes/1,
@@ -176,7 +165,7 @@ list_shards(DB) ->
 
 -spec open_db(emqx_ds:db(), builtin_db_opts()) -> ok | {error, _}.
 open_db(DB, CreateOpts) ->
-    case emqx_ds_builtin_sup:start_db(DB, CreateOpts) of
+    case emqx_ds_builtin_raft_sup:start_db(DB, CreateOpts) of
         {ok, _} ->
             ok;
         {error, {already_started, _}} ->
@@ -185,6 +174,10 @@ open_db(DB, CreateOpts) ->
             {error, Err}
     end.
 
+-spec close_db(emqx_ds:db()) -> ok.
+close_db(DB) ->
+    emqx_ds_builtin_raft_sup:stop_db(DB).
+
 -spec add_generation(emqx_ds:db()) -> ok | {error, _}.
 add_generation(DB) ->
     foreach_shard(
@@ -376,7 +369,7 @@ foreach_shard(DB, Fun) ->
 %% local server
 -spec current_timestamp(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) -> emqx_ds:time().
 current_timestamp(DB, Shard) ->
-    emqx_ds_builtin_sup:get_gvar(DB, ?gv_timestamp(Shard), 0).
+    emqx_ds_builtin_raft_sup:get_gvar(DB, ?gv_timestamp(Shard), 0).
 
 %%================================================================================
 %% behavior callbacks
@@ -402,7 +395,7 @@ current_timestamp(DB, Shard) ->
 -spec do_drop_db_v1(emqx_ds:db()) -> ok | {error, _}.
 do_drop_db_v1(DB) ->
     MyShards = emqx_ds_replication_layer_meta:my_shards(DB),
-    emqx_ds_builtin_sup:stop_db(DB),
+    emqx_ds_builtin_raft_sup:stop_db(DB),
     lists:foreach(
         fun(Shard) ->
             emqx_ds_storage_layer:drop_shard({DB, Shard})
@@ -874,4 +867,4 @@ handle_custom_event(DBShard, Latest, Event) ->
     end.
 
 set_ts({DB, Shard}, TS) ->
-    emqx_ds_builtin_sup:set_gvar(DB, ?gv_timestamp(Shard), TS).
+    emqx_ds_builtin_raft_sup:set_gvar(DB, ?gv_timestamp(Shard), TS).

+ 0 - 12
apps/emqx_durable_storage/src/emqx_ds_replication_layer.hrl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2022, 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 -ifndef(EMQX_DS_REPLICATION_LAYER_HRL).
 -define(EMQX_DS_REPLICATION_LAYER_HRL, true).

+ 0 - 12
apps/emqx_durable_storage/src/emqx_ds_replication_layer_egress.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 %% @doc Egress servers are responsible for proxing the outcoming

+ 1 - 13
apps/emqx_durable_storage/src/emqx_ds_replication_layer_meta.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 %% @doc Metadata storage for the builtin sharded database.
@@ -678,7 +666,7 @@ ensure_tables() ->
     ok = mria:wait_for_tables([?META_TAB, ?NODE_TAB, ?SHARD_TAB]).
 
 ensure_site() ->
-    Filename = filename:join(emqx_ds:base_dir(), "emqx_ds_builtin_site.eterm"),
+    Filename = filename:join(emqx_ds_storage_layer:base_dir(), "emqx_ds_builtin_site.eterm"),
     case file:consult(Filename) of
         {ok, [Site]} ->
             ok;

+ 0 - 12
apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 -module(emqx_ds_replication_layer_shard).

+ 3 - 15
apps/emqx_durable_storage/src/emqx_ds_replication_shard_allocator.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 
 -module(emqx_ds_replication_shard_allocator).
@@ -297,7 +285,7 @@ trans_drop_local(DB, Shard, {del, Site}) ->
 do_drop_local(DB, Shard) ->
     case emqx_ds_replication_layer_shard:drop_local_server(DB, Shard) of
         ok ->
-            ok = emqx_ds_builtin_db_sup:stop_shard({DB, Shard}),
+            ok = emqx_ds_builtin_raft_db_sup:stop_shard({DB, Shard}),
             ok = emqx_ds_storage_layer:drop_shard({DB, Shard}),
             logger:info(#{msg => "Local shard replica dropped"});
         {error, recoverable, Reason} ->
@@ -428,7 +416,7 @@ start_shards(DB, Shards) ->
     lists:foreach(fun(Shard) -> start_shard(DB, Shard) end, Shards).
 
 start_shard(DB, Shard) ->
-    ok = emqx_ds_builtin_db_sup:ensure_shard({DB, Shard}),
+    ok = emqx_ds_builtin_raft_db_sup:ensure_shard({DB, Shard}),
     ok = logger:info(#{msg => "Shard started", shard => Shard}),
     ok.
 
@@ -436,7 +424,7 @@ start_egresses(DB, Shards) ->
     lists:foreach(fun(Shard) -> start_egress(DB, Shard) end, Shards).
 
 start_egress(DB, Shard) ->
-    ok = emqx_ds_builtin_db_sup:ensure_egress({DB, Shard}),
+    ok = emqx_ds_builtin_raft_db_sup:ensure_egress({DB, Shard}),
     ok = logger:info(#{msg => "Egress started", shard => Shard}),
     ok.
 

+ 2 - 2
apps/emqx_durable_storage/src/emqx_ds_replication_snapshot.erl

@@ -195,7 +195,7 @@ start_snapshot_writer(WS) ->
         msg => "dsrepl_snapshot_write_started",
         shard => ShardId
     }),
-    _ = emqx_ds_builtin_db_sup:terminate_storage(ShardId),
+    _ = emqx_ds_builtin_raft_db_sup:terminate_storage(ShardId),
     {ok, SnapWriter} = emqx_ds_storage_layer:accept_snapshot(ShardId),
     {ok, WS#ws{phase = storage_snapshot, writer = SnapWriter}}.
 
@@ -223,7 +223,7 @@ complete_accept(WS = #ws{started_at = StartedAt, writer = SnapWriter}) ->
         duration_ms => erlang:monotonic_time(millisecond) - StartedAt,
         bytes_written => emqx_ds_storage_snapshot:writer_info(bytes_written, SnapWriter)
     }),
-    {ok, _} = emqx_ds_builtin_db_sup:restart_storage(ShardId),
+    {ok, _} = emqx_ds_builtin_raft_db_sup:restart_storage(ShardId),
     write_machine_snapshot(WS).
 
 write_machine_snapshot(#ws{dir = Dir, meta = Meta, state = MachineState}) ->

+ 0 - 12
apps/emqx_durable_storage/src/proto/emqx_ds_proto_v1.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 -module(emqx_ds_proto_v1).
 

+ 0 - 12
apps/emqx_durable_storage/src/proto/emqx_ds_proto_v2.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2023-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 -module(emqx_ds_proto_v2).
 

+ 0 - 12
apps/emqx_durable_storage/src/proto/emqx_ds_proto_v3.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 -module(emqx_ds_proto_v3).
 

+ 0 - 12
apps/emqx_durable_storage/src/proto/emqx_ds_proto_v4.erl

@@ -1,17 +1,5 @@
 %%--------------------------------------------------------------------
 %% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
 %%--------------------------------------------------------------------
 -module(emqx_ds_proto_v4).
 

+ 152 - 9
apps/emqx_durable_storage/test/emqx_ds_replication_SUITE.erl

@@ -35,7 +35,7 @@ opts() ->
 opts(Overrides) ->
     maps:merge(
         #{
-            backend => builtin,
+            backend => builtin_raft,
             %% storage => {emqx_ds_storage_reference, #{}},
             storage => {emqx_ds_storage_bitfield_lts, #{epoch_bits => 10}},
             n_shards => 16,
@@ -56,8 +56,52 @@ appspec(emqx_durable_storage) ->
         override_env => [{egress_flush_interval, 1}]
     }}.
 
+t_metadata(init, Config) ->
+    emqx_cth_suite:start([emqx_ds_builtin_raft], #{
+        work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)
+    }),
+    Config;
+t_metadata('end', Config) ->
+    emqx_cth_suite:stop([emqx_ds_builtin_raft]),
+    Config.
+
+t_metadata(_Config) ->
+    DB = ?FUNCTION_NAME,
+    NShards = 1,
+    Options = #{
+        backend => builtin_raft,
+        storage => {emqx_ds_storage_reference, #{}},
+        n_shards => NShards,
+        n_sites => 1,
+        replication_factor => 1,
+        replication_options => #{}
+    },
+    try
+        ?assertMatch(ok, emqx_ds:open_db(DB, Options)),
+        %% Check metadata:
+        %%    We have only one site:
+        [Site] = emqx_ds_replication_layer_meta:sites(),
+        %%    Check all shards:
+        Shards = emqx_ds_replication_layer_meta:shards(DB),
+        %%    Since there is only one site all shards should be allocated
+        %%    to this site:
+        MyShards = emqx_ds_replication_layer_meta:my_shards(DB),
+        ?assertEqual(NShards, length(Shards)),
+        lists:foreach(
+            fun(Shard) ->
+                ?assertEqual(
+                    [Site], emqx_ds_replication_layer_meta:replica_set(DB, Shard)
+                )
+            end,
+            Shards
+        ),
+        ?assertEqual(lists:sort(Shards), lists:sort(MyShards))
+    after
+        ?assertMatch(ok, emqx_ds:drop_db(DB))
+    end.
+
 t_replication_transfers_snapshots(init, Config) ->
-    Apps = [appspec(emqx_durable_storage)],
+    Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
     NodeSpecs = emqx_cth_cluster:mk_nodespecs(
         [
             {t_replication_transfers_snapshots1, #{apps => Apps}},
@@ -130,7 +174,7 @@ t_replication_transfers_snapshots(Config) ->
     ).
 
 t_rebalance(init, Config) ->
-    Apps = [appspec(emqx_durable_storage)],
+    Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
     Nodes = emqx_cth_cluster:start(
         [
             {t_rebalance1, #{apps => Apps}},
@@ -260,7 +304,7 @@ t_rebalance(Config) ->
     ).
 
 t_join_leave_errors(init, Config) ->
-    Apps = [appspec(emqx_durable_storage)],
+    Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
     Nodes = emqx_cth_cluster:start(
         [
             {t_join_leave_errors1, #{apps => Apps}},
@@ -322,7 +366,7 @@ t_join_leave_errors(Config) ->
     ?assertEqual([], emqx_ds_test_helpers:transitions(N1, ?DB)).
 
 t_rebalance_chaotic_converges(init, Config) ->
-    Apps = [appspec(emqx_durable_storage)],
+    Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
     Nodes = emqx_cth_cluster:start(
         [
             {t_rebalance_chaotic_converges1, #{apps => Apps}},
@@ -418,7 +462,7 @@ t_rebalance_chaotic_converges(Config) ->
     ).
 
 t_rebalance_offline_restarts(init, Config) ->
-    Apps = [appspec(emqx_durable_storage)],
+    Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
     Specs = emqx_cth_cluster:mk_nodespecs(
         [
             {t_rebalance_offline_restarts1, #{apps => Apps}},
@@ -435,6 +479,7 @@ t_rebalance_offline_restarts('end', Config) ->
 t_rebalance_offline_restarts(Config) ->
     %% This testcase verifies that rebalancing progresses if nodes restart or
     %% go offline and never come back.
+    ok = snabbkaffe:start_trace(),
 
     Nodes = [N1, N2, N3] = ?config(nodes, Config),
     _Specs = [NS1, NS2, _] = ?config(nodespecs, Config),
@@ -477,7 +522,7 @@ t_rebalance_offline_restarts(Config) ->
     ?assertEqual(lists:sort([S1, S2]), ds_repl_meta(N1, db_sites, [?DB])).
 
 t_drop_generation(Config) ->
-    Apps = [appspec(emqx_durable_storage)],
+    Apps = [appspec(emqx_durable_storage), emqx_ds_builtin_raft],
     [_, _, NS3] =
         NodeSpecs = emqx_cth_cluster:mk_nodespecs(
             [
@@ -554,6 +599,105 @@ t_drop_generation(Config) ->
         end
     ).
 
+t_error_mapping_replication_layer(init, Config) ->
+    emqx_cth_suite:start([emqx_ds_builtin_raft], #{
+        work_dir => emqx_cth_suite:work_dir(?FUNCTION_NAME, Config)
+    }),
+    Config;
+t_error_mapping_replication_layer('end', Config) ->
+    emqx_cth_suite:stop([emqx_ds_builtin_raft]),
+    Config.
+
+t_error_mapping_replication_layer(_Config) ->
+    %% This checks that the replication layer maps recoverable errors correctly.
+
+    ok = emqx_ds_test_helpers:mock_rpc(),
+    ok = snabbkaffe:start_trace(),
+
+    DB = ?FUNCTION_NAME,
+    ?assertMatch(ok, emqx_ds:open_db(DB, (opts())#{n_shards => 2})),
+    [Shard1, Shard2] = emqx_ds_replication_layer_meta:shards(DB),
+
+    TopicFilter = emqx_topic:words(<<"foo/#">>),
+    Msgs = [
+        message(<<"C1">>, <<"foo/bar">>, <<"1">>, 0),
+        message(<<"C1">>, <<"foo/baz">>, <<"2">>, 1),
+        message(<<"C2">>, <<"foo/foo">>, <<"3">>, 2),
+        message(<<"C3">>, <<"foo/xyz">>, <<"4">>, 3),
+        message(<<"C4">>, <<"foo/bar">>, <<"5">>, 4),
+        message(<<"C5">>, <<"foo/oof">>, <<"6">>, 5)
+    ],
+
+    ?assertMatch(ok, emqx_ds:store_batch(DB, Msgs)),
+
+    ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard1}),
+    ?block_until(#{?snk_kind := emqx_ds_replication_layer_egress_flush, shard := Shard2}),
+
+    Streams0 = emqx_ds:get_streams(DB, TopicFilter, 0),
+    Iterators0 = lists:map(
+        fun({_Rank, S}) ->
+            {ok, Iter} = emqx_ds:make_iterator(DB, S, TopicFilter, 0),
+            Iter
+        end,
+        Streams0
+    ),
+
+    %% Disrupt the link to the second shard.
+    ok = emqx_ds_test_helpers:mock_rpc_result(
+        fun(_Node, emqx_ds_replication_layer, _Function, Args) ->
+            case Args of
+                [DB, Shard1 | _] -> passthrough;
+                [DB, Shard2 | _] -> unavailable
+            end
+        end
+    ),
+
+    %% Result of `emqx_ds:get_streams/3` will just contain partial results, not an error.
+    Streams1 = emqx_ds:get_streams(DB, TopicFilter, 0),
+    ?assert(
+        length(Streams1) > 0 andalso length(Streams1) =< length(Streams0),
+        Streams1
+    ),
+
+    %% At least one of `emqx_ds:make_iterator/4` will end in an error.
+    Results1 = lists:map(
+        fun({_Rank, S}) ->
+            case emqx_ds:make_iterator(DB, S, TopicFilter, 0) of
+                Ok = {ok, _Iter} ->
+                    Ok;
+                Error = {error, recoverable, {erpc, _}} ->
+                    Error;
+                Other ->
+                    ct:fail({unexpected_result, Other})
+            end
+        end,
+        Streams0
+    ),
+    ?assert(
+        length([error || {error, _, _} <- Results1]) > 0,
+        Results1
+    ),
+
+    %% At least one of `emqx_ds:next/3` over initial set of iterators will end in an error.
+    Results2 = lists:map(
+        fun(Iter) ->
+            case emqx_ds:next(DB, Iter, _BatchSize = 42) of
+                Ok = {ok, _Iter, [_ | _]} ->
+                    Ok;
+                Error = {error, recoverable, {badrpc, _}} ->
+                    Error;
+                Other ->
+                    ct:fail({unexpected_result, Other})
+            end
+        end,
+        Iterators0
+    ),
+    ?assert(
+        length([error || {error, _, _} <- Results2]) > 0,
+        Results2
+    ),
+    meck:unload().
+
 %%
 
 shard_server_info(Node, DB, Shard, Site, Info) ->
@@ -583,7 +727,7 @@ shards(Node, DB) ->
     erpc:call(Node, emqx_ds_replication_layer_meta, shards, [DB]).
 
 shards_online(Node, DB) ->
-    erpc:call(Node, emqx_ds_builtin_db_sup, which_shards, [DB]).
+    erpc:call(Node, emqx_ds_builtin_raft_db_sup, which_shards, [DB]).
 
 n_shards_online(Node, DB) ->
     length(shards_online(Node, DB)).
@@ -635,7 +779,6 @@ all() -> emqx_common_test_helpers:all(?MODULE).
 
 init_per_testcase(TCName, Config0) ->
     Config = emqx_common_test_helpers:init_per_testcase(?MODULE, TCName, Config0),
-    ok = snabbkaffe:start_trace(),
     Config.
 
 end_per_testcase(TCName, Config) ->

+ 34 - 2
apps/emqx_durable_storage/README.md

@@ -103,7 +103,7 @@ Consumption of messages is done in several stages:
 
 # Documentation links
 
-TBD
+https://docs.emqx.com/en/enterprise/latest/durability/durability_introduction.html
 
 # Usage
 
@@ -146,7 +146,39 @@ The following REST APIs are available for managing the builtin durable storages:
 - `/ds/storages/:ds/replicas/:site` — add or remove replica of the durable storage on the site
 
 # Other
-TBD
+
+Note: this application contains main interface module and some common utility modules used by the backends, but it doesn't contain any ready-to-use DS backends.
+The backends are instead implemented as separate OTP applications, such as `emqx_ds_backend_local` and `emqx_ds_backend_raft`.
+
+There is a helper placeholder application `emqx_ds_backends` that depends on all backend applications available in the release.
+Business logic applications must have `emqx_ds_backends` as a dependency.
+
+The dependency diagram is the following:
+
+```
+                              +------------------------+
+                              |  emqx_durable_storage  |
+                              +------------------------+
+                              /           |            \
+                             /            |             \
+                            /             |              \
+   +------------------------+  +----------------------+   +------+
+   | emqx_ds_backend_local  |  | emqx_ds_builtin_raft |   | ...  |
+   +------------------------+  +-----------+----------+   +------+
+                            \            |               /
+                             \           |              /
+                              \          |             /
+                             +-------------------------+
+                             |    emqx_ds_backends     |
+                             +-------------------------+
+                                 /              \
+                                /                \
+       ......................../.. business apps .\........................
+                              /                    \
+                         +------+                +-------+
+                         | emqx |                |  ...  |
+                         +------+                +-------+
+```
 
 # Contributing
 Please see our [contributing.md](../../CONTRIBUTING.md).

+ 1 - 2
apps/emqx_durable_storage/src/emqx_ds.erl

@@ -175,8 +175,7 @@
         _ => _
     }.
 
--type create_db_opts() ::
-    emqx_ds_replication_layer:builtin_db_opts() | generic_db_opts().
+-type create_db_opts() :: generic_db_opts().
 
 -type message_id() :: emqx_ds_replication_layer:message_id().
 

+ 2 - 1
apps/emqx_machine/priv/reboot_lists.eterm

@@ -135,7 +135,8 @@
             emqx_bridge_confluent,
             emqx_ds_shared_sub,
             emqx_auth_ext,
-            emqx_cluster_link
+            emqx_cluster_link,
+            emqx_ds_builtin_raft
         ],
     %% must always be of type `load'
     ce_business_apps =>

+ 7 - 0
changes/ce/breaking-13248.en.md

@@ -0,0 +1,7 @@
+`builtin` durable storage backend has been replaced with the following two backends:
+
+- `builtin_local`: A durable storage backend that doesn't support replication.
+   It can't be used in a multi-node cluster.
+   This backend is available in both open source and enterprise editions.
+- `builtin_raft`: A durable storage backend that uses Raft algorithm for replication.
+   This backend is available enterprise edition.

+ 4 - 1
mix.exs

@@ -205,7 +205,8 @@ defmodule EMQXUmbrella.MixProject do
       :emqx_bridge_syskeeper,
       :emqx_ds_shared_sub,
       :emqx_auth_ext,
-      :emqx_cluster_link
+      :emqx_cluster_link,
+      :emqx_ds_builtin_raft
     ])
   end
 
@@ -341,6 +342,8 @@ defmodule EMQXUmbrella.MixProject do
             :emqx_s3,
             :emqx_opentelemetry,
             :emqx_durable_storage,
+            :emqx_ds_builtin_local,
+            :emqx_ds_builtin_raft,
             :rabbit_common,
             :emqx_eviction_agent,
             :emqx_node_rebalance

+ 1 - 0
rebar.config.erl

@@ -124,6 +124,7 @@ is_community_umbrella_app("apps/emqx_node_rebalance") -> false;
 is_community_umbrella_app("apps/emqx_ds_shared_sub") -> false;
 is_community_umbrella_app("apps/emqx_auth_ext") -> false;
 is_community_umbrella_app("apps/emqx_cluster_link") -> false;
+is_community_umbrella_app("apps/emqx_ds_builtin_raft") -> false;
 is_community_umbrella_app(_) -> true.
 
 %% BUILD_WITHOUT_JQ