|
|
@@ -21,11 +21,34 @@
|
|
|
%% implementation details from this module.
|
|
|
-module(emqx_ds_replication_layer_meta).
|
|
|
|
|
|
+-behaviour(gen_server).
|
|
|
+
|
|
|
%% API:
|
|
|
--export([init/0, shards/1, replica_set/2, sites/0, open_db/2, drop_db/1]).
|
|
|
+-export([
|
|
|
+ shards/1,
|
|
|
+ my_shards/1,
|
|
|
+ replica_set/2,
|
|
|
+ in_sync_replicas/2,
|
|
|
+ sites/0,
|
|
|
+ open_db/2,
|
|
|
+ drop_db/1,
|
|
|
+ shard_leader/2,
|
|
|
+ this_site/0,
|
|
|
+ set_leader/3
|
|
|
+]).
|
|
|
+
|
|
|
+%% gen_server
|
|
|
+-export([start_link/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
|
|
|
|
|
|
%% internal exports:
|
|
|
--export([open_db_trans/2, drop_db_trans/1, claim_site/2]).
|
|
|
+-export([
|
|
|
+ open_db_trans/2,
|
|
|
+ drop_db_trans/1,
|
|
|
+ claim_site/2,
|
|
|
+ in_sync_replicas_trans/2,
|
|
|
+ set_leader_trans/3,
|
|
|
+ n_shards/1
|
|
|
+]).
|
|
|
|
|
|
-export_type([site/0]).
|
|
|
|
|
|
@@ -35,6 +58,8 @@
|
|
|
%% Type declarations
|
|
|
%%================================================================================
|
|
|
|
|
|
+-define(SERVER, ?MODULE).
|
|
|
+
|
|
|
-define(SHARD, emqx_ds_builtin_metadata_shard).
|
|
|
%% DS database metadata:
|
|
|
-define(META_TAB, emqx_ds_builtin_metadata_tab).
|
|
|
@@ -56,7 +81,10 @@
|
|
|
|
|
|
-record(?SHARD_TAB, {
|
|
|
shard :: {emqx_ds:db(), emqx_ds_replication_layer:shard_id()},
|
|
|
+ %% Sites that the
|
|
|
replica_set :: [site()],
|
|
|
+ %% Sites that contain the actual data:
|
|
|
+ in_sync_replicas :: [site()],
|
|
|
leader :: node() | undefined,
|
|
|
misc = #{} :: map()
|
|
|
}).
|
|
|
@@ -64,14 +92,21 @@
|
|
|
%% Persistent ID of the node (independent from the IP/FQDN):
|
|
|
-type site() :: binary().
|
|
|
|
|
|
+%% Peristent term key:
|
|
|
+-define(emqx_ds_builtin_site, emqx_ds_builtin_site).
|
|
|
+
|
|
|
%%================================================================================
|
|
|
%% API funcions
|
|
|
%%================================================================================
|
|
|
|
|
|
--spec init() -> ok.
|
|
|
-init() ->
|
|
|
- ensure_tables(),
|
|
|
- ensure_site().
|
|
|
+-spec n_shards(emqx_ds:db()) -> pos_integer().
|
|
|
+n_shards(DB) ->
|
|
|
+ [#?META_TAB{db_props = #{n_shards := NShards}}] = mnesia:dirty_read(?META_TAB, DB),
|
|
|
+ NShards.
|
|
|
+
|
|
|
+-spec start_link() -> {ok, pid()}.
|
|
|
+start_link() ->
|
|
|
+ gen_server:start_link({local, ?SERVER}, ?MODULE, [], []).
|
|
|
|
|
|
-spec shards(emqx_ds:db()) -> [emqx_ds_replication_layer:shard_id()].
|
|
|
shards(DB) ->
|
|
|
@@ -79,6 +114,20 @@ shards(DB) ->
|
|
|
qlc:q([Shard || #?SHARD_TAB{shard = {D, Shard}} <- mnesia:table(?SHARD_TAB), D =:= DB])
|
|
|
).
|
|
|
|
|
|
+-spec my_shards(emqx_ds:db()) -> [emqx_ds_replication_layer:shard_id()].
|
|
|
+my_shards(DB) ->
|
|
|
+ Site = this_site(),
|
|
|
+ eval_qlc(
|
|
|
+ qlc:q([
|
|
|
+ Shard
|
|
|
+ || #?SHARD_TAB{shard = {D, Shard}, replica_set = ReplicaSet, in_sync_replicas = InSync} <- mnesia:table(
|
|
|
+ ?SHARD_TAB
|
|
|
+ ),
|
|
|
+ D =:= DB,
|
|
|
+ lists:member(Site, ReplicaSet) orelse lists:member(Site, InSync)
|
|
|
+ ])
|
|
|
+ ).
|
|
|
+
|
|
|
-spec replica_set(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) ->
|
|
|
{ok, [site()]} | {error, _}.
|
|
|
replica_set(DB, Shard) ->
|
|
|
@@ -89,10 +138,37 @@ replica_set(DB, Shard) ->
|
|
|
{error, no_shard}
|
|
|
end.
|
|
|
|
|
|
+-spec in_sync_replicas(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) ->
|
|
|
+ [site()].
|
|
|
+in_sync_replicas(DB, ShardId) ->
|
|
|
+ {atomic, Result} = mria:transaction(?SHARD, fun ?MODULE:in_sync_replicas_trans/2, [DB, ShardId]),
|
|
|
+ case Result of
|
|
|
+ {ok, InSync} ->
|
|
|
+ InSync;
|
|
|
+ {error, _} ->
|
|
|
+ []
|
|
|
+ end.
|
|
|
+
|
|
|
-spec sites() -> [site()].
|
|
|
sites() ->
|
|
|
eval_qlc(qlc:q([Site || #?NODE_TAB{site = Site} <- mnesia:table(?NODE_TAB)])).
|
|
|
|
|
|
+-spec shard_leader(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) ->
|
|
|
+ {ok, node()} | {error, no_leader_for_shard}.
|
|
|
+shard_leader(DB, Shard) ->
|
|
|
+ case mnesia:dirty_read(?SHARD_TAB, {DB, Shard}) of
|
|
|
+ [#?SHARD_TAB{leader = Leader}] ->
|
|
|
+ {ok, Leader};
|
|
|
+ [] ->
|
|
|
+ {error, no_leader_for_shard}
|
|
|
+ end.
|
|
|
+
|
|
|
+-spec set_leader(emqx_ds:db(), emqx_ds_replication_layer:shard_id(), node()) ->
|
|
|
+ ok.
|
|
|
+set_leader(DB, Shard, Node) ->
|
|
|
+ {atomic, _} = mria:transaction(?SHARD, fun ?MODULE:set_leader_trans/3, [DB, Shard, Node]),
|
|
|
+ ok.
|
|
|
+
|
|
|
-spec open_db(emqx_ds:db(), emqx_ds_replication_layer:builtin_db_opts()) ->
|
|
|
emqx_ds_replication_layer:builtin_db_opts().
|
|
|
open_db(DB, DefaultOpts) ->
|
|
|
@@ -108,6 +184,29 @@ drop_db(DB) ->
|
|
|
%% behavior callbacks
|
|
|
%%================================================================================
|
|
|
|
|
|
+-record(s, {}).
|
|
|
+
|
|
|
+init([]) ->
|
|
|
+ process_flag(trap_exit, true),
|
|
|
+ logger:set_process_metadata(#{domain => [ds, meta]}),
|
|
|
+ ensure_tables(),
|
|
|
+ ensure_site(),
|
|
|
+ S = #s{},
|
|
|
+ {ok, S}.
|
|
|
+
|
|
|
+handle_call(_Call, _From, S) ->
|
|
|
+ {reply, {error, unknown_call}, S}.
|
|
|
+
|
|
|
+handle_cast(_Cast, S) ->
|
|
|
+ {noreply, S}.
|
|
|
+
|
|
|
+handle_info(_Info, S) ->
|
|
|
+ {noreply, S}.
|
|
|
+
|
|
|
+terminate(_Reason, #s{}) ->
|
|
|
+ persistent_term:erase(?emqx_ds_builtin_site),
|
|
|
+ ok.
|
|
|
+
|
|
|
%%================================================================================
|
|
|
%% Internal exports
|
|
|
%%================================================================================
|
|
|
@@ -136,6 +235,23 @@ drop_db_trans(DB) ->
|
|
|
claim_site(Site, Node) ->
|
|
|
mnesia:write(#?NODE_TAB{site = Site, node = Node}).
|
|
|
|
|
|
+-spec in_sync_replicas_trans(emqx_ds:db(), emqx_ds_replication_layer:shard_id()) ->
|
|
|
+ {ok, [site()]} | {error, no_shard}.
|
|
|
+in_sync_replicas_trans(DB, Shard) ->
|
|
|
+ case mnesia:read(?SHARD_TAB, {DB, Shard}) of
|
|
|
+ [#?SHARD_TAB{in_sync_replicas = InSync}] ->
|
|
|
+ {ok, InSync};
|
|
|
+ [] ->
|
|
|
+ {error, no_shard}
|
|
|
+ end.
|
|
|
+
|
|
|
+-spec set_leader_trans(emqx_ds:ds(), emqx_ds_replication_layer:shard_id(), node()) ->
|
|
|
+ ok.
|
|
|
+set_leader_trans(DB, Shard, Node) ->
|
|
|
+ [Record0] = mnesia:wread({?SHARD_TAB, {DB, Shard}}),
|
|
|
+ Record = Record0#?SHARD_TAB{leader = Node},
|
|
|
+ mnesia:write(Record).
|
|
|
+
|
|
|
%%================================================================================
|
|
|
%% Internal functions
|
|
|
%%================================================================================
|
|
|
@@ -182,8 +298,13 @@ ensure_site() ->
|
|
|
file:close(FD)
|
|
|
end,
|
|
|
{atomic, ok} = mria:transaction(?SHARD, fun ?MODULE:claim_site/2, [Site, node()]),
|
|
|
+ persistent_term:put(?emqx_ds_builtin_site, Site),
|
|
|
ok.
|
|
|
|
|
|
+-spec this_site() -> site().
|
|
|
+this_site() ->
|
|
|
+ persistent_term:get(?emqx_ds_builtin_site).
|
|
|
+
|
|
|
-spec create_shards(emqx_ds:db(), pos_integer(), pos_integer()) -> ok.
|
|
|
create_shards(DB, NShards, ReplicationFactor) ->
|
|
|
Shards = [integer_to_binary(I) || I <- lists:seq(0, NShards - 1)],
|
|
|
@@ -193,10 +314,11 @@ create_shards(DB, NShards, ReplicationFactor) ->
|
|
|
Hashes0 = [{hash(Shard, Site), Site} || Site <- Sites],
|
|
|
Hashes = lists:sort(Hashes0),
|
|
|
{_, Sites} = lists:unzip(Hashes),
|
|
|
- ReplicaSet = lists:sublist(Sites, 1, ReplicationFactor),
|
|
|
+ [First | _] = ReplicaSet = lists:sublist(Sites, 1, ReplicationFactor),
|
|
|
Record = #?SHARD_TAB{
|
|
|
shard = {DB, Shard},
|
|
|
- replica_set = ReplicaSet
|
|
|
+ replica_set = ReplicaSet,
|
|
|
+ in_sync_replicas = [First]
|
|
|
},
|
|
|
mnesia:write(Record)
|
|
|
end,
|