|
|
@@ -229,6 +229,7 @@ handle_transition(DB, Shard, Trans, Handler) ->
|
|
|
domain => [emqx, ds, DB, shard_transition]
|
|
|
}),
|
|
|
?tp(
|
|
|
+ debug,
|
|
|
dsrepl_shard_transition_begin,
|
|
|
#{shard => Shard, db => DB, transition => Trans, pid => self()}
|
|
|
),
|
|
|
@@ -240,7 +241,12 @@ apply_handler(Fun, DB, Shard, Trans) ->
|
|
|
erlang:apply(Fun, [DB, Shard, Trans]).
|
|
|
|
|
|
trans_add_local(DB, Shard, {add, Site}) ->
|
|
|
- logger:info(#{msg => "Adding new local shard replica", site => Site}),
|
|
|
+ logger:info(#{
|
|
|
+ msg => "Adding new local shard replica",
|
|
|
+ site => Site,
|
|
|
+ db => DB,
|
|
|
+ shard => Shard
|
|
|
+ }),
|
|
|
do_add_local(membership, DB, Shard).
|
|
|
|
|
|
do_add_local(membership = Stage, DB, Shard) ->
|
|
|
@@ -251,6 +257,8 @@ do_add_local(membership = Stage, DB, Shard) ->
|
|
|
{error, recoverable, Reason} ->
|
|
|
logger:warning(#{
|
|
|
msg => "Shard membership change failed",
|
|
|
+ db => DB,
|
|
|
+ shard => Shard,
|
|
|
reason => Reason,
|
|
|
retry_in => ?TRANS_RETRY_TIMEOUT
|
|
|
}),
|
|
|
@@ -261,10 +269,12 @@ do_add_local(readiness = Stage, DB, Shard) ->
|
|
|
LocalServer = emqx_ds_replication_layer_shard:local_server(DB, Shard),
|
|
|
case emqx_ds_replication_layer_shard:server_info(readiness, LocalServer) of
|
|
|
ready ->
|
|
|
- logger:info(#{msg => "Local shard replica ready"});
|
|
|
+ logger:info(#{msg => "Local shard replica ready", db => DB, shard => Shard});
|
|
|
Status ->
|
|
|
logger:warning(#{
|
|
|
msg => "Still waiting for local shard replica to be ready",
|
|
|
+ db => DB,
|
|
|
+ shard => Shard,
|
|
|
status => Status,
|
|
|
retry_in => ?TRANS_RETRY_TIMEOUT
|
|
|
}),
|
|
|
@@ -273,7 +283,12 @@ do_add_local(readiness = Stage, DB, Shard) ->
|
|
|
end.
|
|
|
|
|
|
trans_drop_local(DB, Shard, {del, Site}) ->
|
|
|
- logger:info(#{msg => "Dropping local shard replica", site => Site}),
|
|
|
+ logger:info(#{
|
|
|
+ msg => "Dropping local shard replica",
|
|
|
+ site => Site,
|
|
|
+ db => DB,
|
|
|
+ shard => Shard
|
|
|
+ }),
|
|
|
do_drop_local(DB, Shard).
|
|
|
|
|
|
do_drop_local(DB, Shard) ->
|
|
|
@@ -293,17 +308,24 @@ do_drop_local(DB, Shard) ->
|
|
|
end.
|
|
|
|
|
|
trans_rm_unresponsive(DB, Shard, {del, Site}) ->
|
|
|
- logger:info(#{msg => "Removing unresponsive shard replica", site => Site}),
|
|
|
+ logger:info(#{
|
|
|
+ msg => "Removing unresponsive shard replica",
|
|
|
+ site => Site,
|
|
|
+ db => DB,
|
|
|
+ shard => Shard
|
|
|
+ }),
|
|
|
do_rm_unresponsive(DB, Shard, Site).
|
|
|
|
|
|
do_rm_unresponsive(DB, Shard, Site) ->
|
|
|
Server = emqx_ds_replication_layer_shard:shard_server(DB, Shard, Site),
|
|
|
case emqx_ds_replication_layer_shard:remove_server(DB, Shard, Server) of
|
|
|
ok ->
|
|
|
- logger:info(#{msg => "Unresponsive shard replica removed"});
|
|
|
+ logger:info(#{msg => "Unresponsive shard replica removed", db => DB, shard => Shard});
|
|
|
{error, recoverable, Reason} ->
|
|
|
logger:warning(#{
|
|
|
msg => "Shard membership change failed",
|
|
|
+ db => DB,
|
|
|
+ shard => Shard,
|
|
|
reason => Reason,
|
|
|
retry_in => ?TRANS_RETRY_TIMEOUT
|
|
|
}),
|
|
|
@@ -341,6 +363,7 @@ handle_exit(Pid, Reason, State0 = #{db := DB, transitions := Ts}) ->
|
|
|
case maps:to_list(maps:filter(fun(_, TH) -> TH#transhdl.pid == Pid end, Ts)) of
|
|
|
[{Track, #transhdl{shard = Shard, trans = Trans}}] ->
|
|
|
?tp(
|
|
|
+ debug,
|
|
|
dsrepl_shard_transition_end,
|
|
|
#{shard => Shard, db => DB, transition => Trans, pid => Pid, reason => Reason}
|
|
|
),
|
|
|
@@ -361,9 +384,10 @@ handle_transition_exit(Shard, Trans, normal, State = #{db := DB}) ->
|
|
|
State;
|
|
|
handle_transition_exit(_Shard, _Trans, {shutdown, skipped}, State) ->
|
|
|
State;
|
|
|
-handle_transition_exit(Shard, Trans, Reason, State) ->
|
|
|
+handle_transition_exit(Shard, Trans, Reason, State = #{db := DB}) ->
|
|
|
logger:warning(#{
|
|
|
msg => "Shard membership transition failed",
|
|
|
+ db => DB,
|
|
|
shard => Shard,
|
|
|
transition => Trans,
|
|
|
reason => Reason,
|