před 1 rokem · ac19cf89df
--- a/apps/emqx_cluster_link/include/emqx_cluster_link.hrl
+++ b/apps/emqx_cluster_link/include/emqx_cluster_link.hrl
@@ -3,12 +3,9 @@
 
																 %%--------------------------------------------------------------------
															
 
																 -define(TOPIC_PREFIX, "$LINK/cluster/").
															
 
																--define(CTRL_TOPIC_PREFIX, ?TOPIC_PREFIX "ctrl/").
															
 
																 -define(ROUTE_TOPIC_PREFIX, ?TOPIC_PREFIX "route/").
															
 
																 -define(MSG_TOPIC_PREFIX, ?TOPIC_PREFIX "msg/").
															
 
																--define(DEST(FromClusterName), {external, {link, FromClusterName}}).
															
 
																-
															
 
																 %% Fairly compact text encoding.
															
 
																 -define(SHARED_ROUTE_ID(Topic, Group), <<"$s/", Group/binary, "/", Topic/binary>>).
															
 
																 -define(PERSISTENT_ROUTE_ID(Topic, ID), <<"$p/", ID/binary, "/", Topic/binary>>).
															
--- a/apps/emqx_cluster_link/src/emqx_cluster_link.app.src
+++ b/apps/emqx_cluster_link/src/emqx_cluster_link.app.src
@@ -9,7 +9,6 @@
 
																         kernel,
															
 
																         stdlib,
															
 
																         emqtt,
															
 
																-        ecpool,
															
 
																         emqx,
															
 
																         emqx_resource
															
 
																     ]},
															
--- a/apps/emqx_cluster_link/src/emqx_cluster_link.erl
+++ b/apps/emqx_cluster_link/src/emqx_cluster_link.erl
@@ -100,22 +100,6 @@ on_message_publish(#message{topic = <<?MSG_TOPIC_PREFIX, ClusterName/binary>>, p
 
																             %% Just ignore it. It must be already logged by the decoder
															
 
																             {stop, []}
															
 
																     end;
															
 
																-on_message_publish(
															
 
																-    #message{topic = <<?CTRL_TOPIC_PREFIX, ClusterName/binary>>, payload = Payload} = Msg
															
 
																-) ->
															
 
																-    case emqx_cluster_link_mqtt:decode_ctrl_msg(Payload, ClusterName) of
															
 
																-        {init_link, InitRes} ->
															
 
																-            on_init(InitRes, ClusterName, Msg);
															
 
																-        {ack_link, Res} ->
															
 
																-            on_init_ack(Res, ClusterName, Msg);
															
 
																-        unlink ->
															
 
																-            %% Stop pushing messages to the cluster that requested unlink,
															
 
																-            %% It brings the link to a half-closed (unidirectional) state,
															
 
																-            %% as this cluster may still replicate routes and receive messages from ClusterName.
															
 
																-            emqx_cluster_link_mqtt:stop_msg_fwd_resource(ClusterName),
															
 
																-            cleanup_routes(ClusterName)
															
 
																-    end,
															
 
																-    {stop, []};
															
 
																 on_message_publish(_Msg) ->
															
 
																     ok.
															
@@ -166,44 +150,6 @@ update_routes(ClusterName, Actor, Incarnation, RouteOps) ->
 
																         RouteOps
															
 
																     ).
															
 
																-cleanup_routes(ClusterName) ->
															
 
																-    emqx_router:cleanup_routes(?DEST(ClusterName)).
															
 
																-
															
 
																-lookup_link_conf(ClusterName) ->
															
 
																-    lists:search(
															
 
																-        fun(#{upstream := N}) -> N =:= ClusterName end,
															
 
																-        emqx:get_config([cluster, links], [])
															
 
																-    ).
															
 
																-
															
 
																-on_init(Res, ClusterName, Msg) ->
															
 
																-    #{
															
 
																-        'Correlation-Data' := ReqId,
															
 
																-        'Response-Topic' := RespTopic
															
 
																-    } = emqx_message:get_header(properties, Msg),
															
 
																-    case lookup_link_conf(ClusterName) of
															
 
																-        {value, LinkConf} ->
															
 
																-            _ = emqx_cluster_link_mqtt:ensure_msg_fwd_resource(LinkConf),
															
 
																-            emqx_cluster_link_mqtt:ack_link(ClusterName, Res, RespTopic, ReqId);
															
 
																-        false ->
															
 
																-            ?SLOG(error, #{
															
 
																-                msg => "init_link_request_from_unknown_cluster",
															
 
																-                link_name => ClusterName
															
 
																-            }),
															
 
																-            %% Cannot ack/reply since we don't know how to reach the link cluster,
															
 
																-            %% The cluster that tried to initiatw this link is expected to eventually fail with timeout.
															
 
																-            ok
															
 
																-    end.
															
 
																-
															
 
																-on_init_ack(Res, ClusterName, Msg) ->
															
 
																-    #{'Correlation-Data' := ReqId} = emqx_message:get_header(properties, Msg),
															
 
																-    emqx_cluster_link_coordinator:on_link_ack(ClusterName, ReqId, Res).
															
 
																-
															
 
																-%% add_routes(Topics, ClusterName) ->
															
 
																-%%     lists:foreach(
															
 
																-%%         fun(T) -> emqx_router_syncer:push(add, T, ?DEST(ClusterName), #{}) end,
															
 
																-%%         Topics
															
 
																-%%     ).
															
 
																-
															
 
																 %% let it crash if extra is not a map,
															
 
																 %% we don't expect the message to be forwarded from an older EMQX release,
															
 
																 %% that doesn't set extra = #{} by default.
															
--- a/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl
+++ b/apps/emqx_cluster_link/src/emqx_cluster_link_config.erl
@@ -21,7 +21,8 @@
 
																     link/1,
															
 
																     topic_filters/1,
															
 
																     %% Connections
															
 
																-    emqtt_options/1
															
 
																+    emqtt_options/1,
															
 
																+    mk_emqtt_options/1
															
 
																 ]).
															
 
																 -export([
															
@@ -152,16 +153,18 @@ add_links(LinksConf) ->
 
																 add_link(#{enabled := true} = LinkConf) ->
															
 
																     %% NOTE: this can be started later during init_link phase, but it looks not harmful to start it beforehand...
															
 
																     MsgFwdRes = emqx_cluster_link_mqtt:ensure_msg_fwd_resource(LinkConf),
															
 
																-    CoordRes = ensure_coordinator(LinkConf),
															
 
																-    combine_results(CoordRes, MsgFwdRes);
															
 
																+    %% TODO
															
 
																+    ActorRes = ok,
															
 
																+    combine_results(ActorRes, MsgFwdRes);
															
 
																 add_link(_DisabledLinkConf) ->
															
 
																     ok.
															
 
																 remove_links(LinksConf) ->
															
 
																     [remove_link(Link) || Link <- LinksConf].
															
 
																-remove_link(LinkConf) ->
															
 
																-    emqx_cluster_link_coord_sup:stop_coordinator(LinkConf).
															
 
																+remove_link(_LinkConf) ->
															
 
																+    %% TODO
															
 
																+    ok.
															
 
																 update_links(LinksConf) ->
															
 
																     [update_link(Link) || Link <- LinksConf].
															
@@ -176,14 +179,6 @@ update_link(#{enabled := false} = LinkConf) ->
 
																         Other -> Other
															
 
																     end.
															
 
																-ensure_coordinator(LinkConf) ->
															
 
																-    case emqx_cluster_link_coord_sup:start_coordinator(LinkConf) of
															
 
																-        {error, {already_started, Pid}} ->
															
 
																-            {ok, Pid};
															
 
																-        {error, already_present} ->
															
 
																-            emqx_cluster_link_coord_sup:restart_coordinator(LinkConf)
															
 
																-    end.
															
 
																-
															
 
																 combine_results(ok, ok) ->
															
 
																     ok;
															
 
																 combine_results(CoordRes, MsgFwdRes) ->
															
--- a/apps/emqx_cluster_link/src/emqx_cluster_link_coord_sup.erl
+++ b/apps/emqx_cluster_link/src/emqx_cluster_link_coord_sup.erl
@@ -1,57 +0,0 @@
 
																-%%--------------------------------------------------------------------
															
 
																-%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
															
 
																-%%--------------------------------------------------------------------
															
 
																-
															
 
																--module(emqx_cluster_link_coord_sup).
															
 
																-
															
 
																--behaviour(supervisor).
															
 
																-
															
 
																--export([start_link/1]).
															
 
																--export([init/1]).
															
 
																-
															
 
																--export([
															
 
																-    start_coordinator/1,
															
 
																-    restart_coordinator/1,
															
 
																-    stop_coordinator/1
															
 
																-]).
															
 
																-
															
 
																--define(SERVER, ?MODULE).
															
 
																--define(COORDINATOR_MOD, emqx_cluster_link_coordinator).
															
 
																-
															
 
																-start_link(LinksConf) ->
															
 
																-    supervisor:start_link({local, ?SERVER}, ?SERVER, LinksConf).
															
 
																-
															
 
																-init(LinksConf) ->
															
 
																-    SupFlags = #{
															
 
																-        strategy => one_for_one,
															
 
																-        intensity => 10,
															
 
																-        period => 5
															
 
																-    },
															
 
																-    {ok, {SupFlags, children(LinksConf)}}.
															
 
																-
															
 
																-start_coordinator(#{upstream := Name} = LinkConf) ->
															
 
																-    supervisor:start_child(?SERVER, worker_spec(Name, LinkConf)).
															
 
																-
															
 
																-restart_coordinator(#{upstream := Name} = _LinkConf) ->
															
 
																-    supervisor:restart_child(?SERVER, Name).
															
 
																-
															
 
																-stop_coordinator(#{upstream := Name} = _LinkConf) ->
															
 
																-    case supervisor:terminate_child(?SERVER, Name) of
															
 
																-        ok ->
															
 
																-            supervisor:delete_child(?SERVER, Name);
															
 
																-        Err ->
															
 
																-            Err
															
 
																-    end.
															
 
																-
															
 
																-worker_spec(Id, LinkConf) ->
															
 
																-    #{
															
 
																-        id => Id,
															
 
																-        start => {?COORDINATOR_MOD, start_link, [LinkConf]},
															
 
																-        restart => permanent,
															
 
																-        shutdown => 5000,
															
 
																-        type => worker,
															
 
																-        modules => [?COORDINATOR_MOD]
															
 
																-    }.
															
 
																-
															
 
																-children(LinksConf) ->
															
 
																-    [worker_spec(Name, Conf) || #{upstream := Name, enable := true} = Conf <- LinksConf].
															
--- a/apps/emqx_cluster_link/src/emqx_cluster_link_coordinator.erl
+++ b/apps/emqx_cluster_link/src/emqx_cluster_link_coordinator.erl
@@ -1,454 +0,0 @@
 
																-%%--------------------------------------------------------------------
															
 
																-%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
															
 
																-%%--------------------------------------------------------------------
															
 
																-
															
 
																-%% @doc experimental prototype implementation.
															
 
																-%% The idea is to add a sync point for all cluster route operations,
															
 
																-%% so that, routes can be batched/shrunk (via using emqx_route_syncer) before pushing them to linked clusters.
															
 
																-%% The expected result is reduced communication between linked clusters:
															
 
																-%% each nodes communicates with other clusters through coordinator.
															
 
																-%% The drawbacks are numerous though:
															
 
																-%%   - complexity/leader elections,
															
 
																-%%   - routes removal seems hard to implement unless remote cluster routes as stored per node,
															
 
																-%%     in that case global coordinator per cluster is not needed any more. - TBD
															
 
																--module(emqx_cluster_link_coordinator).
															
 
																-
															
 
																--behaviour(gen_statem).
															
 
																-
															
 
																-%% API
															
 
																--export([
															
 
																-    route_op/2,
															
 
																-    on_link_ack/3
															
 
																-]).
															
 
																-
															
 
																--export([start_link/1]).
															
 
																-
															
 
																-%% gen_statem
															
 
																--export([
															
 
																-    callback_mode/0,
															
 
																-    init/1,
															
 
																-    terminate/3
															
 
																-]).
															
 
																-
															
 
																-%% gen_statem state functions
															
 
																--export([
															
 
																-    wait_for_coordinator/3,
															
 
																-    connecting/3,
															
 
																-    init_linking/3,
															
 
																-    bootstrapping/3,
															
 
																-    coordinating/3,
															
 
																-    following/3
															
 
																-]).
															
 
																-
															
 
																--export([select_routes/1]).
															
 
																-
															
 
																--include_lib("emqx/include/emqx.hrl").
															
 
																--include_lib("emqx/include/emqx_router.hrl").
															
 
																--include_lib("emqx/include/logger.hrl").
															
 
																-
															
 
																--define(COORDINATOR(UpstreamName), {?MODULE, UpstreamName}).
															
 
																--define(SERVER, ?MODULE).
															
 
																--define(WAIT_COORD_RETRY_INTERVAL, 100).
															
 
																--define(CONN_RETRY_INTERVAL, 5000).
															
 
																--define(INIT_LINK_RESP_TIMEOUT, 15_000).
															
 
																--define(INIT_LINK_RETRIES, 5).
															
 
																--define(UPSTREAM_DEST, {external, {link, _}}).
															
 
																--define(IS_ROUTE_OP(Op), Op =:= <<"add">>; Op =:= <<"delete">>).
															
 
																-
															
 
																-start_link(Conf) ->
															
 
																-    gen_statem:start_link(?MODULE, Conf, []).
															
 
																-
															
 
																-route_op(Op, Topic) ->
															
 
																-    lists:foreach(
															
 
																-        fun(#{upstream := UpstreamName, topics := LinkFilters}) ->
															
 
																-            case topic_intersect_any(Topic, LinkFilters) of
															
 
																-                false -> ok;
															
 
																-                TopicOrFilter -> maybe_cast(UpstreamName, {Op, TopicOrFilter})
															
 
																-            end
															
 
																-        end,
															
 
																-        emqx:get_config([cluster, links])
															
 
																-    ).
															
 
																-
															
 
																-on_link_ack(ClusterName, ReqId, Res) ->
															
 
																-    maybe_cast(ClusterName, {ack_link, ClusterName, ReqId, Res}).
															
 
																-
															
 
																-callback_mode() ->
															
 
																-    [state_functions, state_enter].
															
 
																-
															
 
																-init(LinkConf) ->
															
 
																-    process_flag(trap_exit, true),
															
 
																-    %% It helps to avoid unnecessary global name conflicts (and, as a result, coordinator re-election),
															
 
																-    %% e.g. when a down nodes comes back
															
 
																-    %% TODO: need to better understand `global` behaviour
															
 
																-    _ = global:sync(),
															
 
																-    Data = #{is_coordinator => false, link_conf => LinkConf},
															
 
																-    {ok, wait_for_coordinator, Data}.
															
 
																-
															
 
																-wait_for_coordinator(enter, _OldState, _Data) ->
															
 
																-    {keep_state_and_data, [{state_timeout, 0, do_wait_for_coordinator}]};
															
 
																-wait_for_coordinator(_, do_wait_for_coordinator, Data) ->
															
 
																-    #{link_conf := #{upstream := Name}} = Data,
															
 
																-    case global:whereis_name(?COORDINATOR(Name)) of
															
 
																-        undefined ->
															
 
																-            case register_coordinator(Name) of
															
 
																-                yes ->
															
 
																-                    {next_state, connecting, Data#{is_coordinator => true}};
															
 
																-                no ->
															
 
																-                    %% TODO: this should not happen forever, if it does, we need to detect it
															
 
																-                    {keep_state_and_data, [
															
 
																-                        {state_timeout, ?WAIT_COORD_RETRY_INTERVAL, do_wait_for_coordinator}
															
 
																-                    ]}
															
 
																-            end;
															
 
																-        %% Can be a prev stale pid?
															
 
																-        %% Let it crash with case_clause if it happens...
															
 
																-        Pid when is_pid(Pid) andalso Pid =/= self() ->
															
 
																-            Data1 = Data#{coordinator_mon => erlang:monitor(process, Pid), coordinator_pid => Pid},
															
 
																-            {next_state, following, Data1}
															
 
																-    end;
															
 
																-wait_for_coordinator(cast, {Op, _Topic}, _Data) when ?IS_ROUTE_OP(Op) ->
															
 
																-    %% Ignore any route op, until bootstrapping is started.
															
 
																-    %% All ignored route ops are expected to be caught up during the bootstrap.
															
 
																-    keep_state_and_data;
															
 
																-wait_for_coordinator(EventType, Event, Data) ->
															
 
																-    handle_event_(?FUNCTION_NAME, EventType, Event, Data).
															
 
																-
															
 
																-connecting(enter, _OldState, _Data) ->
															
 
																-    {keep_state_and_data, [{state_timeout, 0, reconnect}]};
															
 
																-connecting(cast, {Op, _Topic}, _Data) when ?IS_ROUTE_OP(Op) ->
															
 
																-    %% Ignore any route op, until bootstrapping is started.
															
 
																-    %% All ignored route ops are expected to be caught up during the bootstrap.
															
 
																-    keep_state_and_data;
															
 
																-connecting(_EventType, reconnect, Data) ->
															
 
																-    ensure_conn_pool(init_linking, Data);
															
 
																-connecting(EventType, Event, Data) ->
															
 
																-    handle_event_(?FUNCTION_NAME, EventType, Event, Data).
															
 
																-
															
 
																-init_linking(enter, _OldState, Data) ->
															
 
																-    {keep_state, Data#{link_retries => ?INIT_LINK_RETRIES}, [{state_timeout, 0, init_link}]};
															
 
																-init_linking(cast, {ack_link, _ClusterName, ReqId, Res}, #{link_req_id := ReqId} = Data) ->
															
 
																-    case Res of
															
 
																-        %% This state machine is not suitable to bootstrap the upstream cluster conditionally,
															
 
																-        %% since it ignores any route ops received before bootstrapping...
															
 
																-        {ok, #{proto_ver := _, need_bootstrap := _}} ->
															
 
																-            {next_state, bootstrapping, maps:without([link_req_id, link_retries], Data)};
															
 
																-        {error, <<"bad_upstream_name">>} ->
															
 
																-            %% unrecoverable error that needs a user intervention,
															
 
																-            %% TODO: maybe need to transition to some error state
															
 
																-            {keep_state, maps:without([link_req_id, link_retries], Data), [{state_timeout, cancel}]}
															
 
																-    end;
															
 
																-init_linking(_, init_link, #{link_conf := #{upstream := Name}, link_retries := Retries} = Data) ->
															
 
																-    case Retries > 0 of
															
 
																-        true ->
															
 
																-            {ReqId, {ok, _}} = emqx_cluster_link_mqtt:init_link(Name),
															
 
																-            Data1 = Data#{link_req_id => ReqId, link_retries => Retries - 1},
															
 
																-            {keep_state, Data1, [{state_timeout, ?INIT_LINK_RESP_TIMEOUT, init_link}]};
															
 
																-        false ->
															
 
																-            ?SLOG(error, #{
															
 
																-                msg => "no_link_ack_response_received",
															
 
																-                link_name => Name
															
 
																-            }),
															
 
																-            %% unrecoverable error that needs a user intervention,
															
 
																-            %% TODO: maybe need to transition to some error state
															
 
																-            keep_state_and_data
															
 
																-    end;
															
 
																-init_linking(cast, {Op, _Topic}, _Data) when ?IS_ROUTE_OP(Op) ->
															
 
																-    %% Ignore any route op, until bootstrapping is started.
															
 
																-    %% All ignored route ops are expected to be caught up during the bootstrap.
															
 
																-    keep_state_and_data;
															
 
																-init_linking(EventType, Event, Data) ->
															
 
																-    handle_event_(?FUNCTION_NAME, EventType, Event, Data).
															
 
																-
															
 
																-bootstrapping(enter, _OldState, #{link_conf := LinkConf} = Data) ->
															
 
																-    #{topics := LinkFilters, upstream := ClusterName} = LinkConf,
															
 
																-    %% TODO add timeout?
															
 
																-    {Pid, Ref} = erlang:spawn_monitor(fun() -> bootstrap(ClusterName, LinkFilters) end),
															
 
																-    {keep_state, Data#{bootstrap_pid => Pid, bootstrap_ref => Ref}};
															
 
																-bootstrapping(info, {'DOWN', Ref, process, _Pid, Reason}, #{bootstrap_ref := Ref} = Data) ->
															
 
																-    %% TODO: think about the best way to proceed if bootstrapping failed,
															
 
																-    %% perhaps just transition back to connecting state?
															
 
																-    normal = Reason,
															
 
																-    Data1 = maps:without([bootstrap_ref, bootstrap_pid], Data),
															
 
																-    {next_state, coordinating, Data1};
															
 
																-%% Accumulate new route ops, since there is no guarantee
															
 
																-%% they will be included in the bootstrapped data
															
 
																-bootstrapping(cast, {Op, _Topic}, _Data) when ?IS_ROUTE_OP(Op) ->
															
 
																-    {keep_state_and_data, [postpone]};
															
 
																-bootstrapping(EventType, Event, Data) ->
															
 
																-    handle_event_(?FUNCTION_NAME, EventType, Event, Data).
															
 
																-
															
 
																-coordinating(enter, _OldState, _Data) ->
															
 
																-    keep_state_and_data;
															
 
																-coordinating(cast, {Op, Topic}, Data) when ?IS_ROUTE_OP(Op) ->
															
 
																-    #{link_conf := #{upstream := ClusterName}} = Data,
															
 
																-    %% TODO: batching
															
 
																-    case emqx_cluster_link_mqtt:publish_route_op(async, ClusterName, Op, Topic) of
															
 
																-        {error, _} ->
															
 
																-            %% Conn pool error, reconnect.
															
 
																-            {next_state, connecting, stop_conn_pool(Data)};
															
 
																-        _Ref ->
															
 
																-            keep_state_and_data
															
 
																-    end;
															
 
																-%% TODO: this can also be received in other states, move to generic handler?
															
 
																-coordinating(info, {global_name_conflict, CoordName}, Data) ->
															
 
																-    LogData = #{
															
 
																-        msg => "emqx_cluster_link_coordinator_name_conflict",
															
 
																-        coordinator_name => CoordName
															
 
																-    },
															
 
																-    LogData1 =
															
 
																-        %% TODO: this can be a previous (self) coordinator?
															
 
																-        case global:whereis_name(CoordName) of
															
 
																-            undefined -> LogData;
															
 
																-            Pid -> LogData#{new_coordinator => Pid, coordinator_node => node(Pid)}
															
 
																-        end,
															
 
																-    ?SLOG(warning, LogData1),
															
 
																-    Data1 = stop_conn_pool(Data),
															
 
																-    {next_state, wait_for_coordinator, Data1#{is_coordinator => false}};
															
 
																-%% only errors results are expected
															
 
																-%% TODO: a single error causes reconnection and re-bootstrapping,
															
 
																-%% it's worth considering some optimizations.
															
 
																-coordinating(info, {pub_result, _Ref, {error, Reason}}, #{link_conf := #{upstream := Name}} = Data) ->
															
 
																-    ?SLOG(error, #{
															
 
																-        msg => "failed_to_replicate_route_op_to_linked_cluster",
															
 
																-        link_name => Name,
															
 
																-        reason => Reason
															
 
																-    }),
															
 
																-    %% TODO: check errors, some may be not possible to correct by re-connecting
															
 
																-    Data1 = stop_conn_pool(Data),
															
 
																-    {next_state, connecting, Data1};
															
 
																-coordinating(EventType, Event, Data) ->
															
 
																-    handle_event_(?FUNCTION_NAME, EventType, Event, Data).
															
 
																-
															
 
																-following(enter, _OldState, _Data) ->
															
 
																-    keep_state_and_data;
															
 
																-following(info, {'DOWN', MRef, process, _Pid, _Info}, #{coordinator_mon := MRef} = Data) ->
															
 
																-    {next_state, wait_for_coordinator, maps:without([coordinator_mon, coordinator_pid], Data)};
															
 
																-following(EventType, Event, Data) ->
															
 
																-    handle_event_(?FUNCTION_NAME, EventType, Event, Data).
															
 
																-
															
 
																-handle_event_(_State, info, {'DOWN', Ref, process, _Pid, Reason}, Data) ->
															
 
																-    case Data of
															
 
																-        #{conn_pool_mons := #{Ref := WorkerName}, is_coordinator := true} ->
															
 
																-            ?SLOG(warning, #{
															
 
																-                msg => "cluster_link_route_connection_is_down",
															
 
																-                reason => Reason,
															
 
																-                worker => WorkerName
															
 
																-            }),
															
 
																-            {next_state, connecting, stop_conn_pool(Data)};
															
 
																-        _ ->
															
 
																-            %% Must be a stale 'DOWN' msg (e.g., from the next worker) which is already handled.
															
 
																-            keep_state_and_data
															
 
																-    end;
															
 
																-handle_event_(State, EventType, Event, Data) ->
															
 
																-    ?SLOG(warning, #{
															
 
																-        msg => "unexpected_event",
															
 
																-        event => Event,
															
 
																-        event_type => EventType,
															
 
																-        state => State,
															
 
																-        data => Data
															
 
																-    }),
															
 
																-    keep_state_and_data.
															
 
																-
															
 
																-terminate(Reason, _State, #{link_conf := #{upstream := ClusterName}} = Data) ->
															
 
																-    %% TODO unregister coordinator?
															
 
																-    IsCoordinator = maps:get(is_coordinator, Data, false),
															
 
																-    case Reason of
															
 
																-        shutdown when IsCoordinator ->
															
 
																-            %% must be sync, since we are going to stop the pool
															
 
																-            %% NOTE: there is no guarantee that unlink op will arrive the last one
															
 
																-            %% (since there may be other route op sent over another pool worker)
															
 
																-            %% and clear everything, but it must be good enough to GC most of the routes.
															
 
																-            _ = emqx_cluster_link_mqtt:remove_link(ClusterName);
															
 
																-        _ ->
															
 
																-            ok
															
 
																-    end,
															
 
																-    _ = stop_conn_pool(Data),
															
 
																-    ok.
															
 
																-
															
 
																-%%--------------------------------------------------------------------
															
 
																-%% Internal functions
															
 
																-%%--------------------------------------------------------------------
															
 
																-
															
 
																-topic_intersect_any(Topic, [LinkFilter | T]) ->
															
 
																-    case emqx_topic:intersection(Topic, LinkFilter) of
															
 
																-        false -> topic_intersect_any(Topic, T);
															
 
																-        TopicOrFilter -> TopicOrFilter
															
 
																-    end;
															
 
																-topic_intersect_any(_Topic, []) ->
															
 
																-    false.
															
 
																-
															
 
																-bootstrap(ClusterName, LinkFilters) ->
															
 
																-    %% TODO: do this in chunks
															
 
																-    Topics = select_routes(LinkFilters),
															
 
																-    {ok, _} = emqx_cluster_link_mqtt:publish_routes(sync, ClusterName, Topics).
															
 
																-
															
 
																-%% TODO: if a local route matches link filter exactly,
															
 
																-%% it's enough to only select this matching filter itself and skip any other routes?
															
 
																-%% E.g., local routes: "t/global/#", "t/global/1/+", clsuter link topics = ["t/global/#"],
															
 
																-%% it's enough to replicate "t/global/#" only to the linked cluster.
															
 
																-%% What to do when "t/global/#" subscriber unsubscribers
															
 
																-%% and we start to get forwarded messages (e.g. "t/global/2/3") matching no subscribers?
															
 
																-%% How can we efficiently replace "t/global/#" route with "t/global/1/+"
															
 
																-%% (intersection of "t/global/#" and "t/global/#")?
															
 
																-%% So maybe better not to do it at all and replicate both "t/global/1/+" and "t/global/#" ?
															
 
																-select_routes(LinkFilters) ->
															
 
																-    {Wildcards, Topics} = lists:partition(fun emqx_topic:wildcard/1, LinkFilters),
															
 
																-    Routes = select_routes_by_topics(Topics),
															
 
																-    Routes1 = intersecting_routes(Wildcards),
															
 
																-    AllRoutes = Routes ++ Routes1,
															
 
																-    case emqx_router:get_schema_vsn() of
															
 
																-        v1 -> AllRoutes;
															
 
																-        %% v2 stores filters (Wildcard subscriptions routes) in a separate index,
															
 
																-        %% so WildcardRoutes contains only non-wildcard routes matching wildcard link filters.
															
 
																-        %% Thus, we need to select wildcard routes additionally
															
 
																-        v2 -> intersecting_routes_v2(Wildcards) ++ AllRoutes
															
 
																-    end.
															
 
																-
															
 
																-select_routes_by_topics([]) ->
															
 
																-    [];
															
 
																-select_routes_by_topics([Topic | T]) ->
															
 
																-    case filter_out_upstream_routes(emqx_router:match_routes(Topic)) of
															
 
																-        [_ | _] ->
															
 
																-            %% These are non-wildcard link topics, so we don't care about actual
															
 
																-            %% routes as long as they are matched, and just need to replicate
															
 
																-            %% topic routes to the linked cluster
															
 
																-            [Topic | select_routes_by_topics(T)];
															
 
																-        _ ->
															
 
																-            select_routes_by_topics(T)
															
 
																-    end.
															
 
																-
															
 
																-filter_out_upstream_routes(Routes) ->
															
 
																-    lists:filter(
															
 
																-        fun
															
 
																-            (#route{dest = ?UPSTREAM_DEST}) -> false;
															
 
																-            (_) -> true
															
 
																-        end,
															
 
																-        Routes
															
 
																-    ).
															
 
																-
															
 
																-%% selects only non-wildcard routes that match wildcards (filters),
															
 
																-%% can only be done as a linear search over all routes
															
 
																-intersecting_routes([]) ->
															
 
																-    [];
															
 
																-intersecting_routes(Wildcards) ->
															
 
																-    Res = ets:foldl(
															
 
																-        fun
															
 
																-            (#route{dest = ?UPSTREAM_DEST}, Acc) ->
															
 
																-                Acc;
															
 
																-            (#route{topic = T}, Acc) ->
															
 
																-                %% TODO: probably nice to validate cluster link topic filters
															
 
																-                %% to have no intersections between each other?
															
 
																-                case topic_intersect_any(T, Wildcards) of
															
 
																-                    false -> Acc;
															
 
																-                    Intersection -> Acc#{Intersection => undefined}
															
 
																-                end
															
 
																-        end,
															
 
																-        #{},
															
 
																-        ?ROUTE_TAB
															
 
																-    ),
															
 
																-    maps:keys(Res).
															
 
																-
															
 
																-intersecting_routes_v2([]) ->
															
 
																-    [];
															
 
																-intersecting_routes_v2(Wildcards) ->
															
 
																-    lists:foldl(
															
 
																-        fun(Wildcard, Acc) ->
															
 
																-            MatchedFilters = matched_filters_v2(Wildcard),
															
 
																-            all_intersections(Wildcard, MatchedFilters, Acc)
															
 
																-        end,
															
 
																-        [],
															
 
																-        Wildcards
															
 
																-    ).
															
 
																-
															
 
																-matched_filters_v2(Wildcard) ->
															
 
																-    MatchesAcc = lists:foldl(
															
 
																-        fun(M, Acc) ->
															
 
																-            case emqx_topic_index:get_id(M) of
															
 
																-                ?UPSTREAM_DEST ->
															
 
																-                    Acc;
															
 
																-                _ ->
															
 
																-                    Acc#{emqx_topic_index:get_topic(M) => undefined}
															
 
																-            end
															
 
																-        end,
															
 
																-        #{},
															
 
																-        emqx_topic_index:matches_filter(Wildcard, ?ROUTE_TAB_FILTERS, [])
															
 
																-    ),
															
 
																-    maps:keys(MatchesAcc).
															
 
																-
															
 
																-all_intersections(Wildcard, [W | Wildcards], Acc) ->
															
 
																-    case emqx_topic:intersection(Wildcard, W) of
															
 
																-        false -> all_intersections(Wildcard, Wildcards, Acc);
															
 
																-        Intersection -> all_intersections(Wildcard, Wildcards, [Intersection | Acc])
															
 
																-    end;
															
 
																-all_intersections(_, [], Acc) ->
															
 
																-    lists:usort(Acc).
															
 
																-
															
 
																-maybe_cast(UpstreamName, Msg) ->
															
 
																-    case global:whereis_name(?COORDINATOR(UpstreamName)) of
															
 
																-        Pid when is_pid(Pid) ->
															
 
																-            gen_statem:cast(Pid, Msg);
															
 
																-        undefined ->
															
 
																-            %% Ignore and rely on coordinator bootstrapping once it's elected
															
 
																-            ok
															
 
																-    end.
															
 
																-
															
 
																-register_coordinator(UpstreamName) ->
															
 
																-    case mria_config:role() of
															
 
																-        core ->
															
 
																-            global:register_name(
															
 
																-                ?COORDINATOR(UpstreamName), self(), fun global:random_notify_name/3
															
 
																-            );
															
 
																-        _ ->
															
 
																-            no
															
 
																-    end.
															
 
																-
															
 
																-%% connecting state helper
															
 
																-ensure_conn_pool(NextState, #{link_conf := LinkConf} = Data) ->
															
 
																-    Res = start_conn_pool(LinkConf),
															
 
																-    Data1 = Data#{conn_pool => Res},
															
 
																-    case Res of
															
 
																-        {ok, _} ->
															
 
																-            Data2 = Data1#{conn_pool_mons => mon_pool_workers(LinkConf)},
															
 
																-            {next_state, NextState, Data2};
															
 
																-        _Err ->
															
 
																-            {keep_state, Data1, [{state_timeout, ?CONN_RETRY_INTERVAL, reconnect}]}
															
 
																-    end.
															
 
																-
															
 
																-start_conn_pool(LinkConf) ->
															
 
																-    case emqx_cluster_link_mqtt:start_routing_pool(LinkConf) of
															
 
																-        {ok, _Pid} = Ok ->
															
 
																-            Ok;
															
 
																-        {error, Reason} = Err ->
															
 
																-            #{upstream := Name} = LinkConf,
															
 
																-            ?SLOG(error, #{
															
 
																-                msg => "failed_to_connect_to_linked_cluster",
															
 
																-                cluster_name => Name,
															
 
																-                reason => Reason
															
 
																-            }),
															
 
																-            Err
															
 
																-    end.
															
 
																-
															
 
																-stop_conn_pool(#{link_conf := #{upstream := Name}} = Data) ->
															
 
																-    case Data of
															
 
																-        #{conn_pool := {ok, _}} ->
															
 
																-            Data1 = maybe_unmointor_workers(Data),
															
 
																-            Data1#{conn_pool => {stopped, emqx_cluster_link_mqtt:stop_routing_pool(Name)}};
															
 
																-        _ ->
															
 
																-            Data
															
 
																-    end.
															
 
																-
															
 
																-maybe_unmointor_workers(#{conn_pool_mons := MonitorsMap} = Data) ->
															
 
																-    _ = maps:foreach(
															
 
																-        fun(Mref, _Name) ->
															
 
																-            erlang:demonitor(Mref)
															
 
																-        end,
															
 
																-        MonitorsMap
															
 
																-    ),
															
 
																-    maps:remove(conn_pool_mons, Data);
															
 
																-maybe_unmointor_workers(Data) ->
															
 
																-    Data.
															
 
																-
															
 
																-mon_pool_workers(LinkConf) ->
															
 
																-    maps:from_list([
															
 
																-        {erlang:monitor(process, Pid), Name}
															
 
																-     || {Name, Pid} <- emqx_cluster_link_mqtt:routing_pool_workers(LinkConf)
															
 
																-    ]).
															
--- a/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl
+++ b/apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl
@@ -9,8 +9,6 @@
 
																 -include_lib("emqx/include/emqx_mqtt.hrl").
															
 
																 -include_lib("emqx/include/logger.hrl").
															
 
																-%-include_lib("emqtt/include/emqtt.hrl").
															
 
																-
															
 
																 -behaviour(emqx_resource).
															
 
																 -behaviour(ecpool_worker).
															
@@ -30,16 +28,6 @@
 
																 -export([
															
 
																     ensure_msg_fwd_resource/1,
															
 
																     stop_msg_fwd_resource/1,
															
 
																-    start_routing_pool/1,
															
 
																-    stop_routing_pool/1,
															
 
																-    routing_pool_workers/1,
															
 
																-    init_link/1,
															
 
																-    ack_link/4,
															
 
																-    remove_link/1,
															
 
																-    publish_route_op/4,
															
 
																-    publish_routes/3,
															
 
																-    cleanup_routes/1,
															
 
																-    decode_ctrl_msg/2,
															
 
																     decode_route_op/1,
															
 
																     decode_forwarded_msg/1
															
 
																 ]).
															
@@ -54,37 +42,24 @@
 
																     forward/2
															
 
																 ]).
															
 
																--define(ROUTE_CLIENTID_SUFFIX, ":route:").
															
 
																 -define(MSG_CLIENTID_SUFFIX, ":msg:").
															
 
																--define(CLIENTID(Base, Suffix), emqx_bridge_mqtt_lib:clientid_base([Base, Suffix])).
															
 
																 -define(MQTT_HOST_OPTS, #{default_port => 1883}).
															
 
																 -define(MY_CLUSTER_NAME, emqx_cluster_link_config:cluster()).
															
 
																 -define(ROUTE_TOPIC, <<?ROUTE_TOPIC_PREFIX, (?MY_CLUSTER_NAME)/binary>>).
															
 
																 -define(MSG_FWD_TOPIC, <<?MSG_TOPIC_PREFIX, (?MY_CLUSTER_NAME)/binary>>).
															
 
																--define(CTRL_TOPIC(ClusterName), <<?CTRL_TOPIC_PREFIX, (ClusterName)/binary>>).
															
 
																+%%-define(CTRL_TOPIC(ClusterName), <<?CTRL_TOPIC_PREFIX, (ClusterName)/binary>>).
															
 
																-%% ecpool and emqx_resource names
															
 
																--define(ROUTE_POOL_PREFIX, "emqx_cluster_link_mqtt:route:").
															
 
																 -define(MSG_POOL_PREFIX, "emqx_cluster_link_mqtt:msg:").
															
 
																 -define(RES_NAME(Prefix, ClusterName), <<Prefix, ClusterName/binary>>).
															
 
																 -define(ROUTE_POOL_NAME(ClusterName), ?RES_NAME(?ROUTE_POOL_PREFIX, ClusterName)).
															
 
																 -define(MSG_RES_ID(ClusterName), ?RES_NAME(?MSG_POOL_PREFIX, ClusterName)).
															
 
																 -define(HEALTH_CHECK_TIMEOUT, 1000).
															
 
																 -define(RES_GROUP, <<"emqx_cluster_link">>).
															
 
																--define(DEFAULT_POOL_KEY, <<"default">>).
															
 
																 %% Protocol
															
 
																--define(PROTO_VER, <<"1.0">>).
															
 
																--define(INIT_LINK_OP, <<"init_link">>).
															
 
																--define(ACK_LINK_OP, <<"ack_link">>).
															
 
																--define(UNLINK_OP, <<"unlink">>).
															
 
																--define(BATCH_ROUTES_OP, <<"add_routes">>).
															
 
																--define(CLEANUP_ROUTES_OP, <<"cleanup_routes">>).
															
 
																-%% It's worth optimizing non-batch op payload size,
															
 
																-%% thus it's encoded as a plain binary
															
 
																--define(TOPIC_WITH_OP(Op, Topic), <<Op/binary, "_", Topic/binary>>).
															
 
																+%% -define(PROTO_VER, <<"1.0">>).
															
 
																 -define(DECODE(Payload), erlang:binary_to_term(Payload, [safe])).
															
 
																 -define(ENCODE(Payload), erlang:term_to_binary(Payload)).
															
@@ -290,121 +265,9 @@ connect(Options) ->
 
																     end.
															
 
																 %%--------------------------------------------------------------------
															
 
																-%% Routing
															
 
																+%% Protocol
															
 
																 %%--------------------------------------------------------------------
															
 
																-routing_pool_workers(#{upstream := ClusterName} = _ClusterConf) ->
															
 
																-    ecpool:workers(?ROUTE_POOL_NAME(ClusterName)).
															
 
																-
															
 
																-start_routing_pool(#{upstream := ClusterName} = ClusterConf) ->
															
 
																-    start_pool(?ROUTE_POOL_NAME(ClusterName), ?ROUTE_CLIENTID_SUFFIX, ClusterConf).
															
 
																-
															
 
																-stop_routing_pool(ClusterName) ->
															
 
																-    ecpool:stop_sup_pool(?ROUTE_POOL_NAME(ClusterName)).
															
 
																-
															
 
																-init_link(ClusterName) ->
															
 
																-    Payload = #{
															
 
																-        <<"op">> => ?INIT_LINK_OP,
															
 
																-        <<"proto_ver">> => ?PROTO_VER,
															
 
																-        <<"upstream">> => ClusterName,
															
 
																-        %% TODO: may no need to reserve it as it is a map?
															
 
																-        <<"extra">> => #{}
															
 
																-    },
															
 
																-    ReqId = emqx_utils_conv:bin(emqx_utils:gen_id(16)),
															
 
																-    Properties = #{
															
 
																-        'Response-Topic' => ?CTRL_TOPIC(ClusterName),
															
 
																-        'Correlation-Data' => ReqId
															
 
																-    },
															
 
																-    Topic = ?CTRL_TOPIC(?MY_CLUSTER_NAME),
															
 
																-    {ReqId, publish(sync, ClusterName, ?DEFAULT_POOL_KEY, Payload, Properties, Topic, ?QOS_1)}.
															
 
																-
															
 
																-ack_link(ClusterName, Result, RespTopic, ReqId) ->
															
 
																-    Payload = #{
															
 
																-        <<"op">> => ?ACK_LINK_OP,
															
 
																-        %% The links may compare and downgrade/adjust protocol in future
															
 
																-        <<"proto_ver">> => ?PROTO_VER,
															
 
																-        %% may be used in future to avoud re-bootrstrapping all the routes,
															
 
																-        %% for example, if the connection was abrupted for a while but the cluster was healthy
															
 
																-        %% and didn't lost any routes. In that case, retrying lost route updates would be sufficient.
															
 
																-        %% For now, it's always true for simplicitiy reasons.
															
 
																-        <<"need_bootstrap">> => true,
															
 
																-        <<"extra">> => #{}
															
 
																-    },
															
 
																-    Payload1 =
															
 
																-        case Result of
															
 
																-            {ok, _} ->
															
 
																-                Payload#{<<"result">> => <<"ok">>};
															
 
																-            {error, Reason} ->
															
 
																-                Payload#{<<"result">> => <<"error">>, reason => Reason}
															
 
																-        end,
															
 
																-    Props = #{'Correlation-Data' => ReqId},
															
 
																-    Query = {RespTopic, Props, Payload1, ?QOS_1},
															
 
																-    %% Using msg forwading resource to send the response back.
															
 
																-    %% TODO: maybe async query?
															
 
																-    emqx_resource:query(?MSG_RES_ID(ClusterName), Query, #{
															
 
																-        query_mode => simple_sync, pick_key => RespTopic
															
 
																-    }).
															
 
																-
															
 
																-remove_link(ClusterName) ->
															
 
																-    Payload = #{<<"op">> => ?UNLINK_OP},
															
 
																-    Topic = ?CTRL_TOPIC(?MY_CLUSTER_NAME),
															
 
																-    publish(sync, ClusterName, ?DEFAULT_POOL_KEY, Payload, #{}, Topic, ?QOS_0).
															
 
																-
															
 
																-publish_routes(QueryType, ClusterName, Topics) ->
															
 
																-    %% Picks the same pool worker consistently.
															
 
																-    %% Although, as writes are idompotent we can pick it randomly - TBD.
															
 
																-    publish_routes(QueryType, ClusterName, ?DEFAULT_POOL_KEY, Topics).
															
 
																-
															
 
																-publish_routes(QueryType, ClusterName, PoolKey, Topics) ->
															
 
																-    Payload = #{<<"op">> => ?BATCH_ROUTES_OP, <<"topics">> => Topics},
															
 
																-    publish(QueryType, ClusterName, PoolKey, Payload).
															
 
																-
															
 
																-cleanup_routes(ClusterName) ->
															
 
																-    Payload = #{<<"op">> => ?CLEANUP_ROUTES_OP},
															
 
																-    publish(sync, ClusterName, ?DEFAULT_POOL_KEY, Payload, #{}, ?ROUTE_TOPIC, ?QOS_0).
															
 
																-
															
 
																-publish_route_op(QueryType, ClusterName, Op, Topic) when Op =:= <<"add">>; Op =:= <<"delete">> ->
															
 
																-    Payload = ?TOPIC_WITH_OP(Op, Topic),
															
 
																-    publish(QueryType, ClusterName, Topic, Payload).
															
 
																-
															
 
																-publish(QueryType, ClusterName, PoolKey, Payload) ->
															
 
																-    publish(QueryType, ClusterName, PoolKey, Payload, #{}).
															
 
																-
															
 
																-publish(QueryType, ClusterName, PoolKey, Payload, Props) ->
															
 
																-    %% Deletes are not implemented for now, writes are idempotent, so QOS_1 is fine.
															
 
																-    publish(QueryType, ClusterName, PoolKey, Payload, Props, ?ROUTE_TOPIC, ?QOS_1).
															
 
																-
															
 
																-publish(async, ClusterName, PoolKey, Payload, Props, Topic, QoS) ->
															
 
																-    ecpool:pick_and_do(
															
 
																-        {?ROUTE_POOL_NAME(ClusterName), PoolKey},
															
 
																-        fun(ConnPid) ->
															
 
																-            Ref = erlang:make_ref(),
															
 
																-            Cb = {fun publish_result/3, [self(), Ref]},
															
 
																-            emqtt:publish_async(
															
 
																-                ConnPid, Topic, Props, ?ENCODE(Payload), [{qos, QoS}], ?PUB_TIMEOUT, Cb
															
 
																-            ),
															
 
																-            Ref
															
 
																-        end,
															
 
																-        no_handover
															
 
																-    );
															
 
																-publish(sync, ClusterName, PoolKey, Payload, Props, Topic, QoS) ->
															
 
																-    ecpool:pick_and_do(
															
 
																-        {?ROUTE_POOL_NAME(ClusterName), PoolKey},
															
 
																-        fun(ConnPid) ->
															
 
																-            emqtt:publish(ConnPid, Topic, Props, ?ENCODE(Payload), [{qos, QoS}])
															
 
																-        end,
															
 
																-        no_handover
															
 
																-    ).
															
 
																-
															
 
																-publish_result(Caller, Ref, Result) ->
															
 
																-    case handle_send_result(Result) of
															
 
																-        ok ->
															
 
																-            %% avoid extra message passing, we only care about errors for now
															
 
																-            ok;
															
 
																-        Err ->
															
 
																-            Caller ! {pub_result, Ref, Err}
															
 
																-    end.
															
 
																-
															
 
																 %%% New leader-less Syncer/Actor implementation
															
 
																 publish_actor_init_sync(ClientPid, Actor, Incarnation) ->
															
@@ -427,63 +290,6 @@ publish_route_sync(ClientPid, Actor, Incarnation, Updates) ->
 
																     },
															
 
																     emqtt:publish(ClientPid, PubTopic, ?ENCODE(Payload), ?QOS_1).
															
 
																-%%--------------------------------------------------------------------
															
 
																-%% Protocol
															
 
																-%%--------------------------------------------------------------------
															
 
																-
															
 
																-decode_ctrl_msg(Payload, ClusterName) ->
															
 
																-    decode_ctrl_msg1(?DECODE(Payload), ClusterName).
															
 
																-
															
 
																-decode_ctrl_msg1(
															
 
																-    #{
															
 
																-        <<"op">> := ?INIT_LINK_OP,
															
 
																-        <<"proto_ver">> := ProtoVer,
															
 
																-        <<"upstream">> := UpstreamName
															
 
																-    },
															
 
																-    ClusterName
															
 
																-) ->
															
 
																-    ProtoVer1 = decode_proto_ver(ProtoVer, ClusterName),
															
 
																-    %% UpstreamName is the name the remote linked cluster refers to this cluster,
															
 
																-    %% so it must equal to the local cluster name, more clear naming is desired...
															
 
																-    MyClusterName = ?MY_CLUSTER_NAME,
															
 
																-    case UpstreamName of
															
 
																-        MyClusterName ->
															
 
																-            {init_link, {ok, #{proto_ver => ProtoVer1}}};
															
 
																-        _ ->
															
 
																-            ?SLOG(error, #{
															
 
																-                msg => "misconfigured_cluster_link_name",
															
 
																-                %% How this cluster names itself
															
 
																-                local_name => MyClusterName,
															
 
																-                %% How the remote cluster names itself
															
 
																-                link_name => ClusterName,
															
 
																-                %% How the remote cluster names this local cluster
															
 
																-                upstream_name => UpstreamName
															
 
																-            }),
															
 
																-            {init_link, {error, <<"bad_upstream_name">>}}
															
 
																-    end;
															
 
																-decode_ctrl_msg1(
															
 
																-    #{
															
 
																-        <<"op">> := ?ACK_LINK_OP,
															
 
																-        <<"result">> := <<"ok">>,
															
 
																-        <<"proto_ver">> := ProtoVer,
															
 
																-        <<"need_bootstrap">> := IsBootstrapNeeded
															
 
																-    },
															
 
																-    ClusterName
															
 
																-) ->
															
 
																-    ProtoVer1 = decode_proto_ver(ProtoVer, ClusterName),
															
 
																-    {ack_link, {ok, #{proto_ver => ProtoVer1, need_bootstrap => IsBootstrapNeeded}}};
															
 
																-decode_ctrl_msg1(
															
 
																-    #{
															
 
																-        <<"op">> := ?ACK_LINK_OP,
															
 
																-        <<"result">> := <<"error">>,
															
 
																-        <<"reason">> := Reason
															
 
																-    },
															
 
																-    _ClusterName
															
 
																-) ->
															
 
																-    {ack_link, {error, Reason}};
															
 
																-decode_ctrl_msg1(#{<<"op">> := ?UNLINK_OP}, _ClusterName) ->
															
 
																-    unlink.
															
 
																-
															
 
																 decode_route_op(Payload) ->
															
 
																     decode_route_op1(?DECODE(Payload)).
															
@@ -501,14 +307,6 @@ decode_route_op1(#{
 
																 }) ->
															
 
																     RouteOps1 = lists:map(fun(Op) -> decode_field(route, Op) end, RouteOps),
															
 
																     {route_updates, #{actor => Actor, incarnation => Incr}, RouteOps1};
															
 
																-%%decode_route_op1(<<"add_", Topic/binary>>) ->
															
 
																-%%    {add, Topic};
															
 
																-%%decode_route_op1(<<"delete_", Topic/binary>>) ->
															
 
																-%%    {delete, Topic};
															
 
																-%%decode_route_op1(#{<<"op">> := ?BATCH_ROUTES_OP, <<"topics">> := Topics}) when is_list(Topics) ->
															
 
																-%%    {add, Topics};
															
 
																-%%decode_route_op1(#{<<"op">> := ?CLEANUP_ROUTES_OP}) ->
															
 
																-%%    cleanup_routes;
															
 
																 decode_route_op1(Payload) ->
															
 
																     ?SLOG(warning, #{
															
 
																         msg => "unexpected_cluster_link_route_op_payload",
															
@@ -528,29 +326,6 @@ decode_forwarded_msg(Payload) ->
 
																             {error, Payload}
															
 
																     end.
															
 
																-decode_proto_ver(ProtoVer, ClusterName) ->
															
 
																-    {MyMajor, MyMinor} = decode_proto_ver1(?PROTO_VER),
															
 
																-    case decode_proto_ver1(ProtoVer) of
															
 
																-        {Major, Minor} = Res when
															
 
																-            Major > MyMajor;
															
 
																-            Minor > MyMinor
															
 
																-        ->
															
 
																-            ?SLOG(notice, #{
															
 
																-                msg => "different_cluster_link_protocol_versions",
															
 
																-                protocol_version => ?PROTO_VER,
															
 
																-                link_protocol_version => ProtoVer,
															
 
																-                link_name => ClusterName
															
 
																-            }),
															
 
																-            Res;
															
 
																-        Res ->
															
 
																-            Res
															
 
																-    end.
															
 
																-
															
 
																-decode_proto_ver1(ProtoVer) ->
															
 
																-    [Major, Minor] = binary:split(ProtoVer, <<".">>),
															
 
																-    %% Let it fail (for now), we don't expect invalid data to pass through the linking protocol..
															
 
																-    {emqx_utils_conv:int(Major), emqx_utils_conv:int(Minor)}.
															
 
																-
															
 
																 encode_field(route, {add, Route = {_Topic, _ID}}) ->
															
 
																     Route;
															
 
																 encode_field(route, {delete, {Topic, ID}}) ->
															
@@ -573,38 +348,7 @@ forward(ClusterName, #delivery{message = #message{topic = Topic} = Msg}) ->
 
																 %% Internal functions
															
 
																 %%--------------------------------------------------------------------
															
 
																-emqtt_client_opts(
															
 
																-    ClientIdSuffix, #{server := Server, ssl := #{enable := EnableSsl} = Ssl} = ClusterConf
															
 
																-) ->
															
 
																-    BaseClientId = maps:get(client_id, ClusterConf, ?MY_CLUSTER_NAME),
															
 
																-    ClientId = ?CLIENTID(BaseClientId, ClientIdSuffix),
															
 
																-    #{hostname := Host, port := Port} = emqx_schema:parse_server(Server, ?MQTT_HOST_OPTS),
															
 
																-    Opts = #{
															
 
																-        host => Host,
															
 
																-        port => Port,
															
 
																-        clientid => ClientId,
															
 
																-        proto_ver => v5,
															
 
																-        ssl => EnableSsl,
															
 
																-        ssl_opts => maps:to_list(maps:remove(enable, Ssl))
															
 
																-    },
															
 
																-    with_password(with_user(Opts, ClusterConf), ClusterConf).
															
 
																-
															
 
																-with_user(Opts, #{username := U} = _ClusterConf) ->
															
 
																-    Opts#{username => U};
															
 
																-with_user(Opts, _ClusterConf) ->
															
 
																-    Opts.
															
 
																-
															
 
																-with_password(Opts, #{password := P} = _ClusterConf) ->
															
 
																-    Opts#{password => emqx_secret:unwrap(P)};
															
 
																-with_password(Opts, _ClusterConf) ->
															
 
																-    Opts.
															
 
																-
															
 
																-start_pool(PoolName, ClientIdSuffix, #{pool_size := PoolSize} = ClusterConf) ->
															
 
																-    ClientOpts = emqtt_client_opts(ClientIdSuffix, ClusterConf),
															
 
																-    Opts = [
															
 
																-        {name, PoolName},
															
 
																-        {pool_size, PoolSize},
															
 
																-        {pool_type, hash},
															
 
																-        {client_opts, ClientOpts}
															
 
																-    ],
															
 
																-    ecpool:start_sup_pool(PoolName, ?MODULE, Opts).
															
 
																+emqtt_client_opts(ClientIdSuffix, ClusterConf) ->
															
 
																+    #{clientid := BaseClientId} = Opts = emqx_cluster_link_config:mk_emqtt_options(ClusterConf),
															
 
																+    ClientId = emqx_bridge_mqtt_lib:clientid_base([BaseClientId, ClientIdSuffix]),
															
 
																+    Opts#{clientid => ClientId}.