Kaynağa Gözat

fix(clusterlink): communicate bootstrap requirements via actor handshake

`session_present` flag is not reliable to decide whether bootstrap is needed if durable sessions is enabled.
In this case, the client session may survive cluster restart while all the external routes are lost, as they are not persistent.
Serge Tupchii 1 yıl önce
ebeveyn
işleme
21711c6e0d

+ 1 - 4
apps/emqx_cluster_link/src/emqx_cluster_link.erl

@@ -170,10 +170,7 @@ actor_init(
                 case MyClusterName of
                 case MyClusterName of
                     TargetCluster ->
                     TargetCluster ->
                         Env = #{timestamp => erlang:system_time(millisecond)},
                         Env = #{timestamp => erlang:system_time(millisecond)},
-                        {ok, _} = emqx_cluster_link_extrouter:actor_init(
-                            ClusterName, Actor, Incr, Env
-                        ),
-                        ok;
+                        emqx_cluster_link_extrouter:actor_init(ClusterName, Actor, Incr, Env);
                     _ ->
                     _ ->
                         %% The remote cluster uses a different name to refer to this cluster
                         %% The remote cluster uses a different name to refer to this cluster
                         ?SLOG(error, #{
                         ?SLOG(error, #{

+ 13 - 5
apps/emqx_cluster_link/src/emqx_cluster_link_extrouter.erl

@@ -19,7 +19,8 @@
     actor_state/3,
     actor_state/3,
     actor_apply_operation/2,
     actor_apply_operation/2,
     actor_apply_operation/3,
     actor_apply_operation/3,
-    actor_gc/1
+    actor_gc/1,
+    is_present_incarnation/1
 ]).
 ]).
 
 
 %% Internal API
 %% Internal API
@@ -140,7 +141,8 @@ match_to_route(M) ->
     cluster :: cluster(),
     cluster :: cluster(),
     actor :: actor(),
     actor :: actor(),
     incarnation :: incarnation(),
     incarnation :: incarnation(),
-    lane :: lane() | undefined
+    lane :: lane() | undefined,
+    extra :: map()
 }).
 }).
 
 
 -type state() :: #state{}.
 -type state() :: #state{}.
@@ -159,6 +161,12 @@ actor_init(Cluster, Actor, Incarnation, Env = #{timestamp := Now}) ->
             actor_init(Cluster, Actor, Incarnation, Env)
             actor_init(Cluster, Actor, Incarnation, Env)
     end.
     end.
 
 
+-spec is_present_incarnation(state()) -> boolean().
+is_present_incarnation(#state{extra = #{is_present_incarnation := IsNew}}) ->
+    IsNew;
+is_present_incarnation(_State) ->
+    false.
+
 mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
 mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
     %% NOTE
     %% NOTE
     %% We perform this heavy-weight transaction only in the case of a new route
     %% We perform this heavy-weight transaction only in the case of a new route
@@ -173,7 +181,7 @@ mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
     case mnesia:read(?EXTROUTE_ACTOR_TAB, ActorID, write) of
     case mnesia:read(?EXTROUTE_ACTOR_TAB, ActorID, write) of
         [#actor{incarnation = Incarnation, lane = Lane} = Rec] ->
         [#actor{incarnation = Incarnation, lane = Lane} = Rec] ->
             ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec#actor{until = bump_actor_ttl(TS)}, write),
             ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec#actor{until = bump_actor_ttl(TS)}, write),
-            {ok, State#state{lane = Lane}};
+            {ok, State#state{lane = Lane, extra = #{is_present_incarnation => true}}};
         [] ->
         [] ->
             Lane = mnesia_assign_lane(Cluster),
             Lane = mnesia_assign_lane(Cluster),
             Rec = #actor{
             Rec = #actor{
@@ -183,7 +191,7 @@ mnesia_actor_init(Cluster, Actor, Incarnation, TS) ->
                 until = bump_actor_ttl(TS)
                 until = bump_actor_ttl(TS)
             },
             },
             ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec, write),
             ok = mnesia:write(?EXTROUTE_ACTOR_TAB, Rec, write),
-            {ok, State#state{lane = Lane}};
+            {ok, State#state{lane = Lane, extra = #{is_present_incarnation => false}}};
         [#actor{incarnation = Outdated} = Rec] when Incarnation > Outdated ->
         [#actor{incarnation = Outdated} = Rec] when Incarnation > Outdated ->
             {reincarnate, Rec};
             {reincarnate, Rec};
         [#actor{incarnation = Newer}] ->
         [#actor{incarnation = Newer}] ->
@@ -321,7 +329,7 @@ mnesia_clean_incarnation(#actor{id = Actor, incarnation = Incarnation, lane = La
 clean_lane(Lane) ->
 clean_lane(Lane) ->
     ets:foldl(
     ets:foldl(
         fun(#extroute{entry = Entry, mcounter = MCounter}, _) ->
         fun(#extroute{entry = Entry, mcounter = MCounter}, _) ->
-            apply_operation(Entry, MCounter, del, Lane)
+            apply_operation(Entry, MCounter, delete, Lane)
         end,
         end,
         0,
         0,
         ?EXTROUTE_TAB
         ?EXTROUTE_TAB

+ 20 - 5
apps/emqx_cluster_link/src/emqx_cluster_link_mqtt.erl

@@ -71,6 +71,7 @@
 -define(F_TARGET_CLUSTER, 13).
 -define(F_TARGET_CLUSTER, 13).
 -define(F_PROTO_VER, 14).
 -define(F_PROTO_VER, 14).
 -define(F_RESULT, 15).
 -define(F_RESULT, 15).
+-define(F_NEED_BOOTSTRAP, 16).
 
 
 -define(ROUTE_DELETE, 100).
 -define(ROUTE_DELETE, 100).
 
 
@@ -279,18 +280,29 @@ actor_init_ack_resp_msg(Actor, InitRes, ReqId, RespTopic) ->
     Payload = #{
     Payload = #{
         ?F_OPERATION => ?OP_ACTOR_INIT_ACK,
         ?F_OPERATION => ?OP_ACTOR_INIT_ACK,
         ?F_PROTO_VER => ?PROTO_VER,
         ?F_PROTO_VER => ?PROTO_VER,
-        ?F_ACTOR => Actor,
-        ?F_RESULT => InitRes
+        ?F_ACTOR => Actor
     },
     },
+    Payload1 = with_res_and_bootstrap(Payload, InitRes),
     emqx_message:make(
     emqx_message:make(
         undefined,
         undefined,
         ?QOS_1,
         ?QOS_1,
         RespTopic,
         RespTopic,
-        ?ENCODE(Payload),
+        ?ENCODE(Payload1),
         #{},
         #{},
         #{properties => #{'Correlation-Data' => ReqId}}
         #{properties => #{'Correlation-Data' => ReqId}}
     ).
     ).
 
 
+with_res_and_bootstrap(Payload, {ok, ActorState}) ->
+    Payload#{
+        ?F_RESULT => ok,
+        ?F_NEED_BOOTSTRAP => not emqx_cluster_link_extrouter:is_present_incarnation(ActorState)
+    };
+with_res_and_bootstrap(Payload, Error) ->
+    Payload#{
+        ?F_RESULT => Error,
+        ?F_NEED_BOOTSTRAP => false
+    }.
+
 publish_route_sync(ClientPid, Actor, Incarnation, Updates) ->
 publish_route_sync(ClientPid, Actor, Incarnation, Updates) ->
     PubTopic = ?ROUTE_TOPIC,
     PubTopic = ?ROUTE_TOPIC,
     Payload = #{
     Payload = #{
@@ -339,9 +351,12 @@ decode_resp1(#{
     ?F_OPERATION := ?OP_ACTOR_INIT_ACK,
     ?F_OPERATION := ?OP_ACTOR_INIT_ACK,
     ?F_ACTOR := Actor,
     ?F_ACTOR := Actor,
     ?F_PROTO_VER := ProtoVer,
     ?F_PROTO_VER := ProtoVer,
-    ?F_RESULT := InitResult
+    ?F_RESULT := InitResult,
+    ?F_NEED_BOOTSTRAP := NeedBootstrap
 }) ->
 }) ->
-    {actor_init_ack, #{actor => Actor, result => InitResult, proto_ver => ProtoVer}}.
+    {actor_init_ack, #{
+        actor => Actor, result => InitResult, proto_ver => ProtoVer, need_bootstrap => NeedBootstrap
+    }}.
 
 
 decode_forwarded_msg(Payload) ->
 decode_forwarded_msg(Payload) ->
     case ?DECODE(Payload) of
     case ?DECODE(Payload) of

+ 12 - 17
apps/emqx_cluster_link/src/emqx_cluster_link_router_syncer.erl

@@ -164,14 +164,6 @@ refine_client_options(Options = #{clientid := ClientID}, Actor) ->
         retry_interval => 0
         retry_interval => 0
     }.
     }.
 
 
-client_session_present(ClientPid) ->
-    Info = emqtt:info(ClientPid),
-    %% FIXME: waitnig for emqtt release that fixes session_present type (must be a boolean)
-    case proplists:get_value(session_present, Info, 0) of
-        0 -> false;
-        1 -> true
-    end.
-
 announce_client(Actor, TargetCluster, Pid) ->
 announce_client(Actor, TargetCluster, Pid) ->
     Name =
     Name =
         case Actor of
         case Actor of
@@ -334,13 +326,15 @@ handle_info(
     {publish, #{payload := Payload, properties := #{'Correlation-Data' := ReqId}}},
     {publish, #{payload := Payload, properties := #{'Correlation-Data' := ReqId}}},
     St = #st{actor_init_req_id = ReqId}
     St = #st{actor_init_req_id = ReqId}
 ) ->
 ) ->
-    {actor_init_ack, #{result := Res} = AckInfoMap} = emqx_cluster_link_mqtt:decode_resp(Payload),
+    {actor_init_ack, #{result := Res, need_bootstrap := NeedBootstrap} = AckInfoMap} = emqx_cluster_link_mqtt:decode_resp(
+        Payload
+    ),
     St1 = St#st{
     St1 = St#st{
         actor_init_req_id = undefined, actor_init_timer = undefined, remote_actor_info = AckInfoMap
         actor_init_req_id = undefined, actor_init_timer = undefined, remote_actor_info = AckInfoMap
     },
     },
     case Res of
     case Res of
         ok ->
         ok ->
-            {noreply, post_actor_init(St1)};
+            {noreply, post_actor_init(St1, NeedBootstrap)};
         Error ->
         Error ->
             ?SLOG(error, #{
             ?SLOG(error, #{
                 msg => "failed_to_init_link",
                 msg => "failed_to_init_link",
@@ -410,10 +404,11 @@ init_remote_actor(
     St#st{actor_init_req_id = ReqId, actor_init_timer = TRef}.
     St#st{actor_init_req_id = ReqId, actor_init_timer = TRef}.
 
 
 post_actor_init(
 post_actor_init(
-    St = #st{client = ClientPid, target = TargetCluster, actor = Actor, incarnation = Incr}
+    St = #st{client = ClientPid, target = TargetCluster, actor = Actor, incarnation = Incr},
+    NeedBootstrap
 ) ->
 ) ->
     ok = start_syncer(TargetCluster, Actor, Incr),
     ok = start_syncer(TargetCluster, Actor, Incr),
-    process_bootstrap(St#st{client = ClientPid}).
+    process_bootstrap(St#st{client = ClientPid}, NeedBootstrap).
 
 
 handle_connect_error(_Reason, St) ->
 handle_connect_error(_Reason, St) ->
     %% TODO: logs
     %% TODO: logs
@@ -426,14 +421,14 @@ handle_client_down(_Reason, St = #st{target = TargetCluster, actor = Actor}) ->
     ok = close_syncer(TargetCluster, Actor),
     ok = close_syncer(TargetCluster, Actor),
     process_connect(St#st{client = undefined}).
     process_connect(St#st{client = undefined}).
 
 
-process_bootstrap(St = #st{bootstrapped = false}) ->
+process_bootstrap(St = #st{bootstrapped = false}, _NeedBootstrap) ->
     run_bootstrap(St);
     run_bootstrap(St);
-process_bootstrap(St = #st{client = ClientPid, bootstrapped = true}) ->
-    case client_session_present(ClientPid) of
+process_bootstrap(St = #st{bootstrapped = true}, NeedBootstrap) ->
+    case NeedBootstrap of
         true ->
         true ->
-            process_bootstrapped(St);
+            run_bootstrap(St);
         false ->
         false ->
-            run_bootstrap(St)
+            process_bootstrapped(St)
     end.
     end.
 
 
 %% Bootstrapping.
 %% Bootstrapping.