Browse Source

fix(bridge): check health immediately after updated

Shawn cách đây 4 năm
mục cha
commit
11736dc1d7

+ 1 - 1
apps/emqx_bridge/src/emqx_bridge.erl

@@ -247,7 +247,7 @@ update(Type, Name, {OldConf, Conf}) ->
                     ?SLOG(warning, #{ msg => "updating_a_non-exist_bridge_need_create_a_new_one"
                                     , type => Type, name => Name, config => Conf}),
                     create(Type, Name, Conf);
-                {error, Reason} -> {update_bridge_failed, Reason}
+                {error, Reason} -> {error, {update_bridge_failed, Reason}}
             end;
         true ->
             %% we don't need to recreate the bridge if this config change is only to

+ 8 - 10
apps/emqx_resource/src/emqx_resource_health_check.erl

@@ -36,28 +36,25 @@ start_link(Name, Sleep) ->
     {ok, Pid}.
 
 create_checker(Name, Sleep) ->
+    create_checker(Name, Sleep, false).
+
+create_checker(Name, Sleep, Retry) ->
     case supervisor:start_child(?SUP, child_spec(Name, Sleep)) of
         {ok, _} -> ok;
         {error, already_present} -> ok;
-        {error, {already_started, _}} ->
+        {error, {already_started, _}} when Retry == false ->
             ok = delete_checker(Name),
-            create_checker(Name, Sleep);
+            create_checker(Name, Sleep, true);
         Error -> Error
     end.
 
 delete_checker(Name) ->
-    case supervisor:terminate_child(?SUP, {health_check, Name}) of
-        ok ->
-            case supervisor:delete_child(?SUP, {health_check, Name}) of
-                {error, not_found} -> ok;
-                Error -> Error
-            end;
-        {error, not_found} -> ok;
+    case supervisor:terminate_child(?SUP, ?ID(Name)) of
+        ok -> supervisor:delete_child(?SUP, ?ID(Name));
         Error -> Error
 	end.
 
 health_check(Name, SleepTime) ->
-    timer:sleep(SleepTime),
     case emqx_resource:health_check(Name) of
         ok ->
             emqx_alarm:deactivate(Name);
@@ -65,4 +62,5 @@ health_check(Name, SleepTime) ->
             emqx_alarm:activate(Name, #{name => Name},
                 <<Name/binary, " health check failed">>)
     end,
+    timer:sleep(SleepTime),
     health_check(Name, SleepTime).

+ 15 - 12
apps/emqx_resource/src/emqx_resource_instance.erl

@@ -140,11 +140,14 @@ code_change(_OldVsn, State, _Extra) ->
 %%------------------------------------------------------------------------------
 
 %% suppress the race condition check, as these functions are protected in gproc workers
--dialyzer({nowarn_function, [do_recreate/4,
-                             do_create/4,
-                             do_restart/2,
-                             do_stop/1,
-                             do_health_check/1]}).
+-dialyzer({nowarn_function, [ do_recreate/4
+                            , do_create/4
+                            , do_restart/2
+                            , do_start/4
+                            , do_stop/1
+                            , do_health_check/1
+                            , start_and_check/5
+                            ]}).
 
 do_recreate(InstId, ResourceType, NewConfig, Opts) ->
     case lookup(InstId) of
@@ -183,12 +186,12 @@ do_create(InstId, ResourceType, Config, Opts) ->
 
 do_create_dry_run(ResourceType, Config) ->
     InstId = make_test_id(),
-    Opts = #{async_create => false},
-    case do_create(InstId, ResourceType, Config, Opts) of
-        {ok, Data} ->
-            Return = do_health_check(Data),
-            _ = do_remove(Data),
-            Return;
+    case emqx_resource:call_start(InstId, ResourceType, Config) of
+        {ok, ResourceState} ->
+            case emqx_resource:call_health_check(InstId, ResourceType, ResourceState) of
+                {ok, _} -> ok;
+                {error, Reason, _} -> {error, Reason}
+            end;
         {error, Reason} ->
             {error, Reason}
     end.
@@ -252,7 +255,7 @@ do_stop(#{state := undefined}) ->
     ok;
 do_stop(#{id := InstId, mod := Mod, state := ResourceState} = Data) ->
     _ = emqx_resource:call_stop(InstId, Mod, ResourceState),
-    ok = emqx_resource_health_check:delete_checker(InstId),
+    _ = emqx_resource_health_check:delete_checker(InstId),
     ets:insert(emqx_resource_instance, {InstId, Data#{status => stopped}}),
     ok.