Просмотр исходного кода

fix: stop otel deps application before reboot

zhongwencool 2 лет назад
Родитель
Сommit
c215fe3736

+ 2 - 0
apps/emqx_machine/src/emqx_machine_boot.erl

@@ -62,6 +62,8 @@ stop_apps() ->
     ?SLOG(notice, #{msg => "stopping_emqx_apps"}),
     _ = emqx_alarm_handler:unload(),
     ok = emqx_conf_app:unset_config_loaded(),
+    %% Mute otel deps application.
+    _ = emqx_otel:stop_otel(),
     lists:foreach(fun stop_one_app/1, lists:reverse(sorted_reboot_apps())).
 
 %% Those port apps are terminated after the main apps

+ 22 - 2
apps/emqx_opentelemetry/src/emqx_otel.erl

@@ -17,10 +17,25 @@
 -module(emqx_otel).
 -include_lib("emqx/include/logger.hrl").
 
--export([start_link/1, cleanup/0]).
+-export([start_otel/1, stop_otel/0]).
 -export([get_cluster_gauge/1, get_stats_gauge/1, get_vm_gauge/1, get_metric_counter/1]).
+-export([start_link/1]).
 -export([init/1, handle_continue/2, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
 
+-define(SUPERVISOR, emqx_otel_sup).
+
+start_otel(Conf) -> %% Start the emqx_otel worker under ?SUPERVISOR; returns ok | {error, Reason}.
+    Spec = emqx_otel_sup:worker_spec(?MODULE, Conf),
+    assert_started(supervisor:start_child(?SUPERVISOR, Spec)). %% an already started child is reported as ok
+
+stop_otel() -> %% Run cleanup/0, then terminate and delete the emqx_otel child of ?SUPERVISOR.
+    ok = cleanup(), %% crashes with badmatch if cleanup/0 returns anything but ok
+    case supervisor:terminate_child(?SUPERVISOR, ?MODULE) of
+        ok -> supervisor:delete_child(?SUPERVISOR, ?MODULE); %% drop the child spec so start_otel/1 can add it again
+        {error, not_found} -> ok; %% no such child under the supervisor: nothing to stop
+        Error -> Error %% any other result is returned to the caller unchanged
+    end.
+
 start_link(Conf) ->
     gen_server:start_link({local, ?MODULE}, ?MODULE, Conf, []).
 
@@ -47,7 +62,7 @@ setup(Conf = #{enable := true}) ->
     ensure_apps(Conf),
     create_metric_views();
 setup(_Conf) ->
-    cleanup(),
+    ok = cleanup(),
     ok.
 
 ensure_apps(Conf) ->
@@ -225,3 +240,8 @@ create_counter(Meter, Counters, CallBack) ->
 
 normalize_name(Name) ->
     list_to_existing_atom(lists:flatten(string:replace(atom_to_list(Name), "_", ".", all))).
+
+assert_started({ok, _Pid}) -> ok; %% Normalize a supervisor:start_child/2 result to ok | {error, Reason}.
+assert_started({ok, _Pid, _Info}) -> ok;
+assert_started({error, {already_started, _Pid}}) -> ok; %% a child that is already running counts as success
+assert_started({error, Reason}) -> {error, Reason}. %% despite the name, other errors are returned, not raised

+ 3 - 3
apps/emqx_opentelemetry/src/emqx_otel_config.erl

@@ -52,7 +52,7 @@ post_config_update(_ConfPath, _Req, _NewConf, _OldConf, _AppEnvs) ->
     ok.
 
 ensure_otel(#{enable := true} = Conf) ->
-    _ = emqx_otel_sup:stop_otel(),
-    emqx_otel_sup:start_otel(Conf);
+    _ = emqx_otel:stop_otel(),
+    emqx_otel:start_otel(Conf);
 ensure_otel(#{enable := false}) ->
-    emqx_otel_sup:stop_otel().
+    emqx_otel:stop_otel().

+ 12 - 32
apps/emqx_opentelemetry/src/emqx_otel_sup.erl

@@ -19,36 +19,21 @@
 
 -export([start_link/0]).
 -export([init/1]).
--export([start_otel/1]).
--export([stop_otel/0]).
-
--define(CHILD(Mod, Opts), #{
-    id => Mod,
-    start => {Mod, start_link, [Opts]},
-    restart => permanent,
-    shutdown => 5000,
-    type => worker,
-    modules => [Mod]
-}).
-
--define(WORKER, emqx_otel).
+-export([worker_spec/2]).
+
+worker_spec(Mod, Opts) -> %% Build the supervisor child spec for a worker started via Mod:start_link(Opts).
+    #{
+        id => Mod, %% the module name doubles as the child id
+        start => {Mod, start_link, [Opts]},
+        restart => permanent,
+        shutdown => 5000,
+        type => worker,
+        modules => [Mod]
+    }.
 
 start_link() ->
     supervisor:start_link({local, ?MODULE}, ?MODULE, []).
 
--spec start_otel(map()) -> ok.
-start_otel(Conf) ->
-    assert_started(supervisor:start_child(?MODULE, ?CHILD(?WORKER, Conf))).
-
--spec stop_otel() -> ok | {error, term()}.
-stop_otel() ->
-    ok = emqx_otel:cleanup(),
-    case supervisor:terminate_child(?MODULE, ?WORKER) of
-        ok -> supervisor:delete_child(?MODULE, ?WORKER);
-        {error, not_found} -> ok;
-        Error -> Error
-    end.
-
 init([]) ->
     SupFlags = #{
         strategy => one_for_one,
@@ -58,11 +43,6 @@ init([]) ->
     Children =
         case emqx_conf:get([opentelemetry]) of
             #{enable := false} -> [];
-            #{enable := true} = Conf -> [?CHILD(?WORKER, Conf)]
+            #{enable := true} = Conf -> [worker_spec(emqx_otel, Conf)]
         end,
     {ok, {SupFlags, Children}}.
-
-assert_started({ok, _Pid}) -> ok;
-assert_started({ok, _Pid, _Info}) -> ok;
-assert_started({error, {already_started, _Pid}}) -> ok;
-assert_started({error, Reason}) -> {error, Reason}.