Procházet zdrojové kódy

fix: sys_mem alarm is not triggered after reboot.

zhongwencool před 4 roky
rodič
revize
78f294cda2

+ 26 - 0
src/emqx.appup.src

@@ -7,6 +7,7 @@
     , {load_module,emqx_channel,brutal_purge,soft_purge,[]}
     , {load_module,emqx_session,brutal_purge,soft_purge,[]}
     , {load_module,emqx_alarm,brutal_purge,soft_purge,[]}
+    , {load_module,emqx_os_mon,brutal_purge,soft_purge,[]}
     , {load_module,emqx_limiter,brutal_purge,soft_purge,[]}
     ]},
    {"4.3.11",
@@ -20,6 +21,7 @@
      {load_module,emqx_sys_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_http_lib,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.10",
@@ -34,6 +36,7 @@
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_connection,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.9",
@@ -53,6 +56,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.8",
@@ -72,6 +76,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.7",
@@ -93,6 +98,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.6",
@@ -115,6 +121,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.5",
@@ -138,6 +145,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.4",
@@ -162,6 +170,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.3",
@@ -187,6 +196,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.2",
@@ -212,6 +222,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.1",
@@ -241,6 +252,7 @@
      {load_module,emqx_mqueue,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.0",
@@ -273,6 +285,7 @@
      {load_module,emqx_ctl,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {<<".*">>,[]}],
@@ -282,6 +295,7 @@
     , {load_module,emqx_metrics,brutal_purge,soft_purge,[]}
     , {load_module,emqx_session,brutal_purge,soft_purge,[]}
     , {load_module,emqx_alarm,brutal_purge,soft_purge,[]}
+    , {load_module,emqx_os_mon,brutal_purge,soft_purge,[]}
     , {load_module,emqx_limiter,brutal_purge,soft_purge,[]}
     ]},
    {"4.3.11",
@@ -294,6 +308,7 @@
      {load_module,emqx_sys_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_http_lib,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.10",
@@ -307,6 +322,7 @@
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_connection,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.9",
@@ -325,6 +341,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.8",
@@ -343,6 +360,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.7",
@@ -363,6 +381,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.6",
@@ -384,6 +403,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.5",
@@ -406,6 +426,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.4",
@@ -429,6 +450,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.3",
@@ -453,6 +475,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.2",
@@ -477,6 +500,7 @@
      {load_module,emqx_frame,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.1",
@@ -505,6 +529,7 @@
      {load_module,emqx_ctl,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {"4.3.0",
@@ -535,6 +560,7 @@
      {load_module,emqx_mqueue,brutal_purge,soft_purge,[]},
      {load_module,emqx_rpc,brutal_purge,soft_purge,[]},
      {load_module,emqx_alarm,brutal_purge,soft_purge,[]},
+     {load_module,emqx_os_mon,brutal_purge,soft_purge,[]},
      {load_module,emqx_app,brutal_purge,soft_purge,[]},
      {load_module,emqx_limiter,brutal_purge,soft_purge,[]}]},
    {<<".*">>,[]}]}.

+ 0 - 2
src/emqx.erl

@@ -228,7 +228,6 @@ shutdown() ->
 
 shutdown(Reason) ->
     ?LOG(critical, "emqx shutdown for ~s", [Reason]),
-    _ = emqx_alarm_handler:unload(),
     _ = emqx_plugins:unload(),
     lists:foreach(fun application:stop/1
                  , lists:reverse(default_started_applications())
@@ -254,4 +253,3 @@ reload_config(ConfFile) ->
     lists:foreach(fun({App, Vals}) ->
                       [application:set_env(App, Par, Val) || {Par, Val} <- Vals]
                   end, Conf).
-

+ 3 - 0
src/emqx_alarm.erl

@@ -165,6 +165,8 @@ init([Opts]) ->
     Actions = proplists:get_value(actions, Opts),
     SizeLimit = proplists:get_value(size_limit, Opts),
     ValidityPeriod = timer:seconds(proplists:get_value(validity_period, Opts)),
+    ok = emqx_alarm_handler:load(),
+    process_flag(trap_exit, true),
     {ok, ensure_delete_timer(#state{actions = Actions,
                                     size_limit = SizeLimit,
                                     validity_period = ValidityPeriod})}.
@@ -234,6 +236,7 @@ handle_info(Info, State) ->
     {noreply, State}.
 
 %% terminate/2 callback: calls emqx_alarm_handler:unload/0, ignoring its result,
 %% and returns ok. The termination reason and the state are not used.
 terminate(_Reason, _State) ->
+    _ = emqx_alarm_handler:unload(),
     ok.
 
 code_change(_OldVsn, State, _Extra) ->

+ 0 - 1
src/emqx_app.erl

@@ -50,7 +50,6 @@ start(_Type, _Args) ->
     _ = emqx_plugins:load(),
     _ = start_ce_modules(),
     register(emqx, self()),
-    ok = emqx_alarm_handler:load(),
     print_vsn(),
     {ok, Sup}.
 

+ 20 - 1
src/emqx_os_mon.erl

@@ -105,8 +105,10 @@ call(Req) ->
 
 init([Opts]) ->
     set_mem_check_interval(proplists:get_value(mem_check_interval, Opts)),
-    set_sysmem_high_watermark(proplists:get_value(sysmem_high_watermark, Opts)),
+    SysHW = proplists:get_value(sysmem_high_watermark, Opts),
+    set_sysmem_high_watermark(SysHW),
     set_procmem_high_watermark(proplists:get_value(procmem_high_watermark, Opts)),
+    ensure_system_memory_alarm(SysHW),
     {ok, ensure_check_timer(#{cpu_high_watermark => proplists:get_value(cpu_high_watermark, Opts),
                               cpu_low_watermark => proplists:get_value(cpu_low_watermark, Opts),
                               cpu_check_interval => proplists:get_value(cpu_check_interval, Opts),
@@ -177,3 +179,20 @@ ensure_check_timer(State = #{cpu_check_interval := Interval}) ->
         "x86_64-pc-linux-musl" -> State;
         _ -> State#{timer := emqx_misc:start_timer(timer:seconds(Interval), check)}
     end.
+
+%% At startup, memsup starts first and checks for memory alarms,
+%% but emqx_alarm_handler has not yet replaced alarm_handler at that point,
+%% so the notification goes to alarm_handler directly (normally emqx_alarm_handler should be used).
+%% memsup will not trigger an event again for an alarm it has already raised,
+%% and there is no exported function to clear that flag,
+%% so the memory usage can only be checked again here at startup.
+%%
+%% HW is the system memory high watermark, compared against the used-memory
+%% percentage. When memsup is not registered this is a no-op returning ok.
+%% Note: memsup:get_memory_data/0 returns {Total, Allocated, Worst}, in that order.
+ensure_system_memory_alarm(HW) ->
+    case erlang:whereis(memsup) of
+        undefined -> ok;
+        _Pid ->
+            {Total, Allocated, _Worst} = memsup:get_memory_data(),
+            %% Guard against a zero total before dividing.
+            case Total =/= 0 andalso Allocated/Total * 100 >= HW of
+                true -> emqx_alarm:activate(high_system_memory_usage, #{high_watermark => HW});
+                false -> ok
+            end
+    end.

+ 0 - 1
test/emqx_alarm_SUITE.erl

@@ -109,4 +109,3 @@ get_alarm(Name, [_Alarm | More]) ->
     get_alarm(Name, More);
 get_alarm(_Name, []) ->
     {error, not_found}.
-

+ 13 - 9
test/emqx_os_mon_SUITE.erl

@@ -24,10 +24,23 @@
 %% Test cases of this suite, as returned by emqx_ct:all/1 for this module.
 all() -> emqx_ct:all(?MODULE).
 
 %% Suite setup: boots modules and starts apps through emqx_ct_helpers. The fun
 %% passed to start_apps/2 sets the `os_mon` env of the emqx application when it
 %% is called with `emqx` and does nothing for any other argument. Afterwards the
 %% os_mon application is ensured to be started. Returns Config unchanged.
 init_per_suite(Config) ->
+    emqx_ct_helpers:boot_modules(all),
+    emqx_ct_helpers:start_apps([],
+        fun(emqx) ->
+            application:set_env(emqx, os_mon, [
+                {cpu_check_interval, 1},
+                {cpu_high_watermark, 5},
+                {cpu_low_watermark, 80},
+                {mem_check_interval, 60},
+                {sysmem_high_watermark, 70},
+                {procmem_high_watermark, 5}]);
+            (_) -> ok
+        end),
     application:ensure_all_started(os_mon),
     Config.
 
 %% Suite teardown: stops the apps via emqx_ct_helpers:stop_apps/1, then stops
 %% the os_mon application. The result of application:stop/1 is the return value.
 end_per_suite(_Config) ->
+    emqx_ct_helpers:stop_apps([]),
     application:stop(os_mon).
 
 % t_set_mem_check_interval(_) ->
@@ -40,13 +53,6 @@ end_per_suite(_Config) ->
 %     error('TODO').
 
 t_api(_) ->
-    gen_event:swap_handler(alarm_handler, {emqx_alarm_handler, swap}, {alarm_handler, []}),
-    {ok, _} = emqx_os_mon:start_link([{cpu_check_interval, 1},
-                                      {cpu_high_watermark, 5},
-                                      {cpu_low_watermark, 80},
-                                      {mem_check_interval, 60},
-                                      {sysmem_high_watermark, 70},
-                                      {procmem_high_watermark, 5}]),
     ?assertEqual(1, emqx_os_mon:get_cpu_check_interval()),
     ?assertEqual(5, emqx_os_mon:get_cpu_high_watermark()),
     ?assertEqual(80, emqx_os_mon:get_cpu_low_watermark()),
@@ -55,7 +61,6 @@ t_api(_) ->
     ?assertEqual(5, emqx_os_mon:get_procmem_high_watermark()),
     % timer:sleep(2000),
     % ?assertEqual(true, lists:keymember(cpu_high_watermark, 1, alarm_handler:get_alarms())),
-
     emqx_os_mon:set_cpu_check_interval(0.05),
     emqx_os_mon:set_cpu_high_watermark(80),
     emqx_os_mon:set_cpu_low_watermark(75),
@@ -69,4 +74,3 @@ t_api(_) ->
     emqx_os_mon ! ignored,
     gen_server:stop(emqx_os_mon),
     ok.
-