Przeglądaj źródła

fix: start os_mon application temporary

zhongwencool 2 lat temu
rodzic
commit
b817e03c08

+ 11 - 7
apps/emqx/src/emqx_os_mon.erl

@@ -38,15 +38,14 @@
 %% gen_server callbacks
 -export([
     init/1,
+    handle_continue/2,
     handle_call/3,
     handle_cast/2,
     handle_info/2,
     terminate/2,
     code_change/3
 ]).
--ifdef(TEST).
--export([is_sysmem_check_supported/0]).
--endif.
+-export([is_os_check_supported/0]).
 
 -include("emqx.hrl").
 
@@ -83,12 +82,17 @@ current_sysmem_percent() ->
 %%--------------------------------------------------------------------
 
 init([]) ->
+    %% start os_mon temporarily
+    {ok, _} = application:ensure_all_started(os_mon),
+    {ok, undefined, {continue, setup}}.
+
+handle_continue(setup, undefined) ->
     %% memsup is not reliable, ignore
     memsup:set_sysmem_high_watermark(1.0),
     SysHW = init_os_monitor(),
     MemRef = start_mem_check_timer(),
     CpuRef = start_cpu_check_timer(),
-    {ok, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
+    {noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.
 
 init_os_monitor() ->
     init_os_monitor(emqx:get_config([sysmon, os])).
@@ -182,12 +186,12 @@ start_cpu_check_timer() ->
         _ -> start_timer(Interval, cpu_check)
     end.
 
-is_sysmem_check_supported() ->
+is_os_check_supported() ->
     {unix, linux} =:= os:type().
 
 start_mem_check_timer() ->
     Interval = emqx:get_config([sysmon, os, mem_check_interval]),
-    case is_integer(Interval) andalso is_sysmem_check_supported() of
+    case is_integer(Interval) andalso is_os_check_supported() of
         true ->
             start_timer(Interval, mem_check);
         false ->
@@ -205,7 +209,7 @@ update_mem_alarm_status(HWM) when HWM > 1.0 orelse HWM < 0.0 ->
         <<"Deactivated mem usage alarm due to out of range threshold">>
     );
 update_mem_alarm_status(HWM) ->
-    is_sysmem_check_supported() andalso
+    is_os_check_supported() andalso
         do_update_mem_alarm_status(HWM),
     ok.
 

+ 1 - 1
apps/emqx/src/emqx_schema.erl

@@ -3659,7 +3659,7 @@ shared_subscription_strategy() ->
         )}.
 
 default_mem_check_interval() ->
-    case emqx_sys_sup:is_os_mon_supported() of
+    case emqx_os_mon:is_os_check_supported() of
         true -> <<"60s">>;
         false -> disabled
     end.

+ 6 - 2
apps/emqx/src/emqx_sys_mon.erl

@@ -29,6 +29,7 @@
 %% gen_server callbacks
 -export([
     init/1,
+    handle_continue/2,
     handle_call/3,
     handle_cast/2,
     handle_info/2,
@@ -70,11 +71,14 @@ update(VM) ->
 
 init([]) ->
     emqx_logger:set_proc_metadata(#{sysmon => true}),
-    init_system_monitor(),
+    {ok, undefined, {continue, setup}}.
 
+handle_continue(setup, undefined) ->
+    init_system_monitor(),
     %% Monitor cluster partition event
     ekka:monitor(partition, fun handle_partition_event/1),
-    {ok, start_timer(#{timer => undefined, events => []})}.
+    NewState = start_timer(#{timer => undefined, events => []}),
+    {noreply, NewState, hibernate}.
 
 start_timer(State) ->
     State#{timer := emqx_utils:start_timer(timer:seconds(2), reset)}.

+ 1 - 6
apps/emqx/src/emqx_sys_sup.erl

@@ -19,8 +19,6 @@
 -behaviour(supervisor).
 
 -export([start_link/0]).
--export([is_os_mon_supported/0]).
-
 -export([init/1]).
 
 start_link() ->
@@ -28,7 +26,7 @@ start_link() ->
 
 init([]) ->
     OsMon =
-        case is_os_mon_supported() of
+        case emqx_os_mon:is_os_check_supported() of
             true -> [child_spec(emqx_os_mon)];
             false -> []
         end,
@@ -45,9 +43,6 @@ init([]) ->
 %% Internal functions
 %%--------------------------------------------------------------------
 
-is_os_mon_supported() ->
-    erlang:function_exported(memsup, get_procmem_high_watermark, 0).
-
 child_spec(Mod) ->
     child_spec(Mod, []).
 

+ 22 - 10
apps/emqx/src/emqx_vm.erl

@@ -44,7 +44,7 @@
     get_otp_version/0
 ]).
 
--export([cpu_util/0]).
+-export([cpu_util/0, cpu_util/1]).
 
 -ifdef(TEST).
 -compile(export_all).
@@ -378,18 +378,30 @@ avg15() ->
 cpu_util() ->
     compat_windows(fun cpu_sup:util/0).
 
+cpu_util(Args) ->
+    compat_windows(fun cpu_sup:util/1, Args).
+
 compat_windows(Fun) ->
-    case os:type() of
-        {win32, nt} ->
-            0.0;
-        _Type ->
-            case catch Fun() of
-                Val when is_float(Val) -> floor(Val * 100) / 100;
-                Val when is_number(Val) -> Val;
-                _Error -> 0.0
-            end
+    case compat_windows(Fun, []) of
+        Val when is_float(Val) -> floor(Val * 100) / 100;
+        Val when is_number(Val) -> Val;
+        _ -> 0.0
     end.
 
+compat_windows(Fun, Args) ->
+    try
+        case is_windows() of
+            true -> 0.0;
+            false when Args =:= [] -> Fun();
+            false -> Fun(Args)
+        end
+    catch
+        _:_ -> 0.0
+    end.
+
+is_windows() ->
+    os:type() =:= {win32, nt}.
+
 load(Avg) ->
     floor((Avg / 256) * 100) / 100.
 

+ 33 - 15
apps/emqx/test/emqx_os_mon_SUITE.erl

@@ -39,29 +39,47 @@ init_per_testcase(t_cpu_check_alarm, Config) ->
         %% 200ms
         cpu_check_interval => 200
     }),
-    ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
-    {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
+    restart_os_mon(),
     Config;
 init_per_testcase(t_sys_mem_check_alarm, Config) ->
-    case emqx_os_mon:is_sysmem_check_supported() of
+    case emqx_os_mon:is_os_check_supported() of
         true ->
             SysMon = emqx_config:get([sysmon, os], #{}),
             emqx_config:put([sysmon, os], SysMon#{
                 sysmem_high_watermark => 0.51,
                 %% 200ms
                 mem_check_interval => 200
-            }),
-            ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
-            {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon),
-            Config;
+            });
         false ->
-            Config
-    end;
+            ok
+    end,
+    restart_os_mon(),
+    Config;
 init_per_testcase(_, Config) ->
-    emqx_common_test_helpers:boot_modules(all),
-    emqx_common_test_helpers:start_apps([]),
+    restart_os_mon(),
     Config.
 
+restart_os_mon() ->
+    case emqx_os_mon:is_os_check_supported() of
+        true ->
+            ok = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
+            {ok, _} = supervisor:restart_child(emqx_sys_sup, emqx_os_mon);
+        false ->
+            _ = supervisor:terminate_child(emqx_sys_sup, emqx_os_mon),
+            _ = supervisor:delete_child(emqx_sys_sup, emqx_os_mon),
+            %% emqx_sys_sup does not start emqx_os_mon on mac/windows; start it here so the tests can still run.
+            Mod = emqx_os_mon,
+            OsMon = #{
+                id => Mod,
+                start => {Mod, start_link, []},
+                restart => permanent,
+                shutdown => 5000,
+                type => worker,
+                modules => [Mod]
+            },
+            {ok, _} = supervisor:start_child(emqx_sys_sup, OsMon)
+    end.
+
 t_api(_) ->
     ?assertEqual(0.7, emqx_os_mon:get_sysmem_high_watermark()),
     ?assertEqual(ok, emqx_os_mon:set_sysmem_high_watermark(0.8)),
@@ -81,7 +99,7 @@ t_api(_) ->
     ok.
 
 t_sys_mem_check_disable(Config) ->
-    case emqx_os_mon:is_sysmem_check_supported() of
+    case emqx_os_mon:is_os_check_supported() of
         true -> do_sys_mem_check_disable(Config);
         false -> skip
     end.
@@ -100,7 +118,7 @@ do_sys_mem_check_disable(_Config) ->
     ok.
 
 t_sys_mem_check_alarm(Config) ->
-    case emqx_os_mon:is_sysmem_check_supported() of
+    case emqx_os_mon:is_os_check_supported() of
         true -> do_sys_mem_check_alarm(Config);
         false -> skip
     end.
@@ -167,7 +185,7 @@ t_cpu_check_alarm(_) ->
         util,
         fun() -> CpuUtil end,
         fun() ->
-            timer:sleep(500),
+            timer:sleep(1000),
             Alarms = emqx_alarm:get_alarms(activated),
             ?assert(
                 emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
@@ -193,7 +211,7 @@ t_cpu_check_alarm(_) ->
             ?assert(is_binary(Msg)),
             emqx_config:put([sysmon, os, cpu_high_watermark], 1),
             emqx_config:put([sysmon, os, cpu_low_watermark], 0.96),
-            timer:sleep(500),
+            timer:sleep(800),
             ?assertNot(
                 emqx_vm_mon_SUITE:is_existing(high_cpu_usage, emqx_alarm:get_alarms(activated))
             )

+ 2 - 1
apps/emqx_machine/priv/reboot_lists.eterm

@@ -17,7 +17,8 @@
             asn1,
             syntax_tools,
             ssl,
-            os_mon,
+            %% Loaded only; emqx starts it as a temporary application so that an os_mon crash cannot bring down the VM.
+            {os_mon, load},
             inets,
             compiler,
             runtime_tools,

+ 8 - 5
apps/emqx_management/src/emqx_mgmt.erl

@@ -197,13 +197,16 @@ vm_stats() ->
     ].
 
 vm_stats('cpu.idle') ->
-    case cpu_sup:util([detailed]) of
-        %% Not support for Windows
-        {_, 0, 0, _} -> 0;
-        {_Num, _Use, IdleList, _} -> proplists:get_value(idle, IdleList, 0)
+    case emqx_vm:cpu_util([detailed]) of
+        {_Num, _Use, List, _} when is_list(List) -> proplists:get_value(idle, List, 0);
+        %% 0.0 on Windows or on error; {all, 0, 0, []} (cpu_sup not started) is matched by the clause above
+        _ -> 0
     end;
 vm_stats('cpu.use') ->
-    100 - vm_stats('cpu.idle');
+    case vm_stats('cpu.idle') of
+        0 -> 0;
+        Idle -> 100 - Idle
+    end;
 vm_stats('total.memory') ->
     {_, MemTotal} = get_sys_memory(),
     MemTotal;

+ 2 - 0
changes/ce/fix-11445.en.md

@@ -0,0 +1,2 @@
+Removed os_mon application monitor support on Windows platforms to prevent VM crashes.
+Functionality remains on non-Windows platforms.