Explorar o código

fix: /api/nodes times out if emqx is under high load

Zhongwen Deng hai 3 anos
pai
achega
2d67bb3fb6

+ 5 - 3
apps/emqx/src/emqx_os_mon.erl

@@ -130,8 +130,10 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat
 handle_info({timeout, _Timer, cpu_check}, State) ->
     CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
     CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
-    case emqx_vm:cpu_util() of
-        0 ->
+    CPUVal = emqx_vm:cpu_util(),
+    case CPUVal of
+        %% 0 or 0.0
+        Busy when Busy == 0 ->
             ok;
         Busy when Busy > CPUHighWatermark ->
             _ = emqx_alarm:activate(
@@ -236,5 +238,5 @@ do_update_mem_alarm_status(HWM0) ->
     ok.
 
 usage_msg(Usage, What) ->
-    %% devide by 1.0 to ensure float point number
+    %% divide by 1.0 to ensure float point number
     iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])).

+ 7 - 4
apps/emqx/src/emqx_vm.erl

@@ -232,8 +232,10 @@ mem_info() ->
     Free = proplists:get_value(free_memory, Dataset),
     [{total_memory, Total}, {used_memory, Total - Free}].
 
-ftos(F) ->
-    io_lib:format("~.2f", [F / 1.0]).
+%% Render a number as a binary with exactly two decimal places.
+%% Integers are promoted to floats first, then formatted the same way.
+ftos(Int) when is_integer(Int) ->
+    ftos(Int / 1.0);
+ftos(Float) when is_float(Float) ->
+    float_to_binary(Float, [{decimals, 2}]).
 
 %%%% erlang vm scheduler_usage  fun copied from recon
 scheduler_usage(Interval) when is_integer(Interval) ->
@@ -391,11 +393,12 @@ cpu_util() ->
 compat_windows(Fun) ->
     case os:type() of
         {win32, nt} ->
-            0;
+            0.0;
         _Type ->
             case catch Fun() of
+                Val when is_float(Val) -> floor(Val * 100) / 100;
                 Val when is_number(Val) -> Val;
-                _Error -> 0
+                _Error -> 0.0
             end
     end.
 

+ 5 - 2
apps/emqx/src/emqx_vm_mon.erl

@@ -63,7 +63,7 @@ handle_info({timeout, _Timer, check}, State) ->
     ProcessCount = erlang:system_info(process_count),
     case ProcessCount / erlang:system_info(process_limit) of
         Percent when Percent > ProcHighWatermark ->
-            Usage = io_lib:format("~p%", [Percent * 100]),
+            Usage = usage(Percent),
             Message = [Usage, " process usage"],
             emqx_alarm:activate(
                 too_many_processes,
@@ -75,7 +75,7 @@ handle_info({timeout, _Timer, check}, State) ->
                 Message
             );
         Percent when Percent < ProcLowWatermark ->
-            Usage = io_lib:format("~p%", [Percent * 100]),
+            Usage = usage(Percent),
             Message = [Usage, " process usage"],
             emqx_alarm:ensure_deactivated(
                 too_many_processes,
@@ -108,3 +108,6 @@ code_change(_OldVsn, State, _Extra) ->
 start_check_timer() ->
     Interval = emqx:get_config([sysmon, vm, process_check_interval]),
     emqx_misc:start_timer(Interval, check).
+
+%% Turn a ratio (e.g. 0.5) into a whole-number percentage string ("50%").
+%% The percentage is rounded down, never up.
+usage(Percent) ->
+    Whole = floor(Percent * 100),
+    integer_to_list(Whole) ++ [$%].

+ 1 - 1
apps/emqx_management/src/emqx_mgmt.erl

@@ -150,7 +150,7 @@ node_info() ->
 get_sys_memory() ->
     case os:type() of
         {unix, linux} ->
-            load_ctl:get_sys_memory();
+            emqx_mgmt_sys_memory:get_sys_memory();
         _ ->
             {0, 0}
     end.

+ 12 - 1
apps/emqx_management/src/emqx_mgmt_sup.erl

@@ -26,4 +26,15 @@ start_link() ->
     supervisor:start_link({local, ?MODULE}, ?MODULE, []).
 
 init([]) ->
-    {ok, {{one_for_one, 1, 5}, []}}.
+    LC = child_spec(emqx_mgmt_sys_memory, 5000, worker),
+    {ok, {{one_for_one, 1, 5}, [LC]}}.
+
+%% Build a permanent supervisor child specification for Mod, which is
+%% started through Mod:start_link/0.
+child_spec(Mod, Shutdown, Type) ->
+    StartMFA = {Mod, start_link, []},
+    #{
+        id => Mod,
+        type => Type,
+        restart => permanent,
+        shutdown => Shutdown,
+        start => StartMFA,
+        modules => [Mod]
+    }.

+ 79 - 0
apps/emqx_management/src/emqx_mgmt_sys_memory.erl

@@ -0,0 +1,79 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+-module(emqx_mgmt_sys_memory).
+
+-behaviour(gen_server).
+-define(SYS_MEMORY_CACHE_KEY, ?MODULE).
+-define(TIMEOUT, 3000).
+
+-export([start_link/0, get_sys_memory/0, get_sys_memory/1]).
+-export([
+    init/1,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    terminate/2,
+    code_change/3
+]).
+
+%% Fetch the system memory figures, waiting at most ?TIMEOUT milliseconds
+%% for the server before falling back to the cached value.
+get_sys_memory() -> get_sys_memory(?TIMEOUT).
+
+%% Ask the server for the system memory figures. If the call does not
+%% complete within Timeout milliseconds, answer from the cache instead;
+%% any other exit from the call is propagated to the caller.
+get_sys_memory(Timeout) ->
+    try gen_server:call(?MODULE, get_sys_memory, Timeout) of
+        Memory ->
+            Memory
+    catch
+        exit:{timeout, _CallInfo} ->
+            get_memory_from_cache()
+    end.
+
+%% Start the server and register it locally under the module name.
+start_link() ->
+    ServerName = {local, ?MODULE},
+    gen_server:start_link(ServerName, ?MODULE, [], []).
+
+%% No sample has been taken yet: `undefined' forces the first request to
+%% query the OS, independent of where the monotonic clock happens to start.
+init([]) ->
+    {ok, #{last_time => undefined}}.
+
+%% Serve the system memory figures. load_ctl:get_sys_memory/0 is called at
+%% most once per ?TIMEOUT milliseconds; its result is stored in
+%% persistent_term and requests arriving in between are answered from that
+%% cache. Any other request is answered with `ok'.
+handle_call(get_sys_memory, _From, State = #{last_time := LastTime}) ->
+    %% Measure the cache age on the monotonic clock: wall-clock time
+    %% (erlang:system_time/1) is not guaranteed to move forward steadily,
+    %% so it is unsuitable for computing elapsed time.
+    Now = erlang:monotonic_time(millisecond),
+    case LastTime =:= undefined orelse Now - LastTime >= ?TIMEOUT of
+        true ->
+            Memory = load_ctl:get_sys_memory(),
+            persistent_term:put(?SYS_MEMORY_CACHE_KEY, Memory),
+            {reply, Memory, State#{last_time => Now}};
+        false ->
+            {reply, get_memory_from_cache(), State}
+    end;
+handle_call(_Request, _From, State = #{}) ->
+    {reply, ok, State}.
+
+%% Casts are not part of this server's protocol; ignore them.
+handle_cast(_Msg, #{} = State) ->
+    {noreply, State}.
+
+%% Discard any out-of-band message and keep the state untouched.
+handle_info(_Msg, #{} = State) ->
+    {noreply, State}.
+
+%% Nothing to clean up on shutdown.
+terminate(_Why, #{}) ->
+    ok.
+
+%% The state needs no conversion between code versions.
+code_change(_FromVsn, #{} = State, _Extra) ->
+    {ok, State}.
+
+%%%===================================================================
+%%% Internal functions
+%%%===================================================================
+
+%% Read the last stored memory figures from persistent_term; yields
+%% {0, 0} when nothing has been stored yet.
+get_memory_from_cache() ->
+    Fallback = {0, 0},
+    persistent_term:get(?SYS_MEMORY_CACHE_KEY, Fallback).