|
|
@@ -65,10 +65,10 @@ set_mem_check_interval(Seconds) ->
|
|
|
memsup:set_check_interval(Seconds div 60000).
|
|
|
|
|
|
get_sysmem_high_watermark() ->
|
|
|
- memsup:get_sysmem_high_watermark().
|
|
|
+ gen_server:call(?OS_MON, ?FUNCTION_NAME, infinity).
|
|
|
|
|
|
set_sysmem_high_watermark(Float) ->
|
|
|
- memsup:set_sysmem_high_watermark(Float).
|
|
|
+ gen_server:call(?OS_MON, {?FUNCTION_NAME, Float}, infinity).
|
|
|
|
|
|
get_procmem_high_watermark() ->
|
|
|
memsup:get_procmem_high_watermark().
|
|
|
@@ -77,37 +77,34 @@ set_procmem_high_watermark(Float) ->
|
|
|
memsup:set_procmem_high_watermark(Float).
|
|
|
|
|
|
current_sysmem_percent() ->
|
|
|
- case erlang:whereis(memsup) of
|
|
|
- undefined ->
|
|
|
- undefined;
|
|
|
- _Pid ->
|
|
|
- {Total, Allocated, _Worst} = memsup:get_memory_data(),
|
|
|
- case Total =/= 0 of
|
|
|
- true ->
|
|
|
- erlang:floor((Allocated / Total) * 10000) / 100;
|
|
|
- false ->
|
|
|
- undefined
|
|
|
- end
|
|
|
- end.
|
|
|
+ Ratio = load_ctl:get_memory_usage(),
|
|
|
+ erlang:floor(Ratio * 10000) / 100.
|
|
|
|
|
|
%%--------------------------------------------------------------------
|
|
|
%% gen_server callbacks
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
|
|
init([]) ->
|
|
|
+ %% memsup is not reliable; raise its watermark to 1.0 so that its own alarm is effectively ignored
|
|
|
+ memsup:set_sysmem_high_watermark(1.0),
|
|
|
#{
|
|
|
sysmem_high_watermark := SysHW,
|
|
|
procmem_high_watermark := PHW,
|
|
|
mem_check_interval := MCI
|
|
|
} = emqx:get_config([sysmon, os]),
|
|
|
|
|
|
- set_sysmem_high_watermark(SysHW),
|
|
|
set_procmem_high_watermark(PHW),
|
|
|
set_mem_check_interval(MCI),
|
|
|
- ensure_system_memory_alarm(SysHW),
|
|
|
- _ = start_check_timer(),
|
|
|
- {ok, #{}}.
|
|
|
-
|
|
|
+ update_mem_alarm_stauts(SysHW),
|
|
|
+ _ = start_mem_check_timer(),
|
|
|
+ _ = start_cpu_check_timer(),
|
|
|
+ {ok, #{sysmem_high_watermark => SysHW}}.
|
|
|
+
|
|
|
+handle_call(get_sysmem_high_watermark, _From, #{sysmem_high_watermark := HWM} = State) ->
|
|
|
+ {reply, HWM, State};
|
|
|
+handle_call({set_sysmem_high_watermark, New}, _From, #{sysmem_high_watermark := _Old} = State) ->
|
|
|
+ ok = update_mem_alarm_stauts(New),
|
|
|
+ {reply, ok, State#{sysmem_high_watermark := New}};
|
|
|
handle_call(Req, _From, State) ->
|
|
|
{reply, {error, {unexpected_call, Req}}, State}.
|
|
|
|
|
|
@@ -115,43 +112,40 @@ handle_cast(Msg, State) ->
|
|
|
?SLOG(error, #{msg => "unexpected_cast", cast => Msg}),
|
|
|
{noreply, State}.
|
|
|
|
|
|
-handle_info({timeout, _Timer, check}, State) ->
|
|
|
+handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = State) ->
|
|
|
+ ok = update_mem_alarm_stauts(HWM),
|
|
|
+ ok = start_mem_check_timer(),
|
|
|
+ {noreply, State};
|
|
|
+handle_info({timeout, _Timer, cpu_check}, State) ->
|
|
|
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
|
|
|
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
|
|
|
- %% TODO: should be improved?
|
|
|
- _ =
|
|
|
- case emqx_vm:cpu_util() of
|
|
|
- 0 ->
|
|
|
- ok;
|
|
|
- Busy when Busy > CPUHighWatermark ->
|
|
|
- Usage = list_to_binary(io_lib:format("~.2f%", [Busy])),
|
|
|
- Message = <<Usage/binary, " cpu usage">>,
|
|
|
- emqx_alarm:activate(
|
|
|
- high_cpu_usage,
|
|
|
- #{
|
|
|
- usage => Usage,
|
|
|
- high_watermark => CPUHighWatermark,
|
|
|
- low_watermark => CPULowWatermark
|
|
|
- },
|
|
|
- Message
|
|
|
- ),
|
|
|
- start_check_timer();
|
|
|
- Busy when Busy < CPULowWatermark ->
|
|
|
- Usage = list_to_binary(io_lib:format("~.2f%", [Busy])),
|
|
|
- Message = <<Usage/binary, " cpu usage">>,
|
|
|
- emqx_alarm:deactivate(
|
|
|
- high_cpu_usage,
|
|
|
- #{
|
|
|
- usage => Usage,
|
|
|
- high_watermark => CPUHighWatermark,
|
|
|
- low_watermark => CPULowWatermark
|
|
|
- },
|
|
|
- Message
|
|
|
- ),
|
|
|
- start_check_timer();
|
|
|
- _Busy ->
|
|
|
- start_check_timer()
|
|
|
- end,
|
|
|
+ case emqx_vm:cpu_util() of
|
|
|
+ 0 ->
|
|
|
+ ok;
|
|
|
+ Busy when Busy > CPUHighWatermark ->
|
|
|
+ _ = emqx_alarm:activate(
|
|
|
+ high_cpu_usage,
|
|
|
+ #{
|
|
|
+ usage => Busy,
|
|
|
+ high_watermark => CPUHighWatermark,
|
|
|
+ low_watermark => CPULowWatermark
|
|
|
+ },
|
|
|
+ usage_msg(Busy, cpu)
|
|
|
+ );
|
|
|
+ Busy when Busy < CPULowWatermark ->
|
|
|
+ ok = emqx_alarm:ensure_deactivated(
|
|
|
+ high_cpu_usage,
|
|
|
+ #{
|
|
|
+ usage => Busy,
|
|
|
+ high_watermark => CPUHighWatermark,
|
|
|
+ low_watermark => CPULowWatermark
|
|
|
+ },
|
|
|
+ usage_msg(Busy, cpu)
|
|
|
+ );
|
|
|
+ _Busy ->
|
|
|
+ ok
|
|
|
+ end,
|
|
|
+ ok = start_cpu_check_timer(),
|
|
|
{noreply, State};
|
|
|
handle_info(Info, State) ->
|
|
|
?SLOG(error, #{msg => "unexpected_info", info => Info}),
|
|
|
@@ -167,26 +161,66 @@ code_change(_OldVsn, State, _Extra) ->
|
|
|
%% Internal functions
|
|
|
%%--------------------------------------------------------------------
|
|
|
|
|
|
-start_check_timer() ->
|
|
|
+start_cpu_check_timer() ->
|
|
|
Interval = emqx:get_config([sysmon, os, cpu_check_interval]),
|
|
|
case erlang:system_info(system_architecture) of
|
|
|
"x86_64-pc-linux-musl" -> ok;
|
|
|
- _ -> emqx_misc:start_timer(Interval, check)
|
|
|
+ _ -> start_timer(Interval, cpu_check)
|
|
|
end.
|
|
|
|
|
|
-%% At startup, memsup starts first and checks for memory alarms,
|
|
|
-%% but emqx_alarm_handler is not yet used instead of alarm_handler,
|
|
|
-%% so alarm_handler is used directly for notification (normally emqx_alarm_handler should be used).
|
|
|
-%%The internal memsup will no longer trigger events that have been alerted,
|
|
|
-%% and there is no exported function to remove the alerted flag,
|
|
|
-%% so it can only be checked again at startup.
|
|
|
-
|
|
|
-ensure_system_memory_alarm(HW) when HW =< 1.0 andalso HW >= 0 ->
|
|
|
- case current_sysmem_percent() of
|
|
|
- Usage when Usage > (HW * 100) ->
|
|
|
- gen_event:notify(
|
|
|
- alarm_handler, {set_alarm, {system_memory_high_watermark, []}}
|
|
|
- );
|
|
|
- _ ->
|
|
|
+start_mem_check_timer() ->
|
|
|
+ Interval = emqx:get_config([sysmon, os, mem_check_interval]),
|
|
|
+ IsSupported =
|
|
|
+ case os:type() of
|
|
|
+ {unix, linux} ->
|
|
|
+ true;
|
|
|
+ _ ->
|
|
|
+ %% sorry macOS and Windows, not supported for now
|
|
|
+ false
|
|
|
+ end,
|
|
|
+ case is_integer(Interval) andalso IsSupported of
|
|
|
+ true ->
|
|
|
+ start_timer(Interval, mem_check);
|
|
|
+ false ->
|
|
|
ok
|
|
|
end.
|
|
|
+
|
|
|
+start_timer(Interval, Msg) ->
|
|
|
+ _ = emqx_misc:start_timer(Interval, Msg),
|
|
|
+ ok.
|
|
|
+
|
|
|
+update_mem_alarm_stauts(HWM) when HWM > 1.0 orelse HWM < 0.0 ->
|
|
|
+ ?SLOG(warning, #{msg => "discarded_out_of_range_mem_alarm_threshold", value => HWM}),
|
|
|
+ ok = emqx_alarm:ensure_deactivated(
|
|
|
+ high_system_memory_usage,
|
|
|
+ #{},
|
|
|
+ <<"Deactivated mem usage alarm due to out of range threshold">>
|
|
|
+ );
|
|
|
+update_mem_alarm_stauts(HWM0) ->
|
|
|
+ HWM = HWM0 * 100,
|
|
|
+ Usage = current_sysmem_percent(),
|
|
|
+ case Usage > HWM of
|
|
|
+ true ->
|
|
|
+ _ = emqx_alarm:activate(
|
|
|
+ high_system_memory_usage,
|
|
|
+ #{
|
|
|
+ usage => Usage,
|
|
|
+ high_watermark => HWM
|
|
|
+ },
|
|
|
+ usage_msg(Usage, mem)
|
|
|
+ );
|
|
|
+ _ ->
|
|
|
+ ok = emqx_alarm:ensure_deactivated(
|
|
|
+ high_system_memory_usage,
|
|
|
+ #{
|
|
|
+ usage => Usage,
|
|
|
+ high_watermark => HWM
|
|
|
+ },
|
|
|
+ usage_msg(Usage, mem)
|
|
|
+ )
|
|
|
+ end,
|
|
|
+ ok.
|
|
|
+
|
|
|
+usage_msg(Usage, What) ->
|
|
|
+ %% divide by 1.0 to ensure a floating-point number
|
|
|
+ iolist_to_binary(io_lib:format("~.2f% ~p usage", [Usage / 1.0, What])).
|