Преглед на файлове

Merge pull request #11661 from zmstone/0922-fix-json-log-formatter

0922 fix json log formatter
Zaiming (Stone) Shi преди 2 години
родител
ревизия
4e15edb5e4

+ 10 - 22
apps/emqx/src/config/emqx_config_logger.erl

@@ -216,38 +216,26 @@ log_formatter(HandlerName, Conf) ->
         end,
     SingleLine = conf_get("single_line", Conf),
     Depth = conf_get("max_depth", Conf),
+    Format =
+        case HandlerName of
+            ?AUDIT_HANDLER ->
+                json;
+            _ ->
+                conf_get("formatter", Conf)
+        end,
     do_formatter(
-        HandlerName, conf_get("formatter", Conf), CharsLimit, SingleLine, TimeOffSet, Depth
+        Format, CharsLimit, SingleLine, TimeOffSet, Depth
     ).
 
 %% helpers
-do_formatter(?AUDIT_HANDLER, _, CharsLimit, SingleLine, TimeOffSet, Depth) ->
-    {emqx_logger_jsonfmt, #{
-        template => [
-            time,
-            " [",
-            level,
-            "] ",
-            %% http api
-            {method, [code, " ", method, " ", operate_id, " ", username, " "], []},
-            %% cli
-            {cmd, [cmd, " "], []},
-            msg,
-            "\n"
-        ],
-        chars_limit => CharsLimit,
-        single_line => SingleLine,
-        time_offset => TimeOffSet,
-        depth => Depth
-    }};
-do_formatter(_, json, CharsLimit, SingleLine, TimeOffSet, Depth) ->
+do_formatter(json, CharsLimit, SingleLine, TimeOffSet, Depth) ->
     {emqx_logger_jsonfmt, #{
         chars_limit => CharsLimit,
         single_line => SingleLine,
         time_offset => TimeOffSet,
         depth => Depth
     }};
-do_formatter(_, text, CharsLimit, SingleLine, TimeOffSet, Depth) ->
+do_formatter(text, CharsLimit, SingleLine, TimeOffSet, Depth) ->
     {emqx_logger_textfmt, #{
         template => [time, " [", level, "] ", msg, "\n"],
         chars_limit => CharsLimit,

+ 115 - 45
apps/emqx/src/emqx_logger_jsonfmt.erl

@@ -51,7 +51,8 @@
 -type config() :: #{
     depth => pos_integer() | unlimited,
     report_cb => logger:report_cb(),
-    single_line => boolean()
+    single_line => boolean(),
+    chars_limit => unlimited | pos_integer()
 }.
 
 -define(IS_STRING(String), (is_list(String) orelse is_binary(String))).
@@ -64,19 +65,17 @@
 best_effort_json(Input) ->
     best_effort_json(Input, [pretty, force_utf8]).
 best_effort_json(Input, Opts) ->
-    Config = #{depth => unlimited, single_line => true},
+    Config = #{depth => unlimited, single_line => true, chars_limit => unlimited},
     JsonReady = best_effort_json_obj(Input, Config),
     emqx_utils_json:encode(JsonReady, Opts).
 
 -spec format(logger:log_event(), config()) -> iodata().
-format(#{level := Level, msg := Msg, meta := Meta} = Event, Config0) when is_map(Config0) ->
+format(#{level := Level, msg := Msg, meta := Meta}, Config0) when is_map(Config0) ->
     Config = add_default_config(Config0),
-    MsgBin = format(Msg, Meta#{level => Level}, Config),
-    logger_formatter:format(Event#{msg => {string, MsgBin}}, Config).
+    [format(Msg, Meta#{level => Level}, Config), "\n"].
 
-format(Msg, Meta0, Config) ->
-    Meta = maps:without([time, level], Meta0),
-    Data0 =
+format(Msg, Meta, Config) ->
+    Data =
         try maybe_format_msg(Msg, Meta, Config) of
             Map when is_map(Map) ->
                 maps:merge(Map, Meta);
@@ -92,8 +91,7 @@ format(Msg, Meta0, Config) ->
                     fmt_stacktrace => S
                 }
         end,
-    Data = maps:without([report_cb], Data0),
-    emqx_utils_json:encode(json_obj(Data, Config)).
+    emqx_utils_json:encode(json_obj_root(Data, Config)).
 
 maybe_format_msg({report, Report} = Msg, #{report_cb := Cb} = Meta, Config) ->
     case is_map(Report) andalso Cb =:= ?DEFAULT_FORMATTER of
@@ -128,7 +126,7 @@ format_msg({report, Report}, #{report_cb := Fun} = Meta, Config) when is_functio
     end;
 format_msg({report, Report}, #{report_cb := Fun}, Config) when is_function(Fun, 2) ->
     %% a format callback function of arity 2
-    case Fun(Report, maps:with([depth, single_line], Config)) of
+    case Fun(Report, maps:with([depth, single_line, chars_limit], Config)) of
         Chardata when ?IS_STRING(Chardata) ->
             try
                 unicode:characters_to_binary(Chardata, utf8)
@@ -152,11 +150,13 @@ format_msg({Fmt, Args}, _Meta, Config) ->
 
 do_format_msg(Format0, Args, #{
     depth := Depth,
-    single_line := SingleLine
+    single_line := SingleLine,
+    chars_limit := Limit
 }) ->
+    Opts = chars_limit_to_opts(Limit),
     Format1 = io_lib:scan_format(Format0, Args),
     Format = reformat(Format1, Depth, SingleLine),
-    Text0 = io_lib:build_text(Format, []),
+    Text0 = io_lib:build_text(Format, Opts),
     Text =
         case SingleLine of
             true -> re:replace(Text0, ",?\r?\n\s*", ", ", [{return, list}, global, unicode]);
@@ -164,6 +164,9 @@ do_format_msg(Format0, Args, #{
         end,
     trim(unicode:characters_to_binary(Text, utf8)).
 
+chars_limit_to_opts(unlimited) -> [];
+chars_limit_to_opts(Limit) -> [{chars_limit, Limit}].
+
 %% Get rid of the leading spaces.
 %% leave alone the trailing spaces.
 trim(<<$\s, Rest/binary>>) -> trim(Rest);
@@ -233,52 +236,70 @@ json(P, C) when is_port(P) -> json(port_to_list(P), C);
 json(F, C) when is_function(F) -> json(erlang:fun_to_list(F), C);
 json(B, Config) when is_binary(B) ->
     best_effort_unicode(B, Config);
-json(L, Config) when is_list(L), is_integer(hd(L)) ->
-    best_effort_unicode(L, Config);
 json(M, Config) when is_list(M), is_tuple(hd(M)), tuple_size(hd(M)) =:= 2 ->
     best_effort_json_obj(M, Config);
 json(L, Config) when is_list(L) ->
-    [json(I, Config) || I <- L];
+    try unicode:characters_to_binary(L, utf8) of
+        B when is_binary(B) -> B;
+        _ -> [json(I, Config) || I <- L]
+    catch
+        _:_ ->
+            [json(I, Config) || I <- L]
+    end;
 json(Map, Config) when is_map(Map) ->
     best_effort_json_obj(Map, Config);
 json(Term, Config) ->
     do_format_msg("~p", [Term], Config).
 
+json_obj_root(Data0, Config) ->
+    Time = maps:get(time, Data0, undefined),
+    Level = maps:get(level, Data0, undefined),
+    Msg1 =
+        case maps:get(msg, Data0, undefined) of
+            undefined ->
+                maps:get('$kind', Data0, undefined);
+            Msg0 ->
+                Msg0
+        end,
+    Msg =
+        case Msg1 of
+            undefined ->
+                undefined;
+            _ ->
+                json(Msg1, Config)
+        end,
+    Mfal = emqx_utils:format_mfal(Data0),
+    Data =
+        maps:fold(
+            fun(K, V, D) ->
+                {K1, V1} = json_kv(K, V, Config),
+                [{K1, V1} | D]
+            end,
+            [],
+            maps:without(
+                [time, gl, file, report_cb, msg, '$kind', mfa, level, line, is_trace], Data0
+            )
+        ),
+    lists:filter(
+        fun({_, V}) -> V =/= undefined end,
+        [{time, Time}, {level, Level}, {msg, Msg}, {mfa, Mfal}]
+    ) ++ Data.
+
 json_obj(Data, Config) ->
     maps:fold(
         fun(K, V, D) ->
-            json_kv(K, V, D, Config)
+            {K1, V1} = json_kv(K, V, Config),
+            maps:put(K1, V1, D)
         end,
         maps:new(),
         Data
     ).
 
-json_kv(mfa, {M, F, A}, Data, _Config) ->
-    maps:put(
-        mfa,
-        <<
-            (atom_to_binary(M, utf8))/binary,
-            $:,
-            (atom_to_binary(F, utf8))/binary,
-            $/,
-            (integer_to_binary(A))/binary
-        >>,
-        Data
-    );
-%% snabbkaffe
-json_kv('$kind', Kind, Data, Config) ->
-    maps:put(msg, json(Kind, Config), Data);
-json_kv(gl, _, Data, _Config) ->
-    %% drop gl because it's not interesting
-    Data;
-json_kv(file, _, Data, _Config) ->
-    %% drop 'file' because we have mfa
-    Data;
-json_kv(K0, V, Data, Config) ->
+json_kv(K0, V, Config) ->
     K = json_key(K0),
     case is_map(V) of
-        true -> maps:put(json(K, Config), best_effort_json_obj(V, Config), Data);
-        false -> maps:put(json(K, Config), json(V, Config), Data)
+        true -> {K, best_effort_json_obj(V, Config)};
+        false -> {K, json(V, Config)}
     end.
 
 json_key(A) when is_atom(A) -> json_key(atom_to_binary(A, utf8));
@@ -373,23 +394,72 @@ p_config() ->
     proper_types:shrink_list(
         [
             {depth, p_limit()},
-            {single_line, proper_types:boolean()}
+            {single_line, proper_types:boolean()},
+            {chars_limit, p_limit()}
         ]
     ).
 
+%% NOTE: pretty-printing format is asserted in the test
+%% This affects the CLI output format, consult the team before changing
+%% the format.
 best_effort_json_test() ->
     ?assertEqual(
         <<"{\n  \n}">>,
-        emqx_logger_jsonfmt:best_effort_json([])
+        best_effort_json([])
     ),
     ?assertEqual(
         <<"{\n  \"key\" : [\n    \n  ]\n}">>,
-        emqx_logger_jsonfmt:best_effort_json(#{key => []})
+        best_effort_json(#{key => []})
     ),
     ?assertEqual(
         <<"[\n  {\n    \"key\" : [\n      \n    ]\n  }\n]">>,
-        emqx_logger_jsonfmt:best_effort_json([#{key => []}])
+        best_effort_json([#{key => []}])
     ),
     ok.
 
+config() ->
+    #{
+        chars_limit => unlimited,
+        depth => unlimited,
+        single_line => true
+    }.
+
+make_log(Report) ->
+    #{
+        level => info,
+        msg => Report,
+        meta => #{time => 1111, report_cb => ?DEFAULT_FORMATTER}
+    }.
+
+ensure_json_output_test() ->
+    JSON = format(make_log({report, #{foo => bar}}), config()),
+    ?assert(is_map(emqx_utils_json:decode(JSON))),
+    ok.
+
+chars_limit_not_applied_on_raw_map_fields_test() ->
+    Limit = 32,
+    Len = 100,
+    LongStr = lists:duplicate(Len, $a),
+    Config0 = config(),
+    Config = Config0#{
+        chars_limit => Limit
+    },
+    JSON = format(make_log({report, #{foo => LongStr}}), Config),
+    #{<<"foo">> := LongStr1} = emqx_utils_json:decode(JSON),
+    ?assertEqual(Len, size(LongStr1)),
+    ok.
+
+chars_limit_applied_on_format_result_test() ->
+    Limit = 32,
+    Len = 100,
+    LongStr = lists:duplicate(Len, $a),
+    Config0 = config(),
+    Config = Config0#{
+        chars_limit => Limit
+    },
+    JSON = format(make_log({string, LongStr}), Config),
+    #{<<"msg">> := LongStr1} = emqx_utils_json:decode(JSON),
+    ?assertEqual(Limit, size(LongStr1)),
+    ok.
+
 -endif.

+ 5 - 10
apps/emqx/src/emqx_logger_textfmt.erl

@@ -56,8 +56,7 @@ enrich_report(ReportRaw, Meta) ->
         end,
     ClientId = maps:get(clientid, Meta, undefined),
     Peer = maps:get(peername, Meta, undefined),
-    MFA = maps:get(mfa, Meta, undefined),
-    Line = maps:get(line, Meta, undefined),
+    MFA = emqx_utils:format_mfal(Meta),
     Msg = maps:get(msg, ReportRaw, undefined),
     %% turn it into a list so that the order of the fields is determined
     lists:foldl(
@@ -70,8 +69,7 @@ enrich_report(ReportRaw, Meta) ->
             {topic, try_format_unicode(Topic)},
             {clientid, try_format_unicode(ClientId)},
             {peername, Peer},
-            {line, Line},
-            {mfa, mfa(MFA)},
+            {mfa, try_format_unicode(MFA)},
             {msg, Msg}
         ]
     ).
@@ -84,7 +82,7 @@ try_format_unicode(Char) ->
             case unicode:characters_to_list(Char) of
                 {error, _, _} -> error;
                 {incomplete, _, _} -> error;
-                Binary -> Binary
+                List1 -> List1
             end
         catch
             _:_ ->
@@ -95,8 +93,8 @@ try_format_unicode(Char) ->
         _ -> List
     end.
 
-enrich_mfa({Fmt, Args}, #{mfa := Mfa, line := Line}) when is_list(Fmt) ->
-    {Fmt ++ " mfa: ~ts line: ~w", Args ++ [mfa(Mfa), Line]};
+enrich_mfa({Fmt, Args}, Data) when is_list(Fmt) ->
+    {Fmt ++ " mfa: ~ts", Args ++ [emqx_utils:format_mfal(Data)]};
 enrich_mfa(Msg, _) ->
     Msg.
 
@@ -113,6 +111,3 @@ enrich_topic({Fmt, Args}, #{topic := Topic}) when is_list(Fmt) ->
     {" topic: ~ts" ++ Fmt, [Topic | Args]};
 enrich_topic(Msg, _) ->
     Msg.
-
-mfa(undefined) -> undefined;
-mfa({M, F, A}) -> [atom_to_list(M), ":", atom_to_list(F), "/" ++ integer_to_list(A)].

+ 1 - 0
apps/emqx_conf/src/emqx_conf_schema.erl

@@ -1252,6 +1252,7 @@ log_handler_common_confs(Handler, Default) ->
             sc(
                 hoconsc:enum([text, json]),
                 #{
+                    aliases => [format],
                     default => maps:get(formatter, Default, text),
                     desc => ?DESC("common_handler_formatter"),
                     importance => ?IMPORTANCE_MEDIUM

+ 2 - 2
apps/emqx_schema_registry/test/emqx_schema_registry_http_api_SUITE.erl

@@ -278,7 +278,7 @@ t_crud(Config) ->
             <<"code">> := <<"BAD_REQUEST">>,
             <<"message">> :=
                 #{
-                    <<"expected">> := [_ | _],
+                    <<"expected">> := <<"avro | protobuf">>,
                     <<"field_name">> := <<"type">>
                 }
         }},
@@ -301,7 +301,7 @@ t_crud(Config) ->
             <<"code">> := <<"BAD_REQUEST">>,
             <<"message">> :=
                 #{
-                    <<"expected">> := [_ | _],
+                    <<"expected">> := <<"avro | protobuf">>,
                     <<"field_name">> := <<"type">>
                 }
         }},

+ 26 - 1
apps/emqx_utils/src/emqx_utils.erl

@@ -61,7 +61,8 @@
     diff_lists/3,
     merge_lists/3,
     tcp_keepalive_opts/4,
-    format/1
+    format/1,
+    format_mfal/1
 ]).
 
 -export([
@@ -529,6 +530,30 @@ tcp_keepalive_opts(OS, _Idle, _Interval, _Probes) ->
 format(Term) ->
     iolist_to_binary(io_lib:format("~0p", [Term])).
 
+%% @doc Helper function for log formatters.
+-spec format_mfal(map()) -> undefined | binary().
+format_mfal(Data) ->
+    Line =
+        case maps:get(line, Data, undefined) of
+            undefined ->
+                <<"">>;
+            Num ->
+                ["(", integer_to_list(Num), ")"]
+        end,
+    case maps:get(mfa, Data, undefined) of
+        {M, F, A} ->
+            iolist_to_binary([
+                atom_to_binary(M, utf8),
+                $:,
+                atom_to_binary(F, utf8),
+                $/,
+                integer_to_binary(A),
+                Line
+            ]);
+        _ ->
+            undefined
+    end.
+
 %%------------------------------------------------------------------------------
 %% Internal Functions
 %%------------------------------------------------------------------------------

+ 3 - 0
changes/ce/fix-11661.en.md

@@ -0,0 +1,3 @@
+Fix log formatter when log.HANDLER.formatter is set to 'json'.
+
+The bug was introduced in v5.0.4 where the log line was no longer a valid JSON, but prefixed with timestamp string and level name.

+ 3 - 2
rel/i18n/emqx_conf_schema.hocon

@@ -101,7 +101,7 @@ common_handler_flush_qlen.label:
 
 common_handler_chars_limit.desc:
 """Set the maximum length of a single log message. If this length is exceeded, the log message will be truncated.
-NOTE: Restrict char limiter if formatter is JSON , it will get a truncated incomplete JSON data, which is not recommended."""
+When formatter is <code>json</code> the truncation is done on the JSON values, but not on the log message itself."""
 
 common_handler_chars_limit.label:
 """Single Log Max Length"""
@@ -660,7 +660,8 @@ Can be one of:
   - <code>system</code>: the time offset used by the local system
   - <code>utc</code>: the UTC time offset
   - <code>+-[hh]:[mm]</code>: user specified time offset, such as "-02:00" or "+00:00"
-Defaults to: <code>system</code>."""
+Defaults to: <code>system</code>.
+This config has no effect for when formatter is <code>json</code> as the timestamp in JSON is milliseconds since epoch."""
 
 common_handler_time_offset.label:
 """Time Offset"""

+ 1 - 0
scripts/spellcheck/dicts/emqx.txt

@@ -166,6 +166,7 @@ ip
 ipv
 jenkins
 jq
+json
 kb
 keepalive
 keyfile