Просмотр исходного кода

fix(rule_engine_api): don't crash when formatting empty metrics

Fixes https://emqx.atlassian.net/browse/EMQX-10073
Fixes https://github.com/emqx/emqx/issues/10714#issuecomment-1567987664

Similar issue to https://github.com/emqx/emqx/pull/10743, but on the
rule engine API.
Thales Macedo Garitezi 2 года назад
Родитель
Коммит
57aacb471c

+ 63 - 41
apps/emqx_rule_engine/src/emqx_rule_engine_api.erl

@@ -529,47 +529,69 @@ printable_function_name(Mod, Func) ->
     list_to_binary(lists:concat([Mod, ":", Func])).
 
 get_rule_metrics(Id) ->
-    Format = fun(
-        Node,
-        #{
-            counters :=
-                #{
-                    'matched' := Matched,
-                    'passed' := Passed,
-                    'failed' := Failed,
-                    'failed.exception' := FailedEx,
-                    'failed.no_result' := FailedNoRes,
-                    'actions.total' := OTotal,
-                    'actions.failed' := OFailed,
-                    'actions.failed.out_of_service' := OFailedOOS,
-                    'actions.failed.unknown' := OFailedUnknown,
-                    'actions.success' := OFailedSucc
-                },
-            rate :=
-                #{
-                    'matched' :=
-                        #{current := Current, max := Max, last5m := Last5M}
-                }
-        }
-    ) ->
-        #{
-            metrics => ?METRICS(
-                Matched,
-                Passed,
-                Failed,
-                FailedEx,
-                FailedNoRes,
-                OTotal,
-                OFailed,
-                OFailedOOS,
-                OFailedUnknown,
-                OFailedSucc,
-                Current,
-                Max,
-                Last5M
-            ),
-            node => Node
-        }
+    Format = fun
+        (
+            Node,
+            #{
+                counters :=
+                    #{
+                        'matched' := Matched,
+                        'passed' := Passed,
+                        'failed' := Failed,
+                        'failed.exception' := FailedEx,
+                        'failed.no_result' := FailedNoRes,
+                        'actions.total' := OTotal,
+                        'actions.failed' := OFailed,
+                        'actions.failed.out_of_service' := OFailedOOS,
+                        'actions.failed.unknown' := OFailedUnknown,
+                        'actions.success' := OFailedSucc
+                    },
+                rate :=
+                    #{
+                        'matched' :=
+                            #{current := Current, max := Max, last5m := Last5M}
+                    }
+            }
+        ) ->
+            #{
+                metrics => ?METRICS(
+                    Matched,
+                    Passed,
+                    Failed,
+                    FailedEx,
+                    FailedNoRes,
+                    OTotal,
+                    OFailed,
+                    OFailedOOS,
+                    OFailedUnknown,
+                    OFailedSucc,
+                    Current,
+                    Max,
+                    Last5M
+                ),
+                node => Node
+            };
+        (Node, _Metrics) ->
+            %% Empty metrics: can happen when a node joins a cluster and the rule is not yet
+            %% replicated to it, so the counters map is empty.
+            #{
+                metrics => ?METRICS(
+                    _Matched = 0,
+                    _Passed = 0,
+                    _Failed = 0,
+                    _FailedEx = 0,
+                    _FailedNoRes = 0,
+                    _OTotal = 0,
+                    _OFailed = 0,
+                    _OFailedOOS = 0,
+                    _OFailedUnknown = 0,
+                    _OFailedSucc = 0,
+                    _Current = 0,
+                    _Max = 0,
+                    _Last5M = 0
+                ),
+                node => Node
+            }
     end,
     [
         Format(Node, emqx_plugin_libs_proto_v1:get_metrics(Node, rule_metrics, Id))

+ 20 - 0
apps/emqx_rule_engine/test/emqx_rule_engine_api_SUITE.erl

@@ -94,6 +94,26 @@ t_crud_rule_api(_Config) ->
     ct:pal("RMetrics : ~p", [Metrics]),
     ?assertMatch(#{id := RuleId, metrics := _, node_metrics := _}, Metrics),
 
+    %% simulating a node joining a cluster and lagging the configuration replication; in
+    %% such cases, when fetching metrics, a rule may exist in the cluster but not on the
+    %% new node.  We just check that it doesn't provoke a crash.
+    emqx_common_test_helpers:with_mock(
+        emqx_metrics_worker,
+        get_metrics,
+        fun(HandlerName, MetricId) ->
+            %% change the metric id to some unknown id.
+            meck:passthrough([HandlerName, <<"unknown-", MetricId/binary>>])
+        end,
+        fun() ->
+            {200, Metrics1} = emqx_rule_engine_api:'/rules/:id/metrics'(get, #{
+                bindings => #{id => RuleId}
+            }),
+            ct:pal("RMetrics : ~p", [Metrics1]),
+            ?assertMatch(#{id := RuleId, metrics := _, node_metrics := _}, Metrics1),
+            ok
+        end
+    ),
+
     {200, Rule2} = emqx_rule_engine_api:'/rules/:id'(put, #{
         bindings => #{id => RuleId},
         body => ?SIMPLE_RULE(RuleId)#{<<"sql">> => <<"select * from \"t/b\"">>}

+ 1 - 0
changes/ce/fix-10884.en.md

@@ -0,0 +1 @@
+Fixes an issue where trying to get rule info or metrics could result in a crash when a node is joining a cluster.