Просмотр исходного кода

fix(otel): merge connect & disconnect, subscribe & unsubscribe

JimMoen 1 год назад
Родитель
Сommit
26e60b25ec

+ 3 - 5
apps/emqx_opentelemetry/src/emqx_otel_api.erl

@@ -108,14 +108,12 @@ otel_config_example() ->
                 trace_mode => legacy,
                 e2e_tracing_options => #{
                     attribute_meta_value => "emqxcl",
-                    mqtt_publish_trace_level => basic,
+                    msg_trace_level => 0,
                     clientid_match_rules_max => 30,
                     topic_match_rules_max => 30,
                     sample_ratio => "10%",
-                    client_connect => true,
-                    client_disconnect => true,
-                    client_subscribe => true,
-                    client_unsubscribe => true,
+                    client_connect_disconnect => true,
+                    client_subscribe_unsubscribe => true,
                     client_publish => true
                 }
             }

+ 33 - 28
apps/emqx_opentelemetry/src/emqx_otel_schema.erl

@@ -4,6 +4,7 @@
 -module(emqx_otel_schema).
 
 -include("emqx_otel_trace.hrl").
+-include_lib("emqx/include/emqx_mqtt.hrl").
 -include_lib("hocon/include/hoconsc.hrl").
 
 -export([
@@ -244,18 +245,16 @@ fields("e2e_tracing_options") ->
             ?HOCON(
                 string(),
                 #{
-                    default => <<"emqxcl">>,
                     desc => ?DESC(e2e_attribute_meta_value),
                     importance => ?IMPORTANCE_MEDIUM
                 }
             )},
-        %% TODO: Rename
-        {mqtt_publish_trace_level,
+        {msg_trace_level,
             ?HOCON(
-                ?ENUM([basic, first_ack, all]),
+                emqx_schema:qos(),
                 #{
-                    default => basic,
-                    desc => ?DESC(publish_response_trace_level),
+                    default => ?QOS_0,
+                    desc => ?DESC(msg_trace_level),
                     importance => ?IMPORTANCE_MEDIUM
                 }
             )},
@@ -285,20 +284,35 @@ fields("e2e_tracing_options") ->
                     desc => ?DESC(sample_ratio),
                     importance => ?IMPORTANCE_MEDIUM
                 }
+            )},
+        {client_connect_disconnect,
+            ?HOCON(
+                boolean(),
+                #{
+                    desc => ?DESC(client_connect_disconnect),
+                    default => false,
+                    importance => ?IMPORTANCE_MEDIUM
+                }
+            )},
+        {client_subscribe_unsubscribe,
+            ?HOCON(
+                boolean(),
+                #{
+                    desc => ?DESC(client_subscribe_unsubscribe),
+                    default => false,
+                    importance => ?IMPORTANCE_MEDIUM
+                }
+            )},
+        {client_publish,
+            ?HOCON(
+                boolean(),
+                #{
+                    desc => ?DESC(client_publish),
+                    default => false,
+                    importance => ?IMPORTANCE_MEDIUM
+                }
             )}
-    ] ++
-        [
-            {TraceEvent,
-                ?HOCON(
-                    boolean(),
-                    #{
-                        desc => ?DESC(TraceEvent),
-                        default => false,
-                        importance => ?IMPORTANCE_MEDIUM
-                    }
-                )}
-         || TraceEvent <- root_span_names()
-        ].
+    ].
 
 desc("opentelemetry") ->
     ?DESC(opentelemetry);
@@ -317,15 +331,6 @@ desc("e2e_tracing_options") ->
 desc(_) ->
     undefined.
 
-root_span_names() ->
-    [
-        client_connect,
-        client_disconnect,
-        client_subscribe,
-        client_unsubscribe,
-        client_publish
-    ].
-
 %% Compatibility with the previous schema that defined only metrics fields
 legacy_metrics_converter(OtelConf, _Opts) when is_map(OtelConf) ->
     Otel1 =

+ 15 - 25
apps/emqx_opentelemetry/src/sampler/emqx_otel_sampler.erl

@@ -131,12 +131,7 @@ record_count() ->
 %% OpenTelemetry Sampler Callback
 %%--------------------------------------------------------------------
 
-setup(
-    #{
-        mqtt_publish_trace_level := Level,
-        sample_ratio := Ratio
-    } = InitOpts
-) ->
+setup(#{sample_ratio := Ratio} = InitOpts) ->
     IdUpper =
         case Ratio of
             R when R =:= +0.0 ->
@@ -149,16 +144,14 @@ setup(
 
     Opts = (maps:with(
         [
-            client_connect,
-            client_disconnect,
-            client_subscribe,
-            client_unsubscribe,
+            client_connect_disconnect,
+            client_subscribe_unsubscribe,
             client_publish,
+            msg_trace_level,
             attribute_meta_value
         ],
         InitOpts
     ))#{
-        response_trace_qos => level_to_qos(Level),
         id_upper => IdUpper
     },
 
@@ -169,11 +162,6 @@ setup(
 
     Opts.
 
--compile({inline, [level_to_qos/1]}).
-level_to_qos(basic) -> ?QOS_0;
-level_to_qos(first_ack) -> ?QOS_1;
-level_to_qos(all) -> ?QOS_2.
-
 %% TODO: description
 description(_Opts) ->
     <<"AttributeSampler">>.
@@ -213,7 +201,7 @@ should_sample(
     _SpanKind,
     _Attributes,
     #{
-        response_trace_qos := QoS,
+        msg_trace_level := QoS,
         attribute_meta_value := MetaValue
     } = _Opts
 ) ->
@@ -256,14 +244,16 @@ decide_by_traceid_ratio(TraceId, SpanName, #{id_upper := IdUpperBound} = Opts) -
             false
     end.
 
-span_name_to_config_key(?CLIENT_CONNECT_SPAN_NAME) ->
-    client_connect;
-span_name_to_config_key(?CLIENT_DISCONNECT_SPAN_NAME) ->
-    client_disconnect;
-span_name_to_config_key(?CLIENT_SUBSCRIBE_SPAN_NAME) ->
-    client_subscribe;
-span_name_to_config_key(?CLIENT_UNSUBSCRIBE_SPAN_NAME) ->
-    client_unsubscribe;
+span_name_to_config_key(SpanName) when
+    SpanName =:= ?CLIENT_CONNECT_SPAN_NAME orelse
+        SpanName =:= ?CLIENT_DISCONNECT_SPAN_NAME
+->
+    client_connect_disconnect;
+span_name_to_config_key(SpanName) when
+    SpanName =:= ?CLIENT_SUBSCRIBE_SPAN_NAME orelse
+        SpanName =:= ?CLIENT_UNSUBSCRIBE_SPAN_NAME
+->
+    client_subscribe_unsubscribe;
 span_name_to_config_key(?CLIENT_PUBLISH_SPAN_NAME) ->
     client_publish.
 

+ 22 - 22
rel/i18n/emqx_otel_schema.hocon

@@ -71,29 +71,32 @@ e2e_attribute_meta_value.desc:
 Typically, set a simple and easily recognizable name or use the cluster name to identify different EMQX clusters."""
 e2e_attribute_meta_value.label: "Attribute Meta Value"
 
-## TODO: Rename
-mqtt_publish_trace_level.desc:
+msg_trace_level.desc:
 """Trace level for all message exchanges during the message publishing process.<br/>
-Note: this config only takes effect when <code>sample</code> is set to <code>false</code>.
-- `basic`: Only `PUBLISH` packets are sampled for all QoS level (both QoS0, QoS1, QoS2).
-- `first_ack`: In addition to `PUBLISH` packets for all QoS. `PUBACK` and `PUBREC` are also sampled.
-   That is, the first response packet in the QoS1 or QoS2 message interaction.
-- `all`: Both `PUBLISH` packets and all response packets `PUBACK`, `PUBREC`, `PUBREL`, `PUBCOMP` will be sampled."""
-mqtt_publish_trace_level.label: "Publish Trace Level"
-
-client_connect.desc: """In addition to the given rules, whether to sample Client Connect event."""
-client_connect.label: "Client Connect"
-
-client_disconnect.desc: """In addition to the given rules, whether to sample Client Disconnect event."""
-client_disconnect.label: "Client Disconnect"
+Note: This is effective for all message traces. It is transparent to whitelist sampling or ratio sampling.<br/>
+- `0`: Basic events during message publish will be recorded as Spans.<br/>
+  Includes 'client.connect', 'client.authz', 'message.route', 'message.forward', 'message.handle_forward', 'broker.publish'.<br/>
+- `1`: In addition to basic events.<br/>
+  The events 'broker.puback', 'client.puback', 'broker.pubrec', 'client.pubrec' will be recorded as Spans.<br/>
+  That is, the first response packet in the QoS1 or QoS2 message interaction.
+- `2`: In addition to level 1.<br/>
+  The events 'broker.pubrel', 'client.pubrel', 'broker.pubcomp', 'client.pubcomp' will be recorded as Spans.<br/>"""
+msg_trace_level.label: "Message Trace Level"
+
+sample_ratio.desc:
+"""Sampling ratio for the event types. Transparent to the event switches and whitelist sampling.<br/>"""
+sample_ratio.label: "Sampling Ratio"
 
-client_subscribe.desc: """In addition to the given rules, whether to sample Client Subscribe event."""
-client_subscribe.label: "Client Subscribe"
+client_connect_disconnect.desc:
+"""In addition to the whitelist, whether to trace the client connect and disconnect events."""
+client_connect_disconnect.label: "Client Connect/Disconnect"
 
-client_unsubscribe.desc: """In addition to the given rules, whether to sample Client Unsubscribe event."""
-client_unsubscribe.label: "Client Unsubscribe"
+client_subscribe_unsubscribe.desc:
+"""In addition to the whitelist, whether to trace the client subscribe and unsubscribe events."""
+client_subscribe_unsubscribe.label: "Client Subscribe/Unsubscribe"
 
-client_publish.desc: """In addition to the given rules, whether to sample Client Publish event."""
+client_publish.desc:
+"""In addition to the whitelist, whether to trace the client publish event."""
 client_publish.label: "Client Publish"
 
 clientid_match_rules_max.desc: """Maximum length of the rule list based on clientid matching."""
@@ -102,7 +105,4 @@ clientid_match_rules_max.label: "Client ID Match Rules Max"
 topic_match_rules_max.desc: """Maximum length of the rule list based on topic matching."""
 topic_match_rules_max.label: "Topic Match Rules Max"
 
-sample_ratio.desc: "Sampling ratio for the event types."
-sample_ratio.label: "Sampling Ratio"
-
 }

+ 4 - 0
scripts/spellcheck/dicts/emqx.txt

@@ -319,3 +319,7 @@ PKCE
 Datalayers
 OpenTelemetry
 opentelemetry
+puback
+pubrec
+pubrel
+pubcomp