Browse Source

feat(variform): initialize client_attrs with variform

Moved regular expression extraction as a variform function.
zmstone 1 year ago
parent
commit
b76b6fbe63

+ 24 - 131
apps/emqx/src/emqx_channel.erl

@@ -251,7 +251,7 @@ init(
             MP -> MP
         end,
     ListenerId = emqx_listeners:listener_id(Type, Listener),
-    ClientInfo0 = set_peercert_infos(
+    ClientInfo = set_peercert_infos(
         Peercert,
         #{
             zone => Zone,
@@ -269,7 +269,6 @@ init(
         },
         Zone
     ),
-    ClientInfo = initialize_client_attrs_from_cert(ClientInfo0, Peercert),
     {NClientInfo, NConnInfo} = take_ws_cookie(ClientInfo, ConnInfo),
     #channel{
         conninfo = NConnInfo,
@@ -1586,60 +1585,6 @@ enrich_client(ConnPkt, Channel = #channel{clientinfo = ClientInfo}) ->
             {error, ReasonCode, Channel#channel{clientinfo = NClientInfo}}
     end.
 
-initialize_client_attrs_from_cert(#{zone := Zone} = ClientInfo, Peercert) ->
-    Inits = get_client_attrs_init_config(Zone),
-    lists:foldl(
-        fun(Init, Acc) ->
-            do_initialize_client_attrs_from_cert(Init, Acc, Peercert)
-        end,
-        ClientInfo,
-        Inits
-    ).
-
-do_initialize_client_attrs_from_cert(
-    #{
-        extract_from := From,
-        extract_regexp := Regexp,
-        extract_as := AttrName
-    },
-    ClientInfo,
-    Peercert
-) when From =:= cn orelse From =:= dn ->
-    Attrs0 = maps:get(client_attrs, ClientInfo, #{}),
-    Attrs =
-        case extract_client_attr_from_cert(From, Regexp, Peercert) of
-            {ok, Value} ->
-                ?SLOG(
-                    debug,
-                    #{
-                        msg => "client_attr_init_from_cert",
-                        extracted_as => AttrName,
-                        extracted_value => Value
-                    }
-                ),
-                Attrs0#{AttrName => Value};
-            _ ->
-                Attrs0
-        end,
-    ClientInfo#{client_attrs => Attrs};
-do_initialize_client_attrs_from_cert(_, ClientInfo, _Peercert) ->
-    ClientInfo.
-
-extract_client_attr_from_cert(cn, Regexp, Peercert) ->
-    CN = esockd_peercert:common_name(Peercert),
-    re_extract(CN, Regexp);
-extract_client_attr_from_cert(dn, Regexp, Peercert) ->
-    DN = esockd_peercert:subject(Peercert),
-    re_extract(DN, Regexp).
-
-re_extract(Str, Regexp) when is_binary(Str) ->
-    case re:run(Str, Regexp, [{capture, all_but_first, list}]) of
-        {match, [_ | _] = List} -> {ok, iolist_to_binary(List)};
-        _ -> nomatch
-    end;
-re_extract(_NotStr, _Regexp) ->
-    ignored.
-
 set_username(
     #mqtt_packet_connect{username = Username},
     ClientInfo = #{username := undefined}
@@ -1681,33 +1626,36 @@ maybe_assign_clientid(#mqtt_packet_connect{clientid = ClientId}, ClientInfo) ->
     {ok, ClientInfo#{clientid => ClientId}}.
 
 get_client_attrs_init_config(Zone) ->
-    case get_mqtt_conf(Zone, client_attrs_init, []) of
-        L when is_list(L) -> L;
-        M when is_map(M) -> [M]
-    end.
+    get_mqtt_conf(Zone, client_attrs_init, []).
 
-maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo0) ->
+maybe_set_client_initial_attrs(ConnPkt, #{zone := Zone} = ClientInfo) ->
     Inits = get_client_attrs_init_config(Zone),
-    ClientInfo = initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo0),
-    {ok, initialize_client_attrs_from_clientinfo(Inits, ClientInfo)}.
+    UserProperty = get_user_property_as_map(ConnPkt),
+    {ok, initialize_client_attrs(Inits, ClientInfo#{user_property => UserProperty})}.
 
-initialize_client_attrs_from_clientinfo(Inits, ClientInfo) ->
+initialize_client_attrs(Inits, ClientInfo) ->
     lists:foldl(
-        fun(Init, Acc) ->
+        fun(#{expression := Variform, set_as_attr := Name}, Acc) ->
             Attrs = maps:get(client_attrs, ClientInfo, #{}),
-            case extract_attr_from_clientinfo(Init, ClientInfo) of
+            case emqx_variform:render(Variform, ClientInfo) of
                 {ok, Value} ->
-                    #{extract_as := Name} = Init,
                     ?SLOG(
                         debug,
                         #{
-                            msg => "client_attr_init_from_clientinfo",
-                            extracted_as => Name,
-                            extracted_value => Value
+                            msg => "client_attr_initialized",
+                            set_as_attr => Name,
+                            attr_value => Value
                         }
                     ),
                     Acc#{client_attrs => Attrs#{Name => Value}};
-                _ ->
+                {error, Reason} ->
+                    ?SLOG(
+                        warning,
+                        #{
+                            msg => "client_attr_initialization_failed",
+                            reason => Reason
+                        }
+                    ),
                     Acc
             end
         end,
@@ -1715,67 +1663,12 @@ initialize_client_attrs_from_clientinfo(Inits, ClientInfo) ->
         Inits
     ).
 
-initialize_client_attrs_from_user_property(Inits, ConnPkt, ClientInfo) ->
-    lists:foldl(
-        fun(Init, Acc) ->
-            do_initialize_client_attrs_from_user_property(Init, ConnPkt, Acc)
-        end,
-        ClientInfo,
-        Inits
-    ).
-
-do_initialize_client_attrs_from_user_property(
-    #{
-        extract_from := user_property,
-        extract_as := PropertyKey
-    },
-    ConnPkt,
-    ClientInfo
-) ->
-    Attrs0 = maps:get(client_attrs, ClientInfo, #{}),
-    Attrs =
-        case extract_client_attr_from_user_property(ConnPkt, PropertyKey) of
-            {ok, Value} ->
-                ?SLOG(
-                    debug,
-                    #{
-                        msg => "client_attr_init_from_user_property",
-                        extracted_as => PropertyKey,
-                        extracted_value => Value
-                    }
-                ),
-                Attrs0#{PropertyKey => Value};
-            _ ->
-                Attrs0
-        end,
-    ClientInfo#{client_attrs => Attrs};
-do_initialize_client_attrs_from_user_property(_, _ConnPkt, ClientInfo) ->
-    ClientInfo.
-
-extract_client_attr_from_user_property(
-    #mqtt_packet_connect{properties = #{'User-Property' := UserProperty}}, PropertyKey
-) ->
-    case lists:keyfind(PropertyKey, 1, UserProperty) of
-        {_, Value} ->
-            {ok, Value};
-        _ ->
-            not_found
-    end;
-extract_client_attr_from_user_property(_ConnPkt, _PropertyKey) ->
-    ignored.
-
-extract_attr_from_clientinfo(#{extract_from := clientid, extract_regexp := Regexp}, #{
-    clientid := ClientId
-}) ->
-    re_extract(ClientId, Regexp);
-extract_attr_from_clientinfo(#{extract_from := username, extract_regexp := Regexp}, #{
-    username := Username
-}) when
-    Username =/= undefined
+get_user_property_as_map(#mqtt_packet_connect{properties = #{'User-Property' := UserProperty}}) when
+    is_list(UserProperty)
 ->
-    re_extract(Username, Regexp);
-extract_attr_from_clientinfo(_Config, _CLientInfo) ->
-    ignored.
+    maps:from_list(UserProperty);
+get_user_property_as_map(_) ->
+    #{}.
 
 fix_mountpoint(#{mountpoint := undefined} = ClientInfo) ->
     ClientInfo;

+ 26 - 8
apps/emqx/src/emqx_schema.erl

@@ -1734,20 +1734,38 @@ fields(durable_storage) ->
     emqx_ds_schema:schema();
 fields("client_attrs_init") ->
     [
-        {extract_from,
+        {expression,
             sc(
-                hoconsc:enum([clientid, username, cn, dn, user_property]),
-                #{desc => ?DESC("client_attrs_init_extract_from")}
+                typerefl:alias("string", any()),
+                #{
+                    desc => ?DESC("client_attrs_init_expression"),
+                    converter => fun compile_variform/2
+                }
             )},
-        {extract_regexp, sc(binary(), #{desc => ?DESC("client_attrs_init_extract_regexp")})},
-        {extract_as,
+        {set_as_attr,
             sc(binary(), #{
-                default => <<"alias">>,
-                desc => ?DESC("client_attrs_init_extract_as"),
+                desc => ?DESC("client_attrs_init_set_as_attr"),
                 validator => fun restricted_string/1
             })}
     ].
 
+compile_variform(undefined, _Opts) ->
+    undefined;
+compile_variform(Expression, #{make_serializable := true}) ->
+    case is_binary(Expression) of
+        true ->
+            Expression;
+        false ->
+            emqx_variform:decompile(Expression)
+    end;
+compile_variform(Expression, _Opts) ->
+    case emqx_variform:compile(Expression) of
+        {ok, Compiled} ->
+            Compiled;
+        {error, Reason} ->
+            throw(#{expression => Expression, reason => Reason})
+    end.
+
 restricted_string(Str) ->
     case emqx_utils:is_restricted_str(Str) of
         true -> ok;
@@ -3552,7 +3570,7 @@ mqtt_general() ->
             )},
         {"client_attrs_init",
             sc(
-                hoconsc:union([hoconsc:array(ref("client_attrs_init")), ref("client_attrs_init")]),
+                hoconsc:array(ref("client_attrs_init")),
                 #{
                     default => [],
                     desc => ?DESC("client_attrs_init")

+ 14 - 10
apps/emqx/test/emqx_client_SUITE.erl

@@ -395,13 +395,14 @@ t_certdn_as_alias(_) ->
 
 test_cert_extraction_as_alias(Which) ->
     %% extract the first two chars
-    Re = <<"^(..).*$">>,
     ClientId = iolist_to_binary(["ClientIdFor_", atom_to_list(Which)]),
-    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{
-        extract_from => Which,
-        extract_regexp => Re,
-        extract_as => <<"alias">>
-    }),
+    {ok, Compiled} = emqx_variform:compile("substr(" ++ atom_to_list(Which) ++ ",0,2)"),
+    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [
+        #{
+            expression => Compiled,
+            set_as_attr => <<"alias">>
+        }
+    ]),
     SslConf = emqx_common_test_helpers:client_mtls('tlsv1.2'),
     {ok, Client} = emqtt:start_link([
         {clientid, ClientId}, {port, 8883}, {ssl, true}, {ssl_opts, SslConf}
@@ -416,10 +417,13 @@ test_cert_extraction_as_alias(Which) ->
 
 t_client_attr_from_user_property(_Config) ->
     ClientId = atom_to_binary(?FUNCTION_NAME),
-    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{
-        extract_from => user_property,
-        extract_as => <<"group">>
-    }),
+    {ok, Compiled} = emqx_variform:compile("user_property.group"),
+    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [
+        #{
+            expression => Compiled,
+            set_as_attr => <<"group">>
+        }
+    ]),
     SslConf = emqx_common_test_helpers:client_mtls('tlsv1.3'),
     {ok, Client} = emqtt:start_link([
         {clientid, ClientId},

+ 7 - 5
apps/emqx/test/emqx_listeners_SUITE.erl

@@ -150,11 +150,13 @@ t_client_attr_as_mountpoint(_Config) ->
         <<"limiter">> => #{},
         <<"mountpoint">> => <<"groups/${client_attrs.ns}/">>
     },
-    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{
-        extract_from => clientid,
-        extract_regexp => <<"^(.+)-.+$">>,
-        extract_as => <<"ns">>
-    }),
+    {ok, Compiled} = emqx_variform:compile("nth(1,tokens(clientid,'-'))"),
+    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [
+        #{
+            expression => Compiled,
+            set_as_attr => <<"ns">>
+        }
+    ]),
     emqx_logger:set_log_level(debug),
     with_listener(tcp, attr_as_moutpoint, ListenerConf, fun() ->
         {ok, Client} = emqtt:start_link(#{

+ 8 - 6
apps/emqx_auth/test/emqx_authz/emqx_authz_SUITE.erl

@@ -557,12 +557,14 @@ t_publish_last_will_testament_denied_topic(_Config) ->
 
 t_alias_prefix(_Config) ->
     {ok, _} = emqx_authz:update(?CMD_REPLACE, [?SOURCE_FILE_CLIENT_ATTR]),
-    ExtractSuffix = <<"^.*-(.*)$">>,
-    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], #{
-        extract_from => clientid,
-        extract_regexp => ExtractSuffix,
-        extract_as => <<"alias">>
-    }),
+    %% '^.*-(.*)$': extract the suffix after the last '-'
+    {ok, Compiled} = emqx_variform:compile("concat(regex_extract(clientid,'^.*-(.*)$'))"),
+    emqx_config:put_zone_conf(default, [mqtt, client_attrs_init], [
+        #{
+            expression => Compiled,
+            set_as_attr => <<"alias">>
+        }
+    ]),
     ClientId = <<"org1-name2">>,
     SubTopic = <<"name2/#">>,
     SubTopicNotAllowed = <<"name3/#">>,

+ 31 - 31
apps/emqx_rule_engine/src/emqx_rule_funcs.erl

@@ -771,66 +771,66 @@ is_array(_) -> false.
 %% String Funcs
 %%------------------------------------------------------------------------------
 
-coalesce(List) -> emqx_variform_str:coalesce(List).
+coalesce(List) -> emqx_variform_bif:coalesce(List).
 
-coalesce(A, B) -> emqx_variform_str:coalesce(A, B).
+coalesce(A, B) -> emqx_variform_bif:coalesce(A, B).
 
-lower(S) -> emqx_variform_str:lower(S).
+lower(S) -> emqx_variform_bif:lower(S).
 
-ltrim(S) -> emqx_variform_str:ltrim(S).
+ltrim(S) -> emqx_variform_bif:ltrim(S).
 
-reverse(S) -> emqx_variform_str:reverse(S).
+reverse(S) -> emqx_variform_bif:reverse(S).
 
-rtrim(S) -> emqx_variform_str:rtrim(S).
+rtrim(S) -> emqx_variform_bif:rtrim(S).
 
-strlen(S) -> emqx_variform_str:strlen(S).
+strlen(S) -> emqx_variform_bif:strlen(S).
 
-substr(S, Start) -> emqx_variform_str:substr(S, Start).
+substr(S, Start) -> emqx_variform_bif:substr(S, Start).
 
-substr(S, Start, Length) -> emqx_variform_str:substr(S, Start, Length).
+substr(S, Start, Length) -> emqx_variform_bif:substr(S, Start, Length).
 
-trim(S) -> emqx_variform_str:trim(S).
+trim(S) -> emqx_variform_bif:trim(S).
 
-upper(S) -> emqx_variform_str:upper(S).
+upper(S) -> emqx_variform_bif:upper(S).
 
-split(S, P) -> emqx_variform_str:split(S, P).
+split(S, P) -> emqx_variform_bif:split(S, P).
 
-split(S, P, Position) -> emqx_variform_str:split(S, P, Position).
+split(S, P, Position) -> emqx_variform_bif:split(S, P, Position).
 
-tokens(S, Separators) -> emqx_variform_str:tokens(S, Separators).
+tokens(S, Separators) -> emqx_variform_bif:tokens(S, Separators).
 
-tokens(S, Separators, NoCRLF) -> emqx_variform_str:tokens(S, Separators, NoCRLF).
+tokens(S, Separators, NoCRLF) -> emqx_variform_bif:tokens(S, Separators, NoCRLF).
 
-concat(S1, S2) -> emqx_variform_str:concat(S1, S2).
+concat(S1, S2) -> emqx_variform_bif:concat(S1, S2).
 
-concat(List) -> emqx_variform_str:concat(List).
+concat(List) -> emqx_variform_bif:concat(List).
 
-sprintf_s(Format, Args) -> emqx_variform_str:sprintf_s(Format, Args).
+sprintf_s(Format, Args) -> emqx_variform_bif:sprintf_s(Format, Args).
 
-pad(S, Len) -> emqx_variform_str:pad(S, Len).
+pad(S, Len) -> emqx_variform_bif:pad(S, Len).
 
-pad(S, Len, Position) -> emqx_variform_str:pad(S, Len, Position).
+pad(S, Len, Position) -> emqx_variform_bif:pad(S, Len, Position).
 
-pad(S, Len, Position, Char) -> emqx_variform_str:pad(S, Len, Position, Char).
+pad(S, Len, Position, Char) -> emqx_variform_bif:pad(S, Len, Position, Char).
 
-replace(SrcStr, Pattern, RepStr) -> emqx_variform_str:replace(SrcStr, Pattern, RepStr).
+replace(SrcStr, Pattern, RepStr) -> emqx_variform_bif:replace(SrcStr, Pattern, RepStr).
 
 replace(SrcStr, Pattern, RepStr, Position) ->
-    emqx_variform_str:replace(SrcStr, Pattern, RepStr, Position).
+    emqx_variform_bif:replace(SrcStr, Pattern, RepStr, Position).
 
-regex_match(Str, RE) -> emqx_variform_str:regex_match(Str, RE).
+regex_match(Str, RE) -> emqx_variform_bif:regex_match(Str, RE).
 
-regex_replace(SrcStr, RE, RepStr) -> emqx_variform_str:regex_replace(SrcStr, RE, RepStr).
+regex_replace(SrcStr, RE, RepStr) -> emqx_variform_bif:regex_replace(SrcStr, RE, RepStr).
 
-ascii(Char) -> emqx_variform_str:ascii(Char).
+ascii(Char) -> emqx_variform_bif:ascii(Char).
 
-find(S, P) -> emqx_variform_str:find(S, P).
+find(S, P) -> emqx_variform_bif:find(S, P).
 
-find(S, P, Position) -> emqx_variform_str:find(S, P, Position).
+find(S, P, Position) -> emqx_variform_bif:find(S, P, Position).
 
-join_to_string(Str) -> emqx_variform_str:join_to_string(Str).
+join_to_string(Str) -> emqx_variform_bif:join_to_string(Str).
 
-join_to_string(Sep, List) -> emqx_variform_str:join_to_string(Sep, List).
+join_to_string(Sep, List) -> emqx_variform_bif:join_to_string(Sep, List).
 
 join_to_sql_values_string(List) ->
     QuotedList =
@@ -878,7 +878,7 @@ jq(FilterProgram, JSONBin) ->
         ])
     ).
 
-unescape(Str) -> emqx_variform_str:unescape(Str).
+unescape(Str) -> emqx_variform_bif:unescape(Str).
 
 %%------------------------------------------------------------------------------
 %% Array Funcs

+ 116 - 43
apps/emqx_utils/src/emqx_variform.erl

@@ -28,14 +28,35 @@
     erase_allowed_module/1,
     erase_allowed_modules/1
 ]).
+
 -export([render/2, render/3]).
+-export([compile/1, decompile/1]).
+
+-export_type([compiled/0]).
+
+-type compiled() :: #{expr := string(), form := term()}.
+-define(BIF_MOD, emqx_variform_bif).
+-define(IS_ALLOWED_MOD(M),
+    (M =:= ?BIF_MOD orelse
+        M =:= lists orelse
+        M =:= maps)
+).
+
+-define(COALESCE_BADARG,
+    throw(#{
+        reason => coalesce_badarg,
+        explain =>
+            "must be an array, or a call to a function which returns an array, "
+            "for example: coalesce([a,b,c]) or coalesce(tokens(var,','))"
+    })
+).
 
 %% @doc Render a variform expression with bindings.
 %% A variform expression is a template string which supports variable substitution
 %% and function calls.
 %%
 %% The function calls are in the form of `module.function(arg1, arg2, ...)` where `module`
-%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_str` module.
+%% is optional, and if not provided, the function is assumed to be in the `emqx_variform_bif` module.
 %% Both module and function must be existing atoms, and only whitelisted functions are allowed.
 %%
 %% A function arg can be a constant string or a number.
@@ -49,18 +70,54 @@
 %%
 %% For unresolved variables, empty string (but not "undefined") is used.
 %% In case of a runtime exception, an error is returned.
+%% Referencing an unbound variable returns an error, except inside coalesce (treated as '').
 -spec render(string(), map()) -> {ok, binary()} | {error, term()}.
 render(Expression, Bindings) ->
     render(Expression, Bindings, #{}).
 
-render(Expression, Bindings, Opts) when is_binary(Expression) ->
-    render(unicode:characters_to_list(Expression), Bindings, Opts);
+render(#{form := Form}, Bindings, Opts) ->
+    eval_as_string(Form, Bindings, Opts);
 render(Expression, Bindings, Opts) ->
+    case compile(Expression) of
+        {ok, Compiled} ->
+            render(Compiled, Bindings, Opts);
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+eval_as_string(Expr, Bindings, _Opts) ->
+    try
+        {ok, return_str(eval(Expr, Bindings, #{}))}
+    catch
+        throw:Reason ->
+            {error, Reason};
+        C:E:S ->
+            {error, #{exception => C, reason => E, stack_trace => S}}
+    end.
+
+%% Force the expression to return binary string.
+return_str(Str) when is_binary(Str) -> Str;
+return_str(Num) when is_integer(Num) -> integer_to_binary(Num);
+return_str(Num) when is_float(Num) -> float_to_binary(Num, [{decimals, 10}, compact]);
+return_str(Other) ->
+    throw(#{
+        reason => bad_return,
+        expected => string,
+        got => Other
+    }).
+
+%% @doc Compile a variform expression.
+-spec compile(string() | binary() | compiled()) -> {ok, compiled()} | {error, any()}.
+compile(#{form := _} = Compiled) ->
+    {ok, Compiled};
+compile(Expression) when is_binary(Expression) ->
+    compile(unicode:characters_to_list(Expression));
+compile(Expression) ->
     case emqx_variform_scan:string(Expression) of
         {ok, Tokens, _Line} ->
             case emqx_variform_parser:parse(Tokens) of
-                {ok, Expr} ->
-                    eval_as_string(Expr, Bindings, Opts);
+                {ok, Form} ->
+                    {ok, #{expr => Expression, form => Form}};
                 {error, {_, emqx_variform_parser, Msg}} ->
                     %% syntax error
                     {error, lists:flatten(Msg)};
@@ -71,40 +128,59 @@ render(Expression, Bindings, Opts) ->
             {error, Reason}
     end.
 
-eval_as_string(Expr, Bindings, _Opts) ->
-    try
-        {ok, str(eval(Expr, Bindings))}
-    catch
-        throw:Reason ->
-            {error, Reason};
-        C:E:S ->
-            {error, #{exception => C, reason => E, stack_trace => S}}
-    end.
+decompile(#{expr := Expression}) ->
+    Expression;
+decompile(Expression) ->
+    Expression.
 
-eval({str, Str}, _Bindings) ->
-    str(Str);
-eval({integer, Num}, _Bindings) ->
+eval({str, Str}, _Bindings, _Opts) ->
+    unicode:characters_to_binary(Str);
+eval({integer, Num}, _Bindings, _Opts) ->
     Num;
-eval({float, Num}, _Bindings) ->
+eval({float, Num}, _Bindings, _Opts) ->
     Num;
-eval({array, Args}, Bindings) ->
-    eval(Args, Bindings);
-eval({call, FuncNameStr, Args}, Bindings) ->
+eval({array, Args}, Bindings, Opts) ->
+    eval_loop(Args, Bindings, Opts);
+eval({call, FuncNameStr, Args}, Bindings, Opts) ->
     {Mod, Fun} = resolve_func_name(FuncNameStr),
     ok = assert_func_exported(Mod, Fun, length(Args)),
-    call(Mod, Fun, eval(Args, Bindings));
-eval({var, VarName}, Bindings) ->
-    resolve_var_value(VarName, Bindings);
-eval([Arg | Args], Bindings) ->
-    [eval(Arg, Bindings) | eval(Args, Bindings)];
-eval([], _Bindings) ->
-    [].
+    case {Mod, Fun} of
+        {?BIF_MOD, coalesce} ->
+            eval_coalesce(Args, Bindings, Opts);
+        _ ->
+            call(Mod, Fun, eval_loop(Args, Bindings, Opts))
+    end;
+eval({var, VarName}, Bindings, Opts) ->
+    resolve_var_value(VarName, Bindings, Opts).
+
+eval_loop([], _, _) -> [];
+eval_loop([H | T], Bindings, Opts) -> [eval(H, Bindings, Opts) | eval_loop(T, Bindings, Opts)].
+
+%% coalesce treats var_unbound exception as empty string ''
+eval_coalesce([{array, Args}], Bindings, Opts) ->
+    NewArgs = [lists:map(fun(Arg) -> try_eval(Arg, Bindings, Opts) end, Args)],
+    call(?BIF_MOD, coalesce, NewArgs);
+eval_coalesce([Arg], Bindings, Opts) ->
+    case try_eval(Arg, Bindings, Opts) of
+        List when is_list(List) ->
+            call(?BIF_MOD, coalesce, List);
+        <<>> ->
+            <<>>;
+        _ ->
+            ?COALESCE_BADARG
+    end;
+eval_coalesce(_Args, _Bindings, _Opts) ->
+    ?COALESCE_BADARG.
+
+try_eval(Arg, Bindings, Opts) ->
+    try
+        eval(Arg, Bindings, Opts)
+    catch
+        throw:#{reason := var_unbound} ->
+            <<>>
+    end.
 
 %% Some functions accept arbitrary number of arguments but implemented as /1.
-call(emqx_variform_str, concat, Args) ->
-    str(emqx_variform_str:concat(Args));
-call(emqx_variform_str, coalesce, Args) ->
-    str(emqx_variform_str:coalesce(Args));
 call(Mod, Fun, Args) ->
     erlang:apply(Mod, Fun, Args).
 
@@ -144,23 +220,23 @@ resolve_func_name(FuncNameStr) ->
                             function => Fun
                         })
                 end,
-            {emqx_variform_str, FuncName};
+            {?BIF_MOD, FuncName};
         _ ->
             throw(#{reason => invalid_function_reference, function => FuncNameStr})
     end.
 
-resolve_var_value(VarName, Bindings) ->
+%% _Opts can be extended in the future. For example, unbound var as 'undefined'
+resolve_var_value(VarName, Bindings, _Opts) ->
     case emqx_template:lookup_var(split(VarName), Bindings) of
         {ok, Value} ->
             Value;
         {error, _Reason} ->
-            <<>>
+            throw(#{
+                var_name => VarName,
+                reason => var_unbound
+            })
     end.
 
-assert_func_exported(emqx_variform_str, concat, _Arity) ->
-    ok;
-assert_func_exported(emqx_variform_str, coalesce, _Arity) ->
-    ok;
 assert_func_exported(Mod, Fun, Arity) ->
     ok = try_load(Mod),
     case erlang:function_exported(Mod, Fun, Arity) of
@@ -187,7 +263,7 @@ try_load(Mod) ->
             ok
     end.
 
-assert_module_allowed(emqx_variform_str) ->
+assert_module_allowed(Mod) when ?IS_ALLOWED_MOD(Mod) ->
     ok;
 assert_module_allowed(Mod) ->
     Allowed = get_allowed_modules(),
@@ -220,8 +296,5 @@ erase_allowed_modules(Modules) when is_list(Modules) ->
 get_allowed_modules() ->
     persistent_term:get({emqx_variform, allowed_modules}, []).
 
-str(Value) ->
-    emqx_utils_conv:bin(Value).
-
 split(VarName) ->
     lists:map(fun erlang:iolist_to_binary/1, string:tokens(VarName, ".")).

+ 31 - 8
apps/emqx_utils/src/emqx_variform_str.erl

@@ -14,13 +14,11 @@
 %% limitations under the License.
 %%--------------------------------------------------------------------
 
-%% Predefined functions string templating
--module(emqx_variform_str).
+%% Predefined functions for variform expressions.
+-module(emqx_variform_bif).
 
 %% String Funcs
 -export([
-    coalesce/1,
-    coalesce/2,
     lower/1,
     ltrim/1,
     ltrim/2,
@@ -47,15 +45,22 @@
     replace/4,
     regex_match/2,
     regex_replace/3,
+    regex_extract/2,
     ascii/1,
     find/2,
     find/3,
     join_to_string/1,
     join_to_string/2,
     unescape/1,
-    nth/2
+    any_to_str/1
 ]).
 
+%% Array functions
+-export([nth/2]).
+
+%% Control functions
+-export([coalesce/1, coalesce/2]).
+
 -define(IS_EMPTY(X), (X =:= <<>> orelse X =:= "" orelse X =:= undefined)).
 
 %%------------------------------------------------------------------------------
@@ -143,8 +148,10 @@ tokens(S, Separators, <<"nocrlf">>) ->
 concat(S1, S2) ->
     concat([S1, S2]).
 
+%% @doc Concatenate a list of strings.
+%% NOTE: it converts non-string elements to Erlang term literals for backward compatibility
 concat(List) ->
-    unicode:characters_to_binary(lists:map(fun str/1, List), unicode).
+    unicode:characters_to_binary(lists:map(fun any_to_str/1, List), unicode).
 
 sprintf_s(Format, Args) when is_list(Args) ->
     erlang:iolist_to_binary(io_lib:format(binary_to_list(Format), Args)).
@@ -190,6 +197,22 @@ regex_match(Str, RE) ->
 regex_replace(SrcStr, RE, RepStr) ->
     re:replace(SrcStr, RE, RepStr, [global, {return, binary}]).
 
+%% @doc Searches the string Str for the first match of the pattern specified by Regexp.
+%% If a match is found, it returns the list of groups captured by that match.
+%% If there is no match, or no groups are captured, it returns an empty list.
+%% This function can be used to extract parts of a string based on a regular expression,
+%% excluding the complete match itself. Only the first match is considered (no global search).
+%% Examples:
+%%  ("Number: 12345", "(\\d+)") -> [<<"12345">>]
+%%  ("Hello, world!", "(\\w+)") -> [<<"Hello">>]
+%%  ("No numbers here!", "(\\d+)") -> []
+%%  ("Date: 2021-05-20", "(\\d{4})-(\\d{2})-(\\d{2})") -> [<<"2021">>, <<"05">>, <<"20">>]
+regex_extract(Str, Regexp) ->
+    case re:run(Str, Regexp, [{capture, all_but_first, list}]) of
+        {match, [_ | _] = L} -> lists:map(fun erlang:iolist_to_binary/1, L);
+        _ -> []
+    end.
+
 ascii(Char) when is_binary(Char) ->
     [FirstC | _] = binary_to_list(Char),
     FirstC.
@@ -212,7 +235,7 @@ join_to_string(List) when is_list(List) ->
     join_to_string(<<", ">>, List).
 
 join_to_string(Sep, List) when is_list(List), is_binary(Sep) ->
-    iolist_to_binary(lists:join(Sep, [str(Item) || Item <- List])).
+    iolist_to_binary(lists:join(Sep, [any_to_str(Item) || Item <- List])).
 
 unescape(Bin) when is_binary(Bin) ->
     UnicodeList = unicode:characters_to_list(Bin, utf8),
@@ -364,5 +387,5 @@ is_hex_digit(_) -> false.
 %% Data Type Conversion Funcs
 %%------------------------------------------------------------------------------
 
-str(Data) ->
+any_to_str(Data) ->
     emqx_utils_conv:bin(Data).

+ 59 - 0
apps/emqx_utils/test/emqx_variform_bif_tests.erl

@@ -0,0 +1,59 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+%% Most of the functions are tested as rule-engine string funcs
+-module(emqx_variform_bif_tests).
+
+-compile(export_all).
+-compile(nowarn_export_all).
+
+-include_lib("eunit/include/eunit.hrl").
+
+regex_extract_test_() ->
+    [
+        ?_assertEqual([<<"12345">>], regex_extract("Order number: 12345", "(\\d+)")),
+        ?_assertEqual(
+            [<<"Hello">>, <<"world">>], regex_extract("Hello, world!", "(\\w+).*\s(\\w+)")
+        ),
+        ?_assertEqual([], regex_extract("No numbers here!", "(\\d+)")),
+        ?_assertEqual(
+            [<<"2021">>, <<"05">>, <<"20">>],
+            regex_extract("Date: 2021-05-20", "(\\d{4})-(\\d{2})-(\\d{2})")
+        ),
+        ?_assertEqual([<<"Hello">>], regex_extract("Hello, world!", "(Hello)")),
+        ?_assertEqual(
+            [<<"12">>, <<"34">>], regex_extract("Items: 12, Price: 34", "(\\d+).*\s(\\d+)")
+        ),
+        ?_assertEqual(
+            [<<"john.doe@example.com">>],
+            regex_extract("Contact: john.doe@example.com", "([\\w\\.]+@[\\w\\.]+)")
+        ),
+        ?_assertEqual([], regex_extract("Just some text, nothing more.", "([A-Z]\\d{3})")),
+        ?_assertEqual(
+            [<<"admin">>, <<"1234">>],
+            regex_extract("User: admin, Pass: 1234", "User: (\\w+), Pass: (\\d+)")
+        ),
+        ?_assertEqual([], regex_extract("", "(\\d+)")),
+        ?_assertEqual([], regex_extract("$$$###!!!", "(\\d+)")),
+        ?_assertEqual([<<"23.1">>], regex_extract("Erlang 23.1 version", "(\\d+\\.\\d+)")),
+        ?_assertEqual(
+            [<<"192.168.1.1">>],
+            regex_extract("Server IP: 192.168.1.1 at port 8080", "(\\d+\\.\\d+\\.\\d+\\.\\d+)")
+        )
+    ].
+
+regex_extract(Str, RegEx) ->
+    emqx_variform_bif:regex_extract(Str, RegEx).

+ 36 - 14
apps/emqx_utils/test/emqx_variform_tests.erl

@@ -27,14 +27,16 @@ redner_test_() ->
     [
         {"direct var reference", fun() -> ?assertEqual({ok, <<"1">>}, render("a", #{a => 1})) end},
         {"concat strings", fun() ->
-            ?assertEqual({ok, <<"a,b">>}, render("concat('a',',','b')", #{}))
+            ?assertEqual({ok, <<"a,b">>}, render("concat(['a',',','b'])", #{}))
+        end},
+        {"concat empty string", fun() ->
+            ?assertEqual({ok, <<"">>}, render("concat([''])", #{}))
         end},
-        {"concat empty string", fun() -> ?assertEqual({ok, <<"">>}, render("concat('')", #{})) end},
         {"tokens 1st", fun() ->
             ?assertEqual({ok, <<"a">>}, render("nth(1,tokens(var, ','))", #{var => <<"a,b">>}))
         end},
-        {"unknown var as empty str", fun() ->
-            ?assertEqual({ok, <<>>}, render("var", #{}))
+        {"unknown var return error", fun() ->
+            ?assertMatch({error, #{reason := var_unbound}}, render("var", #{}))
         end},
         {"out of range nth index", fun() ->
             ?assertEqual({ok, <<>>}, render("nth(2, tokens(var, ','))", #{var => <<"a">>}))
@@ -97,7 +99,7 @@ unknown_func_test_() ->
         {"unknown function in a known module", fun() ->
             ?assertMatch(
                 {error, #{reason := unknown_variform_function}},
-                render("emqx_variform_str.nonexistingatom__(a)", #{})
+                render("emqx_variform_bif.nonexistingatom__(a)", #{})
             )
         end},
         {"invalid func reference", fun() ->
@@ -133,19 +135,39 @@ inject_allowed_module_test() ->
 
 coalesce_test_() ->
     [
-        {"coalesce first", fun() ->
-            ?assertEqual({ok, <<"a">>}, render("coalesce('a','b')", #{}))
+        {"first", fun() ->
+            ?assertEqual({ok, <<"a">>}, render("coalesce(['a','b'])", #{}))
+        end},
+        {"second", fun() ->
+            ?assertEqual({ok, <<"b">>}, render("coalesce(['', 'b'])", #{}))
+        end},
+        {"first var", fun() ->
+            ?assertEqual({ok, <<"a">>}, render("coalesce([a,b])", #{a => <<"a">>, b => <<"b">>}))
+        end},
+        {"second var", fun() ->
+            ?assertEqual({ok, <<"b">>}, render("coalesce([a,b])", #{b => <<"b">>}))
+        end},
+        {"empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce([a,b])", #{})) end},
+        {"arg from other func", fun() ->
+            ?assertEqual({ok, <<"b">>}, render("coalesce(tokens(a,','))", #{a => <<",,b,c">>}))
         end},
-        {"coalesce second", fun() ->
-            ?assertEqual({ok, <<"b">>}, render("coalesce('', 'b')", #{}))
+        {"var unbound", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a)", #{})) end},
+        {"var unbound in call", fun() ->
+            ?assertEqual({ok, <<>>}, render("coalesce(concat(a))", #{}))
         end},
-        {"coalesce first var", fun() ->
-            ?assertEqual({ok, <<"a">>}, render("coalesce(a,b)", #{a => <<"a">>, b => <<"b">>}))
+        {"var unbound in calls", fun() ->
+            ?assertEqual({ok, <<"c">>}, render("coalesce([any_to_str(a),any_to_str(b),'c'])", #{}))
         end},
-        {"coalesce second var", fun() ->
-            ?assertEqual({ok, <<"b">>}, render("coalesce(a,b)", #{b => <<"b">>}))
+        {"badarg", fun() ->
+            ?assertMatch(
+                {error, #{reason := coalesce_badarg}}, render("coalesce(a,b)", #{a => 1, b => 2})
+            )
         end},
-        {"coalesce empty", fun() -> ?assertEqual({ok, <<>>}, render("coalesce(a,b)", #{})) end}
+        {"badarg from return", fun() ->
+            ?assertMatch(
+                {error, #{reason := coalesce_badarg}}, render("coalesce(any_to_str(a))", #{a => 1})
+            )
+        end}
     ].
 
 syntax_error_test_() ->

+ 2 - 2
changes/ce/feat-12750.en.md

@@ -7,8 +7,8 @@ an MQTT connection.
 
 ### Initialization of `client_attrs`
 
-- The `client_attrs` fields can be initially populated based on the configuration from one of the
-  following sources:
+- The `client_attrs` fields can be initially populated from one of the
+  following `clientinfo` fields:
   - `cn`: The common name from the TLS client's certificate.
   - `dn`: The distinguished name from the TLS client's certificate, that is, the certificate "Subject".
   - `clientid`: The MQTT client ID provided by the client.

+ 20 - 31
rel/i18n/emqx_schema.hocon

@@ -1575,48 +1575,37 @@ client_attrs_init {
   label: "Client Attributes Initialization"
   desc: """~
     Specify how to initialize client attributes.
-    This config accepts one initialization rule, or a list of rules.
-    Client attributes can be initialized as `client_attrs.NAME`,
-    where `NAME` is the name of the attribute specified in the config `extract_as`.
+    Each client attribute can be initialized as `client_attrs.{NAME}`,
+    where `{NAME}` is the name of the attribute specified in the config field `set_as_attr`.
     The initialized client attribute will be stored in the `client_attrs` property with the specified name,
     and can be used as a placeholder in a template for authentication and authorization.
-    For example, use `${client_attrs.alias}` to render an HTTP POST body when `extract_as = alias`,
+    For example, use `${client_attrs.alias}` to render an HTTP POST body when `set_as_attr = alias`,
     or render listener config `mountpoint = devices/${client_attrs.alias}/` to initialize a per-client topic namespace."""
 }
 
-client_attrs_init_extract_from {
-  label: "Client Property to Extract Attribute"
-  desc: """~
-    Specify from which client property the client attribute should be extracted.
-
-    Supported values:
-    - `clientid`: Extract from the client ID.
-    - `username`: Extract from the username.
-    - `cn`: Extract from the Common Name (CN) field of the client certificate.
-    - `dn`: Extract from the Distinguished Name (DN) field of the client certificate.
-    - `user_property`: Extract from the user property sent in the MQTT v5 `CONNECT` packet.
-      In this case, `extract_regexp` is not applicable, and `extract_as` should be the user property key.
-
-    NOTE: this extraction happens **after** `clientid` or `username` is initialized
-    from `peer_cert_as_clientid` or `peer_cert_as_username` config."""
-}
-
-client_attrs_init_extract_regexp {
+client_attrs_init_expression {
   label: "Client Attribute Extraction Expression"
   desc: """~
-    The regular expression to extract a client attribute from the client property specified by `client_attrs_init.extract_from` config.
-    The expression should match the entire client property value, and capturing groups are concatenated to make the client attribute.
-    For example if the client attribute is the first part of the client ID delimited by a dash, the regular expression would be `^(.+?)-.*$`.
-    Note that failure to match the regular expression will result in the client attribute being absent but not an empty string.
-    Note also that currently only printable ASCII characters are allowed as input for the regular expression extraction."""
+    A one-line expression to evaluate a set of predefined string functions (like in the rule engine SQL statements).
+    The expression accepts a direct variable reference, or one function call with nested calls for its arguments,
+    but it does not provide user-defined variable binding or user-defined functions; only the pre-bound variables listed below are available.
+    For example, to extract the prefix of client ID delimited by a dot: `nth(1, tokens(clientid, '.'))`.
+
+    The pre-bound variables are:
+    - `cn`: Client's TLS certificate common name.
+    - `dn`: Client's TLS certificate distinguished name (the subject).
+    - `clientid`: MQTT Client ID.
+    - `username`: MQTT Client's username.
+    - `user_property.{NAME}`: User properties in the CONNECT packet.
+
+    You can read more about variform expressions in EMQX docs."""
 }
 
-client_attrs_init_extract_as {
+client_attrs_init_set_as_attr {
   label: "Name The Extracted Attribute"
   desc: """~
-    The name of the client attribute extracted from the client property specified by `client_attrs_init.extract_from` config.
-    The extracted attribute will be stored in the `client_attrs` property with this name.
-    In case `extract_from = user_property`, this should be the key of the user property."""
+    The name of the client attribute extracted from the client data.
+    The extracted attribute will be stored in the `client_attrs` property with this name."""
 }
 
 }

+ 1 - 0
scripts/spellcheck/dicts/emqx.txt

@@ -259,6 +259,7 @@ uplink
 url
 utc
 util
+variform
 ver
 vm
 vsn