Просмотр исходного кода

Merge pull request #14409 from JimMoen/fix-sqlserver-utf16-little-encoding

feat: rule func `str_utf16_le/1` and `sqlserver_bin2hexstr/1`
JimMoen 1 год назад
Родитель
Сommit
2d6b729d20

+ 1 - 1
apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.app.src

@@ -1,6 +1,6 @@
 {application, emqx_bridge_sqlserver, [
     {description, "EMQX Enterprise SQL Server Bridge"},
-    {vsn, "0.2.6"},
+    {vsn, "0.2.7"},
     {registered, []},
     {applications, [kernel, stdlib, emqx_resource, odbc]},
     {env, [

+ 2 - 2
apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver_connector.erl

@@ -601,9 +601,9 @@ proc_batch_sql(BatchReqs, BatchInserts, Tokens, ChannelConf) ->
     <<BatchInserts/binary, " values ", Values/binary>>.
 
 proc_msg(Tokens, Msg, #{undefined_vars_as_null := true}) ->
-    emqx_placeholder:proc_sql_param_str2(Tokens, Msg);
+    emqx_placeholder:proc_sqlserver_param_str2(Tokens, Msg);
 proc_msg(Tokens, Msg, _) ->
-    emqx_placeholder:proc_sql_param_str(Tokens, Msg).
+    emqx_placeholder:proc_sqlserver_param_str(Tokens, Msg).
 
 to_bin(List) when is_list(List) ->
     unicode:characters_to_binary(List, utf8).

+ 31 - 3
apps/emqx_rule_engine/src/emqx_rule_funcs.erl

@@ -109,6 +109,7 @@
 -export([
     str/1,
     str_utf8/1,
+    str_utf16_le/1,
     bool/1,
     int/1,
     float/1,
@@ -116,7 +117,10 @@
     float2str/2,
     map/1,
     bin2hexstr/1,
-    hexstr2bin/1
+    bin2hexstr/2,
+    hexstr2bin/1,
+    hexstr2bin/2,
+    sqlserver_bin2hexstr/1
 ]).
 
 %% Data Type Validation Funcs
@@ -713,6 +717,11 @@ str_utf8(Data) when is_binary(Data); is_list(Data) ->
 str_utf8(Data) ->
     unicode:characters_to_binary(str(Data)).
 
+str_utf16_le(Data) when is_binary(Data); is_list(Data) ->
+    unicode:characters_to_binary(Data, utf8, {utf16, little});
+str_utf16_le(Data) ->
+    unicode:characters_to_binary(str(Data), utf8, {utf16, little}).
+
 bool(Data) ->
     emqx_utils_conv:bool(Data).
 
@@ -744,10 +753,29 @@ map(Data) ->
     error(badarg, [Data]).
 
 bin2hexstr(Bin) ->
-    emqx_variform_bif:bin2hexstr(Bin).
+    bin2hexstr(Bin, undefined).
+
+bin2hexstr(Bin, undefined) ->
+    emqx_variform_bif:bin2hexstr(Bin);
+bin2hexstr(Bin, Prefix) when is_binary(Prefix) ->
+    <<Prefix/binary, (emqx_variform_bif:bin2hexstr(Bin))/binary>>.
 
 hexstr2bin(Str) ->
-    emqx_variform_bif:hexstr2bin(Str).
+    hexstr2bin(Str, undefined).
+
+hexstr2bin(Str, undefined) ->
+    emqx_variform_bif:hexstr2bin(Str);
+hexstr2bin(Str, Prefix) when is_binary(Prefix) ->
+    Length = size(Prefix),
+    case Str of
+        <<Prefix:Length/binary, Rest/binary>> ->
+            emqx_variform_bif:hexstr2bin(Rest);
+        _ ->
+            error(binary_prefix_unmatch)
+    end.
+
+sqlserver_bin2hexstr(Str) ->
+    bin2hexstr(Str, <<"0x">>).
 
 %%------------------------------------------------------------------------------
 %% NULL Funcs

+ 65 - 0
apps/emqx_rule_engine/test/emqx_rule_funcs_SUITE.erl

@@ -125,6 +125,28 @@ t_str(_) ->
     ?assertEqual(<<"true">>, emqx_rule_funcs:str_utf8(true)),
     ?assertError(_, emqx_rule_funcs:str_utf8({a, v})).
 
+t_str_utf16_le(_) ->
+    ?assertEqual(<<"abc"/utf16-little>>, emqx_rule_funcs:str_utf16_le("abc")),
+    ?assertEqual(<<"abc"/utf16-little>>, emqx_rule_funcs:str_utf16_le(abc)),
+    ?assertEqual(<<"{\"a\":1}"/utf16-little>>, emqx_rule_funcs:str_utf16_le(#{a => 1})),
+    ?assertEqual(<<"1"/utf16-little>>, emqx_rule_funcs:str_utf16_le(1)),
+    ?assertEqual(<<"2.0"/utf16-little>>, emqx_rule_funcs:str_utf16_le(2.0)),
+    ?assertEqual(<<"true"/utf16-little>>, emqx_rule_funcs:str_utf16_le(true)),
+    ?assertError(_, emqx_rule_funcs:str_utf16_le({a, v})),
+
+    ?assertEqual(<<"abc"/utf16-little>>, emqx_rule_funcs:str_utf16_le("abc")),
+    ?assertEqual(<<"abc 你好"/utf16-little>>, emqx_rule_funcs:str_utf16_le("abc 你好")),
+    ?assertEqual(<<"abc 你好"/utf16-little>>, emqx_rule_funcs:str_utf16_le(<<"abc 你好"/utf8>>)),
+    ?assertEqual(<<"abc"/utf16-little>>, emqx_rule_funcs:str_utf16_le(abc)),
+    ?assertEqual(
+        <<"{\"a\":\"abc 你好\"}"/utf16-little>>,
+        emqx_rule_funcs:str_utf16_le(#{a => <<"abc 你好"/utf8>>})
+    ),
+    ?assertEqual(<<"1"/utf16-little>>, emqx_rule_funcs:str_utf16_le(1)),
+    ?assertEqual(<<"2.0"/utf16-little>>, emqx_rule_funcs:str_utf16_le(2.0)),
+    ?assertEqual(<<"true"/utf16-little>>, emqx_rule_funcs:str_utf16_le(true)),
+    ?assertError(_, emqx_rule_funcs:str_utf16_le({a, v})).
+
 t_int(_) ->
     ?assertEqual(1, emqx_rule_funcs:int("1")),
     ?assertEqual(1, emqx_rule_funcs:int(<<"1.0">>)),
@@ -201,10 +223,38 @@ t_hexstr2bin(_) ->
     ?assertEqual(<<1, 2>>, emqx_rule_funcs:hexstr2bin(<<"0102">>)),
     ?assertEqual(<<17, 33>>, emqx_rule_funcs:hexstr2bin(<<"1121">>)).
 
+t_hexstr2bin_with_prefix(_) ->
+    ?assertEqual(<<6, 54, 79>>, emqx_rule_funcs:hexstr2bin(<<"0x6364f">>, <<"0x">>)),
+    ?assertEqual(<<10>>, emqx_rule_funcs:hexstr2bin(<<"0Xa">>, <<"0X">>)),
+    ?assertEqual(<<15>>, emqx_rule_funcs:hexstr2bin(<<"0bf">>, <<"0b">>)),
+    ?assertEqual(<<5>>, emqx_rule_funcs:hexstr2bin(<<"0B5">>, <<"0B">>)),
+    ?assertEqual(<<1, 2>>, emqx_rule_funcs:hexstr2bin(<<"0x0102">>, <<"0x">>)),
+    ?assertEqual(<<17, 33>>, emqx_rule_funcs:hexstr2bin(<<"0X1121">>, <<"0X">>)).
+
+t_hexstr2bin_with_invalid_prefix(_) ->
+    [
+        begin
+            ?assertError(binary_prefix_unmatch, emqx_rule_funcs:hexstr2bin(HexStr, Prefix))
+        end
+     || {HexStr, Prefix} <- [
+            {<<"0x6364f">>, <<"ab">>},
+            {<<"0Xa">>, <<"ef">>},
+            {<<"0bf">>, <<"你好👋"/utf8>>},
+            {<<"0B5">>, <<"🐸"/utf8>>}
+        ]
+    ].
+
 t_bin2hexstr(_) ->
     ?assertEqual(<<"0102">>, emqx_rule_funcs:bin2hexstr(<<1, 2>>)),
     ?assertEqual(<<"1121">>, emqx_rule_funcs:bin2hexstr(<<17, 33>>)).
 
+t_bin2hexstr_with_prefix(_) ->
+    ?assertEqual(<<"0x0102">>, emqx_rule_funcs:bin2hexstr(<<1, 2>>, <<"0x">>)),
+    ?assertEqual(<<"0X0102">>, emqx_rule_funcs:bin2hexstr(<<1, 2>>, <<"0X">>)),
+    ?assertEqual(<<"0b1121">>, emqx_rule_funcs:bin2hexstr(<<17, 33>>, <<"0b">>)),
+    ?assertEqual(<<"0B1121">>, emqx_rule_funcs:bin2hexstr(<<17, 33>>, <<"0B">>)),
+    ?assertEqual(<<"🧠0102"/utf8>>, emqx_rule_funcs:bin2hexstr(<<1, 2>>, <<"🧠"/utf8>>)).
+
 t_bin2hexstr_not_even_bytes(_) ->
     ?assertEqual(<<"0102">>, emqx_rule_funcs:bin2hexstr(<<1:5, 2>>)),
     ?assertEqual(<<"1002">>, emqx_rule_funcs:bin2hexstr(<<16:5, 2>>)),
@@ -218,6 +268,21 @@ t_bin2hexstr_not_even_bytes(_) ->
     ?assertEqual(<<"1121">>, emqx_rule_funcs:bin2hexstr(<<17, 33>>)),
     ?assertEqual(<<"01121">>, emqx_rule_funcs:bin2hexstr(<<17:9, 33>>)).
 
+t_sqlserver_bin2hexstr(_) ->
+    ?assertEqual(<<"0x0102">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<1, 2>>)),
+    ?assertEqual(<<"0x1121">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<17, 33>>)),
+    ?assertEqual(<<"0x0102">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<1:5, 2>>)),
+    ?assertEqual(<<"0x1002">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<16:5, 2>>)),
+    ?assertEqual(<<"0x1002">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<16:8, 2>>)),
+    ?assertEqual(<<"0x102">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<1:4, 2>>)),
+    ?assertEqual(<<"0x102">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<1:3, 2>>)),
+    ?assertEqual(<<"0x102">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<1:1, 2>>)),
+    ?assertEqual(<<"0x002">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<2:1, 2>>)),
+    ?assertEqual(<<"0x02">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<2>>)),
+    ?assertEqual(<<"0x2">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<2:2>>)),
+    ?assertEqual(<<"0x1121">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<17, 33>>)),
+    ?assertEqual(<<"0x01121">>, emqx_rule_funcs:sqlserver_bin2hexstr(<<17:9, 33>>)).
+
 t_hex_convert(_) ->
     ?PROPTEST(hex_convert).
 

+ 16 - 12
apps/emqx_utils/src/emqx_placeholder.erl

@@ -29,8 +29,8 @@
     preproc_sql/1,
     preproc_sql/2,
     proc_sql/2,
-    proc_sql_param_str/2,
-    proc_sql_param_str2/2,
+    proc_sqlserver_param_str/2,
+    proc_sqlserver_param_str2/2,
     proc_cql_param_str/2,
     proc_param_str/3,
     preproc_tmpl_deep/1,
@@ -183,17 +183,13 @@ preproc_sql(Sql, Opts) ->
 proc_sql(Tokens, Data) ->
     proc_tmpl(Tokens, Data, #{return => rawlist, var_trans => fun sql_data/1}).
 
--spec proc_sql_param_str(tmpl_token(), map()) -> binary().
-proc_sql_param_str(Tokens, Data) ->
-    % NOTE
-    % This is a bit misleading: currently, escaping logic in `quote_sql/1` likely
-    % won't work with pgsql since it does not support C-style escapes by default.
-    % https://www.postgresql.org/docs/14/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS
-    proc_param_str(Tokens, Data, fun quote_sql/1).
+-spec proc_sqlserver_param_str(tmpl_token(), map()) -> binary().
+proc_sqlserver_param_str(Tokens, Data) ->
+    proc_param_str(Tokens, Data, fun quote_sqlserver/1).
 
--spec proc_sql_param_str2(tmpl_token(), map()) -> binary().
-proc_sql_param_str2(Tokens, Data) ->
-    proc_param_str(Tokens, Data, fun quote_sql2/1).
+-spec proc_sqlserver_param_str2(tmpl_token(), map()) -> binary().
+proc_sqlserver_param_str2(Tokens, Data) ->
+    proc_param_str(Tokens, Data, fun quote_sqlserver2/1).
 
 -spec proc_cql_param_str(tmpl_token(), map()) -> binary().
 proc_cql_param_str(Tokens, Data) ->
@@ -291,6 +287,14 @@ quote_mysql(Str) ->
 quote_mysql2(Str) ->
     emqx_utils_sql:to_sql_string(Str, #{escaping => mysql}).
 
+-spec quote_sqlserver(_Str) -> iolist().
+quote_sqlserver(Str) ->
+    emqx_utils_sql:to_sql_string(Str, #{escaping => sqlserver, undefined => <<"undefined">>}).
+
+-spec quote_sqlserver2(_Str) -> iolist().
+quote_sqlserver2(Str) ->
+    emqx_utils_sql:to_sql_string(Str, #{escaping => sqlserver}).
+
 lookup_var(Var, Value) when Var == ?PH_VAR_THIS orelse Var == [] ->
     Value;
 lookup_var([Prop | Rest], Data0) ->

+ 43 - 8
apps/emqx_utils/src/emqx_utils_sql.erl

@@ -32,7 +32,11 @@
 -type statement_type() :: select | insert | delete | update.
 -type value() :: null | binary() | number() | boolean() | [value()].
 
--define(INSERT_RE_MP_KEY, insert_re_mp).
+%% Type copied from stdlib/src/re.erl for compatibility with OTP 26,
+%% since `re:mp()` is only exported from OTP 27 onwards.
+-type mp() :: {re_pattern, _, _, _, _}.
+
+-define(INSERT_RE_MP_KEY, {?MODULE, insert_re_mp}).
 -define(INSERT_RE_BIN, <<
     %% case-insensitive
     "(?i)^\\s*",
@@ -49,20 +53,24 @@
     "\\s*$"
 >>).
 
+-define(HEX_RE_MP_KEY, {?MODULE, hex_re_mp}).
+-define(HEX_RE_BIN, <<"^[0-9a-fA-F]+$">>).
+
 -dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}).
 
--on_load(put_insert_mp/0).
+-on_load(on_load/0).
+
+on_load() ->
+    ok = put_insert_mp(),
+    ok = put_hex_re_mp().
 
 put_insert_mp() ->
-    persistent_term:put({?MODULE, ?INSERT_RE_MP_KEY}, re:compile(?INSERT_RE_BIN)),
+    persistent_term:put(?INSERT_RE_MP_KEY, re:compile(?INSERT_RE_BIN)),
     ok.
 
-%% The type Copied from stdlib/src/re.erl to compatibility with OTP 26
-%% Since `re:mp()` exported after OTP 27
--type mp() :: {re_pattern, _, _, _, _}.
 -spec get_insert_mp() -> {ok, mp()}.
 get_insert_mp() ->
-    case persistent_term:get({?MODULE, ?INSERT_RE_MP_KEY}, undefined) of
+    case persistent_term:get(?INSERT_RE_MP_KEY, undefined) of
         undefined ->
             ok = put_insert_mp(),
             get_insert_mp();
@@ -70,6 +78,20 @@ get_insert_mp() ->
             {ok, MP}
     end.
 
+put_hex_re_mp() ->
+    persistent_term:put(?HEX_RE_MP_KEY, re:compile(?HEX_RE_BIN)),
+    ok.
+
+-spec get_hex_re_mp() -> {ok, mp()}.
+get_hex_re_mp() ->
+    case persistent_term:get(?HEX_RE_MP_KEY, undefined) of
+        undefined ->
+            ok = put_hex_re_mp(),
+            get_hex_re_mp();
+        {ok, MP} ->
+            {ok, MP}
+    end.
+
 -spec get_statement_type(iodata()) -> statement_type() | {error, unknown}.
 get_statement_type(Query) ->
     KnownTypes = #{
@@ -114,7 +136,7 @@ to_sql_value(Map) when is_map(Map) -> emqx_utils_json:encode(Map).
 %% SQL statements. The value is escaped if necessary.
 -spec to_sql_string(term(), Options) -> unicode:chardata() when
     Options :: #{
-        escaping => mysql | sql | cql,
+        escaping => mysql | sql | cql | sqlserver,
         undefined => null | unicode:chardata()
     }.
 to_sql_string(undefined, #{undefined := Str} = Opts) when Str =/= null ->
@@ -132,6 +154,8 @@ to_sql_string(Term, #{escaping := mysql}) ->
     maybe_escape(Term, fun escape_mysql/1);
 to_sql_string(Term, #{escaping := cql}) ->
     maybe_escape(Term, fun escape_cql/1);
+to_sql_string(Term, #{escaping := sqlserver}) ->
+    maybe_escape(Term, fun escape_sqlserver/1);
 to_sql_string(Term, #{}) ->
     maybe_escape(Term, fun escape_sql/1).
 
@@ -174,6 +198,17 @@ escape_snowflake(S) ->
     ES = binary:replace(S, <<"\"">>, <<"\"">>, [global, {insert_replaced, 1}]),
     [$", ES, $"].
 
+escape_sqlserver(<<"0x", Rest/binary>> = S) ->
+    {ok, MP} = get_hex_re_mp(),
+    case re:run(Rest, MP, []) of
+        {match, _} ->
+            [S];
+        _ ->
+            escape_sql(S)
+    end;
+escape_sqlserver(S) ->
+    escape_sql(S).
+
 %% NOTE
 %% This thing looks more complicated than needed because it's optimized for as few
 %% intermediate memory (re)allocations as possible.

+ 19 - 2
apps/emqx_utils/test/emqx_placeholder_SUITE.erl

@@ -106,12 +106,29 @@ t_preproc_sql2(_) ->
     ?assertEqual(<<"a:$a,b:b},c:{c},d:${d">>, PrepareStatement),
     ?assertEqual([], emqx_placeholder:proc_sql(ParamsTokens, Selected)).
 
-t_preproc_sql3(_) ->
+t_preproc_sqlserver(_) ->
     Selected = #{a => <<"1">>, b => 1, c => 1.0, d => #{d1 => <<"hi">>}},
     ParamsTokens = emqx_placeholder:preproc_tmpl(<<"a:${a},b:${b},c:${c},d:${d}">>),
     ?assertEqual(
         <<"a:'1',b:1,c:1.0,d:'{\"d1\":\"hi\"}'">>,
-        emqx_placeholder:proc_sql_param_str(ParamsTokens, Selected)
+        emqx_placeholder:proc_sqlserver_param_str(ParamsTokens, Selected)
+    ).
+
+t_preproc_sqlserver_sql(_) ->
+    Selected = #{
+        a => <<"abc_hello你好👋"/utf8>>,
+        b => 1,
+        c => 1.0,
+        d => #{d1 => <<"hi">>},
+        hex_str => <<"0x0010">>,
+        not_hex => <<"0xabcdefghijk_你好🐸"/utf8>>
+    },
+    ParamsTokens = emqx_placeholder:preproc_tmpl(
+        <<"a:${a},b:${b},c:${c},d:${d},hex_str:${hex_str},not_hex:${not_hex}"/utf8>>
+    ),
+    ?assertEqual(
+        <<"a:'abc_hello你好👋',b:1,c:1.0,d:'{\"d1\":\"hi\"}',hex_str:0x0010,not_hex:'0xabcdefghijk_你好🐸'"/utf8>>,
+        emqx_placeholder:proc_sqlserver_param_str(ParamsTokens, Selected)
     ).
 
 t_preproc_mysql1(_) ->

+ 7 - 0
changes/feat-14409.en.md

@@ -0,0 +1,7 @@
+Add two rule functions for writing data to SQL Server versions that do not support UTF-8: one converts UTF-8 strings to UTF-16 little-endian, the other renders a value as a SQL Server hex binary literal.
+
+- Convert a UTF-8 string to UTF-16-little-endian.
+  `str_utf16_le/1`
+
+- Convert any string or binary to the SQL Server hex binary format, prefixed with `0x`.
+  `sqlserver_bin2hexstr/1`