Преглед изворни кода

Merge pull request #13189 from JimMoen/fix-apply-sqlserver-template

fix(utils_sql): improve insert sql regular expression
JimMoen пре 1 година
родитељ
комит
78a196e215

+ 41 - 11
apps/emqx_utils/src/emqx_utils_sql.erl

@@ -31,8 +31,44 @@
 -type statement_type() :: select | insert | delete | update.
 -type value() :: null | binary() | number() | boolean() | [value()].
 
+-define(INSERT_RE_MP_KEY, insert_re_mp).
+-define(INSERT_RE_BIN, <<
+    %% case-insensitive
+    "(?i)^\\s*",
+    %% Group-1: insert into, table name and columns (when existed).
+    %% All space characters suffixed to <TABLE_NAME> will be kept
+    %% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]`
+    "(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)",
+    %% Keyword: `VALUES`
+    "\\s*values\\s*",
+    %% Group-2: literals value(s) or placeholder(s) with round brackets.
+    %% And the sub-pattern in brackets does not do any capturing
+    %% `([<VALUE> | <PLACEHOLDER>], ..])`
+    "(\\((?:[^()]++|(?2))*\\))",
+    "\\s*$"
+>>).
+
 -dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}).
 
+-on_load(put_insert_mp/0).
+
+put_insert_mp() ->
+    persistent_term:put({?MODULE, ?INSERT_RE_MP_KEY}, re:compile(?INSERT_RE_BIN)),
+    ok.
+
+%% The type Copied from stdlib/src/re.erl to compatibility with OTP 26
+%% Since `re:mp()` exported after OTP 27
+-type mp() :: {re_pattern, _, _, _, _}.
+-spec get_insert_mp() -> {ok, mp()}.
+get_insert_mp() ->
+    case persistent_term:get({?MODULE, ?INSERT_RE_MP_KEY}, undefined) of
+        undefined ->
+            ok = put_insert_mp(),
+            get_insert_mp();
+        {ok, MP} ->
+            {ok, MP}
+    end.
+
 -spec get_statement_type(iodata()) -> statement_type() | {error, unknown}.
 get_statement_type(Query) ->
     KnownTypes = #{
@@ -54,17 +90,11 @@ get_statement_type(Query) ->
 -spec parse_insert(iodata()) ->
     {ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}.
 parse_insert(SQL) ->
-    case re:split(SQL, "((?i)values)", [{return, binary}]) of
-        [Part1, _, Part3] ->
-            case string:trim(Part1, leading) of
-                <<"insert", _/binary>> = InsertSQL ->
-                    {ok, {InsertSQL, Part3}};
-                <<"INSERT", _/binary>> = InsertSQL ->
-                    {ok, {InsertSQL, Part3}};
-                _ ->
-                    {error, not_insert_sql}
-            end;
-        _ ->
+    {ok, MP} = get_insert_mp(),
+    case re:run(SQL, MP, [{capture, all_but_first, binary}]) of
+        {match, [InsertInto, ValuesTemplate]} ->
+            {ok, {InsertInto, ValuesTemplate}};
+        nomatch ->
             {error, not_insert_sql}
     end.
 

+ 132 - 0
apps/emqx_utils/test/emqx_utils_sql_SUITE.erl

@@ -0,0 +1,132 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+-module(emqx_utils_sql_SUITE).
+
+-compile(export_all).
+-compile(nowarn_export_all).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-import(emqx_utils_sql, [get_statement_type/1, parse_insert/1]).
+
+all() ->
+    emqx_common_test_helpers:all(?MODULE).
+
+t_get_statement_type(_) ->
+    ?assertEqual(select, get_statement_type("SELECT * FROM abc")),
+    ?assertEqual(insert, get_statement_type("INSERT INTO abc (c1, c2, c3)VALUES(1, 2, 3)")),
+    ?assertEqual(update, get_statement_type("UPDATE abc SET c1 = 1, c2 = 2, c3 = 3")),
+    ?assertEqual(delete, get_statement_type("DELETE FROM abc WHERE c1 = 1")),
+    ?assertEqual({error, unknown}, get_statement_type("drop table books")).
+
+t_parse_insert(_) ->
+    %% `values` in table name
+    run_pi(
+        <<"insert into tag_VALUES(tag_values,Timestamp) values (${tagvalues},${date})"/utf8>>,
+        {<<"insert into tag_VALUES(tag_values,Timestamp)"/utf8>>, <<"(${tagvalues},${date})"/utf8>>}
+    ),
+    run_pi(
+        <<"INSERT INTO Values_таблица (идентификатор, имя, возраст)   VALUES \t (${id}, 'Иван', 25)  "/utf8>>,
+        {<<"INSERT INTO Values_таблица (идентификатор, имя, возраст)"/utf8>>,
+            <<"(${id}, 'Иван', 25)"/utf8>>}
+    ),
+
+    %% `values` in column name
+    run_pi(
+        <<"insert into PI.dbo.tags(tag_values,Timestamp) values (${tagvalues},${date}  )"/utf8>>,
+        {<<"insert into PI.dbo.tags(tag_values,Timestamp)"/utf8>>,
+            <<"(${tagvalues},${date}  )"/utf8>>}
+    ),
+
+    run_pi(
+        <<"INSERT INTO mqtt_test(payload, arrived) VALUES (${payload}, FROM_UNIXTIME((${timestamp}/1000)))"/utf8>>,
+        {<<"INSERT INTO mqtt_test(payload, arrived)"/utf8>>,
+            <<"(${payload}, FROM_UNIXTIME((${timestamp}/1000)))">>}
+    ),
+
+    run_pi(
+        <<"insert into таблица (идентификатор,имя,возраст) VALUES(${id},'Алексей',30)"/utf8>>,
+        {<<"insert into таблица (идентификатор,имя,возраст)"/utf8>>,
+            <<"(${id},'Алексей',30)"/utf8>>}
+    ),
+    run_pi(
+        <<"INSERT into 表格 (标识, 名字, 年龄) VALUES (${id}, '张三', 22)"/utf8>>,
+        {<<"INSERT into 表格 (标识, 名字, 年龄)"/utf8>>, <<"(${id}, '张三', 22)"/utf8>>}
+    ),
+    run_pi(
+        <<"  inSErt into 表格(标识,名字,年龄)values(${id},'李四', 35)"/utf8>>,
+        {<<"inSErt into 表格(标识,名字,年龄)"/utf8>>, <<"(${id},'李四', 35)"/utf8>>}
+    ),
+    run_pi(
+        <<"insert into PI.dbo.tags( tag_value,Timestamp)  VALUES\t\t(   ${tagvalues},   ${date} )"/utf8>>,
+        {<<"insert into PI.dbo.tags( tag_value,Timestamp)"/utf8>>,
+            <<"(   ${tagvalues},   ${date} )"/utf8>>}
+    ),
+    run_pi(
+        <<"insert into PI.dbo.tags(tag_value , Timestamp )vALues(${tagvalues},${date})"/utf8>>,
+        {<<"insert into PI.dbo.tags(tag_value , Timestamp )"/utf8>>,
+            <<"(${tagvalues},${date})"/utf8>>}
+    ),
+
+    run_pi(
+        <<"inSErt  INTO  table75 (column1, column2, column3) values (${one}, ${two},${three})"/utf8>>,
+        {<<"inSErt  INTO  table75 (column1, column2, column3)"/utf8>>,
+            <<"(${one}, ${two},${three})"/utf8>>}
+    ),
+    run_pi(
+        <<"INSERT Into some_table      values\t(${tag1},   ${tag2}  )">>,
+        {<<"INSERT Into some_table      "/utf8>>, <<"(${tag1},   ${tag2}  )">>}
+    ).
+
+t_parse_insert_nested_brackets(_) ->
+    InsertPart = <<"INSERT INTO test_tab (val1, val2)">>,
+    ValueLs = [
+        <<"(ABS(POWER((2 * POWER(ABS( (-3 + 1) * 4), (2 * (1 + ABS( (-3 + 1) * 4))))), (3 - POWER(4, 2)))), ",
+            "POWER(ABS( (-3 + 1) * 4), (2 * (1 + ABS( (-3 + 1) * 4)))))">>,
+        <<"(GREATEST(LEAST(5, 10), ABS(-7)), LEAST(GREATEST(3, 2), 9))">>,
+        <<"(ABS(POWER((2 * 2), (3 - 1))), POWER(ABS(-3), (2 * (1 + 1))))">>,
+        <<"(SQRT(POWER(4, 2)), MOD((10 + 3), (2 * 5)))">>,
+        <<"(ROUND(CEIL(3.14159 * 2), 2), CEIL(ROUND(7.5, 1)))">>,
+        <<"(FLOOR(SQRT(ABS(-8.99))), SIGN(POWER(-2, 3)))">>,
+        <<"(TRUNCATE(RAND() * 100, 2), ROUND(RAND() * 10, 1))">>,
+        <<"(EXP(LOG(POWER(2, 3))), LOG(EXP(5)))">>,
+        <<"(COS(PI() / (3 - 1)), PI() / COS(PI() / 4))">>,
+        <<"(SIN(TAN(PI() / 4)), TAN(SIN(PI() / 6)))">>
+    ],
+
+    [
+        run_pi(<<InsertPart/binary, " VALUES ", ValueL/binary>>, {InsertPart, ValueL})
+     || ValueL <- ValueLs
+    ].
+
+t_parse_insert_failed(_) ->
+    run_pi("drop table books"),
+    run_pi("SELECT * FROM abc"),
+    run_pi("UPDATE abc SET c1 = 1, c2 = 2, c3 = 3"),
+    run_pi("DELETE FROM abc WHERE c1 = 1"),
+    run_pi("insert intotable(a,b)values(1,2)"),
+    run_pi("insert into (a,val)values(1,'val')").
+
+run_pi(SQL) ->
+    ?assertEqual({error, not_insert_sql}, parse_insert(SQL)),
+    ct:pal("SQL:~n~ts~n", [SQL]).
+
+run_pi(SQL, {InsertPart, Values}) ->
+    {ok, {InsertPart0, Values0}} = parse_insert(SQL),
+    ?assertEqual(InsertPart, InsertPart0),
+    ?assertEqual(Values, Values0),
+    ct:pal("SQL:~n~ts~n", [SQL]).

+ 1 - 0
changes/ce/fix-13189.en.md

@@ -0,0 +1 @@
+Fixed an issue where the data integration with Microsoft SQL Server or MySQL could not use SQL templates with substring `values` in table name or column name.