emqx_utils_sql.erl 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. %%--------------------------------------------------------------------
  2. %% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
  3. %%
  4. %% Licensed under the Apache License, Version 2.0 (the "License");
  5. %% you may not use this file except in compliance with the License.
  6. %% You may obtain a copy of the License at
  7. %%
  8. %% http://www.apache.org/licenses/LICENSE-2.0
  9. %%
  10. %% Unless required by applicable law or agreed to in writing, software
  11. %% distributed under the License is distributed on an "AS IS" BASIS,
  12. %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. %% See the License for the specific language governing permissions and
  14. %% limitations under the License.
  15. %%--------------------------------------------------------------------
  16. -module(emqx_utils_sql).
  17. -export([get_statement_type/1]).
  18. -export([parse_insert/1]).
  19. -export([to_sql_value/1]).
  20. -export([to_sql_string/2]).
  21. -export([escape_sql/1]).
  22. -export([escape_cql/1]).
  23. -export([escape_mysql/1]).
  24. -export_type([value/0]).
  25. -type statement_type() :: select | insert | delete | update.
  26. -type value() :: null | binary() | number() | boolean() | [value()].
  27. -dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}).
  28. -spec get_statement_type(iodata()) -> statement_type() | {error, unknown}.
  29. get_statement_type(Query) ->
  30. KnownTypes = #{
  31. <<"select">> => select,
  32. <<"insert">> => insert,
  33. <<"update">> => update,
  34. <<"delete">> => delete
  35. },
  36. case re:run(Query, <<"^\\s*([a-zA-Z]+)">>, [{capture, all_but_first, binary}]) of
  37. {match, [Token]} ->
  38. maps:get(string:lowercase(Token), KnownTypes, {error, unknown});
  39. _ ->
  40. {error, unknown}
  41. end.
  42. %% @doc Parse an INSERT SQL statement into its INSERT part and the VALUES part.
  43. %% SQL = <<"INSERT INTO \"abc\" (c1, c2, c3) VALUES (${a}, ${b}, ${c.prop})">>
  44. %% {ok, {<<"INSERT INTO \"abc\" (c1, c2, c3)">>, <<"(${a}, ${b}, ${c.prop})">>}}
  45. -spec parse_insert(iodata()) ->
  46. {ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}.
  47. parse_insert(SQL) ->
  48. Pattern = <<
  49. %% case-insensitive
  50. "(?i)^\\s*",
  51. %% Group-1: insert into, table name and columns (when existed).
  52. %% All space characters suffixed to <TABLE_NAME> will be kept
  53. %% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]`
  54. "(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)",
  55. %% Keyword: `VALUES`
  56. "\\s*values\\s*",
  57. %% Group-2: literals value(s) or placeholder(s) with round brackets.
  58. %% And the sub-pattern in brackets does not do any capturing
  59. %% `([<VALUE> | <PLACEHOLDER>], ..])`
  60. "(\\((?:[^()]++|(?2))*\\))",
  61. "\\s*$"
  62. >>,
  63. case re:run(SQL, Pattern, [{capture, all_but_first, binary}]) of
  64. {match, [InsertInto, ValuesTemplate]} ->
  65. {ok, {InsertInto, ValuesTemplate}};
  66. nomatch ->
  67. {error, not_insert_sql}
  68. end.
  69. %% @doc Convert an Erlang term to a value that can be used primarily in
  70. %% prepared SQL statements.
  71. -spec to_sql_value(term()) -> value().
  72. to_sql_value(undefined) -> null;
  73. to_sql_value(List) when is_list(List) -> List;
  74. to_sql_value(Bin) when is_binary(Bin) -> Bin;
  75. to_sql_value(Num) when is_number(Num) -> Num;
  76. to_sql_value(Bool) when is_boolean(Bool) -> Bool;
  77. to_sql_value(Atom) when is_atom(Atom) -> atom_to_binary(Atom, utf8);
  78. to_sql_value(Map) when is_map(Map) -> emqx_utils_json:encode(Map).
  79. %% @doc Convert an Erlang term to a string that can be interpolated in literal
  80. %% SQL statements. The value is escaped if necessary.
  81. -spec to_sql_string(term(), Options) -> unicode:chardata() when
  82. Options :: #{
  83. escaping => mysql | sql | cql,
  84. undefined => null | unicode:chardata()
  85. }.
  86. to_sql_string(undefined, #{undefined := Str} = Opts) when Str =/= null ->
  87. to_sql_string(Str, Opts);
  88. to_sql_string(undefined, #{}) ->
  89. <<"NULL">>;
  90. to_sql_string(String, #{escaping := mysql}) when is_binary(String) ->
  91. try
  92. escape_mysql(String)
  93. catch
  94. throw:invalid_utf8 ->
  95. [<<"0x">>, binary:encode_hex(String)]
  96. end;
  97. to_sql_string(Term, #{escaping := mysql}) ->
  98. maybe_escape(Term, fun escape_mysql/1);
  99. to_sql_string(Term, #{escaping := cql}) ->
  100. maybe_escape(Term, fun escape_cql/1);
  101. to_sql_string(Term, #{}) ->
  102. maybe_escape(Term, fun escape_sql/1).
  103. -spec maybe_escape(_Value, fun((binary()) -> iodata())) -> unicode:chardata().
  104. maybe_escape(Str, EscapeFun) when is_binary(Str) ->
  105. EscapeFun(Str);
  106. maybe_escape(Str, EscapeFun) when is_list(Str) ->
  107. case unicode:characters_to_binary(Str) of
  108. Bin when is_binary(Bin) ->
  109. EscapeFun(Bin);
  110. Otherwise ->
  111. error(Otherwise)
  112. end;
  113. maybe_escape(Val, EscapeFun) when is_atom(Val) orelse is_map(Val) ->
  114. EscapeFun(emqx_template:to_string(Val));
  115. maybe_escape(Val, _EscapeFun) ->
  116. emqx_template:to_string(Val).
  117. -spec escape_sql(binary()) -> iodata().
  118. escape_sql(S) ->
  119. % NOTE
  120. % This is a bit misleading: currently, escaping logic in `escape_sql/1` likely
  121. % won't work with pgsql since it does not support C-style escapes by default.
  122. % https://www.postgresql.org/docs/14/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS
  123. ES = binary:replace(S, [<<"\\">>, <<"'">>], <<"\\">>, [global, {insert_replaced, 1}]),
  124. [$', ES, $'].
  125. -spec escape_cql(binary()) -> iodata().
  126. escape_cql(S) ->
  127. ES = binary:replace(S, <<"'">>, <<"'">>, [global, {insert_replaced, 1}]),
  128. [$', ES, $'].
  129. -spec escape_mysql(binary()) -> iodata().
  130. escape_mysql(S0) ->
  131. % https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
  132. [$', escape_mysql(S0, 0, 0, S0), $'].
  133. %% NOTE
  134. %% This thing looks more complicated than needed because it's optimized for as few
  135. %% intermediate memory (re)allocations as possible.
  136. escape_mysql(<<$', Rest/binary>>, I, Run, Src) ->
  137. escape_prepend(I, Run, Src, [<<"\\'">> | escape_mysql(Rest, I + Run + 1, 0, Src)]);
  138. escape_mysql(<<$\\, Rest/binary>>, I, Run, Src) ->
  139. escape_prepend(I, Run, Src, [<<"\\\\">> | escape_mysql(Rest, I + Run + 1, 0, Src)]);
  140. escape_mysql(<<0, Rest/binary>>, I, Run, Src) ->
  141. escape_prepend(I, Run, Src, [<<"\\0">> | escape_mysql(Rest, I + Run + 1, 0, Src)]);
  142. escape_mysql(<<_/utf8, Rest/binary>> = S, I, Run, Src) ->
  143. CWidth = byte_size(S) - byte_size(Rest),
  144. escape_mysql(Rest, I, Run + CWidth, Src);
  145. escape_mysql(<<>>, 0, _, Src) ->
  146. Src;
  147. escape_mysql(<<>>, I, Run, Src) ->
  148. binary:part(Src, I, Run);
  149. escape_mysql(_, _I, _Run, _Src) ->
  150. throw(invalid_utf8).
  151. escape_prepend(_RunI, 0, _Src, Tail) ->
  152. Tail;
  153. escape_prepend(I, Run, Src, Tail) ->
  154. [binary:part(Src, I, Run) | Tail].