Sfoglia il codice sorgente

feat(ft-fs): ensure filsystem-safe handling of client input

Also restrict the filenames clients may specify in a filemeta to some
safe subset, as a future proofing measure.
Andrew Mayorov 2 anni fa
parent
commit
2707b4500f

+ 84 - 0
apps/emqx_ft/src/emqx_ft_fs_util.erl

@@ -18,6 +18,10 @@
 
 -include_lib("snabbkaffe/include/trace.hrl").
 
+-export([is_filename_safe/1]).
+-export([escape_filename/1]).
+-export([unescape_filename/1]).
+
 -export([read_decode_file/2]).
 
 -export([fold/4]).
@@ -35,6 +39,86 @@
         ) -> Acc
     ).
 
+-define(IS_UNSAFE(C),
+    ((C) =:= $% orelse
+        (C) =:= $: orelse
+        (C) =:= $\\ orelse
+        (C) =:= $/)
+).
+
+-define(IS_PRINTABLE(C),
+    % NOTE: See `io_lib:printable_unicode_list/1`
+    (((C) >= 32 andalso (C) =< 126) orelse
+        ((C) >= 16#A0 andalso (C) < 16#D800) orelse
+        ((C) > 16#DFFF andalso (C) < 16#FFFE) orelse
+        ((C) > 16#FFFF andalso (C) =< 16#10FFFF))
+).
+
+%%
+
+-spec is_filename_safe(file:filename_all()) -> ok | {error, atom()}.
+is_filename_safe(FN) when is_binary(FN) ->
+    is_filename_safe(unicode:characters_to_list(FN));
+is_filename_safe("") ->
+    {error, empty};
+is_filename_safe(FN) when FN == "." orelse FN == ".." ->
+    {error, special};
+is_filename_safe(FN) ->
+    verify_filename_safe(FN).
+
+verify_filename_safe([$% | Rest]) ->
+    verify_filename_safe(Rest);
+verify_filename_safe([C | _]) when ?IS_UNSAFE(C) ->
+    {error, unsafe};
+verify_filename_safe([C | _]) when not ?IS_PRINTABLE(C) ->
+    {error, nonprintable};
+verify_filename_safe([_ | Rest]) ->
+    verify_filename_safe(Rest);
+verify_filename_safe([]) ->
+    ok.
+
+-spec escape_filename(binary()) -> file:name().
+escape_filename(Name) when Name == <<".">> orelse Name == <<"..">> ->
+    lists:reverse(percent_encode(Name, ""));
+escape_filename(Name) ->
+    escape(Name, "").
+
+escape(<<C/utf8, Rest/binary>>, Acc) when ?IS_UNSAFE(C) ->
+    escape(Rest, percent_encode(<<C/utf8>>, Acc));
+escape(<<C/utf8, Rest/binary>>, Acc) when not ?IS_PRINTABLE(C) ->
+    escape(Rest, percent_encode(<<C/utf8>>, Acc));
+escape(<<C/utf8, Rest/binary>>, Acc) ->
+    escape(Rest, [C | Acc]);
+escape(<<>>, Acc) ->
+    lists:reverse(Acc).
+
+-spec unescape_filename(file:name()) -> binary().
+unescape_filename(Name) ->
+    unescape(Name, <<>>).
+
+unescape([$%, A, B | Rest], Acc) ->
+    unescape(Rest, percent_decode(A, B, Acc));
+unescape([C | Rest], Acc) ->
+    unescape(Rest, <<Acc/binary, C/utf8>>);
+unescape([], Acc) ->
+    Acc.
+
+percent_encode(<<A:4, B:4, Rest/binary>>, Acc) ->
+    percent_encode(Rest, [dec2hex(B), dec2hex(A), $% | Acc]);
+percent_encode(<<>>, Acc) ->
+    Acc.
+
+percent_decode(A, B, Acc) ->
+    <<Acc/binary, (hex2dec(A) * 16 + hex2dec(B))>>.
+
+dec2hex(X) when (X >= 0) andalso (X =< 9) -> X + $0;
+dec2hex(X) when (X >= 10) andalso (X =< 15) -> X + $A - 10.
+
+hex2dec(X) when (X >= $0) andalso (X =< $9) -> X - $0;
+hex2dec(X) when (X >= $A) andalso (X =< $F) -> X - $A + 10;
+hex2dec(X) when (X >= $a) andalso (X =< $f) -> X - $a + 10;
+hex2dec(_) -> error(badarg).
+
 %%
 
 -spec read_decode_file(file:name(), fun((binary()) -> Value)) ->

+ 8 - 2
apps/emqx_ft/src/emqx_ft_schema.erl

@@ -130,8 +130,11 @@ desc(local_storage_gc) ->
 schema(filemeta) ->
     #{
         roots => [
-            % TODO nonempty
-            {name, hoconsc:mk(string(), #{required => true})},
+            {name,
+                hoconsc:mk(string(), #{
+                    required => true,
+                    validator => validator(filename)
+                })},
             {size, hoconsc:mk(non_neg_integer())},
             {expire_at, hoconsc:mk(non_neg_integer())},
             {checksum, hoconsc:mk({atom(), binary()}, #{converter => converter(checksum)})},
@@ -140,6 +143,9 @@ schema(filemeta) ->
         ]
     }.
 
+validator(filename) ->
+    fun emqx_ft_fs_util:is_filename_safe/1.
+
 converter(checksum) ->
     fun
         (undefined, #{}) ->

+ 10 - 1
apps/emqx_ft/src/emqx_ft_storage_exporter_fs.erl

@@ -343,7 +343,16 @@ mk_result_reldir(Transfer = {ClientId, FileId}) ->
         Bucket2:?BUCKET2_LEN/binary,
         BucketRest/binary
     >> = binary:encode_hex(Hash),
-    [Bucket1, Bucket2, BucketRest, ClientId, FileId].
+    [
+        Bucket1,
+        Bucket2,
+        BucketRest,
+        emqx_ft_fs_util:escape_filename(ClientId),
+        emqx_ft_fs_util:escape_filename(FileId)
+    ].
+
+dirnames_to_transfer(ClientId, FileId) ->
+    {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}.
 
 mk_transfer_hash(Transfer) ->
     crypto:hash(?BUCKET_HASH, term_to_binary(Transfer)).

+ 10 - 8
apps/emqx_ft/src/emqx_ft_storage_fs.erl

@@ -250,12 +250,12 @@ transfers(Storage) ->
         )}.
 
 transfers(Storage, ClientId, AccIn) ->
-    Dirname = mk_client_filedir(Storage, ClientId),
+    Dirname = filename:join(get_storage_root(Storage), ClientId),
     case file:list_dir(Dirname) of
         {ok, FileIds} ->
             lists:foldl(
                 fun(FileId, Acc) ->
-                    Transfer = {filename_to_binary(ClientId), filename_to_binary(FileId)},
+                    Transfer = dirnames_to_transfer(ClientId, FileId),
                     read_transferinfo(Storage, Transfer, Acc)
                 end,
                 AccIn,
@@ -327,10 +327,15 @@ break_segment_filename(Filename) ->
     end.
 
 mk_filedir(Storage, {ClientId, FileId}, SubDirs) ->
-    filename:join([get_storage_root(Storage), ClientId, FileId | SubDirs]).
+    filename:join([
+        get_storage_root(Storage),
+        emqx_ft_fs_util:escape_filename(ClientId),
+        emqx_ft_fs_util:escape_filename(FileId)
+        | SubDirs
+    ]).
 
-mk_client_filedir(Storage, ClientId) ->
-    filename:join([get_storage_root(Storage), ClientId]).
+dirnames_to_transfer(ClientId, FileId) ->
+    {emqx_ft_fs_util:unescape_filename(ClientId), emqx_ft_fs_util:unescape_filename(FileId)}.
 
 mk_filepath(Storage, Transfer, SubDirs, Filename) ->
     filename:join(mk_filedir(Storage, Transfer, SubDirs), Filename).
@@ -432,6 +437,3 @@ read_frag_filemeta(_Filename, Filepath) ->
 
 read_frag_segmentinfo(Filename, _Filepath) ->
     break_segment_filename(Filename).
-
-filename_to_binary(S) when is_list(S) -> unicode:characters_to_binary(S);
-filename_to_binary(B) when is_binary(B) -> B.

+ 41 - 0
apps/emqx_ft/test/emqx_ft_SUITE.erl

@@ -165,6 +165,29 @@ t_invalid_fileid(Config) ->
         emqtt:publish(C, <<"$file//init">>, <<>>, 1)
     ).
 
+t_invalid_filename(Config) ->
+    C = ?config(client, Config),
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_init_topic(<<"f1">>), emqx_json:encode(meta(".", <<>>)), 1)
+    ),
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_init_topic(<<"f2">>), emqx_json:encode(meta("..", <<>>)), 1)
+    ),
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_init_topic(<<"f2">>), emqx_json:encode(meta("../nice", <<>>)), 1)
+    ),
+    ?assertRCName(
+        unspecified_error,
+        emqtt:publish(C, mk_init_topic(<<"f3">>), emqx_json:encode(meta("/etc/passwd", <<>>)), 1)
+    ),
+    ?assertRCName(
+        success,
+        emqtt:publish(C, mk_init_topic(<<"f4">>), emqx_json:encode(meta("146%", <<>>)), 1)
+    ).
+
 t_simple_transfer(Config) ->
     C = ?config(client, Config),
 
@@ -202,6 +225,24 @@ t_simple_transfer(Config) ->
         read_export(Export)
     ).
 
+t_nasty_clientids_fileids(_Config) ->
+    Transfers = [
+        {<<".">>, <<".">>},
+        {<<"🌚"/utf8>>, <<"🌝"/utf8>>},
+        {<<"../..">>, <<"😤"/utf8>>},
+        {<<"/etc/passwd">>, <<"whitehat">>},
+        {<<"; rm -rf / ;">>, <<"whitehat">>}
+    ],
+
+    ok = lists:foreach(
+        fun({ClientId, FileId}) ->
+            ok = emqx_ft_test_helpers:upload_file(ClientId, FileId, "justfile", ClientId),
+            [Export] = list_exports(ClientId),
+            ?assertEqual({ok, ClientId}, read_export(Export))
+        end,
+        Transfers
+    ).
+
 t_meta_conflict(Config) ->
     C = ?config(client, Config),
 

+ 2 - 2
apps/emqx_ft/test/emqx_ft_api_SUITE.erl

@@ -60,7 +60,7 @@ end_per_testcase(_Case, _Config) ->
 t_list_ready_transfers(Config) ->
     ClientId = client_id(Config),
 
-    ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, <<"data">>, node()),
+    ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, "f1", <<"data">>),
 
     {ok, 200, #{<<"files">> := Files}} =
         request(get, uri(["file_transfer", "files"]), fun json/1),
@@ -73,7 +73,7 @@ t_list_ready_transfers(Config) ->
 t_download_transfer(Config) ->
     ClientId = client_id(Config),
 
-    ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, <<"data">>, node()),
+    ok = emqx_ft_test_helpers:upload_file(ClientId, <<"f1">>, "f1", <<"data">>),
 
     ?assertMatch(
         {ok, 400, #{<<"code">> := <<"BAD_REQUEST">>}},

+ 65 - 0
apps/emqx_ft/test/emqx_ft_fs_util_tests.erl

@@ -0,0 +1,65 @@
+%%--------------------------------------------------------------------
+%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%--------------------------------------------------------------------
+
+-module(emqx_ft_fs_util_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+filename_safe_test_() ->
+    [
+        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe("im.safe")),
+        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"im.safe">>)),
+        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<".safe.100%">>)),
+        ?_assertEqual(ok, emqx_ft_fs_util:is_filename_safe(<<"safe.as.🦺"/utf8>>))
+    ].
+
+filename_unsafe_test_() ->
+    [
+        ?_assertEqual({error, empty}, emqx_ft_fs_util:is_filename_safe("")),
+        ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(".")),
+        ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe("..")),
+        ?_assertEqual({error, special}, emqx_ft_fs_util:is_filename_safe(<<"..">>)),
+        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe(<<".././..">>)),
+        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("/etc/passwd")),
+        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("../cookie")),
+        ?_assertEqual({error, unsafe}, emqx_ft_fs_util:is_filename_safe("C:$cookie")),
+        ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([1, 2, 3])),
+        ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe(<<4, 5, 6>>)),
+        ?_assertEqual({error, nonprintable}, emqx_ft_fs_util:is_filename_safe([$a, 16#7F, $z]))
+    ].
+
+-define(NAMES, [
+    {"just.file", <<"just.file">>},
+    {".hidden", <<".hidden">>},
+    {".~what", <<".~what">>},
+    {"100%25.file", <<"100%.file">>},
+    {"%2E%2E", <<"..">>},
+    {"...", <<"...">>},
+    {"%2Fetc%2Fpasswd", <<"/etc/passwd">>},
+    {"%01%02%0A ", <<1, 2, 10, 32>>}
+]).
+
+escape_filename_test_() ->
+    [
+        ?_assertEqual(Filename, emqx_ft_fs_util:escape_filename(Input))
+     || {Filename, Input} <- ?NAMES
+    ].
+
+unescape_filename_test_() ->
+    [
+        ?_assertEqual(Input, emqx_ft_fs_util:unescape_filename(Filename))
+     || {Filename, Input} <- ?NAMES
+    ].

+ 5 - 2
apps/emqx_ft/test/emqx_ft_test_helpers.erl

@@ -60,14 +60,17 @@ tcp_port(Node) ->
 root(Config, Node, Tail) ->
     filename:join([?config(priv_dir, Config), "file_transfer", Node | Tail]).
 
-upload_file(ClientId, FileId, Data, Node) ->
+upload_file(ClientId, FileId, Name, Data) ->
+    upload_file(ClientId, FileId, Name, Data, node()).
+
+upload_file(ClientId, FileId, Name, Data, Node) ->
     Port = tcp_port(Node),
     Size = byte_size(Data),
 
     {ok, C1} = emqtt:start_link([{proto_ver, v5}, {clientid, ClientId}, {port, Port}]),
     {ok, _} = emqtt:connect(C1),
     Meta = #{
-        name => FileId,
+        name => unicode:characters_to_binary(Name),
         expire_at => erlang:system_time(_Unit = second) + 3600,
         size => Size
     },