Просмотр исходного кода

feat: add backup copies for cluster.hocon

Zaiming (Stone) Shi 2 лет назад
Родитель
Сommit
5146de5b1c
2 измененных файлов с 136 добавлено и 24 удалено
  1. 73 21
      apps/emqx/src/emqx_config.erl
  2. 63 3
      apps/emqx/test/emqx_config_SUITE.erl

+ 73 - 21
apps/emqx/src/emqx_config.erl

@@ -91,7 +91,7 @@
 -export([ensure_atom_conf_path/2]).
 
 -ifdef(TEST).
--export([erase_all/0]).
+-export([erase_all/0, backup_and_write/2]).
 -endif.
 
 -include("logger.hrl").
@@ -105,6 +105,7 @@
 -define(LISTENER_CONF_PATH(TYPE, LISTENER, PATH), [listeners, TYPE, LISTENER | PATH]).
 
 -define(CONFIG_NOT_FOUND_MAGIC, '$0tFound').
+-define(MAX_KEEP_BACKUP_CONFIGS, 10).
 
 -export_type([
     update_request/0,
@@ -601,43 +602,94 @@ save_to_config_map(Conf, RawConf) ->
+%% Persist the raw config as pretty-printed HOCON text (hocon_pp:do/2).
+%% The first argument selects the target file: `true' uses the file returned
+%% by deprecated_conf_file/1, `false' uses the one returned by
+%% cluster_hocon_file/0. Nothing is written when the raw config or the target
+%% file name is `undefined'. The write itself is done by backup_and_write/2.
 -spec save_to_override_conf(boolean(), raw_config(), update_opts()) -> ok | {error, term()}.
 save_to_override_conf(_, undefined, _) ->
     ok;
-%% TODO: Remove deprecated override conf file when 5.1
 save_to_override_conf(true, RawConf, Opts) ->
     case deprecated_conf_file(Opts) of
         undefined ->
             ok;
         FileName ->
-            ok = filelib:ensure_dir(FileName),
-            case file:write_file(FileName, hocon_pp:do(RawConf, #{})) of
-                ok ->
-                    ok;
-                {error, Reason} ->
-                    ?SLOG(error, #{
-                        msg => "failed_to_write_override_file",
-                        filename => FileName,
-                        reason => Reason
-                    }),
-                    {error, Reason}
-            end
+            backup_and_write(FileName, hocon_pp:do(RawConf, #{}))
     end;
 save_to_override_conf(false, RawConf, _Opts) ->
     case cluster_hocon_file() of
         undefined ->
             ok;
         FileName ->
-            ok = filelib:ensure_dir(FileName),
-            case file:write_file(FileName, hocon_pp:do(RawConf, #{})) of
+            backup_and_write(FileName, hocon_pp:do(RawConf, #{}))
+    end.
+
+%% @private Build a human-readable local-time stamp with millisecond
+%% precision, laid out as year.month.day.hour.minute.second.millisecond.
+%% This is the same timestamp format as the one in hocon-cli generated
+%% app.<time>.config file names.
+now_time() ->
+    Millis = os:system_time(millisecond),
+    {Date, Time} = calendar:system_time_to_local_time(Millis, millisecond),
+    {Year, Month, Day} = Date,
+    {Hour, Minute, Second} = Time,
+    %% The millisecond part comes from the raw timestamp; the calendar tuple
+    %% only has second resolution.
+    Args = [Year, Month, Day, Hour, Minute, Second, Millis rem 1000],
+    lists:flatten(io_lib:format("~0p.~2..0b.~2..0b.~2..0b.~2..0b.~2..0b.~3..0b", Args)).
+
+%% @private Save `Content' as the new config file at `Path', keeping the
+%% previous file as a timestamped backup.
+%% The content is first written to `Path ++ ".tmp"'; only when that succeeds
+%% is the current file moved aside and the temporary file renamed into place
+%% (see backup_and_replace/2).
+%% A failure to write the temporary file is logged and `ok' is still
+%% returned, so an unwritable file system does not fail the config update.
+backup_and_write(Path, Content) ->
+    %% Best effort: this may fail (e.g. on a read-only file system), in which
+    %% case the write below fails too and is reported there.
+    _ = filelib:ensure_dir(Path),
+    TmpFile = Path ++ ".tmp",
+    case file:write_file(TmpFile, Content) of
+        ok ->
+            backup_and_replace(Path, TmpFile);
+        {error, Reason} ->
+            ?SLOG(error, #{
+                msg => "failed_to_save_conf_file",
+                hint =>
+                    "The updated cluster config is not saved on this node, please check the file system.",
+                filename => TmpFile,
+                reason => Reason
+            }),
+            %% e.g. read-only, it's not the end of the world
+            ok
+    end.
+
+%% @private Move the current file at `Path' aside under a timestamped
+%% `.bak' name, then rename `TmpPath' into its place.
+%% Old backups are pruned only when a backup was actually taken.
+backup_and_replace(Path, TmpPath) ->
+    BackupPath = lists:append([Path, ".", now_time(), ".bak"]),
+    BackupResult = file:rename(Path, BackupPath),
+    case BackupResult of
+        {error, enoent} ->
+            %% Nothing to back up: the config file has not been created yet.
+            ok = file:rename(TmpPath, Path);
+        {error, Reason} ->
+            %% NOTE(review): in this branch TmpPath is not renamed, so the new
+            %% content stays in the temporary file and Path is left untouched,
+            %% while the caller still gets `ok'. Confirm this is intended.
+            ?SLOG(warning, #{
+                msg => "failed_to_backup_conf_file",
+                filename => BackupPath,
+                reason => Reason
+            }),
+            ok;
+        ok ->
+            ok = file:rename(TmpPath, Path),
+            ok = prune_backup_files(Path)
+    end.
+
+%% @private Delete old backups of `Path', keeping at most
+%% ?MAX_KEEP_BACKUP_CONFIGS of them.
+%% Candidates are the files matching `Path ++ ".*"' whose names end with the
+%% timestamp suffix produced by now_time/0 followed by `.bak'.
+%% A failed deletion is logged as a warning and otherwise ignored.
+prune_backup_files(Path) ->
+    Files0 = filelib:wildcard(Path ++ ".*"),
+    Re = "\\.[0-9]{4}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{3}\\.bak$",
+    Files = lists:filter(fun(F) -> re:run(F, Re) =/= nomatch end, Files0),
+    %% Descending name order: the zero-padded timestamp suffix makes the
+    %% lexical order follow the timestamps, so the newest backups come first.
+    Sorted = lists:reverse(lists:sort(Files)),
+    {_Keeps, Deletes} = lists:split(min(?MAX_KEEP_BACKUP_CONFIGS, length(Sorted)), Sorted),
+    lists:foreach(
+        fun(F) ->
+            case file:delete(F) of
                 ok ->
                     ok;
                 {error, Reason} ->
-                    ?SLOG(error, #{
-                        msg => "failed_to_save_conf_file",
-                        filename => FileName,
+                    ?SLOG(warning, #{
+                        msg => "failed_to_delete_backup_conf_file",
+                        filename => F,
                         reason => Reason
                     }),
-                    {error, Reason}
+                    ok
             end
-    end.
+        end,
+        Deletes
+    ).
 
 add_handlers() ->
     ok = emqx_config_logger:add_handler(),

+ 63 - 3
apps/emqx/test/emqx_config_SUITE.erl

@@ -31,7 +31,24 @@ init_per_suite(Config) ->
 end_per_suite(_Config) ->
     emqx_common_test_helpers:stop_apps([]).
 
-t_fill_default_values(_) ->
+%% Optional per-testcase setup hook: call the test case function itself with
+%% `{init, Config}'. Test cases without such a clause raise `function_clause',
+%% which is treated as "no setup needed". The hook's return value is not
+%% used; the incoming Config is always passed on unchanged.
+init_per_testcase(TestCase, Config) ->
+    SetupArg = {init, Config},
+    try ?MODULE:TestCase(SetupArg) of
+        _Ignored ->
+            Config
+    catch
+        error:function_clause ->
+            Config
+    end.
+
+%% Optional per-testcase teardown hook: call the test case function itself
+%% with `{'end', Config}'. Test cases without such a clause raise
+%% `function_clause', which is treated as "no teardown needed".
+end_per_testcase(TestCase, Config) ->
+    TeardownArg = {'end', Config},
+    try
+        ?MODULE:TestCase(TeardownArg)
+    catch
+        error:function_clause -> ok
+    end.
+
+t_fill_default_values(C) when is_list(C) ->
     Conf = #{
         <<"broker">> => #{
             <<"perf">> => #{},
@@ -60,7 +77,7 @@ t_fill_default_values(_) ->
     _ = emqx_utils_json:encode(WithDefaults),
     ok.
 
-t_init_load(_Config) ->
+t_init_load(C) when is_list(C) ->
     ConfFile = "./test_emqx.conf",
     ok = file:write_file(ConfFile, <<"">>),
     ExpectRootNames = lists:sort(hocon_schema:root_names(emqx_schema)),
@@ -79,7 +96,7 @@ t_init_load(_Config) ->
     ?assertMatch({ok, #{raw_config := 128}}, emqx:update_config([mqtt, max_topic_levels], 128)),
     ok = file:delete(DeprecatedFile).
 
-t_unknown_rook_keys(_) ->
+t_unknown_rook_keys(C) when is_list(C) ->
     ?check_trace(
         #{timetrap => 1000},
         begin
@@ -96,3 +113,46 @@ t_unknown_rook_keys(_) ->
         end
     ),
     ok.
+
+%% Checks backup rotation of emqx_config:backup_and_write/2.
+%% The `{init, _}' and `{'end', _}' clauses are the per-testcase hooks; the
+%% `end' hook removes the test file and its `.bak' backups.
+t_cluster_hocon_backup({init, C}) ->
+    C;
+t_cluster_hocon_backup({'end', _C}) ->
+    Target = "backup-test.hocon",
+    Leftovers = [Target | filelib:wildcard(Target ++ ".*.bak")],
+    lists:foreach(fun file:delete/1, Leftovers);
+t_cluster_hocon_backup(C) when is_list(C) ->
+    Target = "backup-test.hocon",
+    %% Write 12 times: the first write creates the file, each later write
+    %% backs up the previous content, so 11 backups are produced and the
+    %% oldest one is expected to be pruned, leaving 10.
+    Total = 12,
+    lists:foreach(
+        fun(I) ->
+            %% avoid backup file name clashes (millisecond timestamps)
+            timer:sleep(1),
+            emqx_config:backup_and_write(Target, integer_to_binary(I))
+        end,
+        lists:seq(1, Total)
+    ),
+    %% the latest content lives in the file without a suffix
+    ?assertEqual({ok, integer_to_binary(Total)}, file:read_file(Target)),
+    Re = "\\.[0-9]{4}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{2}\\.[0-9]{3}\\.bak$",
+    BackupFiles = filelib:wildcard(Target ++ ".*.bak"),
+    ?assert(lists:all(fun(F) -> re:run(F, Re) =/= nomatch end, BackupFiles)),
+    %% keep only the latest 10
+    ?assertEqual(10, length(BackupFiles)),
+    %% oldest-first backups are expected to hold contents N-10 .. N-1
+    ExpectedContents = [integer_to_binary(I) || I <- lists:seq(Total - 10, Total - 1)],
+    Pairs = lists:zip(lists:sort(BackupFiles), ExpectedContents),
+    lists:foreach(
+        fun({BackupFile, ExpectedContent}) ->
+            ?assertEqual({ok, ExpectedContent}, file:read_file(BackupFile))
+        end,
+        Pairs
+    ),
+    ok.