nodetool 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. #!/usr/bin/env escript
  2. %% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*-
  3. %% ex: ft=erlang ts=4 sw=4 et
  4. %% -------------------------------------------------------------------
  5. %%
  6. %% nodetool: Helper Script for interacting with live nodes
  7. %%
  8. %% -------------------------------------------------------------------
  9. -mode(compile).
  10. -define(SHUTDOWN_TIMEOUT_MS, 120_000).
  %% Script entry point.
  %%
  %% On every OS type other than {win32, nt}, epmd is started first when the
  %% emulator was launched with `-start_epmd true`. The release lib dirs are
  %% then added to the code path and the command line is dispatched.
  main(Args) ->
      ok = main_prepare_epmd(os:type()),
      ok = add_libs_dir(),
      main_dispatch(Args).

  %% Start epmd when requested; nothing is done on Windows NT.
  main_prepare_epmd({win32, nt}) ->
      ok;
  main_prepare_epmd(_Nix) ->
      case init:get_argument(start_epmd) of
          {ok, [["true"]]} ->
              ok = start_epmd();
          _ ->
              ok
      end.

  %% Route the sub-commands that need no running target node; everything
  %% else is handled by do/1.
  main_dispatch(["hocon" | Rest]) ->
      %% forward the call to hocon_cli
      hocon_cli:main(Rest);
  main_dispatch(["check_license_key", Key0]) ->
      check_license(#{key => cleanup_key(Key0)});
  main_dispatch(Args) ->
      do(Args).
  %% The key arrives as the string (list) representation of a binary, so the
  %% first "<<" and the last ">>" are removed. The result is a binary.
  cleanup_key(Str0) ->
      Strip =
          fun(Subject, Marker, Where) ->
              iolist_to_binary(string:replace(Subject, Marker, "", Where))
          end,
      WithoutOpening = Strip(Str0, "<<", leading),
      Strip(WithoutOpening, ">>", trailing).
  %% Execute a nodetool command.
  %%
  %% The "mnesia_dir" and "chkconfig" options are handled locally and halt the
  %% VM. Otherwise distribution is started from "-name"/"-sname"/"-setcookie",
  %% the target node is checked for reachability (halting with status 1 when
  %% it is not reachable), and the remaining arguments are run as a command
  %% against the target node. Returns the result of net_kernel:stop/0.
  %%
  %% NOTE(review): the arity-2 "chkconfig" handler is tried first, and
  %% take_args raises {not_enough_args_for, "chkconfig"} when fewer than two
  %% arguments follow the option, so the arity-1 handler below looks
  %% unreachable -- confirm against the callers of this script.
  do(Args) ->
      ok = do_with_halt(Args, "mnesia_dir", fun create_mnesia_dir/2),
      ok = do_with_halt(Args, "chkconfig", fun("-config", X) -> chkconfig(X) end),
      ok = do_with_halt(Args, "chkconfig", fun chkconfig/1),
      Command = do_start_distribution(Args),
      lists:foreach(fun(App) -> application:start(App) end, [crypto, public_key, ssl]),
      TargetNode = get(target_node),
      ok = do_ensure_reachable(TargetNode),
      %% Mute logger from now on.
      %% Otherwise Erlang distribution over TLS (inet_tls_dist) warning logs
      %% and supervisor reports may contaminate io:format outputs
      logger:set_primary_config(level, none),
      do_command(Command, TargetNode),
      net_kernel:stop().

  %% Consume "-name", "-sname" and "-setcookie" (in that order) and return the
  %% arguments that are left. Starting distribution stores the target node
  %% name in the process dictionary under 'target_node'.
  do_start_distribution(Args) ->
      StartAs =
          fun(NameMode) ->
              fun(TargetName) ->
                  ThisNode = this_node_name(NameMode, TargetName),
                  {ok, _} = net_kernel:start([ThisNode, NameMode]),
                  put(target_node, nodename(TargetName))
              end
          end,
      SetCookie =
          fun(Cookie) ->
              erlang:set_cookie(node(), list_to_atom(Cookie))
          end,
      AfterName = do_with_ret(Args, "-name", StartAs(longnames)),
      AfterSname = do_with_ret(AfterName, "-sname", StartAs(shortnames)),
      do_with_ret(AfterSname, "-setcookie", SetCookie).

  %% See if the node is currently running -- if it's not, we'll bail
  do_ensure_reachable(TargetNode) ->
      Connected = net_kernel:hidden_connect_node(TargetNode),
      Ping = net_adm:ping(TargetNode),
      case {Connected, Ping} of
          {true, pong} ->
              ok;
          {false, pong} ->
              io:format(standard_error, "Failed to connect to node ~p\n", [TargetNode]),
              halt(1);
          {_, pang} ->
              io:format(standard_error, "Node ~p not responding to pings.\n", [TargetNode]),
              halt(1)
      end.

  %% Run one command against TargetNode. Failures halt the VM with status 1;
  %% an unrecognised command prints the usage text.
  do_command(["getpid"], TargetNode) ->
      io:format("~p\n", [list_to_integer(rpc:call(TargetNode, os, getpid, []))]);
  do_command(["ping"], _TargetNode) ->
      %% If we got this far, the node already responded to a ping, so just dump
      %% a "pong"
      io:format("pong\n");
  do_command(["stop"], TargetNode) ->
      StatusPid = start_shutdown_status(),
      Res = rpc:call(TargetNode, emqx_machine, graceful_shutdown, [], ?SHUTDOWN_TIMEOUT_MS),
      true = stop_shutdown_status(StatusPid),
      case Res of
          ok ->
              ok;
          {badrpc, timeout} ->
              io:format("EMQX is still shutting down, it failed to stop gracefully "
                        "within the configured timeout of: ~ps\n",
                        [erlang:convert_time_unit(?SHUTDOWN_TIMEOUT_MS, millisecond, second)]),
              halt(1);
          {badrpc, nodedown} ->
              %% nodetool commands are always executed after a ping
              %% which if the code gets here, it's because the target node
              %% has shutdown before RPC returns.
              ok
      end;
  do_command(["rpc", Module, Function | RpcArgs], TargetNode) ->
      Mod = list_to_atom(Module),
      Fun = list_to_atom(Function),
      case rpc:call(TargetNode, Mod, Fun, [RpcArgs], 60000) of
          ok ->
              ok;
          {error, cmd_not_found} ->
              halt(1);
          {error, Reason} ->
              io:format("RPC to ~s error: ~p\n", [TargetNode, Reason]),
              halt(1);
          {badrpc, Reason} ->
              io:format("RPC to ~s failed: ~p\n", [TargetNode, Reason]),
              halt(1);
          _ ->
              halt(1)
      end;
  do_command(["rpc_infinity", Module, Function | RpcArgs], TargetNode) ->
      Mod = list_to_atom(Module),
      Fun = list_to_atom(Function),
      case rpc:call(TargetNode, Mod, Fun, [RpcArgs], infinity) of
          ok ->
              ok;
          {badrpc, Reason} ->
              io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]),
              halt(1);
          _ ->
              halt(1)
      end;
  do_command(["rpcterms", Module, Function | ArgsAsString], TargetNode) ->
      Mod = list_to_atom(Module),
      Fun = list_to_atom(Function),
      CallArgs = consult(lists:flatten(ArgsAsString)),
      case rpc:call(TargetNode, Mod, Fun, CallArgs, 60000) of
          {badrpc, Reason} ->
              io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]),
              halt(1);
          Other ->
              io:format("~p\n", [Other])
      end;
  do_command(["eval" | ListOfArgs], TargetNode) ->
      Parsed = parse_eval_args(ListOfArgs),
      % and evaluate it on the remote node
      case rpc:call(TargetNode, erl_eval, exprs, [Parsed, []]) of
          {value, Value, _} ->
              io:format("~p~n", [Value]);
          {badrpc, Reason} ->
              io:format("RPC to ~p failed: ~p~n", [TargetNode, Reason]),
              halt(1)
      end;
  do_command(Other, _TargetNode) ->
      io:format("Other: ~p~n", [Other]),
      io:format("Usage: nodetool chkconfig|getpid|ping|stop|rpc|rpc_infinity|rpcterms|eval|cold_eval [Terms] [RPC]\n").
  %% Spawn a linked process running shutdown_status_loop/0 and return its pid.
  start_shutdown_status() ->
      erlang:spawn_link(fun() -> shutdown_status_loop() end).
  %% Stop the status process: unlink it first so its exit does not propagate
  %% to the caller, then send it an exit signal with reason 'stop'.
  %% Returns true.
  stop_shutdown_status(Pid) ->
      true = erlang:unlink(Pid),
      true = erlang:exit(Pid, stop).
  %% Print a progress message every 10 seconds, forever. The loop never
  %% returns on its own; it ends only when the process is terminated.
  shutdown_status_loop() ->
      receive
      after 10_000 ->
          ok
      end,
      io:format("EMQX is shutting down, please wait...\n"),
      shutdown_status_loop().
  %% Turn the command-line words of an "eval" command into parsed Erlang
  %% expressions, as returned by erl_parse:parse_exprs/1.
  %%
  %% Crashes with a badmatch when the text cannot be scanned or parsed.
  parse_eval_args(Args) ->
      % shells may process args into more than one, and end up stripping
      % spaces, so this converts all of that to a single string to parse.
      % unicode:characters_to_list/1 is used for the flattening because a
      % list_to_binary/1 round trip raises badarg on code points above 255.
      String = unicode:characters_to_list(join(Args, " ")),
      % then just as a convenience to users, if they forgot a trailing
      % '.' add it for them.
      Normalized =
          case lists:reverse(String) of
              [$. | _] -> String;
              R -> lists:reverse([$. | R])
          end,
      % then scan and parse the string
      {ok, Scanned, _} = erl_scan:string(Normalized),
      {ok, Parsed} = erl_parse:parse_exprs(Scanned),
      Parsed.
  %% If option Name is present in Args, call Handler with as many following
  %% arguments as the handler's arity and return Args without the option and
  %% its arguments. Returns Args unchanged when the option is absent.
  %% The handler's return value is ignored.
  do_with_ret(Args, Name, Handler) ->
      {arity, HandlerArity} = erlang:fun_info(Handler, arity),
      case take_args(Args, Name, HandlerArity) of
          {OptArgs, Remaining} ->
              _ = apply(Handler, OptArgs),
              Remaining;
          false ->
              Args
      end.
  %% If option Name is present in Args, call Handler with as many following
  %% arguments as the handler's arity. The handler is expected to halt the VM;
  %% if it returns, an error is printed and the VM halts with the source line
  %% number of the halt call as its status. Returns ok when the option is
  %% absent.
  do_with_halt(Args, Name, Handler) ->
      {arity, HandlerArity} = erlang:fun_info(Handler, arity),
      case take_args(Args, Name, HandlerArity) of
          {OptArgs, _Remaining} ->
              %% should halt
              apply(Handler, OptArgs),
              io:format(standard_error, "~s handler did not halt", [Name]),
              halt(?LINE);
          false ->
              ok
      end.
  %% Look for option OptName in Args.
  %%
  %% Returns {OptArgs, Rest} when the option is found: OptArgs are the
  %% OptArity arguments that follow its first occurrence, and Rest is Args
  %% with the option and those arguments removed. Returns 'false' when the
  %% option is absent. Raises {not_enough_args_for, OptName} when fewer than
  %% OptArity arguments follow the option.
  %%
  %% Arity 0 goes through the same scan and yields {[], Rest}, so callers that
  %% match on `false | {OptArgs, Rest}` also work with 0-arity handlers.
  take_args(Args, OptName, OptArity) ->
      take_args(Args, OptName, OptArity, _Scanned = []).

  %% Scanned accumulates, in reverse, the arguments that precede the option.
  take_args([], _, _, _) ->
      %% no such option
      false;
  take_args([Name | Rest], Name, Arity, Scanned) ->
      length(Rest) >= Arity orelse error({not_enough_args_for, Name}),
      {Result, Tail} = lists:split(Arity, Rest),
      {Result, lists:reverse(Scanned, Tail)};
  take_args([Other | Rest], Name, Arity, Scanned) ->
      take_args(Rest, Name, Arity, [Other | Scanned]).
  %% Run `"<epmd path>" -daemon` through the OS shell. The command must
  %% produce no output, otherwise the match fails. Returns ok.
  start_epmd() ->
      Command = lists:append(["\"", epmd_path(), "\" -daemon"]),
      [] = os:cmd(Command),
      ok.
  %% Locate the epmd executable: look in the directory of this script first,
  %% then fall back to the default executable search. Prints a message and
  %% halts with status 1 when epmd is not found.
  epmd_path() ->
      Name = "epmd",
      ScriptDir = filename:dirname(escript:script_name()),
      case os:find_executable(Name, ScriptDir) of
          false ->
              epmd_path_global(Name);
          Epmd ->
              Epmd
      end.

  %% Fallback lookup through os:find_executable/1.
  epmd_path_global(Name) ->
      case os:find_executable(Name) of
          false ->
              io:format("Could not find epmd.~n"),
              halt(1);
          GlobalEpmd ->
              GlobalEpmd
      end.
  %% Convert a node name string to an atom. A name of the form "node@host" is
  %% used as is; a bare "node" gets the host part of the local node() appended.
  nodename(Name) ->
      SplitOpts = [{return, list}, unicode],
      case re:split(Name, "@", SplitOpts) of
          [Node] ->
              [_, Host] = re:split(atom_to_list(node()), "@", SplitOpts),
              list_to_atom(lists:concat([Node, "@", Host]));
          [_Node, _Host] ->
              list_to_atom(Name)
      end.
  %% Build the name of this maintenance node from the target node name:
  %% "remsh_maint_" ++ node part ++ suffix id, followed by "@host" in
  %% longnames mode. Returns an atom.
  this_node_name(longnames, Name) ->
      [Node, Host] = re:split(Name, "@", [{return, list}, unicode]),
      this_node_atom([Node, node_name_suffix_id(), "@", Host]);
  this_node_name(shortnames, Name) ->
      this_node_atom([Name, node_name_suffix_id()]).

  %% Prefix the parts with "remsh_maint_" and convert the result to an atom.
  this_node_atom(Parts) ->
      list_to_atom(lists:concat(["remsh_maint_" | Parts])).
  %% Node name suffix: the first three characters of the reversed OS process
  %% id string, i.e. the last three digits of the pid in reverse order.
  node_name_suffix_id() ->
      ReversedPid = string:reverse(os:getpid()),
      string:slice(ReversedPid, 0, 3).
  %% Create the directory DataDir/NodeName, print its path to standard output
  %% and halt with status 0. The result of the directory creation is ignored.
  %% NOTE(review): the original comment here only asks "For windows???" --
  %% the intended platform is not evident from this file.
  create_mnesia_dir(DataDir, NodeName) ->
      Dir = filename:join(DataDir, NodeName),
      _ = file:make_dir(Dir),
      io:format("~s", [Dir]),
      halt(0).
  %% Read File with file:consult/1, validate its terms and halt.
  %% Exit status is 0 when validation passes or reports only non-error
  %% problems, and 1 when the file cannot be read or parsed or any problem is
  %% an error.
  chkconfig(File) ->
      case file:consult(File) of
          {error, {Line, Mod, Term}} ->
              io:format(standard_error, ["Error on line ", file:format_error({Line, Mod, Term}), "\n"], []),
              halt(1);
          {error, Error} ->
              io:format(standard_error, ["Error reading config file: ", File, " ", file:format_error(Error), "\n"], []),
              halt(1);
          {ok, Terms} ->
              chkconfig_report(validate(Terms))
      end.

  %% Halt according to the validation result, printing each problem first.
  chkconfig_report(ok) ->
      halt(0);
  chkconfig_report({error, Problems}) ->
      lists:foreach(fun print_issue/1, Problems),
      IsError =
          fun({error, _}) -> true;
             (_) -> false
          end,
      %% halt(1) if any problems were errors
      case lists:any(IsError, Problems) of
          true -> halt(1);
          false -> halt(0)
      end.
  %% Load the emqx_license application and read the license described by
  %% Config. Returns ok on success; on failure the error is printed to
  %% standard error and the VM halts with status 1.
  check_license(Config) ->
      ok = ensure_application_load(emqx_license),
      %% This checks formal license validity to ensure
      %% that the node can successfully start with the given license.
      %% However, a valid license may be expired. In this case, the node will
      %% start but will not be able to receive connections due to connection limits.
      %% It may receive license updates from the cluster further.
      case emqx_license:read_license(Config) of
          {error, Reason} ->
              io:format(standard_error, "Error reading license: ~p~n", [Reason]),
              halt(1);
          {ok, _License} ->
              ok
      end.
  %%
  %% Given a string or binary, parse it into a list of terms, a la file:consult/1
  %%
  consult(Text) when is_list(Text) ->
      consult([], Text, []);
  consult(Text) when is_binary(Text) ->
      consult([], binary_to_list(Text), []).

  %% Scan one dot-terminated term at a time. Cont is the erl_scan
  %% continuation and Acc holds the terms parsed so far, in reverse.
  %% Returns the list of terms, or {error, Info} on a scan error.
  consult(Cont, Str, Acc) ->
      case erl_scan:tokens(Cont, Str, 0) of
          {more, NextCont} ->
              %% no complete term yet: signal end of input to the scanner
              consult(NextCont, eof, Acc);
          {done, {ok, Tokens, _}, Remaining} ->
              {ok, Term} = erl_parse:parse_term(Tokens),
              consult([], Remaining, [Term | Acc]);
          {done, {eof, _Other}, _Remaining} ->
              lists:reverse(Acc);
          {done, {error, Info, _}, _Remaining} ->
              {error, Info}
      end.
  %%
  %% Validation functions for checking the app.config
  %%
  %% Run every function from get_validation_funs/0 on the single term read
  %% from the config file. Returns ok when each one returns true, otherwise
  %% {error, Failures} with the results that were not true.
  validate([Terms]) ->
      Failures = [Res || Check <- get_validation_funs(),
                         Res <- [Check(Terms)],
                         Res =/= true],
      case Failures of
          [] -> ok;
          [_ | _] -> {error, Failures}
      end.
  %% Some initial and basic checks for the app.config file.
  %% The list is currently empty, so no checks are performed.
  get_validation_funs() ->
      [].
  %% Print one validation problem, {warning, Text} or {error, Text}, to
  %% standard error.
  print_issue({warning, Warning}) ->
      print_issue_line("Warning in app.config: ~s~n", Warning);
  print_issue({error, Error}) ->
      print_issue_line("Error in app.config: ~s~n", Error).

  %% Write Text to standard error using Format.
  print_issue_line(Format, Text) ->
      io:format(standard_error, Format, [Text]).
  %% Local copy of string:join/2: concatenate the strings in the list with
  %% Sep inserted between consecutive elements. It was adopted because
  %% string:join/2 is being obsoleted and lists:join/2 only appeared in
  %% OTP 19.0; it should work for most unicode use cases.
  join([], Sep) when is_list(Sep) ->
      [];
  join([First | Others], Sep) ->
      First ++ lists:foldr(fun(Item, Acc) -> Sep ++ Item ++ Acc end, [], Others).
  %% Add the ebin dir of every library of the current release to the code
  %% path, followed by the patches directories.
  %%
  %% The release root comes from the RUNNER_ROOT_DIR environment variable and
  %% the release version from REL_VSN; the library list is read from
  %% <root>/releases/RELEASES. Raises an error when that file cannot be read.
  add_libs_dir() ->
      [_ | _] = RootDir = os:getenv("RUNNER_ROOT_DIR"),
      CurrentVsn = os:getenv("REL_VSN"),
      RelFile = filename:join([RootDir, "releases", "RELEASES"]),
      case file:consult(RelFile) of
          {error, Reason} ->
              %% the rel file has been deleted by the release handler
              error({failed_to_read_RELEASES_file, RelFile, Reason});
          {ok, [Releases]} ->
              Release = lists:keyfind(CurrentVsn, 3, Releases),
              {release, _Name, _AppVsn, _ErtsVsn, Libs, _State} = Release,
              AddLib =
                  fun({App, Vsn, _}) ->
                      add_lib_dir(RootDir, App, Vsn)
                  end,
              lists:foreach(AddLib, Libs)
      end,
      PatchDirs = [filename:join([RootDir, "data", "patches"]), "/var/lib/emqx/patches"],
      lists:foreach(fun(Dir) -> ok = add_patches_dir(Dir) end, PatchDirs).
  %% Put PatchesDir at the front of the code path when it is an existing
  %% directory; otherwise do nothing. Returns ok.
  add_patches_dir(PatchesDir) ->
      filelib:is_dir(PatchesDir) andalso (true = code:add_patha(PatchesDir)),
      ok.
  %% Put <RootDir>/lib/<Name>-<Vsn>/ebin at the front of the code path and,
  %% for emqx applications, load the application. Raises error(LibDir) when
  %% the directory cannot be added. Returns ok.
  add_lib_dir(RootDir, Name, Vsn) ->
      AppDir = atom_to_list(Name) ++ "-" ++ Vsn,
      LibDir = filename:join([RootDir, lib, AppDir, ebin]),
      case code:add_patha(LibDir) of
          {error, _} ->
              error(LibDir);
          true ->
              %% load all applications into application controller, before performing
              %% the configuration check of HOCON
              %%
              %% It helps to implement the feature of dynamically searching schema.
              %% See `emqx_gateway_schema:fields(gateway)`
              case is_emqx_application(Name) of
                  true -> _ = ensure_application_load(Name), ok;
                  false -> ok
              end
      end.
  %% True when the application name, given as an atom or a string, starts
  %% with "emqx_"; false for anything else.
  is_emqx_application(Name) when is_atom(Name) ->
      is_emqx_application(atom_to_list(Name));
  is_emqx_application(Name) when is_list(Name) ->
      lists:prefix("emqx_", Name);
  is_emqx_application(_) ->
      false.
  %% Load application Name. Returns ok when it loads or was already loaded;
  %% raises {failed_to_load_application, Name, Reason} otherwise.
  ensure_application_load(Name) ->
      ensure_application_load(Name, application:load(Name)).

  %% Interpret the result of application:load/1.
  ensure_application_load(_Name, ok) ->
      ok;
  ensure_application_load(_Name, {error, {already_loaded, _}}) ->
      ok;
  ensure_application_load(Name, {error, Reason}) ->
      error({failed_to_load_application, Name, Reason}).