|
|
@@ -0,0 +1,407 @@
|
|
|
+%%--------------------------------------------------------------------
|
|
|
+%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
|
|
|
+%%
|
|
|
+%% Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
+%% you may not use this file except in compliance with the License.
|
|
|
+%% You may obtain a copy of the License at
|
|
|
+%%
|
|
|
+%% http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+%%
|
|
|
+%% Unless required by applicable law or agreed to in writing, software
|
|
|
+%% distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+%% See the License for the specific language governing permissions and
|
|
|
+%% limitations under the License.
|
|
|
+%%--------------------------------------------------------------------
|
|
|
+
|
|
|
+%% This module provides a loose parser for URIs.
|
|
|
+%% The standard library's `uri_string' module is strict and does not allow
|
|
|
+%% to parse invalid URIs, like templates: `http://example.com/${username}'.
|
|
|
+
|
|
|
+-module(emqx_utils_uri).
|
|
|
+
|
|
|
+-export([parse/1, format/1]).
|
|
|
+
|
|
|
+-export([
|
|
|
+ scheme/1,
|
|
|
+ userinfo/1,
|
|
|
+ host/1,
|
|
|
+ port/1,
|
|
|
+ path/1,
|
|
|
+ query/1,
|
|
|
+ fragment/1,
|
|
|
+ base_url/1,
|
|
|
+ request_base/1
|
|
|
+]).
|
|
|
+
|
|
|
+-type scheme() :: binary().
|
|
|
+-type userinfo() :: binary().
|
|
|
+-type host() :: binary().
|
|
|
+-type port_number() :: inet:port_number().
|
|
|
+-type path() :: binary().
|
|
|
+-type query() :: binary().
|
|
|
+-type fragment() :: binary().
|
|
|
+-type request_base() :: #{
|
|
|
+ scheme := http | https,
|
|
|
+ host := iolist(),
|
|
|
+ port := inet:port_number()
|
|
|
+}.
|
|
|
+
|
|
|
+-type authority() :: #{
|
|
|
+ userinfo := emqx_maybe:t(userinfo()),
|
|
|
+ host := host(),
|
|
|
+ %% Types:
|
|
|
+ %% ipv6: `\[[a-z\d:\.]*\]` — bracketed "ivp6-like" address
|
|
|
+ %% regular: `example.com` — arbitrary host not containg `:` which is forbidden in hostnames other than ipv6
|
|
|
+ %% loose: non ipv6-like host containing `:`, probably invalid for a strictly valid URI
|
|
|
+ host_type := ipv6 | regular | loose,
|
|
|
+ port := emqx_maybe:t(port_number())
|
|
|
+}.
|
|
|
+
|
|
|
+-type uri() :: #{
|
|
|
+ scheme := emqx_maybe:t(scheme()),
|
|
|
+ authority := emqx_maybe:t(authority()),
|
|
|
+ path := path(),
|
|
|
+ query := emqx_maybe:t(query()),
|
|
|
+ fragment := emqx_maybe:t(fragment())
|
|
|
+}.
|
|
|
+
|
|
|
+-export_type([
|
|
|
+ scheme/0,
|
|
|
+ userinfo/0,
|
|
|
+ host/0,
|
|
|
+ port_number/0,
|
|
|
+ path/0,
|
|
|
+ query/0,
|
|
|
+ fragment/0,
|
|
|
+ authority/0,
|
|
|
+ uri/0,
|
|
|
+ request_base/0
|
|
|
+]).
|
|
|
+
|
|
|
+-on_load(init/0).
|
|
|
+
|
|
|
+%% https://datatracker.ietf.org/doc/html/rfc3986#appendix-B
|
|
|
+%%
|
|
|
+%% > The following line is the regular expression for breaking-down a
|
|
|
+%% > well-formed URI reference into its components.
|
|
|
+%%
|
|
|
+%% > ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
|
|
|
+%%
|
|
|
+%% We skip capturing some unused parts of the regex.
|
|
|
+
|
|
|
+-define(URI_REGEX,
|
|
|
+ ("^(?:(?<scheme>[^:/?#]+):)?(?<authority>//[^/?#]*)?"
|
|
|
+ "(?<path>[^?#]*)(?<query>\\?[^#]*)?(?<fragment>#.*)?")
|
|
|
+).
|
|
|
+
|
|
|
+-define(URI_REGEX_PT_KEY, {?MODULE, uri_re}).
|
|
|
+
|
|
|
+-define(AUTHORITY_REGEX,
|
|
|
+ ("^(?<userinfo>.*@)?"
|
|
|
+ "(?:(?:\\[(?<host_ipv6>[a-z\\d\\.:]*)\\])|(?<host_regular>[^:]*?)|(?<host_loose>.*?))"
|
|
|
+ "(?<port>:\\d+)?$")
|
|
|
+).
|
|
|
+
|
|
|
+-define(AUTHORITY_REGEX_PT_KEY, {?MODULE, authority_re}).
|
|
|
+
|
|
|
+%%-------------------------------------------------------------------
|
|
|
+%% Internal API
|
|
|
+%%-------------------------------------------------------------------
|
|
|
+
|
|
|
+init() ->
|
|
|
+ {ok, UriRE} = re:compile(?URI_REGEX),
|
|
|
+ persistent_term:put(?URI_REGEX_PT_KEY, UriRE),
|
|
|
+
|
|
|
+ {ok, AuthorityRE} = re:compile(?AUTHORITY_REGEX, [caseless]),
|
|
|
+ persistent_term:put(?AUTHORITY_REGEX_PT_KEY, AuthorityRE).
|
|
|
+
|
|
|
+%%-------------------------------------------------------------------
|
|
|
+%% API
|
|
|
+%%-------------------------------------------------------------------
|
|
|
+
|
|
|
+-spec parse(binary()) -> uri().
|
|
|
+parse(URIString) ->
|
|
|
+ {match, [SchemeMatch, AuthorityMatch, PathMatch, QueryMatch, FragmentMatch]} = re:run(
|
|
|
+ URIString, uri_regexp(), [{capture, [scheme, authority, path, query, fragment], binary}]
|
|
|
+ ),
|
|
|
+ Scheme = parse_scheme(SchemeMatch),
|
|
|
+ Authority = parse_authority(AuthorityMatch),
|
|
|
+ Path = PathMatch,
|
|
|
+ Query = parse_query(QueryMatch),
|
|
|
+ Fragment = parse_fragment(FragmentMatch),
|
|
|
+
|
|
|
+ #{
|
|
|
+ scheme => Scheme,
|
|
|
+ authority => Authority,
|
|
|
+ path => Path,
|
|
|
+ query => Query,
|
|
|
+ fragment => Fragment
|
|
|
+ }.
|
|
|
+
|
|
|
+-spec base_url(uri()) -> iodata().
|
|
|
+base_url(#{scheme := Scheme, authority := Authority}) ->
|
|
|
+ [format_scheme(Scheme), format_authority(Authority)].
|
|
|
+
|
|
|
+-spec format(uri()) -> iodata().
|
|
|
+format(#{path := Path, query := Query, fragment := Fragment} = URI) ->
|
|
|
+ [
|
|
|
+ base_url(URI),
|
|
|
+ Path,
|
|
|
+ format_query(Query),
|
|
|
+ format_fragment(Fragment)
|
|
|
+ ].
|
|
|
+
|
|
|
+-spec scheme(uri()) -> emqx_maybe:t(scheme()).
|
|
|
+scheme(#{scheme := Scheme}) -> Scheme.
|
|
|
+
|
|
|
+-spec userinfo(uri()) -> emqx_maybe:t(userinfo()).
|
|
|
+userinfo(#{authority := undefined}) -> undefined;
|
|
|
+userinfo(#{authority := #{userinfo := UserInfo}}) -> UserInfo.
|
|
|
+
|
|
|
+-spec host(uri()) -> emqx_maybe:t(host()).
|
|
|
+host(#{authority := undefined}) -> undefined;
|
|
|
+host(#{authority := #{host := Host}}) -> Host.
|
|
|
+
|
|
|
+-spec port(uri()) -> emqx_maybe:t(port_number()).
|
|
|
+port(#{authority := undefined}) -> undefined;
|
|
|
+port(#{authority := #{port := Port}}) -> Port.
|
|
|
+
|
|
|
+-spec path(uri()) -> path().
|
|
|
+path(#{path := Path}) -> Path.
|
|
|
+
|
|
|
+-spec query(uri()) -> emqx_maybe:t(query()).
|
|
|
+query(#{query := Query}) -> Query.
|
|
|
+
|
|
|
+-spec fragment(uri()) -> emqx_maybe:t(fragment()).
|
|
|
+fragment(#{fragment := Fragment}) -> Fragment.
|
|
|
+
|
|
|
+-spec request_base(uri()) -> {ok, request_base()} | {error, term()}.
|
|
|
+request_base(URI) when is_map(URI) ->
|
|
|
+ case emqx_http_lib:uri_parse(iolist_to_binary(base_url(URI))) of
|
|
|
+ {error, Reason} -> {error, Reason};
|
|
|
+ {ok, URIMap} -> {ok, maps:with([scheme, host, port], URIMap)}
|
|
|
+ end;
|
|
|
+request_base(URIString) when is_list(URIString) orelse is_binary(URIString) ->
|
|
|
+ request_base(parse(URIString)).
|
|
|
+
|
|
|
+%%--------------------------------------------------------------------
|
|
|
+%% Helper functions
|
|
|
+%%--------------------------------------------------------------------
|
|
|
+
|
|
|
+parse_scheme(<<>>) -> undefined;
|
|
|
+parse_scheme(Scheme) -> Scheme.
|
|
|
+
|
|
|
+parse_query(<<>>) -> undefined;
|
|
|
+parse_query(<<$?, Query/binary>>) -> Query.
|
|
|
+
|
|
|
+parse_fragment(<<>>) -> undefined;
|
|
|
+parse_fragment(<<$#, Fragment/binary>>) -> Fragment.
|
|
|
+
|
|
|
+authority_regexp() ->
|
|
|
+ persistent_term:get(?AUTHORITY_REGEX_PT_KEY).
|
|
|
+
|
|
|
+parse_authority(<<>>) ->
|
|
|
+ undefined;
|
|
|
+parse_authority(<<$/, $/, Authority/binary>>) ->
|
|
|
+ %% Authority regexp always matches
|
|
|
+ {match, [UserInfoMatch, HostIPv6, HostRegular, HostLoose, PortMatch]} = re:run(
|
|
|
+ Authority, authority_regexp(), [
|
|
|
+ {capture, [userinfo, host_ipv6, host_regular, host_loose, port], binary}
|
|
|
+ ]
|
|
|
+ ),
|
|
|
+ UserInfo = parse_userinfo(UserInfoMatch),
|
|
|
+ {HostType, Host} = parse_host(HostIPv6, HostRegular, HostLoose),
|
|
|
+ Port = parse_port(PortMatch),
|
|
|
+ #{
|
|
|
+ userinfo => UserInfo,
|
|
|
+ host => Host,
|
|
|
+ host_type => HostType,
|
|
|
+ port => Port
|
|
|
+ }.
|
|
|
+
|
|
|
+parse_userinfo(<<>>) -> undefined;
|
|
|
+parse_userinfo(UserInfoMatch) -> binary:part(UserInfoMatch, 0, byte_size(UserInfoMatch) - 1).
|
|
|
+
|
|
|
+parse_host(<<>>, <<>>, Host) -> {loose, Host};
|
|
|
+parse_host(<<>>, Host, <<>>) -> {regular, Host};
|
|
|
+parse_host(Host, <<>>, <<>>) -> {ipv6, Host}.
|
|
|
+
|
|
|
+parse_port(<<>>) -> undefined;
|
|
|
+parse_port(<<$:, Port/binary>>) -> binary_to_integer(Port).
|
|
|
+
|
|
|
+uri_regexp() ->
|
|
|
+ persistent_term:get(?URI_REGEX_PT_KEY).
|
|
|
+
|
|
|
+format_scheme(undefined) -> <<>>;
|
|
|
+format_scheme(Scheme) -> [Scheme, $:].
|
|
|
+
|
|
|
+format_authority(undefined) ->
|
|
|
+ <<>>;
|
|
|
+format_authority(#{userinfo := UserInfo, host := Host, host_type := HostType, port := Port}) ->
|
|
|
+ [$/, $/, format_userinfo(UserInfo), format_host(HostType, Host), format_port(Port)].
|
|
|
+
|
|
|
+format_userinfo(undefined) -> <<>>;
|
|
|
+format_userinfo(UserInfo) -> [UserInfo, $@].
|
|
|
+
|
|
|
+format_host(ipv6, Host) -> [$[, Host, $]];
|
|
|
+format_host(_, Host) -> Host.
|
|
|
+
|
|
|
+format_port(undefined) -> <<>>;
|
|
|
+format_port(Port) -> [$:, integer_to_binary(Port)].
|
|
|
+
|
|
|
+format_query(undefined) -> <<>>;
|
|
|
+format_query(Query) -> [$?, Query].
|
|
|
+
|
|
|
+format_fragment(undefined) -> <<>>;
|
|
|
+format_fragment(Fragment) -> [$#, Fragment].
|
|
|
+
|
|
|
+-ifdef(TEST).
|
|
|
+-include_lib("eunit/include/eunit.hrl").
|
|
|
+
|
|
|
+-define(URLS, [
|
|
|
+ "https://www.example.com/page",
|
|
|
+ "http://subdomain.example.com/path/to/page",
|
|
|
+ "https://www.example.com:8080/path/to/page",
|
|
|
+ "https://user:password@example.com/path/to/page",
|
|
|
+ "https://www.example.com/path%20with%20${spaces}",
|
|
|
+ "http://192.0.2.1/path/to/page",
|
|
|
+ "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/${path}/to/page",
|
|
|
+ "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/to/page",
|
|
|
+ "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:4444/to/page",
|
|
|
+ "ftp://ftp.example.com/${path}/to/file",
|
|
|
+ "ftps://ftp.example.com/path/to/file",
|
|
|
+ "mailto:user@example.com",
|
|
|
+ "tel:+1234567890",
|
|
|
+ "sms:+1234567890?body=Hello%20World",
|
|
|
+ "git://github.com/user/repo.git",
|
|
|
+ "a:b:c",
|
|
|
+ "svn://svn.example.com/project/trunk",
|
|
|
+ "https://www.${example}.com/path/to/page?query_param=value",
|
|
|
+ "https://www.example.com/path/to/page?query_param1=value1&query_param2=value2",
|
|
|
+ "https://www.example.com?query_param1=value1&query_param2=value2",
|
|
|
+ "https://www.example.com/path/to/page#section1",
|
|
|
+ "https://www.example.com/path/to/page?query_param=value#section1",
|
|
|
+ "https://www.example.com/path/to/page?query_param1=value1&query_param2=${value2}#section1",
|
|
|
+ "https://www.example.com?query_param1=value1&query_param2=value2#section1",
|
|
|
+ "file:///path/to/file.txt",
|
|
|
+ "localhost",
|
|
|
+ "localhost:8080",
|
|
|
+ "localhost:8080/path/to/page",
|
|
|
+ "localhost:8080/path/to/page?query_param=value",
|
|
|
+ "localhost:8080/path/to/page?query_param1=value1&query_param2=value2",
|
|
|
+ "/abc/${def}",
|
|
|
+ "/abc/def?query_param=value",
|
|
|
+ "?query_param=value",
|
|
|
+ "#section1"
|
|
|
+]).
|
|
|
+
|
|
|
+parse_format_test_() ->
|
|
|
+ [
|
|
|
+ {URI, ?_assertEqual(list_to_binary(URI), iolist_to_binary(format(parse(URI))))}
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+base_url_test_() ->
|
|
|
+ [
|
|
|
+ {URI, ?_assert(is_prefix(iolist_to_binary(base_url(parse(URI))), list_to_binary(URI)))}
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+scheme_test_() ->
|
|
|
+ [
|
|
|
+ if_parseable_by_uri_string(URI, fun(Expected, Parsed) ->
|
|
|
+ ?assertEqual(maybe_get_bin(scheme, Expected), scheme(Parsed))
|
|
|
+ end)
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+host_test_() ->
|
|
|
+ [
|
|
|
+ if_parseable_by_uri_string(URI, fun(Expected, Parsed) ->
|
|
|
+ ?assertEqual(maybe_get_bin(host, Expected), host(Parsed))
|
|
|
+ end)
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+path_test_() ->
|
|
|
+ [
|
|
|
+ if_parseable_by_uri_string(URI, fun(Expected, Parsed) ->
|
|
|
+ ?assertEqual(maybe_get_bin(path, Expected), path(Parsed))
|
|
|
+ end)
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+query_test_() ->
|
|
|
+ [
|
|
|
+ if_parseable_by_uri_string(URI, fun(Expected, Parsed) ->
|
|
|
+ ?assertEqual(maybe_get_bin(query, Expected), query(Parsed))
|
|
|
+ end)
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+fragment_test_() ->
|
|
|
+ [
|
|
|
+ if_parseable_by_uri_string(URI, fun(Expected, Parsed) ->
|
|
|
+ ?assertEqual(maybe_get_bin(fragment, Expected), fragment(Parsed))
|
|
|
+ end)
|
|
|
+ || URI <- ?URLS
|
|
|
+ ].
|
|
|
+
|
|
|
+templates_test_() ->
|
|
|
+ [
|
|
|
+ {"template in path",
|
|
|
+ ?_assertEqual(
|
|
|
+ <<"/${client_attrs.group}">>,
|
|
|
+ path(parse("https://www.example.com/${client_attrs.group}"))
|
|
|
+ )},
|
|
|
+ {"template in query, no path",
|
|
|
+ ?_assertEqual(
|
|
|
+ <<"group=${client_attrs.group}">>,
|
|
|
+ query(parse("https://www.example.com?group=${client_attrs.group}"))
|
|
|
+ )},
|
|
|
+ {"template in query, path",
|
|
|
+ ?_assertEqual(
|
|
|
+ <<"group=${client_attrs.group}">>,
|
|
|
+ query(parse("https://www.example.com/path/?group=${client_attrs.group}"))
|
|
|
+ )}
|
|
|
+ ].
|
|
|
+
|
|
|
+request_target_test_() ->
|
|
|
+ [
|
|
|
+ ?_assertEqual(
|
|
|
+ {ok, #{port => 443, scheme => https, host => "www.example.com"}},
|
|
|
+ request_base(parse("https://www.example.com/path/to/page?query_param=value#fr"))
|
|
|
+ ),
|
|
|
+ ?_assertEqual(
|
|
|
+ {error, empty_host_not_allowed},
|
|
|
+ request_base(parse("localhost?query_param=value#fr"))
|
|
|
+ ),
|
|
|
+ ?_assertEqual(
|
|
|
+ {error, {unsupported_scheme, <<"ftp">>}},
|
|
|
+ request_base(parse("ftp://localhost"))
|
|
|
+ )
|
|
|
+ ].
|
|
|
+
|
|
|
+is_prefix(Prefix, Binary) ->
|
|
|
+ case Binary of
|
|
|
+ <<Prefix:(byte_size(Prefix))/binary, _/binary>> -> true;
|
|
|
+ _ -> false
|
|
|
+ end.
|
|
|
+
|
|
|
+if_parseable_by_uri_string(URI, Fun) ->
|
|
|
+ case uri_string:parse(URI) of
|
|
|
+ {error, _, _} ->
|
|
|
+ {"skipped", fun() -> true end};
|
|
|
+ ExpectedMap ->
|
|
|
+ ParsedMap = parse(URI),
|
|
|
+ {URI, fun() -> Fun(ExpectedMap, ParsedMap) end}
|
|
|
+ end.
|
|
|
+
|
|
|
+maybe_get_bin(Key, Map) ->
|
|
|
+ maybe_bin(maps:get(Key, Map, undefined)).
|
|
|
+
|
|
|
+maybe_bin(String) when is_list(String) -> list_to_binary(String);
|
|
|
+maybe_bin(undefined) -> undefined.
|
|
|
+
|
|
|
+-endif.
|