浏览代码

Merge pull request #13521 from zmstone/0725-add-ldap-reconnect-on-timeout

0725 add ldap reconnect on timeout
zmstone 1 年之前
父节点
当前提交
9a5d50f26a

+ 61 - 0
.ci/docker-compose-file/openldap/README.md

@@ -0,0 +1,61 @@
+# LDAP authentication
+
+To run manual tests with the default docker-compose files.
+
+Expose openldap container port by uncommenting the `ports` config in `docker-compose-ldap.yaml `
+
+To start openldap:
+
+```
+docker-compose -f ./.ci/docker-compose-file/docker-compose.yaml -f ./.ci/docker-compose-file/docker-compose-ldap.yaml up -docker
+```
+
+## LDAP database
+
+LDAP database is populated from below files:
+```
+apps/emqx_ldap/test/data/emqx.io.ldif /usr/local/etc/openldap/schema/emqx.io.ldif
+apps/emqx_ldap/test/data/emqx.schema /usr/local/etc/openldap/schema/emqx.schema
+```
+
+## Minimal EMQX config
+
+```
+authentication = [
+  {
+    backend = ldap
+    base_dn = "uid=${username},ou=testdevice,dc=emqx,dc=io"
+    filter = "(& (objectClass=mqttUser) (uid=${username}))"
+    mechanism = password_based
+    method {
+      is_superuser_attribute = isSuperuser
+      password_attribute = userPassword
+      type = hash
+    }
+    password = public
+    pool_size = 8
+    query_timeout = "5s"
+    request_timeout = "10s"
+    server = "localhost:1389"
+    username = "cn=root,dc=emqx,dc=io"
+  }
+]
+```
+
+## Example ldapsearch command
+
+```
+ldapsearch -x -H ldap://localhost:389 -D "cn=root,dc=emqx,dc=io" -W -b "uid=mqttuser0007,ou=testdevice,dc=emqx,dc=io" "(&(objectClass=mqttUser)(uid=mqttuser0007))"
+```
+
+## Example mqttx command
+
+The client password hashes are generated from their username.
+
+```
+# disabled user
+mqttx pub -t 't/1' -h localhost -p 1883 -m x -u mqttuser0006 -P mqttuser0006
+
+# enabled super-user
+mqttx pub -t 't/1' -h localhost -p 1883 -m x -u mqttuser0007 -P mqttuser0007
+```

+ 82 - 11
apps/emqx_auth_ldap/test/emqx_authn_ldap_SUITE.erl

@@ -21,6 +21,7 @@
 -include_lib("emqx_auth/include/emqx_authn.hrl").
 -include_lib("eunit/include/eunit.hrl").
 -include_lib("common_test/include/ct.hrl").
+-include_lib("snabbkaffe/include/snabbkaffe.hrl").
 
 -define(LDAP_HOST, "ldap").
 -define(LDAP_DEFAULT_PORT, 389).
@@ -46,13 +47,6 @@ init_per_suite(Config) ->
             Apps = emqx_cth_suite:start([emqx, emqx_conf, emqx_auth, emqx_auth_ldap], #{
                 work_dir => ?config(priv_dir, Config)
             }),
-            {ok, _} = emqx_resource:create_local(
-                ?LDAP_RESOURCE,
-                ?AUTHN_RESOURCE_GROUP,
-                emqx_ldap,
-                ldap_config(),
-                #{}
-            ),
             [{apps, Apps} | Config];
         false ->
             {skip, no_ldap}
@@ -63,7 +57,6 @@ end_per_suite(Config) ->
         [authentication],
         ?GLOBAL
     ),
-    ok = emqx_resource:remove_local(?LDAP_RESOURCE),
     ok = emqx_cth_suite:stop(?config(apps, Config)).
 
 %%------------------------------------------------------------------------------
@@ -128,6 +121,87 @@ t_create_invalid(_Config) ->
         InvalidConfigs
     ).
 
+t_authenticate_timeout_cause_reconnect(_Config) ->
+    TestPid = self(),
+    meck:new(eldap, [non_strict, no_link, passthrough]),
+    try
+        %% cause eldap process to be killed
+        meck:expect(
+            eldap,
+            search,
+            fun
+                (Pid, [{base, <<"uid=mqttuser0007", _/binary>>} | _]) ->
+                    TestPid ! {eldap_pid, Pid},
+                    {error, {gen_tcp_error, timeout}};
+                (Pid, Args) ->
+                    meck:passthrough([Pid, Args])
+            end
+        ),
+
+        Credentials = fun(Username) ->
+            #{
+                username => Username,
+                password => Username,
+                listener => 'tcp:default',
+                protocol => mqtt
+            }
+        end,
+
+        SpecificConfigParams = #{},
+        Result = {ok, #{is_superuser => true}},
+
+        Timeout = 1000,
+        Config0 = raw_ldap_auth_config(),
+        Config = Config0#{
+            <<"pool_size">> => 1,
+            <<"request_timeout">> => Timeout
+        },
+        AuthConfig = maps:merge(Config, SpecificConfigParams),
+        {ok, _} = emqx:update_config(
+            ?PATH,
+            {create_authenticator, ?GLOBAL, AuthConfig}
+        ),
+
+        %% 0006 is a disabled user
+        ?assertEqual(
+            {error, user_disabled},
+            emqx_access_control:authenticate(Credentials(<<"mqttuser0006">>))
+        ),
+        ?assertEqual(
+            {error, not_authorized},
+            emqx_access_control:authenticate(Credentials(<<"mqttuser0007">>))
+        ),
+        ok = wait_for_ldap_pid(1000),
+        [#{id := ResourceID}] = emqx_resource_manager:list_all(),
+        ?retry(1_000, 10, {ok, connected} = emqx_resource_manager:health_check(ResourceID)),
+        %% turn back to normal
+        meck:expect(
+            eldap,
+            search,
+            2,
+            fun(Pid2, Query) ->
+                meck:passthrough([Pid2, Query])
+            end
+        ),
+        %% expect eldap process to be restarted
+        ?assertEqual(Result, emqx_access_control:authenticate(Credentials(<<"mqttuser0007">>))),
+        emqx_authn_test_lib:delete_authenticators(
+            [authentication],
+            ?GLOBAL
+        )
+    after
+        meck:unload(eldap)
+    end.
+
+wait_for_ldap_pid(After) ->
+    receive
+        {eldap_pid, Pid} ->
+            ?assertNot(is_process_alive(Pid)),
+            ok
+    after After ->
+        error(timeout)
+    end.
+
 t_authenticate(_Config) ->
     ok = lists:foreach(
         fun(Sample) ->
@@ -300,6 +374,3 @@ user_seeds() ->
 
 ldap_server() ->
     iolist_to_binary(io_lib:format("~s:~B", [?LDAP_HOST, ?LDAP_DEFAULT_PORT])).
-
-ldap_config() ->
-    emqx_ldap_SUITE:ldap_config([]).

+ 0 - 14
apps/emqx_auth_ldap/test/emqx_authz_ldap_SUITE.erl

@@ -44,7 +44,6 @@ init_per_suite(Config) ->
                 ],
                 #{work_dir => emqx_cth_suite:work_dir(Config)}
             ),
-            ok = create_ldap_resource(),
             [{apps, Apps} | Config];
         false ->
             {skip, no_ldap}
@@ -167,21 +166,8 @@ setup_config(SpecialParams) ->
 ldap_server() ->
     iolist_to_binary(io_lib:format("~s:~B", [?LDAP_HOST, ?LDAP_DEFAULT_PORT])).
 
-ldap_config() ->
-    emqx_ldap_SUITE:ldap_config([]).
-
 start_apps(Apps) ->
     lists:foreach(fun application:ensure_all_started/1, Apps).
 
 stop_apps(Apps) ->
     lists:foreach(fun application:stop/1, Apps).
-
-create_ldap_resource() ->
-    {ok, _} = emqx_resource:create_local(
-        ?LDAP_RESOURCE,
-        ?AUTHZ_RESOURCE_GROUP,
-        emqx_ldap,
-        ldap_config(),
-        #{}
-    ),
-    ok.

+ 19 - 2
apps/emqx_ldap/src/emqx_ldap.erl

@@ -41,6 +41,7 @@
 -export([namespace/0, roots/0, fields/1, desc/1]).
 
 -export([do_get_status/1, get_status_with_poolname/1]).
+-export([search/2]).
 
 -define(LDAP_HOST_OPTIONS, #{
     default_port => 389
@@ -273,6 +274,22 @@ on_query(
             Error
     end.
 
+search(Pid, SearchOptions) ->
+    case eldap:search(Pid, SearchOptions) of
+        {error, ldap_closed} ->
+            %% ldap server closing the socket does not result in
+            %% process restart, so we need to kill it to trigger a quick reconnect
+            %% instead of waiting for the next health-check
+            _ = exit(Pid, kill),
+            {error, ldap_closed};
+        {error, {gen_tcp_error, _} = Reason} ->
+            %% kill the process to trigger reconnect
+            _ = exit(Pid, kill),
+            {error, Reason};
+        Result ->
+            Result
+    end.
+
 do_ldap_query(
     InstId,
     SearchOptions,
@@ -283,7 +300,7 @@ do_ldap_query(
     case
         ecpool:pick_and_do(
             PoolName,
-            {eldap, search, [SearchOptions]},
+            {?MODULE, search, [SearchOptions]},
             handover
         )
     of
@@ -319,7 +336,7 @@ do_ldap_query(
             ?SLOG(
                 error,
                 LogMeta#{
-                    msg => "ldap_connector_do_query_failed",
+                    msg => "ldap_connector_query_failed",
                     reason => emqx_utils:redact(Reason)
                 }
             ),

+ 4 - 0
changes/ce/feat-13521.en.md

@@ -0,0 +1,4 @@
+Fix LDAP query timeout issue.
+
+Previously, LDAP query timeout may cause the underlying connection to be unusable.
+Fixed to always reconnect if timeout happens.

+ 19 - 0
rel/config/ee-examples/ldap-authn.conf

@@ -0,0 +1,19 @@
+authentication = [
+  {
+    backend = ldap
+    base_dn = "uid=${username},ou=testdevice,dc=emqx,dc=io"
+    filter = "(& (objectClass=mqttUser) (uid=${username}))"
+    mechanism = password_based
+    method {
+      is_superuser_attribute = isSuperuser
+      password_attribute = userPassword
+      type = hash
+    }
+    password = public
+    pool_size = 8
+    query_timeout = "5s"
+    request_timeout = "10s"
+    server = "localhost:1389"
+    username = "cn=root,dc=emqx,dc=io"
+  }
+]