Browse Source

Merge pull request #6028 from emqx/fvt-tests-with-replicant

test(fvt): extend functional verification tests to use replicant node

This parameterizes the Functional Verification Tests (FVTs) that run
in CI to use a replication log (RLOG) role of "replicant" for one of
the nodes. With this addition, our FVTs may explore more scenarios
with data replication.

For this new cluster configuration, we target only the replicant node
during the tests to avoid race conditions due to replication lag.
Thales Macedo Garitezi 4 years ago
parent
commit
f5c45dee91

+ 33 - 0
.ci/docker-compose-file/docker-compose-emqx-cluster-rlog.override.yaml

@@ -0,0 +1,33 @@
+x-default-emqx: &default-emqx
+    image: $TARGET:$EMQX_TAG
+    env_file:
+      - conf.cluster.env
+    healthcheck:
+      test: ["CMD", "/opt/emqx/bin/emqx_ctl", "status"]
+      interval: 5s
+      timeout: 25s
+      retries: 5
+
+services:
+  emqx1:
+    <<: *default-emqx
+    container_name: node1.emqx.io
+    environment:
+      - "EMQX_HOST=node1.emqx.io"
+      - "EMQX_CLUSTER__DB_BACKEND=rlog"
+      - "EMQX_CLUSTER__RLOG__ROLE=core"
+      - "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]"
+      - "EMQX_LISTENERS__TCP__DEFAULT__PROXY_PROTOCOL=false"
+      - "EMQX_LISTENERS__WS__DEFAULT__PROXY_PROTOCOL=false"
+
+  emqx2:
+    <<: *default-emqx
+    container_name: node2.emqx.io
+    environment:
+      - "EMQX_HOST=node2.emqx.io"
+      - "EMQX_CLUSTER__DB_BACKEND=rlog"
+      - "EMQX_CLUSTER__RLOG__ROLE=replicant"
+      - "EMQX_CLUSTER__RLOG__CORE_NODES=emqx@node1.emqx.io"
+      - "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]"
+      - "EMQX_LISTENERS__TCP__DEFAULT__PROXY_PROTOCOL=false"
+      - "EMQX_LISTENERS__WS__DEFAULT__PROXY_PROTOCOL=false"

+ 12 - 16
.ci/docker-compose-file/docker-compose-emqx-cluster.yaml

@@ -1,5 +1,15 @@
 version: '3.9'
 
+x-default-emqx: &default-emqx
+    image: $TARGET:$EMQX_TAG
+    env_file:
+      - conf.cluster.env
+    healthcheck:
+      test: ["CMD", "/opt/emqx/bin/emqx_ctl", "status"]
+      interval: 5s
+      timeout: 25s
+      retries: 5
+
 services:
   haproxy:
     container_name: haproxy
@@ -28,34 +38,20 @@ services:
         haproxy -f /usr/local/etc/haproxy/haproxy.cfg
 
   emqx1:
+    <<: *default-emqx
     container_name: node1.emqx.io
-    image: $TARGET:$EMQX_TAG
-    env_file:
-      - conf.cluster.env
     environment:
       - "EMQX_HOST=node1.emqx.io"
-    healthcheck:
-      test: ["CMD", "/opt/emqx/bin/emqx_ctl", "status"]
-      interval: 5s
-      timeout: 25s
-      retries: 5
     networks:
       emqx_bridge:
         aliases:
         - node1.emqx.io
 
   emqx2:
+    <<: *default-emqx
     container_name: node2.emqx.io
-    image: $TARGET:$EMQX_TAG
-    env_file:
-      - conf.cluster.env
     environment:
       - "EMQX_HOST=node2.emqx.io"
-    healthcheck:
-      test: ["CMD", "/opt/emqx/bin/emqx", "ping"]
-      interval: 5s
-      timeout: 25s
-      retries: 5
     networks:
       emqx_bridge:
         aliases:

+ 0 - 1
.ci/docker-compose-file/haproxy/haproxy.cfg

@@ -54,7 +54,6 @@ backend emqx_dashboard_back
     server emqx-1 node1.emqx.io:18083
     server emqx-2 node2.emqx.io:18083
 
-
 ##----------------------------------------------------------------
 ## public
 ##----------------------------------------------------------------

+ 13 - 4
.ci/docker-compose-file/python/pytest.sh

@@ -1,21 +1,30 @@
 #!/bin/sh
 
 ## This script is to run emqx cluster smoke tests (fvt) in github action
-## This script is executed in pacho_client
+## This script is executed in paho_client
 
 set -x
 set +e
 
-LB="haproxy"
+EMQX_TEST_DB_BACKEND=$1
+if [ "$EMQX_TEST_DB_BACKEND" = "rlog" ]
+then
+  # TODO: target only replica to avoid replication races
+  # see: https://github.com/emqx/emqx/issues/6094
+  TARGET_HOST="node2.emqx.io"
+else
+  # use loadbalancer
+  TARGET_HOST="haproxy"
+fi
 
 apk update && apk add git curl
 git clone -b develop-4.0 https://github.com/emqx/paho.mqtt.testing.git /paho.mqtt.testing
 pip install pytest
 
-pytest -v /paho.mqtt.testing/interoperability/test_client/V5/test_connect.py -k test_basic --host "$LB"
+pytest -v /paho.mqtt.testing/interoperability/test_client/V5/test_connect.py -k test_basic --host "$TARGET_HOST"
 RESULT=$?
 
-pytest -v /paho.mqtt.testing/interoperability/test_client --host "$LB"
+pytest -v /paho.mqtt.testing/interoperability/test_client --host "$TARGET_HOST"
 RESULT=$(( RESULT + $? ))
 
 # pytest -v /paho.mqtt.testing/interoperability/test_cluster --host1 "node1.emqx.io" --host2 "node2.emqx.io"

+ 47 - 0
.ci/docker-compose-file/scripts/run-emqx.sh

@@ -0,0 +1,47 @@
+#!/bin/bash
+set -euxo pipefail
+
+if [ "$EMQX_TEST_DB_BACKEND" = "rlog" ]
+then
+  CLUSTER_OVERRIDES="-f .ci/docker-compose-file/docker-compose-emqx-cluster-rlog.override.yaml"
+else
+  CLUSTER_OVERRIDES=""
+fi
+
+{
+  echo "HOCON_ENV_OVERRIDE_PREFIX=EMQX_"
+  echo "EMQX_ZONES__DEFAULT__MQTT__RETRY_INTERVAL=2s"
+  echo "EMQX_ZONES__DEFAULT__MQTT__MAX_TOPIC_ALIAS=10"
+} >> .ci/docker-compose-file/conf.cluster.env
+
+is_node_up() {
+  local node
+  node="$1"
+  docker exec -i "$node" \
+         bash -c "emqx eval \"['emqx@node1.emqx.io','emqx@node2.emqx.io'] = maps:get(running_nodes, ekka_cluster:info()).\"" > /dev/null 2>&1
+}
+
+is_node_listening() {
+  local node
+  node="$1"
+  docker exec -i "$node" \
+         emqx eval "ok = case gen_tcp:connect(\"localhost\", 1883, []) of {ok, P} -> gen_tcp:close(P), ok; _ -> exit(1) end." > /dev/null 2>&1
+}
+
+is_cluster_up() {
+  is_node_up node1.emqx.io && \
+    is_node_up node2.emqx.io && \
+    is_node_listening node1.emqx.io && \
+    is_node_listening node2.emqx.io
+}
+
+docker-compose \
+  -f .ci/docker-compose-file/docker-compose-emqx-cluster.yaml \
+  $CLUSTER_OVERRIDES \
+  -f .ci/docker-compose-file/docker-compose-python.yaml \
+  up -d
+
+while ! is_cluster_up; do
+  echo "['$(date -u +"%Y-%m-%dT%H:%M:%SZ")']:waiting emqx";
+  sleep 5;
+done

+ 12 - 15
.github/workflows/run_fvt_tests.yaml

@@ -69,8 +69,11 @@ jobs:
       fail-fast: false
       matrix:
         otp:
-        - 23.2.7.2-emqx-2
-        - 24.1.1-emqx-1
+          - 23.2.7.2-emqx-2
+          - 24.1.1-emqx-1
+        cluster_db_backend:
+          - "mnesia"
+          - "rlog"
 
     steps:
     - uses: actions/download-artifact@v2
@@ -91,24 +94,18 @@ jobs:
       timeout-minutes: 5
       working-directory: source
       run: |
-        set -e -u -x
-        echo "HOCON_ENV_OVERRIDE_PREFIX=EMQX_" >> .ci/docker-compose-file/conf.cluster.env
-        echo "EMQX_ZONES__DEFAULT__MQTT__RETRY_INTERVAL=2s" >> .ci/docker-compose-file/conf.cluster.env
-        echo "EMQX_ZONES__DEFAULT__MQTT__MAX_TOPIC_ALIAS=10" >> .ci/docker-compose-file/conf.cluster.env
-        docker-compose \
-            -f .ci/docker-compose-file/docker-compose-emqx-cluster.yaml \
-            -f .ci/docker-compose-file/docker-compose-python.yaml \
-            up -d
-        while ! docker exec -i node1.emqx.io bash -c "emqx eval \"['emqx@node1.emqx.io','emqx@node2.emqx.io'] = maps:get(running_nodes, ekka_cluster:info()).\"" > /dev/null 2>&1; do
-            echo "['$(date -u +"%Y-%m-%dT%H:%M:%SZ")']:waiting emqx";
-            sleep 5;
-        done
+        set -x
+        export EMQX_TEST_DB_BACKEND="${{ matrix.cluster_db_backend }}"
+        ./.ci/docker-compose-file/scripts/run-emqx.sh
     - name: make paho tests
       run: |
-        if ! docker exec -i python /scripts/pytest.sh; then
+        if ! docker exec -i python /scripts/pytest.sh "${{ matrix.cluster_db_backend }}"; then
           echo "DUMP_CONTAINER_LOGS_BGN"
+          echo "============== haproxy =============="
           docker logs haproxy
+          echo "==============  node1  =============="
           docker logs node1.emqx.io
+          echo "==============  node2  =============="
           docker logs node2.emqx.io
           echo "DUMP_CONTAINER_LOGS_END"
           exit 1