Przeglądaj źródła

refactor: run relup test nodes in individual docker containers

When running EMQX in a lux shell inside a docker container,
the node won't boot due to load_failed for redbug modules.
The reason is still unknown.
Zaiming (Stone) Shi 3 lat temu
rodzic
commit
8b7fc490ae

+ 10 - 2
.ci/fvt_tests/http_server/src/http_server.erl

@@ -32,7 +32,7 @@
 
 start() ->
     application:ensure_all_started(minirest),
-    ets:new(relup_test_message, [named_table, public]),
+    _ = spawn(fun ets_owner/0),
     Handlers = [{"/", minirest:handler(#{modules => [?MODULE]})}],
     Dispatch = [{"/[...]", minirest, Handlers}],
     minirest:start_http(?MODULE, #{socket_opts => [inet, {port, 7077}]}, Dispatch).
@@ -42,7 +42,8 @@ stop() ->
     minirest:stop_http(?MODULE).
 
 get_counter(_Binding, _Params) ->
-    return({ok, ets:info(relup_test_message, size)}).
+    V = ets:info(relup_test_message, size),
+    return({ok, V}).
 
 add_counter(_Binding, Params) ->
     case lists:keymember(<<"payload">>, 1, Params) of
@@ -50,6 +51,13 @@ add_counter(_Binding, Params) ->
             {value, {<<"id">>, ID}, Params1} = lists:keytake(<<"id">>, 1, Params),
             ets:insert(relup_test_message, {ID, Params1});
         _ ->
+            io:format("discarded: ~p\n", [Params]),
             ok
     end,
     return().
+
+ets_owner() ->  %% Body of the process spawned by start/0; creates the counter table and keeps it alive.
+    ets:new(relup_test_message, [named_table, public]),  %% named + public: other processes access it by name
+    receive  %% block here so the process owning the table does not exit
+        stop -> ok  %% a 'stop' message lets the function return, ending the owner process
+    end.

+ 0 - 215
.ci/fvt_tests/relup.lux

@@ -1,215 +0,0 @@
-[config var=PROJ_ROOT]
-[config var=PROFILE]
-[config var=VSN]
-[config var=CUR_PKG]
-[config var=OLD_VSN]
-[config var=OLD_PKG]
-
-[config shell_cmd=/bin/bash]
-[config timeout=600000]
-
-[shell http_server]
-    !cd http_server
-    !rebar3 shell
-    ???Eshell
-    ???>
-    !http_server:start().
-    ?Start http_server listener on 7077 successfully.
-    ?ok
-    ?>
-
-[shell emqx1]
-    !cd $PROJ_ROOT
-    !mkdir -p emqx1
-    !tar -C emqx1 -zxf "$OLD_PKG"
-    ?SH-PROMPT
-
-    !cd emqx1
-    !export EMQX_NODE_NAME='emqx1@127.0.0.1'
-    !./bin/emqx start
-    ?EMQX .* is started successfully!
-    ?SH-PROMPT
-
-[shell emqx2]
-    !cd $PROJ_ROOT
-    !mkdir -p emqx2
-    !tar -C emqx2 -zxf "$OLD_PKG"
-    ?SH-PROMPT
-
-    !cd emqx2
-    !export EMQX_NODE__NAME='emqx2@127.0.0.1'
-    !export EMQX_STATSD__SERVER='127.0.0.1:8124'
-    !export EMQX_LISTENERS__TCP__DEFAULT__BIND='0.0.0.0:1882'
-    !export EMQX_LISTENERS__SSL__DEFAULT__BIND='0.0.0.0:8882'
-    !export EMQX_LISTENERS__WS__DEFAULT__BIND='0.0.0.0:8082'
-    !export EMQX_LISTENERS__WSS__DEFAULT__BIND='0.0.0.0:8085'
-    !export EMQX_DASHBOARD__LISTENERS__HTTP__BIND='0.0.0.0:18082'
-    !./bin/emqx start
-    ?EMQX .* is started successfully!
-    ?SH-PROMPT
-
-    !./bin/emqx_ctl cluster join emqx1@127.0.0.1
-    ???Join the cluster successfully.
-    ?SH-PROMPT
-
-    !./bin/emqx_ctl cluster status --json | jq -c .running_nodes
-    ???["emqx1@127.0.0.1","emqx2@127.0.0.1"]
-    ?SH-PROMPT
-
-    ## create a webhook data bridge with id "my_webhook"
-    !curl --user admin:public --silent --show-error 'http://localhost:18082/api/v5/bridges' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"name":"my_webhook","method":"post","url":"http://127.0.0.1:7077/counter","headers":{"content-type":"application/json"},"pool_size":4,"enable_pipelining":100,"connect_timeout":"5s","request_timeout":"5s","max_retries":3,"type":"webhook","ssl":{"enable":false,"verify":"verify_none"}}' | jq '.status'
-    ?connected
-    ?SH-PROMPT
-
-    ## create a rule that uses the webhook as action, the rule id = "rule_edsy"
-    !curl --user admin:public --silent --show-error 'http://localhost:18082/api/v5/rules' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"id":"rule_edsy","sql":"SELECT\n  *\nFROM\n  \"t/#\"","actions":["webhook:my_webhook"]}' | jq '.id'
-    ?rule_edsy
-    ?SH-PROMPT
-
-[shell emqx1]
-    ## verify the bridges and rules are sync to the other node
-    !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/bridges/webhook:my_webhook' -X 'GET' -H 'Content-Type: application/json' | jq '.name'
-    ?my_webhook
-    ?SH-PROMPT
-    !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/rules/rule_edsy' -X 'GET' -H 'Content-Type: application/json' | jq '.id'
-    ?rule_edsy
-    ?SH-PROMPT
-
-[shell bench]
-
-    !emqtt_bench pub -c 10 -I 1000 -t t/%i -s 64 -L 300
-    ???sent
-
-[shell emqx1]
-    !echo "" > log/emqx.log.1
-    ?SH-PROMPT
-
-    !cp -f ../$CUR_PKG releases/
-
-    ## 1. upgrade to the new version
-    !./bin/emqx install $VSN
-    ?Made release permanent: "$VSN"
-    ?SH-PROMPT
-
-    !./bin/emqx versions | grep permanent
-    ?(.*)$VSN
-    ?SH-PROMPT
-
-    ## 2. downgrade to the old version
-    !./bin/emqx install $OLD_VSN
-    ?Made release permanent:.*
-    ?SH-PROMPT
-
-    !./bin/emqx versions | grep permanent | grep -qs "$OLD_VSN"
-    ?SH-PROMPT:
-    !echo ==$$?==
-    ?^==0==
-    ?SH-PROMPT:
-
-    ## 3. again, upgrade to the new version
-    !./bin/emqx install $VSN
-    ?Made release permanent: "$VSN"
-    ?SH-PROMPT
-
-    !./bin/emqx versions | grep permanent
-    ?(.*)$VSN
-    ?SH-PROMPT
-
-    !./bin/emqx_ctl cluster status --json | jq -c .running_nodes
-    ???["emqx1@127.0.0.1","emqx2@127.0.0.1"]
-    ?SH-PROMPT
-
-[shell emqx2]
-    !echo "" > log/emqx.log.1
-    ?SH-PROMPT
-
-    !cp -f ../$CUR_PKG releases/
-
-    ## 1. upgrade to the new version
-    !./bin/emqx install $VSN
-    ?Made release permanent: "$VSN"
-    ?SH-PROMPT
-
-    !./bin/emqx versions |grep permanent
-    ?(.*)$VSN
-    ?SH-PROMPT
-
-    ## 2. downgrade to the old version
-    !./bin/emqx install $OLD_VSN
-    ?Made release permanent:.*
-    ?SH-PROMPT
-
-    !./bin/emqx versions | grep permanent | grep -qs "$OLD_VSN"
-    ?SH-PROMPT:
-    !echo ==$$?==
-    ?^==0==
-    ?SH-PROMPT:
-
-    ## 3. again, upgrade to the new version
-    !./bin/emqx install $VSN
-    ?Made release permanent: "$VSN"
-    ?SH-PROMPT
-
-    !./bin/emqx versions |grep permanent
-    ?(.*)$VSN
-    ?SH-PROMPT
-
-    !./bin/emqx_ctl cluster status --json | jq -c .running_nodes
-    ???["emqx1@127.0.0.1","emqx2@127.0.0.1"]
-    ?SH-PROMPT
-
-## We don't guarantee not to lose a single message!
-## So even if we received 290~300 messages, we consider it as success
-[shell bench]
-    ???publish complete
-    ??SH-PROMPT:
-    !sleep 5
-    ?SH-PROMPT
-
-    !curl --user admin:public --silent --show-error http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx1@127.0.0.1\") | .metrics.matched"
-    ?300
-    ?SH-PROMPT
-
-    !curl --user admin:public --silent --show-error http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx1@127.0.0.1\") | .metrics.\"actions.success\""
-    ?\{"data":(29[0-9])|(300),"code":0\}
-    ?SH-PROMPT
-
-    ## The /counter API is provided by .ci/fvt_test/http_server
-    !curl http://127.0.0.1:7077/counter
-    ?\{"data":(29[0-9])|(300),"code":0\}
-    ?SH-PROMPT
-
-[shell emqx2]
-    !cat log/emqx.log.1 | tail -n 100
-    -error
-    ??SH-PROMPT:
-
-    !./bin/emqx stop
-    ?ok
-    ?SH-PROMPT:
-
-    !rm -rf emqx2/
-    ?SH-PROMPT:
-
-[shell emqx1]
-    !cat log/emqx.log.1 | tail -n 100
-    -error
-    ??SH-PROMPT:
-
-    !./bin/emqx stop
-    ?ok
-    ?SH-PROMPT:
-
-    !rm -rf emqx1/
-    ?SH-PROMPT:
-
-[shell http_server]
-    !http_server:stop().
-    ?ok
-    ?>
-    !halt(3).
-    ?SH-PROMPT:
-
-[cleanup]
-    !echo ==$$?==
-    ?==0==

+ 27 - 52
.github/workflows/run_relup_tests.yaml

@@ -54,14 +54,12 @@ jobs:
       with:
         name: emqx_built
         path: |
-          emqx/_packages/*/*.tar.gz
-          emqx/.ci/fvt_tests
+          emqx/*
 
   relup_test_run:
     needs:
       - relup_test_plan
     runs-on: ubuntu-20.04
-    container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04"
     strategy:
       fail-fast: false
       matrix:
@@ -74,64 +72,41 @@ jobs:
       run:
         shell: bash
     steps:
+      # setup Erlang to run lux
+    - uses: erlef/setup-beam@v1
+      with:
+        otp-version: "24.2"
+    - uses: actions/checkout@v2
+      with:
+        repository: hawk/lux
+        ref: lux-2.8.1
+        path: lux
+    - name: Install lux
+      run: |
+        set -e -u -x
+        cd lux
+        autoconf
+        ./configure
+        make
+        echo "$(pwd)/bin" >> $GITHUB_PATH
     - uses: actions/download-artifact@v2
       name: Download built emqx and test scenario
       with:
         name: emqx_built
-        path: emqx_built
-    - name: Prepare packages
+        path: .
+    - name: run relup test
       run: |
         set -e -x -u
-        mkdir -p packages
-        cp emqx_built/_packages/*/*.tar.gz packages
-        cd packages
-        case "$OLD_VSN" in
-          e*)
-            profile='emqx-enterprise'
-            s3dir='emqx-ee'
-            ;;
-          v*)
-            profile='emqx'
-            s3dir='emqx-ce'
-            ;;
-          *)
-            echo "unknown old version $OLD_VSN"
-            exit 1
-            ;;
-        esac
-        wget --no-verbose https://s3-us-west-2.amazonaws.com/packages.emqx/${s3dir}/$OLD_VSN/${profile}-${OLD_VSN#[e|v]}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz
-    - name: Run relup test scenario
-      timeout-minutes: 5
-      run: |
-        set -x
-        case "$OLD_VSN" in
-          e*)
-            cur_vsn=$CUR_EE_VSN
-            profile='emqx-enterprise'
-            ;;
-          v*)
-            cur_vsn=$CUR_CE_VSN
-            profile='emqx'
-            ;;
-        esac
-        old_pkg="${profile}-${OLD_VSN#[e|v]}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz"
-        cur_pkg="${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz"
-        lux \
-        --progress verbose \
-        --case_timeout infinity \
-        --var PROJ_ROOT="$(pwd)" \
-        --var PROFILE="$profile" \
-        --var VSN="$cur_vsn" \
-        --var OLD_VSN="$OLD_VSN" \
-        --var CUR_PKG="$cur_pkg" \
-        --var OLD_PKG="$old_pkg" \
-        emqx_built/.ci/fvt_tests/relup.lux
+        cd emqx
+        if ! ./scripts/relup/run-relup-lux.sh $OLD_VSN; then
+          docker logs node1.emqx.io | tee lux_logs/emqx1.log
+          docker logs node2.emqx.io | tee lux_logs/emqx2.log
+          exit 1
+        fi
     - uses: actions/upload-artifact@v2
       name: Save debug data
       if: failure()
       with:
         name: debug_data
         path: |
-          packages/emqx1/*
-          packages/emqx2/*
-          lux_logs
+          wd/lux_logs

+ 1 - 0
.gitignore

@@ -66,3 +66,4 @@ mix.lock
 apps/emqx/test/emqx_static_checks_data/master.bpapi
 # rendered configurations
 *.conf.rendered
+lux_logs/

+ 41 - 0
scripts/relup/check-results.sh

@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+matched_node1="$(curl --user admin:public -sf http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node1.emqx.io\") | .metrics.matched")"
+# TODO
+matched_node2=0
+#matched_node2="$(curl --user admin:public -sf http://localhost:18084/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node2.emqx.io\") | .metrics.matched")"
+success_node1="$(curl --user admin:public -sf http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node1.emqx.io\") | .metrics.\"actions.success\"")"
+# TODO
+success_node2=0
+#success_node2="$(curl --user admin:public -sf http://localhost:18084/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node2.emqx.io\") | .metrics.\"actions.success\"")"
+webhook="$(curl -sf http://localhost:7077/counter | jq '.data')"
+
+MATCHED_TOTAL="$(( matched_node1 + matched_node2 ))"
+SUCCESS_TOTAL="$(( success_node1 + success_node2 ))"
+COLLECTED_TOTAL="$webhook"
+
+is_number() {
+    re='^[0-9]+$'
+    if ! [[ $2 =~ $re ]] ; then
+       echo "error: $1=$2 is not a number" >&2; exit 1
+    fi
+}
+
+is_number MATCHED_TOTAL "$MATCHED_TOTAL"
+is_number SUCCESS_TOTAL "$SUCCESS_TOTAL"
+is_number COLLECTED_TOTAL "$COLLECTED_TOTAL"
+
+if [ "$MATCHED_TOTAL" -lt 290 ] || \
+   [ "$SUCCESS_TOTAL" -lt 290 ] || \
+   [ "$COLLECTED_TOTAL" -lt 290 ]; then
+    echo "FAILED"
+    echo "MATCHED_TOTAL=$MATCHED_TOTAL"
+    echo "SUCCESS_TOTAL=$SUCCESS_TOTAL"
+    echo "COLLECTED_TOTAL=$COLLECTED_TOTAL"
+    exit 1
+else
+    echo "ALL_IS_WELL"
+    exit 0
+fi

+ 130 - 0
scripts/relup/relup.lux

@@ -0,0 +1,130 @@
+[config var=PROJ_ROOT]
+[config var=VSN]
+[config var=CUR_PKG]
+[config var=OLD_VSN]
+[config var=NODE1]
+[config var=NODE2]
+[config var=BENCH]
+
+[config shell_cmd=/bin/bash]
+[config timeout=600000]
+
+[shell emqx1]
+    !docker exec -it $NODE1 emqx_ctl cluster status
+    ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io']
+    ?SH-PROMPT
+
+    ## create a webhook data bridge with id "my_webhook"
+    !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/bridges' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"name":"my_webhook","body":"","method":"post","url":"http://webhook.emqx.io:7077/counter","headers":{"content-type":"application/json"},"pool_size":4,"enable_pipelining":100,"connect_timeout":"5s","request_timeout":"5s","max_retries":3,"type":"webhook","ssl":{"enable":false,"verify":"verify_none"}}' | jq '.status'
+    ?connected
+    ?SH-PROMPT
+
+    ## create a rule that uses the webhook as action, the rule id = "rule_edsy"
+    !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/rules' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"id":"rule_edsy","sql":"SELECT\n  *\nFROM\n  \"t/#\"","actions":["webhook:my_webhook"]}' | jq '.id'
+    ?rule_edsy
+    ?SH-PROMPT
+
+[shell emqx2]
+    ## verify the bridges and rules are sync to the other node
+    !curl --user admin:public --silent --show-error 'http://localhost:18084/api/v5/bridges/webhook:my_webhook' -X 'GET' -H 'Content-Type: application/json' | jq '.name'
+    ?my_webhook
+    ?SH-PROMPT
+    !curl --user admin:public --silent --show-error 'http://localhost:18084/api/v5/rules/rule_edsy' -X 'GET' -H 'Content-Type: application/json' | jq '.id'
+    ?rule_edsy
+    ?SH-PROMPT
+
+[shell bench]
+    !docker exec -it $BENCH emqtt_bench pub --host 'node1.emqx.io' --port 1883 -c 10 -I 1000 -t t/%i -s 64 -L 300
+
+[shell emqx1]
+    !docker cp $CUR_PKG $NODE1:/emqx/releases/
+
+    ## 1. upgrade to the new version
+    !docker exec -it $NODE1 emqx install $VSN
+    ?Made release permanent: "$VSN"
+    ?SH-PROMPT
+
+    !docker exec -it $NODE1 emqx versions | grep permanent
+    ?(.*)$VSN
+    ?SH-PROMPT
+
+    ## 2. downgrade to the old version
+    !docker exec -it $NODE1 emqx install $OLD_VSN
+    ?Made release permanent:.*
+    ?SH-PROMPT
+
+    !docker exec -it $NODE1 emqx versions | grep permanent | grep -qs "$OLD_VSN"
+    ?SH-PROMPT:
+    !echo ==$$?==
+    ?^==0==
+    ?SH-PROMPT:
+
+    ## 3. again, upgrade to the new version
+    !docker exec -it $NODE1 emqx install $VSN
+    ?Made release permanent: "$VSN"
+    ?SH-PROMPT
+
+    !docker exec -it $NODE1 emqx versions | grep permanent
+    ?(.*)$VSN
+    ?SH-PROMPT
+
+    !docker exec -it $NODE1 emqx_ctl cluster status
+    ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io']
+    ?SH-PROMPT
+
+[shell emqx2]
+    ## Upgrade, downgrade and upgrade again on node 2, mirroring the node 1 steps.
+    !docker cp $CUR_PKG $NODE2:/emqx/releases/
+
+    ## 1. upgrade to the new version
+    !docker exec -it $NODE2 emqx install $VSN
+    ?Made release permanent: "$VSN"
+    ?SH-PROMPT
+
+    !docker exec -it $NODE2 emqx versions | grep permanent
+    ?(.*)$VSN
+    ?SH-PROMPT
+
+    ## 2. downgrade to the old version
+    !docker exec -it $NODE2 emqx install $OLD_VSN
+    ?Made release permanent:.*
+    ?SH-PROMPT
+
+    !docker exec -it $NODE2 emqx versions | grep permanent | grep -qs "$OLD_VSN"
+    ?SH-PROMPT:
+    !echo ==$$?==
+    ?^==0==
+    ?SH-PROMPT:
+
+    ## 3. again, upgrade to the new version
+    !docker exec -it $NODE2 emqx install $VSN
+    ?Made release permanent: "$VSN"
+    ?SH-PROMPT
+
+    !docker exec -it $NODE2 emqx versions | grep permanent
+    ?(.*)$VSN
+    ?SH-PROMPT
+
+    ## check the cluster view from the node that was just upgraded (node 2, not node 1)
+    !docker exec -it $NODE2 emqx_ctl cluster status
+    ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io']
+    ?SH-PROMPT
+
+## We don't guarantee not to lose a single message!
+## So even if we received 290~300 messages, we consider it as success
+[shell bench]
+    ## wait for the publisher started earlier in this shell to finish
+    ???publish complete
+    ??SH-PROMPT:
+    !sleep 5
+    ?SH-PROMPT
+
+    ## check-results.sh prints ALL_IS_WELL and exits 0 only when enough messages arrived
+    !$PROJ_ROOT/scripts/relup/check-results.sh
+    ???ALL_IS_WELL
+    ?SH-PROMPT:
+
+    ## nothing may run between the script and this echo,
+    ## otherwise the reported status is not the script's exit status
+    !echo ==$$?==
+    ?^==0==
+    ?SH-PROMPT:
+
+[cleanup]
+    !echo ==$$?==
+    ?==0==

+ 16 - 0
scripts/relup/run-pkg.sh

@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+## This script is intended to run in docker
+## extracts a .tar.gz package and runs EMQX in console mode
+
+set -euo pipefail
+
+PKG="$1"
+
+mkdir -p emqx
+tar -C emqx -zxf "$PKG"
+
+ln -s "$(pwd)/emqx/bin/emqx" /usr/bin/emqx
+ln -s "$(pwd)/emqx/bin/emqx_ctl" /usr/bin/emqx_ctl
+
+emqx console

+ 64 - 0
scripts/relup/run-relup-lux.sh

@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+
+## This script needs the 'lux' command in PATH
+## it runs the scripts/relup/relup.lux script
+
+set -euo pipefail
+
+old_vsn="${1:-}"
+if [ -z "$old_vsn" ]; then
+    echo "arg1 should be the upgrade base version"
+    exit 1
+fi
+
+# ensure dir
+cd -P -- "$(dirname -- "$0")/../.."
+
+set -x
+
+case "$old_vsn" in
+    e*)
+        cur_vsn="$(./pkg-vsn.sh emqx-enterprise)"
+        profile='emqx-enterprise'
+        ;;
+    v*)
+        cur_vsn="$(./pkg-vsn.sh emqx)"
+        profile='emqx'
+        ;;
+    *)
+        echo "unknown old version $old_vsn"
+        exit 1
+        ;;
+esac
+
+# From now on, no need for the v|e prefix.
+# [ev] is a bracket expression matching one leading 'e' or 'v';
+# a '|' inside brackets would be matched literally, not as "or".
+OLD_VSN="${old_vsn#[ev]}"
+
+OLD_PKG="$(pwd)/_upgrade_base/${profile}-${OLD_VSN}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz"
+CUR_PKG="$(pwd)/_packages/${profile}/${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz"
+
+if [ ! -f "$OLD_PKG" ]; then
+    echo "$OLD_PKG not found"
+    exit 1
+fi
+
+if [ ! -f "$CUR_PKG" ]; then
+    echo "$CUR_PKG not found"
+    exit 1
+fi
+
+# start two nodes and their friends (webhook server and a bench) in docker
+./scripts/relup/start-relup-test-cluster.sh 'ubuntu:20.04' "$OLD_PKG"
+
+# run relup tests
+lux \
+    --progress verbose \
+    --case_timeout infinity \
+    --var PROJ_ROOT="$(pwd)" \
+    --var VSN="$cur_vsn" \
+    --var CUR_PKG="$CUR_PKG" \
+    --var OLD_VSN="$OLD_VSN" \
+    --var NODE1="node1.emqx.io" \
+    --var NODE2="node2.emqx.io" \
+    --var BENCH="bench.emqx.io" \
+    ./scripts/relup/relup.lux

+ 111 - 0
scripts/relup/start-relup-test-cluster.sh

@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+## EMQX can only start with longname (https://erlang.org/doc/reference_manual/distributed.html)
+## The host name part of EMQX's node name has to be static, this means we should either
+## pre-assign static IP for containers, or ensure containers can communicate with each other by name
+## this is why a docker network is created, and the containers' names have a dot.
+
+# ensure dir
+cd -P -- "$(dirname -- "$0")/../.."
+
+set -x
+
+IMAGE="${1}"
+PKG="$(readlink -f "${2}")"
+
+NET='emqx.io'
+NODE1="node1.$NET"
+NODE2="node2.$NET"
+WEBHOOK="webhook.$NET"
+BENCH="bench.$NET"
+COOKIE='this-is-a-secret'
+## Erlang image is needed to run webhook server and emqtt-bench
+ERLANG_IMAGE="ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04"
+# builder has emqtt-bench installed
+BENCH_IMAGE="$ERLANG_IMAGE"
+
+## clean up
+docker rm -f "$BENCH" >/dev/null 2>&1 || true
+docker rm -f "$WEBHOOK" >/dev/null 2>&1 || true
+docker rm -f "$NODE1" >/dev/null 2>&1 || true
+docker rm -f "$NODE2" >/dev/null 2>&1 || true
+docker network rm "$NET" >/dev/null 2>&1 || true
+
+docker network create "$NET"
+
+docker run -d -t --name "$NODE1" \
+  --net "$NET" \
+  -e EMQX_LOG__CONSOLE_HANDLER__LEVEL=warning \
+  -e EMQX_NODE_NAME="emqx@$NODE1" \
+  -e EMQX_NODE_COOKIE="$COOKIE" \
+  -p 18083:18083 \
+  -v "$PKG:/emqx.tar.gz" \
+  -v "$(pwd)/scripts/relup/run-pkg.sh:/run-pkg.sh" \
+  "$IMAGE" /run-pkg.sh emqx.tar.gz
+
+docker run -d -t --name "$NODE2" \
+  --net "$NET" \
+  -e EMQX_LOG__CONSOLE_HANDLER__LEVEL=warning \
+  -e EMQX_NODE_NAME="emqx@$NODE2" \
+  -e EMQX_NODE_COOKIE="$COOKIE" \
+  -p 18084:18083 \
+  -v "$PKG:/emqx.tar.gz" \
+  -v "$(pwd)/scripts/relup/run-pkg.sh:/run-pkg.sh" \
+  "$IMAGE" /run-pkg.sh emqx.tar.gz
+
+docker run -d -t --name "$WEBHOOK" \
+  --net "$NET" \
+  -v "$(pwd)/.ci/fvt_tests/http_server:/http_server" \
+  -w /http_server \
+  -p 7077:7077 \
+  "$ERLANG_IMAGE" bash -c 'rebar3 compile; erl -pa _build/default/lib/*/ebin -eval "http_server:start()"'
+
+docker run -d -t --name "$BENCH" \
+    --net "$NET" \
+    "$BENCH_IMAGE" \
+    bash -c 'sleep 10000; exit 1'
+
+wait_limit=60
+## wait_for_emqx CONTAINER LIMIT
+## Poll `emqx_ctl status` inside CONTAINER once per second until it succeeds;
+## exit 1 once more than LIMIT attempts have failed.
+wait_for_emqx() {
+    # locals keep the loop state from overwriting the global wait_limit
+    local wait_sec=0
+    local container="$1"
+    local wait_limit="$2"
+    # stop command tracing, otherwise every poll iteration would be echoed
+    set +x
+    while ! docker exec "$container" emqx_ctl status >/dev/null 2>&1; do
+        wait_sec=$(( wait_sec + 1 ))
+        if [ "$wait_sec" -gt "$wait_limit" ]; then
+            echo "timeout wait for EMQX ($container)"
+            exit 1
+        fi
+        echo -n '.'
+        sleep 1
+    done
+}
+
+## wait_for_webhook LIMIT
+## Poll the webhook HTTP server on localhost:7077 once per second until it
+## answers successfully; exit 1 once more than LIMIT attempts have failed.
+wait_for_webhook() {
+    # locals keep the loop state from overwriting the global wait_limit
+    local wait_sec=0
+    local wait_limit="$1"
+    # stop command tracing, otherwise every poll iteration would be echoed
+    set +x
+    while ! curl -f -s localhost:7077; do
+        wait_sec=$(( wait_sec + 1 ))
+        if [ "$wait_sec" -gt "$wait_limit" ]; then
+            # name the service that actually timed out (was reported as EMQX)
+            echo "timeout wait for webhook"
+            exit 1
+        fi
+        echo -n '.'
+        sleep 1
+    done
+}
+
+# wait for webhook http server to start,
+# it may take a while because it needs to compile from source code
+wait_for_webhook 120
+# after webhook start, it should not cost more than 30 seconds
+wait_for_emqx $NODE1 30
+# after node1 is up, it should not cost more than 10 seconds
+wait_for_emqx $NODE2 10
+echo
+
+docker exec $NODE1 emqx_ctl cluster join "emqx@$NODE2"

+ 0 - 42
scripts/run-relup-lux.sh

@@ -1,42 +0,0 @@
-#!/usr/bin/env bash
-
-## This script needs the 'lux' command in PATH
-## it runs the .ci/fvt_tests/relup.lux script
-
-set -euo pipefail
-
-old_vsn="${1}"
-
-# ensure dir
-cd -P -- "$(dirname -- "$0")/.."
-
-set -x
-
-case "$old_vsn" in
-    e*)
-        cur_vsn="$(./pkg-vsn.sh emqx-enterprise)"
-        profile='emqx-enterprise'
-        ;;
-    v*)
-        cur_vsn="$(./pkg-vsn.sh emqx)"
-        profile='emqx'
-        ;;
-    *)
-        echo "unknown old version $old_vsn"
-        exit 1
-        ;;
-esac
-
-old_pkg="$(pwd)/_upgrade_base/${profile}-${old_vsn#[e|v]}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz"
-cur_pkg="$(pwd)/_packages/${profile}/${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz"
-
-lux \
-    --progress verbose \
-    --case_timeout infinity \
-    --var PROJ_ROOT="$(pwd)" \
-    --var PROFILE="$profile" \
-    --var VSN="$cur_vsn" \
-    --var OLD_VSN="$old_vsn" \
-    --var CUR_PKG="$cur_pkg" \
-    --var OLD_PKG="$old_pkg" \
-    .ci/fvt_tests/relup.lux