Переглянути джерело

Merge pull request #9982 from zmstone/0216-fix-bin-emqx-read-vm-args

0216 allow starting two nodes from the same boot script
Zaiming (Stone) Shi 3 роки тому
батько
коміт
92c3cd6882

+ 41 - 21
bin/emqx

@@ -76,6 +76,12 @@ logwarn() {
     fi
 }
 
+logdebug() {
+    if [ "$DEBUG" -eq 1 ]; then
+        echo "DEBUG: $*"
+    fi
+}
+
 die() {
     set +x
     logerr "$1"
@@ -453,24 +459,37 @@ if [ "$IS_ENTERPRISE" = 'yes' ]; then
     CONF_KEYS+=( 'license.key' )
 fi
 
+## To be backward compatible, read and then unset EMQX_NODE_NAME
+if [ -n "${EMQX_NODE_NAME:-}" ]; then
+    export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
+    unset EMQX_NODE_NAME
+fi
+
+# Turn off debug as the ps output can be quite noisy
+set +x
 
 ## Find the running node from 'ps -ef'
-## The primary grep pattern is $RUNNER_ROOT_DIR because one can start multiple nodes at the same time
-# shellcheck disable=SC2009
-PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
-if [ -n "${PS_LINE}" ]; then
-    RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | wc -l)"
+##  * The grep pattern is '[e]mqx' rather than 'emqx' to avoid matching the grep command itself
+##  * The running 'remsh' and 'nodetool' processes must be excluded
+if [ -n "${EMQX_NODE__NAME:-}" ]; then
+    # if node name is provided, filter by node name
+    # shellcheck disable=SC2009
+    PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -E "\s\-s?name\s${EMQX_NODE__NAME}" | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
 else
-    RUNNING_NODES_COUNT=0
+    # shellcheck disable=SC2009
+    PS_LINE="$(ps -ef | $GREP '[e]mqx' | $GREP -v -E '(remsh|nodetool)' | $GREP -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
 fi
+logdebug "PS_LINE=$PS_LINE"
+RUNNING_NODES_COUNT="$(echo -e "$PS_LINE" | sed '/^\s*$/d' | wc -l)"
+[ "$RUNNING_NODES_COUNT" -gt 1 ] && logdebug "More than one running node found: count=$RUNNING_NODES_COUNT"
 
-# Turn off debug as the ps output can be quite noisy
-set +x
 if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
     if [ "$RUNNING_NODES_COUNT" -gt 0 ] && [ "$COMMAND" != 'check_config' ]; then
-        tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
-        echo "Node ${tmp_nodename} is already running!"
-        exit 1
+        running_node_name=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
+        if [ -n "$running_node_name" ] && [ "$running_node_name" = "${EMQX_NODE__NAME:-}" ]; then
+            echo "Node ${running_node_name} is already running!"
+            exit 1
+        fi
     fi
     [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1
     maybe_use_portable_dynlibs
@@ -502,8 +521,6 @@ else
     #    then update the config in the file to 'node.name = "emqx@local.net"', after this change,
     #    there would be no way stop the running node 'emqx@127.0.0.1', because 'emqx stop' command
     #    would try to stop the new node instead.
-    #  * The grep args like '[e]mqx' but not 'emqx' is to avoid greping the grep command itself
-    #  * The running 'remsh' and 'nodetool' processes must be excluded
     if [ "$RUNNING_NODES_COUNT" -eq 1 ]; then
         ## only one emqx node is running, get running args from 'ps -ef' output
         tmp_nodename=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' || true)
@@ -520,14 +537,22 @@ else
         ## Make the format like what call_hocon multi_get prints out, but only need 4 args
         EMQX_BOOT_CONFIGS="node.name=${tmp_nodename}\nnode.cookie=${tmp_cookie}\ncluster.proto_dist=${tmp_proto}\nnode.dist_net_ticktime=$tmp_ticktime\nnode.data_dir=${tmp_datadir}"
     else
-        ## None or more than one node is running, resolve from boot config
-        ## we have no choiece but to read the bootstrap config (with environment overrides available in the current shell)
+        if [ "$RUNNING_NODES_COUNT" -gt 1 ]; then
+            if [ -z "${EMQX_NODE__NAME:-}" ]; then
+                tmp_nodenames=$(echo -e "$PS_LINE" | $GREP -oE "\s\-s?name.*" | awk '{print $2}' | tr '\n' ' ')
+                logerr "More than one EMQX node found running (root dir: ${RUNNER_ROOT_DIR})"
+                logerr "Running nodes: $tmp_nodenames"
+                logerr "Make sure environment variable EMQX_NODE__NAME is set to indicate for which node this command is intended."
+                exit 1
+            fi
+        fi
+        ## We have no choice but to read the bootstrap config (with environment overrides available in the current shell)
         [ -f "$EMQX_ETC_DIR"/emqx.conf ] || die "emqx.conf is not found in $EMQX_ETC_DIR" 1
         maybe_use_portable_dynlibs
         EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
     fi
 fi
-[ "$DEBUG" -eq 1 ] && echo "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS"
+logdebug "EMQX_BOOT_CONFIGS: $EMQX_BOOT_CONFIGS"
 [ "$DEBUG" -eq 1 ] && set -x
 
 get_boot_config() {
@@ -877,11 +902,6 @@ maybe_log_to_console() {
     fi
 }
 
-## To be backward compatible, read and then unset EMQX_NODE_NAME
-if [ -n "${EMQX_NODE_NAME:-}" ]; then
-    export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
-    unset EMQX_NODE_NAME
-fi
 ## Possible ways to configure emqx node name:
 ## 1. configure node.name in emqx.conf
 ## 2. override with environment variable EMQX_NODE__NAME

scripts/start-two-nodes-in-docker.sh → scripts/test/start-two-nodes-in-docker.sh


+ 69 - 0
scripts/test/start-two-nodes-in-host.sh

@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+## This starts two nodes on the same host (not in docker).
+## The listener ports are shifted with an offset to avoid clashing.
+## The data and log directories are configured to use ./tmp/
+
+## By default, the boot script is ./_build/emqx/rel/emqx/bin/emqx
+## It can be overridden with arg1 and arg2 for the two nodes respectively (arg2 defaults to arg1)
+
+# ensure dir
+cd -P -- "$(dirname -- "$0")/../../"
+
+DEFAULT_BOOT='./_build/emqx/rel/emqx/bin/emqx'
+
+BOOT1="${1:-$DEFAULT_BOOT}"
+BOOT2="${2:-$BOOT1}"
+
+export IP1='127.0.0.1'
+export IP2='127.0.0.2'
+
+# Cannot use the same node name even if the IPs are different, because Erlang distribution listens on 0.0.0.0
+NODE1="emqx1@$IP1"
+NODE2="emqx2@$IP2"
+
+start_cmd() {
+    local index="$1"
+    local nodehome
+    nodehome="$(pwd)/tmp/emqx${index}"
+    [ "$index" -eq 1 ] && BOOT_SCRIPT="$BOOT1"
+    [ "$index" -eq 2 ] && BOOT_SCRIPT="$BOOT2"
+    mkdir -p "${nodehome}/data" "${nodehome}/log"
+    cat <<-EOF
+env DEBUG="${DEBUG:-0}" \
+EMQX_CLUSTER__STATIC__SEEDS="[\"$NODE1\",\"$NODE2\"]" \
+EMQX_CLUSTER__DISCOVERY_STRATEGY=static \
+EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL="${EMQX_LOG__FILE_HANDLERS__DEFAULT__LEVEL:-debug}" \
+EMQX_LOG__FILE_HANDLERS__DEFAULT__FILE="${nodehome}/log/emqx.log" \
+EMQX_NODE_NAME="emqx${index}@\$IP${index}" \
+EMQX_NODE__COOKIE="${EMQX_NODE__COOKIE:-cookie1}" \
+EMQX_LOG_DIR="${nodehome}/log" \
+EMQX_NODE__DATA_DIR="${nodehome}/data" \
+EMQX_LISTENERS__TCP__DEFAULT__BIND="\$IP${index}:1883" \
+EMQX_LISTENERS__SSL__DEFAULT__BIND="\$IP${index}:8883" \
+EMQX_LISTENERS__WS__DEFAULT__BIND="\$IP${index}:8083" \
+EMQX_LISTENERS__WSS__DEFAULT__BIND="\$IP${index}:8084" \
+EMQX_DASHBOARD__LISTENERS__HTTP__BIND="\$IP${index}:18083" \
+$BOOT_SCRIPT start
+EOF
+}
+
+echo "Stopping $NODE1"
+env EMQX_NODE_NAME="$NODE1" ./_build/emqx/rel/emqx/bin/emqx stop || true
+
+echo "Stopping $NODE2"
+env EMQX_NODE_NAME="$NODE2" ./_build/emqx/rel/emqx/bin/emqx stop || true
+
+start_one_node() {
+    local index="$1"
+    local cmd
+    cmd="$(start_cmd "$index" | envsubst)"
+    echo "$cmd"
+    eval "$cmd"
+}
+
+## Fork-start node1, otherwise it'll keep waiting for node2 because we are using static cluster discovery
+start_one_node 1 &
+start_one_node 2