performance_test.yaml 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. name: Performance Test
  2. on:
  3. workflow_dispatch:
  4. inputs:
  5. version:
  6. required: false
  7. download_url:
  8. required: false
  9. permissions:
  10. contents: read
  11. jobs:
  12. perftest:
  13. runs-on: ubuntu-latest
  14. strategy:
  15. max-parallel: 1
  16. matrix:
  17. scenario:
  18. - tests/ci/pubsub-2x2c4g-10k-20k-tps
  19. defaults:
  20. run:
  21. shell: bash
  22. steps:
  23. - name: Checkout tf-emqx-performance-test
  24. uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
  25. with:
  26. repository: emqx/tf-emqx-performance-test
  27. ref: v0.3.2
  28. - name: Setup Terraform
  29. uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2
  30. with:
  31. terraform_version: 1.6.4
  32. terraform_wrapper: false
  33. - uses: actions/setup-python@v5
  34. with:
  35. python-version: '3.11'
  36. - run: pip install -r requirements.txt
  37. - name: Download emqx package (custom URL)
  38. if: github.event.inputs.version == '' && github.event.inputs.download_url != ''
  39. run: |
  40. wget "${{ github.event.inputs.download_url }}"
  41. - uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
  42. with:
  43. aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
  44. aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  45. aws-region: ${{ secrets.AWS_DEFAULT_REGION }}
  46. - name: Download emqx package (specific version)
  47. env:
  48. AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
  49. if: github.event.inputs.version != '' && github.event.inputs.download_url == ''
  50. run: |
  51. version=${{ github.event.inputs.version }}
  52. aws s3 cp s3://$AWS_S3_BUCKET/emqx-ee/${version}/emqx-enterprise-${version}-ubuntu22.04-amd64.deb .
  53. - name: Download emqx package (latest version)
  54. if: github.event.inputs.version == '' && github.event.inputs.download_url == ''
  55. env:
  56. GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  57. run: |
  58. set -xeuo pipefail
  59. # get latest emqx version
  60. version=$(gh release list --repo emqx/emqx --limit 1 --json tagName --jq '.[] | .tagName')
  61. # remove 'v' prefix from the version
  62. version=${version:1}
  63. wget https://www.emqx.com/en/downloads/enterprise/${version}/emqx-enterprise-${version}-ubuntu22.04-amd64.deb
  64. - name: Configure AWS Credentials
  65. uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
  66. with:
  67. aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_PERF_TEST }}
  68. aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY_PERF_TEST }}
  69. aws-region: ${{ secrets.AWS_DEFAULT_REGION_PERF_TEST }}
  70. - name: Create infrastructure
  71. id: infra
  72. timeout-minutes: 30
  73. run: |
  74. mv emqx-enterprise-*.deb emqx-enterprise-ubuntu22.04-amd64.deb
  75. ls -lh *.deb
  76. echo "${{ secrets.EMQX_ENTERPRISE_LICENSE }}" > emqx5.lic
  77. cat ${{ matrix.scenario }}.env >> "$GITHUB_ENV"
  78. echo '{}' > slack-payload.json
  79. terraform init
  80. set +e
  81. terraform apply -var spec_file=${{ matrix.scenario }}.yaml -auto-approve -lock=false
  82. # retry once
  83. if [ $? != 0 ]; then
  84. echo "Retrying once"
  85. set -e
  86. terraform apply -var spec_file=${{ matrix.scenario }}.yaml -auto-approve -lock=false
  87. fi
  88. set -e
  89. echo "ssh_key_path=$(terraform output -raw ssh_key_path)" >> $GITHUB_OUTPUT
  90. - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
  91. if: success()
  92. with:
  93. name: ssh_private_key
  94. path: |
  95. ${{ steps.infra.outputs.ssh_key_path }}
  96. - name: Report failure
  97. if: failure()
  98. run: |
  99. jq -n '[{"color": "#ff0000", "fields": [{"title": "Failed to provision infrastructure", "short": false}]}]' > attachments.json
  100. jq -n --argjson attachments "$(<attachments.json)" '{"attachments": $attachments}' > slack-payload.json
  101. - name: Run benchmark
  102. if: success()
  103. id: benchmark
  104. timeout-minutes: 60
  105. run: |
  106. success=0
  107. export TMPDIR=$(mktemp -d)
  108. echo "TMPDIR=$TMPDIR" >> $GITHUB_ENV
  109. echo '[]' > attachments.json
  110. PERIOD=1m scripts/summary.sh
  111. MEM_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .mem' $TMPDIR/mem.json)
  112. MEM_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .mem' $TMPDIR/mem.json)
  113. if [ $(echo "$MEM_CORE_1 > $INITIAL_RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ] \
  114. || [ $(echo "$MEM_CORE_2 > $INITIAL_RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ]; then
  115. success=1
  116. jq --arg mem1 "$MEM_CORE_1" --arg mem2 "$MEM_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "Initial RAM usage is too high", "short": false, "value": "Core 1: \($mem1)%\nCore 2: \($mem2)%"}]}]' \
  117. attachments.json 1<> attachments.json
  118. fi
  119. EMQX_API_URL=$(terraform output -raw emqx_dashboard_url)
  120. ansible loadgen -m command -a 'systemctl start loadgen' --become --limit 'loadgen-emqtt_bench-1.*'
  121. echo "Waiting for subscribers to connect"
  122. subs=0
  123. while [ $subs -lt 10000 ]; do
  124. curl -s -u perftest:perftest "$EMQX_API_URL/api/v5/monitor_current" > "$TMPDIR/monitor_current.json"
  125. subs=$(jq -r '.subscriptions' "$TMPDIR/monitor_current.json")
  126. sleep 1
  127. done
  128. ansible loadgen -m command -a 'systemctl start loadgen' --become --limit 'loadgen-emqtt_bench-2.*'
  129. echo "Waiting for publishers to connect"
  130. conns=$(jq -r '.live_connections' "$TMPDIR/monitor_current.json")
  131. while [ $conns -lt 20000 ]; do
  132. curl -s -u perftest:perftest "$EMQX_API_URL/api/v5/monitor_current" > "$TMPDIR/monitor_current.json"
  133. conns=$(jq -r '.live_connections' "$TMPDIR/monitor_current.json")
  134. sleep 1
  135. done
  136. echo "All clients connected, sleep for $DURATION seconds"
  137. sleep $DURATION
  138. PERIOD="${DURATION}s" scripts/summary.sh | tee -a $GITHUB_STEP_SUMMARY
  139. echo "success=$success" >> $GITHUB_OUTPUT
  140. - name: Cleanup infrastructure
  141. if: always()
  142. run: |
  143. terraform destroy -var spec_file=${{ matrix.scenario }}.yaml -auto-approve
  144. - name: Analyze results
  145. if: success()
  146. run: |
  147. success=${{ steps.benchmark.outputs.success }}
  148. echo "## Test results analysis" >> $GITHUB_STEP_SUMMARY
  149. echo '' >> $GITHUB_STEP_SUMMARY
  150. CPU_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .cpu' $TMPDIR/cpu.json)
  151. CPU_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .cpu' $TMPDIR/cpu.json)
  152. if [ $(echo "$CPU_CORE_1 > $CPU_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ] \
  153. || [ $(echo "$CPU_CORE_2 > $CPU_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ]; then
  154. success=1
  155. jq --arg cpu1 "$CPU_CORE_1" --arg cpu2 "$CPU_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "CPU utilization was too high", "short": false, "value": "Core 1: \($cpu1)%\nCore 2: \($cpu2)%"}]}]' \
  156. attachments.json 1<> attachments.json
  157. echo "* CPU utilization was too high: Core 1: $CPU_CORE_1%, Core 2: $CPU_CORE_2%" >> $GITHUB_STEP_SUMMARY
  158. fi
  159. MEM_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .mem' $TMPDIR/mem.json)
  160. MEM_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .mem' $TMPDIR/mem.json)
  161. if [ $(echo "$MEM_CORE_1 > $RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ] \
  162. || [ $(echo "$MEM_CORE_2 > $RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ]; then
  163. success=1
  164. jq --arg mem1 "$MEM_CORE_1" --arg mem2 "$MEM_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "RAM usage was too high", "short": false, "value": "Core 1: \($mem1)%\nCore 2: \($mem2)%"}]}]' \
  165. attachments.json 1<> attachments.json
  166. echo "* RAM usage was too high: Core 1: $MEM_CORE_1%, Core 2: $MEM_CORE_2%" >> $GITHUB_STEP_SUMMARY
  167. fi
  168. RECEIVED_MSG_RATE=$(jq -r '.received_msg_rate' $TMPDIR/emqx_metrics.json)
  169. SENT_MSG_RATE=$(jq -r '.sent_msg_rate' $TMPDIR/emqx_metrics.json)
  170. if [ $(echo "$RECEIVED_MSG_RATE < $RECEIVED_MSG_RATE_BASELINE * (1 - $ALLOWED_DEVIATION_MSG_RATE)" | bc -l) -eq 1 ] \
  171. || [ $(echo "$SENT_MSG_RATE < $SENT_MSG_RATE_BASELINE * (1 - $ALLOWED_DEVIATION_MSG_RATE)" | bc -l) -eq 1 ]; then
  172. success=1
  173. jq --arg received_msg_rate "$RECEIVED_MSG_RATE" --arg sent_msg_rate "$SENT_MSG_RATE" \
  174. '. += [{"color": "#ff0000", "fields": [{"title": "Message rate was too low", "short": false, "value": "Received message rate: \($received_msg_rate)\nSent message rate: \($sent_msg_rate)"}]}]' \
  175. attachments.json 1<> attachments.json
  176. echo "* Message rate was too low: Received message rate: $RECEIVED_MSG_RATE, Sent message rate: $SENT_MSG_RATE" >> $GITHUB_STEP_SUMMARY
  177. fi
  178. MESSAGES_DROPPED=$(jq -r '.messages_dropped' $TMPDIR/emqx_metrics.json)
  179. if [ $(echo "$MESSAGES_DROPPED > 100" | bc) -eq 1 ]; then
  180. success=1
  181. jq --arg dropped "$MESSAGES_DROPPED" '. += [{"color": "#ff0000", "fields": [{"title": "Too many dropped messages", "short": false, "value": "Dropped: \($dropped)"}]}]' \
  182. attachments.json 1<> attachments.json
  183. echo "* Too many dropped messages: $MESSAGES_DROPPED" >> $GITHUB_STEP_SUMMARY
  184. fi
  185. jq -n --argjson attachments "$(<attachments.json)" '{"attachments": $attachments}' > slack-payload.json
  186. exit $success
  187. - name: Post to Slack
  188. if: failure()
  189. uses: slackapi/slack-github-action@v1.27.0
  190. env:
  191. SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  192. with:
  193. channel-id: ${{ secrets.SLACK_PERFTEST_CHANNEL_ID }}
  194. slack-message: "EMQX performance test ${{ matrix.scenario }} failed. <${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}|Workflow Run>"
  195. payload-file-path: slack-payload.json
  196. payload-file-path-parsed: false
  197. - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
  198. if: failure()
  199. with:
  200. name: terraform
  201. path: |
  202. .terraform
  203. *.tfstate