performance_test.yaml 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  # Workflow metadata, triggers and token permissions for the EMQX
  # performance test.
  name: Performance Test

  on:
    # Reusable-workflow entry point: the caller must supply every secret below.
    workflow_call:
      secrets:
        AWS_ACCESS_KEY_PERF_TEST:
          required: true
        AWS_SECRET_ACCESS_KEY_PERF_TEST:
          required: true
        AWS_DEFAULT_REGION_PERF_TEST:
          required: true
        SLACK_BOT_TOKEN:
          required: true
        SLACK_PERFTEST_CHANNEL_ID:
          required: true
        EMQX_ENTERPRISE_LICENSE:
          required: true
    # Manual entry point: the package for the requested version is downloaded
    # instead of being taken from a build artifact.
    workflow_dispatch:
      inputs:
        emqx_version:
          required: false
          # Quoted so the version stays a string.
          default: "5.8.0"

  # The workflow token only needs to read repository contents.
  permissions:
    contents: read
  jobs:
    # Provisions a test cluster with Terraform, drives load against it,
    # tears the cluster down, then compares the collected metrics with the
    # baselines loaded from the scenario's .env file.
    perftest:
      runs-on: ubuntu-latest
      strategy:
        # Scenarios run one at a time.
        max-parallel: 1
        matrix:
          scenario:
            - tests/ci/pubsub-2x2c4g-10k-20k-tps
      defaults:
        run:
          shell: bash
      steps:
        - name: Configure AWS Credentials
          uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
          with:
            aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_PERF_TEST }}
            aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY_PERF_TEST }}
            aws-region: ${{ secrets.AWS_DEFAULT_REGION_PERF_TEST }}

        - name: Checkout tf-emqx-performance-test
          uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
          with:
            repository: emqx/tf-emqx-performance-test
            ref: v0.3.2

        - name: Setup Terraform
          uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2
          with:
            terraform_version: 1.6.4
            terraform_wrapper: false

        # NOTE(review): unlike the other actions in this job, this one is
        # referenced by tag rather than by commit SHA - consider pinning it.
        - uses: actions/setup-python@v5
          with:
            python-version: '3.11'

        - run: pip install -r requirements.txt

        # Any trigger other than a manual dispatch takes the package from a
        # build artifact.
        - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
          if: github.event_name != 'workflow_dispatch'
          with:
            pattern: "emqx-enterprise-ubuntu22.04-amd64-*"

        # Manual dispatch downloads the requested release instead.
        - name: Download emqx package
          if: github.event_name == 'workflow_dispatch'
          env:
            # The user-supplied input is passed through the environment, not
            # interpolated into the script text, so it cannot inject shell
            # commands.
            EMQX_VERSION: ${{ github.event.inputs.emqx_version }}
          run: |
            wget "https://www.emqx.com/en/downloads/enterprise/${EMQX_VERSION}/emqx-enterprise-${EMQX_VERSION}-ubuntu22.04-amd64.deb"

        - name: Create infrastructure
          id: infra
          timeout-minutes: 30
          env:
            # Passed through the environment so that quotes, `$` or backticks
            # in the license cannot break or alter the script.
            EMQX_ENTERPRISE_LICENSE: ${{ secrets.EMQX_ENTERPRISE_LICENSE }}
          run: |
            mv emqx-enterprise-*.deb emqx-enterprise-ubuntu22.04-amd64.deb
            ls -lh *.deb
            printf '%s\n' "$EMQX_ENTERPRISE_LICENSE" > emqx5.lic
            # Export the scenario's variables to the following steps.
            cat ${{ matrix.scenario }}.env >> "$GITHUB_ENV"
            # Default (empty) Slack payload; later steps overwrite it.
            echo '{}' > slack-payload.json
            terraform init
            # Retry once; a second failure aborts the step.
            if ! terraform apply -var spec_file=${{ matrix.scenario }}.yaml -auto-approve -lock=false; then
              echo "Retrying once"
              terraform apply -var spec_file=${{ matrix.scenario }}.yaml -auto-approve -lock=false
            fi
            echo "ssh_key_path=$(terraform output -raw ssh_key_path)" >> "$GITHUB_OUTPUT"

        - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
          if: success()
          with:
            name: ssh_private_key
            path: |
              ${{ steps.infra.outputs.ssh_key_path }}

        # Replaces the default Slack payload when an earlier step has failed.
        - name: Report failure
          if: failure()
          run: |
            jq -n '[{"color": "#ff0000", "fields": [{"title": "Infrastructure creation failed", "short": false}]}]' > attachments.json
            jq -n --argjson attachments "$(<attachments.json)" '{"attachments": $attachments}' > slack-payload.json

        - name: Run benchmark
          if: success()
          id: benchmark
          timeout-minutes: 60
          run: |
            # 0 = all checks passed so far, 1 = at least one check failed.
            # The value is published as a step output and used as the exit
            # code of the "Analyze results" step.
            success=0
            export TMPDIR=$(mktemp -d)
            echo "TMPDIR=$TMPDIR" >> "$GITHUB_ENV"
            echo '[]' > attachments.json
            PERIOD=1m scripts/summary.sh
            MEM_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .mem' "$TMPDIR/mem.json")
            MEM_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .mem' "$TMPDIR/mem.json")
            if [ $(echo "$MEM_CORE_1 > $INITIAL_RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ] \
              || [ $(echo "$MEM_CORE_2 > $INITIAL_RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ]; then
              success=1
              # `1<>` opens the output file read-write without truncating it,
              # so jq can still read the same file it is writing to.
              jq --arg mem1 "$MEM_CORE_1" --arg mem2 "$MEM_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "Initial RAM usage is too high", "short": false, "value": "Core 1: \($mem1)%\nCore 2: \($mem2)%"}]}]' \
                attachments.json 1<> attachments.json
            fi
            EMQX_API_URL=$(terraform output -raw emqx_dashboard_url)
            ansible loadgen -m command -a 'systemctl start loadgen' --become --limit 'loadgen-emqtt_bench-1.*'
            echo "Waiting for subscribers to connect"
            subs=0
            while [ "$subs" -lt 10000 ]; do
              curl -s -u perftest:perftest "$EMQX_API_URL/api/v5/monitor_current" > "$TMPDIR/monitor_current.json"
              # Default to 0 when the field is missing or the body is empty;
              # otherwise the numeric test errors out and the loop ends as if
              # every subscriber had connected.
              subs=$(jq -r '.subscriptions // 0' "$TMPDIR/monitor_current.json")
              subs=${subs:-0}
              sleep 1
            done
            ansible loadgen -m command -a 'systemctl start loadgen' --become --limit 'loadgen-emqtt_bench-2.*'
            echo "Waiting for publishers to connect"
            conns=$(jq -r '.live_connections // 0' "$TMPDIR/monitor_current.json")
            conns=${conns:-0}
            while [ "$conns" -lt 20000 ]; do
              curl -s -u perftest:perftest "$EMQX_API_URL/api/v5/monitor_current" > "$TMPDIR/monitor_current.json"
              conns=$(jq -r '.live_connections // 0' "$TMPDIR/monitor_current.json")
              conns=${conns:-0}
              sleep 1
            done
            echo "All clients connected, sleep for $DURATION seconds"
            sleep $DURATION
            PERIOD="${DURATION}s" scripts/summary.sh | tee -a "$GITHUB_STEP_SUMMARY"
            echo "success=$success" >> "$GITHUB_OUTPUT"

        # Always tear the infrastructure down, even when earlier steps failed.
        - name: Cleanup infrastructure
          if: always()
          run: |
            terraform destroy -var spec_file=${{ matrix.scenario }}.yaml -auto-approve

        - name: Analyze results
          if: success()
          run: |
            # Start from the benchmark step's verdict; any failed check below
            # sets it to 1, and the step exits with that value.
            success=${{ steps.benchmark.outputs.success }}
            echo "## Test results analysis" >> $GITHUB_STEP_SUMMARY
            echo '' >> $GITHUB_STEP_SUMMARY
            CPU_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .cpu' $TMPDIR/cpu.json)
            CPU_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .cpu' $TMPDIR/cpu.json)
            if [ $(echo "$CPU_CORE_1 > $CPU_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ] \
              || [ $(echo "$CPU_CORE_2 > $CPU_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ]; then
              success=1
              jq --arg cpu1 "$CPU_CORE_1" --arg cpu2 "$CPU_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "CPU utilization was too high", "short": false, "value": "Core 1: \($cpu1)%\nCore 2: \($cpu2)%"}]}]' \
                attachments.json 1<> attachments.json
              echo "* CPU utilization was too high: Core 1: $CPU_CORE_1%, Core 2: $CPU_CORE_2%" >> $GITHUB_STEP_SUMMARY
            fi
            MEM_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .mem' $TMPDIR/mem.json)
            MEM_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .mem' $TMPDIR/mem.json)
            if [ $(echo "$MEM_CORE_1 > $RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ] \
              || [ $(echo "$MEM_CORE_2 > $RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l) -eq 1 ]; then
              success=1
              jq --arg mem1 "$MEM_CORE_1" --arg mem2 "$MEM_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "RAM usage was too high", "short": false, "value": "Core 1: \($mem1)%\nCore 2: \($mem2)%"}]}]' \
                attachments.json 1<> attachments.json
              echo "* RAM usage was too high: Core 1: $MEM_CORE_1%, Core 2: $MEM_CORE_2%" >> $GITHUB_STEP_SUMMARY
            fi
            RECEIVED_MSG_RATE=$(jq -r '.received_msg_rate' $TMPDIR/emqx_metrics.json)
            SENT_MSG_RATE=$(jq -r '.sent_msg_rate' $TMPDIR/emqx_metrics.json)
            if [ $(echo "$RECEIVED_MSG_RATE < $RECEIVED_MSG_RATE_BASELINE * (1 - $ALLOWED_DEVIATION_MSG_RATE)" | bc -l) -eq 1 ] \
              || [ $(echo "$SENT_MSG_RATE < $SENT_MSG_RATE_BASELINE * (1 - $ALLOWED_DEVIATION_MSG_RATE)" | bc -l) -eq 1 ]; then
              success=1
              jq --arg received_msg_rate "$RECEIVED_MSG_RATE" --arg sent_msg_rate "$SENT_MSG_RATE" \
                '. += [{"color": "#ff0000", "fields": [{"title": "Message rate was too low", "short": false, "value": "Received message rate: \($received_msg_rate)\nSent message rate: \($sent_msg_rate)"}]}]' \
                attachments.json 1<> attachments.json
              echo "* Message rate was too low: Received message rate: $RECEIVED_MSG_RATE, Sent message rate: $SENT_MSG_RATE" >> $GITHUB_STEP_SUMMARY
            fi
            MESSAGES_DROPPED=$(jq -r '.messages_dropped' $TMPDIR/emqx_metrics.json)
            if [ $(echo "$MESSAGES_DROPPED > 100" | bc) -eq 1 ]; then
              success=1
              jq --arg dropped "$MESSAGES_DROPPED" '. += [{"color": "#ff0000", "fields": [{"title": "Too many dropped messages", "short": false, "value": "Dropped: \($dropped)"}]}]' \
                attachments.json 1<> attachments.json
              echo "* Too many dropped messages: $MESSAGES_DROPPED" >> $GITHUB_STEP_SUMMARY
            fi
            # Wrap the collected attachments into the payload the Slack step posts.
            jq -n --argjson attachments "$(<attachments.json)" '{"attachments": $attachments}' > slack-payload.json
            exit $success

        # NOTE(review): referenced by tag rather than by commit SHA, unlike
        # most other actions in this job - consider pinning it.
        - name: Post to Slack
          if: failure()
          uses: slackapi/slack-github-action@v1.27.0
          env:
            SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
          with:
            channel-id: ${{ secrets.SLACK_PERFTEST_CHANNEL_ID }}
            slack-message: "EMQX performance test ${{ matrix.scenario }} failed. <${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}|Workflow Run>"
            payload-file-path: slack-payload.json
            payload-file-path-parsed: false

        # Keep the Terraform working directory and state when the job failed.
        - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
          if: failure()
          with:
            name: terraform
            path: |
              .terraform
              *.tfstate