# performance_test.yaml

  # Manually triggered performance test of an EMQX Enterprise package.
  name: Performance Test

  on:
    workflow_dispatch:
      inputs:
        # The two inputs are alternatives; leave both empty to test the
        # latest emqx/emqx release.
        version:
          description: >-
            EMQX Enterprise version to test, without a tag prefix
            (for example 5.8.0). Leave empty when using download_url
            or to test the latest release.
          required: false
          type: string
        download_url:
          description: >-
            URL of an EMQX Enterprise .deb package to test instead of a
            released version. Leave version empty when using this.
          required: false
          type: string

  # Restrict the workflow token to read-only access to repository contents.
  permissions:
    contents: read

  # Single job: fetch an EMQX Enterprise .deb, provision the test
  # infrastructure with Terraform, run the load test, tear everything down,
  # then compare the collected metrics with the baselines loaded from the
  # scenario's .env file. Failures are reported to Slack.
  jobs:
    perftest:
      runs-on: ubuntu-latest
      strategy:
        # Run matrix entries one at a time.
        max-parallel: 1
        matrix:
          scenario:
            - tests/ci/pubsub-2x2c4g-10k-20k-tps
      defaults:
        run:
          shell: bash
      steps:
        - name: Checkout tf-emqx-performance-test
          uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
          with:
            repository: emqx/tf-emqx-performance-test
            ref: v0.3.2

        - name: Setup Terraform
          uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd  # v3.1.2
          with:
            terraform_version: '1.6.4'
            # NOTE(review): presumably disabled so that
            # `terraform output -raw` below prints only the bare value.
            terraform_wrapper: false

        # NOTE(review): referenced by tag, unlike the SHA-pinned actions
        # elsewhere in this job; consider pinning to a commit SHA.
        - name: Setup Python
          uses: actions/setup-python@v5
          with:
            python-version: '3.11'

        - name: Install Python dependencies
          run: pip install -r requirements.txt

        - name: Download emqx package (custom URL)
          # A custom URL takes precedence: if both inputs were supplied no
          # download step would match and the job would fail later at `mv`.
          if: github.event.inputs.download_url != ''
          env:
            # Passed through the environment instead of being spliced into
            # the script text, so the value cannot be interpreted as shell.
            DOWNLOAD_URL: ${{ github.event.inputs.download_url }}
          run: |
            wget "$DOWNLOAD_URL"

        # Credentials used by the S3 package downloads below.
        - name: Configure AWS Credentials (package download)
          uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502  # v4.0.2
          with:
            aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
            aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
            aws-region: ${{ secrets.AWS_DEFAULT_REGION }}

        - name: Download emqx package (specific version)
          if: github.event.inputs.version != '' && github.event.inputs.download_url == ''
          env:
            AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
            # Passed through the environment to avoid script injection.
            EMQX_VERSION: ${{ github.event.inputs.version }}
          run: |
            aws s3 cp "s3://$AWS_S3_BUCKET/emqx-ee/e${EMQX_VERSION}/emqx-enterprise-${EMQX_VERSION}-ubuntu22.04-amd64.deb" .

        - name: Download emqx package (latest version)
          if: github.event.inputs.version == '' && github.event.inputs.download_url == ''
          env:
            GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
            AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
          run: |
            set -xeuo pipefail
            # Tag name of the most recent emqx/emqx release.
            version=$(gh release list --repo emqx/emqx --limit 1 --json tagName --jq '.[] | .tagName')
            # Drop the one-character tag prefix (e.g. 'v').
            version=${version:1}
            aws s3 cp "s3://$AWS_S3_BUCKET/emqx-ee/e${version}/emqx-enterprise-${version}-ubuntu22.04-amd64.deb" .

        # Switch to the credentials used for provisioning and teardown.
        - name: Configure AWS Credentials
          uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502  # v4.0.2
          with:
            aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_PERF_TEST }}
            aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY_PERF_TEST }}
            aws-region: ${{ secrets.AWS_DEFAULT_REGION_PERF_TEST }}

        - name: Create infrastructure
          id: infra
          timeout-minutes: 30
          env:
            # Written to disk from the environment so that quotes or `$` in
            # the license text cannot break or alter the script.
            EMQX_ENTERPRISE_LICENSE: ${{ secrets.EMQX_ENTERPRISE_LICENSE }}
          run: |
            # Normalise the package name regardless of the downloaded version.
            mv emqx-enterprise-*.deb emqx-enterprise-ubuntu22.04-amd64.deb
            ls -lh *.deb
            printf '%s\n' "$EMQX_ENTERPRISE_LICENSE" > emqx5.lic
            # Export the scenario's variables (baselines, DURATION, ...) to
            # all later steps.
            cat ${{ matrix.scenario }}.env >> "$GITHUB_ENV"
            # Default Slack payload; replaced by later steps on failure.
            echo '{}' > slack-payload.json
            terraform init
            # Provision, retrying once; a second failure fails the step.
            if ! terraform apply -var spec_file=${{ matrix.scenario }}.yaml -auto-approve -lock=false; then
              echo "Retrying once"
              terraform apply -var spec_file=${{ matrix.scenario }}.yaml -auto-approve -lock=false
            fi
            echo "ssh_key_path=$(terraform output -raw ssh_key_path)" >> "$GITHUB_OUTPUT"

        - name: Upload SSH private key
          uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808  # v4.3.3
          if: success()
          with:
            name: ssh_private_key
            path: |
              ${{ steps.infra.outputs.ssh_key_path }}

        # Runs when any earlier step failed; prepares the Slack payload.
        - name: Report failure
          if: failure()
          run: |
            jq -n '[{"color": "#ff0000", "fields": [{"title": "Failed to provision infrastructure", "short": false}]}]' > attachments.json
            jq -n --argjson attachments "$(<attachments.json)" '{"attachments": $attachments}' > slack-payload.json

        - name: Run benchmark
          if: success()
          id: benchmark
          timeout-minutes: 60
          run: |
            # 0 = all checks passed so far, 1 = at least one check failed.
            success=0
            export TMPDIR=$(mktemp -d)
            # Later steps read the collected metrics from the same directory.
            echo "TMPDIR=$TMPDIR" >> "$GITHUB_ENV"
            echo '[]' > attachments.json

            # Poll the monitor_current API once per second until the given
            # field reaches the target. A missing, null or unparsable value
            # counts as 0 so the loop keeps polling instead of ending early
            # on a failed integer comparison; the step timeout bounds it.
            wait_for_metric() {
              local field=$1 target=$2 current=0
              while [ "$current" -lt "$target" ]; do
                curl -s -u perftest:perftest "$EMQX_API_URL/api/v5/monitor_current" > "$TMPDIR/monitor_current.json"
                current=$(jq -r --arg f "$field" '.[$f] // 0' "$TMPDIR/monitor_current.json" 2>/dev/null || echo 0)
                current=${current:-0}
                sleep 1
              done
            }

            # Pre-load snapshot; the mem.json read below is expected to be
            # produced by summary.sh under $TMPDIR.
            PERIOD=1m scripts/summary.sh
            MEM_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .mem' "$TMPDIR/mem.json")
            MEM_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .mem' "$TMPDIR/mem.json")
            if [ "$(echo "$MEM_CORE_1 > $INITIAL_RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l)" -eq 1 ] \
              || [ "$(echo "$MEM_CORE_2 > $INITIAL_RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l)" -eq 1 ]; then
              success=1
              # `1<>` opens the output read/write without truncating it, so
              # jq can read attachments.json and write the (longer) result
              # back to the same file.
              jq --arg mem1 "$MEM_CORE_1" --arg mem2 "$MEM_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "Initial RAM usage is too high", "short": false, "value": "Core 1: \($mem1)%\nCore 2: \($mem2)%"}]}]' \
                attachments.json 1<> attachments.json
            fi

            EMQX_API_URL=$(terraform output -raw emqx_dashboard_url)
            ansible loadgen -m command -a 'systemctl start loadgen' --become --limit 'loadgen-emqtt_bench-1.*'
            echo "Waiting for subscribers to connect"
            wait_for_metric subscriptions 10000
            ansible loadgen -m command -a 'systemctl start loadgen' --become --limit 'loadgen-emqtt_bench-2.*'
            echo "Waiting for publishers to connect"
            wait_for_metric live_connections 20000

            echo "All clients connected, sleep for $DURATION seconds"
            sleep "$DURATION"
            PERIOD="${DURATION}s" scripts/summary.sh | tee -a "$GITHUB_STEP_SUMMARY"
            echo "success=$success" >> "$GITHUB_OUTPUT"

        # Always tear down so that failed runs do not leave resources behind.
        - name: Cleanup infrastructure
          if: always()
          run: |
            terraform destroy -var spec_file=${{ matrix.scenario }}.yaml -auto-approve

        - name: Analyze results
          if: success()
          run: |
            # Start from the benchmark step's verdict (0 = ok, 1 = failed).
            success=${{ steps.benchmark.outputs.success }}
            echo "## Test results analysis" >> "$GITHUB_STEP_SUMMARY"
            echo '' >> "$GITHUB_STEP_SUMMARY"

            # CPU: fail when either core exceeds baseline plus allowed deviation.
            CPU_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .cpu' "$TMPDIR/cpu.json")
            CPU_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .cpu' "$TMPDIR/cpu.json")
            if [ "$(echo "$CPU_CORE_1 > $CPU_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l)" -eq 1 ] \
              || [ "$(echo "$CPU_CORE_2 > $CPU_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l)" -eq 1 ]; then
              success=1
              jq --arg cpu1 "$CPU_CORE_1" --arg cpu2 "$CPU_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "CPU utilization was too high", "short": false, "value": "Core 1: \($cpu1)%\nCore 2: \($cpu2)%"}]}]' \
                attachments.json 1<> attachments.json
              echo "* CPU utilization was too high: Core 1: $CPU_CORE_1%, Core 2: $CPU_CORE_2%" >> "$GITHUB_STEP_SUMMARY"
            fi

            # RAM: same rule against RAM_BASELINE.
            MEM_CORE_1=$(jq -r '.[] | select(.host == "emqx-core-1") | .mem' "$TMPDIR/mem.json")
            MEM_CORE_2=$(jq -r '.[] | select(.host == "emqx-core-2") | .mem' "$TMPDIR/mem.json")
            if [ "$(echo "$MEM_CORE_1 > $RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l)" -eq 1 ] \
              || [ "$(echo "$MEM_CORE_2 > $RAM_BASELINE * (1 + $ALLOWED_DEVIATION_CPU_RAM)" | bc -l)" -eq 1 ]; then
              success=1
              jq --arg mem1 "$MEM_CORE_1" --arg mem2 "$MEM_CORE_2" '. += [{"color": "#ff0000", "fields": [{"title": "RAM usage was too high", "short": false, "value": "Core 1: \($mem1)%\nCore 2: \($mem2)%"}]}]' \
                attachments.json 1<> attachments.json
              echo "* RAM usage was too high: Core 1: $MEM_CORE_1%, Core 2: $MEM_CORE_2%" >> "$GITHUB_STEP_SUMMARY"
            fi

            # Message rates: fail when either rate falls below baseline
            # minus allowed deviation.
            RECEIVED_MSG_RATE=$(jq -r '.received_msg_rate' "$TMPDIR/emqx_metrics.json")
            SENT_MSG_RATE=$(jq -r '.sent_msg_rate' "$TMPDIR/emqx_metrics.json")
            if [ "$(echo "$RECEIVED_MSG_RATE < $RECEIVED_MSG_RATE_BASELINE * (1 - $ALLOWED_DEVIATION_MSG_RATE)" | bc -l)" -eq 1 ] \
              || [ "$(echo "$SENT_MSG_RATE < $SENT_MSG_RATE_BASELINE * (1 - $ALLOWED_DEVIATION_MSG_RATE)" | bc -l)" -eq 1 ]; then
              success=1
              jq --arg received_msg_rate "$RECEIVED_MSG_RATE" --arg sent_msg_rate "$SENT_MSG_RATE" \
                '. += [{"color": "#ff0000", "fields": [{"title": "Message rate was too low", "short": false, "value": "Received message rate: \($received_msg_rate)\nSent message rate: \($sent_msg_rate)"}]}]' \
                attachments.json 1<> attachments.json
              echo "* Message rate was too low: Received message rate: $RECEIVED_MSG_RATE, Sent message rate: $SENT_MSG_RATE" >> "$GITHUB_STEP_SUMMARY"
            fi

            # Dropped messages: fail above a fixed limit of 100.
            MESSAGES_DROPPED=$(jq -r '.messages_dropped' "$TMPDIR/emqx_metrics.json")
            if [ "$(echo "$MESSAGES_DROPPED > 100" | bc)" -eq 1 ]; then
              success=1
              jq --arg dropped "$MESSAGES_DROPPED" '. += [{"color": "#ff0000", "fields": [{"title": "Too many dropped messages", "short": false, "value": "Dropped: \($dropped)"}]}]' \
                attachments.json 1<> attachments.json
              echo "* Too many dropped messages: $MESSAGES_DROPPED" >> "$GITHUB_STEP_SUMMARY"
            fi

            jq -n --argjson attachments "$(<attachments.json)" '{"attachments": $attachments}' > slack-payload.json
            # A non-zero exit fails the step, which triggers the Slack post.
            exit $success

        # NOTE(review): referenced by tag, unlike the SHA-pinned actions
        # elsewhere in this job; consider pinning to a commit SHA.
        - name: Post to Slack
          if: failure()
          uses: slackapi/slack-github-action@v1.27.0
          env:
            SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
          with:
            channel-id: ${{ secrets.SLACK_PERFTEST_CHANNEL_ID }}
            slack-message: "EMQX performance test ${{ matrix.scenario }} failed. <${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}|Workflow Run>"
            payload-file-path: slack-payload.json
            payload-file-path-parsed: false

        # Keep the Terraform working data of failed runs.
        - name: Upload Terraform state
          uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808  # v4.3.3
          if: failure()
          with:
            name: terraform
            path: |
              .terraform
              *.tfstate