Commit a1d1b5a

use prometheus instead of benchstat
committed · 1 parent 001bc32 · commit a1d1b5a

File tree: 1 file changed, +256 -7 lines changed

.github/workflows/benchmark.yaml (+256 -7)
@@ -6,7 +6,7 @@ on:
       - main

 jobs:
-  benchmark:
+  run-benchmark:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
@@ -33,14 +33,263 @@ jobs:
           mkdir -p /tmp/artifacts/
           ARTIFACT_PATH=/tmp/artifacts make test-benchmark

-      - name: Compare with baseline
+      - name: Convert Benchmark Output to Prometheus Metrics
         run: |
-          go install golang.org/x/perf/cmd/benchstat@latest
-          benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output
+          mkdir -p /tmp/artifacts/prometheus/
+          echo "RUN_ID=${{ github.run_id }}"
+          export RUN_ID=${{ github.run_id }}
+          cat << 'EOF' > benchmark_to_prometheus.py
+          import sys
+          import re
+          import os

-      - name: Upload benchmark results
+          def parse_benchmark_output(benchmark_output):
+              metrics = []
+              round = 0
+              value = os.getenv('RUN_ID')  # get the GitHub Actions run id so that these metrics are not overwritten
+              for line in benchmark_output.split("\n"):
+                  match = re.match(r"Benchmark([\w\d]+)-\d+\s+\d+\s+([\d]+)\s+ns/op\s+([\d]+)\s+B/op\s+([\d]+)\s+allocs/op", line)
+                  if match:
+                      benchmark_name = match.group(1).lower()
+                      time_ns = match.group(2)
+                      memory_bytes = match.group(3)
+                      allocs = match.group(4)
+
+                      metrics.append(f"benchmark_{benchmark_name}_ns {{run_id=\"{value}\", round=\"{round}\"}} {time_ns}")
+                      metrics.append(f"benchmark_{benchmark_name}_allocs {{run_id=\"{value}\", round=\"{round}\"}} {allocs}")
+                      metrics.append(f"benchmark_{benchmark_name}_mem_bytes {{run_id=\"{value}\", round=\"{round}\"}} {memory_bytes}")
+                      round += 1
+
+              return "\n".join(metrics)
+
+          if __name__ == "__main__":
+              benchmark_output = sys.stdin.read()
+              metrics = parse_benchmark_output(benchmark_output)
+              print(metrics)
+          EOF
+
+          cat /tmp/artifacts/new.txt | python3 benchmark_to_prometheus.py | tee /tmp/artifacts/prometheus/metrics.txt
+
+      # - name: Compare with baseline
+      #   run: |
+      #     go install golang.org/x/perf/cmd/benchstat@latest
+      #     benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output
+
+      - name: Upload Benchmark Metrics
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-artifacts
-          path: /tmp/artifacts/
+          name: benchmark-metrics
+          path: /tmp/artifacts/prometheus/
+
+  run-prometheus:
+    needs: run-benchmark
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # ToDo: use GitHub REST API to download artifact across repos
+      - name: Download Prometheus Snapshot
+        run: |
+          echo "Available Artifacts in this run:"
+          gh run list --repo operator-framework/operator-controller --limit 5
+          gh run download --repo operator-framework/operator-controller --name prometheus-snapshot --dir .
+          ls -lh ./
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # # this step is invalid if the artifacts are downloaded in a different job
+      # - name: Download Prometheus Snapshot2
+      #   uses: actions/download-artifact@v4
+      #   with:
+      #     name: prometheus-snapshot
+      #     path: ./
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Download Benchmark Metrics
+        uses: actions/download-artifact@v4
+        with:
+          name: benchmark-metrics
+          path: ./
+
+      - name: Get Host IP
+        run: |
+          echo "HOST_IP=$(ip route get 1 | awk '{print $7}')"
+          export HOST_IP=$(ip route get 1 | awk '{print $7}')
+
+      # localhost doesn't work, use host IP directly
+      - name: Set Up Prometheus Config
+        run: |
+          cat << EOF > prometheus.yml
+          global:
+            scrape_interval: 5s
+          scrape_configs:
+            - job_name: 'benchmark_metrics'
+              static_configs:
+                - targets: ['$HOST_IP:9000']
+          EOF
+          mkdir -p ${{ github.workspace }}/prometheus-data
+          sudo chown -R 65534:65534 ${{ github.workspace }}/prometheus-data
+          sudo chmod -R 777 ${{ github.workspace }}/prometheus-data
+
+      - name: Extract and Restore Prometheus Snapshot
+        run: |
+          SNAPSHOT_ZIP="${{ github.workspace }}/prometheus-snapshot.zip"
+          SNAPSHOT_TAR="${{ github.workspace }}/prometheus_snapshot.tar.gz"
+          SNAPSHOT_DIR="${{ github.workspace }}/prometheus-data/snapshots"
+
+          mkdir -p "$SNAPSHOT_DIR"
+
+          if [[ -f "$SNAPSHOT_ZIP" ]]; then
+            echo "📦 Detected ZIP archive: $SNAPSHOT_ZIP"
+            unzip -o "$SNAPSHOT_ZIP" -d "$SNAPSHOT_DIR"
+            echo "✅ Successfully extracted ZIP snapshot."
+          elif [[ -f "$SNAPSHOT_TAR" ]]; then
+            echo "📦 Detected TAR archive: $SNAPSHOT_TAR"
+            tar -xzf "$SNAPSHOT_TAR" -C "$SNAPSHOT_DIR"
+            echo "✅ Successfully extracted TAR snapshot."
+          else
+            echo "⚠️ WARNING: No snapshot file found. Skipping extraction."
+          fi
+
+      - name: Run Prometheus
+        run: |
+          docker run -d --name prometheus -p 9090:9090 \
+            --user=root \
+            -v ${{ github.workspace }}/prometheus.yml:/etc/prometheus/prometheus.yml \
+            -v ${{ github.workspace }}/prometheus-data:/prometheus \
+            prom/prometheus --config.file=/etc/prometheus/prometheus.yml \
+            --storage.tsdb.path=/prometheus \
+            --storage.tsdb.retention.time=1h \
+            --web.enable-admin-api
+
+      - name: Wait for Prometheus to start
+        run: sleep 10
+
+      - name: Check Prometheus is running
+        run: |
+          set -e
+          curl -s http://localhost:9090/-/ready || (docker logs prometheus && exit 1)

+      - name: Start HTTP Server to Expose Metrics
+        run: |
+          cat << 'EOF' > server.py
+          from http.server import SimpleHTTPRequestHandler, HTTPServer
+
+          class MetricsHandler(SimpleHTTPRequestHandler):
+              def do_GET(self):
+                  if self.path == "/metrics":
+                      self.send_response(200)
+                      self.send_header("Content-type", "text/plain")
+                      self.end_headers()
+                      with open("metrics.txt", "r") as f:
+                          self.wfile.write(f.read().encode())
+                  else:
+                      self.send_response(404)
+                      self.end_headers()
+
+          if __name__ == "__main__":
+              server = HTTPServer(('0.0.0.0', 9000), MetricsHandler)
+              print("Serving on port 9000...")
+              server.serve_forever()
+          EOF
+
+          nohup python3 server.py &
+
+      - name: Wait for Prometheus to Collect Data
+        run: sleep 30
+
+      - name: Check Prometheus targets page
+        run: |
+          http_status=$(curl -o /dev/null -s -w "%{http_code}" http://localhost:9090/targets)
+          if [ "$http_status" -eq 200 ]; then
+            echo "Prometheus targets page is reachable."
+          else
+            echo "Error: Prometheus targets page is not reachable. Status code: $http_status"
+            exit 1
+          fi
+
+          http_status=$(curl -o /dev/null -s -w "%{http_code}" http://localhost:9090/targets)
+          if [ "$http_status" -eq 200 ]; then
+            echo "Prometheus targets page is reachable."
+
+            # Check for lastError field in the targets API
+            error=$(curl -s http://localhost:9090/api/v1/targets | jq -r '.data.activeTargets[].lastError')
+            if [ "$error" != "null" ] && [ -n "$error" ]; then
+              echo "Error: Prometheus target has an error: $error"
+              exit 1
+            else
+              echo "No errors found in Prometheus targets."
+            fi
+
+          else
+            echo "Error: Prometheus targets page is not reachable. Status code: $http_status"
+            exit 1
+          fi
+
+      # - name: Debug via SSH
+      #   uses: mxschmitt/action-tmate@v3
+
+      - name: Check Benchmark Metrics Against Threshold
+        run: |
+          MAX_TIME_NS=1200000000 # 1.2s
+          MAX_ALLOCS=4000
+          MAX_MEM_BYTES=450000
+
+          # Query Prometheus metrics, get the max value
+          time_ns=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_ns)" | jq -r '.data.result[0].value[1]')
+          allocs=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_allocs)" | jq -r '.data.result[0].value[1]')
+          mem_bytes=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_mem_bytes)" | jq -r '.data.result[0].value[1]')
+
+          echo "⏳ Benchmark Execution Time: $time_ns ns"
+          echo "🛠️ Memory Allocations: $allocs"
+          echo "💾 Memory Usage: $mem_bytes bytes"
+
+          # threshold checking
+          if (( $(echo "$time_ns > $MAX_TIME_NS" | bc -l) )); then
+            echo "❌ ERROR: Execution time exceeds threshold!"
+            exit 1
+          fi
+
+          if (( $(echo "$allocs > $MAX_ALLOCS" | bc -l) )); then
+            echo "❌ ERROR: Too many memory allocations!"
+            exit 1
+          fi
+
+          if (( $(echo "$mem_bytes > $MAX_MEM_BYTES" | bc -l) )); then
+            echo "❌ ERROR: Memory usage exceeds threshold!"
+            exit 1
+          fi
+
+          echo "✅ All benchmarks passed within threshold!"
+
+      - name: Trigger Prometheus Snapshot
+        run: |
+          set -e
+          curl -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot || (docker logs prometheus && exit 1)
+
+      - name: Find and Upload Prometheus Snapshot
+        run: |
+          SNAPSHOT_PATH=$(ls -td ${{ github.workspace }}/prometheus-data/snapshots/* 2>/dev/null | head -1 || echo "")
+          if [[ -z "$SNAPSHOT_PATH" ]]; then
+            echo "❌ No Prometheus snapshot found!"
+            docker logs prometheus
+            exit 1
+          fi
+
+          echo "✅ Prometheus snapshot stored in: $SNAPSHOT_PATH"
+          tar -czf $GITHUB_WORKSPACE/prometheus_snapshot.tar.gz -C "$SNAPSHOT_PATH" .
+
+      - name: Stop Prometheus
+        run: docker stop prometheus
+
+      - name: Upload Prometheus Snapshot
+        uses: actions/upload-artifact@v4
+        with:
+          name: prometheus-snapshot
+          path: prometheus_snapshot.tar.gz
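For reference, a hypothetical example of what the conversion step emits (the benchmark line and all numbers below are made up; only the metric names, labels, and format follow the script in this diff). A `go test -bench` result line such as

    BenchmarkCreateClusterCatalog-4   10   1100000000 ns/op   400000 B/op   3500 allocs/op

would be written to /tmp/artifacts/prometheus/metrics.txt as three exposition-format samples, assuming the workflow run id is 12345678:

    benchmark_createclustercatalog_ns {run_id="12345678", round="0"} 1100000000
    benchmark_createclustercatalog_allocs {run_id="12345678", round="0"} 3500
    benchmark_createclustercatalog_mem_bytes {run_id="12345678", round="0"} 400000

The run-prometheus job serves this file on port 9000, lets Prometheus scrape it, and gates the build with queries such as max(benchmark_createclustercatalog_ns) against the thresholds above.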
