diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
new file mode 100644
index 000000000..879b3f52f
--- /dev/null
+++ b/.github/workflows/benchmark.yaml
@@ -0,0 +1,300 @@
+# Runs the e2e benchmarks on pull requests to main, converts the results into Prometheus
+# metrics, and fails the run when the metrics exceed the thresholds checked in run-prometheus.
+name: Benchmark Test
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  run-benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Install dependencies
+        run: |
+          go mod download
+          go mod tidy
+
+      # - name: Debug via SSH
+      #   uses: mxschmitt/action-tmate@v3
+
+      - name: Run benchmark test
+        # working-directory: test/e2e
+        run: |
+          mkdir -p /tmp/artifacts/
+          ARTIFACT_PATH=/tmp/artifacts make test-benchmark
+
+      - name: Convert Benchmark Output to Prometheus Metrics
+        run: |
+          mkdir -p /tmp/artifacts/prometheus/
+          echo "RUN_ID=${{ github.run_id }}"
+          export RUN_ID=${{ github.run_id }}
+          cat << 'EOF' > benchmark_to_prometheus.py
+          import sys
+          import re
+          import os
+
+          def parse_benchmark_output(benchmark_output):
+              """Convert `go test -bench` result lines into Prometheus text-format samples."""
+              metrics = []
+              run_round = 0
+              # Label every sample with the GitHub Actions run id so that runs do not overwrite each other.
+              run_id = os.getenv('RUN_ID')
+              for line in benchmark_output.split("\n"):
+                  match = re.match(r"Benchmark([\w\d]+)-\d+\s+\d+\s+([\d]+)\s+ns/op\s+([\d]+)\s+B/op\s+([\d]+)\s+allocs/op", line)
+                  if match:
+                      benchmark_name = match.group(1).lower()
+                      time_ns = match.group(2)
+                      memory_bytes = match.group(3)
+                      allocs = match.group(4)
+
+                      metrics.append(f"benchmark_{benchmark_name}_ns{{run_id=\"{run_id}\", round=\"{run_round}\"}} {time_ns}")
+                      metrics.append(f"benchmark_{benchmark_name}_allocs{{run_id=\"{run_id}\", round=\"{run_round}\"}} {allocs}")
+                      metrics.append(f"benchmark_{benchmark_name}_mem_bytes{{run_id=\"{run_id}\", round=\"{run_round}\"}} {memory_bytes}")
+                      run_round += 1
+
+              return "\n".join(metrics)
+
+          if __name__ == "__main__":
+              benchmark_output = sys.stdin.read()
+              metrics = parse_benchmark_output(benchmark_output)
+              print(metrics)
+          EOF
+
+          cat /tmp/artifacts/new.txt | python3 benchmark_to_prometheus.py | tee /tmp/artifacts/prometheus/metrics.txt
+
+      # - name: Compare with baseline
+      #   run: |
+      #     go install golang.org/x/perf/cmd/benchstat@latest
+      #     benchstat benchmarks/baseline.txt /tmp/artifacts/new.txt | tee /tmp/artifacts/output
+
+      - name: Upload Benchmark Metrics
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-metrics
+          path: /tmp/artifacts/prometheus/
+
+  run-prometheus:
+    needs: run-benchmark
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # TODO: use the GitHub REST API to download the artifact across repos
+      - name: Download Prometheus Snapshot
+        run: |
+          echo "Available Artifacts in this run:"
+          gh run list --repo operator-framework/operator-controller --limit 5
+          # A missing artifact (e.g. on the very first run) must not fail the job; the restore step handles its absence.
+          gh run download --repo operator-framework/operator-controller --name prometheus-snapshot --dir . || echo "⚠️ WARNING: No previous prometheus-snapshot artifact could be downloaded."
+          ls -lh ./
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # NOTE: this step does not work when the artifact was uploaded by a different workflow run
+      # - name: Download Prometheus Snapshot2
+      #   uses: actions/download-artifact@v4
+      #   with:
+      #     name: prometheus-snapshot
+      #     path: ./
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Download Benchmark Metrics
+        uses: actions/download-artifact@v4
+        with:
+          name: benchmark-metrics
+          path: ./
+
+      - name: Get Host IP
+        run: |
+          echo "HOST_IP=$(ip route get 1 | awk '{print $7}')" | tee -a $GITHUB_ENV
+
+      # localhost doesn't work from inside the Prometheus container, so use the host IP directly
+      - name: Set Up Prometheus Config
+        run: |
+          echo "HOST_IP is $HOST_IP"
+          cat << EOF > prometheus.yml
+          global:
+            scrape_interval: 5s
+          scrape_configs:
+            - job_name: 'benchmark_metrics'
+              static_configs:
+                - targets: ['$HOST_IP:9000']
+          EOF
+          mkdir -p ${{ github.workspace }}/prometheus-data
+          sudo chown -R 65534:65534 ${{ github.workspace }}/prometheus-data
+          sudo chmod -R 777 ${{ github.workspace }}/prometheus-data
+
+      - name: Extract and Restore Prometheus Snapshot
+        run: |
+          SNAPSHOT_ZIP="${{ github.workspace }}/prometheus-snapshot.zip"
+          SNAPSHOT_TAR="${{ github.workspace }}/prometheus_snapshot.tar.gz"
+          SNAPSHOT_DIR="${{ github.workspace }}/prometheus-data/snapshots"
+
+          mkdir -p "$SNAPSHOT_DIR"
+
+          if [[ -f "$SNAPSHOT_ZIP" ]]; then
+            echo "📦 Detected ZIP archive: $SNAPSHOT_ZIP"
+            unzip -o "$SNAPSHOT_ZIP" -d "$SNAPSHOT_DIR"
+            echo "✅ Successfully extracted ZIP snapshot."
+          elif [[ -f "$SNAPSHOT_TAR" ]]; then
+            echo "📦 Detected TAR archive: $SNAPSHOT_TAR"
+            tar -xzf "$SNAPSHOT_TAR" -C "$SNAPSHOT_DIR"
+            echo "✅ Successfully extracted TAR snapshot."
+          else
+            echo "⚠️ WARNING: No snapshot file found. Skipping extraction."
+          fi
+
+      - name: Run Prometheus
+        run: |
+          docker run -d --name prometheus -p 9090:9090 \
+            --user=root \
+            -v ${{ github.workspace }}/prometheus.yml:/etc/prometheus/prometheus.yml \
+            -v ${{ github.workspace }}/prometheus-data:/prometheus \
+            prom/prometheus --config.file=/etc/prometheus/prometheus.yml \
+            --storage.tsdb.path=/prometheus \
+            --storage.tsdb.retention.time=1h \
+            --web.enable-admin-api
+
+      - name: Wait for Prometheus to start
+        run: sleep 10
+
+      - name: Check Prometheus is running
+        run: |
+          set -e
+          # -f makes curl exit non-zero on an HTTP error status, so a not-ready Prometheus fails the step.
+          curl -sf http://localhost:9090/-/ready || (docker logs prometheus && exit 1)
+
+      - name: Start HTTP Server to Expose Metrics
+        run: |
+          cat << 'EOF' > server.py
+          from http.server import SimpleHTTPRequestHandler, HTTPServer
+
+          class MetricsHandler(SimpleHTTPRequestHandler):
+              def do_GET(self):
+                  if self.path == "/metrics":
+                      self.send_response(200)
+                      self.send_header("Content-type", "text/plain")
+                      self.end_headers()
+                      with open("metrics.txt", "r") as f:
+                          self.wfile.write(f.read().encode())
+                  else:
+                      self.send_response(404)
+                      self.end_headers()
+
+          if __name__ == "__main__":
+              server = HTTPServer(('0.0.0.0', 9000), MetricsHandler)
+              print("Serving on port 9000...")
+              server.serve_forever()
+          EOF
+
+          nohup python3 server.py &
+
+      - name: Wait for Prometheus to Collect Data
+        run: sleep 30
+
+      - name: Check Prometheus targets page
+        run: |
+          http_status=$(curl -o /dev/null -s -w "%{http_code}" http://localhost:9090/targets)
+          if [ "$http_status" -eq 200 ]; then
+            echo "Prometheus targets page is reachable."
+
+            # Check for lastError field in the targets API
+            error=$(curl -s http://localhost:9090/api/v1/targets | jq -r '.data.activeTargets[].lastError')
+            if [ "$error" != "null" ] && [ -n "$error" ]; then
+              echo "Error: Prometheus target has an error: $error"
+              exit 1
+            else
+              echo "No errors found in Prometheus targets."
+            fi
+
+          else
+            echo "Error: Prometheus targets page is not reachable. Status code: $http_status"
+            exit 1
+          fi
+
+      # - name: Debug via SSH
+      #   uses: mxschmitt/action-tmate@v3
+
+      - name: Check Benchmark Metrics Against Threshold
+        run: |
+          MAX_TIME_NS=1200000000  # 1.2s
+          MAX_ALLOCS=4000
+          MAX_MEM_BYTES=450000
+
+          # Query Prometheus Metrics, get the max value
+          time_ns=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_ns)" | jq -r '.data.result[0].value[1]')
+          allocs=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_allocs)" | jq -r '.data.result[0].value[1]')
+          mem_bytes=$(curl -s "http://localhost:9090/api/v1/query?query=max(benchmark_createclustercatalog_mem_bytes)" | jq -r '.data.result[0].value[1]')
+
+          echo "⏳ Benchmark Execution Time: $time_ns ns"
+          echo "🛠️ Memory Allocations: $allocs"
+          echo "💾 Memory Usage: $mem_bytes bytes"
+
+          # A series missing from Prometheus makes jq print "null", which bc would evaluate as 0 and pass.
+          for metric_value in "$time_ns" "$allocs" "$mem_bytes"; do
+            if [[ -z "$metric_value" || "$metric_value" == "null" ]]; then
+              echo "❌ ERROR: A benchmark metric is missing from Prometheus!"
+              exit 1
+            fi
+          done
+
+          # threshold checking
+          if (( $(echo "$time_ns > $MAX_TIME_NS" | bc -l) )); then
+            echo "❌ ERROR: Execution time exceeds threshold!"
+            exit 1
+          fi
+
+          if (( $(echo "$allocs > $MAX_ALLOCS" | bc -l) )); then
+            echo "❌ ERROR: Too many memory allocations!"
+            exit 1
+          fi
+
+          if (( $(echo "$mem_bytes > $MAX_MEM_BYTES" | bc -l) )); then
+            echo "❌ ERROR: Memory usage exceeds threshold!"
+            exit 1
+          fi
+
+          echo "✅ All benchmarks passed within threshold!"
+
+      - name: Trigger Prometheus Snapshot
+        run: |
+          set -e
+          curl -sSf -X POST http://localhost:9090/api/v1/admin/tsdb/snapshot || (docker logs prometheus && exit 1)
+
+      - name: Find and Upload Prometheus Snapshot
+        run: |
+          SNAPSHOT_PATH=$(ls -td ${{ github.workspace }}/prometheus-data/snapshots/* 2>/dev/null | head -1 || echo "")
+          if [[ -z "$SNAPSHOT_PATH" ]]; then
+            echo "❌ No Prometheus snapshot found!"
+            docker logs prometheus
+            exit 1
+          fi
+
+          echo "✅ Prometheus snapshot stored in: $SNAPSHOT_PATH"
+          tar -czf $GITHUB_WORKSPACE/prometheus_snapshot.tar.gz -C "$SNAPSHOT_PATH" .
+
+
+      - name: Stop Prometheus
+        run: docker stop prometheus
+
+      - name: Upload Prometheus Snapshot
+        uses: actions/upload-artifact@v4
+        with:
+          name: prometheus-snapshot
+          path: prometheus_snapshot.tar.gz
diff --git a/Makefile b/Makefile
index ee943a9d5..44a612194 100644
--- a/Makefile
+++ b/Makefile
@@ -167,6 +167,12 @@ test: manifests generate fmt lint test-unit test-e2e #HELP Run all tests.
 e2e: #EXHELP Run the e2e tests.
 	go test -count=1 -v ./test/e2e/...
 
+# CATALOG_IMG is set on the go test command itself because every recipe line runs in its own shell.
+.PHONY: benchmark
+benchmark: #EXHELP Run the benchmark tests.
+	CATALOG_IMG=registry.redhat.io/redhat/redhat-operator-index:v4.18 \
+	go test -v -run=^$$ -bench=. -benchmem -count=10 ./test/e2e/... | tee /tmp/artifacts/new.txt
+
 E2E_REGISTRY_NAME := docker-registry
 E2E_REGISTRY_NAMESPACE := operator-controller-e2e
 
@@ -256,6 +262,12 @@ catalogd-pre-upgrade-setup:
 catalogd-image-registry: ## Setup in-cluster image registry
 	./test/tools/imageregistry/registry.sh $(ISSUER_KIND) $(ISSUER_NAME)
 
+.PHONY: test-benchmark
+test-benchmark: KIND_CLUSTER_NAME := operator-controller-benchmark
+test-benchmark: KUSTOMIZE_BUILD_DIR := config/overlays/e2e
+test-benchmark: GO_BUILD_FLAGS := -cover
+test-benchmark: run image-registry benchmark kind-clean #HELP Run benchmark test suite on local kind cluster
+
 .PHONY: extension-developer-e2e
 extension-developer-e2e: KUSTOMIZE_BUILD_DIR := config/overlays/cert-manager
 extension-developer-e2e: KIND_CLUSTER_NAME := operator-controller-ext-dev-e2e #EXHELP Run extension-developer e2e on local kind cluster
diff --git a/benchmarks/baseline.txt b/benchmarks/baseline.txt
new file mode 100644
index 000000000..6bf274941
--- /dev/null
+++ b/benchmarks/baseline.txt
@@ -0,0 +1,16 @@
+goos: darwin
+goarch: arm64
+pkg: github.com/operator-framework/operator-controller/test/e2e
+cpu: Apple M1 Pro
+BenchmarkCreateClusterCatalog-10    	       1	1352852042 ns/op	  404520 B/op	    3914 allocs/op
+BenchmarkCreateClusterCatalog-10    	      13	  86982353 ns/op	   36907 B/op	     394 allocs/op
+BenchmarkCreateClusterCatalog-10    	      12	  84962496 ns/op	   34555 B/op	     393 allocs/op
+BenchmarkCreateClusterCatalog-10    	      18	  70375363 ns/op	   34880 B/op	     388 allocs/op
+BenchmarkCreateClusterCatalog-10    	      15	  71715708 ns/op	   37654 B/op	     399 allocs/op
+BenchmarkCreateClusterCatalog-10    	      13	  85251170 ns/op	   36572 B/op	     396 allocs/op
+BenchmarkCreateClusterCatalog-10    	      13	  83413260 ns/op	   38435 B/op	     393 allocs/op
+BenchmarkCreateClusterCatalog-10    	      13	  93851487 ns/op	   37249 B/op	     395 allocs/op
+BenchmarkCreateClusterCatalog-10    	      13	  78722212 ns/op	   36593 B/op	     393 allocs/op
+BenchmarkCreateClusterCatalog-10    	      13	  86393522 ns/op	   37404 B/op	     395 allocs/op
+PASS
+ok  	github.com/operator-framework/operator-controller/test/e2e	32.699s
diff --git 
a/test/e2e/benchmark_test.go b/test/e2e/benchmark_test.go
new file mode 100644
index 000000000..8122afba3
--- /dev/null
+++ b/test/e2e/benchmark_test.go
@@ -0,0 +1,37 @@
+package e2e
+
+import (
+	"context"
+	"os"
+	"testing"
+
+	"k8s.io/apimachinery/pkg/util/rand"
+)
+
+// BenchmarkCreateClusterCatalog measures how long it takes to create and then delete a
+// ClusterCatalog on the test cluster. The catalog image is read from the environment
+// variable named by testCatalogRefEnvVar; the benchmark aborts when it is unset.
+func BenchmarkCreateClusterCatalog(b *testing.B) {
+	catalogImageRef := os.Getenv(testCatalogRefEnvVar)
+	if catalogImageRef == "" {
+		b.Fatalf("environment variable %s is not set", testCatalogRefEnvVar)
+	}
+	ctx := context.Background()
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			// b.Errorf (unlike b.Fatalf) may be called from the RunParallel goroutines; it
+			// marks the benchmark as failed so that broken runs are not reported as results.
+			catalogObj, err := createTestCatalog(ctx, rand.String(6), catalogImageRef)
+			if err != nil {
+				b.Errorf("failed to create ClusterCatalog: %v", err)
+				// Creation failed, so skip the delete that would only add a second error.
+				continue
+			}
+
+			if err := deleteTestCatalog(ctx, catalogObj); err != nil {
+				b.Errorf("failed to remove ClusterCatalog: %v", err)
+			}
+		}
+	})
+}
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index 3e8c4dfa1..802a5f15a 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -65,6 +65,12 @@ func createTestCatalog(ctx context.Context, name string, imageRef string) (*cata
 	return catalog, err
 }
 
+// deleteTestCatalog deletes the given ClusterCatalog from the test cluster using the
+// package-level client c. It returns the error from the delete call, if any.
+func deleteTestCatalog(ctx context.Context, catalog *catalogd.ClusterCatalog) error {
+	return c.Delete(ctx, catalog)
+}
+
 // patchTestCatalog will patch the existing clusterCatalog on the test cluster, provided
 // the context, catalog name, and the image reference. It returns an error
 // if any errors occurred while updating the catalog.