intel · aelovikov-intel · Feb 21, 2025 · Jul 3, 2024 · Jul 3, 2024 · Jul 3, 2024
@@ -0,0 +1,52 @@
+name: Aggregate compute-benchmark averages from historical data
+
+# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on
+# how the benchmark results compare to a historical average: This historical
+# average is calculated in this workflow, which aggregates historical data and
+# produces measures of central tendency (median in this case) used for this
+# purpose.
+
+on:
+  # Manual trigger: lets a maintainer recompute the historical averages.
+  workflow_dispatch:
+    inputs:
+      lookback_days:
+        description: |
+          Number of days from today to look back in historical results.
+          This sets the age limit of data used in the average calculation: any
+          benchmark result created more than `lookback_days` days before today
+          is excluded from the historical average.
+        type: number
+        required: true
+  # Reusable-workflow entry point: takes the same input as the manual trigger,
+  # plus the token used to push the newly calculated medians.
+  workflow_call:
+    inputs:
+      lookback_days:
+        description: |
+          Number of days from today to look back in historical results; older
+          benchmark results are excluded from the historical average.
+        type: number
+        required: true
+    secrets:
+      LLVM_SYCL_BENCHMARK_TOKEN:
+        description: |
+          Github token used by the faceless account to push newly calculated
+          medians.
+        required: true
+
+
+# Limit the workflow's default token to read-only access to repository
+# contents; the job below receives the separate LLVM_SYCL_BENCHMARK_TOKEN
+# secret through the GITHUB_TOKEN environment variable.
+permissions:
+  contents: read
+
+jobs:
+  aggregate:
+    name: Aggregate average (median) value for all metrics
+    runs-on: ubuntu-latest
+    steps:
+    # Sparse checkout: fetch only the listed devops directories instead of the
+    # whole repository.
+    - uses: actions/checkout@v4
+      with:
+        sparse-checkout: |
+          devops/scripts/benchmarking
+          devops/benchmarking
+          devops/actions/benchmarking
+    # Local composite action. It expects ./devops to be checked out (done
+    # above) and reads the token from the GITHUB_TOKEN environment variable.
+    - name: Aggregate benchmark results and produce historical average
+      uses: ./devops/actions/benchmarking/aggregate
+      with:
+        lookback_days: ${{ inputs.lookback_days }}
+      env:
+        GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
@@ -25,7 +25,7 @@ on:
         required: False
       tests_selector:
         description: |
-          Two possible options: "e2e" and "cts".
+          Three possible options: "e2e", "cts", and "compute-benchmarks".
         type: string
         default: "e2e"
 
@@ -152,6 +152,7 @@ on:
         options:
           - e2e
           - cts
+          - compute-benchmarks
 
       env:
         description: |
@@ -314,3 +315,12 @@ jobs:
         sycl_cts_artifact: ${{ inputs.sycl_cts_artifact }}
         target_devices: ${{ inputs.target_devices }}
         retention-days: ${{ inputs.retention-days }}
+
+    # Only runs when the caller selected the "compute-benchmarks" test suite.
+    # The composite action reads RUNNER_TAG and GITHUB_TOKEN from the
+    # environment rather than from `with:` inputs.
+    - name: Run compute-benchmarks on SYCL
+      if: inputs.tests_selector == 'compute-benchmarks'
+      uses: ./devops/actions/run-tests/benchmark
+      with:
+        target_devices: ${{ inputs.target_devices }}
+      env:
+        RUNNER_TAG: ${{ inputs.runner }}
+        GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
@@ -243,6 +243,46 @@ jobs:
       sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }}
       sycl_cts_artifact: sycl_cts_bin
 
+  # Recomputes the historical averages by calling the reusable aggregation
+  # workflow. The job declares no `needs`, so it does not wait for the build.
+  aggregate_benchmark_results:
+    if: always() && !cancelled()
+    name: Aggregate benchmark results and produce historical averages
+    uses: ./.github/workflows/sycl-benchmark-aggregate.yml
+    secrets:
+      LLVM_SYCL_BENCHMARK_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
+    with:
+      # Only results from the last 100 days are aggregated.
+      lookback_days: 100
+
+  # Runs compute-benchmarks on each runner in the matrix, using the toolchain
+  # produced by ubuntu2204_build.
+  run-sycl-benchmarks:
+    needs: [ubuntu2204_build, aggregate_benchmark_results]
+    # always() keeps this job from being skipped when a needed job fails (for
+    # example the aggregation); it still requires that the run was not
+    # cancelled and that the build reported success.
+    if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }}
+    strategy:
+      # Let the other matrix entries finish even if one of them fails.
+      fail-fast: false
+      matrix:
+        include:
+          # `runner` is a JSON array of runner labels, passed as a string.
+          - name: Run compute-benchmarks on L0 Gen12
+            runner: '["Linux", "gen12"]'
+            image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
+            target_devices: level_zero:gpu
+            reset_intel_gpu: true
+          - name: Run compute-benchmarks on L0 PVC
+            runner: '["Linux", "pvc"]'
+            image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN
+            target_devices: level_zero:gpu
+            reset_intel_gpu: false
+    uses: ./.github/workflows/sycl-linux-run-tests.yml
+    # Forwards the caller's secrets, including the LLVM_SYCL_BENCHMARK_TOKEN
+    # that the benchmark step of the called workflow reads.
+    secrets: inherit
+    with:
+      name: ${{ matrix.name }}
+      runner: ${{ matrix.runner }}
+      image_options: ${{ matrix.image_options }}
+      target_devices: ${{ matrix.target_devices }}
+      # Selects the compute-benchmarks step in sycl-linux-run-tests.yml.
+      tests_selector: compute-benchmarks
+      reset_intel_gpu: ${{ matrix.reset_intel_gpu }}
+      ref: ${{ github.sha }}
+      sycl_toolchain_artifact: sycl_linux_default
+      sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }}
+      sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }}
+
   nightly_build_upload:
     name: Nightly Build Upload
     if: ${{ github.ref_name == 'sycl' }}

@@ -0,0 +1,95 @@
+name: 'Aggregate compute-benchmark results and produce historical averages'
+
+# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on
+# how the benchmark results compare to a historical average: This historical
+# average is calculated in this composite workflow, which aggregates historical
+# data and produces measures of central tendency (median in this case) used for
+# this purpose.
+#
+# This action assumes that /devops has been checked out in ./devops. This action
+# also assumes that GITHUB_TOKEN was properly set in env, because according to
+# Github, that's apparently the recommended way to pass a secret into a github
+# action:
+#
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets
+#
+
+inputs:
+  lookback_days:
+    description: |
+      Number of days from today to look back in historical results; results
+      older than this are excluded from the aggregated average. Must consist
+      of decimal digits only (checked by the first step of this action).
+    # NOTE: action inputs are always delivered as strings; `type` is kept for
+    # readers only, which is why the first step validates the value itself.
+    type: number
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+  # Validates inputs.lookback_days and converts it into the cutoff timestamp
+  # (SANITIZED_TIMESTAMP, format YYYYMMDD_HHMMSS) exported to later steps.
+  - name: Obtain oldest timestamp allowed for data in aggregation
+    shell: bash
+    env:
+      # Hand the input over through the environment instead of expanding it
+      # into the script text, so a crafted value cannot inject shell code.
+      LOOKBACK_DAYS: ${{ inputs.lookback_days }}
+    run: |
+      # DO NOT use LOOKBACK_DAYS after this check; later steps must only use
+      # SANITIZED_TIMESTAMP.
+      if [[ ! "$LOOKBACK_DAYS" =~ ^[0-9]+$ ]]; then
+        echo "Please ensure inputs.lookback_days is a number."
+        exit 1
+      fi
+      SANITIZED_LOOKBACK_DAYS="$LOOKBACK_DAYS"
+      # `|| true` keeps errexit from aborting before the message below when
+      # date rejects the value (e.g. an absurdly large number of days).
+      SANITIZED_TIMESTAMP="$(date -d "$SANITIZED_LOOKBACK_DAYS days ago" +%Y%m%d_%H%M%S || true)"
+      if [[ ! "$SANITIZED_TIMESTAMP" =~ ^[0-9]{8}_[0-9]{6}$ ]]; then
+        echo "Invalid timestamp generated: is inputs.lookback_days valid?"
+        exit 1
+      fi
+      echo "SANITIZED_TIMESTAMP=$SANITIZED_TIMESTAMP" >> "$GITHUB_ENV"
+  # Makes the results-repository settings available to the following steps.
+  - name: Load benchmarking configuration
+    shell: bash
+    run: |
+      # Whatever load_config.py prints is executed as a shell command.
+      # NOTE(review): presumably it defines the SANITIZED_PERF_RES_* variables
+      # from the benchmarking config -- confirm against load_config.py.
+      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+      # Persist both values through GITHUB_ENV so later steps can read them.
+      echo "SANITIZED_PERF_RES_GIT_REPO=$SANITIZED_PERF_RES_GIT_REPO" >> $GITHUB_ENV
+      echo "SANITIZED_PERF_RES_GIT_BRANCH=$SANITIZED_PERF_RES_GIT_BRANCH" >> $GITHUB_ENV
+  # Clones the configured branch of the results repository into
+  # ./llvm-ci-perf-results unless that directory is already present.
+  - name: Checkout historical performance results repository
+    shell: bash
+    run: |
+      [ -d ./llvm-ci-perf-results ] || \
+        git clone -b "$SANITIZED_PERF_RES_GIT_BRANCH" "https://github.com/$SANITIZED_PERF_RES_GIT_REPO" ./llvm-ci-perf-results
+  # Runs aggregate.py once for every test-case directory in the results
+  # checkout.
+  - name: Run aggregator on historical results
+    shell: bash
+    run: |
+      # The current format of the historical results repository is:
+      #
+      # /<ONEAPI_DEVICE_SELECTOR>/<runner>/<test name>
+      #
+      # Thus, a min/max depth of 3 is used to enumerate all test cases in the
+      # repository. Test name is also derived from here.
+      #
+      # Paths containing ".git" are excluded so git's own directories are not
+      # treated as test cases.
+      find ./llvm-ci-perf-results -mindepth 3 -maxdepth 3 -type d ! -path '*.git*' |
+      while read -r dir; do
+        # The directory's basename is the test name; SANITIZED_TIMESTAMP is
+        # the cutoff exported by the first step of this action.
+        test_name="$(basename "$dir")"
+        python ./devops/scripts/benchmarking/aggregate.py ./devops "$test_name" "$dir" "$SANITIZED_TIMESTAMP"
+      done
+  # Commits the regenerated medians and pushes them to the results repository
+  # using the token supplied through the GITHUB_TOKEN environment variable.
+  - name: Upload average to the repo
+    shell: bash
+    run: |
+      cd ./llvm-ci-perf-results
+      git config user.name "SYCL Benchmarking Bot"
+      git config user.email "[email protected]"
+      git pull
+      # Stage before checking: `git diff` ignores untracked files, so median
+      # files created for the first time would otherwise never be pushed.
+      git add .
+      # Make sure changes have been made
+      if git diff --cached --quiet; then
+        echo "No changes to median, skipping push."
+      else
+        git commit -m "[GHA] Aggregate median data from $SANITIZED_TIMESTAMP to $(date +%Y%m%d_%H%M%S)"
+        # Authenticate the push with the token from the environment.
+        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
+      fi
+  # Prints a banner in the job log pointing readers at the artifact uploaded
+  # by the next step; runs even when earlier steps failed.
+  - name: Find aggregated average results artifact here
+    if: always()
+    shell: bash
+    run: |
+      printf '%s\n' \
+        '#' \
+        '# Artifact link for aggregated averages here:' \
+        '#'
+  # Runs even if an earlier step failed, so any medians that were produced
+  # are still uploaded as an artifact.
+  - name: Archive new medians
+    if: always()
+    uses: actions/upload-artifact@v4
+    with:
+      name: llvm-ci-perf-results new medians
+      # Every *-median.csv file anywhere under the results checkout.
+      path: ./llvm-ci-perf-results/**/*-median.csv
@@ -0,0 +1,107 @@
+name: 'Run compute-benchmarks'
+
+# Run compute-benchmarks on SYCL
+# 
+# This action assumes SYCL is in ./toolchain, and that /devops has been
+# checked out in ./devops. This action also assumes that GITHUB_TOKEN
+# was properly set in env, because according to Github, that's apparently the
+# recommended way to pass a secret into a github action:
+#
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/using-secrets-in-github-actions#accessing-your-secrets
+#
+# This action also expects a RUNNER_TAG environment variable to be set to the
+# runner tag used to run this workflow. Currently, only gen12 and pvc on Linux
+# are fully supported. Although this workflow won't stop you from running on
+# other devices, note that only gen12 and pvc have been tested to work.
+#
+
+inputs:
+  target_devices:
+    description: |
+      Value exported as ONEAPI_DEVICE_SELECTOR for the benchmark run, for
+      example `level_zero:gpu`. Only the level_zero backend is fully
+      supported; other backends only produce a warning.
+    type: string
+    required: true
+
+runs:
+  using: "composite"
+  steps:
+  # Emits a warning (never a failure) when the runner tag or the target
+  # backend is outside the configurations this action is tested with.
+  - name: Check specified runner type / target backend
+    shell: bash
+    env:
+      TARGET_DEVICE: ${{ inputs.target_devices }}
+    run: |
+      # RUNNER_TAG is expected to be provided by the calling workflow.
+      if [ "$RUNNER_TAG" != '["Linux", "gen12"]' ] && [ "$RUNNER_TAG" != '["Linux", "pvc"]' ]; then
+        echo "#"
+        echo "# WARNING: Only gen12/pvc on Linux is fully supported."
+        echo "# This workflow is not guaranteed to work with other runners."
+        echo "#"
+      fi
+
+      # The value is read from the TARGET_DEVICE env variable because expanding
+      # inputs.target_devices into the script would allow code injection.
+      if [[ "$TARGET_DEVICE" != level_zero:* ]]; then
+        echo "#"
+        echo "# WARNING: Only level_zero backend is fully supported."
+        echo "# This workflow is not guaranteed to work with other backends."
+        echo "#"
+      fi
+  # Prints a note for developers, lists the visible SYCL devices, then runs
+  # the benchmark script against the toolchain in ./toolchain.
+  - name: Run compute-benchmarks
+    shell: bash
+    env:
+      # Both values are handed over through the environment instead of being
+      # expanded into the script text, so they cannot inject shell code.
+      TARGET_DEVICE: ${{ inputs.target_devices }}
+      BENCHMARK_RUNNER_NAME: ${{ runner.name }}
+    run: |
+      cat << EOF
+      #
+      # NOTE TO DEVELOPERS:
+      #
+
+      Check latter steps of the workflow: This job produces an artifact with:
+        - benchmark results from passing/failing tests
+        - log containing all failing (too slow) benchmarks
+        - log containing all erroring benchmarks
+
+      While this step in the workflow provides debugging output describing this
+      information, it might be easier to inspect the logs from the artifact
+      instead.
+
+      EOF
+      export ONEAPI_DEVICE_SELECTOR="$TARGET_DEVICE"
+      export CMPLR_ROOT=./toolchain
+      echo "-----"
+      sycl-ls
+      echo "-----"
+      ./devops/scripts/benchmarking/benchmark.sh -n "$BENCHMARK_RUNNER_NAME" -s || exit 1
+  # Commits any new benchmark results and pushes them to the results
+  # repository; runs even when the benchmark step failed.
+  - name: Push compute-benchmarks results
+    if: always()
+    shell: bash
+    run: |
+      # TODO -- waiting on security clearance
+      # Load configuration values
+      # NOTE(review): the output of load_config.py is executed as a shell
+      # command; presumably it defines the SANITIZED_PERF_RES_* variables used
+      # below -- confirm against load_config.py.
+      $(python ./devops/scripts/benchmarking/load_config.py ./devops constants)
+
+      cd "./llvm-ci-perf-results"
+      git config user.name "SYCL Benchmarking Bot"
+      git config user.email "[email protected]"
+      git pull
+      git add .
+      # Make sure changes have been made
+      if git diff --quiet && git diff --cached --quiet; then
+        echo "No new results added, skipping push."
+      else
+        git commit -m "[GHA] Upload compute-benchmarks results from https://github.com/intel/llvm/actions/runs/${{ github.run_id }}"
+        # Authenticate the push with the token supplied via the GITHUB_TOKEN
+        # environment variable.
+        git push "https://$GITHUB_TOKEN@github.com/$SANITIZED_PERF_RES_GIT_REPO.git" "$SANITIZED_PERF_RES_GIT_BRANCH"
+      fi
+  # Prints a banner in the job log pointing readers at the artifact uploaded
+  # by the next step; runs even when earlier steps failed.
+  - name: Find benchmark result artifact here
+    if: always()
+    shell: bash
+    run: |
+      printf '%s\n' \
+        '#' \
+        '# Artifact link for benchmark results here:' \
+        '#'
+  # Runs even if an earlier step failed, so results and logs from a failing
+  # benchmark run are still uploaded.
+  - name: Archive compute-benchmark results
+    if: always()
+    uses: actions/upload-artifact@v4
+    with:
+      # The run id and runner name are part of the artifact name.
+      name: Compute-benchmark run ${{ github.run_id }} (${{ runner.name }})
+      path: ./artifact
@@ -0,0 +1,44 @@
+;
+; This file contains configuration options to change the behaviour of the
+; benchmarking workflow in sycl-linux-run-tests.yml.
+;
+; DO NOT USE THE CONTENTS OF THIS FILE DIRECTLY -- Due to security concerns, the
+; contents of this file must be sanitized before use.
+; See: /devops/scripts/benchmarking/common.py
+;
+
+; Compute-benchmark compile/run options
+[compute_bench]
+; Value for -j during compilation of compute-benchmarks
+compile_jobs = 2
+; Number of iterations to run compute-benchmark tests
+iterations = 100
+
+; Options for benchmark result metrics (to record/compare against)
+[metrics]
+; Sets the metrics to record/aggregate in the historical average.
+; Format: comma-separated list of column names in compute-benchmark results
+recorded = Median,StdDev
+; Sets the tolerance for each recorded metric and their allowed deviation from
+; the historical average. Metrics not included here are not compared against
+; when passing/failing benchmark results.
+; Format: comma-separated list of <metric>:<deviation percentage in decimals>
+tolerances = Median:0.5
+
+; Options for computing historical averages
+[average]
+; Number of days (from today) to look back for results when computing historical
+; average 
+cutoff_range = 7
+; Minimum number of samples required to compute a historical average
+min_threshold = 3
+
+; ONEAPI_DEVICE_SELECTOR linting/options
+[device_selector]
+; Backends to allow in device_selector
+enabled_backends = level_zero,opencl,cuda,hip
+; native_cpu is disabled
+
+; Devices to allow in device_selector
+enabled_devices = cpu,gpu
+; fpga is disabled