"""Parse the GitHub action log for test times.

Reads a job log (from stdin when run as a script), keeps only the section that
starts at the first line containing ``==== slow`` and runs up to the next line
containing ``====``, drops whatever precedes the useful text on each of those
lines, and prints the result.

Taken from https://github.com/pymc-labs/pymc-marketing/tree/main/scripts/slowest_tests/extract-slow-tests.py

"""

import re
import sys

from collections.abc import Callable
from pathlib import Path

# Marks the first line of the section to extract.
start_pattern = re.compile(r"==== slow")
# A run of four "=": found in the section header and in the line that closes it.
separator_pattern = re.compile(r"====")
# A decimal number immediately followed by "s " (the duration column).
time_pattern = re.compile(r"(\d+\.\d+)s ")
# Terminal escape sequences; compiled once, like the other patterns.
ansi_escape_pattern = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")


def extract_lines(lines: list[str]) -> list[str]:
    """Return the raw lines of the first ``==== slow`` section in *lines*.

    The result starts with the first line matching ``start_pattern`` and ends
    with the next line that matches ``separator_pattern`` without also matching
    ``start_pattern``. If no such closing line exists, everything up to the end
    of the input is returned. If the section never starts, the result is empty.
    """
    section: list[str] = []

    for line in lines:
        is_start = start_pattern.search(line) is not None

        # ``section`` stays empty until the opening line has been seen.
        if not section and not is_start:
            continue

        section.append(line)

        # The opening line contains "====" as well, so it must not count as
        # the closing separator.
        if not is_start and separator_pattern.search(line):
            break

    return section


def trim_up_to_match(pattern: "re.Pattern[str]", string: str) -> str:
    """Return *string* from the first match of *pattern* onwards.

    Returns an empty string when *pattern* does not match at all.
    """
    match = pattern.search(string)
    if match is None:
        return ""

    return string[match.start() :]


def trim(pattern: "re.Pattern[str]", lines: list[str]) -> list[str]:
    """Apply :func:`trim_up_to_match` to every entry of *lines*."""
    return [trim_up_to_match(pattern, line) for line in lines]


def strip_ansi(text: str) -> str:
    """Return *text* with ANSI escape sequences removed."""
    return ansi_escape_pattern.sub("", text)


def format_times(times: list[str]) -> list[str]:
    """Clean up the lines returned by :func:`extract_lines`.

    * The first line is cut so that it starts at its ``====`` separator.
    * A final line containing ``====`` is treated as the closing line: it is
      cut the same way and has ANSI escape sequences removed.
    * Every line in between is cut so that it starts at its duration; a line
      without a duration becomes an empty string.

    A closing line is only recognised after the header. A lone header is
    therefore reported once rather than twice, and when the section was never
    closed the last row is kept as a duration row instead of being blanked.
    """
    if not times:
        return []

    header, *rows = times

    footer: list[str] = []
    if rows and separator_pattern.search(rows[-1]):
        closing_line = rows.pop()
        footer = [strip_ansi(trim_up_to_match(separator_pattern, closing_line))]

    return [
        trim_up_to_match(separator_pattern, header),
        *trim(time_pattern, rows),
        *footer,
    ]


def read_lines_from_stdin() -> list[str]:
    """Read all of stdin and return it split into lines."""
    return sys.stdin.read().splitlines()


def read_from_file(file: Path) -> list[str]:
    """For testing purposes."""
    return file.read_text().splitlines()


def main(read_lines: Callable[[], list[str]]) -> None:
    """Print the formatted section of the log supplied by *read_lines*.

    *read_lines* is called without arguments and must return the log as a list
    of lines. Nothing but an empty line is printed when no section is found.
    """
    lines = read_lines()
    times = extract_lines(lines)
    parsed_times = format_times(times)
    print("\n".join(parsed_times))  # noqa: T201


if __name__ == "__main__":
    read_lines = read_lines_from_stdin
    main(read_lines)
#!/bin/zsh
#
# Rewrite the body of the "slow tests" tracking issue with the slowest test
# times reported by a recent CI run.
#
# Steps:
#   1. Ask `gh run list` for up to 30 successful runs of $workflow and take the
#      first one that lasted more than 10 minutes.
#   2. List the jobs of that run (written to tmp.json in the current directory).
#   3. For every job, download its log and pipe it through
#      extract-slow-tests.py (looked up relative to the current directory).
#   4. Assemble the per-job output into one markdown body and push it to the
#      issue with `gh issue edit` (or just print it when DRY_RUN=true).
#
# Requires: gh, jq, python, and a `date` that understands `-d <timestamp>`.
#
# NOTE: this script relies on zsh behaviour in two places:
#   * the `while` loop below is the last stage of a pipeline; zsh runs that
#     stage in the current shell, so $all_times is still set after the loop;
#   * the builtin `echo` expands the "\n" sequences stored in $all_times.

DRY_RUN=false

owner=pymc-devs
repo=pymc
issue_number=7686
title="Speed up test times :rocket:"
workflow=tests
contributing_url="https://www.pymc.io/projects/docs/en/stable/contributing/index.html"
action_url="https://github.com/$owner/$repo/blob/main/.github/workflows/slow-tests-issue.yml"

# Runs of 10 minutes or less are filtered out by the jq program.
latest_id=$(gh run list --limit 30 --workflow "$workflow" --status success --json databaseId,startedAt,updatedAt --jq '
. | map({
    databaseId: .databaseId,
    startedAt: .startedAt,
    updatedAt: .updatedAt,
    minutes: (((.updatedAt | fromdate) - (.startedAt | fromdate)) / 60)
} | select(.minutes > 10))
| .[0].databaseId
')

# jq yields "null" when no run passed the filter; stop here instead of
# querying a non-existent run and failing later with a misleading message.
if [ -z "$latest_id" ] || [ "$latest_id" = "null" ]; then
    echo "No successful '$workflow' run longer than 10 minutes found, exiting"
    exit 1
fi

gh api "/repos/$owner/$repo/actions/runs/$latest_id/jobs" --jq '
    .jobs
    | map({name, id, run_id, node_id, started_at, completed_at})
' > tmp.json

# Decide whether a job should be skipped (return 0) or processed (return 1).
# Every filter below is currently commented out, so no job is skipped.
function skip_job() {
    local name="$1"
    # if [[ $name == *"py3.10"* ]]; then
    #     return 0
    # fi
    #
    # if [[ $name == *"float32 1"* ]]; then
    #     return 0
    # fi
    #
    # if [[ $name == *"Benchmark"* ]]; then
    #     return 0
    # fi

    return 1
}

# Remove common prefix from the name.
# The sed expression is commented out, so the name is echoed back unchanged.
function remove_prefix() {
    local name="$1"
    echo "$name"
    # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //'
}

# Print the time between two timestamps as "N seconds" (under a minute) or
# "M minutes S seconds".
#   $1  start timestamp, in a format `date -d` accepts
#   $2  end timestamp, same format
function human_readable_time() {
    local started_at="$1"
    local completed_at="$2"
    local start_seconds end_seconds seconds

    start_seconds=$(date -d "$started_at" +%s)
    end_seconds=$(date -d "$completed_at" +%s)

    seconds=$((end_seconds - start_seconds))

    if [ "$seconds" -lt 60 ]; then
        echo "$seconds seconds"
    else
        # Plain arithmetic rather than `date +%M`: the minute field of a date
        # wraps at 60, which would silently drop whole hours for long jobs.
        echo "$((seconds / 60)) minutes $((seconds % 60)) seconds"
    fi
}


all_times=""
jq -c '.[]' tmp.json | while IFS= read -r job; do
    id=$(printf '%s' "$job" | jq -r '.id')
    name=$(printf '%s' "$job" | jq -r '.name')
    run_id=$(printf '%s' "$job" | jq -r '.run_id')
    started_at=$(printf '%s' "$job" | jq -r '.started_at')
    completed_at=$(printf '%s' "$job" | jq -r '.completed_at')

    if skip_job "$name"; then
        echo "Skipping $name"
        continue
    fi

    echo "Processing job: $name (ID: $id, Run ID: $run_id)"

    # Seeing a bit more stability with the API rather than the CLI
    # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run
    times=$(gh api "/repos/$owner/$repo/actions/jobs/$id/logs" | python extract-slow-tests.py)
    # times=$(gh run view --job $id --log | python extract-slow-tests.py)

    if [ -z "$times" ]; then
        # Some of the jobs are non-test jobs, so we skip them
        echo "No tests found for '$name', skipping"
        continue
    fi

    echo "$times"

    human_readable=$(human_readable_time "$started_at" "$completed_at")
    name=$(remove_prefix "$name")

    # Wrap the job's times in a fenced code block under a one-line heading.
    # NOTE(review): both strings contain a literal line break, reproduced
    # exactly as found; if the script originally wrapped each job in extra
    # markup at these positions, it is not visible here - confirm.
    top="
($human_readable) $name\n\n\n\`\`\`"
    bottom="\`\`\`\n\n
"

    formatted_times="$top\n$times\n$bottom"

    if [ -n "$all_times" ]; then
        all_times="$all_times\n$formatted_times"
    else
        all_times="$formatted_times"
    fi
done

if [ -z "$all_times" ]; then
    echo "No slow tests found, exiting"
    exit 1
fi

run_date=$(date +"%Y-%m-%d")
body=$(cat << EOF
If you are motivated to help speed up some tests, we would appreciate it!

Here are some of the slowest test times:

$all_times

You can find more information on how to contribute [here]($contributing_url)

Automatically generated by [GitHub Action]($action_url)
Latest run date: $run_date
Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id)
EOF
)

if [ "$DRY_RUN" = true ]; then
    echo "Dry run, not updating issue"
    echo "$body"
    exit
fi

# `echo` (not printf '%s') on purpose: it turns the stored "\n" into newlines.
echo "$body" | gh issue edit "$issue_number" --body-file - --title "$title"
echo "Updated issue $issue_number with all times"