chore(e2e): naively parallelize CI jobs by chunking alphabetically (#2520)

estroz · timflannagan · web-flow · commit 989690f6c27f · 2022-01-18T13:58:05.000-05:00
* chore(e2e): naively parallelize CI jobs by chunking alphabetically

Signed-off-by: Eric Stroczynski &lt;ericstroczynski@gmail.com&gt;

* build word trie from specs, add unit tests

Signed-off-by: Eric Stroczynski &lt;ericstroczynski@gmail.com&gt;

* .github: Aggregate e2e matrix jobs into a single status

Signed-off-by: timflannagan &lt;timflannagan@gmail.com&gt;

Co-authored-by: timflannagan &lt;timflannagan@gmail.com&gt;
diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
@@ -8,17 +8,34 @@ on:
   pull_request:
   workflow_dispatch:
 jobs:
-  e2e-tests:
+  e2e:
+    strategy:
+      fail-fast: false
+      matrix:
+        parallel-id: [0, 1, 2, 3]
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v1
       - uses: actions/setup-go@v2
         with:
           go-version: '~1.17'
-      - run: make e2e-local E2E_NODES=2 ARTIFACTS_DIR=./artifacts/
+      - run: make e2e-local E2E_TEST_CHUNK=${{ matrix.parallel-id }} E2E_TEST_NUM_CHUNKS=${{ strategy.job-total }} E2E_NODES=2 ARTIFACTS_DIR=./artifacts/
       - name: Archive Test Artifacts # test results, failed or not, are always uploaded.
         if: ${{ always() }}
         uses: actions/upload-artifact@v2
         with:
-          name: e2e-test-output-${{(github.event.pull_request.head.sha||github.sha)}}-${{ github.run_id }}
+          name: e2e-test-output-${{ (github.event.pull_request.head.sha || github.sha) }}-${{ github.run_id }}-${{ matrix.parallel-id }}
           path: ${{ github.workspace }}/bin/artifacts/*
+        # TODO: create job to combine test artifacts using code in https://github.com/operator-framework/operator-lifecycle-manager/pull/1476
+  e2e-tests:
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    needs: e2e
+    steps:
+    - run: |
+        echo "Matrix result: ${{ needs.e2e.result }}"
+    - name: check individual matrix results
+      if: ${{ needs.e2e.result == 'failure' }}
+      run: |
+        echo 'Failure: at least one e2e matrix job has failed'
+        exit 1
diff --git a/Makefile b/Makefile
@@ -43,7 +43,7 @@ all: test build
 test: clean cover.out
 
 unit: kubebuilder
-	KUBEBUILDER_ASSETS=$(KUBEBUILDER_ASSETS) go test $(MOD_FLAGS) $(SPECIFIC_UNIT_TEST) -tags "json1" -race -count=1 ./pkg/...
+	KUBEBUILDER_ASSETS=$(KUBEBUILDER_ASSETS) go test $(MOD_FLAGS) $(SPECIFIC_UNIT_TEST) -tags "json1" -race -count=1 ./pkg/... ./test/e2e/split/...
 
 # Ensure kubebuilder is installed before continuing
 KUBEBUILDER_ASSETS_ERR := not detected in $(KUBEBUILDER_ASSETS), to override the assets path set the KUBEBUILDER_ASSETS environment variable, for install instructions see https://book.kubebuilder.io/quick-start.html
@@ -126,6 +126,13 @@ setup-bare: clean e2e.namespace
 E2E_NODES ?= 1
 E2E_FLAKE_ATTEMPTS ?= 1
 E2E_TIMEOUT ?= 90m
+# Optionally run an individual chunk of e2e test specs.
+# Do not use this from the CLI; this is intended to be used by CI only.
+E2E_TEST_CHUNK ?= all
+E2E_TEST_NUM_CHUNKS ?= 4
+ifneq (all,$(E2E_TEST_CHUNK))
+TEST := $(shell go run ./test/e2e/split/... -chunks $(E2E_TEST_NUM_CHUNKS) -print-chunk $(E2E_TEST_CHUNK) ./test/e2e)
+endif
 E2E_OPTS ?= $(if $(E2E_SEED),-seed '$(E2E_SEED)') $(if $(TEST),-focus '$(TEST)') -flakeAttempts $(E2E_FLAKE_ATTEMPTS) -nodes $(E2E_NODES) -timeout $(E2E_TIMEOUT) -v -randomizeSuites -race -trace -progress
 E2E_INSTALL_NS ?= operator-lifecycle-manager
 E2E_TEST_NS ?= operators
diff --git a/test/e2e/split/integration_test.sh b/test/e2e/split/integration_test.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+function get_total_specs() {
+  go run github.com/onsi/ginkgo/ginkgo -noColor -dryRun -v -seed 1 "$@" ./test/e2e | grep -Po "Ran \K([0-9]+)(?= of .+ Specs in .+ seconds)"
+}
+
+unfocused_specs=$(get_total_specs)
+regexp=$(go run ./test/e2e/split/... -chunks 1 -print-chunk 0 ./test/e2e)
+focused_specs=$(get_total_specs -focus "$regexp")
+
+if ! [ $unfocused_specs -eq $focused_specs ]; then
+  echo "expected number of unfocused specs $unfocused_specs to equal focus specs $focused_specs"
+  exit 1
+fi
diff --git a/test/e2e/split/main.go b/test/e2e/split/main.go
@@ -0,0 +1,227 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"math"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strings"
+)
+
+// TODO: configurable log verbosity.
+
+type options struct {
+	numChunks  int
+	printChunk int
+	printDebug bool
+	writer     io.Writer
+}
+
+func main() {
+	opts := options{
+		writer: os.Stdout,
+	}
+	flag.IntVar(&opts.numChunks, "chunks", 1, "Number of chunks to create focus regexps for")
+	flag.IntVar(&opts.printChunk, "print-chunk", 0, "Chunk to print a regexp for")
+	flag.BoolVar(&opts.printDebug, "print-debug", false, "Print all spec prefixes in non-regexp format. Use for debugging")
+	flag.Parse()
+
+	if opts.printChunk >= opts.numChunks {
+		exitIfErr(fmt.Errorf("the chunk to print (%d) must be a smaller number than the number of chunks (%d)", opts.printChunk, opts.numChunks))
+	}
+
+	dir := flag.Arg(0)
+	if dir == "" {
+		exitIfErr(fmt.Errorf("test directory required as the argument"))
+	}
+
+	// Clean dir.
+	var err error
+	dir, err = filepath.Abs(dir)
+	exitIfErr(err)
+	wd, err := os.Getwd()
+	exitIfErr(err)
+	dir, err = filepath.Rel(wd, dir)
+	exitIfErr(err)
+
+	exitIfErr(opts.run(dir))
+}
+
+func exitIfErr(err error) {
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
+func (opts options) run(dir string) error {
+	describes, err := findDescribes(dir)
+	if err != nil {
+		return err
+	}
+
+	// Find minimal prefixes for all spec strings so no spec runs are duplicated across chunks.
+	prefixes := findMinimalWordPrefixes(describes)
+	sort.Strings(prefixes)
+
+	var out string
+	if opts.printDebug {
+		out = strings.Join(prefixes, "\n")
+	} else {
+		out, err = createChunkRegexp(opts.numChunks, opts.printChunk, prefixes)
+		if err != nil {
+			return err
+		}
+	}
+
+	fmt.Fprint(opts.writer, out)
+	return nil
+}
+
+// TODO: this is hacky because top-level tests may be defined elsewise.
+// A better strategy would be to use the output of `ginkgo -noColor -dryRun`
+// like https://github.com/operator-framework/operator-lifecycle-manager/pull/1476 does.
+var topDescribeRE = regexp.MustCompile(`var _ = Describe\("(.+)", func\(.*`)
+
+func findDescribes(dir string) ([]string, error) {
+	// Find all Ginkgo specs in dir's test files.
+	// These can be grouped independently.
+	describeTable := make(map[string]struct{})
+	matches, err := filepath.Glob(filepath.Join(dir, "*_test.go"))
+	if err != nil {
+		return nil, err
+	}
+	for _, match := range matches {
+		b, err := ioutil.ReadFile(match)
+		if err != nil {
+			return nil, err
+		}
+		specNames := topDescribeRE.FindAllSubmatch(b, -1)
+		if len(specNames) == 0 {
+			log.Printf("%s: found no top level describes, skipping", match)
+			continue
+		}
+		for _, possibleNames := range specNames {
+			if len(possibleNames) != 2 {
+				log.Printf("%s: expected to find 2 submatch, found %d:", match, len(possibleNames))
+				for _, name := range possibleNames {
+					log.Printf("\t%s\n", string(name))
+				}
+				continue
+			}
+			describe := strings.TrimSpace(string(possibleNames[1]))
+			describeTable[describe] = struct{}{}
+		}
+	}
+
+	describes := make([]string, len(describeTable))
+	i := 0
+	for describeKey := range describeTable {
+		describes[i] = describeKey
+		i++
+	}
+	return describes, nil
+}
+
+func createChunkRegexp(numChunks, printChunk int, specs []string) (string, error) {
+
+	numSpecs := len(specs)
+	if numSpecs < numChunks {
+		return "", fmt.Errorf("have more desired chunks (%d) than specs (%d)", numChunks, numSpecs)
+	}
+
+	// Create chunks of size ceil(number of specs/number of chunks) in alphanumeric order.
+	// This is deterministic on inputs.
+	chunks := make([][]string, numChunks)
+	interval := int(math.Ceil(float64(numSpecs) / float64(numChunks)))
+	currIdx := 0
+	for chunkIdx := 0; chunkIdx < numChunks; chunkIdx++ {
+		nextIdx := int(math.Min(float64(currIdx+interval), float64(numSpecs)))
+		chunks[chunkIdx] = specs[currIdx:nextIdx]
+		currIdx = nextIdx
+	}
+
+	chunk := chunks[printChunk]
+	if len(chunk) == 0 {
+		// This is a panic because the caller may skip this error, resulting in missed test specs.
+		panic(fmt.Sprintf("bug: chunk %d has no elements", printChunk))
+	}
+
+	// Write out the regexp to focus chunk specs via `ginkgo -focus <re>`.
+	var reStr string
+	if len(chunk) == 1 {
+		reStr = fmt.Sprintf("%s .*", chunk[0])
+	} else {
+		sb := strings.Builder{}
+		sb.WriteString(chunk[0])
+		for _, test := range chunk[1:] {
+			sb.WriteString("|")
+			sb.WriteString(test)
+		}
+		reStr = fmt.Sprintf("(%s) .*", sb.String())
+	}
+
+	return reStr, nil
+}
+
+func findMinimalWordPrefixes(specs []string) (prefixes []string) {
+
+	// Create a word trie of all spec strings.
+	t := make(wordTrie)
+	for _, spec := range specs {
+		t.push(spec)
+	}
+
+	// Now find the first branch point for each path in the trie by DFS.
+	for word, node := range t {
+		var prefixElements []string
+	next:
+		if word != "" {
+			prefixElements = append(prefixElements, word)
+		}
+		if len(node.children) == 1 {
+			for nextWord, nextNode := range node.children {
+				word, node = nextWord, nextNode
+			}
+			goto next
+		}
+		// TODO: this might need to be joined by "\s+"
+		// in case multiple spaces were used in the spec name.
+		prefixes = append(prefixes, strings.Join(prefixElements, " "))
+	}
+
+	return prefixes
+}
+
+// wordTrie is a trie of word nodes, instead of individual characters.
+type wordTrie map[string]*wordTrieNode
+
+type wordTrieNode struct {
+	word     string
+	children map[string]*wordTrieNode
+}
+
+// push creates s branch of the trie from each word in s.
+func (t wordTrie) push(s string) {
+	split := strings.Split(s, " ")
+
+	curr := &wordTrieNode{word: "", children: t}
+	for _, sp := range split {
+		if sp = strings.TrimSpace(sp); sp == "" {
+			continue
+		}
+		next, hasNext := curr.children[sp]
+		if !hasNext {
+			next = &wordTrieNode{word: sp, children: make(map[string]*wordTrieNode)}
+			curr.children[sp] = next
+		}
+		curr = next
+	}
+	// Add termination node so "foo" and "foo bar" have a branching point of "foo".
+	curr.children[""] = &wordTrieNode{}
+}
diff --git a/test/e2e/split/main_test.go b/test/e2e/split/main_test.go