Skip to content

Commit c3a4d77

Browse files
craig[bot] and herkolategan
committed
142206: microbench-ci: improve signal to noise ratio r=Darrylwong,srosenberg a=herkolategan This PR introduces several enhancements to the microbenchmarking process in CI. It modifies the microbenchmarks to require three consecutive runs to detect a regression, significantly reducing the chance of false positives. As a result, the total CI running time will dynamically adjust, ensuring that if a regression is detected, CI will take at most approximately 45 minutes to complete. Additionally, it adds configurable compare alpha thresholds to reduce noise during benchmark comparisons. This allows for better tuning and more accurate results. The metrics builder has also been updated to accept options for configuring these thresholds, improving flexibility. Lastly, the previous use of delta thresholds to filter out insignificant regressions has been removed. This change aims to lower the probability of false positives through alternative mechanisms. Epic: None Release note: None Co-authored-by: Herko Lategan <[email protected]>
2 parents 0ab93e3 + 03f2bcc commit c3a4d77

File tree

12 files changed

+897
-157
lines changed

12 files changed

+897
-157
lines changed

.github/workflows/microbenchmarks-ci.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
pkg: ${{ env.PACKAGE }}
4646
run-group-1:
4747
runs-on: [self-hosted, basic_microbench_runner_group]
48-
timeout-minutes: 30
48+
timeout-minutes: 60
4949
needs: [base, head]
5050
steps:
5151
- name: Checkout
@@ -58,7 +58,7 @@ jobs:
5858
group: 1
5959
run-group-2:
6060
runs-on: [self-hosted, basic_microbench_runner_group]
61-
timeout-minutes: 30
61+
timeout-minutes: 60
6262
needs: [base, head]
6363
steps:
6464
- name: Checkout

pkg/cmd/microbench-ci/benchmark.go

+10-9
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,16 @@ import (
1919

2020
type (
2121
Benchmark struct {
22-
DisplayName string `yaml:"display_name"`
23-
Package string `yaml:"package"`
24-
Labels []string `yaml:"labels"`
25-
Name string `yaml:"name"`
26-
RunnerGroup int `yaml:"runner_group"`
27-
Count int `yaml:"count"`
28-
Iterations int `yaml:"iterations"`
29-
30-
Thresholds map[string]float64 `yaml:"thresholds"`
22+
DisplayName string `yaml:"display_name"`
23+
Package string `yaml:"package"`
24+
Labels []string `yaml:"labels"`
25+
Name string `yaml:"name"`
26+
RunnerGroup int `yaml:"runner_group"`
27+
Count int `yaml:"count"`
28+
Iterations int `yaml:"iterations"`
29+
CompareAlpha float64 `yaml:"compare_alpha"`
30+
Retries int `yaml:"retries"`
31+
Metrics []string `yaml:"metrics"`
3132
}
3233
Benchmarks []Benchmark
3334
ProfileType string

pkg/cmd/microbench-ci/compare.go

+66-20
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,17 @@
66
package main
77

88
import (
9+
"bufio"
910
"bytes"
1011
"fmt"
11-
"math"
1212
"os"
1313
"path"
1414

1515
"github.com/cockroachdb/cockroach/pkg/cmd/roachprod-microbench/model"
1616
"github.com/cockroachdb/errors"
1717
"golang.org/x/exp/maps"
1818
"golang.org/x/perf/benchfmt"
19+
"golang.org/x/perf/benchmath"
1920
)
2021

2122
type (
@@ -31,11 +32,24 @@ type (
3132

3233
const (
3334
NoChange Status = iota
34-
Better
35-
Worse
36-
Regression
35+
Improved
36+
Regressed
3737
)
3838

39+
// String returns the string representation of the status.
40+
func (s Status) String() string {
41+
switch s {
42+
case NoChange:
43+
return "No Change"
44+
case Improved:
45+
return "Improved"
46+
case Regressed:
47+
return "Regressed"
48+
default:
49+
panic(fmt.Sprintf("unknown status: %d", s))
50+
}
51+
}
52+
3953
// status returns the status of a metric in the comparison.
4054
func (c *CompareResult) status(metricName string) Status {
4155
entry := c.MetricMap[metricName]
@@ -47,35 +61,36 @@ func (c *CompareResult) status(metricName string) Status {
4761
return NoChange
4862
}
4963
status := NoChange
50-
threshold := c.Benchmark.Thresholds[metricName] * 100.0
5164
if cc.Delta*float64(entry.Better) > 0 {
52-
status = Better
65+
status = Improved
5366
} else if cc.Delta*float64(entry.Better) < 0 {
54-
status = Worse
55-
if math.Abs(cc.Delta) >= threshold {
56-
status = Regression
57-
}
67+
status = Regressed
5868
}
5969
return status
6070
}
6171

62-
// regressed returns true if any metric in the comparison has regressed.
63-
func (c *CompareResult) regressed() bool {
72+
// top returns the top status of all metrics in the comparison.
73+
func (c *CompareResult) top() Status {
74+
topStatus := NoChange
6475
for metric := range c.MetricMap {
6576
status := c.status(metric)
66-
if status == Regression {
67-
return true
77+
if status > topStatus {
78+
topStatus = status
6879
}
6980
}
70-
return false
81+
return topStatus
7182
}
7283

73-
// compare compares the metrics of a benchmark between two revisions.
74-
func (b *Benchmark) compare() (*CompareResult, error) {
75-
builder := model.NewBuilder()
84+
// compare compares the metrics of a benchmark between two revisions. Only the
85+
// specified last number of lines of the benchmark logs are considered. If lines
86+
// is 0, it considers the entire logs.
87+
func (b *Benchmark) compare(lines int) (*CompareResult, error) {
88+
builder := model.NewBuilder(model.WithThresholds(&benchmath.Thresholds{
89+
CompareAlpha: b.CompareAlpha,
90+
}))
7691
compareResult := CompareResult{Benchmark: b}
7792
for _, revision := range []Revision{Old, New} {
78-
data, err := os.ReadFile(path.Join(suite.artifactsDir(revision), b.cleanLog()))
93+
data, err := logTail(path.Join(suite.artifactsDir(revision), b.cleanLog()), lines)
7994
if err != nil {
8095
return nil, err
8196
}
@@ -110,11 +125,42 @@ func (b *Benchmark) compare() (*CompareResult, error) {
110125
func (b Benchmarks) compareBenchmarks() (CompareResults, error) {
111126
compareResults := make(CompareResults, 0, len(b))
112127
for _, benchmark := range b {
113-
compareResult, err := benchmark.compare()
128+
compareResult, err := benchmark.compare(0)
114129
if err != nil {
115130
return nil, err
116131
}
117132
compareResults = append(compareResults, compareResult)
118133
}
119134
return compareResults, nil
120135
}
136+
137+
// logTail returns the last N lines of a file.
138+
// If N is 0, it returns the entire file.
139+
func logTail(filePath string, N int) ([]byte, error) {
140+
if N == 0 {
141+
return os.ReadFile(filePath)
142+
}
143+
file, err := os.Open(filePath)
144+
if err != nil {
145+
return nil, err
146+
}
147+
defer file.Close()
148+
149+
lines := make([]string, 0, N)
150+
scanner := bufio.NewScanner(file)
151+
for scanner.Scan() {
152+
lines = append(lines, scanner.Text())
153+
if len(lines) > N {
154+
lines = lines[1:]
155+
}
156+
}
157+
if err := scanner.Err(); err != nil {
158+
return nil, err
159+
}
160+
161+
var buffer bytes.Buffer
162+
for _, line := range lines {
163+
buffer.WriteString(line + "\n")
164+
}
165+
return buffer.Bytes(), nil
166+
}

pkg/cmd/microbench-ci/config/pull-request-suite.yml

+21-18
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,36 @@ benchmarks:
44
name: "BenchmarkSysbench/SQL/3node/oltp_read_write"
55
package: "pkg/sql/tests"
66
runner_group: 1
7-
count: 10
8-
iterations: 3000
9-
thresholds:
10-
"sec/op": .03
11-
"B/op": .02
12-
"allocs/op": .02
7+
count: 15
8+
iterations: 1500
9+
compare_alpha: 0.025
10+
retries: 3
11+
metrics:
12+
- "sec/op"
13+
- "allocs/op"
1314

1415
- display_name: Sysbench
1516
labels: ["KV", "1node", "local", "oltp_read_only"]
1617
name: "BenchmarkSysbench/KV/1node_local/oltp_read_only"
1718
package: "pkg/sql/tests"
1819
runner_group: 2
19-
count: 10
20-
iterations: 12000
21-
thresholds:
22-
"sec/op": .02
23-
"B/op": .015
24-
"allocs/op": .015
20+
count: 20
21+
iterations: 6000
22+
compare_alpha: 0.025
23+
retries: 3
24+
metrics:
25+
- "sec/op"
26+
- "allocs/op"
2527

2628
- display_name: Sysbench
2729
labels: ["KV", "1node", "local", "oltp_write_only"]
2830
name: "BenchmarkSysbench/KV/1node_local/oltp_write_only"
2931
package: "pkg/sql/tests"
3032
runner_group: 2
31-
count: 10
32-
iterations: 12000
33-
thresholds:
34-
"sec/op": .025
35-
"B/op": .0175
36-
"allocs/op": .0175
33+
count: 20
34+
iterations: 6000
35+
compare_alpha: 0.025
36+
retries: 3
37+
metrics:
38+
- "sec/op"
39+
- "allocs/op"

pkg/cmd/microbench-ci/report.go

+32-14
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ import (
1212
"log"
1313
"math"
1414
"os"
15+
"regexp"
1516
"sort"
17+
"strconv"
1618
"strings"
1719
"text/template"
1820

@@ -57,14 +59,18 @@ func (c *CompareResult) generateSummaryData(
5759
statusTemplateFunc func(status Status) string,
5860
) []SummaryData {
5961
summaryData := make([]SummaryData, 0, len(c.MetricMap))
60-
for metricName, entry := range c.MetricMap {
62+
for _, metricName := range c.Benchmark.Metrics {
63+
entry := c.MetricMap[metricName]
64+
if entry == nil {
65+
log.Printf("WARN: no metric found for benchmark metric %q", metricName)
66+
continue
67+
}
6168
benchmark := entry.BenchmarkEntries[c.EntryName]
6269
cc := entry.ComputeComparison(c.EntryName, string(Old), string(New))
6370
if cc == nil {
6471
log.Printf("WARN: no comparison found for benchmark metric %q:%q", c.EntryName, metricName)
6572
continue
6673
}
67-
threshold := c.Benchmark.Thresholds[metricName] * 100.0
6874
status := statusTemplateFunc(c.status(metricName))
6975
oldSum := benchmark.Summaries[string(Old)]
7076
newSum := benchmark.Summaries[string(New)]
@@ -74,7 +80,6 @@ func (c *CompareResult) generateSummaryData(
7480
NewCenter: fmt.Sprintf("%s ±%s", formatValue(newSum.Center, metricName), newSum.PctRangeString()),
7581
Delta: cc.FormattedDelta,
7682
Note: cc.Distribution.String(),
77-
Threshold: fmt.Sprintf("%.1f%%", threshold),
7883
Status: status,
7984
})
8085
}
@@ -112,13 +117,6 @@ func (c *CompareResult) benchdiffData() BenchdiffData {
112117
// writeJSONSummary writes a JSON summary of the comparison results to the given
113118
// path.
114119
func (c CompareResults) writeJSONSummary(path string) error {
115-
file, err := os.Create(path)
116-
if err != nil {
117-
return err
118-
}
119-
defer file.Close()
120-
encoder := json.NewEncoder(file)
121-
encoder.SetIndent("", " ")
122120
type (
123121
Data struct {
124122
Metric string
@@ -161,13 +159,20 @@ func (c CompareResults) writeJSONSummary(path string) error {
161159
Data: data,
162160
}
163161
}
164-
return encoder.Encode(struct {
162+
163+
jsonData, err := json.MarshalIndent(struct {
165164
Entries []Entry
166165
Revisions Revisions
167166
}{
168167
Entries: entries,
169168
Revisions: suite.Revisions,
170-
})
169+
}, "", " ")
170+
if err != nil {
171+
return err
172+
}
173+
174+
formattedData := formatFloats(jsonData, 5)
175+
return os.WriteFile(path, formattedData, 0644)
171176
}
172177

173178
// writeGitHubSummary writes a markdown summary of the comparison results to the
@@ -192,7 +197,7 @@ func (c CompareResults) writeGitHubSummary(path string) error {
192197
if status > finalStatus {
193198
finalStatus = status
194199
}
195-
if status == Regression {
200+
if status == Regressed {
196201
regressionDetected = true
197202
}
198203
return statusToDot(status)
@@ -227,11 +232,24 @@ func (c CompareResults) writeGitHubSummary(path string) error {
227232
}
228233

229234
func statusToDot(status Status) string {
230-
return string([]rune("⚪🟢🟡🔴")[status])
235+
return string([]rune("⚪🟢🔴")[status])
231236
}
232237

233238
// formatValue formats a value according to the unit of the metric.
234239
func formatValue(val float64, metric string) string {
235240
cls := benchunit.ClassOf(metric)
236241
return benchunit.Scale(val, cls)
237242
}
243+
244+
// formatFloats formats all floating point numbers in the JSON data to the given
245+
// precision.
246+
func formatFloats(jsonData []byte, precision int) []byte {
247+
re := regexp.MustCompile(`\d+\.\d+`)
248+
return re.ReplaceAllFunc(jsonData, func(match []byte) []byte {
249+
f, err := strconv.ParseFloat(string(match), 64)
250+
if err != nil {
251+
return match
252+
}
253+
return []byte(strconv.FormatFloat(f, 'f', precision, 64))
254+
})
255+
}

0 commit comments

Comments (0)