Skip to content

Commit 328ea3f

Browse files
authored
Merge pull request #2458 from pbalcer/filter-suites
[benchmarks] add ability to filter benchmarks by suite
2 parents 9bad1b5 + e9c0e96 commit 328ea3f

File tree

9 files changed: 101 additions (+101) and 35 deletions (-35).

9 files changed

+101
-35
lines changed

scripts/benchmarks/benches/base.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
import tarfile
1414

1515
class Benchmark:
16-
def __init__(self, directory):
16+
def __init__(self, directory, suite):
1717
self.directory = directory
18+
self.suite = suite
1819

1920
@staticmethod
2021
def get_adapter_full_path():
@@ -74,9 +75,15 @@ def teardown(self):
7475
def stddev_threshold(self):
7576
return None
7677

78+
def get_suite_name(self) -> str:
79+
return self.suite.name()
80+
7781
class Suite:
7882
def benchmarks(self) -> list[Benchmark]:
7983
raise NotImplementedError()
8084

85+
def name(self) -> str:
86+
raise NotImplementedError()
87+
8188
def setup(self):
8289
return

scripts/benchmarks/benches/compute.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ class ComputeBench(Suite):
1515
def __init__(self, directory):
1616
self.directory = directory
1717

18+
def name(self) -> str:
19+
return "Compute Benchmarks"
20+
1821
def setup(self):
1922
if options.sycl is None:
2023
return
@@ -90,10 +93,10 @@ def parse_unit_type(compute_unit):
9093

9194
class ComputeBenchmark(Benchmark):
9295
def __init__(self, bench, name, test):
96+
super().__init__(bench.directory, bench)
9397
self.bench = bench
9498
self.bench_name = name
9599
self.test = test
96-
super().__init__(bench.directory)
97100

98101
def bin_args(self) -> list[str]:
99102
return []

scripts/benchmarks/benches/llamacpp.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ def __init__(self, directory):
2121

2222
self.directory = directory
2323

24+
def name(self) -> str:
25+
return "llama.cpp bench"
26+
2427
def setup(self):
2528
if options.sycl is None:
2629
return
@@ -64,8 +67,8 @@ def benchmarks(self) -> list[Benchmark]:
6467

6568
class LlamaBench(Benchmark):
6669
def __init__(self, bench):
70+
super().__init__(bench.directory, bench)
6771
self.bench = bench
68-
super().__init__(bench.directory)
6972

7073
def setup(self):
7174
self.benchmark_bin = os.path.join(self.bench.build_path, 'bin', 'llama-bench')

scripts/benchmarks/benches/result.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ class Result:
2525
# values below should not be set by the benchmark
2626
name: str = ""
2727
lower_is_better: bool = True
28-
git_hash: str = ''
28+
git_hash: str = ""
2929
date: Optional[datetime] = None
30+
suite: str = ""
3031

3132
@dataclass_json
3233
@dataclass

scripts/benchmarks/benches/syclbench.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def __init__(self, directory):
1919
self.directory = directory
2020
return
2121

22+
def name(self) -> str:
23+
return "SYCL-Bench"
24+
2225
def setup(self):
2326
if options.sycl is None:
2427
return
@@ -87,11 +90,11 @@ def benchmarks(self) -> list[Benchmark]:
8790

8891
class SyclBenchmark(Benchmark):
8992
def __init__(self, bench, name, test):
93+
super().__init__(bench.directory, bench)
9094
self.bench = bench
9195
self.bench_name = name
9296
self.test = test
9397
self.done = False
94-
super().__init__(bench.directory)
9598

9699
def bin_args(self) -> list[str]:
97100
return []

scripts/benchmarks/benches/umf.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ def __init__(self, directory):
2222
self.directory = directory
2323
if not isUMFAvailable():
2424
print("UMF not provided. Related benchmarks will not run")
25-
25+
26+
def name(self) -> str:
27+
return "UMF"
28+
2629
def setup(self):
2730
if not isUMFAvailable():
2831
return []

scripts/benchmarks/benches/velocity.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ def __init__(self, directory):
2222

2323
self.directory = directory
2424

25+
def name(self) -> str:
26+
return "Velocity Bench"
27+
2528
def setup(self):
2629
if options.sycl is None:
2730
return
@@ -46,7 +49,7 @@ def benchmarks(self) -> list[Benchmark]:
4649

4750
class VelocityBase(Benchmark):
4851
def __init__(self, name: str, bin_name: str, vb: VelocityBench, unit: str):
49-
super().__init__(vb.directory)
52+
super().__init__(vb.directory, vb)
5053
self.vb = vb
5154
self.bench_name = name
5255
self.bin_name = bin_name

scripts/benchmarks/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def run_iterations(benchmark: Benchmark, env_vars, iters: int, results: dict[str
4242

4343
bench_result.name = bench_result.label
4444
bench_result.lower_is_better = benchmark.lower_is_better()
45+
bench_result.suite = benchmark.get_suite_name()
4546

4647
if bench_result.label not in results:
4748
results[bench_result.label] = []

scripts/benchmarks/output_html.py

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
@dataclass
1616
class BenchmarkMetadata:
1717
unit: str
18+
suite: str
1819
lower_is_better: bool
1920

2021
@dataclass
@@ -26,6 +27,7 @@ class BenchmarkSeries:
2627
@dataclass
2728
class BenchmarkChart:
2829
label: str
30+
suite: str
2931
html: str
3032

3133
def tooltip_css() -> str:
@@ -74,13 +76,6 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
7476
targets=targets)
7577
mpld3.plugins.connect(fig, tooltip)
7678

77-
# This is so that the stddev doesn't fill the entire y axis on the chart
78-
if all_values and all_stddevs:
79-
max_value = max(all_values)
80-
min_value = min(all_values)
81-
max_stddev = max(all_stddevs)
82-
ax.set_ylim(min_value - 3 * max_stddev, max_value + 3 * max_stddev)
83-
8479
ax.set_title(benchmark.label, pad=20)
8580
performance_indicator = "lower is better" if benchmark.metadata.lower_is_better else "higher is better"
8681
ax.text(0.5, 1.05, f"({performance_indicator})",
@@ -98,7 +93,7 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
9893
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))
9994

10095
plt.tight_layout()
101-
html_charts.append(BenchmarkChart(html=mpld3.fig_to_html(fig), label=benchmark.label))
96+
html_charts.append(BenchmarkChart(html=mpld3.fig_to_html(fig), label=benchmark.label, suite=benchmark.metadata.suite))
10297
plt.close(fig)
10398

10499
return html_charts
@@ -119,7 +114,7 @@ def create_explicit_groups(benchmark_runs: list[BenchmarkRun], compare_names: li
119114
if res.explicit_group != '':
120115
if res.explicit_group not in groups:
121116
groups[res.explicit_group] = ExplicitGroup(name=res.explicit_group, nnames=len(compare_names),
122-
metadata=BenchmarkMetadata(unit=res.unit, lower_is_better=res.lower_is_better),
117+
metadata=BenchmarkMetadata(unit=res.unit, lower_is_better=res.lower_is_better, suite=res.suite),
123118
runs={})
124119

125120
group = groups[res.explicit_group]
@@ -207,7 +202,7 @@ def create_grouped_bar_charts(groups: list[ExplicitGroup]) -> list[BenchmarkChar
207202
color='#666666')
208203

209204
plt.tight_layout()
210-
html_charts.append(BenchmarkChart(label=group.name, html=mpld3.fig_to_html(fig)))
205+
html_charts.append(BenchmarkChart(label=group.name, html=mpld3.fig_to_html(fig), suite=group.metadata.suite))
211206
plt.close(fig)
212207

213208
return html_charts
@@ -224,7 +219,8 @@ def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: li
224219
if result.label not in benchmark_metadata:
225220
benchmark_metadata[result.label] = BenchmarkMetadata(
226221
unit=result.unit,
227-
lower_is_better=result.lower_is_better
222+
lower_is_better=result.lower_is_better,
223+
suite=result.suite
228224
)
229225

230226
result.date = run.date
@@ -249,12 +245,15 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
249245
benchmarks = process_benchmark_data(benchmark_runs, compare_names)
250246

251247
timeseries = create_time_series_chart(benchmarks, github_repo)
252-
timeseries_charts_html = '\n'.join(f'<div class="chart" data-label="{ts.label}"><div>{ts.html}</div></div>' for ts in timeseries)
248+
timeseries_charts_html = '\n'.join(f'<div class="chart" data-label="{ts.label}" data-suite="{ts.suite}"><div>{ts.html}</div></div>' for ts in timeseries)
253249

254250
explicit_groups = create_explicit_groups(benchmark_runs, compare_names)
255251

256252
bar_charts = create_grouped_bar_charts(explicit_groups)
257-
bar_charts_html = '\n'.join(f'<div class="chart" data-label="{bc.label}"><div>{bc.html}</div></div>' for bc in bar_charts)
253+
bar_charts_html = '\n'.join(f'<div class="chart" data-label="{bc.label}" data-suite="{bc.suite}"><div>{bc.html}</div></div>' for bc in bar_charts)
254+
255+
suite_names = {t.suite for t in timeseries}
256+
suite_checkboxes_html = ' '.join(f'<label><input type="checkbox" class="suite-checkbox" data-suite="{suite}" checked> {suite}</label>' for suite in suite_names)
258257

259258
html_template = f"""
260259
<!DOCTYPE html>
@@ -317,6 +316,16 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
317316
width: 400px;
318317
max-width: 100%;
319318
}}
319+
.suite-filter-container {{
320+
text-align: center;
321+
margin-bottom: 24px;
322+
padding: 16px;
323+
background: #e9ecef;
324+
border-radius: 8px;
325+
}}
326+
.suite-checkbox {{
327+
margin: 0 8px;
328+
}}
320329
details {{
321330
margin-bottom: 24px;
322331
}}
@@ -342,54 +351,87 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
342351
function filterCharts() {{
343352
const regexInput = document.getElementById('bench-filter').value;
344353
const regex = new RegExp(regexInput, 'i');
354+
const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
345355
const charts = document.querySelectorAll('.chart');
346-
let timeseriesVisible = false;
347-
let barChartsVisible = false;
348356
349357
charts.forEach(chart => {{
350358
const label = chart.getAttribute('data-label');
351-
if (regex.test(label)) {{
359+
const suite = chart.getAttribute('data-suite');
360+
if (regex.test(label) && activeSuites.includes(suite)) {{
352361
chart.style.display = '';
353-
if (chart.closest('.timeseries')) {{
354-
timeseriesVisible = true;
355-
}} else if (chart.closest('.bar-charts')) {{
356-
barChartsVisible = true;
357-
}}
358362
}} else {{
359363
chart.style.display = 'none';
360364
}}
361365
}});
362366
363-
updateURL(regexInput);
364-
365-
document.querySelector('.timeseries').open = timeseriesVisible;
366-
document.querySelector('.bar-charts').open = barChartsVisible;
367+
updateURL();
367368
}}
368369
369-
function updateURL(regex) {{
370+
function updateURL() {{
370371
const url = new URL(window.location);
372+
const regex = document.getElementById('bench-filter').value;
373+
const activeSuites = Array.from(document.querySelectorAll('.suite-checkbox:checked')).map(checkbox => checkbox.getAttribute('data-suite'));
374+
371375
if (regex) {{
372376
url.searchParams.set('regex', regex);
373377
}} else {{
374378
url.searchParams.delete('regex');
375379
}}
380+
381+
if (activeSuites.length > 0) {{
382+
url.searchParams.set('suites', activeSuites.join(','));
383+
}} else {{
384+
url.searchParams.delete('suites');
385+
}}
386+
376387
history.replaceState(null, '', url);
377388
}}
378389
379390
document.addEventListener('DOMContentLoaded', (event) => {{
380391
const regexParam = getQueryParam('regex');
392+
const suitesParam = getQueryParam('suites');
393+
381394
if (regexParam) {{
382395
document.getElementById('bench-filter').value = regexParam;
383-
filterCharts();
384396
}}
397+
398+
const suiteCheckboxes = document.querySelectorAll('.suite-checkbox');
399+
if (suitesParam) {{
400+
const suites = suitesParam.split(',');
401+
suiteCheckboxes.forEach(checkbox => {{
402+
if (suites.includes(checkbox.getAttribute('data-suite'))) {{
403+
checkbox.checked = true;
404+
}} else {{
405+
checkbox.checked = false;
406+
}}
407+
}});
408+
}} else {{
409+
suiteCheckboxes.forEach(checkbox => {{
410+
checkbox.checked = true;
411+
}});
412+
}}
413+
filterCharts();
414+
415+
suiteCheckboxes.forEach(checkbox => {{
416+
checkbox.addEventListener('change', () => {{
417+
filterCharts();
418+
}});
419+
}});
420+
421+
document.getElementById('bench-filter').addEventListener('input', () => {{
422+
filterCharts();
423+
}});
385424
}});
386425
</script>
387426
</head>
388427
<body>
389428
<div class="container">
390429
<h1>Benchmark Results</h1>
391430
<div class="filter-container">
392-
<input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
431+
<input type="text" id="bench-filter" placeholder="Regex...">
432+
</div>
433+
<div class="suite-filter-container">
434+
{suite_checkboxes_html}
393435
</div>
394436
<details class="timeseries">
395437
<summary>Historical Results</summary>

0 commit comments

Comments
 (0)