Skip to content

Commit fd7872b

Browse files
authored
Merge branch 'main' into add-opencl-device-queries
2 parents f74b2d7 + e37f75f commit fd7872b

File tree

6 files changed

+61
-38
lines changed

6 files changed

+61
-38
lines changed

scripts/benchmarks/benches/compute.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,9 @@ def run(self, env_vars) -> list[Result]:
118118
result = self.run_bench(command, env_vars)
119119
parsed_results = self.parse_output(result)
120120
ret = []
121-
for label, mean, unit in parsed_results:
121+
for label, median, stddev, unit in parsed_results:
122122
extra_label = " CPU count" if parse_unit_type(unit) == "instr" else ""
123-
ret.append(Result(label=self.name() + extra_label, value=mean, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit)))
123+
ret.append(Result(label=self.name() + extra_label, value=median, stddev=stddev, command=command, env=env_vars, stdout=result, unit=parse_unit_type(unit)))
124124
return ret
125125

126126
def parse_output(self, output):
@@ -135,8 +135,11 @@ def parse_output(self, output):
135135
try:
136136
label = data_row[0]
137137
mean = float(data_row[1])
138+
median = float(data_row[2])
139+
# compute benchmarks report stddev as %
140+
stddev = mean * (float(data_row[3].strip('%')) / 100.0)
138141
unit = data_row[7]
139-
results.append((label, mean, unit))
142+
results.append((label, median, stddev, unit))
140143
except (ValueError, IndexError) as e:
141144
raise ValueError(f"Error parsing output: {e}")
142145
if len(results) == 0:

scripts/benchmarks/benches/result.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ class Result:
1818
stdout: str
1919
passed: bool = True
2020
unit: str = ""
21-
# values should not be set by the benchmark
21+
# stddev can be optionally set by the benchmark,
22+
# if not set, it will be calculated automatically.
23+
stddev: float = 0.0
24+
# values below should not be set by the benchmark
2225
name: str = ""
2326
lower_is_better: bool = True
2427
git_hash: str = ''
2528
date: Optional[datetime] = None
26-
stddev: float = 0.0
2729

2830
@dataclass_json
2931
@dataclass

scripts/benchmarks/main.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,10 @@ def process_results(results: dict[str, list[Result]]) -> tuple[bool, list[Result
103103
rlist.sort(key=lambda res: res.value)
104104
median_index = len(rlist) // 2
105105
median_result = rlist[median_index]
106-
median_result.stddev = stddev
106+
107+
# only override the stddev if not already set
108+
if median_result.stddev == 0.0:
109+
median_result.stddev = stddev
107110

108111
processed.append(median_result)
109112

@@ -160,7 +163,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
160163
if valid:
161164
break
162165
results += processed
163-
164166
except Exception as e:
165167
if options.exit_on_failure:
166168
raise e

scripts/benchmarks/output_html.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,32 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
3232

3333
num_benchmarks = len(benchmarks)
3434
if num_benchmarks == 0:
35-
return
35+
return []
3636

3737
html_charts = []
3838

3939
for _, benchmark in enumerate(benchmarks):
4040
fig, ax = plt.subplots(figsize=(10, 4))
4141

42+
all_values = []
43+
all_stddevs = []
44+
4245
for run in benchmark.runs:
4346
sorted_points = sorted(run.results, key=lambda x: x.date)
4447
dates = [point.date for point in sorted_points]
4548
values = [point.value for point in sorted_points]
49+
stddevs = [point.stddev for point in sorted_points]
50+
51+
all_values.extend(values)
52+
all_stddevs.extend(stddevs)
4653

47-
ax.plot_date(dates, values, '-', label=run.name, alpha=0.5)
54+
ax.errorbar(dates, values, yerr=stddevs, fmt='-', label=run.name, alpha=0.5)
4855
scatter = ax.scatter(dates, values, picker=True)
4956

5057
tooltip_labels = [
5158
f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
52-
f"Value: {point.value:.2f}\n"
59+
f"Value: {point.value:.2f} {benchmark.metadata.unit}\n"
60+
f"Stddev: {point.stddev:.2f} {benchmark.metadata.unit}\n"
5361
f"Git Hash: {point.git_hash}"
5462
for point in sorted_points
5563
]
@@ -62,6 +70,13 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
6270
targets=targets)
6371
mpld3.plugins.connect(fig, tooltip)
6472

73+
# This is so that the stddev doesn't fill the entire y axis on the chart
74+
if all_values and all_stddevs:
75+
max_value = max(all_values)
76+
min_value = min(all_values)
77+
max_stddev = max(all_stddevs)
78+
ax.set_ylim(min_value - 3 * max_stddev, max_value + 3 * max_stddev)
79+
6580
ax.set_title(benchmark.label, pad=20)
6681
performance_indicator = "lower is better" if benchmark.metadata.lower_is_better else "higher is better"
6782
ax.text(0.5, 1.05, f"({performance_indicator})",
@@ -79,7 +94,7 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
7994
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))
8095

8196
plt.tight_layout()
82-
html_charts.append(BenchmarkTimeSeries(html= mpld3.fig_to_html(fig), label= benchmark.label))
97+
html_charts.append(BenchmarkTimeSeries(html=mpld3.fig_to_html(fig), label=benchmark.label))
8398
plt.close(fig)
8499

85100
return html_charts

source/adapters/cuda/command_buffer.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::addWaitNodes(
105105
}
106106
// Set DepsList as an output parameter for communicating the list of wait
107107
// nodes created.
108-
DepsList = WaitNodes;
108+
DepsList = std::move(WaitNodes);
109109
return UR_RESULT_SUCCESS;
110110
}
111111

@@ -115,7 +115,7 @@ kernel_command_handle::kernel_command_handle(
115115
const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr,
116116
const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives,
117117
ur_kernel_handle_t *KernelAlternatives, CUgraphNode SignalNode,
118-
std::vector<CUgraphNode> WaitNodes)
118+
const std::vector<CUgraphNode> &WaitNodes)
119119
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
120120
WaitNodes),
121121
Kernel(Kernel), Params(Params), WorkDim(WorkDim) {
@@ -146,7 +146,7 @@ kernel_command_handle::kernel_command_handle(
146146
ur_exp_command_buffer_command_handle_t_::
147147
ur_exp_command_buffer_command_handle_t_(
148148
ur_exp_command_buffer_handle_t CommandBuffer, CUgraphNode Node,
149-
CUgraphNode SignalNode, std::vector<CUgraphNode> WaitNodes)
149+
CUgraphNode SignalNode, const std::vector<CUgraphNode> &WaitNodes)
150150
: CommandBuffer(CommandBuffer), Node(Node), SignalNode(SignalNode),
151151
WaitNodes(WaitNodes), RefCountInternal(1), RefCountExternal(1) {
152152
CommandBuffer->incrementInternalReferenceCount();
@@ -339,7 +339,7 @@ static ur_result_t enqueueCommandBufferFillHelper(
339339
}
340340

341341
std::vector<CUgraphNode> WaitNodes =
342-
NumEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
342+
NumEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
343343
auto NewCommand = new T(CommandBuffer, GraphNode, SignalNode, WaitNodes);
344344
CommandBuffer->CommandHandles.push_back(NewCommand);
345345

@@ -537,7 +537,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
537537
}
538538

539539
std::vector<CUgraphNode> WaitNodes =
540-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
540+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
541541
auto NewCommand = new kernel_command_handle(
542542
hCommandBuffer, hKernel, GraphNode, NodeParams, workDim,
543543
pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
@@ -595,7 +595,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp(
595595
}
596596

597597
std::vector<CUgraphNode> WaitNodes =
598-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
598+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
599599
auto NewCommand = new usm_memcpy_command_handle(hCommandBuffer, GraphNode,
600600
SignalNode, WaitNodes);
601601
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -663,9 +663,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
663663
}
664664

665665
std::vector<CUgraphNode> WaitNodes =
666-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
666+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
667667
auto NewCommand = new buffer_copy_command_handle(hCommandBuffer, GraphNode,
668668
SignalNode, WaitNodes);
669+
hCommandBuffer->CommandHandles.push_back(NewCommand);
669670

670671
if (phCommand) {
671672
NewCommand->incrementInternalReferenceCount();
@@ -727,7 +728,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
727728
}
728729

729730
std::vector<CUgraphNode> WaitNodes =
730-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
731+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
731732
auto NewCommand = new buffer_copy_rect_command_handle(
732733
hCommandBuffer, GraphNode, SignalNode, WaitNodes);
733734
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -788,7 +789,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp(
788789
}
789790

790791
std::vector<CUgraphNode> WaitNodes =
791-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
792+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
792793
auto NewCommand = new buffer_write_command_handle(hCommandBuffer, GraphNode,
793794
SignalNode, WaitNodes);
794795
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -848,7 +849,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp(
848849
}
849850

850851
std::vector<CUgraphNode> WaitNodes =
851-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
852+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
852853
auto NewCommand = new buffer_read_command_handle(hCommandBuffer, GraphNode,
853854
SignalNode, WaitNodes);
854855
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -913,7 +914,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp(
913914
}
914915

915916
std::vector<CUgraphNode> WaitNodes =
916-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
917+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
917918
auto NewCommand = new buffer_write_rect_command_handle(
918919
hCommandBuffer, GraphNode, SignalNode, WaitNodes);
919920
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -978,7 +979,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp(
978979
}
979980

980981
std::vector<CUgraphNode> WaitNodes =
981-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
982+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
982983
auto NewCommand = new buffer_read_rect_command_handle(
983984
hCommandBuffer, GraphNode, SignalNode, WaitNodes);
984985
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -1034,7 +1035,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
10341035
}
10351036

10361037
std::vector<CUgraphNode> WaitNodes =
1037-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
1038+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
10381039
auto NewCommand = new usm_prefetch_command_handle(hCommandBuffer, GraphNode,
10391040
SignalNode, WaitNodes);
10401041
hCommandBuffer->CommandHandles.push_back(NewCommand);
@@ -1090,7 +1091,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
10901091
}
10911092

10921093
std::vector<CUgraphNode> WaitNodes =
1093-
numEventsInWaitList ? DepsList : std::vector<CUgraphNode>();
1094+
numEventsInWaitList ? std::move(DepsList) : std::vector<CUgraphNode>();
10941095
auto NewCommand = new usm_advise_command_handle(hCommandBuffer, GraphNode,
10951096
SignalNode, WaitNodes);
10961097
hCommandBuffer->CommandHandles.push_back(NewCommand);

source/adapters/cuda/command_buffer.hpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ enum class CommandType {
5656
struct ur_exp_command_buffer_command_handle_t_ {
5757
ur_exp_command_buffer_command_handle_t_(
5858
ur_exp_command_buffer_handle_t CommandBuffer, CUgraphNode Node,
59-
CUgraphNode SignalNode, std::vector<CUgraphNode> WaitNodes);
59+
CUgraphNode SignalNode, const std::vector<CUgraphNode> &WaitNodes);
6060

6161
virtual ~ur_exp_command_buffer_command_handle_t_() {}
6262

@@ -102,7 +102,7 @@ struct kernel_command_handle : ur_exp_command_buffer_command_handle_t_ {
102102
const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr,
103103
const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives,
104104
ur_kernel_handle_t *KernelAlternatives, CUgraphNode SignalNode,
105-
std::vector<CUgraphNode> WaitNodes);
105+
const std::vector<CUgraphNode> &WaitNodes);
106106

107107
CommandType getCommandType() const noexcept override {
108108
return CommandType::Kernel;
@@ -161,7 +161,7 @@ struct kernel_command_handle : ur_exp_command_buffer_command_handle_t_ {
161161
struct usm_memcpy_command_handle : ur_exp_command_buffer_command_handle_t_ {
162162
usm_memcpy_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
163163
CUgraphNode Node, CUgraphNode SignalNode,
164-
std::vector<CUgraphNode> WaitNodes)
164+
const std::vector<CUgraphNode> &WaitNodes)
165165
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
166166
WaitNodes) {}
167167
CommandType getCommandType() const noexcept override {
@@ -172,7 +172,7 @@ struct usm_memcpy_command_handle : ur_exp_command_buffer_command_handle_t_ {
172172
struct usm_fill_command_handle : ur_exp_command_buffer_command_handle_t_ {
173173
usm_fill_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
174174
CUgraphNode Node, CUgraphNode SignalNode,
175-
std::vector<CUgraphNode> WaitNodes)
175+
const std::vector<CUgraphNode> &WaitNodes)
176176
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
177177
WaitNodes) {}
178178
CommandType getCommandType() const noexcept override {
@@ -183,7 +183,7 @@ struct usm_fill_command_handle : ur_exp_command_buffer_command_handle_t_ {
183183
struct buffer_copy_command_handle : ur_exp_command_buffer_command_handle_t_ {
184184
buffer_copy_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
185185
CUgraphNode Node, CUgraphNode SignalNode,
186-
std::vector<CUgraphNode> WaitNodes)
186+
const std::vector<CUgraphNode> &WaitNodes)
187187
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
188188
WaitNodes) {}
189189
CommandType getCommandType() const noexcept override {
@@ -195,7 +195,7 @@ struct buffer_copy_rect_command_handle
195195
: ur_exp_command_buffer_command_handle_t_ {
196196
buffer_copy_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
197197
CUgraphNode Node, CUgraphNode SignalNode,
198-
std::vector<CUgraphNode> WaitNodes)
198+
const std::vector<CUgraphNode> &WaitNodes)
199199
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
200200
WaitNodes) {}
201201
CommandType getCommandType() const noexcept override {
@@ -206,7 +206,7 @@ struct buffer_copy_rect_command_handle
206206
struct buffer_read_command_handle : ur_exp_command_buffer_command_handle_t_ {
207207
buffer_read_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
208208
CUgraphNode Node, CUgraphNode SignalNode,
209-
std::vector<CUgraphNode> WaitNodes)
209+
const std::vector<CUgraphNode> &WaitNodes)
210210
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
211211
WaitNodes) {}
212212
CommandType getCommandType() const noexcept override {
@@ -218,7 +218,7 @@ struct buffer_read_rect_command_handle
218218
: ur_exp_command_buffer_command_handle_t_ {
219219
buffer_read_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
220220
CUgraphNode Node, CUgraphNode SignalNode,
221-
std::vector<CUgraphNode> WaitNodes)
221+
const std::vector<CUgraphNode> &WaitNodes)
222222
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
223223
WaitNodes) {}
224224
CommandType getCommandType() const noexcept override {
@@ -229,7 +229,7 @@ struct buffer_read_rect_command_handle
229229
struct buffer_write_command_handle : ur_exp_command_buffer_command_handle_t_ {
230230
buffer_write_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
231231
CUgraphNode Node, CUgraphNode SignalNode,
232-
std::vector<CUgraphNode> WaitNodes)
232+
const std::vector<CUgraphNode> &WaitNodes)
233233
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
234234
WaitNodes) {}
235235
CommandType getCommandType() const noexcept override {
@@ -241,7 +241,7 @@ struct buffer_write_rect_command_handle
241241
: ur_exp_command_buffer_command_handle_t_ {
242242
buffer_write_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
243243
CUgraphNode Node, CUgraphNode SignalNode,
244-
std::vector<CUgraphNode> WaitNodes)
244+
const std::vector<CUgraphNode> &WaitNodes)
245245
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
246246
WaitNodes) {}
247247
CommandType getCommandType() const noexcept override {
@@ -252,7 +252,7 @@ struct buffer_write_rect_command_handle
252252
struct buffer_fill_command_handle : ur_exp_command_buffer_command_handle_t_ {
253253
buffer_fill_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
254254
CUgraphNode Node, CUgraphNode SignalNode,
255-
std::vector<CUgraphNode> WaitNodes)
255+
const std::vector<CUgraphNode> &WaitNodes)
256256
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
257257
WaitNodes) {}
258258
CommandType getCommandType() const noexcept override {
@@ -263,7 +263,7 @@ struct buffer_fill_command_handle : ur_exp_command_buffer_command_handle_t_ {
263263
struct usm_prefetch_command_handle : ur_exp_command_buffer_command_handle_t_ {
264264
usm_prefetch_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
265265
CUgraphNode Node, CUgraphNode SignalNode,
266-
std::vector<CUgraphNode> WaitNodes)
266+
const std::vector<CUgraphNode> &WaitNodes)
267267
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
268268
WaitNodes) {}
269269
CommandType getCommandType() const noexcept override {
@@ -274,7 +274,7 @@ struct usm_prefetch_command_handle : ur_exp_command_buffer_command_handle_t_ {
274274
struct usm_advise_command_handle : ur_exp_command_buffer_command_handle_t_ {
275275
usm_advise_command_handle(ur_exp_command_buffer_handle_t CommandBuffer,
276276
CUgraphNode Node, CUgraphNode SignalNode,
277-
std::vector<CUgraphNode> WaitNodes)
277+
const std::vector<CUgraphNode> &WaitNodes)
278278
: ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode,
279279
WaitNodes) {}
280280
CommandType getCommandType() const noexcept override {

0 commit comments

Comments
 (0)