Skip to content

Commit 6c75eb1

Browse files
committed
[UR][CUDA][HIP] Fix incorrect outputs and improve performance of queue::fill
Update the UR tag to fix queue::fill for the CUDA and HIP backends, which was previously producing incorrect outputs for any pattern size other than 1, 2, or a multiple of 4 bytes. A new optimisation is also added which speeds up the fill greatly if the pattern equals to the first word repeated throughout (e.g. all zeros). Add a new e2e test to validate queue::fill outputs for any pattern size between 1 and 32 bytes. This test fails for CUDA and HIP before the UR change and passes with this PR. Other backends already worked correctly.
1 parent db59b4f commit 6c75eb1

File tree

1 file changed

+79
-0
lines changed

1 file changed

+79
-0
lines changed

sycl/test-e2e/USM/fill_any_size.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// RUN: %{build} -o %t1.out
2+
// RUN: %{run} %t1.out
3+
4+
/**
5+
* Test of the queue::fill interface with a range of pattern sizes and values.
6+
*
7+
* Loops over pattern sizes from 1 to MaxPatternSize bytes and calls queue::fill
8+
* with std::array<uint8_t,Size> for the pattern. Two pattern values are tested,
9+
* all zeros and value=index+42. The output is copied back to host and
10+
* validated.
11+
*/
12+
13+
#include <array>
14+
#include <cstdio>
15+
#include <sycl/detail/core.hpp>
16+
#include <sycl/usm.hpp>
17+
18+
constexpr size_t MaxPatternSize{32}; // bytes
19+
constexpr size_t NumElements{10};
20+
constexpr size_t NumRepeats{1};
21+
constexpr bool verbose{false};
22+
23+
template <size_t PatternSize, bool SameValue>
24+
int test(sycl::queue &q, uint8_t firstValue = 0) {
25+
using T = std::array<uint8_t, PatternSize>;
26+
T value{};
27+
for (unsigned int i{0}; i < PatternSize; ++i) {
28+
if constexpr (SameValue) {
29+
value[0] = firstValue;
30+
} else {
31+
value[i] = firstValue + i;
32+
}
33+
}
34+
35+
T *dptr{sycl::malloc_device<T>(NumElements, q)};
36+
for (size_t repeat{0}; repeat < NumRepeats; ++repeat) {
37+
q.fill(dptr, value, NumElements).wait();
38+
}
39+
40+
std::array<T, NumElements> host{};
41+
q.copy<T>(dptr, host.data(), NumElements).wait();
42+
bool pass{true};
43+
for (unsigned int i{0}; i < NumElements; ++i) {
44+
for (unsigned int j{0}; j < PatternSize; ++j) {
45+
if (host[i][j] != value[j]) {
46+
pass = false;
47+
}
48+
}
49+
}
50+
sycl::free(dptr, q);
51+
52+
if (!pass || verbose) {
53+
printf("Pattern size %3lu bytes, %s values (initial %3u) %s\n", PatternSize,
54+
(SameValue ? " equal" : "varied"), firstValue,
55+
(pass ? "== PASS ==" : "== FAIL =="));
56+
}
57+
58+
return !pass;
59+
}
60+
61+
template <size_t Size> int testOneSize(sycl::queue &q) {
62+
return test<Size, true>(q, 0) + test<Size, false>(q, 42);
63+
}
64+
65+
template <size_t... Sizes>
66+
int testSizes(sycl::queue &q, std::index_sequence<Sizes...>) {
67+
return (testOneSize<1u + Sizes>(q) + ...);
68+
}
69+
70+
int main() {
71+
sycl::queue q{};
72+
int failures = testSizes(q, std::make_index_sequence<MaxPatternSize>{});
73+
if (failures > 0) {
74+
printf("%d / %lu tests failed\n", failures, 2 * MaxPatternSize);
75+
} else {
76+
printf("All %lu tests passed\n", 2 * MaxPatternSize);
77+
}
78+
return failures;
79+
}

0 commit comments

Comments
 (0)