Skip to content

Commit 805754c

Browse files
authored
Add e2e test for queue::fill with a range of pattern sizes (#15991)
~~Update the UR tag to include oneapi-src/unified-runtime#2273 fixing `queue::fill` for the CUDA and HIP backends. It was previously producing incorrect outputs for any pattern size other than 1, 2, or a multiple of 4 bytes. A new optimisation is also added which speeds up the fill greatly if the pattern is equal to the first word repeated throughout (e.g. all zeros). See the UR PR for more details.~~ _The UR tag update was collected in #16040 so now this PR only adds an e2e test as stated below._ Add a new e2e test to validate `queue::fill` outputs for any pattern size between 1 and 32 bytes. This test fails for CUDA and HIP before the UR change and passes with this PR. Other backends already worked correctly.
1 parent 09e1d41 commit 805754c

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed

sycl/test-e2e/USM/fill_any_size.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// RUN: %{build} -o %t1.out
2+
// RUN: %{run} %t1.out
3+
// XFAIL: (opencl && cpu)
4+
// XFAIL-TRACKER: https://github.com/oneapi-src/unified-runtime/issues/2440
5+
6+
/**
7+
* Test of the queue::fill interface with a range of pattern sizes and values.
8+
*
9+
* Loops over pattern sizes from 1 to MaxPatternSize bytes and calls queue::fill
10+
* with std::array<uint8_t,Size> for the pattern. Two pattern values are tested,
11+
* all zeros and value=index+42. The output is copied back to host and
12+
* validated.
13+
*/
14+
15+
#include <array>
16+
#include <cstdio>
17+
#include <sycl/detail/core.hpp>
18+
#include <sycl/usm.hpp>
19+
20+
constexpr size_t MaxPatternSize{32}; // Largest pattern size tested, in bytes.
21+
constexpr size_t NumElements{10}; // Pattern-sized elements allocated and filled per test.
22+
constexpr size_t NumRepeats{1}; // Times each fill is issued before the result is validated.
23+
constexpr bool verbose{false}; // When true, a result line is printed for passing tests too.
24+
25+
template <size_t PatternSize, bool SameValue>
26+
int test(sycl::queue &q, uint8_t firstValue = 0) {
27+
using T = std::array<uint8_t, PatternSize>;
28+
T value{};
29+
for (size_t i{0}; i < PatternSize; ++i) {
30+
if constexpr (SameValue) {
31+
value[i] = firstValue;
32+
} else {
33+
value[i] = firstValue + i;
34+
}
35+
}
36+
37+
T *dptr{sycl::malloc_device<T>(NumElements, q)};
38+
for (size_t repeat{0}; repeat < NumRepeats; ++repeat) {
39+
q.fill(dptr, value, NumElements).wait();
40+
}
41+
42+
std::array<T, NumElements> host{};
43+
q.copy<T>(dptr, host.data(), NumElements).wait();
44+
bool pass{true};
45+
for (size_t i{0}; i < NumElements; ++i) {
46+
for (size_t j{0}; j < PatternSize; ++j) {
47+
if (host[i][j] != value[j]) {
48+
pass = false;
49+
}
50+
}
51+
}
52+
sycl::free(dptr, q);
53+
54+
if (!pass || verbose) {
55+
printf("Pattern size %3zu bytes, %s values (initial %3u) %s\n", PatternSize,
56+
(SameValue ? " equal" : "varied"), firstValue,
57+
(pass ? "== PASS ==" : "== FAIL =="));
58+
}
59+
60+
return !pass;
61+
}
62+
63+
/**
 * Runs both pattern variants for one pattern size: all bytes zero, then
 * incrementing bytes starting at 42.
 *
 * The two runs are sequenced as separate statements so that the order of
 * device work and of any printed result lines does not depend on how the
 * compiler orders the operands of `+`.
 *
 * @tparam Size Pattern size in bytes.
 * @param q Queue passed through to test().
 * @return Number of failed variants (0, 1 or 2).
 */
template <size_t Size> int testOneSize(sycl::queue &q) {
  const int equalFailures{test<Size, true>(q, 0)};
  const int variedFailures{test<Size, false>(q, 42)};
  return equalFailures + variedFailures;
}
66+
67+
template <size_t... Sizes>
68+
int testSizes(sycl::queue &q, std::index_sequence<Sizes...>) {
69+
return (testOneSize<1u + Sizes>(q) + ...);
70+
}
71+
72+
int main() {
73+
sycl::queue q{};
74+
int failures = testSizes(q, std::make_index_sequence<MaxPatternSize>{});
75+
if (failures > 0) {
76+
printf("%d / %zu tests failed\n", failures, 2u * MaxPatternSize);
77+
} else {
78+
printf("All %zu tests passed\n", 2u * MaxPatternSize);
79+
}
80+
return failures;
81+
}

0 commit comments

Comments
 (0)