@@ -17,8 +17,7 @@ const size_t NElems = 32;
17
17
const size_t WorkGroupSize = 8 ;
18
18
const size_t NWorkGroups = NElems / WorkGroupSize;
19
19
20
- template <typename T>
21
- void initInputBuffer (buffer<T, 1 > &Buf, size_t Stride = 0 ) {
20
+ template <typename T> void initInputBuffer (buffer<T, 1 > &Buf, size_t Stride) {
22
21
auto Acc = Buf.template get_access <access ::mode::write >();
23
22
for (size_t I = 0 ; I < Buf.get_count (); I += WorkGroupSize) {
24
23
for (size_t J = 0 ; J < WorkGroupSize; J++)
@@ -77,10 +76,9 @@ template <typename T> int checkResults(buffer<T, 1> &OutBuf, size_t Stride) {
77
76
for (size_t J = 0 ; J < WorkGroupSize; J++) {
78
77
size_t ExpectedVal = (J % Stride == 0 ) ? (100 + I + J) : 0 ;
79
78
if (!checkEqual (Out[I + J], ExpectedVal)) {
80
- std::cerr << std::string (typeid (T).name ()) +
81
- " : Incorrect value at index "
82
- << I + J << " : "
83
- << " Expected: " << toString (ExpectedVal)
79
+ std::cerr << std::string (typeid (T).name ()) + " : Stride=" << Stride
80
+ << " : Incorrect value at index " << I + J
81
+ << " : Expected: " << toString (ExpectedVal)
84
82
<< " , Computed: " << toString (Out[I + J]) << " \n " ;
85
83
if (--EarlyFailout == 0 )
86
84
return 1 ;
@@ -112,15 +110,27 @@ template <typename T> int test(size_t Stride) {
112
110
size_t NElemsToCopy =
113
111
WorkGroupSize / Stride + ((WorkGroupSize % Stride) ? 1 : 0 );
114
112
size_t Offset = GrId * WorkGroupSize;
115
- auto E = NDId.async_work_group_copy (Local.get_pointer (),
116
- In.get_pointer () + Offset,
117
- NElemsToCopy, Stride);
118
- E.wait ();
119
-
120
- E = NDId.async_work_group_copy (Out.get_pointer () + Offset,
121
- Local.get_pointer (), NElemsToCopy,
122
- Stride);
123
- Group.wait_for (E);
113
+ if (Stride == 1 ) { // Check the version without stride arg.
114
+ auto E = NDId.async_work_group_copy (
115
+ Local.get_pointer (), In.get_pointer () + Offset, NElemsToCopy);
116
+ E.wait ();
117
+ } else {
118
+ auto E = NDId.async_work_group_copy (Local.get_pointer (),
119
+ In.get_pointer () + Offset,
120
+ NElemsToCopy, Stride);
121
+ E.wait ();
122
+ }
123
+
124
+ if (Stride == 1 ) { // Check the version without stride arg.
125
+ auto E = Group.async_work_group_copy (
126
+ Out.get_pointer () + Offset, Local.get_pointer (), NElemsToCopy);
127
+ Group.wait_for (E);
128
+ } else {
129
+ auto E = Group.async_work_group_copy (Out.get_pointer () + Offset,
130
+ Local.get_pointer (), NElemsToCopy,
131
+ Stride);
132
+ Group.wait_for (E);
133
+ }
124
134
});
125
135
}).wait ();
126
136
0 commit comments