@@ -131,6 +131,58 @@ HWY_NOINLINE void TestAllSafeCopyN() {
131
131
ForAllTypes(ForPartialVectors<TestSafeCopyN>());
132
132
}
133
133
134
+ struct TestStoreInterleaved2 {
135
+ template <class T, class D>
136
+ HWY_NOINLINE void operator()(T /*unused*/, D d) {
137
+ const size_t N = Lanes(d);
138
+
139
+ RandomState rng;
140
+
141
+ // Data to be interleaved
142
+ auto bytes = AllocateAligned<uint8_t>(2 * N);
143
+ for (size_t i = 0; i < 2 * N; ++i) {
144
+ bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF);
145
+ }
146
+ const auto in0 = Load(d, &bytes[0 * N]);
147
+ const auto in1 = Load(d, &bytes[1 * N]);
148
+
149
+ // Interleave here, ensure vector results match scalar
150
+ auto expected = AllocateAligned<T>(3 * N);
151
+ auto actual_aligned = AllocateAligned<T>(3 * N + 1);
152
+ T* actual = actual_aligned.get() + 1;
153
+
154
+ for (size_t rep = 0; rep < 100; ++rep) {
155
+ for (size_t i = 0; i < N; ++i) {
156
+ expected[2 * i + 0] = bytes[0 * N + i];
157
+ expected[2 * i + 1] = bytes[1 * N + i];
158
+ // Ensure we do not write more than 2*N bytes
159
+ expected[2 * N + i] = actual[2 * N + i] = 0;
160
+ }
161
+ StoreInterleaved2(in0, in1, d, actual);
162
+ size_t pos = 0;
163
+ if (!BytesEqual(expected.get(), actual, 3 * N, &pos)) {
164
+ Print(d, "in0", in0, pos / 4);
165
+ Print(d, "in1", in1, pos / 4);
166
+ const size_t i = pos;
167
+ fprintf(stderr, "interleaved %d %d %d %d %d %d %d %d\n", actual[i],
168
+ actual[i + 1], actual[i + 2], actual[i + 3], actual[i + 4],
169
+ actual[i + 5], actual[i + 6], actual[i + 7]);
170
+ HWY_ASSERT(false);
171
+ }
172
+ }
173
+ }
174
+ };
175
+
176
+ HWY_NOINLINE void TestAllStoreInterleaved2() {
177
+ #if HWY_TARGET == HWY_RVV
178
+ // Segments are limited to 8 registers, so we can only go up to LMUL=2.
179
+ const ForExtendableVectors<TestStoreInterleaved2, 2> test;
180
+ #else
181
+ const ForPartialVectors<TestStoreInterleaved2> test;
182
+ #endif
183
+ test(uint8_t());
184
+ }
185
+
134
186
struct TestStoreInterleaved3 {
135
187
template <class T, class D>
136
188
HWY_NOINLINE void operator()(T /*unused*/, D d) {
@@ -443,6 +495,7 @@ namespace hwy {
443
495
HWY_BEFORE_TEST(HwyMemoryTest);
444
496
HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadStore);
445
497
HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllSafeCopyN);
498
+ HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStoreInterleaved2);
446
499
HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStoreInterleaved3);
447
500
HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStoreInterleaved4);
448
501
HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadDup128);
0 commit comments