Skip to content

Commit 7c429d9

Browse files
ngzhian authored and Commit Bot committed
[wasm-simd][liftoff][arm] Implement s8x16shuffle
Bug: v8:9909
Change-Id: Icb4dd53f02bcd3b38511bb028768d276e3bfebaf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2251041
Commit-Queue: Zhi An Ng <[email protected]>
Reviewed-by: Clemens Backes <[email protected]>
Cr-Commit-Position: refs/heads/master@{#68467}
1 parent b47c163 commit 7c429d9

File tree

2 files changed

+83
-1
lines changed

2 files changed

+83
-1
lines changed

src/wasm/baseline/arm/liftoff-assembler-arm.h

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2870,7 +2870,54 @@ void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
28702870
LiftoffRegister lhs,
28712871
LiftoffRegister rhs,
28722872
const uint8_t shuffle[16]) {
2873-
bailout(kSimd, "s8x16_shuffle");
2873+
Simd128Register dest = liftoff::GetSimd128Register(dst);
2874+
Simd128Register src1 = liftoff::GetSimd128Register(lhs);
2875+
Simd128Register src2 = liftoff::GetSimd128Register(rhs);
2876+
UseScratchRegisterScope temps(this);
2877+
Simd128Register scratch = temps.AcquireQ();
2878+
if ((src1 != src2) && src1.code() + 1 != src2.code()) {
2879+
// vtbl requires the operands to be consecutive or the same.
2880+
// If they are the same, we build a smaller list operand (table_size = 2).
2881+
// If they are not the same, and not consecutive, we move the src1 and src2
2882+
// to q14 and q15, which will be unused since they are not allocatable in
2883+
// Liftoff. After the move they are consecutive, so the full list operand
2884+
// (table_size = 4) is built below.
2885+
static_assert(!(kLiftoffAssemblerFpCacheRegs &
2886+
(d28.bit() | d29.bit() | d30.bit() | d31.bit())),
2887+
"This only works if q14-q15 (d28-d31) are not used.");
2888+
vmov(q14, src1);
2889+
src1 = q14;
2890+
vmov(q15, src2);
2891+
src2 = q15;
2892+
}
2893+
2894+
int table_size = src1 == src2 ? 2 : 4;
2895+
uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F;
2896+
2897+
int scratch_s_base = scratch.code() * 4;
2898+
for (int j = 0; j < 4; j++) {
2899+
uint32_t imm = 0;
2900+
for (int i = 3; i >= 0; i--) {
2901+
imm = (imm << 8) | shuffle[j * 4 + i];
2902+
}
2903+
uint32_t four_lanes = imm;
2904+
// Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
2905+
four_lanes &= mask;
2906+
vmov(SwVfpRegister::from_code(scratch_s_base + j),
2907+
Float32::FromBits(four_lanes));
2908+
}
2909+
2910+
DwVfpRegister table_base = src1.low();
2911+
NeonListOperand table(table_base, table_size);
2912+
2913+
if (dest != src1 && dest != src2) {
2914+
vtbl(dest.low(), table, scratch.low());
2915+
vtbl(dest.high(), table, scratch.high());
2916+
} else {
2917+
vtbl(scratch.low(), table, scratch.low());
2918+
vtbl(scratch.high(), table, scratch.high());
2919+
vmov(dest, scratch);
2920+
}
28742921
}
28752922

28762923
void LiftoffAssembler::emit_i8x16_splat(LiftoffRegister dst,

test/cctest/wasm/test-run-wasm-simd-liftoff.cc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,41 @@ WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle) {
151151
CHECK_EQ(31, ReadLittleEndianValue<byte>(&output[15]));
152152
}
153153

154+
// Exercise logic in Liftoff's implementation of shuffle when inputs to the
155+
// shuffle are the same register.
156+
WASM_SIMD_LIFTOFF_TEST(S8x16Shuffle_SingleOperand) {
157+
WasmRunner<int32_t> r(ExecutionTier::kLiftoff, kNoLowerSimd);
158+
byte local0 = r.AllocateLocal(kWasmS128);
159+
160+
// Input vector: byte lane i holds the value i. Presumably this is global
// index 0, which the code below reads with WASM_GET_GLOBAL(0).
byte* g0 = r.builder().AddGlobal<byte>(kWasmS128);
161+
for (int i = 0; i < 16; i++) {
162+
WriteLittleEndianValue<byte>(&g0[i], i);
163+
}
164+
165+
// Result vector: presumably global index 1, the target of
// WASM_SET_GLOBAL(1, ...) below.
byte* output = r.builder().AddGlobal<byte>(kWasmS128);
166+
167+
// This pattern reverses the first operand. 31 should select the last lane of
168+
// the second operand, but since the operands are the same, the effect is that
169+
// the first operand is reversed.
170+
std::array<byte, 16> pattern = {
171+
{31, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}};
172+
173+
// Set up locals so shuffle is called with local0 as both of its operands,
// which is the single-operand case this test targets.
174+
BUILD(r, WASM_SET_LOCAL(local0, WASM_GET_GLOBAL(0)), WASM_GET_LOCAL(local0),
175+
WASM_GET_LOCAL(local0),
176+
WASM_SET_GLOBAL(1, WASM_SIMD_S8x16_SHUFFLE_OP(
177+
kExprS8x16Shuffle, pattern, WASM_NOP, WASM_NOP)),
178+
WASM_ONE);
179+
180+
r.Call();
181+
182+
for (int i = 0; i < 16; i++) {
183+
// Check that the output is the reverse of the input: lane i must hold
// 15 - i because the input lanes were initialized to 0..15.
184+
byte actual = ReadLittleEndianValue<byte>(&output[i]);
185+
CHECK_EQ(15 - i, actual);
186+
}
187+
}
188+
154189
#undef WASM_SIMD_LIFTOFF_TEST
155190

156191
} // namespace test_run_wasm_simd_liftoff

0 commit comments

Comments
 (0)