@@ -2870,7 +2870,54 @@ void LiftoffAssembler::emit_s8x16_shuffle(LiftoffRegister dst,
2870
2870
LiftoffRegister lhs,
2871
2871
LiftoffRegister rhs,
2872
2872
const uint8_t shuffle[16 ]) {
// Emits code that fills dst with bytes picked from lhs and rhs according to
// the 16 indices in |shuffle|. The indices are materialized in a scratch Q
// register and each 64-bit half of the result is produced by one vtbl whose
// table is the D-register list starting at the low half of lhs.
2873
- bailout (kSimd , " s8x16_shuffle" );
2873
+ Simd128Register dest = liftoff::GetSimd128Register (dst);
2874
+ Simd128Register src1 = liftoff::GetSimd128Register (lhs);
2875
+ Simd128Register src2 = liftoff::GetSimd128Register (rhs);
2876
+ UseScratchRegisterScope temps (this );
2877
+ Simd128Register scratch = temps.AcquireQ ();
2878
+ if ((src1 != src2) && src1.code () + 1 != src2.code ()) {
2879
+ // vtbl reads its table from a list of consecutive D registers, so src1 and
2880
+ // src2 must either be the same register or src2 must directly follow src1.
2881
+ // This branch handles the remaining case (different and not consecutive):
2882
+ // copy both into q14 and q15, which are never allocated by Liftoff (checked
2883
+ // by the static_assert below). When src1 == src2 nothing is moved here and
2884
+ // a shorter list operand (table_size = 2) is built further down.
2885
+ static_assert (!(kLiftoffAssemblerFpCacheRegs &
2886
+ (d28.bit () | d29.bit () | d30.bit () | d31.bit ())),
2887
+ " This only works if q14-q15 (d28-d31) are not used." );
2888
+ vmov (q14, src1);
2889
+ src1 = q14;
2890
+ vmov (q15, src2);
2891
+ src2 = q15;
2892
+ }
2893
+
2894
+ int table_size = src1 == src2 ? 2 : 4 ;  // Number of D registers in the table.
2895
+ uint32_t mask = table_size == 2 ? 0x0F0F0F0F : 0x1F1F1F1F ;  // Per-byte index mask.
2896
+
// Pack the 16 shuffle indices into four 32-bit words (shuffle[j * 4] ends up
// in the least significant byte of word j) and load word j into the S
// register with code scratch.code() * 4 + j, which this code treats as the
// j-th 32-bit lane of scratch.
2897
+ int scratch_s_base = scratch.code () * 4 ;
2898
+ for (int j = 0 ; j < 4 ; j++) {
2899
+ uint32_t imm = 0 ;
2900
+ for (int i = 3 ; i >= 0 ; i--) {
2901
+ imm = (imm << 8 ) | shuffle[j * 4 + i];
2902
+ }
2903
+ uint32_t four_lanes = imm;
2904
+ // Ensure indices are in [0,15] if table_size is 2, or [0,31] if 4.
2905
+ four_lanes &= mask;
2906
+ vmov (SwVfpRegister::from_code (scratch_s_base + j),
2907
+ Float32::FromBits (four_lanes));
2908
+ }
2909
+
2910
+ DwVfpRegister table_base = src1.low ();
2911
+ NeonListOperand table (table_base, table_size);
2912
+
2913
+ if (dest != src1 && dest != src2) {
2914
+ vtbl (dest.low (), table, scratch.low ());
2915
+ vtbl (dest.high (), table, scratch.high ());
2916
+ } else {
// dest is src1 or src2, i.e. one of the table registers: perform both
// lookups into scratch first so the table is still intact for the second
// vtbl, then copy the finished result into dest.
2917
+ vtbl (scratch.low (), table, scratch.low ());
2918
+ vtbl (scratch.high (), table, scratch.high ());
2919
+ vmov (dest, scratch);
2920
+ }
2874
2921
}
2875
2922
2876
2923
void LiftoffAssembler::emit_i8x16_splat (LiftoffRegister dst,
0 commit comments