Skip to content

Commit 9a68fa1

Browse files
ngzhian authored and Commit Bot committed
[wasm-simd] Prototype f32x4 and f64x2 pmin and pmax
This patch implements f32x4.pmin, f32x4.pmax, f64x2.pmin, and f64x2.pmax for x64 and the interpreter.

Pseudo-min and pseudo-max instructions were proposed in WebAssembly/simd#122. These instructions exactly match std::min and std::max in the C++ STL, and thus have different semantics from the existing min and max.

The instruction selector for x64 switches the operands around, because doing so allows the dst to be defined as the same as the first operand (really the second input node), allowing better codegen. For example, b = f32x4.pmin(a, b) directly maps to vminps(b, b, a) or minps(b, a), as long as we can define dst == b, and switching the instruction operands around allows us to do that.

Bug: v8:10501
Change-Id: I06f983fc1764caf673e600ac91d9c0ac5166e17e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2186630
Commit-Queue: Zhi An Ng <[email protected]>
Reviewed-by: Tobias Tebbi <[email protected]>
Reviewed-by: Deepti Gandluri <[email protected]>
Cr-Commit-Position: refs/heads/master@{#67688}
1 parent 6adf7e8 commit 9a68fa1

13 files changed

+146
-3
lines changed

src/compiler/backend/instruction-selector.cc

+16
Original file line numberDiff line numberDiff line change
@@ -1884,6 +1884,10 @@ void InstructionSelector::VisitNode(Node* node) {
18841884
return MarkAsSimd128(node), VisitF64x2Qfma(node);
18851885
case IrOpcode::kF64x2Qfms:
18861886
return MarkAsSimd128(node), VisitF64x2Qfms(node);
1887+
case IrOpcode::kF64x2Pmin:
1888+
return MarkAsSimd128(node), VisitF64x2Pmin(node);
1889+
case IrOpcode::kF64x2Pmax:
1890+
return MarkAsSimd128(node), VisitF64x2Pmax(node);
18871891
case IrOpcode::kF32x4Splat:
18881892
return MarkAsSimd128(node), VisitF32x4Splat(node);
18891893
case IrOpcode::kF32x4ExtractLane:
@@ -1930,6 +1934,10 @@ void InstructionSelector::VisitNode(Node* node) {
19301934
return MarkAsSimd128(node), VisitF32x4Qfma(node);
19311935
case IrOpcode::kF32x4Qfms:
19321936
return MarkAsSimd128(node), VisitF32x4Qfms(node);
1937+
case IrOpcode::kF32x4Pmin:
1938+
return MarkAsSimd128(node), VisitF32x4Pmin(node);
1939+
case IrOpcode::kF32x4Pmax:
1940+
return MarkAsSimd128(node), VisitF32x4Pmax(node);
19331941
case IrOpcode::kI64x2Splat:
19341942
return MarkAsSimd128(node), VisitI64x2Splat(node);
19351943
case IrOpcode::kI64x2SplatI32Pair:
@@ -2653,6 +2661,14 @@ void InstructionSelector::VisitI32x4BitMask(Node* node) { UNIMPLEMENTED(); }
26532661
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
26542662
// && !V8_TARGET_ARCH_X64
26552663

2664+
// TODO(v8:10501) Prototyping pmin and pmax instructions.
2665+
#if !V8_TARGET_ARCH_X64
2666+
void InstructionSelector::VisitF32x4Pmin(Node* node) { UNIMPLEMENTED(); }
2667+
void InstructionSelector::VisitF32x4Pmax(Node* node) { UNIMPLEMENTED(); }
2668+
void InstructionSelector::VisitF64x2Pmin(Node* node) { UNIMPLEMENTED(); }
2669+
void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
2670+
#endif // !V8_TARGET_ARCH_X64
2671+
26562672
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
26572673

26582674
void InstructionSelector::VisitParameter(Node* node) {

src/compiler/backend/x64/code-generator-x64.cc

+24
Original file line numberDiff line numberDiff line change
@@ -2607,6 +2607,30 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
26072607
}
26082608
break;
26092609
}
2610+
case kX64F32x4Pmin: {
2611+
XMMRegister dst = i.OutputSimd128Register();
2612+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2613+
__ Minps(dst, i.InputSimd128Register(1));
2614+
break;
2615+
}
2616+
case kX64F32x4Pmax: {
2617+
XMMRegister dst = i.OutputSimd128Register();
2618+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2619+
__ Maxps(dst, i.InputSimd128Register(1));
2620+
break;
2621+
}
2622+
case kX64F64x2Pmin: {
2623+
XMMRegister dst = i.OutputSimd128Register();
2624+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2625+
__ Minpd(dst, i.InputSimd128Register(1));
2626+
break;
2627+
}
2628+
case kX64F64x2Pmax: {
2629+
XMMRegister dst = i.OutputSimd128Register();
2630+
DCHECK_EQ(dst, i.InputSimd128Register(0));
2631+
__ Maxpd(dst, i.InputSimd128Register(1));
2632+
break;
2633+
}
26102634
case kX64I64x2Splat: {
26112635
XMMRegister dst = i.OutputSimd128Register();
26122636
if (HasRegisterInput(instr, 0)) {

src/compiler/backend/x64/instruction-codes-x64.h

+4
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ namespace compiler {
172172
V(X64F64x2Le) \
173173
V(X64F64x2Qfma) \
174174
V(X64F64x2Qfms) \
175+
V(X64F64x2Pmin) \
176+
V(X64F64x2Pmax) \
175177
V(X64F32x4Splat) \
176178
V(X64F32x4ExtractLane) \
177179
V(X64F32x4ReplaceLane) \
@@ -195,6 +197,8 @@ namespace compiler {
195197
V(X64F32x4Le) \
196198
V(X64F32x4Qfma) \
197199
V(X64F32x4Qfms) \
200+
V(X64F32x4Pmin) \
201+
V(X64F32x4Pmax) \
198202
V(X64I64x2Splat) \
199203
V(X64I64x2ExtractLane) \
200204
V(X64I64x2ReplaceLane) \

src/compiler/backend/x64/instruction-scheduler-x64.cc

+4
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
144144
case kX64F64x2Le:
145145
case kX64F64x2Qfma:
146146
case kX64F64x2Qfms:
147+
case kX64F64x2Pmin:
148+
case kX64F64x2Pmax:
147149
case kX64F32x4Splat:
148150
case kX64F32x4ExtractLane:
149151
case kX64F32x4ReplaceLane:
@@ -167,6 +169,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
167169
case kX64F32x4Le:
168170
case kX64F32x4Qfma:
169171
case kX64F32x4Qfms:
172+
case kX64F32x4Pmin:
173+
case kX64F32x4Pmax:
170174
case kX64I64x2Splat:
171175
case kX64I64x2ExtractLane:
172176
case kX64I64x2ReplaceLane:

src/compiler/backend/x64/instruction-selector-x64.cc

+28
Original file line numberDiff line numberDiff line change
@@ -3380,6 +3380,34 @@ void InstructionSelector::VisitS8x16Swizzle(Node* node) {
33803380
arraysize(temps), temps);
33813381
}
33823382

3383+
namespace {
3384+
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
3385+
ArchOpcode opcode) {
3386+
// Due to the way minps/minpd work, we want the dst to be the same as the second
3387+
// input: b = pmin(a, b) directly maps to minps b a.
3388+
X64OperandGenerator g(selector);
3389+
selector->Emit(opcode, g.DefineSameAsFirst(node),
3390+
g.UseRegister(node->InputAt(1)),
3391+
g.UseRegister(node->InputAt(0)));
3392+
}
3393+
} // namespace
3394+
3395+
void InstructionSelector::VisitF32x4Pmin(Node* node) {
3396+
VisitPminOrPmax(this, node, kX64F32x4Pmin);
3397+
}
3398+
3399+
void InstructionSelector::VisitF32x4Pmax(Node* node) {
3400+
VisitPminOrPmax(this, node, kX64F32x4Pmax);
3401+
}
3402+
3403+
void InstructionSelector::VisitF64x2Pmin(Node* node) {
3404+
VisitPminOrPmax(this, node, kX64F64x2Pmin);
3405+
}
3406+
3407+
void InstructionSelector::VisitF64x2Pmax(Node* node) {
3408+
VisitPminOrPmax(this, node, kX64F64x2Pmax);
3409+
}
3410+
33833411
// static
33843412
MachineOperatorBuilder::Flags
33853413
InstructionSelector::SupportedMachineOperatorFlags() {

src/compiler/machine-operator.cc

+4
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
337337
V(F64x2Le, Operator::kNoProperties, 2, 0, 1) \
338338
V(F64x2Qfma, Operator::kNoProperties, 3, 0, 1) \
339339
V(F64x2Qfms, Operator::kNoProperties, 3, 0, 1) \
340+
V(F64x2Pmin, Operator::kNoProperties, 2, 0, 1) \
341+
V(F64x2Pmax, Operator::kNoProperties, 2, 0, 1) \
340342
V(F32x4Splat, Operator::kNoProperties, 1, 0, 1) \
341343
V(F32x4SConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
342344
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
@@ -358,6 +360,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
358360
V(F32x4Le, Operator::kNoProperties, 2, 0, 1) \
359361
V(F32x4Qfma, Operator::kNoProperties, 3, 0, 1) \
360362
V(F32x4Qfms, Operator::kNoProperties, 3, 0, 1) \
363+
V(F32x4Pmin, Operator::kNoProperties, 2, 0, 1) \
364+
V(F32x4Pmax, Operator::kNoProperties, 2, 0, 1) \
361365
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
362366
V(I64x2SplatI32Pair, Operator::kNoProperties, 2, 0, 1) \
363367
V(I64x2Neg, Operator::kNoProperties, 1, 0, 1) \

src/compiler/machine-operator.h

+4
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
574574
const Operator* F64x2Le();
575575
const Operator* F64x2Qfma();
576576
const Operator* F64x2Qfms();
577+
const Operator* F64x2Pmin();
578+
const Operator* F64x2Pmax();
577579

578580
const Operator* F32x4Splat();
579581
const Operator* F32x4ExtractLane(int32_t);
@@ -598,6 +600,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
598600
const Operator* F32x4Le();
599601
const Operator* F32x4Qfma();
600602
const Operator* F32x4Qfms();
603+
const Operator* F32x4Pmin();
604+
const Operator* F32x4Pmax();
601605

602606
const Operator* I64x2Splat();
603607
const Operator* I64x2SplatI32Pair();

src/compiler/opcodes.h

+4
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,8 @@
763763
V(F64x2Le) \
764764
V(F64x2Qfma) \
765765
V(F64x2Qfms) \
766+
V(F64x2Pmin) \
767+
V(F64x2Pmax) \
766768
V(F32x4Splat) \
767769
V(F32x4ExtractLane) \
768770
V(F32x4ReplaceLane) \
@@ -788,6 +790,8 @@
788790
V(F32x4Ge) \
789791
V(F32x4Qfma) \
790792
V(F32x4Qfms) \
793+
V(F32x4Pmin) \
794+
V(F32x4Pmax) \
791795
V(I64x2Splat) \
792796
V(I64x2SplatI32Pair) \
793797
V(I64x2ExtractLane) \

src/compiler/wasm-compiler.cc

+12
Original file line numberDiff line numberDiff line change
@@ -4128,6 +4128,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
41284128
case wasm::kExprF64x2Qfms:
41294129
return graph()->NewNode(mcgraph()->machine()->F64x2Qfms(), inputs[0],
41304130
inputs[1], inputs[2]);
4131+
case wasm::kExprF64x2Pmin:
4132+
return graph()->NewNode(mcgraph()->machine()->F64x2Pmin(), inputs[0],
4133+
inputs[1]);
4134+
case wasm::kExprF64x2Pmax:
4135+
return graph()->NewNode(mcgraph()->machine()->F64x2Pmax(), inputs[0],
4136+
inputs[1]);
41314137
case wasm::kExprF32x4Splat:
41324138
return graph()->NewNode(mcgraph()->machine()->F32x4Splat(), inputs[0]);
41334139
case wasm::kExprF32x4SConvertI32x4:
@@ -4193,6 +4199,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
41934199
case wasm::kExprF32x4Qfms:
41944200
return graph()->NewNode(mcgraph()->machine()->F32x4Qfms(), inputs[0],
41954201
inputs[1], inputs[2]);
4202+
case wasm::kExprF32x4Pmin:
4203+
return graph()->NewNode(mcgraph()->machine()->F32x4Pmin(), inputs[0],
4204+
inputs[1]);
4205+
case wasm::kExprF32x4Pmax:
4206+
return graph()->NewNode(mcgraph()->machine()->F32x4Pmax(), inputs[0],
4207+
inputs[1]);
41964208
case wasm::kExprI64x2Splat:
41974209
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
41984210
case wasm::kExprI64x2Neg:

src/wasm/wasm-interpreter.cc

+4
Original file line numberDiff line numberDiff line change
@@ -2299,12 +2299,16 @@ class ThreadImpl {
22992299
BINOP_CASE(F64x2Div, f64x2, float2, 2, base::Divide(a, b))
23002300
BINOP_CASE(F64x2Min, f64x2, float2, 2, JSMin(a, b))
23012301
BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
2302+
BINOP_CASE(F64x2Pmin, f64x2, float2, 2, std::min(a, b))
2303+
BINOP_CASE(F64x2Pmax, f64x2, float2, 2, std::max(a, b))
23022304
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
23032305
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
23042306
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
23052307
BINOP_CASE(F32x4Div, f32x4, float4, 4, a / b)
23062308
BINOP_CASE(F32x4Min, f32x4, float4, 4, JSMin(a, b))
23072309
BINOP_CASE(F32x4Max, f32x4, float4, 4, JSMax(a, b))
2310+
BINOP_CASE(F32x4Pmin, f32x4, float4, 4, std::min(a, b))
2311+
BINOP_CASE(F32x4Pmax, f32x4, float4, 4, std::max(a, b))
23082312
BINOP_CASE(I64x2Add, i64x2, int2, 2, base::AddWithWraparound(a, b))
23092313
BINOP_CASE(I64x2Sub, i64x2, int2, 2, base::SubWithWraparound(a, b))
23102314
BINOP_CASE(I64x2Mul, i64x2, int2, 2, base::MulWithWraparound(a, b))

src/wasm/wasm-opcodes.cc

+5
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,11 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
321321
CASE_I16x8_OP(BitMask, "bitmask")
322322
CASE_I32x4_OP(BitMask, "bitmask")
323323

324+
CASE_F32x4_OP(Pmin, "pmin")
325+
CASE_F32x4_OP(Pmax, "pmax")
326+
CASE_F64x2_OP(Pmin, "pmin")
327+
CASE_F64x2_OP(Pmax, "pmax")
328+
324329
// Atomic operations.
325330
CASE_OP(AtomicNotify, "atomic.notify")
326331
CASE_INT_OP(AtomicWait, "atomic.wait")

src/wasm/wasm-opcodes.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,11 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
461461
V(I32x4AddHoriz, 0xfdb0, s_ss) \
462462
V(F32x4AddHoriz, 0xfdb2, s_ss) \
463463
V(F32x4RecipApprox, 0xfdb3, s_s) \
464-
V(F32x4RecipSqrtApprox, 0xfdba, s_s)
464+
V(F32x4RecipSqrtApprox, 0xfdba, s_s) \
465+
V(F32x4Pmin, 0xfdda, s_ss) \
466+
V(F32x4Pmax, 0xfddb, s_ss) \
467+
V(F64x2Pmin, 0xfddc, s_ss) \
468+
V(F64x2Pmax, 0xfddd, s_ss)
465469

466470
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
467471
V(I8x16ExtractLaneS, 0xfd15, _) \

test/cctest/wasm/test-run-wasm-simd.cc

+32-2
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,18 @@ T Div(T a, T b) {
9999

100100
template <typename T>
101101
T Minimum(T a, T b) {
102-
return a <= b ? a : b;
102+
// Follow one of the possible implementations given in
103+
// https://en.cppreference.com/w/cpp/algorithm/min so that it works the same
104+
// way for floats (when given NaNs/Infs).
105+
return (b < a) ? b : a;
103106
}
104107

105108
template <typename T>
106109
T Maximum(T a, T b) {
107-
return a >= b ? a : b;
110+
// Follow one of the possible implementations given in
111+
// https://en.cppreference.com/w/cpp/algorithm/max so that it works the same
112+
// way for floats (when given NaNs/Infs).
113+
return (a < b) ? b : a;
108114
}
109115

110116
template <typename T>
@@ -750,6 +756,18 @@ WASM_SIMD_TEST(F32x4Max) {
750756
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Max, JSMax);
751757
}
752758

759+
#if V8_TARGET_ARCH_X64
760+
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmin) {
761+
FLAG_SCOPE(wasm_simd_post_mvp);
762+
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmin, Minimum);
763+
}
764+
765+
WASM_SIMD_TEST_NO_LOWERING(F32x4Pmax) {
766+
FLAG_SCOPE(wasm_simd_post_mvp);
767+
RunF32x4BinOpTest(execution_tier, lower_simd, kExprF32x4Pmax, Maximum);
768+
}
769+
#endif // V8_TARGET_ARCH_X64
770+
753771
void RunF32x4CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
754772
WasmOpcode opcode, FloatCompareOp expected_op) {
755773
WasmRunner<int32_t, float, float> r(execution_tier, lower_simd);
@@ -1340,6 +1358,18 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Div) {
13401358
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Div, Div);
13411359
}
13421360

1361+
#if V8_TARGET_ARCH_X64
1362+
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmin) {
1363+
FLAG_SCOPE(wasm_simd_post_mvp);
1364+
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmin, Minimum);
1365+
}
1366+
1367+
WASM_SIMD_TEST_NO_LOWERING(F64x2Pmax) {
1368+
FLAG_SCOPE(wasm_simd_post_mvp);
1369+
RunF64x2BinOpTest(execution_tier, lower_simd, kExprF64x2Pmax, Maximum);
1370+
}
1371+
#endif // V8_TARGET_ARCH_X64
1372+
13431373
void RunF64x2CompareOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
13441374
WasmOpcode opcode, DoubleCompareOp expected_op) {
13451375
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);

0 commit comments

Comments
 (0)