From 1bd9895dffb4405071295768e40ecb38caaee195 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 31 Jul 2020 13:24:58 -0700 Subject: [PATCH 1/2] Implement prototype v128.load{32,64}_zero instructions Specified in https://github.com/WebAssembly/simd/pull/237. Since these are just prototypes necessary for benchmarking, this PR does not add support for these instructions to the fuzzer or the C or JS APIs. This PR also renumbers the QFMA instructions that previously used the opcodes for these new instructions. The renumbering matches the renumbering in V8 and LLVM. --- scripts/gen-s-parser.py | 2 ++ src/binaryen-c.h | 1 + src/gen-s-parser.inc | 17 ++++++++++++++--- src/passes/Print.cpp | 6 ++++++ src/tools/fuzzing.h | 4 ++++ src/wasm-binary.h | 7 +++++-- src/wasm-interpreter.h | 22 ++++++++++++++++++++++ src/wasm.h | 4 +++- src/wasm/wasm-binary.cpp | 8 ++++++++ src/wasm/wasm-s-parser.cpp | 2 ++ src/wasm/wasm-stack.cpp | 6 ++++++ src/wasm/wasm-validator.cpp | 2 ++ src/wasm/wasm.cpp | 2 ++ test/simd.wast | 10 ++++++++++ test/simd.wast.from-wast | 12 +++++++++++- test/simd.wast.fromBinary | 16 ++++++++++++++-- test/simd.wast.fromBinary.noDebugInfo | 18 +++++++++++++++--- test/spec/simd.wast | 4 ++++ 18 files changed, 131 insertions(+), 12 deletions(-) diff --git a/scripts/gen-s-parser.py b/scripts/gen-s-parser.py index 7c4fc6e561d..2d22c2cbdf8 100755 --- a/scripts/gen-s-parser.py +++ b/scripts/gen-s-parser.py @@ -476,6 +476,8 @@ ("i32x4.load16x4_u", "makeSIMDLoad(s, SIMDLoadOp::LoadExtUVec16x4ToVecI32x4)"), ("i64x2.load32x2_s", "makeSIMDLoad(s, SIMDLoadOp::LoadExtSVec32x2ToVecI64x2)"), ("i64x2.load32x2_u", "makeSIMDLoad(s, SIMDLoadOp::LoadExtUVec32x2ToVecI64x2)"), + ("v128.load32_zero", "makeSIMDLoad(s, SIMDLoadOp::Load32Zero)"), + ("v128.load64_zero", "makeSIMDLoad(s, SIMDLoadOp::Load64Zero)"), ("i8x16.narrow_i16x8_s", "makeBinary(s, BinaryOp::NarrowSVecI16x8ToVecI8x16)"), ("i8x16.narrow_i16x8_u", "makeBinary(s, BinaryOp::NarrowUVecI16x8ToVecI8x16)"), 
("i16x8.narrow_i32x4_s", "makeBinary(s, BinaryOp::NarrowSVecI32x4ToVecI16x8)"), diff --git a/src/binaryen-c.h b/src/binaryen-c.h index b749937806d..d064ceb5c2f 100644 --- a/src/binaryen-c.h +++ b/src/binaryen-c.h @@ -582,6 +582,7 @@ BINARYEN_API BinaryenOp BinaryenLoadExtSVec16x4ToVecI32x4(void); BINARYEN_API BinaryenOp BinaryenLoadExtUVec16x4ToVecI32x4(void); BINARYEN_API BinaryenOp BinaryenLoadExtSVec32x2ToVecI64x2(void); BINARYEN_API BinaryenOp BinaryenLoadExtUVec32x2ToVecI64x2(void); +// TODO: Add Load{32,64}Zero to C and JS APIs once merged to proposal BINARYEN_API BinaryenOp BinaryenNarrowSVecI16x8ToVecI8x16(void); BINARYEN_API BinaryenOp BinaryenNarrowUVecI16x8ToVecI8x16(void); BINARYEN_API BinaryenOp BinaryenNarrowSVecI32x4ToVecI16x8(void); diff --git a/src/gen-s-parser.inc b/src/gen-s-parser.inc index bc8568b19eb..5e6beb4ac66 100644 --- a/src/gen-s-parser.inc +++ b/src/gen-s-parser.inc @@ -2676,9 +2676,20 @@ switch (op[0]) { case 'c': if (strcmp(op, "v128.const") == 0) { return makeConst(s, Type::v128); } goto parse_error; - case 'l': - if (strcmp(op, "v128.load") == 0) { return makeLoad(s, Type::v128, /*isAtomic=*/false); } - goto parse_error; + case 'l': { + switch (op[9]) { + case '\0': + if (strcmp(op, "v128.load") == 0) { return makeLoad(s, Type::v128, /*isAtomic=*/false); } + goto parse_error; + case '3': + if (strcmp(op, "v128.load32_zero") == 0) { return makeSIMDLoad(s, SIMDLoadOp::Load32Zero); } + goto parse_error; + case '6': + if (strcmp(op, "v128.load64_zero") == 0) { return makeSIMDLoad(s, SIMDLoadOp::Load64Zero); } + goto parse_error; + default: goto parse_error; + } + } case 'n': if (strcmp(op, "v128.not") == 0) { return makeUnary(s, UnaryOp::NotVec128); } goto parse_error; diff --git a/src/passes/Print.cpp b/src/passes/Print.cpp index 6246a5f5ae2..d90e7f95835 100644 --- a/src/passes/Print.cpp +++ b/src/passes/Print.cpp @@ -485,6 +485,12 @@ struct PrintExpressionContents case LoadExtUVec32x2ToVecI64x2: o << "i64x2.load32x2_u"; break; + case 
Load32Zero: + o << "v128.load32_zero"; + break; + case Load64Zero: + o << "v128.load64_zero"; + break; } restoreNormalColor(o); if (curr->offset) { diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index 51e9300af4d..95ccf2613b6 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -2544,6 +2544,7 @@ class TranslateToFuzzReader { } Expression* makeSIMDLoad() { + // TODO: add Load{32,64}Zero if merged to proposal SIMDLoadOp op = pick(LoadSplatVec8x16, LoadSplatVec16x8, LoadSplatVec32x4, @@ -2575,6 +2576,9 @@ class TranslateToFuzzReader { case LoadExtUVec32x2ToVecI64x2: align = pick(1, 2, 4, 8); break; + case Load32Zero: + case Load64Zero: + WASM_UNREACHABLE("Unexpected SIMD loads"); } Expression* ptr = makePointer(); return builder.makeSIMDLoad(op, offset, align, ptr); diff --git a/src/wasm-binary.h b/src/wasm-binary.h index db93c86de10..b23be1e75bc 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -888,8 +888,11 @@ enum ASTNodes { F32x4ConvertSI32x4 = 0xfa, F32x4ConvertUI32x4 = 0xfb, - F32x4QFMA = 0xfc, - F32x4QFMS = 0xfd, + V128Load32Zero = 0xfc, + V128Load64Zero = 0xfd, + + F32x4QFMA = 0xb4, + F32x4QFMS = 0xd4, F64x2QFMA = 0xfe, F64x2QFMS = 0xff, diff --git a/src/wasm-interpreter.h b/src/wasm-interpreter.h index 124ba9aae6f..a61f549c73d 100644 --- a/src/wasm-interpreter.h +++ b/src/wasm-interpreter.h @@ -1238,6 +1238,7 @@ class ExpressionRunner : public OverriddenVisitor { Flow visitSIMDLoad(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); } Flow visitSIMDLoadSplat(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); } Flow visitSIMDLoadExtend(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); } + Flow visitSIMDLoadZero(SIMDLoad* curr) { WASM_UNREACHABLE("unimp"); } Flow visitPop(Pop* curr) { WASM_UNREACHABLE("unimp"); } Flow visitRefNull(RefNull* curr) { NOTE_ENTER("RefNull"); @@ -2174,6 +2175,9 @@ template class ModuleInstanceBase { case LoadExtSVec32x2ToVecI64x2: case LoadExtUVec32x2ToVecI64x2: return visitSIMDLoadExtend(curr); + case Load32Zero: + case 
Load64Zero: + return visitSIMDLoadZero(curr); } WASM_UNREACHABLE("invalid op"); } @@ -2266,6 +2270,24 @@ template class ModuleInstanceBase { } WASM_UNREACHABLE("invalid op"); } + Flow visitSIMDLoadZero(SIMDLoad* curr) { + Flow flow = this->visit(curr->ptr); + if (flow.breaking()) { + return flow; + } + NOTE_EVAL1(flow); + Address src = instance.getFinalAddress( + curr, flow.getSingleValue(), curr->getMemBytes()); // size in bytes (4 or 8), not bits + auto zero = + Literal::makeSingleZero(curr->op == Load32Zero ? Type::i32 : Type::i64); + if (curr->op == Load32Zero) { + auto val = Literal(instance.externalInterface->load32u(src)); + return Literal(std::array{{val, zero, zero, zero}}); + } else { + auto val = Literal(instance.externalInterface->load64u(src)); + return Literal(std::array{{val, zero}}); + } + } Flow visitHost(Host* curr) { NOTE_ENTER("Host"); switch (curr->op) { diff --git a/src/wasm.h b/src/wasm.h index fafb03d412b..3d658d3670e 100644 --- a/src/wasm.h +++ b/src/wasm.h @@ -481,7 +481,9 @@ enum SIMDLoadOp { LoadExtSVec16x4ToVecI32x4, LoadExtUVec16x4ToVecI32x4, LoadExtSVec32x2ToVecI64x2, - LoadExtUVec32x2ToVecI64x2 + LoadExtUVec32x2ToVecI64x2, + Load32Zero, + Load64Zero }; enum SIMDTernaryOp { Bitselect, QFMAF32x4, QFMSF32x4, QFMAF64x2, QFMSF64x2 }; diff --git a/src/wasm/wasm-binary.cpp b/src/wasm/wasm-binary.cpp index bb5d420a952..ddc4de36c22 100644 --- a/src/wasm/wasm-binary.cpp +++ b/src/wasm/wasm-binary.cpp @@ -4598,6 +4598,14 @@ bool WasmBinaryBuilder::maybeVisitSIMDLoad(Expression*& out, uint32_t code) { curr = allocator.alloc(); curr->op = LoadExtUVec32x2ToVecI64x2; break; + case BinaryConsts::V128Load32Zero: + curr = allocator.alloc(); + curr->op = Load32Zero; + break; + case BinaryConsts::V128Load64Zero: + curr = allocator.alloc(); + curr->op = Load64Zero; + break; default: return false; } diff --git a/src/wasm/wasm-s-parser.cpp b/src/wasm/wasm-s-parser.cpp index 429cac27343..5e6008bca36 100644 --- a/src/wasm/wasm-s-parser.cpp +++ b/src/wasm/wasm-s-parser.cpp @@ 
-1528,6 +1528,7 @@ Expression* SExpressionWasmBuilder::makeSIMDLoad(Element& s, SIMDLoadOp op) { defaultAlign = 2; break; case LoadSplatVec32x4: + case Load32Zero: defaultAlign = 4; break; case LoadSplatVec64x2: @@ -1537,6 +1538,7 @@ Expression* SExpressionWasmBuilder::makeSIMDLoad(Element& s, SIMDLoadOp op) { case LoadExtUVec16x4ToVecI32x4: case LoadExtSVec32x2ToVecI64x2: case LoadExtUVec32x2ToVecI64x2: + case Load64Zero: defaultAlign = 8; break; } diff --git a/src/wasm/wasm-stack.cpp b/src/wasm/wasm-stack.cpp index 9e3ce2afa1a..90113bb7f98 100644 --- a/src/wasm/wasm-stack.cpp +++ b/src/wasm/wasm-stack.cpp @@ -632,6 +632,12 @@ void BinaryInstWriter::visitSIMDLoad(SIMDLoad* curr) { case LoadExtUVec32x2ToVecI64x2: o << U32LEB(BinaryConsts::I64x2LoadExtUVec32x2); break; + case Load32Zero: + o << U32LEB(BinaryConsts::V128Load32Zero); + break; + case Load64Zero: + o << U32LEB(BinaryConsts::V128Load64Zero); + break; } assert(curr->align); emitMemoryAccess(curr->align, /*(unused) bytes=*/0, curr->offset); diff --git a/src/wasm/wasm-validator.cpp b/src/wasm/wasm-validator.cpp index 2c2aac1c862..5f37560e79e 100644 --- a/src/wasm/wasm-validator.cpp +++ b/src/wasm/wasm-validator.cpp @@ -1139,6 +1139,7 @@ void FunctionValidator::visitSIMDLoad(SIMDLoad* curr) { case LoadSplatVec8x16: case LoadSplatVec16x8: case LoadSplatVec32x4: + case Load32Zero: memAlignType = Type::i32; break; case LoadSplatVec64x2: @@ -1148,6 +1149,7 @@ void FunctionValidator::visitSIMDLoad(SIMDLoad* curr) { case LoadExtUVec16x4ToVecI32x4: case LoadExtSVec32x2ToVecI64x2: case LoadExtUVec32x2ToVecI64x2: + case Load64Zero: memAlignType = Type::i64; break; } diff --git a/src/wasm/wasm.cpp b/src/wasm/wasm.cpp index a0c6dbbb69e..be3ab6ccca9 100644 --- a/src/wasm/wasm.cpp +++ b/src/wasm/wasm.cpp @@ -629,6 +629,7 @@ Index SIMDLoad::getMemBytes() { case LoadSplatVec16x8: return 2; case LoadSplatVec32x4: + case Load32Zero: return 4; case LoadSplatVec64x2: case LoadExtSVec8x8ToVecI16x8: @@ -637,6 +638,7 @@ Index 
SIMDLoad::getMemBytes() { case LoadExtUVec16x4ToVecI32x4: case LoadExtSVec32x2ToVecI64x2: case LoadExtUVec32x2ToVecI64x2: + case Load64Zero: return 8; } WASM_UNREACHABLE("unexpected op"); diff --git a/test/simd.wast b/test/simd.wast index 7ee8760e1fd..619d1a5779a 100644 --- a/test/simd.wast +++ b/test/simd.wast @@ -1144,6 +1144,16 @@ (local.get $0) ) ) + (func $v128.load32_zero (param $0 i32) (result v128) + (v128.load32_zero + (local.get $0) + ) + ) + (func $v128.load64_zero (param $0 i32) (result v128) + (v128.load64_zero + (local.get $0) + ) + ) (func $v8x16.swizzle (param $0 v128) (param $1 v128) (result v128) (v8x16.swizzle (local.get $0) diff --git a/test/simd.wast.from-wast b/test/simd.wast.from-wast index 508d44c7c00..af57494e3f1 100644 --- a/test/simd.wast.from-wast +++ b/test/simd.wast.from-wast @@ -2,8 +2,8 @@ (type $v128_v128_=>_v128 (func (param v128 v128) (result v128))) (type $v128_=>_v128 (func (param v128) (result v128))) (type $v128_=>_i32 (func (param v128) (result i32))) - (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) (type $i32_=>_v128 (func (param i32) (result v128))) + (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) (type $none_=>_v128 (func (result v128))) (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $i32_v128_=>_none (func (param i32 v128))) @@ -1160,6 +1160,16 @@ (local.get $0) ) ) + (func $v128.load32_zero (param $0 i32) (result v128) + (v128.load32_zero + (local.get $0) + ) + ) + (func $v128.load64_zero (param $0 i32) (result v128) + (v128.load64_zero + (local.get $0) + ) + ) (func $v8x16.swizzle (param $0 v128) (param $1 v128) (result v128) (v8x16.swizzle (local.get $0) diff --git a/test/simd.wast.fromBinary b/test/simd.wast.fromBinary index d5337688369..7353f8ec1ab 100644 --- a/test/simd.wast.fromBinary +++ b/test/simd.wast.fromBinary @@ -2,8 +2,8 @@ (type $v128_v128_=>_v128 (func (param v128 v128) (result v128))) (type $v128_=>_v128 (func (param v128) (result v128))) 
(type $v128_=>_i32 (func (param v128) (result i32))) - (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) (type $i32_=>_v128 (func (param i32) (result v128))) + (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) (type $none_=>_v128 (func (result v128))) (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $i32_v128_=>_none (func (param i32 v128))) @@ -896,8 +896,10 @@ ) ) (func $f32x4.qfma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) - (f32x4.qfma + (drop (local.get $0) + ) + (i32x4.dot_i16x8_s (local.get $1) (local.get $2) ) @@ -1160,6 +1162,16 @@ (local.get $0) ) ) + (func $v128.load32_zero (param $0 i32) (result v128) + (v128.load32_zero + (local.get $0) + ) + ) + (func $v128.load64_zero (param $0 i32) (result v128) + (v128.load64_zero + (local.get $0) + ) + ) (func $v8x16.swizzle (param $0 v128) (param $1 v128) (result v128) (v8x16.swizzle (local.get $0) diff --git a/test/simd.wast.fromBinary.noDebugInfo b/test/simd.wast.fromBinary.noDebugInfo index 474dfc7c561..5a20c473c85 100644 --- a/test/simd.wast.fromBinary.noDebugInfo +++ b/test/simd.wast.fromBinary.noDebugInfo @@ -2,8 +2,8 @@ (type $v128_v128_=>_v128 (func (param v128 v128) (result v128))) (type $v128_=>_v128 (func (param v128) (result v128))) (type $v128_=>_i32 (func (param v128) (result i32))) - (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) (type $i32_=>_v128 (func (param i32) (result v128))) + (type $v128_i32_=>_v128 (func (param v128 i32) (result v128))) (type $none_=>_v128 (func (result v128))) (type $v128_v128_v128_=>_v128 (func (param v128 v128 v128) (result v128))) (type $i32_v128_=>_none (func (param i32 v128))) @@ -896,8 +896,10 @@ ) ) (func $156 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) - (f32x4.qfma + (drop (local.get $0) + ) + (i32x4.dot_i16x8_s (local.get $1) (local.get $2) ) @@ -1160,7 +1162,17 @@ (local.get $0) ) ) - (func $205 (param $0 v128) (param $1 v128) (result v128) + (func 
$205 (param $0 i32) (result v128) + (v128.load32_zero + (local.get $0) + ) + ) + (func $206 (param $0 i32) (result v128) + (v128.load64_zero + (local.get $0) + ) + ) + (func $207 (param $0 v128) (param $1 v128) (result v128) (v8x16.swizzle (local.get $0) (local.get $1) diff --git a/test/spec/simd.wast b/test/spec/simd.wast index 4fe9c84e28d..5cc34d3053e 100644 --- a/test/spec/simd.wast +++ b/test/spec/simd.wast @@ -232,6 +232,8 @@ (func (export "i32x4.load16x4_s") (param $0 i32) (result v128) (i32x4.load16x4_s (local.get $0))) (func (export "i64x2.load32x2_u") (param $0 i32) (result v128) (i64x2.load32x2_u (local.get $0))) (func (export "i64x2.load32x2_s") (param $0 i32) (result v128) (i64x2.load32x2_s (local.get $0))) + (func (export "v128.load32_zero") (param $0 i32) (result v128) (v128.load32_zero (local.get $0))) + (func (export "v128.load64_zero") (param $0 i32) (result v128) (v128.load64_zero (local.get $0))) (func (export "v8x16.swizzle") (param $0 v128) (param $1 v128) (result v128) (v8x16.swizzle (local.get $0) (local.get $1))) ) @@ -944,6 +946,8 @@ (assert_return (invoke "i32x4.load16x4_u" (i32.const 256)) (v128.const i32x4 0x00009080 0x0000b0a0 0x0000d0c0 0x0000f0e0)) (assert_return (invoke "i64x2.load32x2_s" (i32.const 256)) (v128.const i64x2 0xffffffffb0a09080 0xfffffffff0e0d0c0)) (assert_return (invoke "i64x2.load32x2_u" (i32.const 256)) (v128.const i64x2 0x00000000b0a09080 0x00000000f0e0d0c0)) +(assert_return (invoke "v128.load32_zero" (i32.const 256)) (v128.const i32x4 0xb0a09080 0 0 0)) +(assert_return (invoke "v128.load64_zero" (i32.const 256)) (v128.const i64x2 0xf0e0d0c0b0a09080 0)) (assert_return (invoke "v8x16.swizzle" (v128.const i8x16 0xf0 0xf1 0xf2 0xf3 0xf4 0xf5 0xf6 0xf7 0xf8 0xf9 0xfa 0xfb 0xfc 0xfd 0xfe 0xff) From ff5c5e5386a4ad5f0634de8f17c02ad170ab12b8 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 3 Aug 2020 11:09:03 -0700 Subject: [PATCH 2/2] Renumber dot, too --- src/wasm-binary.h | 2 +- test/simd.wast.fromBinary | 4 +--- 
test/simd.wast.fromBinary.noDebugInfo | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/wasm-binary.h b/src/wasm-binary.h index b23be1e75bc..ba3b4038a18 100644 --- a/src/wasm-binary.h +++ b/src/wasm-binary.h @@ -833,12 +833,12 @@ enum ASTNodes { I32x4ShrU = 0xad, I32x4Add = 0xae, I32x4Sub = 0xb1, - I32x4DotSVecI16x8 = 0xb4, I32x4Mul = 0xb5, I32x4MinS = 0xb6, I32x4MinU = 0xb7, I32x4MaxS = 0xb8, I32x4MaxU = 0xb9, + I32x4DotSVecI16x8 = 0xba, I64x2Neg = 0xc1, I64x2AnyTrue = 0xc2, diff --git a/test/simd.wast.fromBinary b/test/simd.wast.fromBinary index 7353f8ec1ab..a566894497c 100644 --- a/test/simd.wast.fromBinary +++ b/test/simd.wast.fromBinary @@ -896,10 +896,8 @@ ) ) (func $f32x4.qfma (param $0 v128) (param $1 v128) (param $2 v128) (result v128) - (drop + (f32x4.qfma (local.get $0) - ) - (i32x4.dot_i16x8_s (local.get $1) (local.get $2) ) diff --git a/test/simd.wast.fromBinary.noDebugInfo b/test/simd.wast.fromBinary.noDebugInfo index 5a20c473c85..c3bfb8a541b 100644 --- a/test/simd.wast.fromBinary.noDebugInfo +++ b/test/simd.wast.fromBinary.noDebugInfo @@ -896,10 +896,8 @@ ) ) (func $156 (param $0 v128) (param $1 v128) (param $2 v128) (result v128) - (drop + (f32x4.qfma (local.get $0) - ) - (i32x4.dot_i16x8_s (local.get $1) (local.get $2) )