ambroff
diff --git a/‎js/src/jit-test/lib/wasm-binary.js
+32-2 b/‎js/src/jit-test/lib/wasm-binary.js
+32-2
diff --git a/‎js/src/jit-test/tests/wasm/simd/experimental.js
+200 b/‎js/src/jit-test/tests/wasm/simd/experimental.js
+200
diff --git a/‎js/src/jit/MacroAssembler.h
+48 b/‎js/src/jit/MacroAssembler.h
+48
diff --git a/‎js/src/jit/arm/MacroAssembler-arm.cpp
+1 b/‎js/src/jit/arm/MacroAssembler-arm.cpp
+1
diff --git a/‎js/src/jit/arm64/MacroAssembler-arm64.cpp
+2 b/‎js/src/jit/arm64/MacroAssembler-arm64.cpp
+2
diff --git a/‎js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
+2 b/‎js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
+2
diff --git a/‎js/src/jit/shared/Assembler-shared.h
+10-1 b/‎js/src/jit/shared/Assembler-shared.h
+10-1
diff --git a/‎js/src/jit/x64/MacroAssembler-x64.cpp
+4-2 b/‎js/src/jit/x64/MacroAssembler-x64.cpp
+4-2
@@ -39,6 +39,7 @@ const I32Code          = 0x7f;
 const I64Code          = 0x7e;
 const F32Code          = 0x7d;
 const F64Code          = 0x7c;
+const V128Code         = 0x7b;
 const AnyFuncCode      = 0x70;
 const AnyrefCode       = 0x6f;
 const OptRefCode       = 0x6c;
@@ -53,6 +54,7 @@ const CallCode         = 0x10;
 const CallIndirectCode = 0x11;
 const DropCode         = 0x1a;
 const SelectCode       = 0x1b;
+const LocalGetCode     = 0x20;
 const I32Load          = 0x28;
 const I64Load          = 0x29;
 const F32Load          = 0x2a;
@@ -102,6 +104,27 @@ const RefNullCode      = 0xd0;
 const RefIsNullCode    = 0xd1;
 const RefFuncCode      = 0xd2;
 
+// SIMD opcodes
+const V128LoadCode = 0x00;
+const V128StoreCode = 0x0b;
+
+// Experimental SIMD opcodes as of August, 2020.
+const I32x4DotSI16x8Code = 0xba;
+const F32x4CeilCode = 0xd8;
+const F32x4FloorCode = 0xd9;
+const F32x4TruncCode = 0xda;
+const F32x4NearestCode = 0xdb;
+const F64x2CeilCode = 0xdc;
+const F64x2FloorCode = 0xdd;
+const F64x2TruncCode = 0xde;
+const F64x2NearestCode = 0xdf;
+const F32x4PMinCode = 0xea;
+const F32x4PMaxCode = 0xeb;
+const F64x2PMinCode = 0xf6;
+const F64x2PMaxCode = 0xf7;
+const V128Load32ZeroCode = 0xfc;
+const V128Load64ZeroCode = 0xfd;
+
 const FirstInvalidOpcode = 0xc5;
 const LastInvalidOpcode = 0xfa;
 const GcPrefix = 0xfb;
@@ -300,8 +323,15 @@ function exportSection(exports) {
     body.push(...varU32(exports.length));
     for (let exp of exports) {
         body.push(...string(exp.name));
-        body.push(...varU32(FunctionCode));
-        body.push(...varU32(exp.funcIndex));
+        if (exp.hasOwnProperty("funcIndex")) {
+            body.push(...varU32(FunctionCode));
+            body.push(...varU32(exp.funcIndex));
+        } else if (exp.hasOwnProperty("memIndex")) {
+            body.push(...varU32(MemoryCode));
+            body.push(...varU32(exp.memIndex));
+        } else {
+            throw "Bad export " + exp;
+        }
     }
     return { name: exportId, body };
 }
 
@@ -0,0 +1,200 @@
+// Experimental opcodes.  We have no text parsing support for these yet.  The
+// tests will be cleaned up and moved into ad-hack.js if the opcodes are
+// adopted.
+
+// When simd is enabled by default in release builds we will flip the value of
+// SimdExperimentalEnabled to false in RELEASE_OR_BETA builds.  At that point,
+// these tests will start failing in release or beta builds, and a guard
+// asserting !RELEASE_OR_BETA will have to be added above.  That is how it
+// should be.
+
+load(libdir + "wasm-binary.js");
+
+function wasmEval(bytes, imports) {
+    return new WebAssembly.Instance(new WebAssembly.Module(bytes), imports);
+}
+
+function get(arr, loc, len) {
+    let res = [];
+    for ( let i=0; i < len; i++ ) {
+        res.push(arr[loc+i]);
+    }
+    return res;
+}
+
+function set(arr, loc, vals) {
+    for ( let i=0; i < vals.length; i++ ) {
+        if (arr instanceof BigInt64Array) {
+            arr[loc+i] = BigInt(vals[i]);
+        } else {
+            arr[loc+i] = vals[i];
+        }
+    }
+}
+
+function assertSame(got, expected) {
+    assertEq(got.length, expected.length);
+    for ( let i=0; i < got.length; i++ ) {
+        let g = got[i];
+        let e = expected[i];
+        if (typeof g != typeof e) {
+            if (typeof g == "bigint")
+                e = BigInt(e);
+            else if (typeof e == "bigint")
+                g = BigInt(g);
+        }
+        assertEq(g, e);
+    }
+}
+
+function iota(len) {
+    let xs = [];
+    for ( let i=0 ; i < len ; i++ )
+        xs.push(i);
+    return xs;
+}
+
+function pmin(x, y) { return y < x ? y : x }
+function pmax(x, y) { return x < y ? y : x }
+
+function ffloor(x) { return Math.fround(Math.floor(x)) }
+function fceil(x) { return Math.fround(Math.ceil(x)) }
+function ftrunc(x) { return Math.fround(Math.sign(x)*Math.floor(Math.abs(x))) }
+function fnearest(x) { return Math.fround(Math.round(x)) }
+
+function dfloor(x) { return Math.floor(x) }
+function dceil(x) { return Math.ceil(x) }
+function dtrunc(x) { return Math.sign(x)*Math.floor(Math.abs(x)) }
+function dnearest(x) { return Math.round(x) }
+
+const v2vSig = {args:[], ret:VoidCode};
+
+function V128Load(addr) {
+    return [I32ConstCode, varS32(addr),
+            SimdPrefix, V128LoadCode, 4, varU32(0)]
+}
+
+function V128StoreExpr(addr, v) {
+    return [I32ConstCode, varS32(addr),
+            ...v,
+            SimdPrefix, V128StoreCode, 4, varU32(0)];
+}
+
+// Pseudo-min/max, https://github.com/WebAssembly/simd/pull/122
+var fxs = [5, 1, -4, 2];
+var fys = [6, 0, -7, 3];
+var dxs = [5, 1];
+var dys = [6, 0];
+
+for ( let [opcode, xs, ys, operator] of [[F32x4PMinCode, fxs, fys, pmin],
+                                         [F32x4PMaxCode, fxs, fys, pmax],
+                                         [F64x2PMinCode, dxs, dys, pmin],
+                                         [F64x2PMaxCode, dxs, dys, pmax]] ) {
+    var k = xs.length;
+    var ans = iota(k).map((i) => operator(xs[i], ys[i]))
+
+    var ins = wasmEval(moduleWithSections([
+        sigSection([v2vSig]),
+        declSection([0]),
+        memorySection(1),
+        exportSection([{funcIndex: 0, name: "run"},
+                       {memIndex: 0, name: "mem"}]),
+        bodySection([
+            funcBody({locals:[],
+                      body: [...V128StoreExpr(0, [...V128Load(16),
+                                                  ...V128Load(32),
+                                                  SimdPrefix, varU32(opcode)])]})])]));
+
+    var mem = new (k == 4 ? Float32Array : Float64Array)(ins.exports.mem.buffer);
+    set(mem, k, xs);
+    set(mem, 2*k, ys);
+    ins.exports.run();
+    var result = get(mem, 0, k);
+    assertSame(result, ans);
+}
+
+// Widening integer dot product, https://github.com/WebAssembly/simd/pull/127
+
+var ins = wasmEval(moduleWithSections([
+    sigSection([v2vSig]),
+    declSection([0]),
+    memorySection(1),
+    exportSection([{funcIndex: 0, name: "run"},
+                   {memIndex: 0, name: "mem"}]),
+    bodySection([
+        funcBody({locals:[],
+                  body: [...V128StoreExpr(0, [...V128Load(16),
+                                              ...V128Load(32),
+                                              SimdPrefix, varU32(I32x4DotSI16x8Code)])]})])]));
+
+var xs = [5, 1, -4, 2, 20, -15, 12, 3];
+var ys = [6, 0, -7, 3, 8, -1, -3, 7];
+var ans = [xs[0]*ys[0] + xs[1]*ys[1],
+           xs[2]*ys[2] + xs[3]*ys[3],
+           xs[4]*ys[4] + xs[5]*ys[5],
+           xs[6]*ys[6] + xs[7]*ys[7]];
+
+var mem16 = new Int16Array(ins.exports.mem.buffer);
+var mem32 = new Int32Array(ins.exports.mem.buffer);
+set(mem16, 8, xs);
+set(mem16, 16, ys);
+ins.exports.run();
+var result = get(mem32, 0, 4);
+assertSame(result, ans);
+
+// Rounding, https://github.com/WebAssembly/simd/pull/232
+
+var fxs = [5.1, -1.1, -4.3, 0];
+var dxs = [5.1, -1.1];
+
+for ( let [opcode, xs, operator] of [[F32x4CeilCode, fxs, fceil],
+                                     [F32x4FloorCode, fxs, ffloor],
+                                     [F32x4TruncCode, fxs, ftrunc],
+                                     [F32x4NearestCode, fxs, fnearest],
+                                     [F64x2CeilCode, dxs, dceil],
+                                     [F64x2FloorCode, dxs, dfloor],
+                                     [F64x2TruncCode, dxs, dtrunc],
+                                     [F64x2NearestCode, dxs, dnearest]] ) {
+    var k = xs.length;
+    var ans = xs.map(operator);
+
+    var ins = wasmEval(moduleWithSections([
+        sigSection([v2vSig]),
+        declSection([0]),
+        memorySection(1),
+        exportSection([{funcIndex: 0, name: "run"},
+                       {memIndex: 0, name: "mem"}]),
+        bodySection([
+            funcBody({locals:[],
+                      body: [...V128StoreExpr(0, [...V128Load(16),
+                                                  SimdPrefix, varU32(opcode)])]})])]));
+
+    var mem = new (k == 4 ? Float32Array : Float64Array)(ins.exports.mem.buffer);
+    set(mem, k, xs);
+    ins.exports.run();
+    var result = get(mem, 0, k);
+    assertSame(result, ans);
+}
+
+// Zero-extending SIMD load, https://github.com/WebAssembly/simd/pull/237
+
+for ( let [opcode, k, log2align, cons, cast] of [[V128Load32ZeroCode, 4, 2, Int32Array, Number],
+                                                 [V128Load64ZeroCode, 2, 3, BigInt64Array, BigInt]] ) {
+    var ins = wasmEval(moduleWithSections([
+        sigSection([v2vSig]),
+        declSection([0]),
+        memorySection(1),
+        exportSection([{funcIndex: 0, name: "run"},
+                       {memIndex: 0, name: "mem"}]),
+        bodySection([
+            funcBody({locals:[],
+                      body: [...V128StoreExpr(0, [I32ConstCode, varU32(16),
+                                                  SimdPrefix, varU32(opcode), log2align, varU32(0)])]})])]));
+
+    var mem = new cons(ins.exports.mem.buffer);
+    mem[k] = cast(37);
+    ins.exports.run();
+    var result = get(mem, 0, k);
+    assertSame(result, iota(k).map((v) => v == 0 ? 37 : 0));
+}
+
@@ -2524,6 +2524,54 @@ class MacroAssembler : public MacroAssemblerSpecific {
   inline void unsignedWidenLowInt32x4(FloatRegister src, FloatRegister dest)
       DEFINED_ON(x86_shared);
 
+  // Compare-based minimum/maximum (experimental as of August, 2020)
+  // https://github.com/WebAssembly/simd/pull/122
+
+  inline void pseudoMinFloat32x4(FloatRegister rhs, FloatRegister lhsDest)
+      DEFINED_ON(x86_shared);
+
+  inline void pseudoMinFloat64x2(FloatRegister rhs, FloatRegister lhsDest)
+      DEFINED_ON(x86_shared);
+
+  inline void pseudoMaxFloat32x4(FloatRegister rhs, FloatRegister lhsDest)
+      DEFINED_ON(x86_shared);
+
+  inline void pseudoMaxFloat64x2(FloatRegister rhs, FloatRegister lhsDest)
+      DEFINED_ON(x86_shared);
+
+  // Widening/pairwise integer dot product (experimental as of August, 2020)
+  // https://github.com/WebAssembly/simd/pull/127
+
+  inline void widenDotInt16x8(FloatRegister rhs, FloatRegister lhsDest)
+      DEFINED_ON(x86_shared);
+
+  // Floating point rounding (experimental as of August, 2020)
+  // https://github.com/WebAssembly/simd/pull/232
+
+  inline void ceilFloat32x4(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void ceilFloat64x2(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void floorFloat32x4(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void floorFloat64x2(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void truncFloat32x4(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void truncFloat64x2(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void nearestFloat32x4(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
+  inline void nearestFloat64x2(FloatRegister src, FloatRegister dest)
+      DEFINED_ON(x86_shared);
+
  public:
   // ========================================================================
   // Truncate floating point.
 
@@ -6028,6 +6028,7 @@ void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
       }
     }
   } else {
+    MOZ_ASSERT(!access.isZeroExtendSimd128Load());
     bool isFloat = output.isFloat();
     if (isFloat) {
       MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
 
@@ -360,9 +360,11 @@ void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
         Ldr(SelectGPReg(outany, out64), srcAddr);
         break;
       case Scalar::Float32:
+        MOZ_ASSERT(!access.isZeroExtendSimd128Load());
         Ldr(SelectFPReg(outany, out64, 32), srcAddr);
         break;
       case Scalar::Float64:
+        MOZ_ASSERT(!access.isZeroExtendSimd128Load());
         Ldr(SelectFPReg(outany, out64, 64), srcAddr);
         break;
       case Scalar::Uint8Clamped:
 
@@ -2126,9 +2126,11 @@ void MacroAssemblerMIPSShared::wasmLoadImpl(
       isSigned = false;
       break;
     case Scalar::Float64:
+      MOZ_ASSERT(!access.isZeroExtendSimd128Load());
       isFloat = true;
       break;
     case Scalar::Float32:
+      MOZ_ASSERT(!access.isZeroExtendSimd128Load());
       isFloat = true;
       break;
     default:
 
@@ -492,6 +492,7 @@ class MemoryAccessDesc {
   Scalar::Type type_;
   jit::Synchronization sync_;
   wasm::BytecodeOffset trapOffset_;
+  bool zeroExtendSimd128Load_;
 
  public:
   explicit MemoryAccessDesc(
@@ -502,7 +503,8 @@ class MemoryAccessDesc {
         align_(align),
         type_(type),
         sync_(sync),
-        trapOffset_(trapOffset) {
+        trapOffset_(trapOffset),
+        zeroExtendSimd128Load_(false) {
     MOZ_ASSERT(mozilla::IsPowerOfTwo(align));
   }
 
@@ -513,6 +515,13 @@ class MemoryAccessDesc {
   const jit::Synchronization& sync() const { return sync_; }
   BytecodeOffset trapOffset() const { return trapOffset_; }
   bool isAtomic() const { return !sync_.isNone(); }
+  bool isZeroExtendSimd128Load() const { return zeroExtendSimd128Load_; }
+
+  void setZeroExtendSimd128Load() {
+    MOZ_ASSERT(type() == Scalar::Float32 || type() == Scalar::Float64);
+    MOZ_ASSERT(!isAtomic());
+    zeroExtendSimd128Load_ = true;
+  }
 
   void clearOffset() { offset_ = 0; }
   void setOffset(uint32_t offset) { offset_ = offset; }
 
@@ -596,10 +596,12 @@ void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
       movl(srcAddr, out.gpr());
       break;
     case Scalar::Float32:
-      loadFloat32(srcAddr, out.fpu());
+      // vmovss does the right thing also for access.isZeroExtendSimd128Load()
+      vmovss(srcAddr, out.fpu());
       break;
     case Scalar::Float64:
-      loadDouble(srcAddr, out.fpu());
+      // vmovsd does the right thing also for access.isZeroExtendSimd128Load()
+      vmovsd(srcAddr, out.fpu());
       break;
     case Scalar::Simd128:
       MacroAssemblerX64::loadUnalignedSimd128(srcAddr, out.fpu());
Original file line number	Diff line number	Diff line change
`@@ -6028,6 +6028,7 @@ void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,`
`6028`	`6028`	`}`
`6029`	`6029`	`}`
`6030`	`6030`	`} else {`
	`6031`	`+ MOZ_ASSERT(!access.isZeroExtendSimd128Load());`
`6031`	`6032`	`bool isFloat = output.isFloat();`
`6032`	`6033`	`if (isFloat) {`
`6033`	`6034`	`MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());`