Skip to content
This repository was archived by the owner on Dec 22, 2021. It is now read-only.

Commit d154084

Browse files
authored
Implement i32x4.dot_i16x8_s (#393)
It multiplies respective lanes from the 2 input operands, then adds adjacent lanes. This was merged into the proposal in #127.
1 parent 599b20d commit d154084

File tree

12 files changed

+189
-0
lines changed

12 files changed

+189
-0
lines changed

interpreter/binary/decode.ml

+1
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ let simd_prefix s =
370370
| 0xb7l -> i32x4_min_u
371371
| 0xb8l -> i32x4_max_s
372372
| 0xb9l -> i32x4_max_u
373+
| 0xbal -> i32x4_dot_i16x8_s
373374
| 0xc1l -> i64x2_neg
374375
| 0xcbl -> i64x2_shl
375376
| 0xccl -> i64x2_shr_s

interpreter/binary/encode.ml

+1
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ let encode m =
466466
| Binary (V128 V128Op.(I32x4 MinU)) -> simd_op 0xb7l
467467
| Binary (V128 V128Op.(I32x4 MaxS)) -> simd_op 0xb8l
468468
| Binary (V128 V128Op.(I32x4 MaxU)) -> simd_op 0xb9l
469+
| Binary (V128 V128Op.(I32x4 DotI16x8S)) -> simd_op 0xbal
469470
| Binary (V128 V128Op.(I32x4 Mul)) -> simd_op 0xb5l
470471
| Binary (V128 V128Op.(I32x4 Eq)) -> simd_op 0x37l
471472
| Binary (V128 V128Op.(I32x4 Ne)) -> simd_op 0x38l

interpreter/exec/eval_simd.ml

+1
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct
118118
| I32x4 GtU -> SXX.I32x4.gt_u
119119
| I32x4 GeS -> SXX.I32x4.ge_s
120120
| I32x4 GeU -> SXX.I32x4.ge_u
121+
| I32x4 DotI16x8S -> SXX.I32x4_convert.dot_i16x8_s
121122
| I64x2 Add -> SXX.I64x2.add
122123
| I64x2 Sub -> SXX.I64x2.sub
123124
| I64x2 Mul -> SXX.I64x2.mul

interpreter/exec/simd.ml

+12
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ sig
185185
val widen_high_s : t -> t
186186
val widen_low_u : t -> t
187187
val widen_high_u : t -> t
188+
val dot_i16x8_s : t -> t -> t
188189
end
189190
module I64x2_convert : sig
190191
val widen_low_s : t -> t
@@ -429,6 +430,17 @@ struct
429430
let widen_high_s = widen Lib.List.drop 0xffffffffl
430431
let widen_low_u = widen Lib.List.take 0xffffl
431432
let widen_high_u = widen Lib.List.drop 0xffffl
433+
434+
(* Dot product of two i16x8 vectors producing an i32x4 vector: corresponding
   lanes of [x] and [y] are multiplied, and each adjacent pair of products is
   added to form one result lane. All arithmetic is done with [Int32]
   operations on the lane values returned by [Rep.to_i16x8]. *)
let dot_i16x8_s x y =
  let lhs = Rep.to_i16x8 x in
  let rhs = Rep.to_i16x8 y in
  (* Consumes two lanes from each operand per step. *)
  let rec sum_pairs = function
    | [], [] -> []
    | l0 :: l1 :: ls, r0 :: r1 :: rs ->
      let first = Int32.mul l0 r0 in
      let second = Int32.mul l1 r1 in
      Int32.add first second :: sum_pairs (ls, rs)
    (* Lane lists of odd or unequal length are treated as an internal error. *)
    | _, _ -> assert false
  in
  Rep.of_i32x4 (sum_pairs (lhs, rhs))
432444
end
433445

434446
module I64x2_convert = struct

interpreter/syntax/ast.ml

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ struct
5454
| Eq | Ne | LtS | LtU | LeS | LeU | GtS | GtU | GeS | GeU
5555
| Swizzle | Shuffle of int list | NarrowS | NarrowU
5656
| AddSatS | AddSatU | SubSatS | SubSatU
57+
| DotI16x8S
5758
type funop = Abs | Neg | Sqrt
5859
| Ceil | Floor | Trunc | Nearest
5960
| ConvertI32x4S | ConvertI32x4U

interpreter/syntax/operators.ml

+1
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ let i32x4_max_u = Binary (V128 V128Op.(I32x4 MaxU))
367367
let i32x4_mul = Binary (V128 V128Op.(I32x4 Mul))
368368
let i32x4_trunc_sat_f32x4_s = Unary (V128 V128Op.(I32x4 TruncSatF32x4S))
369369
let i32x4_trunc_sat_f32x4_u = Unary (V128 V128Op.(I32x4 TruncSatF32x4U))
370+
let i32x4_dot_i16x8_s = Binary (V128 V128Op.(I32x4 DotI16x8S))
370371

371372
let i64x2_splat = Convert (V128 V128Op.(I64x2 Splat))
372373
let i64x2_extract_lane imm = SimdExtract (V128Op.I64x2 (ZX, imm))

interpreter/text/arrange.ml

+1
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ struct
305305
| I32x4 MinU -> "i32x4.min_u"
306306
| I32x4 MaxS -> "i32x4.max_s"
307307
| I32x4 MaxU -> "i32x4.max_u"
308+
| I32x4 DotI16x8S -> "i32x4.dot_i16x8_s"
308309
| I64x2 Add -> "i64x2.add"
309310
| I64x2 Sub -> "i64x2.sub"
310311
| I64x2 Mul -> "i64x2.mul"

interpreter/text/lexer.mll

+3
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,9 @@ rule token = parse
569569
| "i16x8.sub_sat_"(sign as s)
570570
{ BINARY (ext s i16x8_sub_sat_s i16x8_sub_sat_u) }
571571

572+
| "i32x4.dot_i16x8_s"
573+
{ BINARY i32x4_dot_i16x8_s }
574+
572575
| (simd_shape as s) { SIMD_SHAPE (simd_shape s) }
573576

574577
| name as s { VAR s }

test/core/simd/meta/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Currently it only supports generation of the following SIMD test files.
2626
- 'simd_f64x2_rounding'
2727
- 'simd_f32x4_pmin_pmax'
2828
- 'simd_f64x2_pmin_pmax'
29+
- 'simd_i32x4_dot_i16x8'
2930

3031

3132
Usage:

test/core/simd/meta/gen_tests.py

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
'simd_f64x2_rounding',
3131
'simd_f32x4_pmin_pmax',
3232
'simd_f64x2_pmin_pmax',
33+
'simd_i32x4_dot_i16x8',
3334
)
3435

3536

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python3
2+
3+
from simd_arithmetic import SimdArithmeticCase, i16
4+
from simd_integer_op import ArithmeticOp
5+
6+
7+
class SimdI32x4DotI16x8TestCase(SimdArithmeticCase):
    """Test-case generator for the ``i32x4.dot_i16x8_s`` instruction.

    Operands are given as ``i16x8`` constants and the expected result as an
    ``i32x4`` constant.
    """

    LANE_TYPE = 'i32x4'
    UNARY_OPS = ()
    BINARY_OPS = ('dot_i16x8_s',)

    @property
    def lane(self):
        """Lane descriptor used for the operands (``i16``)."""
        return i16

    def binary_op(self, x, y, lane):
        """Return the expected value of one result lane for operands x and y.

        The ``lane`` argument is accepted for interface compatibility but the
        computation always uses ``i16``.
        """
        # The test data always splats a single value across the entire v128,
        # so both products of an adjacent pair are equal and '* 2' yields
        # their sum.
        lhs = ArithmeticOp.get_valid_value(x, i16)
        rhs = ArithmeticOp.get_valid_value(y, i16)
        return lhs * rhs * 2

    @property
    def hex_binary_op_test_data(self):
        """No hexadecimal operand data is generated for this instruction."""
        return []

    @property
    def bin_test_data(self):
        """Pairs of (operand data group, v128 forms for lhs, rhs and result)."""
        forms = ('i16x8', 'i16x8', 'i32x4')
        # Each group gets its own list object, as in a literal-per-entry layout.
        return [
            (self.normal_binary_op_test_data, list(forms)),
            (self.hex_binary_op_test_data, list(forms)),
        ]

    def get_case_data(self):
        """Build the list of case records: a header entry, then one per operand pair."""
        op_name = 'i32x4.dot_i16x8_s'
        cases = [['#', op_name]]
        for operand_group, v128_forms in self.bin_test_data:
            for operands in operand_group:
                lhs, rhs = operands[0], operands[1]
                expected = str(self.binary_op(lhs, rhs, self.lane))
                cases.append([op_name, [str(lhs), str(rhs)], expected, v128_forms])
        return cases

    def get_combine_cases(self):
        """No combined-operation cases are emitted for this instruction."""
        return ''

    def gen_test_cases(self):
        """Write the text returned by ``get_all_cases()`` to the .wast file."""
        with open('../simd_i32x4_dot_i16x8.wast', 'w') as wast_file:
            wast_file.write(self.get_all_cases())
50+
51+
def gen_test_cases():
    """Generate the i32x4.dot_i16x8_s test file via SimdI32x4DotI16x8TestCase."""
    SimdI32x4DotI16x8TestCase().gen_test_cases()


# Allow the module to be run directly as a script.
if __name__ == '__main__':
    gen_test_cases()
+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
;; Tests for i32x4 arithmetic operations on major boundary values and all special values.
2+
3+
4+
(module
5+
(func (export "i32x4.dot_i16x8_s") (param v128 v128) (result v128) (i32x4.dot_i16x8_s (local.get 0) (local.get 1)))
6+
)
7+
8+
9+
;; i32x4.dot_i16x8_s
10+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0)
11+
(v128.const i16x8 0 0 0 0 0 0 0 0))
12+
(v128.const i32x4 0 0 0 0))
13+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0)
14+
(v128.const i16x8 1 1 1 1 1 1 1 1))
15+
(v128.const i32x4 0 0 0 0))
16+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 1 1 1 1 1 1 1 1)
17+
(v128.const i16x8 1 1 1 1 1 1 1 1))
18+
(v128.const i32x4 2 2 2 2))
19+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0)
20+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
21+
(v128.const i32x4 0 0 0 0))
22+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 1 1 1 1 1 1 1 1)
23+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
24+
(v128.const i32x4 -2 -2 -2 -2))
25+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)
26+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
27+
(v128.const i32x4 2 2 2 2))
28+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 16383 16383 16383 16383 16383 16383 16383 16383)
29+
(v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384))
30+
(v128.const i32x4 536838144 536838144 536838144 536838144))
31+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384)
32+
(v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384))
33+
(v128.const i32x4 536870912 536870912 536870912 536870912))
34+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16383 -16383 -16383 -16383 -16383 -16383 -16383 -16383)
35+
(v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384))
36+
(v128.const i32x4 536838144 536838144 536838144 536838144))
37+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384)
38+
(v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384))
39+
(v128.const i32x4 536870912 536870912 536870912 536870912))
40+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16385 -16385 -16385 -16385 -16385 -16385 -16385 -16385)
41+
(v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384))
42+
(v128.const i32x4 536903680 536903680 536903680 536903680))
43+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32765 32765 32765 32765 32765 32765 32765 32765)
44+
(v128.const i16x8 1 1 1 1 1 1 1 1))
45+
(v128.const i32x4 65530 65530 65530 65530))
46+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32766 32766 32766 32766 32766 32766 32766 32766)
47+
(v128.const i16x8 1 1 1 1 1 1 1 1))
48+
(v128.const i32x4 65532 65532 65532 65532))
49+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32768 32768 32768 32768 32768 32768 32768 32768)
50+
(v128.const i16x8 1 1 1 1 1 1 1 1))
51+
(v128.const i32x4 -65536 -65536 -65536 -65536))
52+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32766 -32766 -32766 -32766 -32766 -32766 -32766 -32766)
53+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
54+
(v128.const i32x4 65532 65532 65532 65532))
55+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32767 -32767 -32767 -32767 -32767 -32767 -32767 -32767)
56+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
57+
(v128.const i32x4 65534 65534 65534 65534))
58+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)
59+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
60+
(v128.const i32x4 65536 65536 65536 65536))
61+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767)
62+
(v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767))
63+
(v128.const i32x4 2147352578 2147352578 2147352578 2147352578))
64+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)
65+
(v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768))
66+
(v128.const i32x4 2147483648 2147483648 2147483648 2147483648))
67+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)
68+
(v128.const i16x8 -32767 -32767 -32767 -32767 -32767 -32767 -32767 -32767))
69+
(v128.const i32x4 2147418112 2147418112 2147418112 2147418112))
70+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
71+
(v128.const i16x8 0 0 0 0 0 0 0 0))
72+
(v128.const i32x4 0 0 0 0))
73+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
74+
(v128.const i16x8 1 1 1 1 1 1 1 1))
75+
(v128.const i32x4 -2 -2 -2 -2))
76+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
77+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
78+
(v128.const i32x4 2 2 2 2))
79+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
80+
(v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767))
81+
(v128.const i32x4 -65534 -65534 -65534 -65534))
82+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
83+
(v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768))
84+
(v128.const i32x4 65536 65536 65536 65536))
85+
(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
86+
(v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535))
87+
(v128.const i32x4 2 2 2 2))
88+
89+
;; type check
90+
(assert_invalid (module (func (result v128) (i32x4.dot_i16x8_s (i32.const 0) (f32.const 0.0)))) "type mismatch")
91+
92+
;; Test operation with empty argument
93+
94+
(assert_invalid
95+
(module
96+
(func $i32x4.dot_i16x8_s-1st-arg-empty (result v128)
97+
(i32x4.dot_i16x8_s (v128.const i32x4 0 0 0 0))
98+
)
99+
)
100+
"type mismatch"
101+
)
102+
(assert_invalid
103+
(module
104+
(func $i32x4.dot_i16x8_s-arg-empty (result v128)
105+
(i32x4.dot_i16x8_s)
106+
)
107+
)
108+
"type mismatch"
109+
)
110+

0 commit comments

Comments
 (0)