Skip to content

Commit 90ad796

Browse files
authored
[SIMD] Add i32x4.dot_i8x16_i7x16_add_s in interpreter (#7527)
1 parent e185ff9 commit 90ad796

File tree

6 files changed

+170
-5
lines changed

6 files changed

+170
-5
lines changed

scripts/test/fuzzing.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
unfuzzable = [
2020
# Float16 is still experimental.
2121
'f16.wast',
22-
# not all relaxed SIMD instructions are implemented in the interpreter
23-
'relaxed-simd.wast',
2422
# TODO: fuzzer and interpreter support for strings
2523
'strings.wast',
2624
'simplify-locals-strings.wast',

src/literal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@ class Literal {
607607
Literal dotSI8x16toI16x8(const Literal& other) const;
608608
Literal dotUI8x16toI16x8(const Literal& other) const;
609609
Literal dotSI16x8toI32x4(const Literal& other) const;
610+
Literal dotSI8x16toI16x8Add(const Literal& left, const Literal& right) const;
610611
Literal extMulLowSI32x4(const Literal& other) const;
611612
Literal extMulHighSI32x4(const Literal& other) const;
612613
Literal extMulLowUI32x4(const Literal& other) const;

src/wasm-interpreter.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,10 +1306,13 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
13061306
return NONCONSTANT_FLOW;
13071307
}
13081308
return a.relaxedNmaddF64x2(b, c);
1309-
default:
1310-
// TODO: implement signselect and dot_add
1311-
WASM_UNREACHABLE("not implemented");
1309+
case DotI8x16I7x16AddSToVecI32x4:
1310+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1311+
return NONCONSTANT_FLOW;
1312+
}
1313+
return a.dotSI8x16toI16x8Add(b, c);
13121314
}
1315+
WASM_UNREACHABLE("invalid op");
13131316
}
13141317
Flow visitSIMDShift(SIMDShift* curr) {
13151318
NOTE_ENTER("SIMDShift");

src/wasm/literal.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2604,6 +2604,21 @@ Literal Literal::dotSI16x8toI32x4(const Literal& other) const {
26042604
return dot<4, 2, &Literal::getLanesSI16x8>(*this, other);
26052605
}
26062606

2607+
// Dot product with accumulation, used by the interpreter for
// DotI8x16I7x16AddSToVecI32x4.
//
// First computes dotSI8x16toI16x8(left) on this vector, then reads the result
// as signed 16-bit lanes and sums each adjacent pair (lanes 2*i and 2*i + 1)
// into one of four destination lanes, and finally adds |right| lane-wise using
// addI32x4.
//
// Note that despite the "toI16x8" in the name, the value returned here is
// assembled from four lanes and accumulated as an i32x4.
Literal Literal::dotSI8x16toI16x8Add(const Literal& left,
2608+
const Literal& right) const {
2609+
// Intermediate pairwise products, computed by the existing i8x16 -> i16x8 dot.
auto temp = dotSI8x16toI16x8(left);
2610+
2611+
auto tempLanes = temp.getLanesSI16x8();
2612+
LaneArray<4> dest;
2613+
// TODO: the index on dest may be wrong, see
2614+
// https://github.com/WebAssembly/relaxed-simd/issues/162
2615+
for (size_t i = 0; i < 4; i++) {
2616+
// Fold two neighboring 16-bit lanes into a single destination lane.
dest[i] = tempLanes[i * 2].add(tempLanes[i * 2 + 1]);
2617+
}
2618+
2619+
// Accumulate the third operand on top of the widened dot product.
return Literal(dest).addI32x4(right);
2620+
}
2621+
26072622
Literal Literal::bitselectV128(const Literal& left,
26082623
const Literal& right) const {
26092624
return andV128(left).orV128(notV128().andV128(right));

test/lit/exec/relaxed.wast

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items --output=fuzz-exec and should not be edited.
2+
3+
;; RUN: wasm-opt %s -all --fuzz-exec-before -q -o /dev/null 2>&1 | filecheck %s
4+
5+
(module
6+
(import "fuzzing-support" "log-i32" (func $log (param i32)))
7+
8+
;; CHECK: [fuzz-exec] calling i32x4.dot_i8x16_i7x16_add_s
9+
;; CHECK-NEXT: [LoggingExternalInterface logging 8]
10+
;; CHECK-NEXT: [LoggingExternalInterface logging 14]
11+
;; CHECK-NEXT: [LoggingExternalInterface logging 22]
12+
;; CHECK-NEXT: [LoggingExternalInterface logging 32]
13+
(func $i32x4.dot_i8x16_i7x16_add_s (export "i32x4.dot_i8x16_i7x16_add_s")
14+
(local $v v128)
15+
;; The first two operands are written as i32x4 constants but consumed as
;; i8x16, so each group of four bytes holds one small value followed by
;; three zero bytes. Each output lane is therefore a single product plus the
;; accumulator lane: 0*4+8, 1*5+9, 2*6+10 and 3*7+11, i.e. the logged
;; values 8, 14, 22 and 32.
(local.set $v
16+
(i32x4.dot_i8x16_i7x16_add_s
17+
(v128.const i32x4 0 1 2 3)
18+
(v128.const i32x4 4 5 6 7)
19+
(v128.const i32x4 8 9 10 11)
20+
)
21+
)
22+
;; Log each of the four result lanes in order.
(call $log
23+
(i32x4.extract_lane 0
24+
(local.get $v)
25+
)
26+
)
27+
(call $log
28+
(i32x4.extract_lane 1
29+
(local.get $v)
30+
)
31+
)
32+
(call $log
33+
(i32x4.extract_lane 2
34+
(local.get $v)
35+
)
36+
)
37+
(call $log
38+
(i32x4.extract_lane 3
39+
(local.get $v)
40+
)
41+
)
42+
)
43+
)
44+

test/spec/dot_product.wast

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
;; Tests for dot products.
2+
;;
3+
;; This is the same as the upstream relaxed_dot_product.wast test in
4+
;; relaxed-simd, but with the non-relaxed versions, and with picking the proper
5+
;; outcome in the multiple-choice questions (which use either() in the original
6+
;; test).
7+
8+
(module
9+
;; Thin wrappers that expose each dot-product instruction directly.
(func (export "i16x8.dot_i8x16_i7x16_s") (param v128 v128) (result v128) (i16x8.dot_i8x16_i7x16_s (local.get 0) (local.get 1)))
10+
(func (export "i32x4.dot_i8x16_i7x16_add_s") (param v128 v128 v128) (result v128) (i32x4.dot_i8x16_i7x16_add_s (local.get 0) (local.get 1) (local.get 2)))
11+
12+
;; The "_cmp" helpers run the same instruction twice on the same operands and
;; compare the two results, so an all-ones vector means both runs agreed.
(func (export "i16x8.dot_i8x16_i7x16_s_cmp") (param v128 v128) (result v128)
13+
(i16x8.eq
14+
(i16x8.dot_i8x16_i7x16_s (local.get 0) (local.get 1))
15+
(i16x8.dot_i8x16_i7x16_s (local.get 0) (local.get 1))))
16+
;; The i32x4 results are compared with i16x8.eq here; when both results are
;; bit-identical every 16-bit lane compares equal, which is the same all-ones
;; vector the assertions below spell as i32x4 -1 -1 -1 -1.
(func (export "i32x4.dot_i8x16_i7x16_add_s_cmp") (param v128 v128 v128) (result v128)
17+
(i16x8.eq
18+
(i32x4.dot_i8x16_i7x16_add_s (local.get 0) (local.get 1) (local.get 2))
19+
(i32x4.dot_i8x16_i7x16_add_s (local.get 0) (local.get 1) (local.get 2))))
20+
)
21+
22+
;; Simple values to ensure things are functional.
23+
(assert_return (invoke "i16x8.dot_i8x16_i7x16_s"
24+
(v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
25+
(v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15))
26+
(v128.const i16x8 1 13 41 85 145 221 313 421))
27+
28+
;; Test max and min i8 values;
29+
(assert_return (invoke "i16x8.dot_i8x16_i7x16_s"
30+
(v128.const i8x16 -128 -128 127 127 0 0 0 0 0 0 0 0 0 0 0 0)
31+
(v128.const i8x16 127 127 127 127 0 0 0 0 0 0 0 0 0 0 0 0))
32+
(v128.const i16x8 -32512 32258 0 0 0 0 0 0))
33+
34+
;; signed * unsigned : -128 * 129 * 2 = -33,024 saturated to -32,768
35+
;; signed * signed : -128 * -127 * 2 = 32,512
36+
;; unsigned * unsigned : 128 * 129 * 2 = 33,024
37+
(assert_return (invoke "i16x8.dot_i8x16_i7x16_s"
38+
(v128.const i8x16 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
39+
(v128.const i8x16 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0 0 0))
40+
(v128.const i16x8 32512 0 0 0 0 0 0 0))
41+
42+
;; Simple values to ensure things are functional.
43+
(assert_return (invoke "i32x4.dot_i8x16_i7x16_add_s"
44+
(v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
45+
(v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
46+
(v128.const i32x4 0 1 2 3))
47+
;; intermediate result is [14, 126, 366, 734]
48+
(v128.const i32x4 14 127 368 737))
49+
50+
;; Test max and min i8 values;
51+
(assert_return (invoke "i32x4.dot_i8x16_i7x16_add_s"
52+
(v128.const i8x16 -128 -128 -128 -128 127 127 127 127 0 0 0 0 0 0 0 0)
53+
(v128.const i8x16 127 127 127 127 127 127 127 127 0 0 0 0 0 0 0 0)
54+
(v128.const i32x4 1 2 3 4))
55+
;; intermediate result is [-65024, 64516, 0, 0]
56+
(v128.const i32x4 -65023 64518 3 4))
57+
58+
;; signed * unsigned : -128 * 129 * 4 = -66,048 (+ 1) VPDPBUSD AVX2-VNNI or AVX512-VNNI
59+
;; signed * unsigned with intermediate saturation :
60+
;; (-128 * 129) + (-128 * 129) = -33024 saturated to -32768 (PMADDUBSW)
61+
;; -32768 + -32768 = -65536 (+ 1)
62+
;; signed * signed : -128 * -127 * 4 = 65,024 (+ 1)
63+
;; unsigned * unsigned : 128 * 129 * 4 = 66,048 (+ 1)
64+
(assert_return (invoke "i32x4.dot_i8x16_i7x16_add_s"
65+
(v128.const i8x16 -128 -128 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0)
66+
(v128.const i8x16 -127 -127 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0)
67+
(v128.const i32x4 1 2 3 4))
68+
(v128.const i32x4 65025 2 3 4))
69+
70+
;; Check that multiple calls to the relaxed instruction with the same inputs return the same results.
71+
72+
;; Test max and min i8 values;
73+
(assert_return (invoke "i16x8.dot_i8x16_i7x16_s_cmp"
74+
(v128.const i8x16 -128 -128 127 127 0 0 0 0 0 0 0 0 0 0 0 0)
75+
(v128.const i8x16 127 127 127 127 0 0 0 0 0 0 0 0 0 0 0 0))
76+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
77+
78+
;; Test max and min i8 values;
79+
(assert_return (invoke "i32x4.dot_i8x16_i7x16_add_s_cmp"
80+
(v128.const i8x16 -128 -128 -128 -128 127 127 127 127 0 0 0 0 0 0 0 0)
81+
(v128.const i8x16 127 127 127 127 127 127 127 127 0 0 0 0 0 0 0 0)
82+
(v128.const i32x4 1 2 3 4))
83+
;; intermediate result is [-65024, 64516, 0, 0]
84+
(v128.const i32x4 -1 -1 -1 -1))
85+
86+
;; signed * unsigned : -128 * 129 * 2 = -33,024 saturated to -32,768
87+
;; signed * signed : -128 * -127 * 2 = 32,512
88+
;; unsigned * unsigned : 128 * 129 * 2 = 33,024
89+
(assert_return (invoke "i16x8.dot_i8x16_i7x16_s_cmp"
90+
(v128.const i8x16 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0 0 0)
91+
(v128.const i8x16 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0 0 0))
92+
(v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
93+
94+
;; signed * unsigned : -128 * 129 * 4 = -66,048 (+ 1) VPDPBUSD AVX2-VNNI or AVX512-VNNI
95+
;; signed * unsigned with intermediate saturation :
96+
;; (-128 * 129) + (-128 * 129) = -33024 saturated to -32768 (PMADDUBSW)
97+
;; -32768 + -32768 = -65536 (+ 1)
98+
;; signed * signed : -128 * -127 * 4 = 65,024 (+ 1)
99+
;; unsigned * unsigned : 128 * 129 * 4 = 66,048 (+ 1)
100+
(assert_return (invoke "i32x4.dot_i8x16_i7x16_add_s_cmp"
101+
(v128.const i8x16 -128 -128 -128 -128 0 0 0 0 0 0 0 0 0 0 0 0)
102+
(v128.const i8x16 -127 -127 -127 -127 0 0 0 0 0 0 0 0 0 0 0 0)
103+
(v128.const i32x4 1 2 3 4))
104+
(v128.const i32x4 -1 -1 -1 -1))

0 commit comments

Comments
 (0)