Skip to content

Commit 5092fe2

Browse files
authored
Make Pulley pass simd_f32x4_arith.wast (bytecodealliance#9897)
* Add f32x4 arithmetic instructions to Pulley: Adds float SIMD instructions on 4 lanes (f32x4) for subtraction, multiplication, and negation. `vtrunc32x4` and `vmuli32x4` were used as a basis for how to organize things.
* Mark `simd_f32x4_arith.wast` as passing for Pulley: To be exact, `spec_testsuite/simd_f32x4_arith.wast` has been removed from the should-fail list for Pulley.
* Rename 2 f32x4 arithmetic instructions to contain "f32x4". Specifically:
  - "vsub32x4" -> "vsubf32x4"
  - "vmul32x4" -> "vmulf32x4"
1 parent dd2365e commit 5092fe2

File tree

4 files changed

+38
-1
lines changed

4 files changed

+38
-1
lines changed

cranelift/codegen/src/isa/pulley_shared/lower.isle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,12 +1157,14 @@
11571157

11581158
(rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
11591159
(rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
1160+
(rule (lower (has_type $F32X4 (fsub a b))) (pulley_vsubf32x4 a b))
11601161
(rule (lower (has_type $F64X2 (fsub a b))) (pulley_vsubf64x2 a b))
11611162

11621163
;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11631164

11641165
(rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
11651166
(rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
1167+
(rule (lower (has_type $F32X4 (fmul a b))) (pulley_vmulf32x4 a b))
11661168
(rule (lower (has_type $F64X2 (fmul a b))) (pulley_vmulf64x2 a b))
11671169

11681170
;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1233,6 +1235,7 @@
12331235

12341236
(rule (lower (has_type $F32 (fneg a))) (pulley_fneg32 a))
12351237
(rule (lower (has_type $F64 (fneg a))) (pulley_fneg64 a))
1238+
(rule (lower (has_type $F32X4 (fneg a))) (pulley_vnegf32x4 a))
12361239
(rule (lower (has_type $F64X2 (fneg a))) (pulley_vnegf64x2 a))
12371240

12381241
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

crates/wast-util/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,6 @@ impl WastTest {
408408
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
409409
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
410410
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
411-
"spec_testsuite/simd_f32x4_arith.wast",
412411
"spec_testsuite/simd_f32x4_cmp.wast",
413412
"spec_testsuite/simd_f32x4_pmin_pmax.wast",
414413
"spec_testsuite/simd_f64x2_cmp.wast",

pulley/src/interp.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2980,13 +2980,33 @@ impl ExtendedOpVisitor for Interpreter<'_> {
29802980
ControlFlow::Continue(())
29812981
}
29822982

2983+
/// Lane-wise subtraction on f32x4 vector registers.
///
/// Reads `operands.src1` and `operands.src2` as f32x4 values, replaces each
/// lane of the first with `src1_lane - src2_lane`, and stores the result in
/// `operands.dst`. Always returns `ControlFlow::Continue(())`.
fn vsubf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
2984+
let mut a = self.state[operands.src1].get_f32x4();
2985+
let b = self.state[operands.src2].get_f32x4();
2986+
for (a, b) in a.iter_mut().zip(b) {
2987+
*a = *a - b;
2988+
}
2989+
self.state[operands.dst].set_f32x4(a);
2990+
ControlFlow::Continue(())
2991+
}
2992+
29832993
/// Scalar f32 multiplication on float registers.
///
/// Reads `operands.src1` and `operands.src2` as f32 values and stores their
/// product in `operands.dst`. Always returns `ControlFlow::Continue(())`.
fn fmul32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
29842994
let a = self.state[operands.src1].get_f32();
29852995
let b = self.state[operands.src2].get_f32();
29862996
self.state[operands.dst].set_f32(a * b);
29872997
ControlFlow::Continue(())
29882998
}
29892999

3000+
/// Lane-wise multiplication on f32x4 vector registers.
///
/// Reads `operands.src1` and `operands.src2` as f32x4 values, replaces each
/// lane of the first with `src1_lane * src2_lane`, and stores the result in
/// `operands.dst`. Always returns `ControlFlow::Continue(())`.
fn vmulf32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
3001+
let mut a = self.state[operands.src1].get_f32x4();
3002+
let b = self.state[operands.src2].get_f32x4();
3003+
for (a, b) in a.iter_mut().zip(b) {
3004+
*a = *a * b;
3005+
}
3006+
self.state[operands.dst].set_f32x4(a);
3007+
ControlFlow::Continue(())
3008+
}
3009+
29903010
fn fdiv32(&mut self, operands: BinaryOperands<FReg>) -> ControlFlow<Done> {
29913011
let a = self.state[operands.src1].get_f32();
29923012
let b = self.state[operands.src2].get_f32();
@@ -3162,6 +3182,15 @@ impl ExtendedOpVisitor for Interpreter<'_> {
31623182
ControlFlow::Continue(())
31633183
}
31643184

3185+
/// Lane-wise negation on an f32x4 vector register.
///
/// Reads `src` as an f32x4 value, negates every lane in place, and stores
/// the result in `dst`. Always returns `ControlFlow::Continue(())`.
fn vnegf32x4(&mut self, dst: VReg, src: VReg) -> ControlFlow<Done> {
3186+
let mut a = self.state[src].get_f32x4();
3187+
for elem in a.iter_mut() {
3188+
*elem = -*elem;
3189+
}
3190+
self.state[dst].set_f32x4(a);
3191+
ControlFlow::Continue(())
3192+
}
3193+
31653194
fn fabs32(&mut self, dst: FReg, src: FReg) -> ControlFlow<Done> {
31663195
let a = self.state[src].get_f32();
31673196
self.state[dst].set_f32(a.wasm_abs());

pulley/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,8 +811,12 @@ macro_rules! for_each_extended_op {
811811
fadd32 = Fadd32 { operands: BinaryOperands<FReg> };
812812
/// `low32(dst) = low32(src1) - low32(src2)`
813813
fsub32 = Fsub32 { operands: BinaryOperands<FReg> };
814+
/// `low128(dst) = low128(src1) - low128(src2)`
815+
vsubf32x4 = Vsubf32x4 { operands: BinaryOperands<VReg> };
814816
/// `low32(dst) = low32(src1) * low32(src2)`
815817
fmul32 = Fmul32 { operands: BinaryOperands<FReg> };
818+
/// `low128(dst) = low128(src1) * low128(src2)`
819+
vmulf32x4 = Vmulf32x4 { operands: BinaryOperands<VReg> };
816820
/// `low32(dst) = low32(src1) / low32(src2)`
817821
fdiv32 = Fdiv32 { operands: BinaryOperands<FReg> };
818822
/// `low128(dst) = low128(src1) / low128(src2)`
@@ -849,6 +853,8 @@ macro_rules! for_each_extended_op {
849853
vsqrt64x2 = Vsqrt64x2 { dst: VReg, src: VReg };
850854
/// `low32(dst) = -low32(src)`
851855
fneg32 = Fneg32 { dst: FReg, src: FReg };
856+
/// `low128(dst) = -low128(src)`
857+
vnegf32x4 = Vnegf32x4 { dst: VReg, src: VReg };
852858
/// `low32(dst) = |low32(src)|`
853859
fabs32 = Fabs32 { dst: FReg, src: FReg };
854860

0 commit comments

Comments
 (0)