Skip to content

Commit be6c6c8

Browse files
authored
Winch: Add splat instructions to x64 using AVX2 (bytecodealliance#10028)
* Winch: Add splat instructions to x64 using AVX2
* Change vpbroadcast size to lane size
* Create helper for vpshuf mask
* Change to single masm method for splatting
* Split out SplatLoadKind from SplatKind
* Add comment about heap allocation
1 parent e4340b3 commit be6c6c8

File tree

21 files changed

+1075
-50
lines changed

21 files changed

+1075
-50
lines changed

crates/wast-util/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,7 @@ impl WastTest {
502502
if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) {
503503
let unsupported = [
504504
"misc_testsuite/winch/_simd_lane.wast",
505+
"misc_testsuite/winch/_simd_splat.wast",
505506
"spec_testsuite/simd_align.wast",
506507
];
507508

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx2" ]
4+
5+
(module
6+
(func (result v128)
7+
(f32x4.splat (f32.const 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movss 0x14(%rip), %xmm0
23+
;; vpbroadcastd %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx2" ]
4+
5+
(module
6+
(func (param f32) (result v128)
7+
(f32x4.splat (local.get 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x20, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x44
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x20, %rsp
20+
;; movq %rdi, 0x18(%rsp)
21+
;; movq %rsi, 0x10(%rsp)
22+
;; movss %xmm0, 0xc(%rsp)
23+
;; movss 0xc(%rsp), %xmm0
24+
;; vpbroadcastd %xmm0, %xmm0
25+
;; addq $0x20, %rsp
26+
;; popq %rbp
27+
;; retq
28+
;; 44: ud2
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(f64x2.splat (f64.const 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3f
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; movsd 0x14(%rip), %xmm0
23+
;; vpshufd $0x44, %xmm0, %xmm0
24+
;; addq $0x10, %rsp
25+
;; popq %rbp
26+
;; retq
27+
;; 3f: ud2
28+
;; 41: addb %al, (%rax)
29+
;; 43: addb %al, (%rax)
30+
;; 45: addb %al, (%rax)
31+
;; 47: addb %al, (%rax)
32+
;; 49: addb %al, (%rax)
33+
;; 4b: addb %al, (%rax)
34+
;; 4d: addb %al, (%rax)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (param f64) (result v128)
7+
(f64x2.splat (local.get 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x20, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x44
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x20, %rsp
20+
;; movq %rdi, 0x18(%rsp)
21+
;; movq %rsi, 0x10(%rsp)
22+
;; movsd %xmm0, 8(%rsp)
23+
;; movsd 8(%rsp), %xmm0
24+
;; vpshufd $0x44, %xmm0, %xmm0
25+
;; addq $0x20, %rsp
26+
;; popq %rbp
27+
;; retq
28+
;; 44: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx2" ]
4+
5+
(module
6+
(func (result v128)
7+
(i16x8.splat (i32.const 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3b
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; vpbroadcastw 0xb(%rip), %xmm0
23+
;; addq $0x10, %rsp
24+
;; popq %rbp
25+
;; retq
26+
;; 3b: ud2
27+
;; 3d: addb %al, (%rax)
28+
;; 3f: addb %al, (%rax)
29+
;; 41: addb %al, (%rax)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx2" ]
4+
5+
(module
6+
(func (param i32) (result v128)
7+
(i16x8.splat (local.get 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x20, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x44
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x20, %rsp
20+
;; movq %rdi, 0x18(%rsp)
21+
;; movq %rsi, 0x10(%rsp)
22+
;; movl %edx, 0xc(%rsp)
23+
;; movl 0xc(%rsp), %eax
24+
;; movd %eax, %xmm0
25+
;; vpbroadcastw %xmm0, %xmm0
26+
;; addq $0x20, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 44: ud2
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx2" ]
4+
5+
(module
6+
(func (result v128)
7+
(i32x4.splat (i32.const 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3b
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; vpbroadcastd 0xb(%rip), %xmm0
23+
;; addq $0x10, %rsp
24+
;; popq %rbp
25+
;; retq
26+
;; 3b: ud2
27+
;; 3d: addb %al, (%rax)
28+
;; 3f: addb %al, (%rax)
29+
;; 41: addb %al, (%rax)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx2" ]
4+
5+
(module
6+
(func (param i32) (result v128)
7+
(i32x4.splat (local.get 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x20, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x44
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x20, %rsp
20+
;; movq %rdi, 0x18(%rsp)
21+
;; movq %rsi, 0x10(%rsp)
22+
;; movl %edx, 0xc(%rsp)
23+
;; movl 0xc(%rsp), %eax
24+
;; movd %eax, %xmm0
25+
;; vpbroadcastd %xmm0, %xmm0
26+
;; addq $0x20, %rsp
27+
;; popq %rbp
28+
;; retq
29+
;; 44: ud2
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
;;! target = "x86_64"
2+
;;! test = "winch"
3+
;;! flags = [ "-Ccranelift-has-avx" ]
4+
5+
(module
6+
(func (result v128)
7+
(i64x2.splat (i64.const 0))
8+
)
9+
)
10+
;; wasm[0]::function[0]:
11+
;; pushq %rbp
12+
;; movq %rsp, %rbp
13+
;; movq 8(%rdi), %r11
14+
;; movq 0x10(%r11), %r11
15+
;; addq $0x10, %r11
16+
;; cmpq %rsp, %r11
17+
;; ja 0x3b
18+
;; 1c: movq %rdi, %r14
19+
;; subq $0x10, %rsp
20+
;; movq %rdi, 8(%rsp)
21+
;; movq %rsi, (%rsp)
22+
;; vpshufd $0x44, 0xb(%rip), %xmm0
23+
;; addq $0x10, %rsp
24+
;; popq %rbp
25+
;; retq
26+
;; 3b: ud2
27+
;; 3d: addb %al, (%rax)
28+
;; 3f: addb %al, (%rax)
29+
;; 41: addb %al, (%rax)
30+
;; 43: addb %al, (%rax)
31+
;; 45: addb %al, (%rax)

0 commit comments

Comments
 (0)