@@ -12,9 +12,6 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
12
12
; CHECK-NEXT: addi sp, sp, -2032
13
13
; CHECK-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
14
14
; CHECK-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
15
- ; CHECK-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill
16
- ; CHECK-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill
17
- ; CHECK-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill
18
15
; CHECK-NEXT: lui a0, 7
19
16
; CHECK-NEXT: sub t1, sp, a0
20
17
; CHECK-NEXT: lui t2, 1
@@ -24,8 +21,9 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
24
21
; CHECK-NEXT: bne sp, t1, .LBB0_1
25
22
; CHECK-NEXT: # %bb.2:
26
23
; CHECK-NEXT: addi sp, sp, -2048
27
- ; CHECK-NEXT: addi sp, sp, -96
24
+ ; CHECK-NEXT: addi sp, sp, -48
28
25
; CHECK-NEXT: csrr t1, vlenb
26
+ ; CHECK-NEXT: slli t1, t1, 2
29
27
; CHECK-NEXT: lui t2, 1
30
28
; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1
31
29
; CHECK-NEXT: sub sp, sp, t2
@@ -34,45 +32,34 @@ define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
34
32
; CHECK-NEXT: bge t1, t2, .LBB0_3
35
33
; CHECK-NEXT: # %bb.4:
36
34
; CHECK-NEXT: sub sp, sp, t1
37
- ; CHECK-NEXT: li a0, 86
38
- ; CHECK-NEXT: addi s0, sp, 48
39
- ; CHECK-NEXT: addi s1, sp, 32
40
- ; CHECK-NEXT: addi s2, sp, 16
41
- ; CHECK-NEXT: lui a1, 353637
42
- ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
35
+ ; CHECK-NEXT: lui a0, 353637
36
+ ; CHECK-NEXT: addi a0, a0, 1622
37
+ ; CHECK-NEXT: slli a1, a0, 32
38
+ ; CHECK-NEXT: add a0, a0, a1
39
+ ; CHECK-NEXT: vsetivli zero, 7, e64, m4, ta, ma
43
40
; CHECK-NEXT: vmv.v.x v8, a0
44
41
; CHECK-NEXT: lui a0, 8
45
- ; CHECK-NEXT: addi a0, a0, 32
42
+ ; CHECK-NEXT: addi a0, a0, 16
46
43
; CHECK-NEXT: add a0, sp, a0
47
- ; CHECK-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
48
- ; CHECK-NEXT: addi a0, a1, 1622
49
- ; CHECK-NEXT: vse8.v v8, (s0)
50
- ; CHECK-NEXT: vse8.v v8, (s1)
51
- ; CHECK-NEXT: vse8.v v8, (s2)
52
- ; CHECK-NEXT: slli a1, a0, 32
53
- ; CHECK-NEXT: add s3, a0, a1
54
- ; CHECK-NEXT: sd s3, 64(sp)
44
+ ; CHECK-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
45
+ ; CHECK-NEXT: addi s0, sp, 16
46
+ ; CHECK-NEXT: vse64.v v8, (s0)
55
47
; CHECK-NEXT: call bar
56
48
; CHECK-NEXT: lui a0, 8
57
- ; CHECK-NEXT: addi a0, a0, 32
49
+ ; CHECK-NEXT: addi a0, a0, 16
58
50
; CHECK-NEXT: add a0, sp, a0
59
- ; CHECK-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
60
- ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
61
- ; CHECK-NEXT: vse8.v v8, (s0)
62
- ; CHECK-NEXT: vse8.v v8, (s1)
63
- ; CHECK-NEXT: vse8.v v8, (s2)
64
- ; CHECK-NEXT: sd s3, 64(sp)
51
+ ; CHECK-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
52
+ ; CHECK-NEXT: vsetivli zero, 7, e64, m4, ta, ma
53
+ ; CHECK-NEXT: vse64.v v8, (s0)
65
54
; CHECK-NEXT: li a0, 0
66
55
; CHECK-NEXT: csrr a1, vlenb
56
+ ; CHECK-NEXT: slli a1, a1, 2
67
57
; CHECK-NEXT: add sp, sp, a1
68
58
; CHECK-NEXT: lui a1, 8
69
- ; CHECK-NEXT: addi a1, a1, -1952
59
+ ; CHECK-NEXT: addi a1, a1, -2000
70
60
; CHECK-NEXT: add sp, sp, a1
71
61
; CHECK-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
72
62
; CHECK-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
73
- ; CHECK-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
74
- ; CHECK-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload
75
- ; CHECK-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload
76
63
; CHECK-NEXT: addi sp, sp, 2032
77
64
; CHECK-NEXT: ret
78
65
%1 = alloca %"buff" , align 8
0 commit comments