Skip to content

Commit c5425da

Browse files
power8 ?gemm_tcopy save/restore
1 parent 60596a1 commit c5425da

8 files changed

+287
-500
lines changed

kernel/power/cgemm_tcopy_8_power8.S

Lines changed: 37 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
110110

111111
#include "cgemm_tcopy_macros_8_power8.S"
112112

113-
#define STACKSIZE 576
113+
#define STACKSIZE 144
114114

115115

116116
PROLOGUE
@@ -119,49 +119,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
119119
addi SP, SP, -STACKSIZE
120120
li r0, 0
121121

122-
std r31, 144(SP)
123-
std r30, 152(SP)
124-
std r29, 160(SP)
125-
std r28, 168(SP)
126-
std r27, 176(SP)
127-
std r26, 184(SP)
128-
std r25, 192(SP)
129-
std r24, 200(SP)
130-
std r23, 208(SP)
131-
std r22, 216(SP)
132-
std r21, 224(SP)
133-
std r20, 232(SP)
134-
std r19, 240(SP)
135-
std r18, 248(SP)
136-
std r17, 256(SP)
137-
std r16, 264(SP)
138-
std r15, 272(SP)
139-
std r14, 280(SP)
140-
addi r11, SP, 288
141-
stvx v20, r11, r0
142-
addi r11, r11, 16
143-
stvx v21, r11, r0
144-
addi r11, r11, 16
145-
stvx v22, r11, r0
146-
addi r11, r11, 16
147-
stvx v23, r11, r0
148-
addi r11, r11, 16
149-
stvx v24, r11, r0
150-
addi r11, r11, 16
151-
stvx v25, r11, r0
152-
addi r11, r11, 16
153-
stvx v26, r11, r0
154-
addi r11, r11, 16
155-
stvx v27, r11, r0
156-
addi r11, r11, 16
157-
stvx v28, r11, r0
158-
addi r11, r11, 16
159-
stvx v29, r11, r0
160-
addi r11, r11, 16
161-
stvx v30, r11, r0
162-
addi r11, r11, 16
163-
stvx v31, r11, r0
164-
li r11, 0
122+
std r14, 0(SP)
123+
std r15, 8(SP)
124+
std r16, 16(SP)
125+
std r17, 24(SP)
126+
std r18, 32(SP)
127+
std r19, 40(SP)
128+
std r20, 48(SP)
129+
std r21, 56(SP)
130+
std r22, 64(SP)
131+
std r23, 72(SP)
132+
std r24, 80(SP)
133+
std r25, 88(SP)
134+
std r26, 96(SP)
135+
std r27, 104(SP)
136+
std r28, 112(SP)
137+
std r29, 120(SP)
138+
std r30, 128(SP)
139+
std r31, 136(SP)
165140

166141
cmpwi cr0, M, 0
167142
ble- L999
@@ -203,51 +178,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
203178

204179
L999:
205180

206-
li r3, 0
207-
208-
ld r31, 144(SP)
209-
ld r30, 152(SP)
210-
ld r29, 160(SP)
211-
ld r28, 168(SP)
212-
ld r27, 176(SP)
213-
ld r26, 184(SP)
214-
ld r25, 192(SP)
215-
ld r24, 200(SP)
216-
ld r23, 208(SP)
217-
ld r22, 216(SP)
218-
ld r21, 224(SP)
219-
ld r20, 232(SP)
220-
ld r19, 240(SP)
221-
ld r18, 248(SP)
222-
ld r17, 256(SP)
223-
ld r16, 264(SP)
224-
ld r15, 272(SP)
225-
ld r14, 280(SP)
226-
addi r11, SP, 288
227-
lvx v20, r11, r3
228-
addi r11, r11, 16
229-
lvx v21, r11, r3
230-
addi r11, r11, 16
231-
lvx v22, r11, r3
232-
addi r11, r11, 16
233-
lvx v23, r11, r3
234-
addi r11, r11, 16
235-
lvx v24, r11, r3
236-
addi r11, r11, 16
237-
lvx v25, r11, r3
238-
addi r11, r11, 16
239-
lvx v26, r11, r3
240-
addi r11, r11, 16
241-
lvx v27, r11, r3
242-
addi r11, r11, 16
243-
lvx v28, r11, r3
244-
addi r11, r11, 16
245-
lvx v29, r11, r3
246-
addi r11, r11, 16
247-
lvx v30, r11, r3
248-
addi r11, r11, 16
249-
lvx v31, r11, r3
250-
li r11, 0
181+
ld r14, 0(SP)
182+
ld r15, 8(SP)
183+
ld r16, 16(SP)
184+
ld r17, 24(SP)
185+
ld r18, 32(SP)
186+
ld r19, 40(SP)
187+
ld r20, 48(SP)
188+
ld r21, 56(SP)
189+
ld r22, 64(SP)
190+
ld r23, 72(SP)
191+
ld r24, 80(SP)
192+
ld r25, 88(SP)
193+
ld r26, 96(SP)
194+
ld r27, 104(SP)
195+
ld r28, 112(SP)
196+
ld r29, 120(SP)
197+
ld r30, 128(SP)
198+
ld r31, 136(SP)
251199

252200
addi SP, SP, STACKSIZE
253201
blr

kernel/power/dgemm_tcopy_16_power8.S

Lines changed: 40 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -109,61 +109,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
109109

110110
#include "dgemm_tcopy_macros_16_power8.S"
111111

112-
#define STACKSIZE 384
113-
#define STACKSIZE 576
112+
#define STACKSIZE 144
113+
114114

115115

116116
PROLOGUE
117117
PROFCODE
118118

119119
addi SP, SP, -STACKSIZE
120-
//addi SP, SP, -208
121120

122121
li r0, 0
123122

124-
std r31, 144(SP)
125-
std r30, 152(SP)
126-
std r29, 160(SP)
127-
std r28, 168(SP)
128-
std r27, 176(SP)
129-
std r26, 184(SP)
130-
std r25, 192(SP)
131-
std r24, 200(SP)
132-
std r23, 208(SP)
133-
std r22, 216(SP)
134-
std r21, 224(SP)
135-
std r20, 232(SP)
136-
std r19, 240(SP)
137-
std r18, 248(SP)
138-
std r17, 256(SP)
139-
std r16, 264(SP)
140-
std r15, 272(SP)
141-
std r14, 280(SP)
142-
addi r11,SP,288
143-
stvx v20, r11,r0
144-
addi r11,r11,16
145-
stvx v21, r11,r0
146-
addi r11,r11,16
147-
stvx v22, r11,r0
148-
addi r11,r11,16
149-
stvx v23, r11,r0
150-
addi r11,r11,16
151-
stvx v24, r11,r0
152-
addi r11,r11,16
153-
stvx v25, r11,r0
154-
addi r11,r11,16
155-
stvx v26, r11,r0
156-
addi r11,r11,16
157-
stvx v27, r11,r0
158-
addi r11,r11,16
159-
stvx v28, r11,r0
160-
addi r11,r11,16
161-
stvx v29, r11,r0
162-
addi r11,r11,16
163-
stvx v30, r11,r0
164-
addi r11,r11,16
165-
stvx v31, r11,r0
166-
li r11,0
123+
std r14,0(SP)
124+
std r15,8(SP)
125+
std r16,16(SP)
126+
std r17,24(SP)
127+
std r18,32(SP)
128+
std r19,40(SP)
129+
std r20,48(SP)
130+
std r21,56(SP)
131+
std r22,64(SP)
132+
std r23,72(SP)
133+
std r24,80(SP)
134+
std r25,88(SP)
135+
std r26,96(SP)
136+
std r27,104(SP)
137+
std r28,112(SP)
138+
std r29,120(SP)
139+
std r30,128(SP)
140+
std r31,136(SP)
167141

168142
cmpwi cr0, M, 0
169143
ble- L999
@@ -198,8 +172,7 @@ li r11,0
198172
add B2, B2, B
199173
add B1, B1, B
200174

201-
//li PREA, 384
202-
li PREA, 576
175+
li PREA, 384
203176
addi PREB, M16, 128
204177

205178
li o8, 8
@@ -213,52 +186,27 @@ L999:
213186

214187
li r3, 0
215188

216-
ld r31, 144(SP)
217-
ld r30, 152(SP)
218-
ld r29, 160(SP)
219-
ld r28, 168(SP)
220-
ld r27, 176(SP)
221-
ld r26, 184(SP)
222-
ld r25, 192(SP)
223-
ld r24, 200(SP)
224-
ld r23, 208(SP)
225-
ld r22, 216(SP)
226-
ld r21, 224(SP)
227-
ld r20, 232(SP)
228-
ld r19, 240(SP)
229-
ld r18, 248(SP)
230-
ld r17, 256(SP)
231-
ld r16, 264(SP)
232-
ld r15, 272(SP)
233-
ld r14, 280(SP)
234-
addi r11,SP,288
235-
lvx v20, r11,r3
236-
addi r11,r11,16
237-
lvx v21, r11,r3
238-
addi r11,r11,16
239-
lvx v22, r11,r3
240-
addi r11,r11,16
241-
lvx v23, r11,r3
242-
addi r11,r11,16
243-
lvx v24, r11,r3
244-
addi r11,r11,16
245-
lvx v25, r11,r3
246-
addi r11,r11,16
247-
lvx v26, r11,r3
248-
addi r11,r11,16
249-
lvx v27, r11,r3
250-
addi r11,r11,16
251-
lvx v28, r11,r3
252-
addi r11,r11,16
253-
lvx v29, r11,r3
254-
addi r11,r11,16
255-
lvx v30, r11,r3
256-
addi r11,r11,16
257-
lvx v31, r11,r3
258-
li r11,0
189+
ld r14,0(SP)
190+
ld r15,8(SP)
191+
ld r16,16(SP)
192+
ld r17,24(SP)
193+
ld r18,32(SP)
194+
ld r19,40(SP)
195+
ld r20,48(SP)
196+
ld r21,56(SP)
197+
ld r22,64(SP)
198+
ld r23,72(SP)
199+
ld r24,80(SP)
200+
ld r25,88(SP)
201+
ld r26,96(SP)
202+
ld r27,104(SP)
203+
ld r28,112(SP)
204+
ld r29,120(SP)
205+
ld r30,128(SP)
206+
ld r31,136(SP)
259207

260208
addi SP, SP, STACKSIZE
261-
//addi SP, SP, 208
209+
262210
blr
263211
EPILOGUE
264212

kernel/power/dgemm_tcopy_macros_16_power8.S

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5858
lxvd2x vs51, o48, A2
5959
addi A2, A2, 64
6060

61-
lxvd2x vs56, o0, A3
62-
lxvd2x vs57, o16, A3
63-
lxvd2x vs58, o32, A3
64-
lxvd2x vs59, o48, A3
61+
lxvd2x vs4, o0, A3
62+
lxvd2x vs5, o16, A3
63+
lxvd2x vs6, o32, A3
64+
lxvd2x vs7, o48, A3
6565
addi A3, A3, 64
6666

6767
lxvd2x vs36, o0, A0
@@ -76,16 +76,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7676
lxvd2x vs47, o48, A1
7777
addi A1, A1, 64
7878

79-
lxvd2x vs52, o0, A2
80-
lxvd2x vs53, o16, A2
81-
lxvd2x vs54, o32, A2
82-
lxvd2x vs55, o48, A2
79+
lxvd2x vs12, o0, A2
80+
lxvd2x vs13, o16, A2
81+
lxvd2x vs2, o32, A2
82+
lxvd2x vs3, o48, A2
8383
addi A2, A2, 64
8484

85-
lxvd2x vs60, o0, A3
86-
lxvd2x vs61, o16, A3
87-
lxvd2x vs62, o32, A3
88-
lxvd2x vs63, o48, A3
85+
lxvd2x vs8, o0, A3
86+
lxvd2x vs9, o16, A3
87+
lxvd2x vs10, o32, A3
88+
lxvd2x vs11, o48, A3
8989
addi A3, A3, 64
9090

9191
mr T1, BO
@@ -122,23 +122,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
122122
stxvd2x vs51, o48, T1
123123
addi T1, T1, 64
124124

125-
stxvd2x vs52, o0, T1
126-
stxvd2x vs53, o16, T1
127-
stxvd2x vs54, o32, T1
128-
stxvd2x vs55, o48, T1
125+
stxvd2x vs12, o0, T1
126+
stxvd2x vs13, o16, T1
127+
stxvd2x vs2, o32, T1
128+
stxvd2x vs3, o48, T1
129129

130130
addi T1, T1, 64
131131

132-
stxvd2x vs56, o0, T1
133-
stxvd2x vs57, o16, T1
134-
stxvd2x vs58, o32, T1
135-
stxvd2x vs59, o48, T1
132+
stxvd2x vs4, o0, T1
133+
stxvd2x vs5, o16, T1
134+
stxvd2x vs6, o32, T1
135+
stxvd2x vs7, o48, T1
136136
addi T1, T1, 64
137137

138-
stxvd2x vs60, o0, T1
139-
stxvd2x vs61, o16, T1
140-
stxvd2x vs62, o32, T1
141-
stxvd2x vs63, o48, T1
138+
stxvd2x vs8, o0, T1
139+
stxvd2x vs9, o16, T1
140+
stxvd2x vs10, o32, T1
141+
stxvd2x vs11, o48, T1
142142

143143
.endm
144144

0 commit comments

Comments
 (0)