Skip to content

Commit 2c0a008

Browse files
dgemm_ncopy_4_ save/restore
1 parent c5425da commit 2c0a008

File tree

2 files changed

+160
-249
lines changed

2 files changed

+160
-249
lines changed

kernel/power/dgemm_ncopy_4_power8.S

Lines changed: 41 additions & 134 deletions
Original file line number | Diff line number | Diff line change
@@ -109,80 +109,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
109109

110110
#include "dgemm_ncopy_macros_4_power8.S"
111111

112-
#define STACKSIZE 384
113-
#define STACKSIZE 576
112+
#define STACKSIZE 144
113+
114114

115115
PROLOGUE
116116
PROFCODE
117117

118118
addi SP, SP, -STACKSIZE
119-
//addi SP, SP, -208
119+
120120
li r0, 0
121121

122-
stfd f14, 0(SP)
123-
stfd f15, 8(SP)
124-
stfd f16, 16(SP)
125-
stfd f17, 24(SP)
126-
stfd f18, 32(SP)
127-
stfd f19, 40(SP)
128-
stfd f20, 48(SP)
129-
stfd f21, 56(SP)
130-
stfd f22, 64(SP)
131-
stfd f23, 72(SP)
132-
stfd f24, 80(SP)
133-
stfd f25, 88(SP)
134-
stfd f26, 96(SP)
135-
stfd f27, 104(SP)
136-
stfd f28, 112(SP)
137-
stfd f29, 120(SP)
138-
stfd f30, 128(SP)
139-
stfd f31, 136(SP)
140-
141-
142-
std r31, 144(SP)
143-
std r30, 152(SP)
144-
std r29, 160(SP)
145-
std r28, 168(SP)
146-
std r27, 176(SP)
147-
std r26, 184(SP)
148-
std r25, 192(SP)
149-
std r24, 200(SP)
150-
std r23, 208(SP)
151-
std r22, 216(SP)
152-
std r21, 224(SP)
153-
std r20, 232(SP)
154-
std r19, 240(SP)
155-
std r18, 248(SP)
156-
std r17, 256(SP)
157-
std r16, 264(SP)
158-
std r15, 272(SP)
159-
std r14, 280(SP)
122+
std r14, 0(SP)
123+
std r15, 8(SP)
124+
std r16, 16(SP)
125+
std r17, 24(SP)
126+
std r18, 32(SP)
127+
std r19, 40(SP)
128+
std r20, 48(SP)
129+
std r21, 56(SP)
130+
std r22, 64(SP)
131+
std r23, 72(SP)
132+
std r24, 80(SP)
133+
std r25, 88(SP)
134+
std r26, 96(SP)
135+
std r27, 104(SP)
136+
std r28, 112(SP)
137+
std r29, 120(SP)
138+
std r30, 128(SP)
139+
std r31, 136(SP)
160140

161-
addi r11,SP,288
162-
stvx v20, r11,r0
163-
addi r11,r11,16
164-
stvx v21, r11,r0
165-
addi r11,r11,16
166-
stvx v22, r11,r0
167-
addi r11,r11,16
168-
stvx v23, r11,r0
169-
addi r11,r11,16
170-
stvx v24, r11,r0
171-
addi r11,r11,16
172-
stvx v25, r11,r0
173-
addi r11,r11,16
174-
stvx v26, r11,r0
175-
addi r11,r11,16
176-
stvx v27, r11,r0
177-
addi r11,r11,16
178-
stvx v28, r11,r0
179-
addi r11,r11,16
180-
stvx v29, r11,r0
181-
addi r11,r11,16
182-
stvx v30, r11,r0
183-
addi r11,r11,16
184-
stvx v31, r11,r0
185-
li r11,0
186141

187142
cmpwi cr0, M, 0
188143
ble- L999
@@ -191,10 +146,8 @@ li r11,0
191146

192147
slwi LDA, LDA, BASE_SHIFT
193148

194-
//li PREA, 384
195-
//li PREB, 384
196-
li PREA, 576
197-
li PREB, 576
149+
li PREA, 384
150+
li PREB, 384
198151

199152

200153
li o8, 8
@@ -210,70 +163,24 @@ li r11,0
210163

211164
L999:
212165

213-
li r3, 0
214-
215-
lfd f14, 0(SP)
216-
lfd f15, 8(SP)
217-
lfd f16, 16(SP)
218-
lfd f17, 24(SP)
219-
lfd f18, 32(SP)
220-
lfd f19, 40(SP)
221-
lfd f20, 48(SP)
222-
lfd f21, 56(SP)
223-
lfd f22, 64(SP)
224-
lfd f23, 72(SP)
225-
lfd f24, 80(SP)
226-
lfd f25, 88(SP)
227-
lfd f26, 96(SP)
228-
lfd f27, 104(SP)
229-
lfd f28, 112(SP)
230-
lfd f29, 120(SP)
231-
lfd f30, 128(SP)
232-
lfd f31, 136(SP)
233-
234-
ld r31, 144(SP)
235-
ld r30, 152(SP)
236-
ld r29, 160(SP)
237-
ld r28, 168(SP)
238-
ld r27, 176(SP)
239-
ld r26, 184(SP)
240-
ld r25, 192(SP)
241-
ld r24, 200(SP)
242-
ld r23, 208(SP)
243-
ld r22, 216(SP)
244-
ld r21, 224(SP)
245-
ld r20, 232(SP)
246-
ld r19, 240(SP)
247-
ld r18, 248(SP)
248-
ld r17, 256(SP)
249-
ld r16, 264(SP)
250-
ld r15, 272(SP)
251-
ld r14, 280(SP)
252-
addi r11,SP,288
253-
lvx v20, r11,r3
254-
addi r11,r11,16
255-
lvx v21, r11,r3
256-
addi r11,r11,16
257-
lvx v22, r11,r3
258-
addi r11,r11,16
259-
lvx v23, r11,r3
260-
addi r11,r11,16
261-
lvx v24, r11,r3
262-
addi r11,r11,16
263-
lvx v25, r11,r3
264-
addi r11,r11,16
265-
lvx v26, r11,r3
266-
addi r11,r11,16
267-
lvx v27, r11,r3
268-
addi r11,r11,16
269-
lvx v28, r11,r3
270-
addi r11,r11,16
271-
lvx v29, r11,r3
272-
addi r11,r11,16
273-
lvx v30, r11,r3
274-
addi r11,r11,16
275-
lvx v31, r11,r3
276-
li r11,0
166+
ld r14, 0(SP)
167+
ld r15, 8(SP)
168+
ld r16, 16(SP)
169+
ld r17, 24(SP)
170+
ld r18, 32(SP)
171+
ld r19, 40(SP)
172+
ld r20, 48(SP)
173+
ld r21, 56(SP)
174+
ld r22, 64(SP)
175+
ld r23, 72(SP)
176+
ld r24, 80(SP)
177+
ld r25, 88(SP)
178+
ld r26, 96(SP)
179+
ld r27, 104(SP)
180+
ld r28, 112(SP)
181+
ld r29, 120(SP)
182+
ld r30, 128(SP)
183+
ld r31, 136(SP)
277184

278185
addi SP, SP, STACKSIZE
279186
//addi SP, SP, 208

0 commit comments

Comments
 (0)