Skip to content

Commit 70f2a4e

Browse files
authored
Add SPARC implementation of ?sum
as trivial copy of ?asum with the fabs replaced by fmov to preserve code structure
1 parent 706dfe2 commit 70f2a4e

File tree

2 files changed

+652
-0
lines changed

2 files changed

+652
-0
lines changed

kernel/sparc/sum.S

Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
#define ASSEMBLER
40+
#include "common.h"
41+
42+
/* Integer registers: the three operands plus the loop counter. */
#define N	%i0
#define X	%i1
#define INCX	%i2
#define I	%i3

/*
 * Floating-point registers.
 *   c1, c2   running partial sums
 *   t1 - t4  values staged for the next FADD
 *   a1 - a8  values freshly loaded from X
 * c1 is %f0 in both precisions.  The double-precision layout uses only
 * even-numbered registers; the single-precision layout packs them densely.
 */
#define c1	%f0

#ifndef DOUBLE
#define c2	%f1
#define t1	%f4
#define t2	%f5
#define t3	%f6
#define t4	%f7

#define a1	%f8
#define a2	%f9
#define a3	%f10
#define a4	%f11
#define a5	%f12
#define a6	%f13
#define a7	%f14
#define a8	%f15
#else
#define c2	%f2
#define t1	%f8
#define t2	%f10
#define t3	%f12
#define t4	%f14

#define a1	%f16
#define a2	%f18
#define a3	%f20
#define a4	%f22
#define a5	%f24
#define a6	%f26
#define a7	%f28
#define a8	%f30
#endif
80+
81+
/*
 * Plain (non-absolute) sum of a strided vector.
 *
 * In:   N     element count
 *       X     address of the first element
 *       INCX  stride in elements (shifted by BASE_SHIFT on entry and used
 *             as an address increment from then on)
 * Out:  c1    sum of the elements visited
 * Uses: I as loop counter; c2, t1-t4 and a1-a8 as FP scratch.
 *
 * N <= 0 or INCX <= 0 returns 0 without touching memory.
 *
 * Data flows a -> t -> c: a value is loaded into an a-register, moved to
 * a t-register one step later, and added into c1 or c2 one step after
 * that.  The t-registers therefore start at zero and still hold pending
 * values when a loop ends; the exit sequence folds them in and then
 * combines the two partial sums.
 *
 * Every branch below has a delay slot; the instruction following a branch
 * is marked where that matters for reading the code.
 *
 * NOTE(review): counts are shifted with sra and tested through %icc, so
 * only the low 32 bits of N and INCX are honoured -- confirm for 64-bit
 * builds.
 */

/* Prefetch distance, in elements, for the unit-stride main loop. */
#define PREFETCHSIZE 128

	PROLOGUE
	SAVESP

	/* FCLR comes from common.h; expected to zero %f0 (c1) -- confirm. */
	FCLR(0)

	sll	INCX, BASE_SHIFT, INCX

	/* Start both partial sums and all staging registers from c1. */
	FMOV	c1, c2
	FMOV	c1, t1
	FMOV	c1, t2
	FMOV	c1, t3
	FMOV	c1, t4

	/*
	 * Nothing to add for N <= 0.  Without this guard a negative N skips
	 * the unrolled loop but (N & 7) can still be 1..7, so the tail loop
	 * would read past the caller's data.
	 */
	cmp	N, 0
	ble,pn	%icc, .LL19
	nop

	cmp	INCX, 0
	ble	.LL19
	cmp	INCX, SIZE		/* delay slot; feeds the bne below */
	bne	.LL50

	/* ---- unit stride ------------------------------------------------ */
	sra	N, 3, I			/* delay slot of bne; I = N / 8 */
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

	/* Preload the first group of eight. */
	LDF	[X + 0 * SIZE], a1
	add	I, -1, I
	LDF	[X + 1 * SIZE], a2
	cmp	I, 0
	LDF	[X + 2 * SIZE], a3
	LDF	[X + 3 * SIZE], a4
	LDF	[X + 4 * SIZE], a5
	LDF	[X + 5 * SIZE], a6
	LDF	[X + 6 * SIZE], a7
	LDF	[X + 7 * SIZE], a8

	ble,pt	%icc, .LL12
	add	X, 8 * SIZE, X		/* delay slot */

	/* Main loop: consume one group of eight while loading the next. */
.LL11:
	FADD	c1, t1, c1
	prefetch [X + PREFETCHSIZE * SIZE], 0
	FMOV	a1, t1
	LDF	[X + 0 * SIZE], a1

	FADD	c2, t2, c2
	add	I, -1, I
	FMOV	a2, t2
	LDF	[X + 1 * SIZE], a2

	FADD	c1, t3, c1
	cmp	I, 0			/* condition for the bg at loop end */
	FMOV	a3, t3
	LDF	[X + 2 * SIZE], a3

	FADD	c2, t4, c2
	nop
	FMOV	a4, t4
	LDF	[X + 3 * SIZE], a4

	FADD	c1, t1, c1
	nop
	FMOV	a5, t1
	LDF	[X + 4 * SIZE], a5

	FADD	c2, t2, c2
	nop
	FMOV	a6, t2
	LDF	[X + 5 * SIZE], a6

	FADD	c1, t3, c1
	FMOV	a7, t3
	LDF	[X + 6 * SIZE], a7
	add	X, 8 * SIZE, X

	FADD	c2, t4, c2
	FMOV	a8, t4
	bg,pt	%icc, .LL11
	LDF	[X - 1 * SIZE], a8	/* delay slot; X already advanced */

	/* Drain: no further loads, push the last loaded group through. */
.LL12:
	FADD	c1, t1, c1
	FMOV	a1, t1
	FADD	c2, t2, c2
	FMOV	a2, t2

	FADD	c1, t3, c1
	FMOV	a3, t3
	FADD	c2, t4, c2
	FMOV	a4, t4

	FADD	c1, t1, c1
	FMOV	a5, t1
	FADD	c2, t2, c2
	FMOV	a6, t2

	FADD	c1, t3, c1
	FMOV	a7, t3
	FADD	c2, t4, c2
	FMOV	a8, t4

	/* Remainder: N & 7 elements, one per iteration. */
.LL15:
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X + 0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	FADD	c1, t1, c1
	FMOV	a1, t1
	bg,pt	%icc, .LL16
	add	X, 1 * SIZE, X		/* delay slot */

	/* Exit: fold the pending t-values, then merge the partial sums. */
.LL19:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	clr	%o0			/* delay slot; same as the strided exit */

	/* ---- general stride --------------------------------------------- */
.LL50:
	sra	N, 3, I			/* I = N / 8 */
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

	/* Preload the first group of eight, stepping X by INCX each time. */
	LDF	[X + 0 * SIZE], a1
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a2
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a3
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a4
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a5
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a6
	add	X, INCX, X
	add	I, -1, I
	LDF	[X + 0 * SIZE], a7
	cmp	I, 0
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a8

	ble,pt	%icc, .LL52
	add	X, INCX, X		/* delay slot */

	/* Main loop: consume one group of eight while loading the next. */
.LL51:
	FADD	c1, t1, c1
	add	I, -1, I
	FMOV	a1, t1
	LDF	[X + 0 * SIZE], a1
	add	X, INCX, X

	FADD	c2, t2, c2
	cmp	I, 0			/* condition for the bg at loop end */
	FMOV	a2, t2
	LDF	[X + 0 * SIZE], a2
	add	X, INCX, X

	FADD	c1, t3, c1
	FMOV	a3, t3
	LDF	[X + 0 * SIZE], a3
	add	X, INCX, X

	FADD	c2, t4, c2
	FMOV	a4, t4
	LDF	[X + 0 * SIZE], a4
	add	X, INCX, X

	FADD	c1, t1, c1
	FMOV	a5, t1
	LDF	[X + 0 * SIZE], a5
	add	X, INCX, X

	FADD	c2, t2, c2
	FMOV	a6, t2
	LDF	[X + 0 * SIZE], a6
	add	X, INCX, X

	FADD	c1, t3, c1
	FMOV	a7, t3
	LDF	[X + 0 * SIZE], a7
	add	X, INCX, X

	FADD	c2, t4, c2
	FMOV	a8, t4
	LDF	[X + 0 * SIZE], a8

	bg,pt	%icc, .LL51
	add	X, INCX, X		/* delay slot */

	/* Drain: no further loads, push the last loaded group through. */
.LL52:
	FADD	c1, t1, c1
	FMOV	a1, t1
	FADD	c2, t2, c2
	FMOV	a2, t2

	FADD	c1, t3, c1
	FMOV	a3, t3
	FADD	c2, t4, c2
	FMOV	a4, t4

	FADD	c1, t1, c1
	FMOV	a5, t1
	FADD	c2, t2, c2
	FMOV	a6, t2

	FADD	c1, t3, c1
	FMOV	a7, t3
	FADD	c2, t4, c2
	FMOV	a8, t4

	/* Remainder: N & 7 elements, one per iteration. */
.LL55:
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X + 0 * SIZE], a1
	FADD	c1, t1, c1
	add	I, -1, I
	FMOV	a1, t1
	cmp	I, 0
	bg,pt	%icc, .LL56
	add	X, INCX, X		/* delay slot */

	/* Exit: fold the pending t-values, then merge the partial sums. */
.LL59:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	clr	%o0			/* delay slot */

	EPILOGUE

0 commit comments

Comments
 (0)