Skip to content

Commit e3bc83f

Browse files
authored
Add x86 implementation of ?sum
as trivial copy of ?asum with the fabs calls removed
1 parent 70f2a4e commit e3bc83f

File tree

2 files changed

+415
-0
lines changed

2 files changed

+415
-0
lines changed

kernel/x86/sum.S

Lines changed: 207 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,207 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
/* Preprocessor setup for the 32-bit x86 ?sum kernel: names for the stack
   slots of the three arguments and for the registers that hold them. */
/* ASSEMBLER is defined before common.h is included; presumably it selects
   the assembler-side definitions in that header -- confirm in common.h. */
#define ASSEMBLER
40+
#include "common.h"
41+
42+
/* STACK: bytes pushed by the routine before it reads its arguments.
   8 matches the two 4-byte pushl instructions at the start of the routine. */
#define STACK 8
43+
/* ARGS: extra term in the argument offsets; zero here. */
#define ARGS 0
44+
45+
/* %esp-relative locations of the arguments once the pushes are done.
   The leading 4 / 8 / 12 presumably skips the return address and steps
   through consecutive 4-byte argument slots -- confirm against the ABI. */
#define STACK_M 4 + STACK + ARGS(%esp)
46+
#define STACK_X 8 + STACK + ARGS(%esp)
47+
#define STACK_INCX 12 + STACK + ARGS(%esp)
48+
49+
/* Register aliases for the arguments after they are loaded:
   M = element count, X = current read address, INCX = stride. */
#define M %edx
50+
#define X %ecx
51+
#define INCX %esi
52+
53+
/* I: loop counter for both the unrolled loops and the remainder loops. */
#define I %eax
54+
55+
/* NOTE(review): PREFETCH, PREFETCHSIZE and PREOFFSET used by the routine are
   not defined in this file; presumably they come from l1param.h -- confirm. */
#include "l1param.h"
57+
/*
 * ?sum kernel (32-bit x86, x87 FPU).
 *
 * Adds up M elements read starting at X, stepping by INCX elements, and
 * leaves the total in %st(0) when it executes ret.
 *
 * If M <= 0 or INCX <= 0 nothing is read and %st(0) holds 0.0.
 *
 * Four partial sums are kept on the x87 stack; the main loops consume eight
 * elements per iteration and a remainder loop handles the last M & 7
 * elements. The partial sums are combined at .L998.
 *
 * PROLOGUE, PROFCODE, EMMS, FLD, ALIGN_4, SIZE and BASE_SHIFT are not defined
 * in this file; presumably they come from common.h -- confirm there.
 */
PROLOGUE
57+
58+
/* Save %esi (aliased as INCX) and %ebx; STACK (8) accounts for these two
   pushes. NOTE(review): %ebx is not referenced by name anywhere in this
   routine -- it may be needed by PROFCODE; confirm before removing. */
pushl %esi
59+
pushl %ebx
60+
61+
PROFCODE
62+
63+
/* Only for the gfortran / g95 Fortran interfaces: issue EMMS before the x87
   stack is used. NOTE(review): presumably guards against MMX state left by
   the caller -- confirm. */
#if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
64+
EMMS
65+
#endif
66+
67+
/* Fetch the three arguments from their stack slots. */
movl STACK_M, M
68+
movl STACK_X, X
69+
movl STACK_INCX, INCX
70+
71+
/* With F_INTERFACE the M and INCX slots hold addresses, so load the values
   they point to. X is used as an address either way. */
#ifdef F_INTERFACE
72+
movl (M), M
73+
movl (INCX), INCX
74+
#endif
75+
76+
/* Push 0.0. If either check below branches to .L999 this is the only value
   on the x87 stack and therefore the result. */
fldz
77+
/* M <= 0: nothing to sum. */
testl M, M
78+
jle .L999
79+
/* INCX <= 0: also treated as nothing to sum. */
testl INCX, INCX
80+
jle .L999
81+
82+
/* Shift INCX left by BASE_SHIFT. The comparison with SIZE below implies the
   result is the stride in bytes (assumes SIZE == 1 << BASE_SHIFT -- confirm). */
sall $BASE_SHIFT, INCX
83+
/* Three more zeros: together with the one above, %st(0)..%st(3) now hold
   four independent partial sums. */
fldz
84+
fldz
85+
fldz
86+
/* Stride other than exactly one element: use the strided path at .L40. */
cmpl $SIZE, INCX
87+
jne .L40
88+
89+
/* ---- Unit-stride path ---- */
/* I = M >> 3 = number of full 8-element iterations (M > 0 here).
   Skip the unrolled loop if there are none. */
movl M, I
90+
sarl $3, I
91+
jle .L20
92+
ALIGN_4
93+
94+
/* Unrolled loop: eight consecutive elements per iteration. */
.L10:
95+
/* Optional prefetch relative to X, emitted only when PREFETCH is defined. */
#ifdef PREFETCH
96+
PREFETCH (PREFETCHSIZE + 0) - PREOFFSET(X)
97+
#endif
98+
99+
/* Push elements 0..3. The four partial sums are now at %st(4)..%st(7). */
FLD 0 * SIZE(X)
100+
FLD 1 * SIZE(X)
101+
FLD 2 * SIZE(X)
102+
FLD 3 * SIZE(X)
103+
104+
/* Each faddp adds the top value into a partial sum and pops, which moves
   every remaining entry up by one. Targets 7, 5, 3, 1 therefore hit the
   four partial sums one after another, one element per partial sum. */
faddp %st, %st(7)
105+
faddp %st, %st(5)
106+
faddp %st, %st(3)
107+
faddp %st, %st(1)
108+
109+
/* Same again for elements 4..7. */
FLD 4 * SIZE(X)
110+
FLD 5 * SIZE(X)
111+
FLD 6 * SIZE(X)
112+
FLD 7 * SIZE(X)
113+
114+
/* Advance X past the eight elements just loaded. */
addl $8 * SIZE, X
115+
116+
faddp %st, %st(7)
117+
faddp %st, %st(5)
118+
faddp %st, %st(3)
119+
faddp %st, %st(1)
120+
121+
decl I
122+
jg .L10
123+
ALIGN_4
124+
125+
/* Unit-stride remainder: I = M & 7 leftover elements; none left means go
   straight to the final reduction. */
.L20:
126+
movl M, I
127+
andl $7, I
128+
jle .L998
129+
ALIGN_4
130+
131+
132+
/* One element per iteration. After the push the former %st(0) partial sum is
   at %st(1); the element is added into it and popped. */
.L21:
133+
FLD (X)
134+
faddp %st,%st(1)
135+
addl $1 * SIZE, X
136+
decl I
137+
jg .L21
138+
/* Skip over the strided path to the final reduction. */
jmp .L998
139+
ALIGN_4
140+
141+
/* ---- Strided path (INCX != SIZE after scaling) ---- */
/* Same structure as the unit-stride path; X is advanced by INCX after each
   load instead of using fixed offsets. */
.L40:
142+
movl M, I
143+
sarl $3, I
144+
jle .L60
145+
ALIGN_4
146+
147+
/* Unrolled loop: eight strided elements per iteration. */
.L50:
148+
/* Push four elements, stepping X by the stride after each one. */
FLD (X)
149+
addl INCX, X
150+
FLD (X)
151+
addl INCX, X
152+
FLD (X)
153+
addl INCX, X
154+
FLD (X)
155+
addl INCX, X
156+
157+
/* Distribute the four values over the four partial sums (see .L10). */
faddp %st, %st(7)
158+
faddp %st, %st(5)
159+
faddp %st, %st(3)
160+
faddp %st, %st(1)
161+
162+
/* Next four elements. */
FLD (X)
163+
addl INCX, X
164+
FLD (X)
165+
addl INCX, X
166+
FLD (X)
167+
addl INCX, X
168+
FLD (X)
169+
addl INCX, X
170+
171+
faddp %st, %st(7)
172+
faddp %st, %st(5)
173+
faddp %st, %st(3)
174+
faddp %st, %st(1)
175+
176+
decl I
177+
jg .L50
178+
ALIGN_4
179+
180+
/* Strided remainder: I = M & 7 leftover elements. */
.L60:
181+
movl M, I
182+
andl $7, I
183+
jle .L998
184+
ALIGN_4
185+
186+
187+
/* One strided element per iteration, added into the partial sum that was on
   top of the stack before the load. Falls through into .L998 when done. */
.L61:
188+
FLD (X)
189+
addl INCX, X
190+
faddp %st,%st(1)
191+
decl I
192+
jg .L61
193+
ALIGN_4
194+
195+
/* Final reduction: fold the four partial sums into one. Each faddp pops, so
   after the third a single value -- the total -- remains in %st(0). */
.L998:
196+
faddp %st,%st(2)
197+
faddp %st,%st(1)
198+
faddp %st,%st(1)
199+
ALIGN_4
200+
201+
/* Common exit: restore the saved registers in reverse push order and return
   with the result in %st(0). */
.L999:
202+
popl %ebx
203+
popl %esi
204+
ret
205+
206+
EPILOGUE

0 commit comments

Comments
 (0)