Skip to content

Commit 9d717cb

Browse files
authored
Add x86_64 implementation of ?sum
as trivial copy of ?asum with the fabs calls removed
1 parent e3bc83f commit 9d717cb

File tree

2 files changed

+359
-0
lines changed

2 files changed

+359
-0
lines changed

kernel/x86_64/sum.S

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
/*
 * Preamble of the sum kernel: one build switch, two shared headers and the
 * register aliases used by the code that follows.
 * ASSEMBLER is defined before common.h is included; presumably it selects
 * the assembler-side contents of that header -- confirm in common.h.
 */
#define ASSEMBLER
40+
#include "common.h"
41+
/*
 * Register aliases.  ARG1..ARG3 are not defined in this file (presumably
 * common.h maps them to the argument registers of the target ABI --
 * confirm).  From their use in the kernel: M is the element count, X the
 * address of the current element, INCX the stride between elements.
 */
42+
#define M ARG1
43+
#define X ARG2
44+
#define INCX ARG3
45+
/* I is the trip counter of the unrolled loops; it lives in %rax. */
46+
#define I %rax
47+
/*
 * The kernel references PREFETCH, PREFETCHSIZE and PREOFFSET, none of which
 * is defined here; presumably l1param.h provides them -- confirm.
 */
48+
#include "l1param.h"
49+
50+
/*
 * Sum kernel written for the x87 FPU.  According to the commit description
 * this is a copy of the ?asum kernel with the fabs calls removed, so the
 * elements are added as they are.
 *
 * Inputs: M, X and INCX (macros for ARG1..ARG3).  From their use below,
 * M is the element count, X the address of the first element and INCX the
 * distance between consecutive elements (presumably counted in elements,
 * since it is scaled by BASE_SHIFT before being added to X -- confirm).
 * Result: left in st(0); it is 0.0 when M <= 0 or INCX <= 0.
 * Overwrites: M, X, INCX, I (%rax), the flags and the x87 register stack.
 *
 * NOTE(review): the result is left on the x87 stack rather than in an SSE
 * register.  Under the System V AMD64 ABI that is where long double values
 * are returned, so presumably this kernel serves the extended-precision
 * type -- confirm against the kernel selection files.
 *
 * PROLOGUE, PROFCODE, ALIGN_4, FLD, SIZE, BASE_SHIFT and EPILOGUE are not
 * defined in this file (presumably in common.h).  ALIGN_4 sits on several
 * fall-through paths, so whatever it emits must be safe to execute
 * (presumably alignment padding only -- confirm).
 */
PROLOGUE
51+
PROFCODE
52+
/* st(0) = 0.0; this is the value left for the caller by the early exits. */
53+
fldz
54+
testq M, M
/* M <= 0: nothing to add. */
55+
jle .L999
56+
testq INCX, INCX
/* INCX <= 0: zero and negative strides are not processed. */
57+
jle .L999
58+
/*
 * Scale the stride so that it can be added to X directly (presumably
 * BASE_SHIFT = log2(SIZE), which would make INCX a byte count -- confirm).
 */
59+
salq $BASE_SHIFT, INCX
60+
/* Three more zeros: st(0)..st(3) now hold four independent partial sums. */
61+
fldz
62+
fldz
63+
fldz
/*
 * A scaled stride equal to SIZE means consecutive elements are adjacent;
 * every other stride takes the strided path at .L40.
 */
64+
cmpq $SIZE, INCX
65+
jne .L40
66+
/*
 * Contiguous path, unrolled by eight: I = M >> 3 full iterations.
 * NOTE(review): jle after a multi-bit sarq also reads OF, which the
 * architecture manuals leave undefined for shift counts above one.  M > 0
 * here, so only ZF matters and je would express the same test without
 * touching OF -- confirm against the SDM before changing.
 */
67+
movq M, I
68+
sarq $3, I
69+
jle .L20
70+
ALIGN_4
71+
72+
.L10:
/* Optional prefetch of data ahead of X, emitted only when PREFETCH is defined. */
73+
#ifdef PREFETCH
74+
PREFETCH (PREFETCHSIZE + 0) - PREOFFSET(X)
75+
#endif
76+
/*
 * Push elements 0..3.  Together with the four partial sums underneath, all
 * eight x87 registers are now in use: st(0)..st(3) are the loaded values
 * (most recent on top), st(4)..st(7) the partial sums.
 */
77+
FLD 0 * SIZE(X)
78+
FLD 1 * SIZE(X)
79+
FLD 2 * SIZE(X)
80+
FLD 3 * SIZE(X)
81+
/*
 * Each faddp adds st(0) into the named register and pops.  Every pop moves
 * the remaining registers up by one, so the targets st(7), st(5), st(3) and
 * st(1) are four different partial sums; afterwards only those four remain.
 */
82+
faddp %st, %st(7)
83+
faddp %st, %st(5)
84+
faddp %st, %st(3)
85+
faddp %st, %st(1)
86+
/* Same again for elements 4..7. */
87+
FLD 4 * SIZE(X)
88+
FLD 5 * SIZE(X)
89+
FLD 6 * SIZE(X)
90+
FLD 7 * SIZE(X)
91+
/* Advance X past the eight elements loaded in this iteration. */
92+
addq $8 * SIZE, X
93+
94+
faddp %st, %st(7)
95+
faddp %st, %st(5)
96+
faddp %st, %st(3)
97+
faddp %st, %st(1)
98+
99+
decq I
100+
jg .L10
101+
ALIGN_4
102+
/*
 * Tail of the contiguous path: M & 7 elements are left; zero means there is
 * no tail and control goes straight to the final reduction.
 */
103+
.L20:
104+
andq $7, M
105+
jle .L998
106+
ALIGN_4
107+
/*
 * One element per iteration.  After the load, st(1) is the partial sum that
 * was on top of the stack, so the whole tail accumulates into that one.
 */
108+
.L21:
109+
FLD (X)
110+
faddp %st,%st(1)
111+
addq $1 * SIZE, X
112+
decq M
113+
jg .L21
114+
jmp .L998
115+
ALIGN_4
116+
/*
 * Strided path: same structure as the contiguous one (eight elements per
 * iteration, four partial sums), but X is advanced by the scaled INCX after
 * every single load.  The same review note on sarq/jle applies here.
 */
117+
.L40:
118+
movq M, I
119+
sarq $3, I
120+
jle .L60
121+
ALIGN_4
122+
123+
.L50:
124+
FLD (X)
125+
addq INCX, X
126+
FLD (X)
127+
addq INCX, X
128+
FLD (X)
129+
addq INCX, X
130+
FLD (X)
131+
addq INCX, X
132+
133+
faddp %st, %st(7)
134+
faddp %st, %st(5)
135+
faddp %st, %st(3)
136+
faddp %st, %st(1)
137+
138+
FLD (X)
139+
addq INCX, X
140+
FLD (X)
141+
addq INCX, X
142+
FLD (X)
143+
addq INCX, X
144+
FLD (X)
145+
addq INCX, X
146+
147+
faddp %st, %st(7)
148+
faddp %st, %st(5)
149+
faddp %st, %st(3)
150+
faddp %st, %st(1)
151+
152+
decq I
153+
jg .L50
154+
ALIGN_4
155+
/* Tail of the strided path: the remaining M & 7 elements, one at a time. */
156+
.L60:
157+
andq $7, M
158+
jle .L998
159+
ALIGN_4
160+
161+
162+
.L61:
163+
FLD (X)
164+
addq INCX, X
165+
faddp %st,%st(1)
166+
decq M
167+
jg .L61
/* No jump here: the strided tail falls through into the reduction below. */
168+
ALIGN_4
169+
/*
 * Final reduction.  The three faddp fold the four partial sums into one;
 * afterwards st(0) is the only register left in use and holds the total.
 */
170+
.L998:
171+
faddp %st,%st(2)
172+
faddp %st,%st(1)
173+
faddp %st,%st(1)
174+
ALIGN_4
175+
/* Common exit: st(0) holds either the total or the initial 0.0. */
176+
.L999:
177+
ret
178+
179+
EPILOGUE

kernel/x86_64/zsum.S

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
/*
 * Preamble of the zsum kernel: one build switch, two shared headers and the
 * register aliases used by the code that follows.
 * ASSEMBLER is defined before common.h is included; presumably it selects
 * the assembler-side contents of that header -- confirm in common.h.
 */
#define ASSEMBLER
40+
#include "common.h"
41+
/*
 * Register aliases.  ARG1..ARG3 are not defined in this file (presumably
 * common.h maps them to the argument registers of the target ABI --
 * confirm).  From their use in the kernel: M is the element count, X the
 * address of the current element, INCX the stride between elements.
 */
42+
#define M ARG1
43+
#define X ARG2
44+
#define INCX ARG3
45+
/* I is the trip counter of the unrolled loops; it lives in %rax. */
46+
#define I %rax
47+
/*
 * The kernel references PREFETCH, PREFETCHSIZE and PREOFFSET, none of which
 * is defined here; presumably l1param.h provides them -- confirm.
 */
48+
#include "l1param.h"
49+
50+
/*
 * Sum kernel for two-value elements, written for the x87 FPU.  According to
 * the commit description this is a copy of the ?asum kernel with the fabs
 * calls removed, so the values are added as they are.
 *
 * Every element consists of two SIZE-sized values, read at 0 * SIZE(X) and
 * 1 * SIZE(X) (presumably the real and imaginary part of a complex number
 * -- confirm).  Both values of every element end up in one single total.
 *
 * Inputs: M, X and INCX (macros for ARG1..ARG3).  From their use below,
 * M is the element count, X the address of the first element and INCX the
 * distance between consecutive elements (presumably counted in elements,
 * since it is scaled by ZBASE_SHIFT before being added to X -- confirm).
 * Result: left in st(0); it is 0.0 when M <= 0 or INCX <= 0.
 * Overwrites: M, X, INCX, I (%rax), the flags and the x87 register stack.
 *
 * NOTE(review): the result is left on the x87 stack rather than in an SSE
 * register.  Under the System V AMD64 ABI that is where long double values
 * are returned, so presumably this kernel serves the extended-precision
 * type -- confirm against the kernel selection files.
 *
 * PROLOGUE, PROFCODE, ALIGN_4, FLD, SIZE, ZBASE_SHIFT and EPILOGUE are not
 * defined in this file (presumably in common.h).  ALIGN_4 sits on several
 * fall-through paths, so whatever it emits must be safe to execute
 * (presumably alignment padding only -- confirm).
 */
PROLOGUE
51+
PROFCODE
52+
/* st(0) = 0.0; this is the value left for the caller by the early exits. */
53+
fldz
54+
testq M, M
/* M <= 0: nothing to add. */
55+
jle .L999
56+
testq INCX, INCX
/* INCX <= 0: zero and negative strides are not processed. */
57+
jle .L999
58+
/*
 * Scale the stride so that it can be added to X directly (presumably
 * ZBASE_SHIFT = log2(2 * SIZE), which would make INCX a byte count --
 * confirm).
 */
59+
salq $ZBASE_SHIFT, INCX
60+
/* Three more zeros: st(0)..st(3) now hold four independent partial sums. */
61+
fldz
62+
fldz
63+
fldz
/*
 * A scaled stride equal to 2 * SIZE means consecutive elements are
 * adjacent; every other stride takes the strided path at .L40.
 */
64+
cmpq $SIZE * 2, INCX
65+
jne .L40
66+
/*
 * Contiguous path, unrolled by four elements (eight values):
 * I = M >> 2 full iterations.
 * NOTE(review): jle after a multi-bit sarq also reads OF, which the
 * architecture manuals leave undefined for shift counts above one.  M > 0
 * here, so only ZF matters and je would express the same test without
 * touching OF -- confirm against the SDM before changing.
 */
67+
movq M, I
68+
sarq $2, I
69+
jle .L20
70+
ALIGN_4
71+
72+
.L10:
/* Optional prefetch of data ahead of X, emitted only when PREFETCH is defined. */
73+
#ifdef PREFETCH
74+
PREFETCH (PREFETCHSIZE + 0) - PREOFFSET(X)
75+
#endif
76+
/*
 * Push values 0..3 (two elements).  Together with the four partial sums
 * underneath, all eight x87 registers are now in use: st(0)..st(3) are the
 * loaded values (most recent on top), st(4)..st(7) the partial sums.
 */
77+
FLD 0 * SIZE(X)
78+
FLD 1 * SIZE(X)
79+
FLD 2 * SIZE(X)
80+
FLD 3 * SIZE(X)
81+
/*
 * Each faddp adds st(0) into the named register and pops.  Every pop moves
 * the remaining registers up by one, so the targets st(7), st(5), st(3) and
 * st(1) are four different partial sums; afterwards only those four remain.
 */
82+
faddp %st, %st(7)
83+
faddp %st, %st(5)
84+
faddp %st, %st(3)
85+
faddp %st, %st(1)
86+
/* Same again for values 4..7 (the next two elements). */
87+
FLD 4 * SIZE(X)
88+
FLD 5 * SIZE(X)
89+
FLD 6 * SIZE(X)
90+
FLD 7 * SIZE(X)
91+
/* Advance X past the eight values loaded in this iteration. */
92+
addq $8 * SIZE, X
93+
94+
faddp %st, %st(7)
95+
faddp %st, %st(5)
96+
faddp %st, %st(3)
97+
faddp %st, %st(1)
98+
99+
decq I
100+
jg .L10
101+
ALIGN_4
102+
/*
 * Tail of the contiguous path: M & 3 elements are left; zero means there is
 * no tail and control goes straight to the final reduction.
 */
103+
.L20:
104+
andq $3, M
105+
jle .L998
106+
ALIGN_4
107+
108+
/*
 * One element (two values) per iteration.  The value read at 1 * SIZE is
 * added into the partial sum that was st(1) before the loads, the value
 * read at 0 * SIZE into the one that was st(0).
 */
109+
.L21:
110+
FLD 0 * SIZE(X)
111+
FLD 1 * SIZE(X)
112+
faddp %st,%st(3)
113+
faddp %st,%st(1)
114+
addq $2 * SIZE, X
115+
decq M
116+
jg .L21
117+
jmp .L998
118+
ALIGN_4
119+
/*
 * Strided path: same structure as the contiguous one (four elements per
 * iteration, four partial sums), but X is advanced by the scaled INCX after
 * each element.  The same review note on sarq/jle applies here.
 */
120+
.L40:
121+
movq M, I
122+
sarq $2, I
123+
jle .L60
124+
ALIGN_4
125+
126+
.L50:
127+
FLD 0 * SIZE(X)
128+
FLD 1 * SIZE(X)
129+
addq INCX, X
130+
FLD 0 * SIZE(X)
131+
FLD 1 * SIZE(X)
132+
addq INCX, X
133+
134+
faddp %st, %st(7)
135+
faddp %st, %st(5)
136+
faddp %st, %st(3)
137+
faddp %st, %st(1)
138+
139+
FLD 0 * SIZE(X)
140+
FLD 1 * SIZE(X)
141+
addq INCX, X
142+
FLD 0 * SIZE(X)
143+
FLD 1 * SIZE(X)
144+
addq INCX, X
145+
146+
faddp %st, %st(7)
147+
faddp %st, %st(5)
148+
faddp %st, %st(3)
149+
faddp %st, %st(1)
150+
151+
decq I
152+
jg .L50
153+
ALIGN_4
154+
/* Tail of the strided path: the remaining M & 3 elements, one at a time. */
155+
.L60:
156+
andq $3, M
157+
jle .L998
158+
ALIGN_4
159+
160+
161+
.L61:
162+
FLD 0 * SIZE(X)
163+
FLD 1 * SIZE(X)
164+
addq INCX, X
165+
faddp %st,%st(3)
166+
faddp %st,%st(1)
167+
decq M
168+
jg .L61
/* No jump here: the strided tail falls through into the reduction below. */
169+
ALIGN_4
170+
/*
 * Final reduction.  The three faddp fold the four partial sums into one;
 * afterwards st(0) is the only register left in use and holds the total.
 */
171+
.L998:
172+
faddp %st,%st(2)
173+
faddp %st,%st(1)
174+
faddp %st,%st(1)
175+
ALIGN_4
176+
/* Common exit: st(0) holds either the total or the initial 0.0. */
177+
.L999:
178+
ret
179+
180+
EPILOGUE

0 commit comments

Comments
 (0)