|
| 1 | +/*********************************************************************/ |
| 2 | +/* Copyright 2009, 2010 The University of Texas at Austin. */ |
| 3 | +/* All rights reserved. */ |
| 4 | +/* */ |
| 5 | +/* Redistribution and use in source and binary forms, with or */ |
| 6 | +/* without modification, are permitted provided that the following */ |
| 7 | +/* conditions are met: */ |
| 8 | +/* */ |
| 9 | +/* 1. Redistributions of source code must retain the above */ |
| 10 | +/* copyright notice, this list of conditions and the following */ |
| 11 | +/* disclaimer. */ |
| 12 | +/* */ |
| 13 | +/* 2. Redistributions in binary form must reproduce the above */ |
| 14 | +/* copyright notice, this list of conditions and the following */ |
| 15 | +/* disclaimer in the documentation and/or other materials */ |
| 16 | +/* provided with the distribution. */ |
| 17 | +/* */ |
| 18 | +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
| 19 | +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
| 20 | +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
| 21 | +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
| 22 | +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
| 23 | +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
| 24 | +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
| 25 | +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
| 26 | +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
| 27 | +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
| 28 | +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
| 29 | +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
| 30 | +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
| 31 | +/* POSSIBILITY OF SUCH DAMAGE. */ |
| 32 | +/* */ |
| 33 | +/* The views and conclusions contained in the software and */ |
| 34 | +/* documentation are those of the authors and should not be */ |
| 35 | +/* interpreted as representing official policies, either expressed */ |
| 36 | +/* or implied, of The University of Texas at Austin. */ |
| 37 | +/*********************************************************************/ |
| 38 | + |
| 39 | +#define ASSEMBLER |
| 40 | +#include "common.h" |
| 41 | + |
/*
 * Register aliases used by the summation routine below.
 *
 * Integer side: the three arguments plus one loop counter.
 */
#define N	%i0	/* element count                              */
#define X	%i1	/* address of the next element to load        */
#define INCX	%i2	/* stride; the routine shifts it by BASE_SHIFT */
#define I	%i3	/* loop counter                               */

/*
 * Floating-point side:
 *   c1, c2    running partial sums (c1 also carries the final result)
 *   t1 .. t4  values already loaded and waiting to be added
 *   a1 .. a8  destinations of the loads for one block of eight elements
 */
#ifdef DOUBLE
/* DOUBLE build: only even-numbered FP registers are used. */
#define c1	%f0
#define c2	%f2
#define t1	%f8
#define t2	%f10
#define t3	%f12
#define t4	%f14

#define a1	%f16
#define a2	%f18
#define a3	%f20
#define a4	%f22
#define a5	%f24
#define a6	%f26
#define a7	%f28
#define a8	%f30
#else
/* Non-DOUBLE build: consecutive FP registers are used. */
#define c1	%f0
#define c2	%f1
#define t1	%f4
#define t2	%f5
#define t3	%f6
#define t4	%f7

#define a1	%f8
#define a2	%f9
#define a3	%f10
#define a4	%f11
#define a5	%f12
#define a6	%f13
#define a7	%f14
#define a8	%f15
#endif
| 80 | + |
/*
 * Plain (signed) sum of N elements read from X with stride INCX.
 *
 * In:   N    (%i0)  number of elements
 *       X    (%i1)  address of the first element
 *       INCX (%i2)  stride in elements (shifted left by BASE_SHIFT below
 *                   before it is added to X)
 * Out:  c1   (%f0)  the sum; zero when N <= 0 or INCX <= 0
 * Uses: I (%i3), c2, t1-t4, a1-a8, integer condition codes.
 *
 * Structure:
 *   - two independent accumulators (c1, c2) are folded together at exit;
 *   - the loops are software pipelined: a value is loaded into aN, moved
 *     to tN on the following pass, and only added to an accumulator on
 *     the pass after that.  t1-t4 start at zero, so the first additions
 *     are harmless, and the exit code adds whatever is still pending;
 *   - .LL11/.LL12/.LL16 handle INCX == SIZE using fixed offsets,
 *     .LL51/.LL52/.LL56 handle every other positive stride.
 *
 * PROLOGUE, SAVESP, FCLR, FMOV, FADD, LDF, SIZE and BASE_SHIFT come from
 * common.h and are not visible here.  NOTE(review): FCLR(0) presumably
 * zeroes %f0 (= c1) and SAVESP presumably opens the register window that
 * "return" closes again - confirm against common.h.
 */
	PROLOGUE
	SAVESP

	FCLR(0)

	sll	INCX, BASE_SHIFT, INCX

	/* Copy c1 into the second accumulator and the pending slots. */
	FMOV	c1, c2
	FMOV	c1, t1
	FMOV	c1, t2
	FMOV	c1, t3
	FMOV	c1, t4

	/*
	 * Nothing to sum for N <= 0.  Without this test a negative N skips
	 * the unrolled loop but can still leave (N & 7) > 0, which would
	 * make the tail loop load elements the caller never described.
	 */
	cmp	N, 0
	ble	.LL19
	nop

	/* A non-positive stride also yields an empty sum. */
	cmp	INCX, 0
	ble	.LL19
	cmp	INCX, SIZE		/* delay slot: compare for the next branch */
	bne	.LL50

	/* ---- INCX == SIZE -------------------------------------------- */
	/* Also executed as the delay slot of the bne above; .LL50        */
	/* recomputes the same value, so that is harmless.                */
	sra	N, 3, I			/* I = number of blocks of eight */
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

	/* Preload the first block of eight. */
	LDF	[X + 0 * SIZE], a1
	add	I, -1, I
	LDF	[X + 1 * SIZE], a2
	cmp	I, 0
	LDF	[X + 2 * SIZE], a3
	LDF	[X + 3 * SIZE], a4
	LDF	[X + 4 * SIZE], a5
	LDF	[X + 5 * SIZE], a6
	LDF	[X + 6 * SIZE], a7
	LDF	[X + 7 * SIZE], a8

	ble,pt	%icc, .LL12		/* only one block: go drain it */
	add	X, 8 * SIZE, X

#define PREFETCHSIZE 128

	/* Main loop: add pending t's, stage the loaded block, load the next. */
.LL11:
	FADD	c1, t1, c1
	prefetch [X + PREFETCHSIZE * SIZE], 0
	FMOV	a1, t1
	LDF	[X + 0 * SIZE], a1

	FADD	c2, t2, c2
	add	I, -1, I
	FMOV	a2, t2
	LDF	[X + 1 * SIZE], a2

	FADD	c1, t3, c1
	cmp	I, 0			/* sets the flags tested by bg below */
	FMOV	a3, t3
	LDF	[X + 2 * SIZE], a3

	FADD	c2, t4, c2
	nop
	FMOV	a4, t4
	LDF	[X + 3 * SIZE], a4

	FADD	c1, t1, c1
	nop
	FMOV	a5, t1
	LDF	[X + 4 * SIZE], a5

	FADD	c2, t2, c2
	nop
	FMOV	a6, t2
	LDF	[X + 5 * SIZE], a6

	FADD	c1, t3, c1
	FMOV	a7, t3
	LDF	[X + 6 * SIZE], a7
	add	X, 8 * SIZE, X

	FADD	c2, t4, c2
	FMOV	a8, t4
	bg,pt	%icc, .LL11
	LDF	[X - 1 * SIZE], a8	/* X already advanced: this is element 7 */

	/* Drain the last loaded block; a5-a8 remain pending in t1-t4. */
.LL12:
	FADD	c1, t1, c1
	FMOV	a1, t1
	FADD	c2, t2, c2
	FMOV	a2, t2

	FADD	c1, t3, c1
	FMOV	a3, t3
	FADD	c2, t4, c2
	FMOV	a4, t4

	FADD	c1, t1, c1
	FMOV	a5, t1
	FADD	c2, t2, c2
	FMOV	a6, t2

	FADD	c1, t3, c1
	FMOV	a7, t3
	FADD	c2, t4, c2
	FMOV	a8, t4

	/* Remaining N & 7 elements, one per iteration. */
.LL15:
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X + 0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	FADD	c1, t1, c1		/* add the previously staged value */
	FMOV	a1, t1			/* stage the value just loaded     */
	bg,pt	%icc, .LL16
	add	X, 1 * SIZE, X

	/* Exit: add what is still pending, then fold c2 into c1. */
.LL19:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	clr	%g0			/* delay slot; a write to %g0 has no effect */

	/* ---- any other positive stride -------------------------------- */
.LL50:
	sra	N, 3, I			/* I = number of blocks of eight */
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

	/* Preload the first block of eight, stepping X by INCX each time. */
	LDF	[X + 0 * SIZE], a1
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a2
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a3
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a4
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a5
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a6
	add	X, INCX, X
	add	I, -1, I
	LDF	[X + 0 * SIZE], a7
	cmp	I, 0
	add	X, INCX, X
	LDF	[X + 0 * SIZE], a8

	ble,pt	%icc, .LL52		/* only one block: go drain it */
	add	X, INCX, X

	/* Main loop: same pipeline as .LL11, with an explicit pointer step. */
.LL51:
	FADD	c1, t1, c1
	add	I, -1, I
	FMOV	a1, t1
	LDF	[X + 0 * SIZE], a1
	add	X, INCX, X

	FADD	c2, t2, c2
	cmp	I, 0			/* sets the flags tested by bg below */
	FMOV	a2, t2
	LDF	[X + 0 * SIZE], a2
	add	X, INCX, X

	FADD	c1, t3, c1
	FMOV	a3, t3
	LDF	[X + 0 * SIZE], a3
	add	X, INCX, X

	FADD	c2, t4, c2
	FMOV	a4, t4
	LDF	[X + 0 * SIZE], a4
	add	X, INCX, X

	FADD	c1, t1, c1
	FMOV	a5, t1
	LDF	[X + 0 * SIZE], a5
	add	X, INCX, X

	FADD	c2, t2, c2
	FMOV	a6, t2
	LDF	[X + 0 * SIZE], a6
	add	X, INCX, X

	FADD	c1, t3, c1
	FMOV	a7, t3
	LDF	[X + 0 * SIZE], a7
	add	X, INCX, X

	FADD	c2, t4, c2
	FMOV	a8, t4
	LDF	[X + 0 * SIZE], a8

	bg,pt	%icc, .LL51
	add	X, INCX, X

	/* Drain the last loaded block; a5-a8 remain pending in t1-t4. */
.LL52:
	FADD	c1, t1, c1
	FMOV	a1, t1
	FADD	c2, t2, c2
	FMOV	a2, t2

	FADD	c1, t3, c1
	FMOV	a3, t3
	FADD	c2, t4, c2
	FMOV	a4, t4

	FADD	c1, t1, c1
	FMOV	a5, t1
	FADD	c2, t2, c2
	FMOV	a6, t2

	FADD	c1, t3, c1
	FMOV	a7, t3
	FADD	c2, t4, c2
	FMOV	a8, t4

	/* Remaining N & 7 elements, one per iteration. */
.LL55:
	and	N, 7, I
	cmp	I, 0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X + 0 * SIZE], a1
	FADD	c1, t1, c1		/* add the previously staged value */
	add	I, -1, I
	FMOV	a1, t1			/* stage the value just loaded     */
	cmp	I, 0
	bg,pt	%icc, .LL56
	add	X, INCX, X

	/* Exit: add what is still pending, then fold c2 into c1. */
.LL59:
	FADD	c1, t1, c1
	FADD	c2, t2, c2
	FADD	c1, t3, c1
	FADD	c2, t4, c2

	FADD	c1, c2, c1
	return	%i7 + 8
	clr	%o0			/* delay slot; the .LL19 exit uses clr %g0 here */

	EPILOGUE
0 commit comments