|
1 | 1 | #include "common.h"
|
| 2 | +#include <stdint.h> |
2 | 3 | #include <immintrin.h>
|
3 | 4 | /* row-major c_block */
|
4 | 5 | /* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/
|
|
289 | 290 | INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\
|
290 | 291 | INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)
|
291 | 292 |
|
292 |
/* COMPUTE_m1n8: statement macro that runs one inline-asm sequence built from
 * INNER_INIT_m1n8, INNER_KERNELm1(8) and INNER_SAVE_m1n8 (all defined elsewhere
 * in the file), then advances c_pointer by 1 element.
 *
 * Asm operands, in order:
 *   0: a_block_pointer  (+r, read and written by the asm)
 *   1: packed_b_pointer (r, input)
 *   2: (int64_t)k       (r, input)
 *   3: c_pointer        (r, input)
 *   4: ldc_in_bytes     (r, input)
 * Declared clobbers: zmm4-zmm8, cc, memory, k1.
 *
 * The macro relies on a_block_pointer, packed_b_pointer, k, c_pointer and
 * ldc_in_bytes being in scope at the expansion site.
 *
 * NOTE(review): c_pointer is an input-only operand, so the asm must leave its
 * register unchanged; the C-side increment below is the only update visible
 * here -- confirm against INNER_SAVE_m1n8.
 * NOTE(review): no general-purpose registers appear in the clobber list --
 * confirm the INNER_* templates do not write any.
 * The name suggests an m=1, n=8 tile -- presumably; verify against the INNER_*
 * macros. */
| -#define COMPUTE_m1n8 {\ |
293
| - __asm__ __volatile__(\ |
294
| - INNER_INIT_m1n8\ |
295
| - INNER_KERNELm1(8)\ |
296
| - INNER_SAVE_m1n8\ |
297
| - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\ |
298
| - :"zmm4","zmm5","zmm6","zmm7","zmm8","cc","memory","k1");\ |
299
| - c_pointer += 1;\ |
300
| -} |
301 |
/* COMPUTE_m2n8: statement macro that runs one inline-asm sequence built from
 * INNER_INIT_m2n8, INNER_KERNELm2(8) and INNER_SAVE_m2n8 (all defined elsewhere
 * in the file), then advances c_pointer by 2 elements.
 *
 * Asm operands, in order:
 *   0: a_block_pointer  (+r, read and written by the asm)
 *   1: packed_b_pointer (r, input)
 *   2: (int64_t)k       (r, input)
 *   3: c_pointer        (r, input)
 *   4: ldc_in_bytes     (r, input)
 * Declared clobbers: zmm4-zmm9, cc, memory, k1.
 *
 * NOTE(review): c_pointer is an input-only operand, so the asm must leave its
 * register unchanged -- confirm against INNER_SAVE_m2n8.
 * NOTE(review): no general-purpose registers appear in the clobber list --
 * confirm the INNER_* templates do not write any.
 * The name suggests an m=2, n=8 tile -- presumably; verify against the INNER_*
 * macros. */
| -#define COMPUTE_m2n8 {\ |
302
| - __asm__ __volatile__(\ |
303
| - INNER_INIT_m2n8\ |
304
| - INNER_KERNELm2(8)\ |
305
| - INNER_SAVE_m2n8\ |
306
| - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\ |
307
| - :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","cc","memory","k1");\ |
308
| - c_pointer += 2;\ |
309
| -} |
310 |
/* COMPUTE_m4n8: statement macro that runs one inline-asm sequence built from
 * INNER_INIT_m4n8, INNER_KERNELm4(8) and INNER_SAVE_m4n8 (all defined elsewhere
 * in the file), then advances c_pointer by 4 elements.
 *
 * Asm operands, in order:
 *   0: a_block_pointer  (+r, read and written by the asm)
 *   1: packed_b_pointer (r, input)
 *   2: (int64_t)k       (r, input)
 *   3: c_pointer        (r, input)
 *   4: ldc_in_bytes     (r, input)
 *   5: k02              (Yk, input)
 *   6: k03              (Yk, input)
 *   7: k01              (Yk, input)
 * Declared clobbers: zmm4-zmm11, cc, memory. Unlike the m1/m2 variants, k1 is
 * not listed as a clobber; k01/k02/k03 are instead supplied as Yk-constrained
 * inputs and must be in scope at the expansion site.
 *
 * NOTE(review): c_pointer and the Yk operands are input-only, so the asm must
 * leave them unchanged -- confirm against the INNER_* templates.
 * NOTE(review): no general-purpose registers appear in the clobber list --
 * confirm the INNER_* templates do not write any.
 * The name suggests an m=4, n=8 tile -- presumably; verify against the INNER_*
 * macros. */
| -#define COMPUTE_m4n8 {\ |
311
| - __asm__ __volatile__(\ |
312
| - INNER_INIT_m4n8\ |
313
| - INNER_KERNELm4(8)\ |
314
| - INNER_SAVE_m4n8\ |
315
| - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03),"Yk"(k01)\ |
316
| - :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","cc","memory");\ |
317
| - c_pointer += 4;\ |
318
| -} |
319 |
/* COMPUTE_m8n8: statement macro that runs one inline-asm sequence built from
 * INNER_INIT_m8n8, INNER_KERNELm8(8) and INNER_SAVE_m8n8 (all defined elsewhere
 * in the file), then advances c_pointer by 8 elements.
 *
 * Asm operands, in order:
 *   0: a_block_pointer  (+r, read and written by the asm)
 *   1: packed_b_pointer (r, input)
 *   2: (int64_t)k       (r, input)
 *   3: c_pointer        (r, input)
 *   4: ldc_in_bytes     (r, input)
 *   5: k02              (Yk, input)
 *   6: k03              (Yk, input)
 * Declared clobbers: zmm4-zmm15, cc, memory. k1 is not listed as a clobber and,
 * unlike COMPUTE_m4n8, k01 is not passed in.
 *
 * NOTE(review): c_pointer and the Yk operands are input-only, so the asm must
 * leave them unchanged -- confirm against the INNER_* templates.
 * NOTE(review): no general-purpose registers appear in the clobber list --
 * confirm the INNER_* templates do not write any.
 * The name suggests an m=8, n=8 tile -- presumably; verify against the INNER_*
 * macros. */
| -#define COMPUTE_m8n8 {\ |
320
| - __asm__ __volatile__(\ |
321
| - INNER_INIT_m8n8\ |
322
| - INNER_KERNELm8(8)\ |
323
| - INNER_SAVE_m8n8\ |
324
| - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03)\ |
325
| - :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory");\ |
326
| - c_pointer += 8;\ |
327
| -} |
328 |
| - |
329 | 293 | #define COMPUTE_n8 {\
|
330 | 294 | __asm__ __volatile__(\
|
331 | 295 | "movq %8,%%r14;movq %2,%%r13;"\
|
|
0 commit comments