Skip to content

Commit 6bd67dd

Browse files
authored
Update dgemm_kernel_8x8_skylakex.c
1 parent 5da9484 commit 6bd67dd

File tree

1 file changed

+1
-37
lines changed

1 file changed

+1
-37
lines changed

kernel/x86_64/dgemm_kernel_8x8_skylakex.c

Lines changed: 1 addition & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include "common.h"
2+
#include <stdint.h>
23
#include <immintrin.h>
34
/* row-major c_block */
45
/* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/
@@ -289,43 +290,6 @@
289290
INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\
290291
INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)
291292

292-
/*
 * COMPUTE_m1n8: expands to a brace-enclosed block holding one inline-asm
 * statement built from INNER_INIT_m1n8, INNER_KERNELm1(8) and
 * INNER_SAVE_m1n8 (all defined outside this hunk), followed by advancing
 * c_pointer by 1.
 *
 * Operands, in the order GCC numbers them (outputs first, then inputs):
 *   %0  a_block_pointer   read-write ("+r")
 *   %1  packed_b_pointer  input
 *   %2  (int64_t)k        input
 *   %3  c_pointer         input
 *   %4  ldc_in_bytes      input
 * Declared clobbers: zmm4-zmm8, condition codes, memory, mask register k1.
 *
 * NOTE(review): the name suggests an m=1 by n=8 tile; confirm against the
 * INNER_* macros. The "NNN-" lines interleaved below are removed-line
 * numbers from the diff view, not C code.
 */
#define COMPUTE_m1n8 {\
293-
__asm__ __volatile__(\
294-
INNER_INIT_m1n8\
295-
INNER_KERNELm1(8)\
296-
INNER_SAVE_m1n8\
297-
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\
298-
:"zmm4","zmm5","zmm6","zmm7","zmm8","cc","memory","k1");\
299-
c_pointer += 1;\
300-
}
301-
/*
 * COMPUTE_m2n8: expands to a brace-enclosed block holding one inline-asm
 * statement built from INNER_INIT_m2n8, INNER_KERNELm2(8) and
 * INNER_SAVE_m2n8 (all defined outside this hunk), followed by advancing
 * c_pointer by 2.
 *
 * Operands, in the order GCC numbers them (outputs first, then inputs):
 *   %0  a_block_pointer   read-write ("+r")
 *   %1  packed_b_pointer  input
 *   %2  (int64_t)k        input
 *   %3  c_pointer         input
 *   %4  ldc_in_bytes      input
 * Declared clobbers: zmm4-zmm9, condition codes, memory, mask register k1.
 *
 * NOTE(review): the name suggests an m=2 by n=8 tile; confirm against the
 * INNER_* macros. The "NNN-" lines interleaved below are removed-line
 * numbers from the diff view, not C code.
 */
#define COMPUTE_m2n8 {\
302-
__asm__ __volatile__(\
303-
INNER_INIT_m2n8\
304-
INNER_KERNELm2(8)\
305-
INNER_SAVE_m2n8\
306-
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\
307-
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","cc","memory","k1");\
308-
c_pointer += 2;\
309-
}
310-
/*
 * COMPUTE_m4n8: expands to a brace-enclosed block holding one inline-asm
 * statement built from INNER_INIT_m4n8, INNER_KERNELm4(8) and
 * INNER_SAVE_m4n8 (all defined outside this hunk), followed by advancing
 * c_pointer by 4.
 *
 * Operands, in the order GCC numbers them (outputs first, then inputs):
 *   %0  a_block_pointer   read-write ("+r")
 *   %1  packed_b_pointer  input
 *   %2  (int64_t)k        input
 *   %3  c_pointer         input
 *   %4  ldc_in_bytes      input
 *   %5  k02               input, "Yk" (mask-register constraint)
 *   %6  k03               input, "Yk"
 *   %7  k01               input, "Yk"
 * Declared clobbers: zmm4-zmm11, condition codes, memory. Unlike the m1/m2
 * variants, no mask register appears in the clobber list; the masks are
 * passed in as operands instead.
 *
 * NOTE(review): k01/k02/k03 are defined outside this hunk; their values are
 * not visible here. The "NNN-" lines interleaved below are removed-line
 * numbers from the diff view, not C code.
 */
#define COMPUTE_m4n8 {\
311-
__asm__ __volatile__(\
312-
INNER_INIT_m4n8\
313-
INNER_KERNELm4(8)\
314-
INNER_SAVE_m4n8\
315-
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03),"Yk"(k01)\
316-
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","cc","memory");\
317-
c_pointer += 4;\
318-
}
319-
/*
 * COMPUTE_m8n8: expands to a brace-enclosed block holding one inline-asm
 * statement built from INNER_INIT_m8n8, INNER_KERNELm8(8) and
 * INNER_SAVE_m8n8 (all defined outside this hunk), followed by advancing
 * c_pointer by 8.
 *
 * Operands, in the order GCC numbers them (outputs first, then inputs):
 *   %0  a_block_pointer   read-write ("+r")
 *   %1  packed_b_pointer  input
 *   %2  (int64_t)k        input
 *   %3  c_pointer         input
 *   %4  ldc_in_bytes      input
 *   %5  k02               input, "Yk" (mask-register constraint)
 *   %6  k03               input, "Yk"
 * Declared clobbers: zmm4-zmm15, condition codes, memory.
 *
 * NOTE(review): k02/k03 are defined outside this hunk; their values are not
 * visible here. The "NNN-" lines interleaved below are removed-line numbers
 * from the diff view, not C code.
 */
#define COMPUTE_m8n8 {\
320-
__asm__ __volatile__(\
321-
INNER_INIT_m8n8\
322-
INNER_KERNELm8(8)\
323-
INNER_SAVE_m8n8\
324-
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03)\
325-
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory");\
326-
c_pointer += 8;\
327-
}
328-
329293
#define COMPUTE_n8 {\
330294
__asm__ __volatile__(\
331295
"movq %8,%%r14;movq %2,%%r13;"\

0 commit comments

Comments
 (0)