Skip to content

Commit 17cdd9f

Browse files
authored
some correction
1 parent 6bcb06f commit 17cdd9f

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

kernel/x86_64/dgemm_kernel_8x8_skylakex.c

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@
330330
"movq %%r14,%8;shlq $3,%8;subq %8,%3;shrq $3,%8;"\
331331
"shlq $3,%4;addq %4,%3;shrq $3,%4;"\
332332
:"+r"(a_block_pointer),"+r"(packed_b_pointer),"+r"(K),"+r"(c_pointer),"+r"(ldc_in_bytes),"+Yk"(k02),"+Yk"(k03),"+Yk"(k01),"+r"(M),"+r"(alpha)\
333-
::"zmm3","zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory","k1","r13","r14");\
333+
::"zmm3","zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory","k1","r12","r13","r14");\
334334
a_block_pointer -= M * K;\
335335
}
336336
#define COMPUTE_n16 {\
@@ -645,8 +645,8 @@ static void KERNEL_MAIN(double *packed_a, double *packed_b, BLASLONG m, BLASLONG
645645
c_pointer ++;\
646646
}
647647

648-
static void __attribute__ ((noinline)) KERNEL_EDGE(double *packed_a, double *packed_b, BLASLONG m, BLASLONG edge_n, BLASLONG k, BLASLONG LDC, double *c,double *alpha){//icopy=8,ocopy=8
649-
//perform C += A<pack> B<pack> , edge_n<8 must be satisfied !
648+
static void KERNEL_EDGE(double *packed_a, double *packed_b, BLASLONG m, BLASLONG edge_n, BLASLONG k, BLASLONG LDC, double *c,double *alpha){//icopy=8,ocopy=8
649+
//perform C += A<pack> B<pack> , edge_n<8 must be satisfied.
650650
if(k==0 || m==0 || edge_n==0) return;
651651
double *a_block_pointer,*b_block_pointer,*b_base_pointer;
652652
double *c_pointer = c;
@@ -763,11 +763,16 @@ static void copy_4_to_8(double *src,double *dst,BLASLONG m,BLASLONG k){
763763
int __attribute__ ((noinline)) CNAME(BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double * __restrict__ A, double * __restrict__ B, double * __restrict__ C, BLASLONG ldc){
764764
if(m==0 || n==0 || k==0 || alpha == 0.0) return 0;
765765
BLASLONG ndiv8 = n/8;double ALPHA = alpha;
766+
#ifdef ICOPY_4
766767
double *packed_a = (double *)malloc(m*k*sizeof(double));
767768
copy_4_to_8(A,packed_a,m,k);
769+
#else //ICOPY_8
770+
double *packed_a = A;
771+
#endif
768772
if(ndiv8>0) KERNEL_MAIN(packed_a,B,m,ndiv8,k,ldc,C,&ALPHA);
769773
if(n>ndiv8*8) KERNEL_EDGE(packed_a,B+(int64_t)k*(int64_t)ndiv8*8,m,n-ndiv8*8,k,ldc,C+(int64_t)ldc*(int64_t)ndiv8*8,&ALPHA);
774+
#ifdef ICOPY_4
770775
free(packed_a);packed_a=NULL;
776+
#endif
771777
return 0;
772778
}
773-

0 commit comments

Comments
 (0)