Skip to content

Commit 876ba58

Browse files
authored
Merge pull request #5091 from goplanid/develop
Small gemm kernel improvements for AArch64
2 parents a54f9a9 + d1bfa97 commit 876ba58

File tree

4 files changed, with 4 additions (+4) and 4 deletions (-4).

kernel/arm64/dgemm_small_kernel_tn_sve.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,7 @@ CNAME(BLASLONG M,
213213
const BLASLONG n2 = N & -2;
214214
const BLASLONG n8 = N & -8;
215215

216-
const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
216+
const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
217217
FLOAT* packed_a =
218218
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;
219219

kernel/arm64/dgemm_small_kernel_tt_sve.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ CNAME(BLASLONG M,
219219
const BLASLONG n4 = N & -4;
220220
const BLASLONG n2 = N & -2;
221221

222-
const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
222+
const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
223223
FLOAT* packed_a =
224224
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;
225225

kernel/arm64/sgemm_small_kernel_tn_sve.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ CNAME(BLASLONG M,
222222
const BLASLONG n8 = N & -8;
223223
const BLASLONG n4 = N & -4;
224224

225-
const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
225+
const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
226226
FLOAT* packed_a =
227227
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;
228228

kernel/arm64/sgemm_small_kernel_tt_sve.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ CNAME(BLASLONG M,
223223
const BLASLONG n8 = N & -8;
224224
const BLASLONG n4 = N & -4;
225225

226-
const int pack_a = M >= v_size2 && N >= 8 && K >= 8 ? 1 : 0;
226+
const int pack_a = M >= v_size2 && N >= 8 ? 1 : 0;
227227
FLOAT* packed_a =
228228
(pack_a) ? packed_a = (FLOAT*)malloc(K * v_size2 * sizeof(FLOAT)) : NULL;
229229

0 commit comments

Comments
 (0)