Skip to content

Commit 7389b6c

Browse files
authored
Merge pull request #5237 from martin-frbg/revert5219
Fix and reinstate the Cooper Lake/Sapphire Rapids microkernel for non-transpose SBGEMV
2 parents 1df8738 + 99d9f1f commit 7389b6c

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

kernel/x86_64/sbgemv_n.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28  28 |
 29  29 |  #include "common.h"
 30  30 |
 31     | -//#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
 32     | -//#include "sbgemv_n_microk_cooperlake.c"
 33     | -//#endif
     31 | +#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS)
     32 | +#include "sbgemv_n_microk_cooperlake.c"
     33 | +#endif
 34  34 |
 35  35 |  #define ALIGN64_ALLOC(alloc_size, TYPE, ptr_align, ptr) \
 36  36 |  ptr = (TYPE *) malloc(sizeof(TYPE)*alloc_size + 63); \

kernel/x86_64/sbgemv_n_microk_cooperlake_template.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,7 @@ static int sbgemv_kernel_32xN_lda_direct(BLASLONG m, BLASLONG n, float alpha, bf
231 231 |  accum512_8 = _mm512_permutex2var_ps(accum512_0, idx_base_0, accum512_1);
232 232 |  accum512_9 = _mm512_permutex2var_ps(accum512_0, idx_base_1, accum512_1);
233 233 |
234     | -if ((m-tag_m_32x) > 16) {
    234 | +if ((m-tag_m_32x) >= 16) {
235 235 |  STORE16_COMPLETE_RESULT(accum512_8, y+tag_m_32x+0)
236 236 |  STORE16_MASK_COMPLETE_RESULT(accum512_9, y+tag_m_32x+16, store_tail_mask)
237 237 |  } else {

0 commit comments

Comments
 (0)