Skip to content

Commit c628030

Browse files
authored
Merge pull request #3855 from Mousius/more-switch-ratio-tuning
SWITCH_RATIO for Arm(R) Neoverse(TM) architecture
2 parents efcf712 + 5b16542 commit c628030

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

param.h

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*****************************************************************************
2-
Copyright (c) 2011-2014, The OpenBLAS Project
2+
Copyright (c) 2011-2023, The OpenBLAS Project
33
All rights reserved.
44
55
Redistribution and use in source and binary forms, with or without
@@ -3338,6 +3338,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded device
33383338

33393339
#elif defined(NEOVERSEN1)
33403340

3341+
#if defined(XDOUBLE) || defined(DOUBLE)
3342+
#define SWITCH_RATIO 8
3343+
#else
3344+
#define SWITCH_RATIO 16
3345+
#endif
3346+
33413347
#define SGEMM_DEFAULT_UNROLL_M 16
33423348
#define SGEMM_DEFAULT_UNROLL_N 4
33433349

@@ -3367,7 +3373,11 @@ is a big desktop or server with abundant cache rather than a phone or embedded device
33673373

33683374
#elif defined(NEOVERSEV1)
33693375

3370-
#define SWITCH_RATIO 16
3376+
#if defined(XDOUBLE) || defined(DOUBLE)
3377+
#define SWITCH_RATIO 8
3378+
#else
3379+
#define SWITCH_RATIO 16
3380+
#endif
33713381

33723382
#define SGEMM_DEFAULT_UNROLL_M 16
33733383
#define SGEMM_DEFAULT_UNROLL_N 4
@@ -3398,6 +3408,12 @@ is a big desktop or server with abundant cache rather than a phone or embedded device
33983408

33993409
#elif defined(NEOVERSEN2)
34003410

3411+
#if defined(XDOUBLE) || defined(DOUBLE)
3412+
#define SWITCH_RATIO 8
3413+
#else
3414+
#define SWITCH_RATIO 16
3415+
#endif
3416+
34013417
#undef SBGEMM_ALIGN_K
34023418
#define SBGEMM_ALIGN_K 4
34033419

0 commit comments

Comments
 (0)