From 22d88035934ca08e560c3ef649080d986a9aa01e Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Thu, 1 Aug 2024 17:01:44 +0100 Subject: [PATCH 1/2] Tune generic SVE constants closer to other SVE cores This allows us to use the `ARMV8` and `ARMV8SVE` targets in a minimal build without losing too much performance. --- param.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/param.h b/param.h index 2618e1f609..931cd257de 100644 --- a/param.h +++ b/param.h @@ -3671,8 +3671,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #if defined(XDOUBLE) || defined(DOUBLE) #define SWITCH_RATIO 8 +#define GEMM_PREFERED_SIZE 4 #else #define SWITCH_RATIO 16 +#define GEMM_PREFERED_SIZE 8 #endif #define SGEMM_DEFAULT_UNROLL_M 4 // Actually 1VL (8) but kept seperate to keep copies seperate @@ -3689,13 +3691,13 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_MN 16 -#define SGEMM_DEFAULT_P 128 -#define DGEMM_DEFAULT_P 160 +#define SGEMM_DEFAULT_P 240 +#define DGEMM_DEFAULT_P 240 #define CGEMM_DEFAULT_P 128 #define ZGEMM_DEFAULT_P 128 -#define SGEMM_DEFAULT_Q 352 -#define DGEMM_DEFAULT_Q 128 +#define SGEMM_DEFAULT_Q 640 +#define DGEMM_DEFAULT_Q 320 #define CGEMM_DEFAULT_Q 224 #define ZGEMM_DEFAULT_Q 112 From 5f8744d4e46f0e3cfac480feb9098bad6641491e Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Thu, 1 Aug 2024 17:08:37 +0100 Subject: [PATCH 2/2] Add tunings for baseline AArch64 Previously this was left without `SWITCH_RATIO` or `GEMM_PREFERED_SIZE` and with older default values, but it can be seen across other cores that these values seem to work for many devices. 
--- param.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/param.h b/param.h index 931cd257de..220748a598 100644 --- a/param.h +++ b/param.h @@ -3708,6 +3708,14 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #else /* Other/undetected ARMv8 cores */ +#if defined(XDOUBLE) || defined(DOUBLE) +#define SWITCH_RATIO 8 +#define GEMM_PREFERED_SIZE 4 +#else +#define SWITCH_RATIO 16 +#define GEMM_PREFERED_SIZE 8 +#endif + #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -3720,13 +3728,13 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_N 4 -#define SGEMM_DEFAULT_P 128 -#define DGEMM_DEFAULT_P 160 +#define SGEMM_DEFAULT_P 240 +#define DGEMM_DEFAULT_P 240 #define CGEMM_DEFAULT_P 128 #define ZGEMM_DEFAULT_P 128 -#define SGEMM_DEFAULT_Q 352 -#define DGEMM_DEFAULT_Q 128 +#define SGEMM_DEFAULT_Q 640 +#define DGEMM_DEFAULT_Q 320 #define CGEMM_DEFAULT_Q 224 #define ZGEMM_DEFAULT_Q 112