Skip to content

Commit 437c0bf

Browse files
authored
Merge pull request #3843 from Mousius/switch-ratio
Propagate SWITCH_RATIO to DYNAMIC_ARCH builds
2 parents c628030 + 32f2faf commit 437c0bf

File tree

7 files changed, with 46 additions and 28 deletions.

common_param.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2023 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -45,6 +46,7 @@
4546

4647
typedef struct {
4748
int dtb_entries;
49+
int switch_ratio;
4850
int offsetA, offsetB, align;
4951

5052
#if BUILD_BFLOAT16 == 1

driver/level3/level3_gemm3m_thread.c

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2023 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -44,10 +45,6 @@
4445
#define DIVIDE_RATE 2
4546
#endif
4647

47-
#ifndef SWITCH_RATIO
48-
#define SWITCH_RATIO 2
49-
#endif
50-
5148
//The array of job_t may overflow the stack.
5249
//Instead, use malloc to alloc job_t.
5350
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
@@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
10151012
BLASLONG divN, divT;
10161013
int mode;
10171014

1015+
#if defined(DYNAMIC_ARCH)
1016+
int switch_ratio = gotoblas->switch_ratio;
1017+
#else
1018+
int switch_ratio = SWITCH_RATIO;
1019+
#endif
1020+
10181021
if (range_m) {
10191022
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
10201023
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
@@ -1030,15 +1033,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
10301033
}
10311034
*/
10321035

1033-
if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
1036+
if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) {
10341037
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);
10351038
return 0;
10361039
}
10371040

10381041
divT = nthreads;
10391042
divN = 1;
10401043

1041-
while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) {
1044+
while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) {
10421045
do {
10431046
divT --;
10441047
divN = 1;

driver/level3/level3_syrk_threaded.c

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2023 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -44,10 +45,6 @@
4445
#define DIVIDE_RATE 2
4546
#endif
4647

47-
#ifndef SWITCH_RATIO
48-
#define SWITCH_RATIO 2
49-
#endif
50-
5148
//The array of job_t may overflow the stack.
5249
//Instead, use malloc to alloc job_t.
5350
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
@@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
528525
int mode, mask;
529526
double dnum, di, dinum;
530527

531-
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
528+
#if defined(DYNAMIC_ARCH)
529+
int switch_ratio = gotoblas->switch_ratio;
530+
#else
531+
int switch_ratio = SWITCH_RATIO;
532+
#endif
533+
534+
if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) {
532535
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
533536
return 0;
534537
}

driver/level3/level3_thread.c

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2023 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -44,10 +45,6 @@
4445
#define DIVIDE_RATE 2
4546
#endif
4647

47-
#ifndef SWITCH_RATIO
48-
#define SWITCH_RATIO 2
49-
#endif
50-
5148
#ifndef GEMM_PREFERED_SIZE
5249
#define GEMM_PREFERED_SIZE 1
5350
#endif
@@ -577,6 +574,11 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
577574
BLASLONG width, i, j, k, js;
578575
BLASLONG m, n, n_from, n_to;
579576
int mode;
577+
#if defined(DYNAMIC_ARCH)
578+
int switch_ratio = gotoblas->switch_ratio;
579+
#else
580+
int switch_ratio = SWITCH_RATIO;
581+
#endif
580582

581583
/* Get execution mode */
582584
#ifndef COMPLEX
@@ -698,8 +700,8 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
698700
num_parts = 0;
699701
while (n > 0){
700702
width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts);
701-
if (width < SWITCH_RATIO) {
702-
width = SWITCH_RATIO;
703+
if (width < switch_ratio) {
704+
width = switch_ratio;
703705
}
704706
width = round_up(n, width, GEMM_PREFERED_SIZE);
705707

@@ -746,6 +748,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
746748
BLASLONG m = args -> m;
747749
BLASLONG n = args -> n;
748750
BLASLONG nthreads_m, nthreads_n;
751+
#if defined(DYNAMIC_ARCH)
752+
int switch_ratio = gotoblas->switch_ratio;
753+
#else
754+
int switch_ratio = SWITCH_RATIO;
755+
#endif
749756

750757
/* Get dimensions from index ranges if available */
751758
if (range_m) {
@@ -755,21 +762,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
755762
n = range_n[1] - range_n[0];
756763
}
757764

758-
/* Partitions in m should have at least SWITCH_RATIO rows */
759-
if (m < 2 * SWITCH_RATIO) {
765+
/* Partitions in m should have at least switch_ratio rows */
766+
if (m < 2 * switch_ratio) {
760767
nthreads_m = 1;
761768
} else {
762769
nthreads_m = args -> nthreads;
763-
while (m < nthreads_m * SWITCH_RATIO) {
770+
while (m < nthreads_m * switch_ratio) {
764771
nthreads_m = nthreads_m / 2;
765772
}
766773
}
767774

768-
/* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */
769-
if (n < SWITCH_RATIO * nthreads_m) {
775+
/* Partitions in n should have at most switch_ratio * nthreads_m columns */
776+
if (n < switch_ratio * nthreads_m) {
770777
nthreads_n = 1;
771778
} else {
772-
nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m);
779+
nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m);
773780
if (nthreads_m * nthreads_n > args -> nthreads) {
774781
nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m);
775782
}

kernel/setparam-ref.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2023 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -49,7 +50,9 @@
4950
static void init_parameter(void);
5051

5152
gotoblas_t TABLE_NAME = {
52-
DTB_DEFAULT_ENTRIES ,
53+
DTB_DEFAULT_ENTRIES,
54+
55+
SWITCH_RATIO,
5356

5457
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
5558

lapack/potrf/potrf_parallel.c

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,6 @@ static FLOAT dm1 = -1.;
8080
#define DIVIDE_RATE 2
8181
#endif
8282

83-
#ifndef SWITCH_RATIO
84-
#define SWITCH_RATIO 2
85-
#endif
86-
8783
#ifndef LOWER
8884
#define TRANS
8985
#endif

param.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3854,6 +3854,10 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
38543854

38553855
#endif
38563856

3857+
#ifndef SWITCH_RATIO
3858+
#define SWITCH_RATIO 2
3859+
#endif
3860+
38573861
#ifndef QGEMM_DEFAULT_UNROLL_M
38583862
#define QGEMM_DEFAULT_UNROLL_M 2
38593863
#endif

0 commit comments

Comments
 (0)