Skip to content

Commit 10d841d

Browse files
authored
Merge pull request #2026 from martin-frbg/trmv_threads
Correct range limiting in trmv_thread and re-enable TRMV multithreading
2 parents (12f2b76 + 45333d5); commit 10d841d

File tree

3 files changed: 3 additions and 9 deletions

3 files changed: 3 additions and 9 deletions

driver/level2/trmv_thread.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
346346

347347
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
348348
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
349-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
349+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
350350

351351
queue[num_cpu].mode = mode;
352352
queue[num_cpu].routine = trmv_kernel;
@@ -386,7 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
386386

387387
range_m[num_cpu + 1] = range_m[num_cpu] + width;
388388
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
389-
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
389+
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
390390

391391
queue[num_cpu].mode = mode;
392392
queue[num_cpu].routine = trmv_kernel;

interface/trmv.c

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -218,11 +218,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
218218
buffer = (FLOAT *)blas_memory_alloc(1);
219219

220220
#ifdef SMP
221-
/* nthreads = num_cpu_avail(2);
221+
nthreads = num_cpu_avail(2);
222222

223-
FIXME trmv_thread was found to be broken, see issue 1332 */
224-
nthreads = 1;
225-
226223
if (nthreads == 1) {
227224
#endif
228225

interface/ztrmv.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -239,9 +239,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
239239
} else
240240
nthreads = 1;
241241

242-
/* FIXME TRMV multithreading appears to be broken, see issue 1332*/
243-
nthreads = 1;
244-
245242
if(nthreads > 1) {
246243
buffer_size = n > 16 ? 0 : n * 4 + 40;
247244
}

0 commit comments

Comments
 (0)