35
35
#include <stdio.h>
36
36
#include <stdlib.h>
37
37
#include "common.h"
38
- #ifdef FUNCTION_PROFILE
39
- #include "functable.h"
40
- #endif
41
38
42
39
#ifndef COMPLEX
43
40
#define SMP_THRESHOLD_MIN 65536.0
44
41
#ifdef XDOUBLE
45
- #define ERROR_NAME "QGEMT "
42
+ #define ERROR_NAME "QGEMMT "
46
43
#elif defined(DOUBLE )
47
- #define ERROR_NAME "DGEMT "
44
+ #define ERROR_NAME "DGEMMT "
48
45
#elif defined(BFLOAT16 )
49
- #define ERROR_NAME "SBGEMT "
46
+ #define ERROR_NAME "SBGEMMT "
50
47
#else
51
- #define ERROR_NAME "SGEMT "
48
+ #define ERROR_NAME "SGEMMT "
52
49
#endif
53
50
#else
54
51
#define SMP_THRESHOLD_MIN 8192.0
55
52
#ifdef XDOUBLE
56
- #define ERROR_NAME "XGEMT "
53
+ #define ERROR_NAME "XGEMMT "
57
54
#elif defined(DOUBLE )
58
- #define ERROR_NAME "ZGEMT "
55
+ #define ERROR_NAME "ZGEMMT "
59
56
#else
60
- #define ERROR_NAME "CGEMT "
57
+ #define ERROR_NAME "CGEMMT "
61
58
#endif
62
59
#endif
63
60
68
65
#ifndef CBLAS
69
66
70
67
void NAME (char * UPLO , char * TRANSA , char * TRANSB ,
71
- blasint * M , blasint * N , blasint * K ,
68
+ blasint * M , blasint * K ,
72
69
FLOAT * Alpha ,
73
70
IFLOAT * a , blasint * ldA ,
74
71
IFLOAT * b , blasint * ldB , FLOAT * Beta , FLOAT * c , blasint * ldC )
75
72
{
76
73
77
- blasint m , n , k ;
74
+ blasint m , k ;
78
75
blasint lda , ldb , ldc ;
79
76
int transa , transb , uplo ;
80
77
blasint info ;
@@ -92,7 +89,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
92
89
PRINT_DEBUG_NAME ;
93
90
94
91
m = * M ;
95
- n = * N ;
96
92
k = * K ;
97
93
98
94
#if defined(COMPLEX )
@@ -167,8 +163,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
167
163
info = 13 ;
168
164
if (k < 0 )
169
165
info = 5 ;
170
- if (n < 0 )
171
- info = 4 ;
172
166
if (m < 0 )
173
167
info = 3 ;
174
168
if (transb < 0 )
@@ -184,7 +178,7 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
184
178
185
179
void CNAME (enum CBLAS_ORDER order , enum CBLAS_UPLO Uplo ,
186
180
enum CBLAS_TRANSPOSE TransA , enum CBLAS_TRANSPOSE TransB , blasint M ,
187
- blasint N , blasint k ,
181
+ blasint k ,
188
182
#ifndef COMPLEX
189
183
FLOAT alpha ,
190
184
IFLOAT * A , blasint LDA ,
@@ -205,7 +199,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
205
199
206
200
int transa , transb , uplo ;
207
201
blasint info ;
208
- blasint m , n , lda , ldb ;
202
+ blasint m , lda , ldb ;
209
203
FLOAT * a , * b ;
210
204
XFLOAT * buffer ;
211
205
@@ -248,9 +242,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
248
242
transb = 3 ;
249
243
#endif
250
244
251
- m = M ;
252
- n = N ;
253
-
254
245
a = (void * )A ;
255
246
b = (void * )B ;
256
247
lda = LDA ;
@@ -262,8 +253,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
262
253
info = 13 ;
263
254
if (k < 0 )
264
255
info = 5 ;
265
- if (n < 0 )
266
- info = 4 ;
267
256
if (m < 0 )
268
257
info = 3 ;
269
258
if (transb < 0 )
@@ -273,8 +262,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
273
262
}
274
263
275
264
if (order == CblasRowMajor ) {
276
- m = N ;
277
- n = M ;
278
265
279
266
a = (void * )B ;
280
267
b = (void * )A ;
@@ -319,8 +306,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
319
306
info = 13 ;
320
307
if (k < 0 )
321
308
info = 5 ;
322
- if (n < 0 )
323
- info = 4 ;
324
309
if (m < 0 )
325
310
info = 3 ;
326
311
if (transb < 0 )
@@ -407,37 +392,35 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
407
392
408
393
#endif
409
394
410
- if ((m == 0 ) || ( n == 0 ) )
395
+ if ((m == 0 ) )
411
396
return ;
412
397
413
398
IDEBUG_START ;
414
399
415
- FUNCTION_PROFILE_START ();
416
-
417
400
const blasint incb = (transb == 0 ) ? 1 : ldb ;
418
401
419
402
if (uplo == 1 ) {
420
- for (i = 0 ; i < n ; i ++ ) {
421
- j = n - i ;
403
+ for (i = 0 ; i < m ; i ++ ) {
404
+ j = m - i ;
422
405
423
406
l = j ;
424
407
#if defined(COMPLEX )
425
408
aa = a + i * 2 ;
426
409
bb = b + i * ldb * 2 ;
427
410
if (transa ) {
428
- l = k ;
429
411
aa = a + lda * i * 2 ;
430
- bb = b + i * 2 ;
431
412
}
413
+ if (transb )
414
+ bb = b + i * 2 ;
432
415
cc = c + i * 2 * ldc + i * 2 ;
433
416
#else
434
417
aa = a + i ;
435
418
bb = b + i * ldb ;
436
419
if (transa ) {
437
- l = k ;
438
420
aa = a + lda * i ;
439
- bb = b + i ;
440
421
}
422
+ if (transb )
423
+ bb = b + i ;
441
424
cc = c + i * ldc + i ;
442
425
#endif
443
426
@@ -458,8 +441,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
458
441
459
442
IDEBUG_START ;
460
443
461
- FUNCTION_PROFILE_START ();
462
-
463
444
buffer_size = j + k + 128 / sizeof (FLOAT );
464
445
#ifdef WINDOWS_ABI
465
446
buffer_size += 160 / sizeof (FLOAT );
@@ -479,20 +460,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
479
460
#endif
480
461
481
462
#if defined(COMPLEX )
463
+ if (!transa )
482
464
(gemv [(int )transa ]) (j , k , 0 , alpha_r , alpha_i ,
483
465
aa , lda , bb , incb , cc , 1 ,
484
466
buffer );
467
+ else
468
+ (gemv [(int )transa ]) (k , j , 0 , alpha_r , alpha_i ,
469
+ aa , lda , bb , incb , cc , 1 ,
470
+ buffer );
485
471
#else
472
+ if (!transa )
486
473
(gemv [(int )transa ]) (j , k , 0 , alpha , aa , lda ,
487
474
bb , incb , cc , 1 , buffer );
475
+ else
476
+ (gemv [(int )transa ]) (k , j , 0 , alpha , aa , lda ,
477
+ bb , incb , cc , 1 , buffer );
488
478
#endif
489
479
#ifdef SMP
490
480
} else {
491
-
481
+ if (! transa )
492
482
(gemv_thread [(int )transa ]) (j , k , alpha , aa ,
493
483
lda , bb , incb , cc ,
494
484
1 , buffer ,
495
485
nthreads );
486
+ else
487
+ (gemv_thread [(int )transa ]) (k , j , alpha , aa ,
488
+ lda , bb , incb , cc ,
489
+ 1 , buffer ,
490
+ nthreads );
496
491
497
492
}
498
493
#endif
@@ -501,21 +496,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
501
496
}
502
497
} else {
503
498
504
- for (i = 0 ; i < n ; i ++ ) {
499
+ for (i = 0 ; i < m ; i ++ ) {
505
500
j = i + 1 ;
506
501
507
502
l = j ;
508
503
#if defined COMPLEX
509
504
bb = b + i * ldb * 2 ;
510
- if (transa ) {
511
- l = k ;
505
+ if (transb ) {
512
506
bb = b + i * 2 ;
513
507
}
514
508
cc = c + i * 2 * ldc ;
515
509
#else
516
510
bb = b + i * ldb ;
517
- if (transa ) {
518
- l = k ;
511
+ if (transb ) {
519
512
bb = b + i ;
520
513
}
521
514
cc = c + i * ldc ;
@@ -537,8 +530,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
537
530
#endif
538
531
IDEBUG_START ;
539
532
540
- FUNCTION_PROFILE_START ();
541
-
542
533
buffer_size = j + k + 128 / sizeof (FLOAT );
543
534
#ifdef WINDOWS_ABI
544
535
buffer_size += 160 / sizeof (FLOAT );
@@ -558,30 +549,39 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
558
549
#endif
559
550
560
551
#if defined(COMPLEX )
552
+ if (!transa )
561
553
(gemv [(int )transa ]) (j , k , 0 , alpha_r , alpha_i ,
562
554
a , lda , bb , incb , cc , 1 ,
563
555
buffer );
556
+ else
557
+ (gemv [(int )transa ]) (k , j , 0 , alpha_r , alpha_i ,
558
+ a , lda , bb , incb , cc , 1 ,
559
+ buffer );
564
560
#else
561
+ if (!transa )
565
562
(gemv [(int )transa ]) (j , k , 0 , alpha , a , lda , bb ,
566
563
incb , cc , 1 , buffer );
564
+ else
565
+ (gemv [(int )transa ]) (k , j , 0 , alpha , a , lda , bb ,
566
+ incb , cc , 1 , buffer );
567
567
#endif
568
568
569
569
#ifdef SMP
570
570
} else {
571
-
571
+ if (! transa )
572
572
(gemv_thread [(int )transa ]) (j , k , alpha , a , lda ,
573
573
bb , incb , cc , 1 ,
574
574
buffer , nthreads );
575
-
575
+ else
576
+ (gemv_thread [(int )transa ]) (k , j , alpha , a , lda ,
577
+ bb , incb , cc , 1 ,
578
+ buffer , nthreads );
576
579
}
577
580
#endif
578
581
579
582
STACK_FREE (buffer );
580
583
}
581
584
}
582
- FUNCTION_PROFILE_END (COMPSIZE * COMPSIZE ,
583
- args .m * args .k + args .k * args .n +
584
- args .m * args .n , 2 * args .m * args .n * args .k );
585
585
586
586
IDEBUG_END ;
587
587
0 commit comments