Skip to content

Commit ea747cf

Browse files
committed
start working on ?trtrs
1 parent fde8a8e commit ea747cf

File tree

2 files changed

+284
-4
lines changed

2 files changed

+284
-4
lines changed

common_macro.h

Lines changed: 113 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@
641641
#define IMATCOPY_K_CT DIMATCOPY_K_CT
642642
#define IMATCOPY_K_RT DIMATCOPY_K_RT
643643

644-
#define GEADD_K DGEADD_K
644+
#define GEADD_K DGEADD_K
645645
#else
646646

647647
#define AMAX_K SAMAX_K
@@ -944,7 +944,7 @@
944944
#define IMATCOPY_K_CT SIMATCOPY_K_CT
945945
#define IMATCOPY_K_RT SIMATCOPY_K_RT
946946

947-
#define GEADD_K SGEADD_K
947+
#define GEADD_K SGEADD_K
948948
#endif
949949
#else
950950
#ifdef XDOUBLE
@@ -1770,7 +1770,7 @@
17701770
#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC
17711771
#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC
17721772

1773-
#define GEADD_K ZGEADD_K
1773+
#define GEADD_K ZGEADD_K
17741774

17751775
#else
17761776

@@ -2193,7 +2193,7 @@
21932193
#define IMATCOPY_K_CTC CIMATCOPY_K_CTC
21942194
#define IMATCOPY_K_RTC CIMATCOPY_K_RTC
21952195

2196-
#define GEADD_K CGEADD_K
2196+
#define GEADD_K CGEADD_K
21972197

21982198
#endif
21992199
#endif
@@ -2806,3 +2806,112 @@ typedef struct {
28062806
#endif
28072807

28082808
#endif
2809+
2810+
/*
 * Precision-independent aliases for the ?trtrs driver routines.
 *
 * Each TRTRS_<abc>_{SINGLE,PARALLEL} macro maps to the symbol
 * <p>trtrs_<abc>_{single,parallel}, where <p> is the precision prefix
 * selected by the COMPLEX / XDOUBLE / DOUBLE build macros:
 *
 *                 XDOUBLE   DOUBLE   neither
 *   real             q         d        s
 *   complex          x         z        c
 *
 * <abc> is one of UNU, UNN, UTU, UTN, LNU, LNN, LTU, LTN.
 * NOTE(review): the three letters presumably encode uplo (U/L),
 * trans (N/T) and diag (U/N) in that order -- confirm against the
 * routines that implement these symbols.
 */
#ifndef COMPLEX
2811+
/* Real, extended precision: q prefix. */
#ifdef XDOUBLE
2812+
#define TRTRS_UNU_SINGLE qtrtrs_UNU_single
2813+
#define TRTRS_UNN_SINGLE qtrtrs_UNN_single
2814+
#define TRTRS_UTU_SINGLE qtrtrs_UTU_single
2815+
#define TRTRS_UTN_SINGLE qtrtrs_UTN_single
2816+
#define TRTRS_LNU_SINGLE qtrtrs_LNU_single
2817+
#define TRTRS_LNN_SINGLE qtrtrs_LNN_single
2818+
#define TRTRS_LTU_SINGLE qtrtrs_LTU_single
2819+
#define TRTRS_LTN_SINGLE qtrtrs_LTN_single
2820+
#define TRTRS_UNU_PARALLEL qtrtrs_UNU_parallel
2821+
#define TRTRS_UNN_PARALLEL qtrtrs_UNN_parallel
2822+
#define TRTRS_UTU_PARALLEL qtrtrs_UTU_parallel
2823+
#define TRTRS_UTN_PARALLEL qtrtrs_UTN_parallel
2824+
#define TRTRS_LNU_PARALLEL qtrtrs_LNU_parallel
2825+
#define TRTRS_LNN_PARALLEL qtrtrs_LNN_parallel
2826+
#define TRTRS_LTU_PARALLEL qtrtrs_LTU_parallel
2827+
#define TRTRS_LTN_PARALLEL qtrtrs_LTN_parallel
2828+
2829+
/* Real, double precision: d prefix. */
#elif defined(DOUBLE)
2830+
#define TRTRS_UNU_SINGLE dtrtrs_UNU_single
2831+
#define TRTRS_UNN_SINGLE dtrtrs_UNN_single
2832+
#define TRTRS_UTU_SINGLE dtrtrs_UTU_single
2833+
#define TRTRS_UTN_SINGLE dtrtrs_UTN_single
2834+
#define TRTRS_LNU_SINGLE dtrtrs_LNU_single
2835+
#define TRTRS_LNN_SINGLE dtrtrs_LNN_single
2836+
#define TRTRS_LTU_SINGLE dtrtrs_LTU_single
2837+
#define TRTRS_LTN_SINGLE dtrtrs_LTN_single
2838+
#define TRTRS_UNU_PARALLEL dtrtrs_UNU_parallel
2839+
#define TRTRS_UNN_PARALLEL dtrtrs_UNN_parallel
2840+
#define TRTRS_UTU_PARALLEL dtrtrs_UTU_parallel
2841+
#define TRTRS_UTN_PARALLEL dtrtrs_UTN_parallel
2842+
#define TRTRS_LNU_PARALLEL dtrtrs_LNU_parallel
2843+
#define TRTRS_LNN_PARALLEL dtrtrs_LNN_parallel
2844+
#define TRTRS_LTU_PARALLEL dtrtrs_LTU_parallel
2845+
#define TRTRS_LTN_PARALLEL dtrtrs_LTN_parallel
2846+
/* Real, neither XDOUBLE nor DOUBLE defined: s prefix. */
#else
2847+
#define TRTRS_UNU_SINGLE strtrs_UNU_single
2848+
#define TRTRS_UNN_SINGLE strtrs_UNN_single
2849+
#define TRTRS_UTU_SINGLE strtrs_UTU_single
2850+
#define TRTRS_UTN_SINGLE strtrs_UTN_single
2851+
#define TRTRS_LNU_SINGLE strtrs_LNU_single
2852+
#define TRTRS_LNN_SINGLE strtrs_LNN_single
2853+
#define TRTRS_LTU_SINGLE strtrs_LTU_single
2854+
#define TRTRS_LTN_SINGLE strtrs_LTN_single
2855+
#define TRTRS_UNU_PARALLEL strtrs_UNU_parallel
2856+
#define TRTRS_UNN_PARALLEL strtrs_UNN_parallel
2857+
#define TRTRS_UTU_PARALLEL strtrs_UTU_parallel
2858+
#define TRTRS_UTN_PARALLEL strtrs_UTN_parallel
2859+
#define TRTRS_LNU_PARALLEL strtrs_LNU_parallel
2860+
#define TRTRS_LNN_PARALLEL strtrs_LNN_parallel
2861+
#define TRTRS_LTU_PARALLEL strtrs_LTU_parallel
2862+
#define TRTRS_LTN_PARALLEL strtrs_LTN_parallel
2863+
#endif
2864+
/* COMPLEX is defined from here on. */
#else
2865+
/* Complex, extended precision: x prefix. */
#ifdef XDOUBLE
2866+
#define TRTRS_UNU_SINGLE xtrtrs_UNU_single
2867+
#define TRTRS_UNN_SINGLE xtrtrs_UNN_single
2868+
#define TRTRS_UTU_SINGLE xtrtrs_UTU_single
2869+
#define TRTRS_UTN_SINGLE xtrtrs_UTN_single
2870+
#define TRTRS_LNU_SINGLE xtrtrs_LNU_single
2871+
#define TRTRS_LNN_SINGLE xtrtrs_LNN_single
2872+
#define TRTRS_LTU_SINGLE xtrtrs_LTU_single
2873+
#define TRTRS_LTN_SINGLE xtrtrs_LTN_single
2874+
#define TRTRS_UNU_PARALLEL xtrtrs_UNU_parallel
2875+
#define TRTRS_UNN_PARALLEL xtrtrs_UNN_parallel
2876+
#define TRTRS_UTU_PARALLEL xtrtrs_UTU_parallel
2877+
#define TRTRS_UTN_PARALLEL xtrtrs_UTN_parallel
2878+
#define TRTRS_LNU_PARALLEL xtrtrs_LNU_parallel
2879+
#define TRTRS_LNN_PARALLEL xtrtrs_LNN_parallel
2880+
#define TRTRS_LTU_PARALLEL xtrtrs_LTU_parallel
2881+
#define TRTRS_LTN_PARALLEL xtrtrs_LTN_parallel
2882+
/* Complex, double precision: z prefix. */
#elif defined(DOUBLE)
2883+
#define TRTRS_UNU_SINGLE ztrtrs_UNU_single
2884+
#define TRTRS_UNN_SINGLE ztrtrs_UNN_single
2885+
#define TRTRS_UTU_SINGLE ztrtrs_UTU_single
2886+
#define TRTRS_UTN_SINGLE ztrtrs_UTN_single
2887+
#define TRTRS_LNU_SINGLE ztrtrs_LNU_single
2888+
#define TRTRS_LNN_SINGLE ztrtrs_LNN_single
2889+
#define TRTRS_LTU_SINGLE ztrtrs_LTU_single
2890+
#define TRTRS_LTN_SINGLE ztrtrs_LTN_single
2891+
#define TRTRS_UNU_PARALLEL ztrtrs_UNU_parallel
2892+
#define TRTRS_UNN_PARALLEL ztrtrs_UNN_parallel
2893+
#define TRTRS_UTU_PARALLEL ztrtrs_UTU_parallel
2894+
#define TRTRS_UTN_PARALLEL ztrtrs_UTN_parallel
2895+
#define TRTRS_LNU_PARALLEL ztrtrs_LNU_parallel
2896+
#define TRTRS_LNN_PARALLEL ztrtrs_LNN_parallel
2897+
#define TRTRS_LTU_PARALLEL ztrtrs_LTU_parallel
2898+
#define TRTRS_LTN_PARALLEL ztrtrs_LTN_parallel
2899+
/* Complex, neither XDOUBLE nor DOUBLE defined: c prefix. */
#else
2900+
#define TRTRS_UNU_SINGLE ctrtrs_UNU_single
2901+
#define TRTRS_UNN_SINGLE ctrtrs_UNN_single
2902+
#define TRTRS_UTU_SINGLE ctrtrs_UTU_single
2903+
#define TRTRS_UTN_SINGLE ctrtrs_UTN_single
2904+
#define TRTRS_LNU_SINGLE ctrtrs_LNU_single
2905+
#define TRTRS_LNN_SINGLE ctrtrs_LNN_single
2906+
#define TRTRS_LTU_SINGLE ctrtrs_LTU_single
2907+
#define TRTRS_LTN_SINGLE ctrtrs_LTN_single
2908+
#define TRTRS_UNU_PARALLEL ctrtrs_UNU_parallel
2909+
#define TRTRS_UNN_PARALLEL ctrtrs_UNN_parallel
2910+
#define TRTRS_UTU_PARALLEL ctrtrs_UTU_parallel
2911+
#define TRTRS_UTN_PARALLEL ctrtrs_UTN_parallel
2912+
#define TRTRS_LNU_PARALLEL ctrtrs_LNU_parallel
2913+
#define TRTRS_LNN_PARALLEL ctrtrs_LNN_parallel
2914+
#define TRTRS_LTU_PARALLEL ctrtrs_LTU_parallel
2915+
#define TRTRS_LTN_PARALLEL ctrtrs_LTN_parallel
2916+
#endif
2917+
#endif

interface/lapack/trtrs.c

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
#include <stdio.h>
40+
#include "common.h"
41+
#ifdef FUNCTION_PROFILE
42+
#include "functable.h"
43+
#endif
44+
45+
/*
 * Routine name reported to xerbla when an argument check fails.
 * The leading letter follows the build precision: Q when XDOUBLE is
 * defined, D when DOUBLE is defined, S otherwise.
 */
#if defined(XDOUBLE)
#  define ERROR_NAME "QTRTRS"
#elif defined(DOUBLE)
#  define ERROR_NAME "DTRTRS"
#else
#  define ERROR_NAME "STRTRS"
#endif
52+
53+
/*
 * Single-threaded ?trtrs drivers.
 * NAME() indexes this table with (uplo << 2) | (trans << 1) | diag,
 * so the order of the eight entries below must not change.
 */
static blasint (*trtrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
  TRTRS_UNU_SINGLE,	/* index 0 */
  TRTRS_UNN_SINGLE,	/* index 1 */
  TRTRS_UTU_SINGLE,	/* index 2 */
  TRTRS_UTN_SINGLE,	/* index 3 */
  TRTRS_LNU_SINGLE,	/* index 4 */
  TRTRS_LNN_SINGLE,	/* index 5 */
  TRTRS_LTU_SINGLE,	/* index 6 */
  TRTRS_LTN_SINGLE,	/* index 7 */
};
56+
57+
#ifdef SMP
/*
 * Multi-threaded ?trtrs drivers, compiled only for SMP builds.
 * NAME() indexes this table with (uplo << 2) | (trans << 1) | diag,
 * so the order of the eight entries below must not change.
 */
static blasint (*trtrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
  TRTRS_UNU_PARALLEL,	/* index 0 */
  TRTRS_UNN_PARALLEL,	/* index 1 */
  TRTRS_UTU_PARALLEL,	/* index 2 */
  TRTRS_UTN_PARALLEL,	/* index 3 */
  TRTRS_LNU_PARALLEL,	/* index 4 */
  TRTRS_LNN_PARALLEL,	/* index 5 */
  TRTRS_LTU_PARALLEL,	/* index 6 */
  TRTRS_LTN_PARALLEL,	/* index 7 */
};
#endif
62+
63+
/*
 * Fortran-callable ?TRTRS interface (real precisions).
 *
 * Validates the arguments, checks a non-unit triangular matrix for an
 * exactly-zero diagonal entry, and then dispatches to the single- or
 * multi-threaded driver selected by (uplo, trans, diag).
 *
 * Arguments (all passed by reference, Fortran style):
 *   UPLO   'U' or 'L'                     (argument 1)
 *   TRANS  'N'/'R' or 'T'/'C'             (argument 2)
 *   DIAG   'U' (unit) or 'N' (non-unit)   (argument 3)
 *   N      order of the triangular matrix (argument 4)
 *   NRHS   number of right-hand sides     (argument 5)
 *   a,ldA  matrix A and its leading dim   (arguments 6, 7)
 *   b,ldB  matrix B and its leading dim   (arguments 8, 9)
 *   Info   0 on success; -i if argument i is invalid (xerbla is also
 *          called); i > 0 if the i-th diagonal element of a non-unit A
 *          is exactly zero, in which case no solve is attempted.
 *
 * Character options are accepted in either case.
 * Always returns 0; the status is delivered through *Info.
 */
int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
	 FLOAT *b, blasint *ldB, blasint *Info){

  char uplo_arg  = *UPLO;
  char trans_arg = *TRANS;
  char diag_arg  = *DIAG;

  blas_arg_t args;

  blasint info;
  int uplo, trans, diag;
  FLOAT *buffer;
#ifdef PPC440
  extern
#endif
  FLOAT *sa, *sb;

  PRINT_DEBUG_NAME;

  args.m   = *N;
  args.n   = *NRHS;
  args.a   = (void *)a;
  args.lda = *ldA;
  args.b   = (void *)b;
  args.ldb = *ldB;

  info = 0;

  /* Fold every option character to upper case, not just TRANS, so that
     lowercase 'u'/'l'/'n' are accepted as in reference LAPACK. */
  TOUPPER(uplo_arg);
  TOUPPER(trans_arg);
  TOUPPER(diag_arg);

  trans = -1;
  if (trans_arg == 'N') trans = 0;
  if (trans_arg == 'T') trans = 1;
  if (trans_arg == 'R') trans = 0;
  if (trans_arg == 'C') trans = 1;

  uplo = -1;
  if (uplo_arg == 'U') uplo = 0;
  if (uplo_arg == 'L') uplo = 1;

  diag = -1;
  if (diag_arg == 'U') diag = 0;
  if (diag_arg == 'N') diag = 1;

  /* Checks run from the highest argument position down so that the
     lowest-numbered invalid argument is the one finally reported.
     LDA is argument 7 and LDB is argument 9. */
  if (args.ldb < MAX(1, args.m)) info = 9;
  if (args.lda < MAX(1, args.m)) info = 7;
  if (args.n   < 0)              info = 5;
  if (args.m   < 0)              info = 4;
  if (diag     < 0)              info = 3;
  if (trans    < 0)              info = 2;
  if (uplo     < 0)              info = 1;

  if (info != 0) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    *Info = - info;
    return 0;
  }

  args.alpha = NULL;
  args.beta  = NULL;

  *Info = 0;

  /* Nothing to solve. */
  if (args.m == 0 || args.n == 0) return 0;

  /* Non-unit diagonal: A is singular if any diagonal entry is exactly
     zero.  The diagonal has args.m entries (the order of A), reached
     with stride lda + 1; the count must not be NRHS. */
  if (diag) {
    if (AMIN_K(args.m, args.a, args.lda + 1) == ZERO) {
      *Info = IAMIN_K(args.m, args.a, args.lda + 1);
      return 0;
    }
  }

  IDEBUG_START;

  FUNCTION_PROFILE_START();

#ifndef PPC440
  /* Work buffer, carved into the sa and sb areas handed to the driver. */
  buffer = (FLOAT *)blas_memory_alloc(1);

  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

#ifdef SMP
  args.common = NULL;
  args.nthreads = num_cpu_avail(4);

  if (args.nthreads == 1) {
#endif

    /* Table index: bit 2 = uplo, bit 1 = trans, bit 0 = diag. */
    (trtrs_single[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0);

#ifdef SMP
  } else {
    (trtrs_parallel[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0);
  }
#endif

#ifndef PPC440
  blas_memory_free(buffer);
#endif

  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n);

  IDEBUG_END;

  return 0;

}

0 commit comments

Comments
 (0)