|
| 1 | +/*********************************************************************/ |
| 2 | +/* Copyright 2009, 2010 The University of Texas at Austin. */ |
| 3 | +/* All rights reserved. */ |
| 4 | +/* */ |
| 5 | +/* Redistribution and use in source and binary forms, with or */ |
| 6 | +/* without modification, are permitted provided that the following */ |
| 7 | +/* conditions are met: */ |
| 8 | +/* */ |
| 9 | +/* 1. Redistributions of source code must retain the above */ |
| 10 | +/* copyright notice, this list of conditions and the following */ |
| 11 | +/* disclaimer. */ |
| 12 | +/* */ |
| 13 | +/* 2. Redistributions in binary form must reproduce the above */ |
| 14 | +/* copyright notice, this list of conditions and the following */ |
| 15 | +/* disclaimer in the documentation and/or other materials */ |
| 16 | +/* provided with the distribution. */ |
| 17 | +/* */ |
| 18 | +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
| 19 | +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
| 20 | +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
| 21 | +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
| 22 | +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
| 23 | +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
| 24 | +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
| 25 | +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
| 26 | +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
| 27 | +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
| 28 | +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
| 29 | +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
| 30 | +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
| 31 | +/* POSSIBILITY OF SUCH DAMAGE. */ |
| 32 | +/* */ |
| 33 | +/* The views and conclusions contained in the software and */ |
| 34 | +/* documentation are those of the authors and should not be */ |
| 35 | +/* interpreted as representing official policies, either expressed */ |
| 36 | +/* or implied, of The University of Texas at Austin. */ |
| 37 | +/*********************************************************************/ |
| 38 | + |
| 39 | +#include <stdio.h> |
| 40 | +#include "common.h" |
| 41 | +#ifdef FUNCTION_PROFILE |
| 42 | +#include "functable.h" |
| 43 | +#endif |
| 44 | + |
/* Routine name handed to xerbla on an argument error; picked by the
 * precision macro the file is compiled with (XDOUBLE wins over DOUBLE,
 * and the plain build falls back to the single-precision complex name). */
#if defined(XDOUBLE)
#  define ERROR_NAME "XTRTRS"
#elif defined(DOUBLE)
#  define ERROR_NAME "ZTRTRS"
#else
#  define ERROR_NAME "CTRTRS"
#endif
| 52 | + |
| 53 | +static blasint (*trtrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { |
| 54 | + TRTRS_UNU_SINGLE, TRTRS_UNN_SINGLE, TRTRS_UTU_SINGLE, TRTRS_UTN_SINGLE, TRTRS_URU_SINGLE, TRTRS_URN_SINGLE, TRTRS_UCU_SINGLE, TRTRS_UCN_SINGLE, TRTRS_LNU_SINGLE, TRTRS_LNN_SINGLE, TRTRS_LTU_SINGLE, TRTRS_LTN_SINGLE, TRTRS_LRU_SINGLE, TRTRS_LRN_SINGLE, TRTRS_LCU_SINGLE, TRTRS_LCN_SINGLE, |
| 55 | +}; |
| 56 | + |
#ifdef SMP
/*
 * Multi-threaded TRTRS kernels (only built with SMP).
 *
 * Going by the kernel name suffixes, the 16 entries are laid out uplo-major
 * (U, L), then by trans (N, T, R, C), then by diag (U, N), i.e. the slot for
 * a given combination is  uplo * 8 + trans * 2 + diag.
 */
static blasint (*trtrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
  /* uplo = U */
  TRTRS_UNU_PARALLEL, TRTRS_UNN_PARALLEL,   /* trans = N */
  TRTRS_UTU_PARALLEL, TRTRS_UTN_PARALLEL,   /* trans = T */
  TRTRS_URU_PARALLEL, TRTRS_URN_PARALLEL,   /* trans = R */
  TRTRS_UCU_PARALLEL, TRTRS_UCN_PARALLEL,   /* trans = C */
  /* uplo = L */
  TRTRS_LNU_PARALLEL, TRTRS_LNN_PARALLEL,   /* trans = N */
  TRTRS_LTU_PARALLEL, TRTRS_LTN_PARALLEL,   /* trans = T */
  TRTRS_LRU_PARALLEL, TRTRS_LRN_PARALLEL,   /* trans = R */
  TRTRS_LCU_PARALLEL, TRTRS_LCN_PARALLEL,   /* trans = C */
};
#endif
| 62 | + |
| 63 | +int NAME(char *UPLO, char* TRANS, char* DIAG, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, |
| 64 | + FLOAT *b, blasint *ldB, blasint *Info){ |
| 65 | + |
| 66 | + char uplo_arg = *UPLO; |
| 67 | + char trans_arg = *TRANS; |
| 68 | + char diag_arg = *DIAG; |
| 69 | + |
| 70 | + blas_arg_t args; |
| 71 | + |
| 72 | + blasint info; |
| 73 | + int uplo, trans, diag; |
| 74 | + FLOAT *buffer; |
| 75 | +#ifdef PPC440 |
| 76 | + extern |
| 77 | +#endif |
| 78 | + FLOAT *sa, *sb; |
| 79 | + |
| 80 | + PRINT_DEBUG_NAME; |
| 81 | + |
| 82 | + args.m = *N; |
| 83 | + args.n = *NRHS; |
| 84 | + args.a = (void *)a; |
| 85 | + args.lda = *ldA; |
| 86 | + args.b = (void *)b; |
| 87 | + args.ldb = *ldB; |
| 88 | + |
| 89 | + info = 0; |
| 90 | + |
| 91 | + TOUPPER(trans_arg); |
| 92 | + trans = -1; |
| 93 | + if (trans_arg == 'N') trans = 0; |
| 94 | + if (trans_arg == 'T') trans = 1; |
| 95 | + if (trans_arg == 'R') trans = 2; |
| 96 | + if (trans_arg == 'C') trans = 3; |
| 97 | + |
| 98 | + uplo = -1; |
| 99 | + if (uplo_arg == 'U') uplo = 0; |
| 100 | + if (uplo_arg == 'L') uplo = 1; |
| 101 | + |
| 102 | + diag = -1; |
| 103 | + if (diag_arg == 'U') diag = 0; |
| 104 | + if (diag_arg == 'N') diag = 1; |
| 105 | + |
| 106 | + if (args.ldb < MAX(1, args.m)) info = 7; |
| 107 | + if (args.lda < MAX(1, args.m)) info = 9; |
| 108 | + if (args.n < 0) info = 5; |
| 109 | + if (args.m < 0) info = 4; |
| 110 | + if (trans < 0) info = 2; |
| 111 | + if (uplo < 0) info = 1; |
| 112 | + if (diag < 0) info = 3; |
| 113 | + |
| 114 | + if (info != 0) { |
| 115 | + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
| 116 | + *Info = - info; |
| 117 | + return 0; |
| 118 | + } |
| 119 | + |
| 120 | + args.alpha = NULL; |
| 121 | + args.beta = NULL; |
| 122 | + |
| 123 | + *Info = 0; |
| 124 | + |
| 125 | + if (args.m == 0 || args.n == 0) return 0; |
| 126 | + |
| 127 | + if (diag) { |
| 128 | + if (AMIN_K(args.n, args.a, args.lda + 1) == ZERO) { |
| 129 | + *Info = IAMIN_K(args.n, args.a, args.lda + 1); |
| 130 | + return 0; |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + |
| 135 | + IDEBUG_START; |
| 136 | + |
| 137 | + FUNCTION_PROFILE_START(); |
| 138 | + |
| 139 | +#ifndef PPC440 |
| 140 | + buffer = (FLOAT *)blas_memory_alloc(1); |
| 141 | + |
| 142 | + sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); |
| 143 | + sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
| 144 | +#endif |
| 145 | + |
| 146 | +#ifdef SMP |
| 147 | + args.common = NULL; |
| 148 | + args.nthreads = num_cpu_avail(4); |
| 149 | + |
| 150 | + if (args.nthreads == 1) { |
| 151 | +#endif |
| 152 | + |
| 153 | + (trtrs_single[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); |
| 154 | + |
| 155 | +#ifdef SMP |
| 156 | + } else { |
| 157 | + (trtrs_parallel[(uplo << 2) | (trans << 1) | diag])(&args, NULL, NULL, sa, sb, 0); |
| 158 | + } |
| 159 | +#endif |
| 160 | + |
| 161 | +#ifndef PPC440 |
| 162 | + blas_memory_free(buffer); |
| 163 | +#endif |
| 164 | + |
| 165 | + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n); |
| 166 | + |
| 167 | + IDEBUG_END; |
| 168 | + |
| 169 | + return 0; |
| 170 | + |
| 171 | +} |
0 commit comments