Skip to content

Commit 7012e36

Browse files
authored
Merge pull request #651 from angsch/master
Add level-3 BLAS triangular Sylvester equation solver
2 parents 34b804e + 970a772 commit 7012e36

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+10190
-141
lines changed

LAPACKE/include/lapack.h

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22002,6 +22002,84 @@ void LAPACK_ztrsyl_base(
2200222002
#define LAPACK_ztrsyl(...) LAPACK_ztrsyl_base(__VA_ARGS__)
2200322003
#endif
2200422004

22005+
#define LAPACK_ctrsyl3_base LAPACK_GLOBAL(ctrsyl3,CTRSYL3)
22006+
void LAPACK_ctrsyl3_base(
22007+
char const* trana, char const* tranb,
22008+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22009+
lapack_complex_float const* A, lapack_int const* lda,
22010+
lapack_complex_float const* B, lapack_int const* ldb,
22011+
lapack_complex_float* C, lapack_int const* ldc, float* scale,
22012+
float* swork, lapack_int const *ldswork,
22013+
lapack_int* info
22014+
#ifdef LAPACK_FORTRAN_STRLEN_END
22015+
, size_t, size_t
22016+
#endif
22017+
);
22018+
#ifdef LAPACK_FORTRAN_STRLEN_END
22019+
#define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__, 1, 1)
22020+
#else
22021+
#define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__)
22022+
#endif
22023+
22024+
#define LAPACK_dtrsyl3_base LAPACK_GLOBAL(dtrsyl3,DTRSYL3)
22025+
void LAPACK_dtrsyl3_base(
22026+
char const* trana, char const* tranb,
22027+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22028+
double const* A, lapack_int const* lda,
22029+
double const* B, lapack_int const* ldb,
22030+
double* C, lapack_int const* ldc, double* scale,
22031+
lapack_int* iwork, lapack_int const* liwork,
22032+
double* swork, lapack_int const *ldswork,
22033+
lapack_int* info
22034+
#ifdef LAPACK_FORTRAN_STRLEN_END
22035+
, size_t, size_t
22036+
#endif
22037+
);
22038+
#ifdef LAPACK_FORTRAN_STRLEN_END
22039+
#define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__, 1, 1)
22040+
#else
22041+
#define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__)
22042+
#endif
22043+
22044+
#define LAPACK_strsyl3_base LAPACK_GLOBAL(strsyl3,STRSYL3)
22045+
void LAPACK_strsyl3_base(
22046+
char const* trana, char const* tranb,
22047+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22048+
float const* A, lapack_int const* lda,
22049+
float const* B, lapack_int const* ldb,
22050+
float* C, lapack_int const* ldc, float* scale,
22051+
lapack_int* iwork, lapack_int const* liwork,
22052+
float* swork, lapack_int const *ldswork,
22053+
lapack_int* info
22054+
#ifdef LAPACK_FORTRAN_STRLEN_END
22055+
, size_t, size_t
22056+
#endif
22057+
);
22058+
#ifdef LAPACK_FORTRAN_STRLEN_END
22059+
#define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__, 1, 1)
22060+
#else
22061+
#define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__)
22062+
#endif
22063+
22064+
#define LAPACK_ztrsyl3_base LAPACK_GLOBAL(ztrsyl3,ZTRSYL3)
22065+
void LAPACK_ztrsyl3_base(
22066+
char const* trana, char const* tranb,
22067+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22068+
lapack_complex_double const* A, lapack_int const* lda,
22069+
lapack_complex_double const* B, lapack_int const* ldb,
22070+
lapack_complex_double* C, lapack_int const* ldc, double* scale,
22071+
double* swork, lapack_int const *ldswork,
22072+
lapack_int* info
22073+
#ifdef LAPACK_FORTRAN_STRLEN_END
22074+
, size_t, size_t
22075+
#endif
22076+
);
22077+
#ifdef LAPACK_FORTRAN_STRLEN_END
22078+
#define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__, 1, 1)
22079+
#else
22080+
#define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__)
22081+
#endif
22082+
2200522083
#define LAPACK_ctrtri_base LAPACK_GLOBAL(ctrtri,CTRTRI)
2200622084
void LAPACK_ctrtri_base(
2200722085
char const* uplo, char const* diag,

LAPACKE/include/lapacke.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4490,6 +4490,23 @@ lapack_int LAPACKE_ztrsyl( int matrix_layout, char trana, char tranb,
44904490
lapack_complex_double* c, lapack_int ldc,
44914491
double* scale );
44924492

4493+
lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb,
4494+
lapack_int isgn, lapack_int m, lapack_int n,
4495+
const float* a, lapack_int lda, const float* b,
4496+
lapack_int ldb, float* c, lapack_int ldc,
4497+
float* scale );
4498+
lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb,
4499+
lapack_int isgn, lapack_int m, lapack_int n,
4500+
const double* a, lapack_int lda, const double* b,
4501+
lapack_int ldb, double* c, lapack_int ldc,
4502+
double* scale );
4503+
lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb,
4504+
lapack_int isgn, lapack_int m, lapack_int n,
4505+
const lapack_complex_double* a, lapack_int lda,
4506+
const lapack_complex_double* b, lapack_int ldb,
4507+
lapack_complex_double* c, lapack_int ldc,
4508+
double* scale );
4509+
44934510
lapack_int LAPACKE_strtri( int matrix_layout, char uplo, char diag, lapack_int n,
44944511
float* a, lapack_int lda );
44954512
lapack_int LAPACKE_dtrtri( int matrix_layout, char uplo, char diag, lapack_int n,
@@ -10202,6 +10219,28 @@ lapack_int LAPACKE_ztrsyl_work( int matrix_layout, char trana, char tranb,
1020210219
lapack_complex_double* c, lapack_int ldc,
1020310220
double* scale );
1020410221

10222+
lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb,
10223+
lapack_int isgn, lapack_int m, lapack_int n,
10224+
const float* a, lapack_int lda,
10225+
const float* b, lapack_int ldb,
10226+
float* c, lapack_int ldc, float* scale,
10227+
lapack_int* iwork, lapack_int liwork,
10228+
float* swork, lapack_int ldswork );
10229+
lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb,
10230+
lapack_int isgn, lapack_int m, lapack_int n,
10231+
const double* a, lapack_int lda,
10232+
const double* b, lapack_int ldb,
10233+
double* c, lapack_int ldc, double* scale,
10234+
lapack_int* iwork, lapack_int liwork,
10235+
double* swork, lapack_int ldswork );
10236+
lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb,
10237+
lapack_int isgn, lapack_int m, lapack_int n,
10238+
const lapack_complex_double* a, lapack_int lda,
10239+
const lapack_complex_double* b, lapack_int ldb,
10240+
lapack_complex_double* c, lapack_int ldc,
10241+
double* scale, double* swork,
10242+
lapack_int ldswork );
10243+
1020510244
lapack_int LAPACKE_strtri_work( int matrix_layout, char uplo, char diag,
1020610245
lapack_int n, float* a, lapack_int lda );
1020710246
lapack_int LAPACKE_dtrtri_work( int matrix_layout, char uplo, char diag,

LAPACKE/src/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,8 @@ lapacke_ctrsna.c
559559
lapacke_ctrsna_work.c
560560
lapacke_ctrsyl.c
561561
lapacke_ctrsyl_work.c
562+
lapacke_ctrsyl3.c
563+
lapacke_ctrsyl3_work.c
562564
lapacke_ctrtri.c
563565
lapacke_ctrtri_work.c
564566
lapacke_ctrtrs.c
@@ -1173,6 +1175,8 @@ lapacke_dtrsna.c
11731175
lapacke_dtrsna_work.c
11741176
lapacke_dtrsyl.c
11751177
lapacke_dtrsyl_work.c
1178+
lapacke_dtrsyl3.c
1179+
lapacke_dtrsyl3_work.c
11761180
lapacke_dtrtri.c
11771181
lapacke_dtrtri_work.c
11781182
lapacke_dtrtrs.c
@@ -1746,6 +1750,8 @@ lapacke_strsna.c
17461750
lapacke_strsna_work.c
17471751
lapacke_strsyl.c
17481752
lapacke_strsyl_work.c
1753+
lapacke_strsyl3.c
1754+
lapacke_strsyl3_work.c
17491755
lapacke_strtri.c
17501756
lapacke_strtri_work.c
17511757
lapacke_strtrs.c
@@ -2322,6 +2328,8 @@ lapacke_ztrsna.c
23222328
lapacke_ztrsna_work.c
23232329
lapacke_ztrsyl.c
23242330
lapacke_ztrsyl_work.c
2331+
lapacke_ztrsyl3.c
2332+
lapacke_ztrsyl3_work.c
23252333
lapacke_ztrtri.c
23262334
lapacke_ztrtri_work.c
23272335
lapacke_ztrtrs.c

LAPACKE/src/Makefile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ include $(TOPSRCDIR)/make.inc
3939

4040
.SUFFIXES: .c .o
4141
.c.o:
42-
$(CC) $(CFLAGS) -I../include -c -o $@ $<
42+
$(CC) $(CFLAGS) -Wall -I../include -c -o $@ $<
4343

4444
OBJ = \
4545
lapacke_ilaver.o \
@@ -604,6 +604,8 @@ lapacke_ctrsna.o \
604604
lapacke_ctrsna_work.o \
605605
lapacke_ctrsyl.o \
606606
lapacke_ctrsyl_work.o \
607+
lapacke_ctrsyl3.o \
608+
lapacke_ctrsyl3_work.o \
607609
lapacke_ctrtri.o \
608610
lapacke_ctrtri_work.o \
609611
lapacke_ctrtrs.o \
@@ -1216,6 +1218,8 @@ lapacke_dtrsna.o \
12161218
lapacke_dtrsna_work.o \
12171219
lapacke_dtrsyl.o \
12181220
lapacke_dtrsyl_work.o \
1221+
lapacke_dtrsyl3.o \
1222+
lapacke_dtrsyl3_work.o \
12191223
lapacke_dtrtri.o \
12201224
lapacke_dtrtri_work.o \
12211225
lapacke_dtrtrs.o \
@@ -1782,6 +1786,8 @@ lapacke_strsna.o \
17821786
lapacke_strsna_work.o \
17831787
lapacke_strsyl.o \
17841788
lapacke_strsyl_work.o \
1789+
lapacke_strsyl3.o \
1790+
lapacke_strsyl3_work.o \
17851791
lapacke_strtri.o \
17861792
lapacke_strtri_work.o \
17871793
lapacke_strtrs.o \
@@ -2356,6 +2362,8 @@ lapacke_ztrsna.o \
23562362
lapacke_ztrsna_work.o \
23572363
lapacke_ztrsyl.o \
23582364
lapacke_ztrsyl_work.o \
2365+
lapacke_ztrsyl3.o \
2366+
lapacke_ztrsyl3_work.o \
23592367
lapacke_ztrtri.o \
23602368
lapacke_ztrtri_work.o \
23612369
lapacke_ztrtrs.o \

LAPACKE/src/lapacke_ctrsyl3.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#include "lapacke_utils.h"
2+
3+
lapack_int LAPACKE_ctrsyl3( int matrix_layout, char trana, char tranb,
4+
lapack_int isgn, lapack_int m, lapack_int n,
5+
const lapack_complex_float* a, lapack_int lda,
6+
const lapack_complex_float* b, lapack_int ldb,
7+
lapack_complex_float* c, lapack_int ldc,
8+
float* scale )
9+
{
10+
lapack_int info = 0;
11+
float swork_query[2];
12+
float* swork = NULL;
13+
lapack_int ldswork = -1;
14+
lapack_int swork_size = -1;
15+
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
16+
LAPACKE_xerbla( "LAPACKE_ctrsyl3", -1 );
17+
return -1;
18+
}
19+
#ifndef LAPACK_DISABLE_NAN_CHECK
20+
if( LAPACKE_get_nancheck() ) {
21+
/* Optionally check input matrices for NaNs */
22+
if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) {
23+
return -7;
24+
}
25+
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
26+
return -9;
27+
}
28+
if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) {
29+
return -11;
30+
}
31+
}
32+
#endif
33+
/* Query optimal working array sizes */
34+
info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
35+
b, ldb, c, ldc, scale, swork_query, ldswork );
36+
if( info != 0 ) {
37+
goto exit_level_0;
38+
}
39+
ldswork = swork_query[0];
40+
swork_size = ldswork * swork_query[1];
41+
swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size);
42+
if( swork == NULL ) {
43+
info = LAPACK_WORK_MEMORY_ERROR;
44+
goto exit_level_0;
45+
}
46+
/* Call middle-level interface */
47+
info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
48+
lda, b, ldb, c, ldc, scale, swork, ldswork );
49+
/* Release memory and exit */
50+
LAPACKE_free( swork );
51+
exit_level_0:
52+
if( info == LAPACK_WORK_MEMORY_ERROR ) {
53+
LAPACKE_xerbla( "LAPACKE_ctrsyl3", info );
54+
}
55+
return info;
56+
}

LAPACKE/src/lapacke_ctrsyl3_work.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#include "lapacke_utils.h"
2+
3+
lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb,
4+
lapack_int isgn, lapack_int m, lapack_int n,
5+
const lapack_complex_float* a, lapack_int lda,
6+
const lapack_complex_float* b, lapack_int ldb,
7+
lapack_complex_float* c, lapack_int ldc,
8+
float* scale, float* swork,
9+
lapack_int ldswork )
10+
{
11+
lapack_int info = 0;
12+
if( matrix_layout == LAPACK_COL_MAJOR ) {
13+
/* Call LAPACK function and adjust info */
14+
LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a, &lda, b, &ldb, c, &ldc,
15+
scale, swork, &ldswork, &info );
16+
if( info < 0 ) {
17+
info = info - 1;
18+
}
19+
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
20+
lapack_int lda_t = MAX(1,m);
21+
lapack_int ldb_t = MAX(1,n);
22+
lapack_int ldc_t = MAX(1,m);
23+
lapack_complex_float* a_t = NULL;
24+
lapack_complex_float* b_t = NULL;
25+
lapack_complex_float* c_t = NULL;
26+
/* Check leading dimension(s) */
27+
if( lda < m ) {
28+
info = -8;
29+
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
30+
return info;
31+
}
32+
if( ldb < n ) {
33+
info = -10;
34+
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
35+
return info;
36+
}
37+
if( ldc < n ) {
38+
info = -12;
39+
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
40+
return info;
41+
}
42+
/* Allocate memory for temporary array(s) */
43+
a_t = (lapack_complex_float*)
44+
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,m) );
45+
if( a_t == NULL ) {
46+
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
47+
goto exit_level_0;
48+
}
49+
b_t = (lapack_complex_float*)
50+
LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) );
51+
if( b_t == NULL ) {
52+
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
53+
goto exit_level_1;
54+
}
55+
c_t = (lapack_complex_float*)
56+
LAPACKE_malloc( sizeof(lapack_complex_float) * ldc_t * MAX(1,n) );
57+
if( c_t == NULL ) {
58+
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
59+
goto exit_level_2;
60+
}
61+
/* Transpose input matrices */
62+
LAPACKE_cge_trans( matrix_layout, m, m, a, lda, a_t, lda_t );
63+
LAPACKE_cge_trans( matrix_layout, n, n, b, ldb, b_t, ldb_t );
64+
LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t );
65+
/* Call LAPACK function and adjust info */
66+
LAPACK_ctrsyl3( &trana, &tranb, &isgn, &m, &n, a_t, &lda_t, b_t, &ldb_t,
67+
c_t, &ldc_t, scale, swork, &ldswork, &info );
68+
if( info < 0 ) {
69+
info = info - 1;
70+
}
71+
/* Transpose output matrices */
72+
LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, c_t, ldc_t, c, ldc );
73+
/* Release memory and exit */
74+
LAPACKE_free( c_t );
75+
exit_level_2:
76+
LAPACKE_free( b_t );
77+
exit_level_1:
78+
LAPACKE_free( a_t );
79+
exit_level_0:
80+
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
81+
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
82+
}
83+
} else {
84+
info = -1;
85+
LAPACKE_xerbla( "LAPACKE_ctrsyl3_work", info );
86+
}
87+
return info;
88+
}

0 commit comments

Comments
 (0)