Skip to content

Commit f16aa1c

Browse files
authored
Merge pull request #3821 from martin-frbg/lapack651
Add a BLAS3-based triangular Sylvester equation solver (Reference-LAPACK PR 651)
2 parents c5c4888 + 2a97ca6 commit f16aa1c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+24707
-139
lines changed

cmake/lapack.cmake

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ set(SLASRC
123123
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
124124
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
125125
sgesvdq.f slaorhr_col_getrfnp.f
126-
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
126+
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f
127+
slarmm.f slatrs3.f strsyl3.f)
127128

128129
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
129130
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@@ -221,7 +222,8 @@ set(CLASRC
221222
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
222223
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
223224
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
224-
cungtsqr.f cungtsqr_row.f cunhr_col.f )
225+
cungtsqr.f cungtsqr_row.f cunhr_col.f
226+
clatrs3.f ctrsyl3.f )
225227

226228
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
227229
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@@ -313,7 +315,8 @@ set(DLASRC
313315
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
314316
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
315317
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
316-
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
318+
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f
319+
dlarmm.f dlatrs3.f dtrsyl3.f)
317320

318321
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
319322
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@@ -415,7 +418,8 @@ set(ZLASRC
415418
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
416419
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
417420
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
418-
zungtsqr.f zungtsqr_row.f zunhr_col.f)
421+
zungtsqr.f zungtsqr_row.f zunhr_col.f
422+
zlatrs3.f ztrsyl3.f)
419423

420424
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
421425
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
@@ -617,7 +621,8 @@ set(SLASRC
617621
ssyevd_2stage.c ssyev_2stage.c ssyevx_2stage.c ssyevr_2stage.c
618622
ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
619623
sgesvdq.c slaorhr_col_getrfnp.c
620-
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c )
624+
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c
625+
slarmm.c slatrs3.c strsyl3.c)
621626

622627
set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
623628
sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
@@ -714,7 +719,8 @@ set(CLASRC
714719
cheevd_2stage.c cheev_2stage.c cheevx_2stage.c cheevr_2stage.c
715720
chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c
716721
cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c
717-
cungtsqr.c cungtsqr_row.c cunhr_col.c )
722+
cungtsqr.c cungtsqr_row.c cunhr_col.c
723+
clatrs3.c ctrsyl3.c)
718724

719725
set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c
720726
cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c
@@ -805,7 +811,8 @@ set(DLASRC
805811
dsyevd_2stage.c dsyev_2stage.c dsyevx_2stage.c dsyevr_2stage.c
806812
dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
807813
dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
808-
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c )
814+
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c
815+
dlarmm.c dlatrs3.c dtrsyl3.c)
809816

810817
set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
811818
dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c
@@ -906,7 +913,7 @@ set(ZLASRC
906913
zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c
907914
zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c
908915
zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c
909-
zungtsqr.c zungtsqr_row.c zunhr_col.c)
916+
zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c)
910917

911918
set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
912919
zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c

lapack-netlib/LAPACKE/include/lapack.h

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <stdlib.h>
1414
#include <stdarg.h>
15+
#include <inttypes.h>
1516

1617
/* It seems all current Fortran compilers put strlen at end.
1718
* Some historical compilers put strlen after the str argument
@@ -80,11 +81,26 @@ extern "C" {
8081

8182
/*----------------------------------------------------------------------------*/
8283
#ifndef lapack_int
83-
#define lapack_int int
84+
#if defined(LAPACK_ILP64)
85+
#define lapack_int int64_t
86+
#else
87+
#define lapack_int int32_t
88+
#endif
89+
#endif
90+
91+
/*
92+
* Integer format string
93+
*/
94+
#ifndef LAPACK_IFMT
95+
#if defined(LAPACK_ILP64)
96+
#define LAPACK_IFMT PRId64
97+
#else
98+
#define LAPACK_IFMT PRId32
99+
#endif
84100
#endif
85101

86102
#ifndef lapack_logical
87-
#define lapack_logical lapack_int
103+
#define lapack_logical lapack_int
88104
#endif
89105

90106
/* f2c, hence clapack and MacOS Accelerate, returns double instead of float
@@ -115,7 +131,7 @@ typedef lapack_logical (*LAPACK_Z_SELECT2)
115131
( const lapack_complex_double*, const lapack_complex_double* );
116132

117133
#define LAPACK_lsame_base LAPACK_GLOBAL(lsame,LSAME)
118-
lapack_logical LAPACK_lsame_base( const char* ca, const char* cb,
134+
lapack_logical LAPACK_lsame_base( const char* ca, const char* cb,
119135
lapack_int lca, lapack_int lcb
120136
#ifdef LAPACK_FORTRAN_STRLEN_END
121137
, size_t, size_t
@@ -21986,6 +22002,84 @@ void LAPACK_ztrsyl_base(
2198622002
#define LAPACK_ztrsyl(...) LAPACK_ztrsyl_base(__VA_ARGS__)
2198722003
#endif
2198822004

22005+
#define LAPACK_ctrsyl3_base LAPACK_GLOBAL(ctrsyl3,CTRSYL3)
22006+
void LAPACK_ctrsyl3_base(
22007+
char const* trana, char const* tranb,
22008+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22009+
lapack_complex_float const* A, lapack_int const* lda,
22010+
lapack_complex_float const* B, lapack_int const* ldb,
22011+
lapack_complex_float* C, lapack_int const* ldc, float* scale,
22012+
float* swork, lapack_int const *ldswork,
22013+
lapack_int* info
22014+
#ifdef LAPACK_FORTRAN_STRLEN_END
22015+
, size_t, size_t
22016+
#endif
22017+
);
22018+
#ifdef LAPACK_FORTRAN_STRLEN_END
22019+
#define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__, 1, 1)
22020+
#else
22021+
#define LAPACK_ctrsyl3(...) LAPACK_ctrsyl3_base(__VA_ARGS__)
22022+
#endif
22023+
22024+
#define LAPACK_dtrsyl3_base LAPACK_GLOBAL(dtrsyl3,DTRSYL3)
22025+
void LAPACK_dtrsyl3_base(
22026+
char const* trana, char const* tranb,
22027+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22028+
double const* A, lapack_int const* lda,
22029+
double const* B, lapack_int const* ldb,
22030+
double* C, lapack_int const* ldc, double* scale,
22031+
lapack_int* iwork, lapack_int const* liwork,
22032+
double* swork, lapack_int const *ldswork,
22033+
lapack_int* info
22034+
#ifdef LAPACK_FORTRAN_STRLEN_END
22035+
, size_t, size_t
22036+
#endif
22037+
);
22038+
#ifdef LAPACK_FORTRAN_STRLEN_END
22039+
#define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__, 1, 1)
22040+
#else
22041+
#define LAPACK_dtrsyl3(...) LAPACK_dtrsyl3_base(__VA_ARGS__)
22042+
#endif
22043+
22044+
#define LAPACK_strsyl3_base LAPACK_GLOBAL(strsyl3,STRSYL3)
22045+
void LAPACK_strsyl3_base(
22046+
char const* trana, char const* tranb,
22047+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22048+
float const* A, lapack_int const* lda,
22049+
float const* B, lapack_int const* ldb,
22050+
float* C, lapack_int const* ldc, float* scale,
22051+
lapack_int* iwork, lapack_int const* liwork,
22052+
float* swork, lapack_int const *ldswork,
22053+
lapack_int* info
22054+
#ifdef LAPACK_FORTRAN_STRLEN_END
22055+
, size_t, size_t
22056+
#endif
22057+
);
22058+
#ifdef LAPACK_FORTRAN_STRLEN_END
22059+
#define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__, 1, 1)
22060+
#else
22061+
#define LAPACK_strsyl3(...) LAPACK_strsyl3_base(__VA_ARGS__)
22062+
#endif
22063+
22064+
#define LAPACK_ztrsyl3_base LAPACK_GLOBAL(ztrsyl3,ZTRSYL3)
22065+
void LAPACK_ztrsyl3_base(
22066+
char const* trana, char const* tranb,
22067+
lapack_int const* isgn, lapack_int const* m, lapack_int const* n,
22068+
lapack_complex_double const* A, lapack_int const* lda,
22069+
lapack_complex_double const* B, lapack_int const* ldb,
22070+
lapack_complex_double* C, lapack_int const* ldc, double* scale,
22071+
double* swork, lapack_int const *ldswork,
22072+
lapack_int* info
22073+
#ifdef LAPACK_FORTRAN_STRLEN_END
22074+
, size_t, size_t
22075+
#endif
22076+
);
22077+
#ifdef LAPACK_FORTRAN_STRLEN_END
22078+
#define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__, 1, 1)
22079+
#else
22080+
#define LAPACK_ztrsyl3(...) LAPACK_ztrsyl3_base(__VA_ARGS__)
22081+
#endif
22082+
2198922083
#define LAPACK_ctrtri_base LAPACK_GLOBAL(ctrtri,CTRTRI)
2199022084
void LAPACK_ctrtri_base(
2199122085
char const* uplo, char const* diag,

lapack-netlib/LAPACKE/include/lapacke.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2313,6 +2313,19 @@ lapack_int LAPACKE_zlagge( int matrix_layout, lapack_int m, lapack_int n,
23132313
float LAPACKE_slamch( char cmach );
23142314
double LAPACKE_dlamch( char cmach );
23152315

2316+
float LAPACKE_slangb( int matrix_layout, char norm, lapack_int n,
2317+
lapack_int kl, lapack_int ku, const float* ab,
2318+
lapack_int ldab );
2319+
double LAPACKE_dlangb( int matrix_layout, char norm, lapack_int n,
2320+
lapack_int kl, lapack_int ku, const double* ab,
2321+
lapack_int ldab );
2322+
float LAPACKE_clangb( int matrix_layout, char norm, lapack_int n,
2323+
lapack_int kl, lapack_int ku,
2324+
const lapack_complex_float* ab, lapack_int ldab );
2325+
double LAPACKE_zlangb( int matrix_layout, char norm, lapack_int n,
2326+
lapack_int kl, lapack_int ku,
2327+
const lapack_complex_double* ab, lapack_int ldab );
2328+
23162329
float LAPACKE_slange( int matrix_layout, char norm, lapack_int m,
23172330
lapack_int n, const float* a, lapack_int lda );
23182331
double LAPACKE_dlange( int matrix_layout, char norm, lapack_int m,
@@ -4477,6 +4490,23 @@ lapack_int LAPACKE_ztrsyl( int matrix_layout, char trana, char tranb,
44774490
lapack_complex_double* c, lapack_int ldc,
44784491
double* scale );
44794492

4493+
lapack_int LAPACKE_strsyl3( int matrix_layout, char trana, char tranb,
4494+
lapack_int isgn, lapack_int m, lapack_int n,
4495+
const float* a, lapack_int lda, const float* b,
4496+
lapack_int ldb, float* c, lapack_int ldc,
4497+
float* scale );
4498+
lapack_int LAPACKE_dtrsyl3( int matrix_layout, char trana, char tranb,
4499+
lapack_int isgn, lapack_int m, lapack_int n,
4500+
const double* a, lapack_int lda, const double* b,
4501+
lapack_int ldb, double* c, lapack_int ldc,
4502+
double* scale );
4503+
lapack_int LAPACKE_ztrsyl3( int matrix_layout, char trana, char tranb,
4504+
lapack_int isgn, lapack_int m, lapack_int n,
4505+
const lapack_complex_double* a, lapack_int lda,
4506+
const lapack_complex_double* b, lapack_int ldb,
4507+
lapack_complex_double* c, lapack_int ldc,
4508+
double* scale );
4509+
44804510
lapack_int LAPACKE_strtri( int matrix_layout, char uplo, char diag, lapack_int n,
44814511
float* a, lapack_int lda );
44824512
lapack_int LAPACKE_dtrtri( int matrix_layout, char uplo, char diag, lapack_int n,
@@ -7576,6 +7606,21 @@ double LAPACKE_dlapy3_work( double x, double y, double z );
75767606
float LAPACKE_slamch_work( char cmach );
75777607
double LAPACKE_dlamch_work( char cmach );
75787608

7609+
float LAPACKE_slangb_work( int matrix_layout, char norm, lapack_int n,
7610+
lapack_int kl, lapack_int ku, const float* ab,
7611+
lapack_int ldab, float* work );
7612+
double LAPACKE_dlangb_work( int matrix_layout, char norm, lapack_int n,
7613+
lapack_int kl, lapack_int ku, const double* ab,
7614+
lapack_int ldab, double* work );
7615+
float LAPACKE_clangb_work( int matrix_layout, char norm, lapack_int n,
7616+
lapack_int kl, lapack_int ku,
7617+
const lapack_complex_float* ab, lapack_int ldab,
7618+
float* work );
7619+
double LAPACKE_zlangb_work( int matrix_layout, char norm, lapack_int n,
7620+
lapack_int kl, lapack_int ku,
7621+
const lapack_complex_double* ab, lapack_int ldab,
7622+
double* work );
7623+
75797624
float LAPACKE_slange_work( int matrix_layout, char norm, lapack_int m,
75807625
lapack_int n, const float* a, lapack_int lda,
75817626
float* work );
@@ -10174,6 +10219,35 @@ lapack_int LAPACKE_ztrsyl_work( int matrix_layout, char trana, char tranb,
1017410219
lapack_complex_double* c, lapack_int ldc,
1017510220
double* scale );
1017610221

10222+
lapack_int LAPACKE_strsyl3_work( int matrix_layout, char trana, char tranb,
10223+
lapack_int isgn, lapack_int m, lapack_int n,
10224+
const float* a, lapack_int lda,
10225+
const float* b, lapack_int ldb,
10226+
float* c, lapack_int ldc, float* scale,
10227+
lapack_int* iwork, lapack_int liwork,
10228+
float* swork, lapack_int ldswork );
10229+
lapack_int LAPACKE_dtrsyl3_work( int matrix_layout, char trana, char tranb,
10230+
lapack_int isgn, lapack_int m, lapack_int n,
10231+
const double* a, lapack_int lda,
10232+
const double* b, lapack_int ldb,
10233+
double* c, lapack_int ldc, double* scale,
10234+
lapack_int* iwork, lapack_int liwork,
10235+
double* swork, lapack_int ldswork );
10236+
lapack_int LAPACKE_ctrsyl3_work( int matrix_layout, char trana, char tranb,
10237+
lapack_int isgn, lapack_int m, lapack_int n,
10238+
const lapack_complex_float* a, lapack_int lda,
10239+
const lapack_complex_float* b, lapack_int ldb,
10240+
lapack_complex_float* c, lapack_int ldc,
10241+
float* scale, float* swork,
10242+
lapack_int ldswork );
10243+
lapack_int LAPACKE_ztrsyl3_work( int matrix_layout, char trana, char tranb,
10244+
lapack_int isgn, lapack_int m, lapack_int n,
10245+
const lapack_complex_double* a, lapack_int lda,
10246+
const lapack_complex_double* b, lapack_int ldb,
10247+
lapack_complex_double* c, lapack_int ldc,
10248+
double* scale, double* swork,
10249+
lapack_int ldswork );
10250+
1017710251
lapack_int LAPACKE_strtri_work( int matrix_layout, char uplo, char diag,
1017810252
lapack_int n, float* a, lapack_int lda );
1017910253
lapack_int LAPACKE_dtrtri_work( int matrix_layout, char uplo, char diag,

lapack-netlib/LAPACKE/src/lapacke_cgesvdq.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
4848
lapack_int lrwork = -1;
4949
float* rwork = NULL;
5050
float rwork_query;
51-
lapack_int i;
5251
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
5352
LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
5453
return -1;
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#include "lapacke_utils.h"
2+
3+
lapack_int LAPACKE_ctrsyl3( int matrix_layout, char trana, char tranb,
4+
lapack_int isgn, lapack_int m, lapack_int n,
5+
const lapack_complex_float* a, lapack_int lda,
6+
const lapack_complex_float* b, lapack_int ldb,
7+
lapack_complex_float* c, lapack_int ldc,
8+
float* scale )
9+
{
10+
lapack_int info = 0;
11+
float swork_query[2];
12+
float* swork = NULL;
13+
lapack_int ldswork = -1;
14+
lapack_int swork_size = -1;
15+
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
16+
LAPACKE_xerbla( "LAPACKE_ctrsyl3", -1 );
17+
return -1;
18+
}
19+
#ifndef LAPACK_DISABLE_NAN_CHECK
20+
if( LAPACKE_get_nancheck() ) {
21+
/* Optionally check input matrices for NaNs */
22+
if( LAPACKE_cge_nancheck( matrix_layout, m, m, a, lda ) ) {
23+
return -7;
24+
}
25+
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
26+
return -9;
27+
}
28+
if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) {
29+
return -11;
30+
}
31+
}
32+
#endif
33+
/* Query optimal working array sizes */
34+
info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a, lda,
35+
b, ldb, c, ldc, scale, swork_query, ldswork );
36+
if( info != 0 ) {
37+
goto exit_level_0;
38+
}
39+
ldswork = swork_query[0];
40+
swork_size = ldswork * swork_query[1];
41+
swork = (float*)LAPACKE_malloc( sizeof(float) * swork_size);
42+
if( swork == NULL ) {
43+
info = LAPACK_WORK_MEMORY_ERROR;
44+
goto exit_level_0;
45+
}
46+
/* Call middle-level interface */
47+
info = LAPACKE_ctrsyl3_work( matrix_layout, trana, tranb, isgn, m, n, a,
48+
lda, b, ldb, c, ldc, scale, swork, ldswork );
49+
/* Release memory and exit */
50+
LAPACKE_free( swork );
51+
exit_level_0:
52+
if( info == LAPACK_WORK_MEMORY_ERROR ) {
53+
LAPACKE_xerbla( "LAPACKE_ctrsyl3", info );
54+
}
55+
return info;
56+
}

0 commit comments

Comments
 (0)