Skip to content

Commit a307474

Browse files
committed
Merge branch 'interfaceELSI' into 'master'
Slightly improve the C/Fortran interface & remove dependencies on NVTX tools in example 2. See merge request SLai/ChASE!26
2 parents 9e9361c + f360c27 commit a307474

File tree

3 files changed

+55
-216
lines changed

3 files changed

+55
-216
lines changed

examples/2_input_output/CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,18 @@ if( ${Boost_FOUND} )
1111
add_executable( "2_input_output_seq"
1212
"2_input_output.cpp"
1313
)
14-
target_link_libraries( "2_input_output_seq" PRIVATE chase_seq ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
14+
target_link_libraries( "2_input_output_seq" PRIVATE chase_seq ${Boost_LIBRARIES})
1515

1616
##############################################################################
1717
# 2_input_output: no GPU, MPI ($A$ distributed among MPI-ranks)
1818
##############################################################################
1919

2020
add_executable( "2_input_output" "2_input_output.cpp" )
21-
target_link_libraries( "2_input_output" chase_mpi ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
21+
target_link_libraries( "2_input_output" chase_mpi ${Boost_LIBRARIES})
2222
target_compile_definitions( "2_input_output" PRIVATE USE_MPI=1 PRINT_EIGENVALUES=1 CHASE_OUTPUT=1)
2323

2424
add_executable( "2_input_output_block_cyclic" "2_input_output.cpp" )
25-
target_link_libraries( "2_input_output_block_cyclic" chase_mpi ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
25+
target_link_libraries( "2_input_output_block_cyclic" chase_mpi ${Boost_LIBRARIES} )
2626
target_compile_definitions( "2_input_output_block_cyclic" PRIVATE USE_MPI=1 USE_BLOCK_CYCLIC=1)
2727

2828
install (TARGETS 2_input_output_seq
@@ -50,11 +50,11 @@ if( ${Boost_FOUND} )
5050
endif()
5151

5252
add_executable( 2_input_output_mgpu "2_input_output.cpp" )
53-
target_link_libraries( 2_input_output_mgpu chase_mpi chase_cuda ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
53+
target_link_libraries( 2_input_output_mgpu chase_mpi chase_cuda ${Boost_LIBRARIES})
5454
target_compile_definitions( "2_input_output_mgpu" PRIVATE DRIVER_BUILD_MGPU=1 CHASE_OUTPUT=1 PRINT_EIGENVALUES=1 USE_MPI=1)
5555

5656
add_executable( 2_input_output_mgpu_block_cyclic "2_input_output.cpp" )
57-
target_link_libraries( 2_input_output_mgpu_block_cyclic chase_mpi chase_cuda ${Boost_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
57+
target_link_libraries( 2_input_output_mgpu_block_cyclic chase_mpi chase_cuda ${Boost_LIBRARIES} )
5858
target_compile_definitions( "2_input_output_mgpu_block_cyclic" PRIVATE DRIVER_BUILD_MGPU=1 USE_MPI=1 USE_BLOCK_CYCLIC=1)
5959

6060
install (TARGETS 2_input_output_mgpu

interface/chase_c.cpp

Lines changed: 33 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#ifdef HAS_GPU
2525
#include "ChASE-MPI/impl/chase_mpidla_mgpu.hpp"
26+
#include "ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp"
2627
#endif
2728

2829
using namespace chase;
@@ -288,9 +289,13 @@ ChaseMpiProperties<std::complex<float>>* ChASE_State::getProperties() {
288289
}
289290

290291
template <typename T>
291-
void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
292+
void chase_seq(int *N, T* H, int* ldh, T* V, Base<T>* ritzv, int* nev, int* nex,
292293
int* deg, double* tol, char* mode, char* opt) {
294+
#ifdef HAS_GPU
295+
typedef ChaseMpi<ChaseMpiDLACudaSeq, T> SEQ_CHASE;
296+
#else
293297
typedef ChaseMpi<ChaseMpiDLABlaslapackSeq, T> SEQ_CHASE;
298+
#endif
294299

295300
std::vector<std::chrono::duration<double>> timings(3);
296301
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times(3);
@@ -300,14 +305,18 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
300305
std::mt19937 gen(2342.0);
301306
std::normal_distribution<> d;
302307

303-
SEQ_CHASE single(*N, *nev, *nex, V, ritzv, H);
308+
SEQ_CHASE single(*N, *nev, *nex, V, ritzv);
309+
310+
T* H_ = single.GetMatrixPtr();
304311

305312
ChaseConfig<T>& config = single.GetConfig();
306313
config.SetTol(*tol);
307314
config.SetDeg(*deg);
308315
config.SetOpt(*opt == 'S');
309316
config.SetApprox(*mode == 'A');
310317

318+
t_lacpy('A', *N, *N, H, *ldh, H_, *N);
319+
311320
if (!config.UseApprox())
312321
for (std::size_t k = 0; k < *N * (*nev + *nex); ++k)
313322
V[k] = getRandomT<T>([&]() { return d(gen); });
@@ -317,9 +326,11 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
317326
chase::Solve(&performanceDecorator);
318327
timings[2] = std::chrono::high_resolution_clock::now() - start_times[2];
319328
timings[1] = std::chrono::high_resolution_clock::now() - start_times[1];
320-
std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n";
321-
performanceDecorator.GetPerfData().print();
322-
std::cout << "ChASE]> total time in ChASE: " << timings[1].count() << "\n";
329+
#ifdef CHASE_OUTPUT
330+
std::cout << " ChASE]> ChASE Solve done in: " << timings[2].count() << "\n";
331+
performanceDecorator.GetPerfData().print();
332+
std::cout << " ChASE]> total time in ChASE: " << timings[1].count() << "\n";
333+
#endif
323334
}
324335

325336
template <typename T>
@@ -347,8 +358,11 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex ){
347358
template <typename T>
348359
void chase_solve(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, char* mode,
349360
char* opt) {
361+
#ifdef HAS_GPU
362+
typedef ChaseMpi<ChaseMpiDLAMultiGPU, T> CHASE;
363+
#else
350364
typedef ChaseMpi<ChaseMpiDLABlaslapack, T> CHASE;
351-
365+
#endif
352366
std::vector<std::chrono::duration<double>> timings(3);
353367
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times(3);
354368

@@ -371,101 +385,32 @@ void chase_solve(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, ch
371385
auto N = config.GetN();
372386
auto nev = config.GetNev();
373387
auto nex = config.GetNex();
374-
375-
if (!config.UseApprox())
376-
for (std::size_t k = 0; k < N * (nev + nex); ++k)
377-
V[k] = getRandomT<T>([&]() { return d(gen); });
378-
/*
379-
for(auto j = 0; j < n; j++ ){
380-
for(auto i = 0; i < m; i++){
381-
H_[m * j + i] = H[j * ldh + i];
382-
}
383-
}
384-
*/
385-
388+
386389
t_lacpy('A', m, n, H, ldh, H_, m);
387-
388-
//std::cout << myRank << ": m = " << m << ", n = " << n << ", ldh = " << ldh << std::endl;
389390

390391
config.SetTol(*tol);
391392
config.SetDeg(*deg);
392393
config.SetOpt(*opt == 'S');
393394
config.SetApprox(*mode == 'A');
394395

395-
PerformanceDecoratorChase<T> performanceDecorator(&single);
396-
start_times[2] = std::chrono::high_resolution_clock::now();
397-
chase::Solve(&performanceDecorator);
398-
399-
timings[2] = std::chrono::high_resolution_clock::now() - start_times[2];
400-
timings[1] = std::chrono::high_resolution_clock::now() - start_times[1];
401-
if(myRank == 0){
402-
std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n";
403-
performanceDecorator.GetPerfData().print();
404-
std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n";
405-
}
406-
}
407-
408-
#ifdef HAS_GPU
409-
template <typename T>
410-
void chase_solve_mgpu(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, char* mode,
411-
char* opt) {
412-
413-
typedef ChaseMpi<ChaseMpiDLAMultiGPU, T> CHASE;
414-
415-
int ldh = *LDH;
416-
std::vector<std::chrono::duration<double>> timings(3);
417-
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times(3);
418-
419-
std::mt19937 gen(2342.0);
420-
std::normal_distribution<> d;
421-
ChaseMpiProperties<T>* props = ChASE_State::getProperties<T>();
422-
423-
int myRank = props->get_my_rank();
424-
425-
CHASE single(props, V, ritzv);
426-
427-
T* H_ = single.GetMatrixPtr();
428-
std::size_t m, n;
429-
m = props->get_m();
430-
n = props->get_n();
431-
432-
ChaseConfig<T>& config = single.GetConfig();
433-
auto N = config.GetN();
434-
auto nev = config.GetNev();
435-
auto nex = config.GetNex();
436-
437396
if (!config.UseApprox())
438397
for (std::size_t k = 0; k < N * (nev + nex); ++k)
439398
V[k] = getRandomT<T>([&]() { return d(gen); });
440-
/*
441-
for(auto j = 0; j < n; j++ ){
442-
for(auto i = 0; i < m; i++){
443-
H_[m * j + i] = H[j * ldh + i];
444-
}
445-
}
446-
*/
447-
t_lacpy('A', m, n, H, ldh, H_, m);
448-
config.SetTol(*tol);
449-
config.SetDeg(*deg);
450-
config.SetOpt(*opt == 'S');
451-
config.SetApprox(*mode == 'A');
452399

453400
PerformanceDecoratorChase<T> performanceDecorator(&single);
454401
start_times[2] = std::chrono::high_resolution_clock::now();
455402
chase::Solve(&performanceDecorator);
456403

457404
timings[2] = std::chrono::high_resolution_clock::now() - start_times[2];
458405
timings[1] = std::chrono::high_resolution_clock::now() - start_times[1];
459-
#ifdef INFO_PRINT
406+
#ifdef CHASE_OUTPUT
460407
if(myRank == 0){
461-
std::cout << "ChASE-MGPU]> ChASE Solve done in: " << timings[2].count() << "\n";
408+
std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n";
462409
performanceDecorator.GetPerfData().print();
463-
std::cout << "ChASE-MGPU]> total time in ChASE: " << timings[1].count() << "\n";
410+
std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n";
464411
}
465-
#endif
466-
412+
#endif
467413
}
468-
#endif
469414

470415
extern "C" {
471416
/** @defgroup chasc-c ChASE C Interface
@@ -486,10 +431,10 @@ extern "C" {
486431
* @param[in] mode for sequences of eigenproblems, whether to reuse the eigenpairs obtained from the last system. If `mode = A`, reuse them; otherwise, do not.
487432
* @param[in] opt whether to use the internal optimization of the Chebyshev polynomial degree. If `opt=S`, use it; otherwise, do not.
488433
*/
489-
void zchase_(std::complex<double>* H, int* N, std::complex<double>* V,
434+
void zchase_(int *N, std::complex<double>* H, int* ldh, std::complex<double>* V,
490435
double* ritzv, int* nev, int* nex, int* deg, double* tol,
491436
char* mode, char* opt) {
492-
chase_seq<std::complex<double>>(H, N, V, ritzv, nev, nex, deg, tol, mode,
437+
chase_seq<std::complex<double>>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode,
493438
opt);
494439
}
495440

@@ -506,9 +451,9 @@ void zchase_(std::complex<double>* H, int* N, std::complex<double>* V,
506451
* @param[in] mode for sequences of eigenproblems, whether to reuse the eigenpairs obtained from the last system. If `mode = A`, reuse them; otherwise, do not.
507452
* @param[in] opt whether to use the internal optimization of the Chebyshev polynomial degree. If `opt=S`, use it; otherwise, do not.
508453
*/
509-
void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex,
454+
void dchase_(int *N, double* H, int* ldh, double* V, double* ritzv, int* nev, int* nex,
510455
int* deg, double* tol, char* mode, char* opt) {
511-
chase_seq<double>(H, N, V, ritzv, nev, nex, deg, tol, mode, opt);
456+
chase_seq<double>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt);
512457
}
513458

514459
//! shared-memory version of ChASE with complex scalar in single precision
@@ -524,10 +469,10 @@ void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex,
524469
* @param[in] mode for sequences of eigenproblems, whether to reuse the eigenpairs obtained from the last system. If `mode = A`, reuse them; otherwise, do not.
525470
* @param[in] opt whether to use the internal optimization of the Chebyshev polynomial degree. If `opt=S`, use it; otherwise, do not.
526471
*/
527-
void cchase_(std::complex<float>* H, int* N, std::complex<float>* V,
472+
void cchase_(int *N, std::complex<float>* H, int *ldh, std::complex<float>* V,
528473
float* ritzv, int* nev, int* nex, int* deg, double* tol,
529474
char* mode, char* opt) {
530-
chase_seq<std::complex<float>>(H, N, V, ritzv, nev, nex, deg, tol, mode,
475+
chase_seq<std::complex<float>>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode,
531476
opt);
532477
}
533478

@@ -544,9 +489,9 @@ void cchase_(std::complex<float>* H, int* N, std::complex<float>* V,
544489
* @param[in] mode for sequences of eigenproblems, whether to reuse the eigenpairs obtained from the last system. If `mode = A`, reuse them; otherwise, do not.
545490
* @param[in] opt whether to use the internal optimization of the Chebyshev polynomial degree. If `opt=S`, use it; otherwise, do not.
546491
*/
547-
void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex,
492+
void schase_(int *N, float* H, int* ldh, float* V, float* ritzv, int* nev, int* nex,
548493
int* deg, double* tol, char* mode, char* opt) {
549-
chase_seq<float>(H, N, V, ritzv, nev, nex, deg, tol, mode, opt);
494+
chase_seq<float>(N, H, ldh, V, ritzv, nev, nex, deg, tol, mode, opt);
550495
}
551496

552497
//! initialisation of the environment for distributed ChASE with complex scalar in double precision
@@ -683,27 +628,6 @@ void pschase_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol,
683628
chase_solve<float>(H, ldh, V, ritzv, deg, tol, mode, opt);
684629
}
685630

686-
#ifdef HAS_GPU
687-
void pzchase_mgpu_(std::complex<double>* H, int *ldh, std::complex<double>* V,
688-
double* ritzv, int* deg, double* tol, char* mode, char* opt) {
689-
chase_solve_mgpu<std::complex<double>>(H, ldh, V, ritzv, deg, tol, mode, opt);
690-
}
691-
692-
void pdchase_mgpu_(double* H, int *ldh, double* V, double* ritzv, int* deg, double* tol,
693-
char* mode, char* opt) {
694-
chase_solve_mgpu<double>(H, ldh, V, ritzv, deg, tol, mode, opt);
695-
}
696-
697-
void pcchase_mgpu_(std::complex<float>* H, int *ldh, std::complex<float>* V,
698-
float* ritzv, int* deg, double* tol, char* mode, char* opt) {
699-
chase_solve_mgpu<std::complex<float>>(H, ldh, V, ritzv, deg, tol, mode, opt);
700-
}
701-
702-
void pschase_mgpu_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol,
703-
char* mode, char* opt) {
704-
chase_solve_mgpu<float>(H, ldh, V, ritzv, deg, tol, mode, opt);
705-
}
706-
#endif
707631
/** @} */ // end of chasc-c
708632

709633
} // extern C

0 commit comments

Comments
 (0)