23
23
24
24
#ifdef HAS_GPU
25
25
#include " ChASE-MPI/impl/chase_mpidla_mgpu.hpp"
26
+ #include " ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp"
26
27
#endif
27
28
28
29
using namespace chase ;
@@ -288,9 +289,13 @@ ChaseMpiProperties<std::complex<float>>* ChASE_State::getProperties() {
288
289
}
289
290
290
291
template <typename T>
291
- void chase_seq (T* H, int * N , T* V, Base<T>* ritzv, int * nev, int * nex,
292
+ void chase_seq (int *N, T* H, int * ldh , T* V, Base<T>* ritzv, int * nev, int * nex,
292
293
int * deg, double * tol, char * mode, char * opt) {
294
+ #ifdef HAS_GPU
295
+ typedef ChaseMpi<ChaseMpiDLACudaSeq, T> SEQ_CHASE;
296
+ #else
293
297
typedef ChaseMpi<ChaseMpiDLABlaslapackSeq, T> SEQ_CHASE;
298
+ #endif
294
299
295
300
std::vector<std::chrono::duration<double >> timings (3 );
296
301
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times (3 );
@@ -300,14 +305,18 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
300
305
std::mt19937 gen (2342.0 );
301
306
std::normal_distribution<> d;
302
307
303
- SEQ_CHASE single (*N, *nev, *nex, V, ritzv, H);
308
+ SEQ_CHASE single (*N, *nev, *nex, V, ritzv);
309
+
310
+ T* H_ = single.GetMatrixPtr ();
304
311
305
312
ChaseConfig<T>& config = single.GetConfig ();
306
313
config.SetTol (*tol);
307
314
config.SetDeg (*deg);
308
315
config.SetOpt (*opt == ' S' );
309
316
config.SetApprox (*mode == ' A' );
310
317
318
+ t_lacpy (' A' , *N, *N, H, *ldh, H_, *N);
319
+
311
320
if (!config.UseApprox ())
312
321
for (std::size_t k = 0 ; k < *N * (*nev + *nex); ++k)
313
322
V[k] = getRandomT<T>([&]() { return d (gen); });
@@ -317,9 +326,11 @@ void chase_seq(T* H, int* N, T* V, Base<T>* ritzv, int* nev, int* nex,
317
326
chase::Solve (&performanceDecorator);
318
327
timings[2 ] = std::chrono::high_resolution_clock::now () - start_times[2 ];
319
328
timings[1 ] = std::chrono::high_resolution_clock::now () - start_times[1 ];
320
- std::cout << " ChASE]> Seq-ChASE Solve done in: " << timings[2 ].count () << " \n " ;
321
- performanceDecorator.GetPerfData ().print ();
322
- std::cout << " ChASE]> total time in ChASE: " << timings[1 ].count () << " \n " ;
329
+ #ifdef CHASE_OUTPUT
330
+ std::cout << " ChASE]> ChASE Solve done in: " << timings[2 ].count () << " \n " ;
331
+ performanceDecorator.GetPerfData ().print ();
332
+ std::cout << " ChASE]> total time in ChASE: " << timings[1 ].count () << " \n " ;
333
+ #endif
323
334
}
324
335
325
336
template <typename T>
@@ -347,8 +358,11 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex ){
347
358
template <typename T>
348
359
void chase_solve (T* H, int *LDH, T* V, Base<T>* ritzv, int * deg, double * tol, char * mode,
349
360
char * opt) {
361
+ #ifdef HAS_GPU
362
+ typedef ChaseMpi<ChaseMpiDLAMultiGPU, T> CHASE;
363
+ #else
350
364
typedef ChaseMpi<ChaseMpiDLABlaslapack, T> CHASE;
351
-
365
+ # endif
352
366
std::vector<std::chrono::duration<double >> timings (3 );
353
367
std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times (3 );
354
368
@@ -371,101 +385,32 @@ void chase_solve(T* H, int *LDH, T* V, Base<T>* ritzv, int* deg, double* tol, ch
371
385
auto N = config.GetN ();
372
386
auto nev = config.GetNev ();
373
387
auto nex = config.GetNex ();
374
-
375
- if (!config.UseApprox ())
376
- for (std::size_t k = 0 ; k < N * (nev + nex); ++k)
377
- V[k] = getRandomT<T>([&]() { return d (gen); });
378
- /*
379
- for(auto j = 0; j < n; j++ ){
380
- for(auto i = 0; i < m; i++){
381
- H_[m * j + i] = H[j * ldh + i];
382
- }
383
- }
384
- */
385
-
388
+
386
389
t_lacpy (' A' , m, n, H, ldh, H_, m);
387
-
388
- // std::cout << myRank << ": m = " << m << ", n = " << n << ", ldh = " << ldh << std::endl;
389
390
390
391
config.SetTol (*tol);
391
392
config.SetDeg (*deg);
392
393
config.SetOpt (*opt == ' S' );
393
394
config.SetApprox (*mode == ' A' );
394
395
395
- PerformanceDecoratorChase<T> performanceDecorator (&single);
396
- start_times[2 ] = std::chrono::high_resolution_clock::now ();
397
- chase::Solve (&performanceDecorator);
398
-
399
- timings[2 ] = std::chrono::high_resolution_clock::now () - start_times[2 ];
400
- timings[1 ] = std::chrono::high_resolution_clock::now () - start_times[1 ];
401
- if (myRank == 0 ){
402
- std::cout << " ChASE-MPI]> ChASE Solve done in: " << timings[2 ].count () << " \n " ;
403
- performanceDecorator.GetPerfData ().print ();
404
- std::cout << " ChASE-MPI]> total time in ChASE: " << timings[1 ].count () << " \n " ;
405
- }
406
- }
407
-
408
- #ifdef HAS_GPU
409
- template <typename T>
410
- void chase_solve_mgpu (T* H, int *LDH, T* V, Base<T>* ritzv, int * deg, double * tol, char * mode,
411
- char * opt) {
412
-
413
- typedef ChaseMpi<ChaseMpiDLAMultiGPU, T> CHASE;
414
-
415
- int ldh = *LDH;
416
- std::vector<std::chrono::duration<double >> timings (3 );
417
- std::vector<std::chrono::time_point<std::chrono::high_resolution_clock>> start_times (3 );
418
-
419
- std::mt19937 gen (2342.0 );
420
- std::normal_distribution<> d;
421
- ChaseMpiProperties<T>* props = ChASE_State::getProperties<T>();
422
-
423
- int myRank = props->get_my_rank ();
424
-
425
- CHASE single (props, V, ritzv);
426
-
427
- T* H_ = single.GetMatrixPtr ();
428
- std::size_t m, n;
429
- m = props->get_m ();
430
- n = props->get_n ();
431
-
432
- ChaseConfig<T>& config = single.GetConfig ();
433
- auto N = config.GetN ();
434
- auto nev = config.GetNev ();
435
- auto nex = config.GetNex ();
436
-
437
396
if (!config.UseApprox ())
438
397
for (std::size_t k = 0 ; k < N * (nev + nex); ++k)
439
398
V[k] = getRandomT<T>([&]() { return d (gen); });
440
- /*
441
- for(auto j = 0; j < n; j++ ){
442
- for(auto i = 0; i < m; i++){
443
- H_[m * j + i] = H[j * ldh + i];
444
- }
445
- }
446
- */
447
- t_lacpy (' A' , m, n, H, ldh, H_, m);
448
- config.SetTol (*tol);
449
- config.SetDeg (*deg);
450
- config.SetOpt (*opt == ' S' );
451
- config.SetApprox (*mode == ' A' );
452
399
453
400
PerformanceDecoratorChase<T> performanceDecorator (&single);
454
401
start_times[2 ] = std::chrono::high_resolution_clock::now ();
455
402
chase::Solve (&performanceDecorator);
456
403
457
404
timings[2 ] = std::chrono::high_resolution_clock::now () - start_times[2 ];
458
405
timings[1 ] = std::chrono::high_resolution_clock::now () - start_times[1 ];
459
- #ifdef INFO_PRINT
406
+ #ifdef CHASE_OUTPUT
460
407
if (myRank == 0 ){
461
- std::cout << " ChASE-MGPU ]> ChASE Solve done in: " << timings[2 ].count () << " \n " ;
408
+ std::cout << " ChASE-MPI ]> ChASE Solve done in: " << timings[2 ].count () << " \n " ;
462
409
performanceDecorator.GetPerfData ().print ();
463
- std::cout << " ChASE-MGPU ]> total time in ChASE: " << timings[1 ].count () << " \n " ;
410
+ std::cout << " ChASE-MPI ]> total time in ChASE: " << timings[1 ].count () << " \n " ;
464
411
}
465
- #endif
466
-
412
+ #endif
467
413
}
468
- #endif
469
414
470
415
extern " C" {
471
416
/* * @defgroup chasc-c ChASE C Interface
@@ -486,10 +431,10 @@ extern "C" {
486
431
* @param[in] mode for sequences of eigenproblems, selects whether the eigenpairs obtained from the previous system are reused. If `mode = A`, they are reused; otherwise they are not.
487
432
* @param[in] opt determines whether the internal optimization of the Chebyshev polynomial degree is used. If `opt = S`, it is used; otherwise it is not.
488
433
*/
489
- void zchase_ (std::complex<double >* H, int * N , std::complex<double >* V,
434
+ void zchase_ (int *N, std::complex<double >* H, int * ldh , std::complex<double >* V,
490
435
double * ritzv, int * nev, int * nex, int * deg, double * tol,
491
436
char * mode, char * opt) {
492
- chase_seq<std::complex<double >>(H, N , V, ritzv, nev, nex, deg, tol, mode,
437
+ chase_seq<std::complex<double >>(N, H, ldh , V, ritzv, nev, nex, deg, tol, mode,
493
438
opt);
494
439
}
495
440
@@ -506,9 +451,9 @@ void zchase_(std::complex<double>* H, int* N, std::complex<double>* V,
506
451
* @param[in] mode for sequences of eigenproblems, selects whether the eigenpairs obtained from the previous system are reused. If `mode = A`, they are reused; otherwise they are not.
507
452
* @param[in] opt determines whether the internal optimization of the Chebyshev polynomial degree is used. If `opt = S`, it is used; otherwise it is not.
508
453
*/
509
- void dchase_ (double * H, int * N , double * V, double * ritzv, int * nev, int * nex,
454
+ void dchase_ (int *N, double * H, int * ldh , double * V, double * ritzv, int * nev, int * nex,
510
455
int * deg, double * tol, char * mode, char * opt) {
511
- chase_seq<double >(H, N , V, ritzv, nev, nex, deg, tol, mode, opt);
456
+ chase_seq<double >(N, H, ldh , V, ritzv, nev, nex, deg, tol, mode, opt);
512
457
}
513
458
514
459
// ! shared-memory version of ChASE with complex scalar in single precision
@@ -524,10 +469,10 @@ void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex,
524
469
* @param[in] mode for sequences of eigenproblems, selects whether the eigenpairs obtained from the previous system are reused. If `mode = A`, they are reused; otherwise they are not.
525
470
* @param[in] opt determines whether the internal optimization of the Chebyshev polynomial degree is used. If `opt = S`, it is used; otherwise it is not.
526
471
*/
527
- void cchase_ (std::complex<float >* H, int * N , std::complex<float >* V,
472
+ void cchase_ (int *N, std::complex<float >* H, int *ldh , std::complex<float >* V,
528
473
float * ritzv, int * nev, int * nex, int * deg, double * tol,
529
474
char * mode, char * opt) {
530
- chase_seq<std::complex<float >>(H, N , V, ritzv, nev, nex, deg, tol, mode,
475
+ chase_seq<std::complex<float >>(N, H, ldh , V, ritzv, nev, nex, deg, tol, mode,
531
476
opt);
532
477
}
533
478
@@ -544,9 +489,9 @@ void cchase_(std::complex<float>* H, int* N, std::complex<float>* V,
544
489
* @param[in] mode for sequences of eigenproblems, selects whether the eigenpairs obtained from the previous system are reused. If `mode = A`, they are reused; otherwise they are not.
545
490
* @param[in] opt determines whether the internal optimization of the Chebyshev polynomial degree is used. If `opt = S`, it is used; otherwise it is not.
546
491
*/
547
- void schase_ (float * H, int * N , float * V, float * ritzv, int * nev, int * nex,
492
+ void schase_ (int *N, float * H, int * ldh , float * V, float * ritzv, int * nev, int * nex,
548
493
int * deg, double * tol, char * mode, char * opt) {
549
- chase_seq<float >(H, N , V, ritzv, nev, nex, deg, tol, mode, opt);
494
+ chase_seq<float >(N, H, ldh , V, ritzv, nev, nex, deg, tol, mode, opt);
550
495
}
551
496
552
497
// ! an initialisation of environment for distributed ChASE for complex scalar in double precision
@@ -683,27 +628,6 @@ void pschase_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol,
683
628
chase_solve<float >(H, ldh, V, ritzv, deg, tol, mode, opt);
684
629
}
685
630
686
- #ifdef HAS_GPU
687
- void pzchase_mgpu_ (std::complex<double >* H, int *ldh, std::complex<double >* V,
688
- double * ritzv, int * deg, double * tol, char * mode, char * opt) {
689
- chase_solve_mgpu<std::complex<double >>(H, ldh, V, ritzv, deg, tol, mode, opt);
690
- }
691
-
692
- void pdchase_mgpu_ (double * H, int *ldh, double * V, double * ritzv, int * deg, double * tol,
693
- char * mode, char * opt) {
694
- chase_solve_mgpu<double >(H, ldh, V, ritzv, deg, tol, mode, opt);
695
- }
696
-
697
- void pcchase_mgpu_ (std::complex<float >* H, int *ldh, std::complex<float >* V,
698
- float * ritzv, int * deg, double * tol, char * mode, char * opt) {
699
- chase_solve_mgpu<std::complex<float >>(H, ldh, V, ritzv, deg, tol, mode, opt);
700
- }
701
-
702
- void pschase_mgpu_ (float * H, int *ldh, float * V, float * ritzv, int * deg, double * tol,
703
- char * mode, char * opt) {
704
- chase_solve_mgpu<float >(H, ldh, V, ritzv, deg, tol, mode, opt);
705
- }
706
- #endif
707
631
/* * @} */ // end of chasc-c
708
632
709
633
} // extern C
0 commit comments