40
40
#include < complex>
41
41
#include < chrono>
42
42
#include " include/fit_tsne.h"
43
+ #include " verify.hpp"
43
44
44
45
// #ifndef DEBUG_TIME
45
46
// #define DEBUG_TIME
68
69
// Prints the argument's spelling (via #x), then " : ", then x.count() cast to
// float and divided by 1,000,000, followed by the unit label " s".
// NOTE(review): the divisor implies x.count() is in microseconds — confirm
// against the duration type used at the call sites.
#define PRINT_IL_TIMER (x ) std::cout << #x << " : " << ((float )x.count()) / 1000000.0 << " s" << std::endl
69
70
#endif
70
71
71
- double tsnecuda::RunTsne (tsnecuda::Options& opt)
72
+ double tsnecuda::RunTsne (tsnecuda::Options& opt, int & success )
72
73
{
73
74
std::chrono::steady_clock::time_point time_start_;
74
75
std::chrono::steady_clock::time_point time_end_;
@@ -420,14 +421,15 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
420
421
std::cout << " done." << std::endl;
421
422
}
422
423
423
- // int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
424
+ int fft_dimensions[2 ] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
425
+ size_t work_size_idft, work_size_dft;
424
426
425
- // std::int64_t fwd_strides1[3] = {0, n_fft_coeffs, 1}; // {0, 780, 1} -> 0 + 780*i + j
426
- // std::int64_t fwd_strides2[3] = {0, (n_fft_coeffs/2+1)*2, 1}; // {0, 780, 1} -> 0 + 780*i + j
427
- // std::int64_t bwd_strides[3] = {0, (n_fft_coeffs/2+1), 1}; // {0, 391, 1} -> 0 + 391*i + j
428
- // std::int64_t fwd_distances1 = n_fft_coeffs* n_fft_coeffs;
429
- // std::int64_t fwd_distances2 = n_fft_coeffs*(n_fft_coeffs/2+1)*2;
430
- // std::int64_t bwd_distances = n_fft_coeffs*(n_fft_coeffs/2+1) ;
427
+ std::int64_t fwd_strides1[3 ] = {0 , n_fft_coeffs, 1 }; // {0, 780, 1} -> 0 + 780*i + j
428
+ std::int64_t fwd_strides2[3 ] = {0 , (n_fft_coeffs/2 +1 )*2 , 1 }; // {0, 782, 1} -> 0 + 782*i + j
429
+ std::int64_t bwd_strides[3 ] = {0 , (n_fft_coeffs/2 +1 ), 1 }; // {0, 391, 1} -> 0 + 391*i + j
430
+ std::int64_t fwd_distances1 = n_fft_coeffs* n_fft_coeffs;
431
+ std::int64_t fwd_distances2 = n_fft_coeffs*(n_fft_coeffs/2 +1 )*2 ;
432
+ std::int64_t bwd_distances = n_fft_coeffs*(n_fft_coeffs/2 +1 ) ;
431
433
432
434
// std::cout << "Setting up dft plans...\n";
433
435
// // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
@@ -443,27 +445,66 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
443
445
// TIME_SINCE(time_start);
444
446
445
447
// TIME_START();
446
- // std::shared_ptr<descriptor_t> plan_dft;
447
- // plan_dft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
448
- // plan_dft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
449
- // plan_dft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, fwd_strides1);
450
- // plan_dft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, bwd_strides);
451
- // plan_dft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
452
- // plan_dft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
453
- // plan_dft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
454
- // plan_dft->commit(qts);
455
- // TIME_SINCE(time_start);
456
448
449
+
450
+ #if defined(USE_NVIDIA_BACKEND)
451
+ cufftHandle plan_dft;
452
+ CufftSafeCall (cufftCreate (&plan_dft));
453
+ CufftSafeCall (cufftMakePlanMany (
454
+ plan_dft,
455
+ 2 ,
456
+ fft_dimensions,
457
+ NULL ,
458
+ 1 ,
459
+ n_fft_coeffs * n_fft_coeffs,
460
+ NULL ,
461
+ 1 ,
462
+ n_fft_coeffs * (n_fft_coeffs / 2 + 1 ),
463
+ CUFFT_R2C,
464
+ n_terms,
465
+ &work_size_dft)
466
+ );
467
+ #else
468
+ std::shared_ptr<descriptor_t > plan_dft;
469
+ plan_dft = std::make_shared<descriptor_t >(std::vector<std::int64_t >{n_fft_coeffs, n_fft_coeffs});
470
+ plan_dft->set_value (oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
471
+ plan_dft->set_value (oneapi::mkl::dft::config_param::INPUT_STRIDES, fwd_strides1);
472
+ plan_dft->set_value (oneapi::mkl::dft::config_param::OUTPUT_STRIDES, bwd_strides);
473
+ plan_dft->set_value (oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
474
+ plan_dft->set_value (oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
475
+ plan_dft->set_value (oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
476
+ plan_dft->commit (qts);
477
+ #endif
478
+ // TIME_SINCE(time_start);
457
479
// TIME_START();
458
- // std::shared_ptr<descriptor_t> plan_idft;
459
- // plan_idft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
460
- // plan_idft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
461
- // plan_idft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, bwd_strides);
462
- // plan_idft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, fwd_strides2);
463
- // plan_idft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances2);
464
- // plan_idft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
465
- // plan_idft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
466
- // plan_idft->commit(qts);
480
+ #if defined(USE_NVIDIA_BACKEND)
481
+ cufftHandle plan_idft;
482
+ CufftSafeCall (cufftCreate (&plan_idft));
483
+ CufftSafeCall (cufftMakePlanMany (
484
+ plan_idft,
485
+ 2 ,
486
+ fft_dimensions,
487
+ NULL ,
488
+ 1 ,
489
+ n_fft_coeffs * (n_fft_coeffs / 2 + 1 ),
490
+ NULL ,
491
+ 1 ,
492
+ n_fft_coeffs * n_fft_coeffs,
493
+ CUFFT_C2R,
494
+ n_terms,
495
+ &work_size_idft)
496
+ );
497
+ #else
498
+ std::shared_ptr<descriptor_t > plan_idft;
499
+ plan_idft = std::make_shared<descriptor_t >(std::vector<std::int64_t >{n_fft_coeffs, n_fft_coeffs});
500
+ plan_idft->set_value (oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
501
+ plan_idft->set_value (oneapi::mkl::dft::config_param::INPUT_STRIDES, bwd_strides);
502
+ plan_idft->set_value (oneapi::mkl::dft::config_param::OUTPUT_STRIDES, fwd_strides1);
503
+ plan_idft->set_value (oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
504
+ plan_idft->set_value (oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
505
+ plan_idft->set_value (oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
506
+ plan_idft->commit (qts);
507
+ #endif
467
508
// // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
468
509
// TIME_SINCE(time_start);
469
510
// std::cout << "done.\n";
@@ -564,8 +605,8 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
564
605
#endif
565
606
566
607
tsnecuda::NbodyFFT2D (
567
- // plan_dft,
568
- // plan_idft,
608
+ plan_dft,
609
+ plan_idft,
569
610
fft_kernel_tilde_device, // input
570
611
fft_w_coefficients, // intermediate value
571
612
N,
@@ -723,6 +764,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
723
764
dump_file << host_ys[i] << " " << host_ys[i + num_points] << std::endl;
724
765
}
725
766
dump_file.close ();
767
+
768
+ std::string golden_file = " ../../data/tsne_mnist_output_golden.txt" ;
769
+ success = verify (golden_file, opt.get_dump_file (), 0.2 , 10.0 );
726
770
TIMER_END_ ()
727
771
728
772
sycl::free (host_ys, qts);
0 commit comments