Skip to content

Commit 137e131

Browse files
committed
use hipfft
1 parent f843736 commit 137e131

File tree

9 files changed

+82
-62
lines changed

9 files changed

+82
-62
lines changed

tsne/HIP/src/exe/main.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ int main(int argc, char** argv)
6363
std::chrono::steady_clock::time_point time_end;
6464
double time_total = 0.0;
6565
double time_total_ = 0.0;
66+
int success = 99;
6667

6768
TIMER_START()
6869

@@ -125,7 +126,12 @@ int main(int argc, char** argv)
125126
}
126127

127128
// Do the t-SNE
128-
time_total_ = tsnecuda::RunTsne(opt);
129+
time_total_ = tsnecuda::RunTsne(opt, success);
130+
if (success == 0) {
131+
std::cout << "Verification SUCCESSFUL\n";
132+
} else {
133+
std::cout << "Verification FAILED\n";
134+
}
129135
std::cout << "\nDone!\n";
130136
} catch (std::exception const& e) {
131137
std::cout << "Exception: " << e.what() << "\n";
@@ -134,5 +140,5 @@ int main(int argc, char** argv)
134140
TIMER_END()
135141
TIMER_PRINT("tsne - total time for whole calculation")
136142

137-
return 0;
143+
return success;
138144
}

tsne/HIP/src/fit_tsne.cpp

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
#include <chrono>
3636
#include "include/fit_tsne.h"
37+
#include "verify.hpp"
3738

3839
// #ifndef DEBUG_TIME
3940
// #define DEBUG_TIME
@@ -62,7 +63,7 @@
6263
#define PRINT_IL_TIMER(x) std::cout << #x << ": " << ((float)x.count()) / 1000000.0 << "s" << std::endl
6364
#endif
6465

65-
double tsnecuda::RunTsne(tsnecuda::Options& opt)
66+
double tsnecuda::RunTsne(tsnecuda::Options& opt, int& success)
6667
{
6768
std::chrono::steady_clock::time_point time_start_;
6869
std::chrono::steady_clock::time_point time_end_;
@@ -406,8 +407,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
406407
std::cout << "done." << std::endl;
407408
}
408409

409-
// int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
410-
// size_t work_size, work_size_dft, work_size_idft;
410+
int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
411+
size_t work_size_idft, work_size_dft;
412+
// size_t work_size;
411413

412414
// std::cout << "Setting up dft plans...\n";
413415
// // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
@@ -424,41 +426,41 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
424426
// TIME_SINCE(time_start);
425427

426428
// TIME_START();
427-
// hipfftHandle plan_dft;
428-
// CufftSafeCall(hipfftCreate(&plan_dft));
429-
// CufftSafeCall(hipfftMakePlanMany(
430-
// plan_dft,
431-
// 2,
432-
// fft_dimensions,
433-
// NULL,
434-
// 1,
435-
// n_fft_coeffs * n_fft_coeffs,
436-
// NULL,
437-
// 1,
438-
// n_fft_coeffs * (n_fft_coeffs / 2 + 1),
439-
// HIPFFT_R2C,
440-
// n_terms,
441-
// &work_size_dft)
442-
// );
429+
hipfftHandle plan_dft;
430+
CufftSafeCall(hipfftCreate(&plan_dft));
431+
CufftSafeCall(hipfftMakePlanMany(
432+
plan_dft,
433+
2,
434+
fft_dimensions,
435+
NULL,
436+
1,
437+
n_fft_coeffs * n_fft_coeffs,
438+
NULL,
439+
1,
440+
n_fft_coeffs * (n_fft_coeffs / 2 + 1),
441+
HIPFFT_R2C,
442+
n_terms,
443+
&work_size_dft)
444+
);
443445
// TIME_SINCE(time_start);
444446

445447
// TIME_START();
446-
// hipfftHandle plan_idft;
447-
// CufftSafeCall(hipfftCreate(&plan_idft));
448-
// CufftSafeCall(hipfftMakePlanMany(
449-
// plan_idft,
450-
// 2,
451-
// fft_dimensions,
452-
// NULL,
453-
// 1,
454-
// n_fft_coeffs * (n_fft_coeffs / 2 + 1),
455-
// NULL,
456-
// 1,
457-
// n_fft_coeffs * n_fft_coeffs,
458-
// HIPFFT_C2R,
459-
// n_terms,
460-
// &work_size_idft)
461-
// );
448+
hipfftHandle plan_idft;
449+
CufftSafeCall(hipfftCreate(&plan_idft));
450+
CufftSafeCall(hipfftMakePlanMany(
451+
plan_idft,
452+
2,
453+
fft_dimensions,
454+
NULL,
455+
1,
456+
n_fft_coeffs * (n_fft_coeffs / 2 + 1),
457+
NULL,
458+
1,
459+
n_fft_coeffs * n_fft_coeffs,
460+
HIPFFT_C2R,
461+
n_terms,
462+
&work_size_idft)
463+
);
462464
// TIME_SINCE(time_start);
463465
// std::cout << "done.\n";
464466

@@ -545,8 +547,8 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
545547
#endif
546548

547549
tsnecuda::NbodyFFT2D(
548-
// plan_dft,
549-
// plan_idft,
550+
plan_dft,
551+
plan_idft,
550552
fft_kernel_tilde_device, // input
551553
fft_w_coefficients, // intermediate value
552554
N,
@@ -697,6 +699,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
697699
dump_file << host_ys[i] << " " << host_ys[i + num_points] << std::endl;
698700
}
699701
dump_file.close();
702+
703+
std::string golden_file = "../../data/tsne_mnist_output_golden.txt";
704+
success = verify(golden_file, opt.get_dump_file(), 0.2, 10.0);
700705
TIMER_END_()
701706

702707
host_ys.clear();

tsne/HIP/src/include/common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141

4242
// CUDA Includes
4343
#include <hip/hip_runtime.h>
44-
#include <hipblas.h>
45-
#include <hipsparse.h>
46-
#include <hipfft.h>
44+
#include <hipblas/hipblas.h>
45+
#include <hipsparse/hipsparse.h>
46+
#include <hipfft/hipfft.h>
4747

4848
// Thrust includes
4949
#include <thrust/host_vector.h>

tsne/HIP/src/include/fit_tsne.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
#include "include/kernels/rep_forces.h"
5757

5858
namespace tsnecuda {
59-
double RunTsne(tsnecuda::Options& opt);
59+
double RunTsne(tsnecuda::Options& opt, int& success);
6060
}
6161

6262
#endif

tsne/HIP/src/include/kernels/nbodyfft.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ void PrecomputeFFT2D(
5959
thrust::device_vector<thrust::complex<float>>& fft_scratchpad_device, double& duration); // added
6060

6161
void NbodyFFT2D(
62-
// hipfftHandle& plan_dft,
63-
// hipfftHandle& plan_idft,
62+
hipfftHandle& plan_dft,
63+
hipfftHandle& plan_idft,
6464
thrust::device_vector<thrust::complex<float>>& fft_kernel_tilde_device,
6565
thrust::device_vector<thrust::complex<float>>& fft_w_coefficients,
6666
int N,

tsne/HIP/src/include/utils/cuda_utils.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ extern "C" void GpuErrorCheck(hipError_t ans);
7777
fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
7878
__FILE__, __LINE__, hipGetErrorString( hipGetLastError() ) ); \
7979
exit(EXIT_FAILURE); \
80-
} }
80+
} \
81+
}
8182

8283
#endif

tsne/HIP/src/include/utils/thrust_transform_functions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct FunctionalEntropy {
4747
__host__ __device__
4848
float operator()(const float& x) const {
4949
float val = x * log(x);
50-
return (val != val || isinf(val)) ? 0 : val;
50+
return (x == 0 || val != val || isinf(val)) ? 0 : val;
5151
}
5252
};
5353

tsne/HIP/src/kernels/nbodyfft.cpp

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -486,8 +486,8 @@ void tsnecuda::PrecomputeFFT2D(
486486
}
487487

488488
void tsnecuda::NbodyFFT2D(
489-
// hipfftHandle& plan_dft,
490-
// hipfftHandle& plan_idft,
489+
hipfftHandle& plan_dft,
490+
hipfftHandle& plan_idft,
491491
thrust::device_vector<thrust::complex<float>>& fft_kernel_tilde_device,
492492
thrust::device_vector<thrust::complex<float>>& fft_w_coefficients,
493493
int N,
@@ -601,13 +601,15 @@ void tsnecuda::NbodyFFT2D(
601601
);
602602
HIP_CHECK_LAST_ERROR()
603603
GpuErrorCheck(hipDeviceSynchronize());
604+
#define USE_HIPFFT
604605

606+
#ifdef USE_HIPFFT
605607
// Compute fft values at interpolated nodes
606-
// hipfftExecR2C(plan_dft,
607-
// reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_input.data())),
608-
// reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())));
609-
// GpuErrorCheck(hipDeviceSynchronize());
610-
608+
hipfftExecR2C(plan_dft,
609+
reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_input.data())),
610+
reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())));
611+
GpuErrorCheck(hipDeviceSynchronize());
612+
#else
611613
int num_rows = n_fft_coeffs;
612614
int num_cols = n_fft_coeffs;
613615

@@ -638,6 +640,7 @@ void tsnecuda::NbodyFFT2D(
638640
HIP_CHECK_LAST_ERROR();
639641
GpuErrorCheck(hipDeviceSynchronize());
640642
}
643+
#endif
641644

642645
// Take the broadcasted Hadamard product of a complex matrix and a complex vector
643646
// TODO: Check timing on this kernel
@@ -651,11 +654,13 @@ void tsnecuda::NbodyFFT2D(
651654
thrust::complex<float>(1.0f));
652655

653656
// Invert the computed values at the interpolated nodes
654-
// hipfftExecC2R(plan_idft,
655-
// reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())),
656-
// reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_output.data())));
657-
// GpuErrorCheck(hipDeviceSynchronize());
658657

658+
#ifdef USE_HIPFFT
659+
hipfftExecC2R(plan_idft,
660+
reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())),
661+
reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_output.data())));
662+
GpuErrorCheck(hipDeviceSynchronize());
663+
#else
659664
din = reinterpret_cast<float*>(thrust::raw_pointer_cast(fft_output.data()));
660665

661666
for (int f = 0; f < n_terms; ++f) {
@@ -677,6 +682,9 @@ void tsnecuda::NbodyFFT2D(
677682
HIP_CHECK_LAST_ERROR();
678683
GpuErrorCheck(hipDeviceSynchronize());
679684
}
685+
#endif
686+
687+
#undef USE_HIPFFT
680688

681689
hipLaunchKernelGGL(copy_from_fft_output, num_blocks, num_threads, 0, 0,
682690
thrust::raw_pointer_cast(y_tilde_values.data()), // output

tsne/HIP/src/utils/matrix_broadcast_utils.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ void tsnecuda::utils::BroadcastMatrixVector(
9090
const int axis,
9191
const T alpha)
9292
{
93-
// Checks to make sure dimensions are correct
94-
assert(d_matrix.size() >= N * M);
95-
assert((axis == 0 && d_vector.size() >= N) ||
96-
(axis == 1 && d_vector.size() >= M));
93+
// // Checks to make sure dimensions are correct
94+
// assert(d_matrix.size() >= N * M);
95+
// assert((axis == 0 && d_vector.size() >= N) ||
96+
// (axis == 1 && d_vector.size() >= M));
9797

9898
const int kBlockSize = 32;
9999
const int kNumBlocks = iDivUp(N * M, kBlockSize);

0 commit comments

Comments
 (0)