Use hipFFT for the forward/inverse FFT steps and add output verification

mgrabban · mgrabban · commit 137e131bd9ed · 2024-06-13T06:01:10.000-07:00
diff --git a/tsne/HIP/src/exe/main.cpp b/tsne/HIP/src/exe/main.cpp
@@ -63,6 +63,7 @@ int main(int argc, char** argv)
     std::chrono::steady_clock::time_point time_end;
     double time_total = 0.0;
     double time_total_ = 0.0;
+    int success = 99;
 
     TIMER_START()
 
@@ -125,7 +126,12 @@ int main(int argc, char** argv)
     }
 
     // Do the t-SNE
-    time_total_ = tsnecuda::RunTsne(opt);
+    time_total_ = tsnecuda::RunTsne(opt, success);
+    if (success == 0) {
+        std::cout << "Verification SUCCESSFUL\n";
+    } else {
+        std::cout << "Verification FAILED\n";
+    }
     std::cout << "\nDone!\n";
     } catch (std::exception const& e) {
         std::cout << "Exception: " << e.what() << "\n";
@@ -134,5 +140,5 @@ int main(int argc, char** argv)
     TIMER_END()
     TIMER_PRINT("tsne - total time for whole calculation")
 
-    return 0;
+    return success;
 }
diff --git a/tsne/HIP/src/fit_tsne.cpp b/tsne/HIP/src/fit_tsne.cpp
@@ -34,6 +34,7 @@
 
 #include <chrono>
 #include "include/fit_tsne.h"
+#include "verify.hpp"
 
 // #ifndef DEBUG_TIME
 // #define DEBUG_TIME
@@ -62,7 +63,7 @@
 #define PRINT_IL_TIMER(x) std::cout << #x << ": " << ((float)x.count()) / 1000000.0 << "s" << std::endl
 #endif
 
-double tsnecuda::RunTsne(tsnecuda::Options& opt)
+double tsnecuda::RunTsne(tsnecuda::Options& opt, int& success)
 {
     std::chrono::steady_clock::time_point time_start_;
     std::chrono::steady_clock::time_point time_end_;
@@ -406,8 +407,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
         std::cout << "done." << std::endl;
     }
 
-    // int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs};        // {780, 780}
-    // size_t work_size, work_size_dft, work_size_idft;
+    int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs};        // {780, 780}
+    size_t work_size_idft, work_size_dft;
+    // size_t work_size;
 
     // std::cout << "Setting up dft plans...\n";
     // // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
@@ -424,41 +426,41 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
     // TIME_SINCE(time_start);
 
     // TIME_START();
-    // hipfftHandle plan_dft;
-    // CufftSafeCall(hipfftCreate(&plan_dft));
-    // CufftSafeCall(hipfftMakePlanMany(
-    //     plan_dft,
-    //     2,
-    //     fft_dimensions,
-    //     NULL,
-    //     1,
-    //     n_fft_coeffs * n_fft_coeffs,
-    //     NULL,
-    //     1,
-    //     n_fft_coeffs * (n_fft_coeffs / 2 + 1),
-    //     HIPFFT_R2C,
-    //     n_terms,
-    //     &work_size_dft)
-    // );
+    hipfftHandle plan_dft;
+    CufftSafeCall(hipfftCreate(&plan_dft));
+    CufftSafeCall(hipfftMakePlanMany(
+        plan_dft,
+        2,
+        fft_dimensions,
+        NULL,
+        1,
+        n_fft_coeffs * n_fft_coeffs,
+        NULL,
+        1,
+        n_fft_coeffs * (n_fft_coeffs / 2 + 1),
+        HIPFFT_R2C,
+        n_terms,
+        &work_size_dft)
+    );
     // TIME_SINCE(time_start);
 
     // TIME_START();
-    // hipfftHandle plan_idft;
-    // CufftSafeCall(hipfftCreate(&plan_idft));
-    // CufftSafeCall(hipfftMakePlanMany(
-    //     plan_idft,
-    //     2,
-    //     fft_dimensions,
-    //     NULL,
-    //     1,
-    //     n_fft_coeffs * (n_fft_coeffs / 2 + 1),
-    //     NULL,
-    //     1,
-    //     n_fft_coeffs * n_fft_coeffs,
-    //     HIPFFT_C2R,
-    //     n_terms,
-    //     &work_size_idft)
-    // );
+    hipfftHandle plan_idft;
+    CufftSafeCall(hipfftCreate(&plan_idft));
+    CufftSafeCall(hipfftMakePlanMany(
+        plan_idft,
+        2,
+        fft_dimensions,
+        NULL,
+        1,
+        n_fft_coeffs * (n_fft_coeffs / 2 + 1),
+        NULL,
+        1,
+        n_fft_coeffs * n_fft_coeffs,
+        HIPFFT_C2R,
+        n_terms,
+        &work_size_idft)
+    );
     // TIME_SINCE(time_start);
     // std::cout << "done.\n";
 
@@ -545,8 +547,8 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
 #endif
 
         tsnecuda::NbodyFFT2D(
-            // plan_dft,
-            // plan_idft,
+            plan_dft,
+            plan_idft,
             fft_kernel_tilde_device,            // input
             fft_w_coefficients,                 // intermediate value
             N,
@@ -697,6 +699,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
             dump_file << host_ys[i] << " " << host_ys[i + num_points] << std::endl;
         }
         dump_file.close();
+
+        std::string golden_file = "../../data/tsne_mnist_output_golden.txt";
+        success = verify(golden_file, opt.get_dump_file(), 0.2, 10.0);
         TIMER_END_()
 
         host_ys.clear();
diff --git a/tsne/HIP/src/include/common.h b/tsne/HIP/src/include/common.h
@@ -41,9 +41,9 @@
 
 // CUDA Includes
 #include <hip/hip_runtime.h>
-#include <hipblas.h>
-#include <hipsparse.h>
-#include <hipfft.h>
+#include <hipblas/hipblas.h>
+#include <hipsparse/hipsparse.h>
+#include <hipfft/hipfft.h>
 
 // Thrust includes
 #include <thrust/host_vector.h>
diff --git a/tsne/HIP/src/include/fit_tsne.h b/tsne/HIP/src/include/fit_tsne.h
@@ -56,7 +56,7 @@
 #include "include/kernels/rep_forces.h"
 
 namespace tsnecuda {
-double RunTsne(tsnecuda::Options& opt);
+double RunTsne(tsnecuda::Options& opt, int& success);
 }
 
 #endif
diff --git a/tsne/HIP/src/include/kernels/nbodyfft.h b/tsne/HIP/src/include/kernels/nbodyfft.h
@@ -59,8 +59,8 @@ void PrecomputeFFT2D(
     thrust::device_vector<thrust::complex<float>>& fft_scratchpad_device, double& duration); // added
 
 void NbodyFFT2D(
-    // hipfftHandle& plan_dft,
-    // hipfftHandle& plan_idft,
+    hipfftHandle& plan_dft,
+    hipfftHandle& plan_idft,
     thrust::device_vector<thrust::complex<float>>& fft_kernel_tilde_device,
     thrust::device_vector<thrust::complex<float>>& fft_w_coefficients,
     int   N,
diff --git a/tsne/HIP/src/include/utils/cuda_utils.h b/tsne/HIP/src/include/utils/cuda_utils.h
@@ -77,6 +77,7 @@ extern "C" void GpuErrorCheck(hipError_t ans);
         fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",     \
             __FILE__, __LINE__, hipGetErrorString( hipGetLastError() ) ); \
         exit(EXIT_FAILURE);                                               \
-    } }
+    }                                                                     \
+}
 
 #endif
diff --git a/tsne/HIP/src/include/utils/thrust_transform_functions.h b/tsne/HIP/src/include/utils/thrust_transform_functions.h
@@ -47,7 +47,7 @@ struct FunctionalEntropy {
   __host__ __device__
   float operator()(const float& x) const {
       float val = x * log(x);
-      return (val != val || isinf(val)) ? 0 : val;
+      return (x == 0 || val != val || isinf(val)) ? 0 : val;
     }
 };
 
diff --git a/tsne/HIP/src/kernels/nbodyfft.cpp b/tsne/HIP/src/kernels/nbodyfft.cpp
@@ -486,8 +486,8 @@ void tsnecuda::PrecomputeFFT2D(
 }
 
 void tsnecuda::NbodyFFT2D(
-    // hipfftHandle& plan_dft,
-    // hipfftHandle& plan_idft,
+    hipfftHandle& plan_dft,
+    hipfftHandle& plan_idft,
     thrust::device_vector<thrust::complex<float>>& fft_kernel_tilde_device,
     thrust::device_vector<thrust::complex<float>>& fft_w_coefficients,
     int    N,
@@ -601,13 +601,15 @@ void tsnecuda::NbodyFFT2D(
     );
     HIP_CHECK_LAST_ERROR()
     GpuErrorCheck(hipDeviceSynchronize());
+#define USE_HIPFFT
 
+#ifdef USE_HIPFFT
     // Compute fft values at interpolated nodes
-    // hipfftExecR2C(plan_dft,
-    //              reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_input.data())),
-    //              reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())));
-    // GpuErrorCheck(hipDeviceSynchronize());
-
+    hipfftExecR2C(plan_dft,
+                 reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_input.data())),
+                 reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())));
+    GpuErrorCheck(hipDeviceSynchronize());
+#else
     int num_rows = n_fft_coeffs;
     int num_cols = n_fft_coeffs;
 
@@ -638,6 +640,7 @@ void tsnecuda::NbodyFFT2D(
         HIP_CHECK_LAST_ERROR();
         GpuErrorCheck(hipDeviceSynchronize());
     }
+#endif
 
     // Take the broadcasted Hadamard product of a complex matrix and a complex vector
     // TODO: Check timing on this kernel
@@ -651,11 +654,13 @@ void tsnecuda::NbodyFFT2D(
         thrust::complex<float>(1.0f));
 
     // Invert the computed values at the interpolated nodes
-    // hipfftExecC2R(plan_idft,
-    //              reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())),
-    //              reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_output.data())));
-    // GpuErrorCheck(hipDeviceSynchronize());
 
+#ifdef USE_HIPFFT
+    hipfftExecC2R(plan_idft,
+                 reinterpret_cast<hipfftComplex *>(thrust::raw_pointer_cast(fft_w_coefficients.data())),
+                 reinterpret_cast<hipfftReal *>(thrust::raw_pointer_cast(fft_output.data())));
+    GpuErrorCheck(hipDeviceSynchronize());
+#else
     din  = reinterpret_cast<float*>(thrust::raw_pointer_cast(fft_output.data()));
 
     for (int f = 0; f < n_terms; ++f) {
@@ -677,6 +682,9 @@ void tsnecuda::NbodyFFT2D(
         HIP_CHECK_LAST_ERROR();
         GpuErrorCheck(hipDeviceSynchronize());
     }
+#endif
+
+#undef USE_HIPFFT
 
     hipLaunchKernelGGL(copy_from_fft_output, num_blocks, num_threads, 0, 0, 
         thrust::raw_pointer_cast(y_tilde_values.data()),    // output
diff --git a/tsne/HIP/src/utils/matrix_broadcast_utils.cpp b/tsne/HIP/src/utils/matrix_broadcast_utils.cpp
@@ -90,10 +90,10 @@ void tsnecuda::utils::BroadcastMatrixVector(
     const int axis,
     const T alpha)
 {
-    // Checks to make sure dimensions are correct
-    assert(d_matrix.size() >= N * M);
-    assert((axis == 0 && d_vector.size() >= N) ||
-            (axis == 1 && d_vector.size() >= M));
+    // // Checks to make sure dimensions are correct
+    // assert(d_matrix.size() >= N * M);
+    // assert((axis == 0 && d_vector.size() >= N) ||
+    //        (axis == 1 && d_vector.size() >= M));
 
     const int kBlockSize = 32;
     const int kNumBlocks = iDivUp(N * M, kBlockSize);

Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@`
`56`	`56`	`#include "include/kernels/rep_forces.h"`
`57`	`57`
`58`	`58`	`namespace tsnecuda {`
`59`		`-double RunTsne(tsnecuda::Options& opt);`
	`59`	`+double RunTsne(tsnecuda::Options& opt, int& success);`
`60`	`60`	`}`
`61`	`61`
`62`	`62`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ struct FunctionalEntropy {`
`47`	`47`	`__host__ __device__`
`48`	`48`	`float operator()(const float& x) const {`
`49`	`49`	`float val = x * log(x);`
`50`		`- return (val != val \|\| isinf(val)) ? 0 : val;`
	`50`	`+ return (x == 0 \|\| val != val \|\| isinf(val)) ? 0 : val;`
`51`	`51`	`}`
`52`	`52`	`};`
`53`	`53`