Skip to content

Commit 553041b

Browse files
committed
use mkl dft
1 parent 137e131 commit 553041b

File tree

9 files changed

+316
-203
lines changed

9 files changed

+316
-203
lines changed

tsne/SYCL/CMakeLists.txt

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,14 @@ endif()
112112

113113
message(STATUS "CXX Compilation flags set to: ${CMAKE_CXX_FLAGS}")
114114

115-
if(GPU_AOT)
115+
if(NOT USE_NVIDIA_BACKEND)
116116
set(MKL_LINK static)
117117
set(MKL_THREADING sequential)
118118
find_package(MKL CONFIG REQUIRED HINTS "$ENV{MKLROOT}/lib/cmake/mkl")
119+
else()
120+
find_package(CUDA REQUIRED)
121+
include_directories(${CUDA_INCLUDE_DIRS})
119122
endif()
120-
121123
# Project Setup
122124
#-------------------------------------------------------------------------------
123125
set(SOURCES
@@ -128,6 +130,7 @@ set(SOURCES
128130
${CMAKE_SOURCE_DIR}/src/utils/math_utils.dp.cpp
129131
${CMAKE_SOURCE_DIR}/src/utils/matrix_broadcast_utils.dp.cpp
130132
# ${CMAKE_SOURCE_DIR}/src/utils/reduce_utils.dp.cpp
133+
${CMAKE_SOURCE_DIR}/../data/verify.cpp
131134

132135
# # Kernels
133136
${CMAKE_SOURCE_DIR}/src/kernels/apply_forces.dp.cpp
@@ -145,14 +148,18 @@ set(SOURCES
145148
include_directories(
146149
${CMAKE_SOURCE_DIR}/src/
147150
${CMAKE_SOURCE_DIR}/src/include
151+
${CMAKE_SOURCE_DIR}/../data
148152
/nfs/pdx/home/mgrabban/oneDPL/include
149153
/nfs/pdx/home/mgrabban/oneTBB/include
150154
)
151155

152156
add_executable(tsne ${SOURCES})
153157

154-
if(GPU_AOT)
158+
if(NOT USE_NVIDIA_BACKEND)
155159
target_compile_options(tsne PUBLIC $<TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_COMPILE_OPTIONS>)
156160
target_include_directories(tsne PUBLIC $<TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_INCLUDE_DIRECTORIES>)
157161
target_link_libraries(tsne PUBLIC $<LINK_ONLY:MKL::MKL_DPCPP>)
162+
else()
163+
target_link_libraries(tsne ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_CUFFT_LIBRARIES} ${CUDA_cusparse_LIBRARY})
158164
endif()
165+

tsne/SYCL/src/exe/main.dp.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
// args, so we don't have to re-build to change options.
3333

3434
// Detailed includes
35-
#include <sycl/sycl.hpp>
3635
#include <time.h>
3736
#include <string>
3837
#include "include/fit_tsne.h"
@@ -64,6 +63,7 @@ int main(int argc, char** argv)
6463
std::chrono::steady_clock::time_point time_end;
6564
double time_total = 0.0;
6665
double time_total_ = 0.0;
66+
int success = 99;
6767

6868
TIMER_START()
6969

@@ -126,7 +126,12 @@ int main(int argc, char** argv)
126126
}
127127

128128
// Do the t-SNE
129-
time_total_ = tsnecuda::RunTsne(opt);
129+
time_total_ = tsnecuda::RunTsne(opt, success);
130+
if (success == 0) {
131+
std::cout << "Verification SUCCESSFUL\n";
132+
} else {
133+
std::cout << "Verification FAILED\n";
134+
}
130135
std::cout << "\nDone!\n";
131136
} catch (std::exception const& e) {
132137
std::cout << "Exception: " << e.what() << "\n";
@@ -135,5 +140,5 @@ int main(int argc, char** argv)
135140
TIMER_END()
136141
TIMER_PRINT("tsne - total time for whole calculation")
137142

138-
return 0;
143+
return success;
139144
}

tsne/SYCL/src/fit_tsne.dp.cpp

Lines changed: 73 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <complex>
4141
#include <chrono>
4242
#include "include/fit_tsne.h"
43+
#include "verify.hpp"
4344

4445
// #ifndef DEBUG_TIME
4546
// #define DEBUG_TIME
@@ -68,7 +69,7 @@
6869
#define PRINT_IL_TIMER(x) std::cout << #x << ": " << ((float)x.count()) / 1000000.0 << "s" << std::endl
6970
#endif
7071

71-
double tsnecuda::RunTsne(tsnecuda::Options& opt)
72+
double tsnecuda::RunTsne(tsnecuda::Options& opt, int& success)
7273
{
7374
std::chrono::steady_clock::time_point time_start_;
7475
std::chrono::steady_clock::time_point time_end_;
@@ -420,14 +421,15 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
420421
std::cout << "done." << std::endl;
421422
}
422423

423-
// int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
424+
int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
425+
size_t work_size_idft, work_size_dft;
424426

425-
// std::int64_t fwd_strides1[3] = {0, n_fft_coeffs, 1}; // {0, 780, 1} -> 0 + 780*i + j
426-
// std::int64_t fwd_strides2[3] = {0, (n_fft_coeffs/2+1)*2, 1}; // {0, 780, 1} -> 0 + 780*i + j
427-
// std::int64_t bwd_strides[3] = {0, (n_fft_coeffs/2+1), 1}; // {0, 391, 1} -> 0 + 391*i + j
428-
// std::int64_t fwd_distances1 = n_fft_coeffs* n_fft_coeffs;
429-
// std::int64_t fwd_distances2 = n_fft_coeffs*(n_fft_coeffs/2+1)*2;
430-
// std::int64_t bwd_distances = n_fft_coeffs*(n_fft_coeffs/2+1) ;
427+
std::int64_t fwd_strides1[3] = {0, n_fft_coeffs, 1}; // {0, 780, 1} -> 0 + 780*i + j
428+
std::int64_t fwd_strides2[3] = {0, (n_fft_coeffs/2+1)*2, 1}; // {0, 780, 1} -> 0 + 780*i + j
429+
std::int64_t bwd_strides[3] = {0, (n_fft_coeffs/2+1), 1}; // {0, 391, 1} -> 0 + 391*i + j
430+
std::int64_t fwd_distances1 = n_fft_coeffs* n_fft_coeffs;
431+
std::int64_t fwd_distances2 = n_fft_coeffs*(n_fft_coeffs/2+1)*2;
432+
std::int64_t bwd_distances = n_fft_coeffs*(n_fft_coeffs/2+1) ;
431433

432434
// std::cout << "Setting up dft plans...\n";
433435
// // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
@@ -443,27 +445,66 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
443445
// TIME_SINCE(time_start);
444446

445447
// TIME_START();
446-
// std::shared_ptr<descriptor_t> plan_dft;
447-
// plan_dft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
448-
// plan_dft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
449-
// plan_dft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, fwd_strides1);
450-
// plan_dft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, bwd_strides);
451-
// plan_dft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
452-
// plan_dft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
453-
// plan_dft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
454-
// plan_dft->commit(qts);
455-
// TIME_SINCE(time_start);
456448

449+
450+
#if defined(USE_NVIDIA_BACKEND)
451+
cufftHandle plan_dft;
452+
CufftSafeCall(cufftCreate(&plan_dft));
453+
CufftSafeCall(cufftMakePlanMany(
454+
plan_dft,
455+
2,
456+
fft_dimensions,
457+
NULL,
458+
1,
459+
n_fft_coeffs * n_fft_coeffs,
460+
NULL,
461+
1,
462+
n_fft_coeffs * (n_fft_coeffs / 2 + 1),
463+
CUFFT_R2C,
464+
n_terms,
465+
&work_size_dft)
466+
);
467+
#else
468+
std::shared_ptr<descriptor_t> plan_dft;
469+
plan_dft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
470+
plan_dft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
471+
plan_dft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, fwd_strides1);
472+
plan_dft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, bwd_strides);
473+
plan_dft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
474+
plan_dft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
475+
plan_dft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
476+
plan_dft->commit(qts);
477+
#endif
478+
// TIME_SINCE(time_start);
457479
// TIME_START();
458-
// std::shared_ptr<descriptor_t> plan_idft;
459-
// plan_idft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
460-
// plan_idft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
461-
// plan_idft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, bwd_strides);
462-
// plan_idft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, fwd_strides2);
463-
// plan_idft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances2);
464-
// plan_idft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
465-
// plan_idft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
466-
// plan_idft->commit(qts);
480+
#if defined(USE_NVIDIA_BACKEND)
481+
cufftHandle plan_idft;
482+
CufftSafeCall(cufftCreate(&plan_idft));
483+
CufftSafeCall(cufftMakePlanMany(
484+
plan_idft,
485+
2,
486+
fft_dimensions,
487+
NULL,
488+
1,
489+
n_fft_coeffs * (n_fft_coeffs / 2 + 1),
490+
NULL,
491+
1,
492+
n_fft_coeffs * n_fft_coeffs,
493+
CUFFT_C2R,
494+
n_terms,
495+
&work_size_idft)
496+
);
497+
#else
498+
std::shared_ptr<descriptor_t> plan_idft;
499+
plan_idft = std::make_shared<descriptor_t>(std::vector<std::int64_t>{n_fft_coeffs, n_fft_coeffs});
500+
plan_idft->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_CONFIG_VALUE::DFTI_NOT_INPLACE);
501+
plan_idft->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, bwd_strides);
502+
plan_idft->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, fwd_strides1);
503+
plan_idft->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, fwd_distances1);
504+
plan_idft->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, bwd_distances);
505+
plan_idft->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, n_terms);
506+
plan_idft->commit(qts);
507+
#endif
467508
// // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
468509
// TIME_SINCE(time_start);
469510
// std::cout << "done.\n";
@@ -564,8 +605,8 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
564605
#endif
565606

566607
tsnecuda::NbodyFFT2D(
567-
// plan_dft,
568-
// plan_idft,
608+
plan_dft,
609+
plan_idft,
569610
fft_kernel_tilde_device, // input
570611
fft_w_coefficients, // intermediate value
571612
N,
@@ -723,6 +764,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
723764
dump_file << host_ys[i] << " " << host_ys[i + num_points] << std::endl;
724765
}
725766
dump_file.close();
767+
768+
std::string golden_file = "../../data/tsne_mnist_output_golden.txt";
769+
success = verify(golden_file, opt.get_dump_file(), 0.2, 10.0);
726770
TIMER_END_()
727771

728772
sycl::free(host_ys, qts);

tsne/SYCL/src/include/common.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@
4141

4242
// SYCL Includes
4343
#include <sycl/sycl.hpp>
44-
// #include <oneapi/mkl.hpp>
4544

46-
// typedef oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::REAL> descriptor_t;
45+
#if defined(USE_NVIDIA_BACKEND)
46+
#include <cufft.h>
47+
#include <cuda_runtime.h>
48+
#else
49+
#include <oneapi/mkl.hpp>
50+
typedef oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::SINGLE, oneapi::mkl::dft::domain::REAL> descriptor_t;
51+
#endif
4752

4853
// Thrust includes
4954

tsne/SYCL/src/include/fit_tsne.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
#include "include/kernels/rep_forces.h"
5757

5858
namespace tsnecuda {
59-
double RunTsne(tsnecuda::Options& opt);
59+
double RunTsne(tsnecuda::Options& opt, int& success);
6060
}
6161

6262
#endif

tsne/SYCL/src/include/kernels/nbodyfft.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,13 @@ void PrecomputeFFT2D(
6161
sycl::queue& myQueue, double& duration);
6262

6363
void NbodyFFT2D(
64-
// std::shared_ptr<descriptor_t>& plan_dft,
65-
// std::shared_ptr<descriptor_t>& plan_idft,
64+
#if defined(USE_NVIDIA_BACKEND)
65+
cufftHandle& plan_dft,
66+
cufftHandle& plan_idft,
67+
#else
68+
std::shared_ptr<descriptor_t>& plan_dft,
69+
std::shared_ptr<descriptor_t>& plan_idft,
70+
#endif
6671
std::complex<float>* fft_kernel_tilde_device,
6772
std::complex<float>* fft_w_coefficients,
6873
int N,

tsne/SYCL/src/include/utils/cuda_utils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,6 @@
6868
#include "common.h"
6969

7070
int iDivUp(int, int);
71+
extern "C" void CufftSafeCall(int err);
7172

7273
#endif

0 commit comments

Comments
 (0)