Skip to content

Commit 03e3e0b

Browse files
authored
Merge pull request #58 from oneapi-src/gr/code_update/2406
[bitcracker][hashtable][SeisAcoMod2D][sobel_filter][tsne] update cmake, use fast fft
2 parents b60581b + 553041b commit 03e3e0b

33 files changed

+60547
-328
lines changed

SeisAcoMod2D/HIP/CMakeLists.txt

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
1919
if(MPI_HOME)
2020
message(STATUS "Using provided MPI_HOME=${MPI_HOME}")
2121
else()
22-
set(MPI_HOME "/opt/intel/oneapi/mpi/latest")
22+
# set(MPI_HOME "/opt/intel/oneapi/mpi/latest")
23+
set(MPI_HOME "/usr/lib/x86_64-linux-gnu/openmpi")
2324
message(STATUS "Using default MPI_HOME=${MPI_HOME}")
2425
endif()
2526

@@ -30,12 +31,12 @@ if(NOT DEFINED ROCM_PATH)
3031
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
3132
endif()
3233
endif()
33-
set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
34+
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
3435
set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
3536
set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES})
3637

3738
set(DEF_WL_CXX_FLAGS " -D__HIP_PLATFORM_AMD__ ")
38-
set(DEF_GENERAL_CXX_FLAGS " -cxx=hipcc -O3 -std=c++17 -fopenmp ")
39+
set(DEF_GENERAL_CXX_FLAGS " -O3 -std=c++17 ")
3940
set(DEF_COMBINED_CXX_FLAGS "${DEF_GENERAL_CXX_FLAGS} ${DEF_WL_CXX_FLAGS}")
4041

4142
# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific)
@@ -79,4 +80,4 @@ include_directories(
7980

8081
add_executable(SeisAcoMod2D ${SOURCES})
8182

82-
target_link_libraries(SeisAcoMod2D -L${MPI_HOME}/lib -L${HIP_LIBRARIES})
83+
target_link_libraries(SeisAcoMod2D -L${HIP_LIBRARIES} -L${MPI_HOME}/lib -lmpi_cxx -lmpi)

bitcracker/HIP/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ if(NOT DEFINED ROCM_PATH)
3535
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
3636
endif()
3737
endif()
38-
set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
38+
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
3939
set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
4040
set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES})
4141

hashtable/HIP/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ if(NOT DEFINED ROCM_PATH)
2323
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
2424
endif()
2525
endif()
26-
set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
26+
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
2727
set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
2828
set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES})
2929

sobel_filter/HIP/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ if(NOT DEFINED ROCM_PATH)
3636
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which HIP has been installed")
3737
endif()
3838
endif()
39-
set(CMAKE_MODULE_PATH "${ROCM_PATH}/hip/cmake" ${CMAKE_MODULE_PATH})
39+
set(CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
4040
set(HIP_INCLUDE_DIRS "${ROCM_PATH}/include" ${HIP_INCLUDE_DIRS})
4141
set(HIP_LIBRARIES "${ROCM_PATH}/lib" ${HIP_LIBRARIES})
4242

tsne/CUDA/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ set(SOURCES
7878
${CMAKE_SOURCE_DIR}/src/utils/math_utils.cu
7979
${CMAKE_SOURCE_DIR}/src/utils/matrix_broadcast_utils.cu
8080
${CMAKE_SOURCE_DIR}/src/utils/reduce_utils.cu
81+
${CMAKE_SOURCE_DIR}/../data/verify.cpp
8182

8283
# Kernels
8384
${CMAKE_SOURCE_DIR}/src/kernels/apply_forces.cu
@@ -95,6 +96,7 @@ set(SOURCES
9596
include_directories(
9697
${CMAKE_SOURCE_DIR}/src
9798
${CMAKE_SOURCE_DIR}/src/include
99+
${CMAKE_SOURCE_DIR}/../data
98100
${CUDA_INCLUDE_DIRS}
99101
)
100102

tsne/CUDA/src/exe/main.cu

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ int main(int argc, char** argv)
6363
std::chrono::steady_clock::time_point time_end;
6464
double time_total = 0.0;
6565
double time_total_ = 0.0;
66+
int success = 99;
6667

6768
TIMER_START()
6869

@@ -125,7 +126,12 @@ int main(int argc, char** argv)
125126
}
126127

127128
// Do the t-SNE
128-
time_total_ = tsnecuda::RunTsne(opt);
129+
time_total_ = tsnecuda::RunTsne(opt, success);
130+
if (success == 0) {
131+
std::cout << "Verification SUCCESSFUL\n";
132+
} else {
133+
std::cout << "Verification FAILED\n";
134+
}
129135
std::cout << "\nDone!\n";
130136
} catch (std::exception const& e) {
131137
std::cout << "Exception: " << e.what() << "\n";
@@ -134,5 +140,5 @@ int main(int argc, char** argv)
134140
TIMER_END()
135141
TIMER_PRINT("tsne - total time for whole calculation")
136142

137-
return 0;
143+
return success;
138144
}

tsne/CUDA/src/fit_tsne.cu

Lines changed: 42 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434

3535
#include <chrono>
3636
#include "include/fit_tsne.h"
37+
#include "verify.hpp"
3738

3839
// #ifndef DEBUG_TIME
3940
// #define DEBUG_TIME
@@ -62,7 +63,7 @@
6263
#define PRINT_IL_TIMER(x) std::cout << #x << ": " << ((float)x.count()) / 1000000.0 << "s" << std::endl
6364
#endif
6465

65-
double tsnecuda::RunTsne(tsnecuda::Options& opt)
66+
double tsnecuda::RunTsne(tsnecuda::Options& opt, int& success)
6667
{
6768
std::chrono::steady_clock::time_point time_start_;
6869
std::chrono::steady_clock::time_point time_end_;
@@ -406,8 +407,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
406407
std::cout << "done." << std::endl;
407408
}
408409

409-
// int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
410-
// size_t work_size, work_size_dft, work_size_idft;
410+
int fft_dimensions[2] = {n_fft_coeffs, n_fft_coeffs}; // {780, 780}
411+
size_t work_size_idft, work_size_dft;
412+
// size_t work_size;
411413

412414
// std::cout << "Setting up dft plans...\n";
413415
// // *** TIMED SEPARATELY. NOT ADDED TO PERF TIME ***
@@ -424,41 +426,41 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
424426
// TIME_SINCE(time_start);
425427

426428
// TIME_START();
427-
// cufftHandle plan_dft;
428-
// CufftSafeCall(cufftCreate(&plan_dft));
429-
// CufftSafeCall(cufftMakePlanMany(
430-
// plan_dft,
431-
// 2,
432-
// fft_dimensions,
433-
// NULL,
434-
// 1,
435-
// n_fft_coeffs * n_fft_coeffs,
436-
// NULL,
437-
// 1,
438-
// n_fft_coeffs * (n_fft_coeffs / 2 + 1),
439-
// CUFFT_R2C,
440-
// n_terms,
441-
// &work_size_dft)
442-
// );
429+
cufftHandle plan_dft;
430+
CufftSafeCall(cufftCreate(&plan_dft));
431+
CufftSafeCall(cufftMakePlanMany(
432+
plan_dft,
433+
2,
434+
fft_dimensions,
435+
NULL,
436+
1,
437+
n_fft_coeffs * n_fft_coeffs,
438+
NULL,
439+
1,
440+
n_fft_coeffs * (n_fft_coeffs / 2 + 1),
441+
CUFFT_R2C,
442+
n_terms,
443+
&work_size_dft)
444+
);
443445
// TIME_SINCE(time_start);
444446

445447
// TIME_START();
446-
// cufftHandle plan_idft;
447-
// CufftSafeCall(cufftCreate(&plan_idft));
448-
// CufftSafeCall(cufftMakePlanMany(
449-
// plan_idft,
450-
// 2,
451-
// fft_dimensions,
452-
// NULL,
453-
// 1,
454-
// n_fft_coeffs * (n_fft_coeffs / 2 + 1),
455-
// NULL,
456-
// 1,
457-
// n_fft_coeffs * n_fft_coeffs,
458-
// CUFFT_C2R,
459-
// n_terms,
460-
// &work_size_idft)
461-
// );
448+
cufftHandle plan_idft;
449+
CufftSafeCall(cufftCreate(&plan_idft));
450+
CufftSafeCall(cufftMakePlanMany(
451+
plan_idft,
452+
2,
453+
fft_dimensions,
454+
NULL,
455+
1,
456+
n_fft_coeffs * (n_fft_coeffs / 2 + 1),
457+
NULL,
458+
1,
459+
n_fft_coeffs * n_fft_coeffs,
460+
CUFFT_C2R,
461+
n_terms,
462+
&work_size_idft)
463+
);
462464
// TIME_SINCE(time_start);
463465
// std::cout << "done.\n";
464466

@@ -545,8 +547,8 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
545547
#endif
546548

547549
tsnecuda::NbodyFFT2D(
548-
// plan_dft,
549-
// plan_idft,
550+
plan_dft,
551+
plan_idft,
550552
fft_kernel_tilde_device, // input
551553
fft_w_coefficients, // intermediate value
552554
N,
@@ -697,6 +699,9 @@ double tsnecuda::RunTsne(tsnecuda::Options& opt)
697699
dump_file << host_ys[i] << " " << host_ys[i + num_points] << std::endl;
698700
}
699701
dump_file.close();
702+
703+
std::string golden_file = "../../data/tsne_mnist_output_golden.txt";
704+
success = verify(golden_file, opt.get_dump_file(), 0.2, 10.0);
700705
TIMER_END_()
701706

702707
host_ys.clear();

tsne/CUDA/src/include/fit_tsne.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@
5656
#include "include/kernels/rep_forces.h"
5757

5858
namespace tsnecuda {
59-
double RunTsne(tsnecuda::Options& opt);
59+
double RunTsne(tsnecuda::Options& opt, int& success);
6060
}
6161

6262
#endif

tsne/CUDA/src/include/kernels/nbodyfft.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,8 @@ void PrecomputeFFT2D(
5959
thrust::device_vector<thrust::complex<float>>& fft_scratchpad_device, double& duration); // added
6060

6161
void NbodyFFT2D(
62-
// cufftHandle& plan_dft,
63-
// cufftHandle& plan_idft,
62+
cufftHandle& plan_dft,
63+
cufftHandle& plan_idft,
6464
thrust::device_vector<thrust::complex<float>>& fft_kernel_tilde_device,
6565
thrust::device_vector<thrust::complex<float>>& fft_w_coefficients,
6666
int N,

tsne/CUDA/src/include/utils/thrust_transform_functions.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ struct FunctionalEntropy {
4747
__host__ __device__
4848
float operator()(const float& x) const {
4949
float val = x * log(x);
50-
return (val != val || isinf(val)) ? 0 : val;
50+
return (x == 0 || val != val || isinf(val)) ? 0 : val;
5151
}
5252
};
5353

0 commit comments

Comments
 (0)