Skip to content

Commit 1020d88

Browse files
Hashing algorithms (#18)
* Initial Commit * Update * Benchmarks * Cleanup * Update CMakeLists.txt
1 parent 8a86eed commit 1020d88

40 files changed

+5217
-0
lines changed

example-07/CMakeLists.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# add_compile_definitions() requires CMake 3.12 and target_link_options()
# (used by cmake/FindSYCL.cmake) requires CMake 3.13.
cmake_minimum_required(VERSION 3.13)
2+
project(SYCL_HASH LANGUAGES CXX)
3+
set(CMAKE_CXX_STANDARD 17)
4+
5+
option(VERBOSE_LIB "Adds various prints in the code" OFF)
6+
if (VERBOSE_LIB)
7+
#message(WARNING "Verbose mode on. Did you forget it?")
8+
add_compile_definitions(VERBOSE_HASH_LIB)
9+
endif ()
10+
11+
# If you're using the DPC++ compiler, these flags will be used. Set the devices you want to target here.
12+
set(DPCPP_FLAGS -fsycl -fsycl-targets=spir64_x86_64,nvptx64-nvidia-cuda -Xcuda-ptxas -v -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_75 -Wno-linker-warnings)
13+
include(cmake/FindSYCL.cmake)
14+
15+
# Default C++ Flags for warnings and optimisation
16+
set(WARNING_FLAGS "-Wall -Wextra -Wshadow -Wdouble-promotion -fno-common -Winit-self -Wuninitialized -Wmissing-declarations -Woverloaded-virtual")
17+
set(EXTRA_W_FLAGS "-pedantic -Wall -Wextra -Wcast-align -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wmissing-declarations -Wmissing-include-dirs -Woverloaded-virtual -Wredundant-decls -Wshadow -Wsign-conversion -Wsign-promo -Wstrict-overflow=5") #-Wnoexcept -Wold-style-cast -Wstrict-null-sentinel -switch-default -Wlogical-op
18+
set(DISABLED_WARNINGS "-Wno-c++20-extensions -Wno-inline-namespace-reopened-noninline -Wno-undef -Wno-unused -Wno-unused-command-line-argument")
19+
set(OPT_FLAGS "-march=native -mtune=native -Ofast -fomit-frame-pointer")
20+
21+
# Adding the flags to the targets
22+
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OPT_FLAGS} ${DISABLED_WARNINGS}")
23+
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNING_FLAGS} ${EXTRA_W_FLAGS} ${DISABLED_WARNINGS} -g -Og")
24+
25+
include_directories(${PROJECT_BINARY_DIR} ${PROJECT_SOURCE_DIR}/include)
26+
27+
28+
set(sycl_hash_all_kernels
29+
src/hash_functions/sha256.cpp
30+
src/hash_functions/blake2b.cpp
31+
src/hash_functions/sha1.cpp
32+
src/hash_functions/md5.cpp
33+
src/hash_functions/keccak.cpp
34+
src/hash_functions/md2.cpp
35+
src/tools/queue_tester.cpp
36+
)
37+
38+
set(sycl_hash_all_sources
39+
src/benchmarks/misc.hpp
40+
include/sycl_hash.hpp
41+
include/internal/config.hpp
42+
include/internal/handle.hpp
43+
include/internal/common.hpp
44+
include/internal/determine_kernel_config.hpp
45+
include/internal/sync_api.hpp
46+
include/internal/async_api.hpp
47+
include/hash_functions/sha256.hpp
48+
include/hash_functions/blake2b.hpp
49+
include/hash_functions/sha1.hpp
50+
include/hash_functions/md5.hpp
51+
include/hash_functions/keccak.hpp
52+
include/hash_functions/md2.hpp
53+
include/tools/fill_rand.hpp
54+
include/tools/sycl_queue_helpers.hpp
55+
include/tools/usm_smart_ptr.hpp
56+
include/tools/runtime_byte_array.hpp
57+
include/tools/intrinsics.hpp
58+
)
59+
60+
add_library(sycl_hash SHARED ${sycl_hash_all_sources} ${sycl_hash_all_kernels})
61+
add_sycl_to_target(TARGET sycl_hash SOURCES ${sycl_hash_all_kernels})
62+
63+
add_executable(demo demo_main.cpp src/benchmarks/misc.hpp)
64+
target_link_libraries(demo PUBLIC sycl_hash)
65+
add_sycl_to_target(TARGET demo SOURCES demo_main.cpp)
66+
67+
include(tests/CMakeLists.txt)

example-07/README.md

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# SYCL Hashing Algorithms
2+
3+
This repository contains hashing algorithms implemented using [SYCL](https://www.khronos.org/sycl/) which is a heterogeneous programming model based on standard C++.
4+
5+
The following hashing methods are currently available:
6+
7+
- sha256
8+
- sha1 (insecure)
9+
- md2 (insecure)
10+
- md5 (insecure)
11+
- keccak (128 224 256 288 384 512)
12+
- sha3 (224 256 384 512)
13+
- blake2b
14+
15+
## Benchmarks
16+
17+
Some functions were ported from a CUDA implementation. The SYCL code was tested unchanged across the different implementations and hardware. Here's how they perform (the values are in GB/s):
18+
19+
| Function | Native CUDA | SYCL on DPC++ CUDA (optimised) | SYCL on ComputeCPP CPU (spir64/spirv64) | SYCL on DPC++ CPU (spir64_x86_64) | SYCL on hipSYCL (omp/cuda) |
20+
| -------- | ----------- | ------------------------------------------- | --------------------------------------- | --------------------------------- | -------------------------- |
21+
| keccak | 15.7 | 23.0 | 4.14 / 3.89 | 4.98 | 4.32 / 23.2 |
22+
| md5 | 14.6 | 20.3 | 6.26 / 5.89 | 9.93 | 9.27 / 20.2 |
23+
| blake2b | 14.7 | 21.6 | 9.46 / 10.0 | 12.4 | 7.71 / 17.9 |
24+
| sha1 | 14.7 | 19.34 | 3.61 / 3.35 | 3.30 | 4.39 / 19.2 |
25+
| sha256 | 13.5 | 19.15 | 2.23 / 2.00 | 2.91 | 2.93 / 19.1 |
26+
| md2 | 4.18 | 4.23 | 0.22 / 0.25 | 0.176 | 0.25 / 2.33 |
27+
28+
### Note
29+
30+
Something broke the spir64 backend of DPC++, and it now produces very slow code.
31+
32+
Benchmark configuration:
33+
34+
- block_size: 512 kiB
35+
- n_blocks: 4\*1536
36+
- n_outbit: 128
37+
- GPU: GTX 1660 Ti
38+
- OS: rhel8.4
39+
- CPU: 2x E5-2670 v2
40+
41+
### Remark
42+
43+
These are not the "best" settings as the optimum changes with the algorithm. The benchmarks measure the time to run 40 iterations, without copying the memory between the device and the host. In a real application, you
44+
could be memory bound.
45+
46+
## How to build
47+
48+
```bash
49+
git clone https://github.com/Michoumichmich/SYCL-Hashing-Algorithms.git ; cd SYCL-Hashing-Algorithms;
50+
mkdir build; cd build
51+
CXX=<sycl_compiler> cmake .. -DCMAKE_BUILD_TYPE=Release
52+
make
53+
```
54+
55+
This will build the library, and a demo executable. Running it will perform a benchmark on your CPU and CUDA device (if available).
56+
57+
You do not necessarily need to pass the `<sycl_compiler>` to cmake, it depends on the implementation you're using and its toolchain.
58+
59+
## How to use
60+
61+
Let's assume you used this [script](https://github.com/Michoumichmich/oneAPI-setup-script) to set up the toolchain with CUDA support.
62+
63+
Here's a minimal example:
64+
65+
```C++
66+
#include <sycl/sycl.hpp> // SYCL headers
67+
#include "sycl_hash.hpp" // The headers
68+
#include "tools/sycl_queue_helpers.hpp" // To make sycl queue
69+
using namespace hash;
70+
71+
int main(){
72+
auto cuda_q = try_get_queue(cuda_selector{}); // create a queue on a cuda device and attach an exception handler
73+
74+
constexpr int hash_size = get_block_size<method::sha256>();
75+
constexpr int n_blocks = 20; // amount of hash to do in parallel
76+
constexpr int item_size = 1024;
77+
78+
byte input[n_blocks * item_size]; // get an array of 20 same-sized data items to hash;
79+
byte output[n_blocks * hash_size]; // reserve space for the output
80+
81+
compute<method::sha256>(cuda_q, input, item_size, output, n_blocks); // do the computing
82+
compute_sha256(cuda_q, input, item_size, output, n_blocks); // identical
83+
84+
/**
85+
* For SHA3 one could write:
86+
* compute_sha3<512>(cuda_q, input, item_size, output, n_blocks);
87+
*/
88+
89+
return 0;
90+
}
91+
```
92+
93+
Then, when using clang, build with:
94+
95+
```
96+
-fsycl -fsycl-targets=spir64_x86_64,nvptx64-nvidia-cuda--sm_50 -I<include_dir> <build_dir>/libsycl_hash.so
97+
```
98+
99+
And your hash will run on the GPU.
100+
101+
# Sources
102+
103+
You may find [here](https://github.com/Michoumichmich/cuda-hashing-algos-with-benchmark) the fork of the original CUDA implementations with the benchmarks added.
104+
105+
# Tested implementations
106+
107+
- [Intel's clang](https://github.com/intel/llvm) with OpenCL on CPU (using Intel's driver) and [Codeplay's CUDA backend](https://www.codeplay.com/solutions/oneapi/for-cuda/)
108+
- [hipSYCL](https://github.com/illuhad/hipSYCL) on macOS with the OpenMP backend (set `hipSYCL_DIR` then `cmake .. -DHIPSYCL_TARGETS="..."`)
109+
- [ComputeCPP](https://developer.codeplay.com/products/computecpp/ce/home) you can build with `cmake .. -DComputeCpp_DIR=/path_to_computecpp -DCOMPUTECPP_BITCODE=spir64 -DCMAKE_BUILD_TYPE=Release`, Tested on the host
110+
device, `spir64` and `spirv64`. See [ComputeCpp SDK](https://github.com/codeplaysoftware/computecpp-sdk)
111+
112+
# Acknowledgements
113+
114+
This repository contains code written by Matt Zweil & The Mochimo Core Contributor Team. Please see the [files](https://github.com/mochimodev/cuda-hashing-algos) for their respective licences.

example-07/cmake/FindSYCL.cmake

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
set(A_SYCL_FOUND false)
2+
3+
find_package(hipSYCL CONFIG)
4+
5+
#[ { "name": "My Compiler Kit", "compilers": { "C": "/home/michel/sycl_workspace/deploy/bin/clang-13", "CXX": "/home/michel/sycl_workspace/deploy/bin/clang++" },"environmentVariables":{"LD_PRELOAD":"/opt/intel/opencl/libOpenCL.so.1"} } ]
6+
7+
8+
if (hipSYCL_FOUND)
9+
set(A_SYCL_FOUND true)
10+
if (NOT CMAKE_BUILD_TYPE)
11+
set(CMAKE_BUILD_TYPE Release)
12+
endif ()
13+
14+
cmake_policy(SET CMP0005 NEW)
15+
add_definitions(-DHIPSYCL_DEBUG_LEVEL=0)
16+
17+
if (NOT HIPSYCL_DEBUG_LEVEL)
18+
if (CMAKE_BUILD_TYPE MATCHES "Debug")
19+
set(HIPSYCL_DEBUG_LEVEL 3 CACHE STRING
20+
"Choose the debug level, options are: 0 (no debug), 1 (print errors), 2 (also print warnings), 3 (also print general information)"
21+
FORCE)
22+
else ()
23+
set(HIPSYCL_DEBUG_LEVEL 2 CACHE STRING
24+
"Choose the debug level, options are: 0 (no debug), 1 (print errors), 2 (also print warnings), 3 (also print general information)"
25+
FORCE)
26+
endif ()
27+
endif ()
28+
endif ()
29+
30+
31+
if (ComputeCpp_DIR)
32+
include(cmake/Modules/FindComputeCpp.cmake)
33+
34+
if (ComputeCpp_ROOT_DIR)
35+
set(A_SYCL_FOUND true)
36+
endif ()
37+
add_compile_definitions(USING_COMPUTECPP)
38+
message(STATUS " Using ComputeCpp CMake")
39+
message(STATUS " Path to ComputeCpp implementation: ${COMPUTECPP_PACKAGE_ROOT_DIR} ")
40+
#set(CMAKE_CXX_STANDARD 11)
41+
include(FindOpenCL)
42+
endif ()
43+
44+
45+
if (TRISYCL_INCLUDE_DIR AND NOT A_SYCL_FOUND)
46+
set(A_SYCL_FOUND true)
47+
message(STATUS " Using triSYCL CMake")
48+
include(FindTriSYCL)
49+
endif ()
50+
51+
# No other SYCL implementation was found, so we expect the DPC++ compiler to be used.
52+
if (NOT A_SYCL_FOUND)
53+
# Attaches the DPC++ SYCL compile and link flags to a target.
#
# Usage:
#   add_sycl_to_target(TARGET <target> [SOURCES <source>...])
#
# TARGET  - existing target that receives the flags.
# SOURCES - parsed but not used by this function; the flags are applied to
#           the whole target.
#
# The flags are ${DPCPP_FLAGS} followed by the SYCL 2020 / C++20 switches.
# For backward compatibility, when the TARGET keyword is absent the second
# positional argument is used as the target name.
function(add_sycl_to_target arg1 arg2)
    cmake_parse_arguments(PARSE_ARGV 0 sycl_arg "" "TARGET" "SOURCES")

    if (sycl_arg_TARGET)
        set(sycl_target "${sycl_arg_TARGET}")
    else ()
        set(sycl_target "${arg2}")
    endif ()

    if (NOT TARGET "${sycl_target}")
        message(FATAL_ERROR "add_sycl_to_target: '${sycl_target}' is not an existing target")
    endif ()

    # Single definition of the flag list shared by the compile and link steps.
    set(sycl_flags ${DPCPP_FLAGS} -sycl-std=2020 -std=c++20 -fsycl-unnamed-lambda)

    # Quoted so the ';'-separated list stays inside one generator expression.
    target_compile_options(${sycl_target} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${sycl_flags}>")
    target_link_options(${sycl_target} PRIVATE ${sycl_flags})
endfunction()
57+
58+
endif ()
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
cmake_minimum_required(VERSION 3.4.3)
2+
3+
if (CMAKE_COMPILER_IS_GNUCXX)
4+
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
5+
message(FATAL_ERROR "host compiler - gcc version must be > 4.8")
6+
endif ()
7+
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
8+
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6)
9+
message(FATAL_ERROR "host compiler - clang version must be > 3.6")
10+
endif ()
11+
endif ()
12+
13+
if (MSVC)
14+
set(ComputeCpp_STL_CHECK_SRC __STL_check)
15+
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp
16+
"#include <CL/sycl.hpp> \n"
17+
"int main() { return 0; }\n")
18+
set(_stl_test_command ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}
19+
-sycl
20+
${COMPUTECPP_DEVICE_COMPILER_FLAGS}
21+
-isystem ${ComputeCpp_INCLUDE_DIRS}
22+
-isystem ${OpenCL_INCLUDE_DIRS}
23+
-o ${ComputeCpp_STL_CHECK_SRC}.sycl
24+
-c ${ComputeCpp_STL_CHECK_SRC}.cpp)
25+
execute_process(
26+
COMMAND ${_stl_test_command}
27+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
28+
RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
29+
ERROR_VARIABLE ComputeCpp_STL_CHECK_ERROR_OUTPUT
30+
OUTPUT_QUIET)
31+
if (NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
32+
# Try disabling compiler version checks
33+
execute_process(
34+
COMMAND ${_stl_test_command}
35+
-D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
36+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
37+
RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
38+
ERROR_VARIABLE ComputeCpp_STL_CHECK_ERROR_OUTPUT
39+
OUTPUT_QUIET)
40+
if (NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
41+
# Try again with __CUDACC__ and _HAS_CONDITIONAL_EXPLICIT=0. This relaxes the restrictions in the MSVC headers
42+
execute_process(
43+
COMMAND ${_stl_test_command}
44+
-D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
45+
-D_HAS_CONDITIONAL_EXPLICIT=0
46+
-D__CUDACC__
47+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
48+
RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
49+
ERROR_VARIABLE ComputeCpp_STL_CHECK_ERROR_OUTPUT
50+
OUTPUT_QUIET)
51+
if (NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
52+
message(FATAL_ERROR "compute++ cannot consume hosted STL headers. This means that compute++ can't \
53+
compile a simple program in this platform and will fail when used in this system. \
54+
\n ${ComputeCpp_STL_CHECK_ERROR_OUTPUT}")
55+
else ()
56+
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
57+
-D_HAS_CONDITIONAL_EXPLICIT=0
58+
-D__CUDACC__)
59+
endif ()
60+
else ()
61+
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH)
62+
endif ()
63+
endif ()
64+
# Remove the temporary files of the STL check. The device compiler is invoked
# with "-o ${ComputeCpp_STL_CHECK_SRC}.sycl", so that file must be removed;
# the ".cpp.sycl" name is kept as well (removing a missing file is a no-op).
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp
        ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.sycl
        ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl)
66+
endif (MSVC)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
cmake_minimum_required(VERSION 3.4.3)
2+
3+
# These should match the types of IR output by compute++
4+
set(IR_MAP_spir bc)
5+
set(IR_MAP_spir64 bc)
6+
set(IR_MAP_spir32 bc)
7+
set(IR_MAP_spirv spv)
8+
set(IR_MAP_spirv64 spv)
9+
set(IR_MAP_spirv32 spv)
10+
set(IR_MAP_aorta-x86_64 o)
11+
set(IR_MAP_aorta-aarch64 o)
12+
set(IR_MAP_aorta-rcar-cve o)
13+
set(IR_MAP_custom-spir64 bc)
14+
set(IR_MAP_custom-spir32 bc)
15+
set(IR_MAP_custom-spirv64 spv)
16+
set(IR_MAP_custom-spirv32 spv)
17+
set(IR_MAP_ptx64 s)
18+
set(IR_MAP_amdgcn s)
19+
20+
# Retrieves the filename extension of the IR output of compute++
21+
function(get_sycl_target_extension output)
    # Look up the extension mapped to the bitcode target selected through
    # COMPUTECPP_BITCODE (see the IR_MAP_<target> variables).
    set(ir_file_extension ${IR_MAP_${COMPUTECPP_BITCODE}})

    # No entry in the map (for instance with multiple device targets):
    # fall back to "bc".
    if (NOT ir_file_extension)
        set(ir_file_extension "bc")
    endif ()

    # Return the result through the variable named by `output`.
    set(${output} ${ir_file_extension} PARENT_SCOPE)
endfunction()

0 commit comments

Comments
 (0)