Skip to content

Commit 1214391

Browse files
committed
[Project] Add algo papers info
1 parent 2a01ce1 commit 1214391

File tree

5 files changed

+35
-27
lines changed

5 files changed

+35
-27
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,22 @@ cuBool
333333
}
334334
```
335335
336+
## Algorithms
337+
338+
This section lists all the related papers, articles and links, which
339+
were used as an algorithmic foundation for the implementation of sparse linear
340+
boolean algebra operations (sparse matrix-matrix multiplication, sparse matrix-vector
341+
multiplication, sparse vector-matrix multiplication, matrix-matrix element-wise addition, etc.):
342+
343+
- High-performance and Memory-saving Sparse General Matrix-Matrix Multiplication for NVIDIA Pascal GPU,
344+
Yusuke Nagasaka, Akira Nukada, Satoshi Matsuoka
345+
- GPU Merge Path - A GPU Merging Algorithm,
346+
Oded Green, Robert McColl, David A. Bader
347+
- Efficient Sparse Matrix-Vector Multiplication on GPUs using the CSR Storage Format,
348+
Joseph L. Greathouse, Mayank Daga
349+
- Atomic Reduction Based Sparse Matrix-Transpose Vector Multiplication on GPUs,
350+
Yuan Tao, Yangdong Deng, Shuai Mu, Mingfa Zhu, Limin Xiao, Li Ruan, Zhibin Huang
351+
336352
## License
337353
338354
This project is licensed under MIT License. License text can be found in the

cubool/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ if (CUBOOL_WITH_CUDA)
140140
sources/cuda/kernels/slow_sort.cuh
141141
sources/cuda/kernels/bin_search.cuh
142142
sources/cuda/kernels/spgemv.cuh
143-
sources/cuda/kernels/spgemtv.cuh
143+
sources/cuda/kernels/spgemv_t.cuh
144144
sources/cuda/kernels/spewiseadd.cuh
145145
sources/cuda/kernels/sptranspose.cuh
146146
sources/cuda/kernels/sptranspose2.cuh

cubool/sources/cuda/cuda_vector_vxm.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
#include <cuda/cuda_vector.hpp>
2626
#include <cuda/cuda_matrix.hpp>
27-
#include <cuda/kernels/spgemtv.cuh>
27+
#include <cuda/kernels/spgemv_t.cuh>
2828
#include <core/error.hpp>
2929
#include <cassert>
3030

@@ -42,7 +42,7 @@ namespace cubool {
4242

4343
m->resizeStorageToDim();
4444

45-
kernels::SpGEMtV<index, DeviceAlloc<index>> functor;
45+
kernels::SpGEMVT<index, DeviceAlloc<index>> functor;
4646
auto result = functor(v->mVectorImpl, m->mMatrixImpl);
4747

4848
mVectorImpl = std::move(result);

cubool/sources/cuda/kernels/spgemv.cuh

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -111,15 +111,11 @@ namespace cubool {
111111
thrust::device_ptr<const IndexType> rowConfig) { // Rows to process for each bin)
112112

113113
EXPAND_SIDE_EFFECTS(
114-
(binSizes[Bins::id] > 0 ?
115-
__spgemv<IndexType, Bins::threads, Bins::blockSize>
116-
<<<binSizes[Bins::id] / Bins::dispatchRatio +
117-
(binSizes[Bins::id] % Bins::dispatchRatio ? 1 : 0),
118-
Bins::blockSize,
119-
0,
120-
streamsWrapper.streams[Bins::id]>>>
121-
(rowOffsets, colIndices, v, x, rowConfig + binOffset[Bins::id], binSizes[Bins::id])
122-
: void())
114+
(binSizes[Bins::id] > 0 ?
115+
__spgemv<IndexType, Bins::threads, Bins::blockSize>
116+
<<<binSizes[Bins::id] / Bins::dispatchRatio + (binSizes[Bins::id] % Bins::dispatchRatio ? 1 : 0), Bins::blockSize, 0, streamsWrapper.streams[Bins::id]>>>
117+
(rowOffsets, colIndices, v, x, rowConfig + binOffset[Bins::id], binSizes[Bins::id])
118+
: void())
123119
);
124120
}
125121

cubool/sources/cuda/kernels/spgemtv.cuh renamed to cubool/sources/cuda/kernels/spgemv_t.cuh

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ namespace cubool {
3535
namespace kernels {
3636

3737
template<typename IndexType, size_t threads, size_t blockSize>
38-
__global__ void __spgemtv(thrust::device_ptr<const IndexType> rowOffsets, // Input csr matrix rows
39-
thrust::device_ptr<const IndexType> colIndices, // Input csr matrix col indices
40-
thrust::device_ptr<IndexType> x, // Output dense x vector (x = M*v)
41-
thrust::device_ptr<const IndexType> rowConfig, // Rows to process for each bin
42-
IndexType rowsCount) { // Num of rows to process
38+
__global__ void __spgemv_t(thrust::device_ptr<const IndexType> rowOffsets, // Input csr matrix rows
39+
thrust::device_ptr<const IndexType> colIndices, // Input csr matrix col indices
40+
thrust::device_ptr<IndexType> x, // Output dense x vector (x = M*v)
41+
thrust::device_ptr<const IndexType> rowConfig, // Rows to process for each bin
42+
IndexType rowsCount) { // Num of rows to process
4343
// Split block into number of groups of size `threads`.
4444
// Each group process its own row.
4545

@@ -62,7 +62,7 @@ namespace cubool {
6262
}
6363

6464
template<typename IndexType, typename AllocType>
65-
struct SpGEMtV {
65+
struct SpGEMVT {
6666
template<typename T>
6767
using ContainerType = thrust::device_vector<T, typename AllocType::template rebind<T>::other>;
6868
using MatrixType = nsparse::matrix<bool, IndexType, AllocType>;
@@ -78,15 +78,11 @@ namespace cubool {
7878
thrust::device_ptr<const IndexType> rowConfig) { // Rows to process for each bin)
7979

8080
EXPAND_SIDE_EFFECTS(
81-
(binSizes[Bins::id] > 0 ?
82-
__spgemtv<IndexType, Bins::threads, Bins::blockSize>
83-
<<<binSizes[Bins::id] / Bins::dispatchRatio +
84-
(binSizes[Bins::id] % Bins::dispatchRatio ? 1 : 0),
85-
Bins::blockSize,
86-
0,
87-
streamsWrapper.streams[Bins::id]>>>
88-
(rowOffsets, colIndices, x, rowConfig + binOffset[Bins::id], binSizes[Bins::id])
89-
: void())
81+
(binSizes[Bins::id] > 0 ?
82+
__spgemv_t<IndexType, Bins::threads, Bins::blockSize>
83+
<<<binSizes[Bins::id] / Bins::dispatchRatio + (binSizes[Bins::id] % Bins::dispatchRatio ? 1 : 0), Bins::blockSize, 0, streamsWrapper.streams[Bins::id]>>>
84+
(rowOffsets, colIndices, x, rowConfig + binOffset[Bins::id], binSizes[Bins::id])
85+
: void())
9086
);
9187
}
9288

0 commit comments

Comments
 (0)