Skip to content

Commit 199f342

Browse files
committed
[Code] Setup cuda vector primitive && add basic ops implementation
1 parent 79194cb commit 199f342

File tree

12 files changed

+349
-53
lines changed

12 files changed

+349
-53
lines changed

cubool/CMakeLists.txt

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,15 @@ set(TARGET_NAME cubool)
1717
set(TARGET_FILE_NAME)
1818
set(DEFINES_LIST)
1919

20-
# Platform checks
20+
# Mode
21+
if (CUBOOL_DEBUG)
22+
list(APPEND DEFINES_LIST CUBOOL_DEBUG)
23+
endif()
24+
if (CUBOOL_RELEASE)
25+
list(APPEND DEFINES_LIST CUBOOL_RELEASE)
26+
endif()
27+
28+
# Platform checks
2129
if(APPLE)
2230
list(APPEND DEFINES_LIST CUBOOL_PLATFORM_MACOS)
2331
set(TARGET_FILE_NAME "lib${TARGET_NAME}.dylib")
@@ -111,14 +119,18 @@ if (CUBOOL_WITH_CUDA)
111119
sources/cuda/cuda_instance.cpp
112120
sources/cuda/cuda_matrix.hpp
113121
sources/cuda/cuda_matrix.cu
114-
sources/cuda/cuda_matrix_build.cu
115-
sources/cuda/cuda_matrix_extract.cu
116122
sources/cuda/cuda_matrix_ewiseadd.cu
117123
sources/cuda/cuda_matrix_kronecker.cu
118124
sources/cuda/cuda_matrix_multiply.cu
119125
sources/cuda/cuda_matrix_transpose.cu
120126
sources/cuda/cuda_matrix_reduce.cu
121127
sources/cuda/cuda_matrix_extract_sub_matrix.cu
128+
sources/cuda/cuda_vector.hpp
129+
sources/cuda/cuda_vector.cu
130+
sources/cuda/details/meta.hpp
131+
sources/cuda/details/sp_vector.hpp
132+
sources/cuda/details/host_allocator.hpp
133+
sources/cuda/details/device_allocator.cuh
122134
sources/cuda/kernels/slow_sort.cuh
123135
sources/cuda/kernels/bin_search.cuh
124136
sources/cuda/kernels/sptranspose.cuh

cubool/sources/cuda/cuda_backend.cu

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,28 +24,43 @@
2424

2525
#include <cuda/cuda_backend.hpp>
2626
#include <cuda/cuda_matrix.hpp>
27+
#include <cuda/cuda_vector.hpp>
2728
#include <core/library.hpp>
2829
#include <io/logger.hpp>
2930

31+
#include <iostream>
32+
3033
namespace cubool {
3134

3235
void CudaBackend::initialize(hints initHints) {
3336
if (CudaInstance::isCudaDeviceSupported()) {
3437
mInstance = new CudaInstance(initHints & CUBOOL_HINT_GPU_MEM_MANAGED);
3538
}
3639

37-
// No device. Cannot init this backend
40+
#ifdef CUBOOL_DEBUG
41+
if (mInstance == nullptr) {
42+
// No device. Cannot init this backend
43+
std::cerr << "Failed to initialize Cuda-backend" << std::endl;
44+
}
45+
#endif
3846
}
3947

4048
void CudaBackend::finalize() {
4149
assert(mMatCount == 0);
50+
assert(mVecCount == 0);
4251

4352
if (mMatCount > 0) {
4453
LogStream stream(*Library::getLogger());
4554
stream << Logger::Level::Error
4655
<< "Lost some (" << mMatCount << ") matrix objects" << LogStream::cmt;
4756
}
4857

58+
if (mVecCount > 0) {
59+
LogStream stream(*Library::getLogger());
60+
stream << Logger::Level::Error
61+
<< "Lost some (" << mVecCount << ") vector objects" << LogStream::cmt;
62+
}
63+
4964
if (mInstance) {
5065
delete mInstance;
5166
mInstance = nullptr;
@@ -62,7 +77,8 @@ namespace cubool {
6277
}
6378

6479
VectorBase* CudaBackend::createVector(size_t nrows) {
65-
RAISE_ERROR(NotImplemented, "Not implemented");
80+
mVecCount++;
81+
return new CudaVector(nrows, getInstance());
6682
}
6783

6884
void CudaBackend::releaseMatrix(MatrixBase *matrixBase) {
@@ -71,7 +87,8 @@ namespace cubool {
7187
}
7288

7389
void CudaBackend::releaseVector(VectorBase *vectorBase) {
74-
RAISE_ERROR(NotImplemented, "Not implemented");
90+
mVecCount--;
91+
delete vectorBase;
7592
}
7693

7794
void CudaBackend::queryCapabilities(cuBool_DeviceCaps &caps) {

cubool/sources/cuda/cuda_backend.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ namespace cubool {
5252
private:
5353
CudaInstance* mInstance;
5454
size_t mMatCount = 0;
55+
size_t mVecCount = 0;
5556
};
5657

5758
}

cubool/sources/cuda/cuda_instance.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ namespace cubool {
3434
CudaInstance::CudaInstance(bool useManagedMemory) {
3535
gInstance = this;
3636
mMemoryType = useManagedMemory? Managed: Default;
37-
38-
#ifdef CUBOOL_DEBUG
39-
sendMessage(CUBOOL_STATUS_SUCCESS, "Initialize CuBool instance");
40-
printDeviceCapabilities();
41-
#endif // CUBOOL_DEBUG
4237
}
4338

4439
void CudaInstance::allocate(void* &ptr, size_t size) const {

cubool/sources/cuda/cuda_matrix.cu

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <cuda/cuda_matrix.hpp>
2626
#include <core/error.hpp>
2727
#include <utils/timer.hpp>
28+
#include <utils/data_utils.hpp>
2829
#include <algorithm>
2930

3031
namespace cubool {
@@ -38,6 +39,39 @@ namespace cubool {
3839
RAISE_ERROR(NotImplemented, "This function is not supported for this matrix class");
3940
}
4041

42+
void CudaMatrix::build(const index *rows, const index *cols, size_t nvals, bool isSorted, bool noDuplicates) {
43+
if (nvals == 0) {
44+
mMatrixImpl.zero_dim(); // no content, empty matrix
45+
return;
46+
}
47+
48+
// Build csr structure and store on cpu side
49+
std::vector<index> rowOffsets;
50+
std::vector<index> colIndices;
51+
52+
DataUtils::buildFromData(getNrows(), getNcols(), rows, cols, nvals, rowOffsets, colIndices, isSorted, noDuplicates);
53+
54+
// Move actual data to the matrix implementation
55+
this->transferToDevice(rowOffsets, colIndices);
56+
}
57+
58+
void CudaMatrix::extract(index *rows, index *cols, size_t &nvals) {
59+
assert(nvals >= getNvals());
60+
61+
// Set nvals to the exact number of nnz values
62+
nvals = getNvals();
63+
64+
if (nvals > 0) {
65+
// Copy data to the host
66+
std::vector<index> rowOffsets;
67+
std::vector<index> colIndices;
68+
69+
this->transferFromDevice(rowOffsets, colIndices);
70+
71+
DataUtils::extractData(getNrows(), getNcols(), rows, cols, nvals, rowOffsets, colIndices);
72+
}
73+
}
74+
4175
void CudaMatrix::clone(const MatrixBase &otherBase) {
4276
auto other = dynamic_cast<const CudaMatrix*>(&otherBase);
4377

cubool/sources/cuda/cuda_matrix.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ namespace cubool {
4040
using HostAlloc = details::HostAllocator<T>;
4141
using MatrixImplType = nsparse::matrix<bool, index, DeviceAlloc<index>>;
4242

43-
explicit CudaMatrix(size_t nrows, size_t ncols, CudaInstance& instance);
43+
CudaMatrix(size_t nrows, size_t ncols, CudaInstance& instance);
4444
~CudaMatrix() override = default;
4545

4646
void setElement(index i, index j) override;

cubool/sources/cuda/cuda_vector.cu

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/**********************************************************************************/
2+
/* MIT License */
3+
/* */
4+
/* Copyright (c) 2020, 2021 JetBrains-Research */
5+
/* */
6+
/* Permission is hereby granted, free of charge, to any person obtaining a copy */
7+
/* of this software and associated documentation files (the "Software"), to deal */
8+
/* in the Software without restriction, including without limitation the rights */
9+
/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */
10+
/* copies of the Software, and to permit persons to whom the Software is */
11+
/* furnished to do so, subject to the following conditions: */
12+
/* */
13+
/* The above copyright notice and this permission notice shall be included in all */
14+
/* copies or substantial portions of the Software. */
15+
/* */
16+
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */
17+
/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */
18+
/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */
19+
/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */
20+
/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */
21+
/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */
22+
/* SOFTWARE. */
23+
/**********************************************************************************/
24+
25+
#include <cuda/cuda_vector.hpp>
26+
#include <core/error.hpp>
27+
#include <utils/data_utils.hpp>
28+
29+
namespace cubool {
30+
31+
CudaVector::CudaVector(size_t nrows, CudaInstance &instance)
32+
: mVectorImpl(nrows), mInstance(instance) {
33+
34+
}
35+
36+
void CudaVector::setElement(index i) {
37+
RAISE_ERROR(NotImplemented, "This function is not supported for this vector class");
38+
}
39+
40+
void CudaVector::build(const index *rows, size_t nvals, bool isSorted, bool noDuplicates) {
41+
if (nvals == 0) {
42+
// Empty vector, no values (but preserve dim)
43+
mVectorImpl = VectorImplType(getNrows());
44+
return;
45+
}
46+
47+
// Validate data, sort, remove duplicates and etc.
48+
std::vector<index> data;
49+
DataUtils::buildVectorFromData(getNrows(), rows, nvals, data, isSorted, noDuplicates);
50+
51+
// Transfer data to GPU
52+
thrust::device_vector<index, DeviceAlloc<index>> deviceData(data.size());
53+
thrust::copy(data.begin(), data.end(), deviceData.begin());
54+
55+
// New vec instance
56+
mVectorImpl = VectorImplType(std::move(deviceData), getNrows(), data.size());
57+
}
58+
59+
void CudaVector::extract(index *rows, size_t &nvals) {
60+
assert(nvals >= getNvals());
61+
62+
nvals = getNvals();
63+
64+
if (nvals > 0) {
65+
assert(rows);
66+
67+
// Transfer data from GPU
68+
thrust::copy(mVectorImpl.m_rows_index.begin(), mVectorImpl.m_rows_index.end(), rows);
69+
}
70+
}
71+
72+
void CudaVector::extractSubVector(const VectorBase &otherBase, index i, index nrows, bool checkTime) {
73+
RAISE_ERROR(NotImplemented, "This function is not implemented");
74+
75+
}
76+
77+
void CudaVector::clone(const VectorBase &otherBase) {
78+
auto other = dynamic_cast<const CudaVector*>(&otherBase);
79+
80+
CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Passed vector does not belong to vector class");
81+
CHECK_RAISE_ERROR(other != this, InvalidArgument, "Vectors must differ");
82+
83+
assert(this->getNrows() == other->getNrows());
84+
this->mVectorImpl = other->mVectorImpl;
85+
}
86+
87+
void CudaVector::reduce(index &result, bool checkTime) {
88+
result = getNvals();
89+
}
90+
91+
void CudaVector::reduceMatrix(const struct MatrixBase &matrix, bool transpose, bool checkTime) {
92+
RAISE_ERROR(NotImplemented, "This function is not implemented");
93+
94+
}
95+
96+
void CudaVector::eWiseAdd(const VectorBase &aBase, const VectorBase &bBase, bool checkTime) {
97+
RAISE_ERROR(NotImplemented, "This function is not implemented");
98+
99+
}
100+
101+
void CudaVector::multiplyVxM(const VectorBase &vBase, const struct MatrixBase &mBase, bool checkTime) {
102+
RAISE_ERROR(NotImplemented, "This function is not implemented");
103+
104+
}
105+
106+
void CudaVector::multiplyMxV(const struct MatrixBase &mBase, const VectorBase &vBase, bool checkTime) {
107+
RAISE_ERROR(NotImplemented, "This function is not implemented");
108+
109+
}
110+
111+
index CudaVector::getNrows() const {
112+
return mVectorImpl.m_rows;
113+
}
114+
115+
index CudaVector::getNvals() const {
116+
return mVectorImpl.m_vals;
117+
}
118+
119+
}

cubool/sources/cuda/cuda_matrix_extract.cu renamed to cubool/sources/cuda/cuda_vector.hpp

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,26 +22,46 @@
2222
/* SOFTWARE. */
2323
/**********************************************************************************/
2424

25-
#include <cuda/cuda_matrix.hpp>
26-
#include <utils/data_utils.hpp>
25+
#ifndef CUBOOL_CUDA_VECTOR_HPP
26+
#define CUBOOL_CUDA_VECTOR_HPP
27+
28+
#include <backend/vector_base.hpp>
29+
#include <cuda/cuda_instance.hpp>
30+
#include <cuda/details/sp_vector.hpp>
31+
#include <cuda/details/device_allocator.cuh>
2732

2833
namespace cubool {
2934

30-
void CudaMatrix::extract(index *rows, index *cols, size_t &nvals) {
31-
assert(nvals >= getNvals());
35+
class CudaVector final: public VectorBase {
36+
public:
37+
template<typename T>
38+
using DeviceAlloc = details::DeviceAllocator<T>;
39+
using VectorImplType = details::SpVector<index, DeviceAlloc<index>>;
40+
41+
CudaVector(size_t nrows, CudaInstance& instance);
42+
~CudaVector() override = default;
43+
44+
void setElement(index i) override;
45+
void build(const index *rows, size_t nvals, bool isSorted, bool noDuplicates) override;
46+
void extract(index *rows, size_t &nvals) override;
47+
void extractSubVector(const VectorBase &otherBase, index i, index nrows, bool checkTime) override;
48+
49+
void clone(const VectorBase &otherBase) override;
50+
void reduce(index &result, bool checkTime) override;
51+
void reduceMatrix(const struct MatrixBase &matrix, bool transpose, bool checkTime) override;
3252

33-
// Set nvals to the exact number of nnz values
34-
nvals = getNvals();
53+
void eWiseAdd(const VectorBase &aBase, const VectorBase &bBase, bool checkTime) override;
54+
void multiplyVxM(const VectorBase &vBase, const struct MatrixBase &mBase, bool checkTime) override;
55+
void multiplyMxV(const struct MatrixBase &mBase, const VectorBase &vBase, bool checkTime) override;
3556

36-
if (nvals > 0) {
37-
// Copy data to the host
38-
std::vector<index> rowOffsets;
39-
std::vector<index> colIndices;
57+
index getNrows() const override;
58+
index getNvals() const override;
4059

41-
this->transferFromDevice(rowOffsets, colIndices);
60+
private:
61+
mutable VectorImplType mVectorImpl;
62+
CudaInstance& mInstance;
63+
};
4264

43-
DataUtils::extractData(getNrows(), getNcols(), rows, cols, nvals, rowOffsets, colIndices);
44-
}
45-
}
65+
}
4666

47-
}
67+
#endif //CUBOOL_CUDA_VECTOR_HPP

0 commit comments

Comments
 (0)