[Code] Setup cuda vector primitive && add basic ops implementation

EgorOrachyov · EgorOrachyov · commit 199f34203a21 · 2021-05-03T15:43:35.000+03:00
diff --git a/cubool/CMakeLists.txt b/cubool/CMakeLists.txt
@@ -17,7 +17,15 @@ set(TARGET_NAME cubool)
 set(TARGET_FILE_NAME)
 set(DEFINES_LIST)
 
-# Platform checks
+# Mode
+if (CUBOOL_DEBUG)
+    list(APPEND DEFINES_LIST CUBOOL_DEBUG)
+endif()
+if (CUBOOL_RELEASE)
+    list(APPEND DEFINES_LIST CUBOOL_RELEASE)
+endif()
+
+    # Platform checks
 if(APPLE)
     list(APPEND DEFINES_LIST CUBOOL_PLATFORM_MACOS)
     set(TARGET_FILE_NAME "lib${TARGET_NAME}.dylib")
@@ -111,14 +119,18 @@ if (CUBOOL_WITH_CUDA)
         sources/cuda/cuda_instance.cpp
         sources/cuda/cuda_matrix.hpp
         sources/cuda/cuda_matrix.cu
-        sources/cuda/cuda_matrix_build.cu
-        sources/cuda/cuda_matrix_extract.cu
         sources/cuda/cuda_matrix_ewiseadd.cu
         sources/cuda/cuda_matrix_kronecker.cu
         sources/cuda/cuda_matrix_multiply.cu
         sources/cuda/cuda_matrix_transpose.cu
         sources/cuda/cuda_matrix_reduce.cu
         sources/cuda/cuda_matrix_extract_sub_matrix.cu
+        sources/cuda/cuda_vector.hpp
+        sources/cuda/cuda_vector.cu
+        sources/cuda/details/meta.hpp
+        sources/cuda/details/sp_vector.hpp
+        sources/cuda/details/host_allocator.hpp
+        sources/cuda/details/device_allocator.cuh
         sources/cuda/kernels/slow_sort.cuh
         sources/cuda/kernels/bin_search.cuh
         sources/cuda/kernels/sptranspose.cuh
diff --git a/cubool/sources/cuda/cuda_backend.cu b/cubool/sources/cuda/cuda_backend.cu
@@ -24,28 +24,43 @@
 
 #include <cuda/cuda_backend.hpp>
 #include <cuda/cuda_matrix.hpp>
+#include <cuda/cuda_vector.hpp>
 #include <core/library.hpp>
 #include <io/logger.hpp>
 
+#include <iostream>
+
 namespace cubool {
 
     void CudaBackend::initialize(hints initHints) {  // Creates the CUDA instance when a supported device is reported
         if (CudaInstance::isCudaDeviceSupported()) {
             mInstance = new CudaInstance(initHints & CUBOOL_HINT_GPU_MEM_MANAGED);  // managed-memory mode is requested iff this hint bit is set
         }
 
-        // No device. Cannot init this backend
+#ifdef CUBOOL_DEBUG
+        if (mInstance == nullptr) {  // NOTE(review): assumes mInstance is null before initialize() runs; its initializer is not visible here - confirm
+            // No device. Cannot init this backend
+            std::cerr << "Failed to initialize Cuda-backend" << std::endl;
+        }
+#endif
     }
 
     void CudaBackend::finalize() {
         assert(mMatCount == 0);
+        assert(mVecCount == 0);
 
         if (mMatCount > 0) {
             LogStream stream(*Library::getLogger());
             stream << Logger::Level::Error
                    << "Lost some (" << mMatCount << ") matrix objects" << LogStream::cmt;
         }
 
+        if (mVecCount > 0) {
+            LogStream stream(*Library::getLogger());
+            stream << Logger::Level::Error
+                   << "Lost some (" << mVecCount << ") vector objects" << LogStream::cmt;
+        }
+
         if (mInstance) {
             delete mInstance;
             mInstance = nullptr;
@@ -62,7 +77,8 @@ namespace cubool {
     }
 
     VectorBase* CudaBackend::createVector(size_t nrows) {
-        RAISE_ERROR(NotImplemented, "Not implemented");
+        // Creates a new CudaVector of dimension `nrows` bound to the backend instance
+        // and returns it as VectorBase; the object is destroyed by releaseVector().
+        //
+        // Construct first and count afterwards: if the construction exits with an
+        // exception, mVecCount must not record a vector that was never created
+        // (finalize() asserts and logs an error when the counter is non-zero).
+        VectorBase* vector = new CudaVector(nrows, getInstance());
+        mVecCount++;
+        return vector;
     }
 
     void CudaBackend::releaseMatrix(MatrixBase *matrixBase) {
@@ -71,7 +87,8 @@ namespace cubool {
     }
 
     void CudaBackend::releaseVector(VectorBase *vectorBase) {  // Destroys a vector and updates the live-vector counter
-        RAISE_ERROR(NotImplemented, "Not implemented");
+        mVecCount--;  // mirrors the increment in createVector(); finalize() expects the counter to be back at 0
+        delete vectorBase;
     }
 
     void CudaBackend::queryCapabilities(cuBool_DeviceCaps &caps) {
diff --git a/cubool/sources/cuda/cuda_backend.hpp b/cubool/sources/cuda/cuda_backend.hpp
@@ -52,6 +52,7 @@ namespace cubool {
     private:
         CudaInstance* mInstance;
         size_t mMatCount = 0;
+        size_t mVecCount = 0;
     };
 
 }
diff --git a/cubool/sources/cuda/cuda_instance.cpp b/cubool/sources/cuda/cuda_instance.cpp
@@ -34,11 +34,6 @@ namespace cubool {
     CudaInstance::CudaInstance(bool useManagedMemory) {
         gInstance = this;
         mMemoryType = useManagedMemory? Managed: Default;
-
-#ifdef CUBOOL_DEBUG
-        sendMessage(CUBOOL_STATUS_SUCCESS, "Initialize CuBool instance");
-        printDeviceCapabilities();
-#endif // CUBOOL_DEBUG
     }
 
     void CudaInstance::allocate(void* &ptr, size_t size) const {
diff --git a/cubool/sources/cuda/cuda_matrix.cu b/cubool/sources/cuda/cuda_matrix.cu
@@ -25,6 +25,7 @@
 #include <cuda/cuda_matrix.hpp>
 #include <core/error.hpp>
 #include <utils/timer.hpp>
+#include <utils/data_utils.hpp>
 #include <algorithm>
 
 namespace cubool {
@@ -38,6 +39,39 @@ namespace cubool {
         RAISE_ERROR(NotImplemented, "This function is not supported for this matrix class");
     }
 
+    // Rebuilds the matrix content from coordinate pairs (rows[k], cols[k]), k < nvals.
+    //
+    // With nvals == 0 the implementation object is reset through zero_dim().
+    // Otherwise the pairs are converted on the host by DataUtils::buildFromData and
+    // the resulting arrays are handed to transferToDevice().
+    // isSorted / noDuplicates are forwarded unchanged to DataUtils::buildFromData.
+    void CudaMatrix::build(const index *rows, const index *cols, size_t nvals, bool isSorted, bool noDuplicates) {
+        if (nvals != 0) {
+            // Host-side staging buffers filled by the conversion helper
+            std::vector<index> hostRowOffsets;
+            std::vector<index> hostColIndices;
+
+            DataUtils::buildFromData(getNrows(), getNcols(),
+                                     rows, cols, nvals,
+                                     hostRowOffsets, hostColIndices,
+                                     isSorted, noDuplicates);
+
+            // Hand the staged arrays over to the matrix implementation
+            this->transferToDevice(hostRowOffsets, hostColIndices);
+        } else {
+            // Nothing to store: leave the matrix without content
+            mMatrixImpl.zero_dim();
+        }
+    }
+
+    // Writes the stored coordinate pairs into the caller's host buffers.
+    //
+    // rows, cols - destination buffers passed on to DataUtils::extractData
+    // nvals      - on entry must be >= the number of stored values (asserted);
+    //              on exit holds exactly the number of stored values
+    //
+    // NOTE(review): the capacity precondition is checked with assert only; confirm
+    // that callers validate the buffer size for builds where assert is disabled.
+    void CudaMatrix::extract(index *rows, index *cols, size_t &nvals) {
+        assert(nvals >= getNvals());
+
+        // Report the exact number of stored values back to the caller
+        nvals = getNvals();
+
+        if (nvals == 0) {
+            return;  // nothing to copy
+        }
+
+        // Host-side copies of the matrix structure
+        std::vector<index> hostRowOffsets;
+        std::vector<index> hostColIndices;
+        this->transferFromDevice(hostRowOffsets, hostColIndices);
+
+        DataUtils::extractData(getNrows(), getNcols(),
+                               rows, cols, nvals,
+                               hostRowOffsets, hostColIndices);
+    }
+
     void CudaMatrix::clone(const MatrixBase &otherBase) {
         auto other = dynamic_cast<const CudaMatrix*>(&otherBase);
 
diff --git a/cubool/sources/cuda/cuda_matrix.hpp b/cubool/sources/cuda/cuda_matrix.hpp
@@ -40,7 +40,7 @@ namespace cubool {
         using HostAlloc = details::HostAllocator<T>;
         using MatrixImplType = nsparse::matrix<bool, index, DeviceAlloc<index>>;
 
-        explicit CudaMatrix(size_t nrows, size_t ncols, CudaInstance& instance);
+        CudaMatrix(size_t nrows, size_t ncols, CudaInstance& instance);
         ~CudaMatrix() override = default;
 
         void setElement(index i, index j) override;
diff --git a/cubool/sources/cuda/cuda_vector.cu b/cubool/sources/cuda/cuda_vector.cu
@@ -0,0 +1,119 @@
+/**********************************************************************************/
+/* MIT License                                                                    */
+/*                                                                                */
+/* Copyright (c) 2020, 2021 JetBrains-Research                                    */
+/*                                                                                */
+/* Permission is hereby granted, free of charge, to any person obtaining a copy   */
+/* of this software and associated documentation files (the "Software"), to deal  */
+/* in the Software without restriction, including without limitation the rights   */
+/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell      */
+/* copies of the Software, and to permit persons to whom the Software is          */
+/* furnished to do so, subject to the following conditions:                       */
+/*                                                                                */
+/* The above copyright notice and this permission notice shall be included in all */
+/* copies or substantial portions of the Software.                                */
+/*                                                                                */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR     */
+/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,       */
+/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE    */
+/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER         */
+/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  */
+/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  */
+/* SOFTWARE.                                                                      */
+/**********************************************************************************/
+
+#include <cuda/cuda_vector.hpp>
+#include <core/error.hpp>
+#include <utils/data_utils.hpp>
+
+namespace cubool {
+
+    // Creates a vector of dimension `nrows`; `instance` is stored by reference.
+    CudaVector::CudaVector(size_t nrows, CudaInstance &instance)
+        : mVectorImpl(nrows),
+          mInstance(instance) {
+    }
+
+    // Single-element writes are not offered by this class:
+    // the call always reports NotImplemented through RAISE_ERROR.
+    void CudaVector::setElement(index i) {
+        RAISE_ERROR(NotImplemented,
+                    "This function is not supported for this vector class");
+    }
+
+    // Rebuilds the vector content from a host array of row indices.
+    //
+    // rows         - host array with `nvals` indices
+    // nvals        - number of entries in `rows`; 0 resets the vector to an empty
+    //                one of the same dimension
+    // isSorted     - forwarded unchanged to DataUtils::buildVectorFromData
+    // noDuplicates - forwarded unchanged to DataUtils::buildVectorFromData
+    void CudaVector::build(const index *rows, size_t nvals, bool isSorted, bool noDuplicates) {
+        if (nvals == 0) {
+            // Empty vector, no values (but preserve dim)
+            mVectorImpl = VectorImplType(getNrows());
+            return;
+        }
+
+        // Validate data, sort, remove duplicates and etc. (staged in a host buffer)
+        std::vector<index> data;
+        DataUtils::buildVectorFromData(getNrows(), rows, nvals, data, isSorted, noDuplicates);
+
+        // Transfer data to GPU: construct the device buffer directly from the host
+        // range instead of first creating data.size() value-initialized elements
+        // and then overwriting every one of them with a separate copy
+        thrust::device_vector<index, DeviceAlloc<index>> deviceData(data.begin(), data.end());
+
+        // New vec instance
+        mVectorImpl = VectorImplType(std::move(deviceData), getNrows(), data.size());
+    }
+
+    // Copies the stored indices into the caller's buffer.
+    //
+    // rows  - destination buffer; asserted to be non-null when there are values
+    // nvals - on entry must be >= the number of stored values (asserted);
+    //         on exit holds exactly the number of stored values
+    //
+    // NOTE(review): both preconditions are checked with assert only; confirm that
+    // callers validate the buffer for builds where assert is disabled.
+    void CudaVector::extract(index *rows, size_t &nvals) {
+        assert(nvals >= getNvals());
+
+        // Report the exact number of stored values back to the caller
+        nvals = getNvals();
+
+        if (nvals == 0) {
+            return;  // nothing to copy
+        }
+
+        assert(rows);
+
+        // Copy the index data held by the implementation into `rows`
+        auto &storedIndices = mVectorImpl.m_rows_index;
+        thrust::copy(storedIndices.begin(), storedIndices.end(), rows);
+    }
+
+    // Sub-vector extraction is not provided by this class yet:
+    // the call always reports NotImplemented through RAISE_ERROR.
+    void CudaVector::extractSubVector(const VectorBase &otherBase, index i, index nrows, bool checkTime) {
+        RAISE_ERROR(NotImplemented, "This function is not implemented");
+    }
+
+    // Replaces the content of this vector with a copy of `otherBase`.
+    //
+    // `otherBase` must be a CudaVector distinct from `this`; otherwise an
+    // InvalidArgument error is reported through CHECK_RAISE_ERROR.
+    // Equal dimensions are asserted, not checked at run time.
+    void CudaVector::clone(const VectorBase &otherBase) {
+        const auto *other = dynamic_cast<const CudaVector*>(&otherBase);
+
+        // Reject foreign vector types first, then self-assignment
+        CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Passed vector does not belong to vector class");
+        CHECK_RAISE_ERROR(other != this, InvalidArgument, "Vectors must differ");
+
+        assert(this->getNrows() == other->getNrows());
+
+        // Copy-assign the whole implementation object
+        mVectorImpl = other->mVectorImpl;
+    }
+
+    // Stores the value returned by getNvals() into `result`;
+    // `checkTime` is not used here.
+    void CudaVector::reduce(index &result, bool checkTime) {
+        const index storedValues = getNvals();
+        result = storedValues;
+    }
+
+    // Matrix-to-vector reduction is not provided by this class yet:
+    // the call always reports NotImplemented through RAISE_ERROR.
+    void CudaVector::reduceMatrix(const struct MatrixBase &matrix, bool transpose, bool checkTime) {
+        RAISE_ERROR(NotImplemented, "This function is not implemented");
+    }
+
+    // Element-wise addition is not provided by this class yet:
+    // the call always reports NotImplemented through RAISE_ERROR.
+    void CudaVector::eWiseAdd(const VectorBase &aBase, const VectorBase &bBase, bool checkTime) {
+        RAISE_ERROR(NotImplemented, "This function is not implemented");
+    }
+
+    // Vector-by-matrix multiplication is not provided by this class yet:
+    // the call always reports NotImplemented through RAISE_ERROR.
+    void CudaVector::multiplyVxM(const VectorBase &vBase, const struct MatrixBase &mBase, bool checkTime) {
+        RAISE_ERROR(NotImplemented, "This function is not implemented");
+    }
+
+    // Matrix-by-vector multiplication is not provided by this class yet:
+    // the call always reports NotImplemented through RAISE_ERROR.
+    void CudaVector::multiplyMxV(const struct MatrixBase &mBase, const VectorBase &vBase, bool checkTime) {
+        RAISE_ERROR(NotImplemented, "This function is not implemented");
+    }
+
+    // Returns the m_rows field of the implementation object (the vector dimension).
+    index CudaVector::getNrows() const {
+        const auto &impl = mVectorImpl;
+        return impl.m_rows;
+    }
+
+    // Returns the m_vals field of the implementation object (the stored-value count).
+    index CudaVector::getNvals() const {
+        const auto &impl = mVectorImpl;
+        return impl.m_vals;
+    }
+
+}
diff --git a/cubool/sources/cuda/cuda_vector.hpp b/cubool/sources/cuda/cuda_vector.hpp
@@ -22,26 +22,46 @@
 /* SOFTWARE.                                                                      */
 /**********************************************************************************/
 
-#include <cuda/cuda_matrix.hpp>
-#include <utils/data_utils.hpp>
+#ifndef CUBOOL_CUDA_VECTOR_HPP
+#define CUBOOL_CUDA_VECTOR_HPP
+
+#include <backend/vector_base.hpp>
+#include <cuda/cuda_instance.hpp>
+#include <cuda/details/sp_vector.hpp>
+#include <cuda/details/device_allocator.cuh>
 
 namespace cubool {
 
-    void CudaMatrix::extract(index *rows, index *cols, size_t &nvals) {
-        assert(nvals >= getNvals());
+    class CudaVector final: public VectorBase {  // VectorBase implementation whose data lives in a details::SpVector using the device allocator
+    public:
+        template<typename T>
+        using DeviceAlloc = details::DeviceAllocator<T>;
+        using VectorImplType = details::SpVector<index, DeviceAlloc<index>>;
+
+        CudaVector(size_t nrows, CudaInstance& instance);  // nrows: vector dimension; instance is kept by reference
+        ~CudaVector() override = default;
+
+        void setElement(index i) override;
+        void build(const index *rows, size_t nvals, bool isSorted, bool noDuplicates) override;
+        void extract(index *rows, size_t &nvals) override;  // nvals is in/out: overwritten with the number of stored values
+        void extractSubVector(const VectorBase &otherBase, index i, index nrows, bool checkTime) override;
+
+        void clone(const VectorBase &otherBase) override;
+        void reduce(index &result, bool checkTime) override;
+        void reduceMatrix(const struct MatrixBase &matrix, bool transpose, bool checkTime) override;
 
-        // Set nvals to the exact number of nnz values
-        nvals = getNvals();
+        void eWiseAdd(const VectorBase &aBase, const VectorBase &bBase, bool checkTime) override;
+        void multiplyVxM(const VectorBase &vBase, const struct MatrixBase &mBase, bool checkTime) override;
+        void multiplyMxV(const struct MatrixBase &mBase, const VectorBase &vBase, bool checkTime) override;
 
-        if (nvals > 0) {
-            // Copy data to the host
-            std::vector<index> rowOffsets;
-            std::vector<index> colIndices;
+        index getNrows() const override;
+        index getNvals() const override;
 
-            this->transferFromDevice(rowOffsets, colIndices);
+    private:
+        mutable VectorImplType mVectorImpl;  // holds the dimension, value count and index data; NOTE(review): reason for `mutable` is not visible here - confirm it is needed
+        CudaInstance& mInstance;  // instance supplied at construction; referenced, not owned
+    };
 
-            DataUtils::extractData(getNrows(), getNcols(), rows, cols, nvals, rowOffsets, colIndices);
-        }
-    }
+}
 
-}
+#endif //CUBOOL_CUDA_VECTOR_HPP
diff --git a/cubool/sources/cuda/details/meta.hpp b/cubool/sources/cuda/details/meta.hpp
diff --git a/cubool/sources/cuda/details/sp_vector.hpp b/cubool/sources/cuda/details/sp_vector.hpp
diff --git a/cubool/sources/sequential/sq_vector.cpp b/cubool/sources/sequential/sq_vector.cpp
diff --git a/cubool/sources/utils/data_utils.cpp b/cubool/sources/utils/data_utils.cpp

Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,7 @@ namespace cubool {`
`52`	`52`	`private:`
`53`	`53`	`CudaInstance* mInstance;`
`54`	`54`	`size_t mMatCount = 0;`
	`55`	`+ size_t mVecCount = 0;`
`55`	`56`	`};`
`56`	`57`
`57`	`58`	`}`