Commit ffdbee3

Merge pull request #5 from JetBrains-Research/master
Pulling changes from main repository
2 parents f865336 + 85401cd, commit ffdbee3

42 files changed, +655 -390 lines changed

.github/workflows/ubuntu.yml

Lines changed: 3 additions & 7 deletions
@@ -62,14 +62,10 @@ jobs:
 
       - name: Run unit-tests (sequential backend)
        working-directory: ${{ env.build_dir }}
-       run: bash scripts/tests_run_fallback.sh
+       run: bash scripts/run_tests_fallback.sh
        shell: bash
 
      - name: Run regression-tests (sequential backend)
-       working-directory: ${{ env.build_dir }}
-       run: |
-         cd python
-         export PYTHONPATH="`pwd`:$PYTHONPATH"
-         cd tests
-         python3 -m unittest discover -v
+       working-directory: ${{ env.build_dir }}/python
+       run: bash run_tests.sh
        shell: bash

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 # CuBool library Cmake config file
 # Add this file as sub-directory to your project to use library functionality
 
-cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
 project(cubool LANGUAGES CXX)
 
 # Exposed to the user build options

README.md

Lines changed: 39 additions & 7 deletions
@@ -12,8 +12,11 @@ work with sparse matrices written on the NVIDIA CUDA platform. The primary
 goal of the library is implementation, testing and profiling algorithms for
 solving *formal-language-constrained problems*, such as *context-free*
 and *regular* path queries with various semantics for graph databases.
-The library provides C-compatible API, written in the GraphBLAS style,
-as well as python high-level wrapper with automated resources management and fancy syntax sugar.
+The library provides C-compatible API, written in the GraphBLAS style.
+
+**The library** is shipped with python package **pycubool** - wrapper for
+cuBool library C API. This package exports library features and primitives
+in high-level format with automated resources management and fancy syntax sugar.
 
 **The primary library primitive** is a sparse boolean matrix. The library provides
 the most popular operations for matrix manipulation, such as construction from
@@ -34,7 +37,7 @@ prototyping algorithms on a local computer for later running on a powerful serve
 - [X] Sparse matrix element-wise addition
 - [X] Sparse matrix kronecker
 - [X] Sparse matrix transpose
-- [X] Sparse matrix submatrix
+- [X] Sparse matrix extract sub-matrix
 - [X] Sparse matrix reduce
 - [X] Sparse matrix slicing
 - [X] Matrix cached filling
@@ -54,12 +57,28 @@ prototyping algorithms on a local computer for later running on a powerful serve
 - [ ] Publish built artifacts and shared libs
 - [ ] Publish stable source code archives
 
+## Installation
+
+If you are running OS **Ubuntu 20.04** or higher you can download the official
+PyPI **pycubool** python package, which includes compiled library source code
+with Cuda and Sequential computations support. Installation process
+requires only `python3` to be installed on your machine. Python can be installed
+as follows [link](https://phoenixnap.com/kb/how-to-install-python-3-ubuntu).
+
+If all requirements are satisfied, run the following command to install PyPI package:
+```shell script
+$ python3 -m pip install -i https://test.pypi.org/simple/ pycubool
+```
+
 ## Getting Started
 
+This section gives instructions to build the library from sources.
+These steps are required if you want to build library for your specific platform with custom build settings.
+
 ### Requirements
 
 - Linux Ubuntu (tested on 20.04)
-- CMake Version 3.17 or higher
+- CMake Version 3.15 or higher
 - CUDA Compatible GPU device
 - GCC Compiler
 - NVIDIA CUDA toolkit
@@ -141,7 +160,7 @@ Configure build in Release mode with tests and run actual compilation process:
 ```shell script
 $ cmake .. -DCMAKE_BUILD_TYPE=Release -DCUBOOL_BUILD_TESTS=ON
 $ cmake --build . --target all -j `nproc`
-$ bash ./scripts/tests_run_all.sh
+$ bash ./scripts/run_tests_all.sh
 ```
 
 By default, the following cmake options will be automatically enabled:
@@ -255,9 +274,10 @@ cuBool
 │   │   └── sequential - fallback cpu backend
 │   ├── utils - testing utilities
 │   └── tests - gtest-based unit-tests collection
-├── python - pycubool related source
+├── python - pycubool related sources
 │   ├── pycubool - cubool library wrapper for python (similar to pygraphblas)
-│   └── tests - tests for python wrapper
+│   ├── tests - regression tests for python wrapper
+│   └── data - generate data for pycubool regression tests
 ├── deps - project dependencies
 │   ├── cub - cuda utility, required for nsparse
 │   ├── gtest - google test framework for unit testing
@@ -273,6 +293,18 @@
 - Pavel Alimov (Github : [Krekep](https://github.com/Krekep))
 - Semyon Grigorev (Github: [gsvgit](https://github.com/gsvgit))
 
+## Citation
+
+```ignorelang
+@online{cuBool,
+  author = {Orachyov, Egor and Alimov, Pavel and Grigorev, Semyon},
+  title = {cuBool: sparse Boolean linear algebra for Nvidia Cuda},
+  year = 2020,
+  url = {https://github.com/JetBrains-Research/cuBool},
+  note = {Version Alpha}
+}
+```
+
 ## License
 
 This project is licensed under MIT License. License text can be found in the

cubool/CMakeLists.txt

Lines changed: 13 additions & 7 deletions
@@ -26,6 +26,8 @@ set(CUBOOL_SOURCES
         sources/io/logger.hpp
         sources/utils/exclusive_scan.hpp
         sources/utils/timer.hpp
+        sources/utils/csr_utils.cpp
+        sources/utils/csr_utils.hpp
         )
 
 set(CUBOOL_C_API_SOURCES
@@ -72,6 +74,8 @@ if (CUBOOL_WITH_CUDA)
         sources/cuda/instance.cpp
         sources/cuda/matrix_csr.hpp
         sources/cuda/matrix_csr.cu
+        sources/cuda/matrix_csr_build.cu
+        sources/cuda/matrix_csr_extract.cu
         sources/cuda/matrix_csr_ewiseadd.cu
         sources/cuda/matrix_csr_kronecker.cu
         sources/cuda/matrix_csr_multiply.cu
@@ -142,13 +146,15 @@ if (CUBOOL_WITH_CUDA)
     set_target_properties(cubool PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
 
     # Settings: https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
-    target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-            -arch=sm_30
-            -gencode=arch=compute_30,code=sm_30
-            -gencode=arch=compute_35,code=sm_35
-            -gencode=arch=compute_50,code=sm_50
-            -gencode=arch=compute_52,code=sm_52
-            -gencode=arch=compute_52,code=compute_52>)
+    #target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
+    #        # todo: fix this flag later -arch=sm_30 ?
+    #        # todo: can we omit arch flag?
+    #        -gencode=arch=compute_30,code=sm_30
+    #        -gencode=arch=compute_50,code=sm_50
+    #        -gencode=arch=compute_52,code=sm_52
+    #        -gencode=arch=compute_60,code=sm_60
+    #        -gencode=arch=compute_61,code=sm_61
+    #        -gencode=arch=compute_61,code=compute_61>)
 
     target_compile_options(cubool PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -use_fast_math -Xptxas -O2>)

cubool/include/cubool/cubool.h

Lines changed: 11 additions & 11 deletions
@@ -73,27 +73,27 @@ typedef enum cuBool_Status {
 /** Generic lib hits for matrix processing */
 typedef enum cuBool_Hint {
     /** No hints passed */
-    CUBOOL_HINT_NO = 0x0,
+    CUBOOL_HINT_NO = 0,
     /** Force Cpu based backend usage */
-    CUBOOL_HINT_CPU_BACKEND = 0x1,
+    CUBOOL_HINT_CPU_BACKEND = 1,
     /** Use managed gpu memory type instead of default (device) memory */
-    CUBOOL_HINT_GPU_MEM_MANAGED = 0x2,
+    CUBOOL_HINT_GPU_MEM_MANAGED = 2,
     /** Mark input data as row-col sorted */
-    CUBOOL_HINT_VALUES_SORTED = 0x4,
+    CUBOOL_HINT_VALUES_SORTED = 4,
     /** Accumulate result of the operation in the result matrix */
-    CUBOOL_HINT_ACCUMULATE = 0x8,
+    CUBOOL_HINT_ACCUMULATE = 8,
     /** Finalize library state, even if not all resources were explicitly released */
-    CUBOOL_HINT_RELAXED_FINALIZE = 0x16,
+    CUBOOL_HINT_RELAXED_FINALIZE = 16,
     /** Logging hint: log includes error message */
-    CUBOOL_HINT_LOG_ERROR = 0x32,
+    CUBOOL_HINT_LOG_ERROR = 32,
     /** Logging hint: log includes warning message */
-    CUBOOL_HINT_LOG_WARNING = 0x64,
+    CUBOOL_HINT_LOG_WARNING = 64,
     /** Logging hint: log includes all types of messages */
-    CUBOOL_HINT_LOG_ALL = 0x128,
+    CUBOOL_HINT_LOG_ALL = 128,
     /** No duplicates in the build data */
-    CUBOOL_HINT_NO_DUPLICATES = 0x256,
+    CUBOOL_HINT_NO_DUPLICATES = 256,
     /** Performs time measurement and logs elapsed operation time */
-    CUBOOL_HINT_TIME_CHECK = 0x512
+    CUBOOL_HINT_TIME_CHECK = 512
 } cuBool_Hint;
 
 /** Hit mask */
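
The switch from hexadecimal to decimal literals here is more than cosmetic: the old values from 0x16 upward are not powers of two (0x16 is 22, 0x32 is 50), so those hints shared bits with the lower ones and could not be combined reliably in a single mask, whereas 16, 32, 64, 128, 256 and 512 give every hint its own bit again. A standalone C++ sketch of the difference follows; the enum is copied locally from the diff so the snippet compiles without the library, and it is not the shipped header.

```cpp
// Standalone illustration: constants copied from the diff above, not the real cubool.h.
#include <cstdio>

enum cuBool_Hint : unsigned {
    CUBOOL_HINT_NO               = 0,
    CUBOOL_HINT_CPU_BACKEND      = 1,
    CUBOOL_HINT_GPU_MEM_MANAGED  = 2,
    CUBOOL_HINT_VALUES_SORTED    = 4,
    CUBOOL_HINT_ACCUMULATE       = 8,
    CUBOOL_HINT_RELAXED_FINALIZE = 16,  // old literal 0x16 == 22 == 16 | 4 | 2
    CUBOOL_HINT_LOG_ERROR        = 32,  // old literal 0x32 == 50 == 32 | 16 | 2
    CUBOOL_HINT_LOG_WARNING      = 64,
    CUBOOL_HINT_LOG_ALL          = 128,
    CUBOOL_HINT_NO_DUPLICATES    = 256,
    CUBOOL_HINT_TIME_CHECK       = 512
};

int main() {
    // With one distinct bit per hint, several hints combine safely into one mask.
    unsigned hints = CUBOOL_HINT_RELAXED_FINALIZE
                   | CUBOOL_HINT_LOG_ALL
                   | CUBOOL_HINT_TIME_CHECK;

    // Membership tests are unambiguous; under the old hex literals the
    // RELAXED_FINALIZE pattern (22) would also have set the bits of
    // GPU_MEM_MANAGED (2) and VALUES_SORTED (4).
    std::printf("values sorted requested: %s\n",
                (hints & CUBOOL_HINT_VALUES_SORTED) ? "yes" : "no"); // prints "no"
    return 0;
}
```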

cubool/sources/core/library.cpp

Lines changed: 6 additions & 7 deletions
@@ -179,7 +179,7 @@ namespace cubool {
         logDeviceInfo();
     }
 
-    MatrixBase *Library::createMatrix(size_t nrows, size_t ncols) {
+    Matrix *Library::createMatrix(size_t nrows, size_t ncols) {
         CHECK_RAISE_ERROR(nrows > 0, InvalidArgument, "Cannot create matrix with zero dimension");
         CHECK_RAISE_ERROR(ncols > 0, InvalidArgument, "Cannot create matrix with zero dimension");
 
@@ -193,17 +193,16 @@ namespace cubool {
         return m;
     }
 
-    void Library::releaseMatrix(MatrixBase *matrixBase) {
+    void Library::releaseMatrix(Matrix *matrix) {
         if (mRelaxedRelease && !mBackend) return;
 
-        auto m = (Matrix*)(matrixBase);
-        CHECK_RAISE_ERROR(mAllocated.find(m) != mAllocated.end(), InvalidArgument, "No such matrix was allocated");
+        CHECK_RAISE_ERROR(mAllocated.find(matrix) != mAllocated.end(), InvalidArgument, "No such matrix was allocated");
 
         LogStream stream(*getLogger());
-        stream << Logger::Level::Info << "Release Matrix " << m->getDebugMarker() << LogStream::cmt;
+        stream << Logger::Level::Info << "Release Matrix " << matrix->getDebugMarker() << LogStream::cmt;
 
-        mAllocated.erase(m);
-        delete m;
+        mAllocated.erase(matrix);
+        delete matrix;
     }
 
     void Library::handleError(const std::exception& error) {

cubool/sources/core/library.hpp

Lines changed: 2 additions & 2 deletions
@@ -38,8 +38,8 @@ namespace cubool {
         static void finalize();
         static void validate();
         static void setupLogging(const char* logFileName, cuBool_Hints hints);
-        static class MatrixBase *createMatrix(size_t nrows, size_t ncols);
-        static void releaseMatrix(class MatrixBase *matrixBase);
+        static class Matrix *createMatrix(size_t nrows, size_t ncols);
+        static void releaseMatrix(class Matrix *matrix);
         static void handleError(const std::exception& error);
         static void queryCapabilities(cuBool_DeviceCaps& caps);
         static void logDeviceInfo();
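
For context, here is a minimal sketch of the allocation-tracking pattern these two files implement, using toy types rather than the real cubool::Library: matrices handed out by the factory are remembered in a set, and releasing an unknown pointer is rejected. Returning the concrete `Matrix*`, as the diff now does, removes the downcast that the old `MatrixBase*` signature forced onto `releaseMatrix`.

```cpp
// Toy sketch of the create/release bookkeeping; names follow the diff, bodies are invented.
#include <cstddef>
#include <stdexcept>
#include <unordered_set>

struct Matrix {
    Matrix(std::size_t nrows, std::size_t ncols) : nrows(nrows), ncols(ncols) {}
    std::size_t nrows, ncols;
};

class Library {
public:
    Matrix* createMatrix(std::size_t nrows, std::size_t ncols) {
        if (nrows == 0 || ncols == 0)
            throw std::invalid_argument("Cannot create matrix with zero dimension");
        auto* m = new Matrix(nrows, ncols);
        mAllocated.insert(m);   // remember ownership of every matrix handed out
        return m;               // concrete type, so no cast is needed on release
    }

    void releaseMatrix(Matrix* matrix) {
        if (mAllocated.find(matrix) == mAllocated.end())
            throw std::invalid_argument("No such matrix was allocated");
        mAllocated.erase(matrix);
        delete matrix;
    }

private:
    std::unordered_set<Matrix*> mAllocated;
};

int main() {
    Library lib;
    Matrix* m = lib.createMatrix(4, 4);
    lib.releaseMatrix(m);       // fine: the pointer came from createMatrix
    return 0;
}
```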

cubool/sources/core/matrix.cpp

Lines changed: 36 additions & 11 deletions
@@ -69,6 +69,13 @@ namespace cubool {
         CHECK_RAISE_ERROR(cols != nullptr || nvals == 0, InvalidArgument, "Null ptr cols array");
 
         this->releaseCache();
+
+        LogStream stream(*Library::getLogger());
+        stream << Logger::Level::Info
+               << "Matrix:build:" << this->getDebugMarker() << " "
+               << "isSorted=" << isSorted << ", "
+               << "noDuplicates=" << noDuplicates << LogStream::cmt;
+
         mHnd->build(rows, cols, nvals, isSorted, noDuplicates);
     }

@@ -98,7 +105,8 @@
         CHECK_RAISE_ERROR(nrows == this->getNrows(), InvalidArgument, "Result matrix has incompatible size for extracted sub-matrix range");
         CHECK_RAISE_ERROR(ncols == this->getNcols(), InvalidArgument, "Result matrix has incompatible size for extracted sub-matrix range");
 
-        this->commitCache();
+        other->commitCache();
+        this->releaseCache(); // Values of this matrix won't be used any more
 
         if (checkTime) {
             TIMER_ACTION(timer, mHnd->extractSubMatrix(*other->mHnd, i, j, nrows, ncols, false));
@@ -122,13 +130,18 @@
 
         CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Passed matrix does not belong to core matrix class");
 
+        if (this == other)
+            return;
+
         auto M = other->getNrows();
         auto N = other->getNcols();
 
         CHECK_RAISE_ERROR(M == this->getNrows(), InvalidArgument, "Cloned matrix has incompatible size");
         CHECK_RAISE_ERROR(N == this->getNcols(), InvalidArgument, "Cloned matrix has incompatible size");
 
-        this->commitCache();
+        other->commitCache();
+        this->releaseCache(); // Values of this matrix won't be used any more
+
         mHnd->clone(*other->mHnd);
     }

@@ -144,6 +157,7 @@
         CHECK_RAISE_ERROR(N == this->getNrows(), InvalidArgument, "Transposed matrix has incompatible size");
 
         this->commitCache();
+        this->releaseCache(); // Values of this matrix won't be used any more
 
         if (checkTime) {
             TIMER_ACTION(timer, mHnd->transpose(*other->mHnd, false));
@@ -171,7 +185,8 @@
         CHECK_RAISE_ERROR(M == this->getNrows(), InvalidArgument, "Matrix has incompatible size");
         CHECK_RAISE_ERROR(1 == this->getNcols(), InvalidArgument, "Matrix has incompatible size");
 
-        this->commitCache();
+        other->commitCache();
+        this->releaseCache(); // Values of this matrix won't be used any more
 
         if (checkTime) {
             TIMER_ACTION(timer, mHnd->reduce(*other->mHnd, false));
@@ -204,7 +219,13 @@
         CHECK_RAISE_ERROR(N == this->getNcols(), InvalidArgument, "Matrix has incompatible size for operation result");
         CHECK_RAISE_ERROR(T == b->getNrows(), InvalidArgument, "Cannot multiply passed matrices");
 
-        this->commitCache();
+        a->commitCache();
+        b->commitCache();
+
+        if (accumulate)
+            this->commitCache();
+        else
+            this->releaseCache();
 
         if (checkTime) {
             TIMER_ACTION(timer, mHnd->multiply(*a->mHnd, *b->mHnd, accumulate, false));
@@ -238,7 +259,9 @@
         CHECK_RAISE_ERROR(M * K == this->getNrows(), InvalidArgument, "Matrix has incompatible size for operation result");
         CHECK_RAISE_ERROR(N * T == this->getNcols(), InvalidArgument, "Matrix has incompatible size for operation result");
 
-        this->commitCache();
+        a->commitCache();
+        b->commitCache();
+        this->releaseCache();
 
         if (checkTime) {
             TIMER_ACTION(timer, mHnd->kronecker(*a->mHnd, *b->mHnd, false));
@@ -273,7 +296,9 @@
         CHECK_RAISE_ERROR(M == this->getNrows(), InvalidArgument, "Matrix has incompatible size for operation result");
         CHECK_RAISE_ERROR(N == this->getNcols(), InvalidArgument, "Matrix has incompatible size for operation result");
 
-        this->commitCache();
+        a->commitCache();
+        b->commitCache();
+        this->releaseCache();
 
         if (checkTime) {
             TIMER_ACTION(timer, mHnd->eWiseAdd(*a->mHnd, *b->mHnd, false));
@@ -339,17 +364,17 @@
         bool isSorted = false;
         bool noDuplicates = false;
 
-        // We will have to join old and new values
         if (mHnd->getNvals() > 0) {
-            // Build tmp matrix with new values
+            // We will have to join old and new values
+            // Create tmp matrix and merge values
+
             MatrixBase* tmp = mProvider->createMatrix(getNrows(), getNcols());
             tmp->build(mCachedI.data(), mCachedJ.data(), cachedNvals, isSorted, noDuplicates);
-
-            // Add new values to current matrix content
             mHnd->eWiseAdd(*mHnd, *tmp, false);
+            mProvider->releaseMatrix(tmp);
         }
-        // Otherwise, new values are used to build matrix content
         else {
+            // Otherwise, new values are used to build matrix content
             mHnd->build(mCachedI.data(), mCachedJ.data(), cachedNvals, isSorted, noDuplicates);
         }
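
The recurring change in this file is the cache policy around operations: operand matrices now commit their cached values before they are read, while the destination commits its cache only when its old content still contributes to the result (the accumulate path of multiply) and releases it otherwise, so values that are about to be overwritten are never merged. The sketch below illustrates that policy with invented toy types; the real code delegates the actual work to the mHnd backend.

```cpp
// Toy illustration of the commit/release cache policy; not the real cubool::Matrix.
#include <iostream>
#include <vector>

struct ToyMatrix {
    std::vector<int> cached;     // values queued by cached filling
    std::vector<int> committed;  // values already pushed to backend storage

    void commitCache() {         // merge queued values into the committed content
        committed.insert(committed.end(), cached.begin(), cached.end());
        cached.clear();
    }
    void releaseCache() {        // queued values will never be read: drop them
        cached.clear();
    }
};

// multiply-like operation: result (+)= a * b
void multiply(ToyMatrix& result, ToyMatrix& a, ToyMatrix& b, bool accumulate) {
    a.commitCache();             // operands are read, so their caches must be visible
    b.commitCache();
    if (accumulate)
        result.commitCache();    // old result content is part of the output
    else
        result.releaseCache();   // old result content is overwritten anyway
    // ... a real implementation would dispatch the multiplication to a backend here ...
}

int main() {
    ToyMatrix r, a, b;
    a.cached = {1, 2};
    b.cached = {3};
    r.cached = {9};
    multiply(r, a, b, /*accumulate=*/false);
    std::cout << "a committed: " << a.committed.size()        // 2
              << ", r cached after call: " << r.cached.size() // 0
              << "\n";
    return 0;
}
```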
