Skip to content

Commit 1c7a67d

Browse files
committed
[Code] Add seq ewiseadd and spgemm impl
1 parent 507a2b6 commit 1c7a67d

15 files changed

+286
-9
lines changed

cubool/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,7 @@ if (CUBOOL_WITH_SEQUENTIAL)
9999
sources/sequential/sq_reduce.hpp
100100
sources/sequential/sq_submatrix.cpp
101101
sources/sequential/sq_submatrix.hpp
102-
sources/sequential/sq_exclusive_scan.hpp
103-
)
102+
sources/sequential/sq_exclusive_scan.hpp)
104103
endif()
105104

106105
# Shared library object config

cubool/sources/cuda/matrix_csr.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ namespace cubool {
132132
auto other = dynamic_cast<const MatrixCsr*>(&otherBase);
133133

134134
CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Passed matrix does not belong to csr matrix class");
135+
CHECK_RAISE_ERROR(other != this, InvalidArgument, "Matrices must differ");
135136

136137
size_t M = other->getNrows();
137138
size_t N = other->getNcols();

cubool/sources/cuda/matrix_csr_extract_sub_matrix.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ namespace cubool {
3131
auto other = dynamic_cast<const MatrixCsr*>(&otherBase);
3232

3333
CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Provided matrix does not belong to matrix csr class");
34+
CHECK_RAISE_ERROR(other != this, InvalidArgument, "Matrices must differ");
3435

3536
assert(nrows > 0);
3637
assert(ncols > 0);

cubool/sources/sequential/sq_ewiseadd.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,98 @@
2323
/**********************************************************************************/
2424

2525
#include <sequential/sq_ewiseadd.hpp>
26+
#include <sequential/sq_exclusive_scan.hpp>
2627

2728
namespace cubool {
2829

30+
void sq_ewiseadd(const CsrData& a, const CsrData& b, CsrData& out) {
31+
out.rowOffsets.resize(a.nrows + 1, 0);
32+
33+
size_t nvals = 0;
34+
35+
// Count nnz of the result matrix to allocate memory
36+
for (index i = 0; i < a.nrows; i++) {
37+
index ak = a.rowOffsets[i];
38+
index bk = b.rowOffsets[i];
39+
index asize = a.rowOffsets[i + 1] - ak;
40+
index bsize = b.rowOffsets[i + 1] - bk;
41+
42+
const index* ar = &a.colIndices[ak];
43+
const index* br = &b.colIndices[bk];
44+
const index* arend = ar + asize;
45+
const index* brend = br + bsize;
46+
47+
index nvalsInRow = 0;
48+
49+
while (ar != arend && br != brend) {
50+
if (*ar == *br) {
51+
nvalsInRow++;
52+
ar++;
53+
br++;
54+
}
55+
else if (*ar < *br) {
56+
nvalsInRow++;
57+
ar++;
58+
}
59+
else {
60+
nvalsInRow++;
61+
br++;
62+
}
63+
}
64+
65+
nvalsInRow += (size_t)(arend - ar);
66+
nvalsInRow += (size_t)(brend - br);
67+
68+
nvals += nvalsInRow;
69+
out.rowOffsets[i] = nvalsInRow;
70+
}
71+
72+
// Eval row offsets
73+
sq_exclusive_scan(out.rowOffsets.begin(), out.rowOffsets.end(), 0);
74+
75+
// Allocate memory for values
76+
out.nvals = nvals;
77+
out.colIndices.resize(nvals);
78+
79+
// Fill sorted column indices
80+
size_t k = 0;
81+
for (index i = 0; i < a.nrows; i++) {
82+
const index* ar = &a.colIndices[a.rowOffsets[i]];
83+
const index* br = &b.colIndices[b.rowOffsets[i]];
84+
const index* arend = &a.colIndices[a.rowOffsets[i + 1]];
85+
const index* brend = &b.colIndices[b.rowOffsets[i + 1]];
86+
87+
while (ar != arend && br != brend) {
88+
if (*ar == *br) {
89+
out.colIndices[k] = *ar;
90+
k++;
91+
ar++;
92+
br++;
93+
}
94+
else if (*ar < *br) {
95+
out.colIndices[k] = *ar;
96+
k++;
97+
ar++;
98+
}
99+
else {
100+
out.colIndices[k] = *br;
101+
k++;
102+
br++;
103+
}
104+
}
105+
106+
while (ar != arend) {
107+
out.colIndices[k] = *ar;
108+
k++;
109+
ar++;
110+
}
111+
112+
while (br != brend) {
113+
out.colIndices[k] = *br;
114+
k++;
115+
br++;
116+
}
117+
}
118+
}
119+
29120
}

cubool/sources/sequential/sq_ewiseadd.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,19 @@
2525
#ifndef CUBOOL_SQ_EWISEADD_HPP
2626
#define CUBOOL_SQ_EWISEADD_HPP
2727

28+
#include <sequential/sq_csr_data.hpp>
29+
2830
namespace cubool {
2931

32+
/**
33+
* Element-wise addition of the matrices `a` and `b`.
34+
*
35+
* @param a Input matrix
36+
* @param b Input matrix
37+
* @param[out] out Where to store the result
38+
*/
39+
void sq_ewiseadd(const CsrData& a, const CsrData& b, CsrData& out);
40+
3041
}
3142

3243
#endif //CUBOOL_SQ_EWISEADD_HPP

cubool/sources/sequential/sq_kronecker.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ namespace cubool {
4040
size_t nvals = a.nvals * b.nvals;
4141

4242
out.nvals = nvals;
43+
out.rowOffsets.clear();
4344
out.rowOffsets.resize(a.nrows * b.nrows + 1, 0);
4445
out.colIndices.resize(nvals);
4546

cubool/sources/sequential/sq_matrix.cpp

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#include <sequential/sq_transpose.hpp>
2727
#include <sequential/sq_submatrix.hpp>
2828
#include <sequential/sq_kronecker.hpp>
29+
#include <sequential/sq_ewiseadd.hpp>
30+
#include <sequential/sq_spgemm.hpp>
2931
#include <sequential/sq_exclusive_scan.hpp>
3032
#include <core/error.hpp>
3133
#include <algorithm>
@@ -51,6 +53,7 @@ namespace cubool {
5153
auto nrows = mData.nrows;
5254
auto ncols = mData.ncols;
5355

56+
mData.rowOffsets.clear();
5457
mData.rowOffsets.resize(nrows + 1, 0);
5558
mData.colIndices.clear();
5659
mData.colIndices.reserve(nvals);
@@ -119,6 +122,7 @@ namespace cubool {
119122
auto other = dynamic_cast<const SqMatrix*>(&otherBase);
120123

121124
CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
125+
CHECK_RAISE_ERROR(other != this, InvalidArgument, "Matrices must differ");
122126

123127
auto M = this->getNrows();
124128
auto N = this->getNcols();
@@ -133,6 +137,7 @@ namespace cubool {
133137
auto other = dynamic_cast<const SqMatrix*>(&otherBase);
134138

135139
CHECK_RAISE_ERROR(other != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
140+
CHECK_RAISE_ERROR(other != this, InvalidArgument, "Matrices must differ");
136141

137142
auto M = other->getNrows();
138143
auto N = other->getNcols();
@@ -154,7 +159,12 @@ namespace cubool {
154159
assert(N == this->getNrows());
155160
assert(M == this->getNcols());
156161

157-
sq_transpose(other->mData, this->mData);
162+
CsrData out;
163+
out.nrows = this->getNrows();
164+
out.ncols = this->getNcols();
165+
166+
sq_transpose(other->mData, out);
167+
this->mData = std::move(out);
158168
}
159169

160170
void SqMatrix::reduce(const MatrixBase &otherBase) {
@@ -167,7 +177,36 @@ namespace cubool {
167177
}
168178

169179
void SqMatrix::multiply(const MatrixBase &aBase, const MatrixBase &bBase, bool accumulate) {
180+
auto a = dynamic_cast<const SqMatrix*>(&aBase);
181+
auto b = dynamic_cast<const SqMatrix*>(&bBase);
182+
183+
CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
184+
CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
185+
186+
auto M = a->getNrows();
187+
auto N = b->getNcols();
188+
189+
assert(a->getNcols() == b->getNrows());
190+
assert(M == this->getNrows());
191+
assert(N == this->getNcols());
192+
193+
CsrData out;
194+
out.nrows = this->getNrows();
195+
out.ncols = this->getNcols();
196+
197+
sq_spgemm(a->mData, b->mData, out);
198+
199+
if (accumulate) {
200+
CsrData out2;
201+
out2.nrows = this->getNrows();
202+
out2.ncols = this->getNcols();
170203

204+
sq_ewiseadd(this->mData, out, out2);
205+
this->mData = std::move(out2);
206+
}
207+
else {
208+
this->mData = std::move(out);
209+
}
171210
}
172211

173212
void SqMatrix::kronecker(const MatrixBase &aBase, const MatrixBase &bBase) {
@@ -185,11 +224,35 @@ namespace cubool {
185224
assert(M * K == this->getNrows());
186225
assert(N * T == this->getNcols());
187226

188-
sq_kronecker(a->mData, b->mData, this->mData);
227+
CsrData out;
228+
out.nrows = this->getNrows();
229+
out.ncols = this->getNcols();
230+
231+
sq_kronecker(a->mData, b->mData, out);
232+
this->mData = std::move(out);
189233
}
190234

191235
void SqMatrix::eWiseAdd(const MatrixBase &aBase, const MatrixBase &bBase) {
236+
auto a = dynamic_cast<const SqMatrix*>(&aBase);
237+
auto b = dynamic_cast<const SqMatrix*>(&bBase);
238+
239+
CHECK_RAISE_ERROR(a != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
240+
CHECK_RAISE_ERROR(b != nullptr, InvalidArgument, "Provided matrix does not belongs to sequential matrix class");
241+
242+
auto M = a->getNrows();
243+
auto N = a->getNcols();
244+
245+
assert(M== this->getNrows());
246+
assert(N== this->getNcols());
247+
assert(M== b->getNrows());
248+
assert(N== b->getNcols());
249+
250+
CsrData out;
251+
out.nrows = this->getNrows();
252+
out.ncols = this->getNcols();
192253

254+
sq_ewiseadd(a->mData, b->mData, out);
255+
this->mData = std::move(out);
193256
}
194257

195258
index SqMatrix::getNrows() const {

cubool/sources/sequential/sq_spgemm.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,73 @@
2323
/**********************************************************************************/
2424

2525
#include <sequential/sq_spgemm.hpp>
26+
#include <sequential/sq_exclusive_scan.hpp>
27+
#include <algorithm>
28+
#include <limits>
2629

2730
namespace cubool {
2831

32+
void sq_spgemm(const CsrData& a, const CsrData& b, CsrData& out) {
33+
index max = std::numeric_limits<index>::max();
34+
35+
// Evaluate total nnz and nnz per row
36+
size_t nvals = 0;
37+
out.rowOffsets.resize(a.nrows + 1);
38+
std::vector<index> mask(b.ncols, max);
39+
40+
for (index i = 0; i < a.nrows; i++) {
41+
size_t nvalsInRow = 0;
42+
43+
for (index ak = a.rowOffsets[i]; ak < a.rowOffsets[i + 1]; ak++) {
44+
index k = a.colIndices[ak];
45+
46+
for (index bk = b.rowOffsets[k]; bk < b.rowOffsets[k + 1]; bk++) {
47+
index j = b.colIndices[bk];
48+
49+
// Do not compute col nnz twice
50+
if (mask[j] != i) {
51+
mask[j] = i;
52+
nvalsInRow += 1;
53+
}
54+
}
55+
}
56+
57+
nvals += nvalsInRow;
58+
out.rowOffsets[i] = nvalsInRow;
59+
}
60+
61+
// Row offsets
62+
sq_exclusive_scan(out.rowOffsets.begin(), out.rowOffsets.end(), 0);
63+
64+
out.colIndices.resize(nvals);
65+
66+
// Fill column indices per row and sort
67+
mask.clear();
68+
mask.resize(b.ncols, max);
69+
70+
for (index i = 0; i < a.nrows; i++) {
71+
size_t id = 0;
72+
size_t first = out.rowOffsets[i];
73+
size_t last = out.rowOffsets[i + 1];
74+
75+
for (index ak = a.rowOffsets[i]; ak < a.rowOffsets[i + 1]; ak++) {
76+
index k = a.colIndices[ak];
77+
78+
for (index bk = b.rowOffsets[k]; bk < b.rowOffsets[k + 1]; bk++) {
79+
index j = b.colIndices[bk];
80+
81+
// Do not compute col nnz twice
82+
if (mask[j] != i) {
83+
mask[j] = i;
84+
out.colIndices[first + id] = j;
85+
id += 1;
86+
}
87+
}
88+
}
89+
90+
// Sort indices within row
91+
std::sort(out.colIndices.begin() + first, out.colIndices.begin() + last);
92+
}
93+
}
94+
2995
}

cubool/sources/sequential/sq_spgemm.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,19 @@
2525
#ifndef CUBOOL_SQ_SPGEMM_HPP
2626
#define CUBOOL_SQ_SPGEMM_HPP
2727

28+
#include <sequential/sq_csr_data.hpp>
29+
2830
namespace cubool {
2931

32+
/**
33+
* Matrix-matrix multiplication of `a` and `b`.
34+
*
35+
* @param a Input matrix
36+
* @param b Input matrix
37+
* @param[out] out Where to store result
38+
*/
39+
void sq_spgemm(const CsrData& a, const CsrData& b, CsrData& out);
40+
3041
}
3142

3243
#endif //CUBOOL_SQ_SPGEMM_HPP

cubool/sources/sequential/sq_submatrix.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ namespace cubool {
4343
}
4444

4545
sub.nvals = nvals;
46-
sub.rowOffsets.resize(nrows + 1);
46+
sub.rowOffsets.clear();
47+
sub.rowOffsets.resize(nrows + 1, 0);
4748
sub.colIndices.resize(nvals);
4849

4950
size_t idx = 0;

0 commit comments

Comments
 (0)