25
25
#include < cuda/matrix_csr.hpp>
26
26
#include < core/error.hpp>
27
27
#include < utils/exclusive_scan.hpp>
28
+ #include < utils/timer.hpp>
28
29
#include < algorithm>
29
30
30
31
namespace cubool {
@@ -38,131 +39,6 @@ namespace cubool {
38
39
RAISE_ERROR (NotImplemented, " This function is not supported for this matrix class" );
39
40
}
40
41
41
- void MatrixCsr::build (const index *rows, const index *cols, size_t nvals, bool isSorted, bool noDuplicates) {
42
- if (nvals == 0 ) {
43
- mMatrixImpl .zero_dim (); // no content, empty matrix
44
- return ;
45
- }
46
-
47
- thrust::host_vector<index, HostAlloc<index>> rowOffsets;
48
- rowOffsets.resize (getNrows () + 1 , 0 );
49
-
50
- thrust::host_vector<index, HostAlloc<index>> colIndices;
51
- colIndices.resize (nvals);
52
-
53
- // Compute nnz per row
54
- for (size_t idx = 0 ; idx < nvals; idx++) {
55
- index i = rows[idx];
56
- index j = cols[idx];
57
-
58
- CHECK_RAISE_ERROR (i < getNrows () && j < getNcols (), InvalidArgument, " Out of matrix bounds value" );
59
-
60
- rowOffsets[i] += 1 ;
61
- }
62
-
63
- // Exclusive scan to eval rows offsets
64
- ::cubool::exclusive_scan (rowOffsets.begin(), rowOffsets.end(), 0);
65
-
66
- // Write offsets for cols
67
- std::vector<size_t > writeOffsets (getNrows (), 0 );
68
-
69
- for (size_t idx = 0 ; idx < nvals; idx++) {
70
- index i = rows[idx];
71
- index j = cols[idx];
72
-
73
- colIndices[rowOffsets[i] + writeOffsets[i]] = j;
74
- writeOffsets[i] += 1 ;
75
- }
76
-
77
- if (!isSorted) {
78
- for (size_t i = 0 ; i < getNrows (); i++) {
79
- auto begin = rowOffsets[i];
80
- auto end = rowOffsets[i + 1 ];
81
-
82
- // Sort col values within row
83
- thrust::sort (colIndices.begin () + begin, colIndices.begin () + end, [](const index& a, const index& b) {
84
- return a < b;
85
- });
86
- }
87
- }
88
-
89
- // Reduce duplicated values
90
- if (!noDuplicates) {
91
- size_t unique = 0 ;
92
- for (size_t i = 0 ; i < getNrows (); i++) {
93
- index prev = std::numeric_limits<index>::max ();
94
-
95
- for (size_t k = rowOffsets[i]; k < rowOffsets[i + 1 ]; k++) {
96
- if (prev != colIndices[k]) {
97
- unique += 1 ;
98
- }
99
-
100
- prev = colIndices[k];
101
- }
102
- }
103
-
104
- thrust::host_vector<index, HostAlloc<index>> rowOffsetsReduced;
105
- rowOffsetsReduced.resize (getNrows () + 1 , 0 );
106
-
107
- thrust::host_vector<index, HostAlloc<index>> colIndicesReduced;
108
- colIndicesReduced.reserve (unique);
109
-
110
- for (size_t i = 0 ; i < getNrows (); i++) {
111
- index prev = std::numeric_limits<index>::max ();
112
-
113
- for (size_t k = rowOffsets[i]; k < rowOffsets[i + 1 ]; k++) {
114
- if (prev != colIndices[k]) {
115
- rowOffsetsReduced[i] += 1 ;
116
- colIndicesReduced.push_back (colIndices[k]);
117
- }
118
-
119
- prev = colIndices[k];
120
- }
121
- }
122
-
123
- // Exclusive scan to eval rows offsets
124
- ::cubool::exclusive_scan (rowOffsetsReduced.begin(), rowOffsetsReduced.end(), 0);
125
-
126
- // Now result in respective place
127
- std::swap (rowOffsets, rowOffsetsReduced);
128
- std::swap (colIndices, colIndicesReduced);
129
- }
130
-
131
- // Create device buffers and copy data from the cpu side
132
- thrust::device_vector<index, DeviceAlloc<index>> rowsDeviceVec = rowOffsets;
133
- thrust::device_vector<index, DeviceAlloc<index>> colsDeviceVec = colIndices;
134
-
135
- // Move actual data to the matrix implementation
136
- mMatrixImpl = std::move (MatrixImplType (std::move (colsDeviceVec), std::move (rowsDeviceVec), getNrows (), getNcols (), colIndices.size ()));
137
- }
138
-
139
- void MatrixCsr::extract (index *rows, index *cols, size_t &nvals) {
140
- assert (nvals >= getNvals ());
141
-
142
- // Set nvals to the exact number of nnz values
143
- nvals = getNvals ();
144
-
145
- if (nvals > 0 ) {
146
- auto & rowsDeviceVec = mMatrixImpl .m_row_index ;
147
- auto & colsDeviceVec = mMatrixImpl .m_col_index ;
148
-
149
- // Copy data to the host
150
- thrust::host_vector<index, HostAlloc<index>> rowsVec = rowsDeviceVec;
151
- thrust::host_vector<index, HostAlloc<index>> colsVec = colsDeviceVec;
152
-
153
- // Iterate over csr formatted data
154
- size_t idx = 0 ;
155
- for (index i = 0 ; i < getNrows (); i++) {
156
- for (index j = rowsVec[i]; j < rowsVec[i + 1 ]; j++) {
157
- rows[idx] = i;
158
- cols[idx] = colsVec[j];
159
-
160
- idx += 1 ;
161
- }
162
- }
163
- }
164
- }
165
-
166
42
void MatrixCsr::clone (const MatrixBase &otherBase) {
167
43
auto other = dynamic_cast <const MatrixCsr*>(&otherBase);
168
44
@@ -190,6 +66,16 @@ namespace cubool {
190
66
}
191
67
}
192
68
69
+ void MatrixCsr::clearAndResizeStorageToDim () const {
70
+ if (mMatrixImpl .m_vals > 0 ) {
71
+ // Release only if have some nnz values
72
+ mMatrixImpl .zero_dim ();
73
+ }
74
+
75
+ // Normally resize if no storage is actually allocated
76
+ this ->resizeStorageToDim ();
77
+ }
78
+
193
79
/** @return The value of the mNrows member */
index MatrixCsr::getNrows() const {
    return this->mNrows;
}
@@ -210,4 +96,24 @@ namespace cubool {
210
96
return mMatrixImpl .m_vals == 0 ;
211
97
}
212
98
99
+ void MatrixCsr::transferToDevice (const std::vector<index> &rowOffsets, const std::vector<index> &colIndices) {
100
+ // Create device buffers and copy data from the cpu side
101
+ thrust::device_vector<index, DeviceAlloc<index>> rowsDeviceVec (rowOffsets.size ());
102
+ thrust::device_vector<index, DeviceAlloc<index>> colsDeviceVec (colIndices.size ());
103
+
104
+ thrust::copy (rowOffsets.begin (), rowOffsets.end (), rowsDeviceVec.begin ());
105
+ thrust::copy (colIndices.begin (), colIndices.end (), colsDeviceVec.begin ());
106
+
107
+ // Move actual data to the matrix implementation
108
+ mMatrixImpl = std::move (MatrixImplType (std::move (colsDeviceVec), std::move (rowsDeviceVec), getNrows (), getNcols (), colIndices.size ()));
109
+ }
110
+
111
+ void MatrixCsr::transferFromDevice (std::vector<index> &rowOffsets, std::vector<index> &colIndices) const {
112
+ rowOffsets.resize (mMatrixImpl .m_row_index .size ());
113
+ colIndices.resize (mMatrixImpl .m_col_index .size ());
114
+
115
+ thrust::copy (mMatrixImpl .m_row_index .begin (), mMatrixImpl .m_row_index .end (), rowOffsets.begin ());
116
+ thrust::copy (mMatrixImpl .m_col_index .begin (), mMatrixImpl .m_col_index .end (), colIndices.begin ());
117
+ }
118
+
213
119
}
0 commit comments