@@ -94,13 +94,14 @@ namespace cv { namespace cudev {
94
94
/** @brief Builds a texture over a linear (1D) device buffer.
 *
 * Forwards all arguments to the linear-memory create() overload, which fills in
 * the resource description and creates the texture object.
 *
 * @param sizeInBytes size of the buffer pointed to by @p data, in bytes.
 * @param data pointer to the buffer the texture reads from (presumably device memory - confirm against callers).
 * @param normalizedCoords forwarded to the texture descriptor's normalizedCoords field.
 * @param filterMode forwarded to the texture descriptor's filterMode field.
 * @param addressMode forwarded to the texture descriptor's addressMode entries.
 * @param readMode forwarded to the texture descriptor's readMode field.
 */
__host__ UniqueTexture(const size_t sizeInBytes,
                       T* data,
                       const bool normalizedCoords = false,
                       const cudaTextureFilterMode filterMode = cudaFilterModePoint,
                       const cudaTextureAddressMode addressMode = cudaAddressModeClamp,
                       const cudaTextureReadMode readMode = cudaReadModeElementType)
{
    create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode);
}
99
99
100
100
__host__ ~UniqueTexture () {
101
101
if (tex != cudaTextureObject_t ()) {
102
102
try {
103
103
CV_CUDEV_SAFE_CALL (cudaDestroyTextureObject (tex));
104
+ CV_CUDEV_SAFE_CALL (cudaFree (internalSrc));
104
105
}
105
106
catch (const cv::Exception& ex) {
106
107
std::ostringstream os;
@@ -132,39 +133,62 @@ namespace cv { namespace cudev {
132
133
/** @brief True when the stored texture handle differs from a value-initialized cudaTextureObject_t. */
__host__ explicit operator bool() const noexcept
{
    const cudaTextureObject_t emptyHandle = cudaTextureObject_t();
    return tex != emptyHandle;
}
133
134
134
135
private:
136
+ __host__ void createTextureObject (cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
137
+ const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
138
+ {
139
+ cudaTextureDesc texDescr;
140
+ std::memset (&texDescr, 0 , sizeof (texDescr));
141
+ texDescr.normalizedCoords = normalizedCoords;
142
+ texDescr.filterMode = filterMode;
143
+ texDescr.addressMode [0 ] = addressMode;
144
+ texDescr.addressMode [1 ] = addressMode;
145
+ texDescr.addressMode [2 ] = addressMode;
146
+ texDescr.readMode = readMode;
147
+ CV_CUDEV_SAFE_CALL (cudaCreateTextureObject (&tex, &texRes, &texDescr, 0 ));
148
+ }
149
+
150
+ template <class T1 >
151
+ __host__ void create (const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
152
+ const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
153
+ {
154
+ cudaResourceDesc texRes;
155
+ std::memset (&texRes, 0 , sizeof (texRes));
156
+ texRes.resType = cudaResourceTypeLinear;
157
+ texRes.res .linear .devPtr = data;
158
+ texRes.res .linear .sizeInBytes = sizeInBytes;
159
+ texRes.res .linear .desc = cudaCreateChannelDesc<T1>();
160
+ createTextureObject (texRes, normalizedCoords, filterMode, addressMode, readMode);
161
+ }
162
+
163
+ __host__ void create (const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
164
+ const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
165
+ {
166
+ create<uint2>(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode);
167
+ }
135
168
136
169
template <class T1 >
137
170
__host__ void create (const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
138
171
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
139
172
{
140
173
cudaResourceDesc texRes;
141
174
std::memset (&texRes, 0 , sizeof (texRes));
142
- if (rows == 1 ) {
143
- CV_Assert (rows == 1 && cols*sizeof (T) == step);
144
- texRes.resType = cudaResourceTypeLinear;
145
- texRes.res .linear .devPtr = data;
146
- texRes.res .linear .sizeInBytes = step;
147
- texRes.res .linear .desc = cudaCreateChannelDesc<T1>();
175
+ texRes.resType = cudaResourceTypePitch2D;
176
+ texRes.res .pitch2D .height = rows;
177
+ texRes.res .pitch2D .width = cols;
178
+ // temporary fix for single row/columns until TexturePtr is reworked
179
+ if (rows == 1 || cols == 1 ) {
180
+ size_t dStep = 0 ;
181
+ CV_CUDEV_SAFE_CALL (cudaMallocPitch (&internalSrc, &dStep, cols * sizeof (T1), rows));
182
+ CV_CUDEV_SAFE_CALL (cudaMemcpy2D (internalSrc, dStep, data, step, cols * sizeof (T1), rows, cudaMemcpyDeviceToDevice));
183
+ texRes.res .pitch2D .devPtr = internalSrc;
184
+ texRes.res .pitch2D .pitchInBytes = dStep;
148
185
}
149
186
else {
150
- texRes.resType = cudaResourceTypePitch2D;
151
187
texRes.res .pitch2D .devPtr = data;
152
- texRes.res .pitch2D .height = rows;
153
- texRes.res .pitch2D .width = cols;
154
188
texRes.res .pitch2D .pitchInBytes = step;
155
- texRes.res .pitch2D .desc = cudaCreateChannelDesc<T1>();
156
189
}
157
-
158
- cudaTextureDesc texDescr;
159
- std::memset (&texDescr, 0 , sizeof (texDescr));
160
- texDescr.normalizedCoords = normalizedCoords;
161
- texDescr.filterMode = filterMode;
162
- texDescr.addressMode [0 ] = addressMode;
163
- texDescr.addressMode [1 ] = addressMode;
164
- texDescr.addressMode [2 ] = addressMode;
165
- texDescr.readMode = readMode;
166
-
167
- CV_CUDEV_SAFE_CALL (cudaCreateTextureObject (&tex, &texRes, &texDescr, 0 ));
190
+ texRes.res .pitch2D .desc = cudaCreateChannelDesc<T1>();
191
+ createTextureObject (texRes, normalizedCoords, filterMode, addressMode, readMode);
168
192
}
169
193
170
194
__host__ void create (const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
@@ -175,6 +199,7 @@ namespace cv { namespace cudev {
175
199
176
200
private:
177
201
cudaTextureObject_t tex;
202
+ T* internalSrc = 0 ;
178
203
};
179
204
180
205
/* * @brief sharable smart CUDA texture object
0 commit comments