Skip to content

Commit 325e6ab

Browse files
committed
Merge pull request #3418 from cudawarped:fix_issue_3412
2 parents e247b68 + abd3ca8 commit 325e6ab

File tree

3 files changed

+57
-32
lines changed

3 files changed

+57
-32
lines changed

modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -876,17 +876,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
876876
Texture texIyy(kLevelHeight, kLevelWidth, Iyy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
877877
Texture texIy0(kLevelHeight, kLevelWidth, Iy0.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
878878
Texture texIxy(kLevelHeight, kLevelWidth, Ixy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
879-
Texture texDiffX(1, kLevelSizeInBytes / sizeof(float), diffusivity_x.ptr(), kLevelSizeInBytes);
880-
Texture texDiffY(1, kLevelSizeInBytes / sizeof(float), diffusivity_y.ptr(), kLevelSizeInBytes);
879+
Texture texDiffX(kLevelSizeInBytes, diffusivity_x.ptr());
880+
Texture texDiffY(kLevelSizeInBytes, diffusivity_y.ptr());
881881

882882
// flow
883-
Texture texU(1, kLevelSizeInBytes / sizeof(float), ptrU->ptr(), kLevelSizeInBytes);
884-
Texture texV(1, kLevelSizeInBytes / sizeof(float), ptrV->ptr(), kLevelSizeInBytes);
883+
Texture texU(kLevelSizeInBytes, ptrU->ptr());
884+
Texture texV(kLevelSizeInBytes, ptrV->ptr());
885885
// flow increments
886-
Texture texDu(1, kLevelSizeInBytes / sizeof(float), du.ptr(), kLevelSizeInBytes);
887-
Texture texDv(1, kLevelSizeInBytes / sizeof(float), dv.ptr(), kLevelSizeInBytes);
888-
Texture texDuNew(1, kLevelSizeInBytes / sizeof(float), du_new.ptr(), kLevelSizeInBytes);
889-
Texture texDvNew(1, kLevelSizeInBytes / sizeof(float), dv_new.ptr(), kLevelSizeInBytes);
886+
Texture texDu(kLevelSizeInBytes, du.ptr());
887+
Texture texDv(kLevelSizeInBytes, dv.ptr());
888+
Texture texDuNew(kLevelSizeInBytes, du_new.ptr());
889+
Texture texDvNew(kLevelSizeInBytes, dv_new.ptr());
890890

891891
dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT));
892892
dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT);

modules/cudawarping/test/test_remap.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ CUDA_TEST_P(Remap, Accuracy)
173173

174174
INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine(
175175
ALL_DEVICES,
176-
DIFFERENT_SIZES,
176+
DIFFERENT_SIZES_EXTRA,
177177
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
178178
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
179179
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
@@ -198,7 +198,7 @@ CUDA_TEST_P(RemapOutOfScope, Regression_18224)
198198

199199
INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine(
200200
ALL_DEVICES,
201-
DIFFERENT_SIZES,
201+
DIFFERENT_SIZES_EXTRA,
202202
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
203203
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)),
204204
testing::Values(BorderType(cv::BORDER_CONSTANT)),

modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,14 @@ namespace cv { namespace cudev {
9494
__host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint,
9595
const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType)
9696
{
97-
create(1, static_cast<int>(sizeInBytes/sizeof(T)), data, sizeInBytes, normalizedCoords, filterMode, addressMode, readMode);
97+
create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode);
9898
}
9999

100100
__host__ ~UniqueTexture() {
101101
if (tex != cudaTextureObject_t()) {
102102
try {
103103
CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex));
104+
CV_CUDEV_SAFE_CALL(cudaFree(internalSrc));
104105
}
105106
catch (const cv::Exception& ex) {
106107
std::ostringstream os;
@@ -132,39 +133,62 @@ namespace cv { namespace cudev {
132133
__host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); }
133134

134135
private:
136+
__host__ void createTextureObject(cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
137+
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
138+
{
139+
cudaTextureDesc texDescr;
140+
std::memset(&texDescr, 0, sizeof(texDescr));
141+
texDescr.normalizedCoords = normalizedCoords;
142+
texDescr.filterMode = filterMode;
143+
texDescr.addressMode[0] = addressMode;
144+
texDescr.addressMode[1] = addressMode;
145+
texDescr.addressMode[2] = addressMode;
146+
texDescr.readMode = readMode;
147+
CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0));
148+
}
149+
150+
template <class T1>
151+
__host__ void create(const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
152+
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
153+
{
154+
cudaResourceDesc texRes;
155+
std::memset(&texRes, 0, sizeof(texRes));
156+
texRes.resType = cudaResourceTypeLinear;
157+
texRes.res.linear.devPtr = data;
158+
texRes.res.linear.sizeInBytes = sizeInBytes;
159+
texRes.res.linear.desc = cudaCreateChannelDesc<T1>();
160+
createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode);
161+
}
162+
163+
__host__ void create(const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
164+
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
165+
{
166+
create<uint2>(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode);
167+
}
135168

136169
template <class T1>
137170
__host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
138171
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
139172
{
140173
cudaResourceDesc texRes;
141174
std::memset(&texRes, 0, sizeof(texRes));
142-
if (rows == 1) {
143-
CV_Assert(rows == 1 && cols*sizeof(T) == step);
144-
texRes.resType = cudaResourceTypeLinear;
145-
texRes.res.linear.devPtr = data;
146-
texRes.res.linear.sizeInBytes = step;
147-
texRes.res.linear.desc = cudaCreateChannelDesc<T1>();
175+
texRes.resType = cudaResourceTypePitch2D;
176+
texRes.res.pitch2D.height = rows;
177+
texRes.res.pitch2D.width = cols;
178+
// temporary fix for single row/columns until TexturePtr is reworked
179+
if (rows == 1 || cols == 1) {
180+
size_t dStep = 0;
181+
CV_CUDEV_SAFE_CALL(cudaMallocPitch(&internalSrc, &dStep, cols * sizeof(T1), rows));
182+
CV_CUDEV_SAFE_CALL(cudaMemcpy2D(internalSrc, dStep, data, step, cols * sizeof(T1), rows, cudaMemcpyDeviceToDevice));
183+
texRes.res.pitch2D.devPtr = internalSrc;
184+
texRes.res.pitch2D.pitchInBytes = dStep;
148185
}
149186
else {
150-
texRes.resType = cudaResourceTypePitch2D;
151187
texRes.res.pitch2D.devPtr = data;
152-
texRes.res.pitch2D.height = rows;
153-
texRes.res.pitch2D.width = cols;
154188
texRes.res.pitch2D.pitchInBytes = step;
155-
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T1>();
156189
}
157-
158-
cudaTextureDesc texDescr;
159-
std::memset(&texDescr, 0, sizeof(texDescr));
160-
texDescr.normalizedCoords = normalizedCoords;
161-
texDescr.filterMode = filterMode;
162-
texDescr.addressMode[0] = addressMode;
163-
texDescr.addressMode[1] = addressMode;
164-
texDescr.addressMode[2] = addressMode;
165-
texDescr.readMode = readMode;
166-
167-
CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0));
190+
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T1>();
191+
createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode);
168192
}
169193

170194
__host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
@@ -175,6 +199,7 @@ namespace cv { namespace cudev {
175199

176200
private:
177201
cudaTextureObject_t tex;
202+
T* internalSrc = 0;
178203
};
179204

180205
/** @brief sharable smart CUDA texture object

0 commit comments

Comments
 (0)