Skip to content

Commit f769fe4

Browse files
Merge pull request #183 from CVCUDA/v0.10.1-beta
Merge patched release code for v0.10.1 into main
2 parents 669197a + 574e42c commit f769fe4

File tree

7 files changed

+708
-938
lines changed

7 files changed

+708
-938
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
[![License](https://img.shields.io/badge/License-Apache_2.0-yellowgreen.svg)](https://opensource.org/licenses/Apache-2.0)
2020

21-
![Version](https://img.shields.io/badge/Version-v0.10.0--beta-blue)
21+
![Version](https://img.shields.io/badge/Version-v0.10.1--beta-blue)
2222

2323
![Platform](https://img.shields.io/badge/Platform-linux--64_%7C_win--64_wsl2%7C_aarch64-gray)
2424

bench/BenchCvtColor.cpp

Lines changed: 31 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -21,158 +21,63 @@
2121

2222
#include <nvbench/nvbench.cuh>
2323

24-
#include <map>
25-
#include <stdexcept>
26-
#include <tuple>
27-
28-
inline static std::tuple<NVCVColorConversionCode, NVCVImageFormat, NVCVImageFormat> StringToFormats(
29-
const std::string &str)
30-
{
31-
// clang-format off
32-
static const std::map<std::string, std::tuple<NVCVColorConversionCode, NVCVImageFormat, NVCVImageFormat>> codeMap{
33-
{ "RGB2BGR", {NVCV_COLOR_RGB2BGR, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_BGR8 }},
34-
{ "RGB2RGBA", {NVCV_COLOR_RGB2RGBA, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_RGBA8}},
35-
{ "RGBA2RGB", {NVCV_COLOR_RGBA2RGB, NVCV_IMAGE_FORMAT_RGBA8, NVCV_IMAGE_FORMAT_RGB8 }},
36-
{ "RGB2GRAY", {NVCV_COLOR_RGB2GRAY, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_Y8 }},
37-
{ "GRAY2RGB", {NVCV_COLOR_GRAY2RGB, NVCV_IMAGE_FORMAT_Y8, NVCV_IMAGE_FORMAT_RGB8 }},
38-
{ "RGB2HSV", {NVCV_COLOR_RGB2HSV, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_HSV8 }},
39-
{ "HSV2RGB", {NVCV_COLOR_HSV2RGB, NVCV_IMAGE_FORMAT_HSV8, NVCV_IMAGE_FORMAT_RGB8 }},
40-
{ "RGB2YUV", {NVCV_COLOR_RGB2YUV, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_YUV8 }},
41-
{ "YUV2RGB", {NVCV_COLOR_YUV2RGB, NVCV_IMAGE_FORMAT_YUV8, NVCV_IMAGE_FORMAT_RGB8 }},
42-
{"RGB2YUV_NV12", {NVCV_COLOR_RGB2YUV_NV12, NVCV_IMAGE_FORMAT_RGB8, NVCV_IMAGE_FORMAT_NV12 }},
43-
{"YUV2RGB_NV12", {NVCV_COLOR_YUV2RGB_NV12, NVCV_IMAGE_FORMAT_NV12, NVCV_IMAGE_FORMAT_RGB8 }},
44-
};
45-
// clang-format on
46-
47-
if (auto it = codeMap.find(str); it != codeMap.end())
48-
{
49-
return it->second;
50-
}
51-
else
52-
{
53-
throw std::invalid_argument("Unrecognized color code");
54-
}
55-
}
56-
57-
template<typename BT>
58-
inline float BytesPerPixel(NVCVImageFormat imgFormat)
59-
{
60-
switch (imgFormat)
61-
{
62-
#define CVCUDA_BYTES_PER_PIXEL_CASE(FORMAT, BYTES) \
63-
case FORMAT: \
64-
return BYTES * sizeof(BT)
65-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_RGB8, 3);
66-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_BGR8, 3);
67-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_HSV8, 3);
68-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_RGBA8, 4);
69-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_YUV8, 3);
70-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_NV12, 1.5f);
71-
CVCUDA_BYTES_PER_PIXEL_CASE(NVCV_IMAGE_FORMAT_Y8, 1);
72-
#undef CVCUDA_BYTES_PER_PIXEL_CASE
73-
default:
74-
throw std::invalid_argument("Unrecognized format");
75-
}
76-
}
77-
78-
// Adapted from src/util/TensorDataUtils.hpp
79-
inline static nvcv::Tensor CreateTensor(int numImages, int imgWidth, int imgHeight, const nvcv::ImageFormat &imgFormat)
80-
{
81-
if (imgFormat == NVCV_IMAGE_FORMAT_NV12 || imgFormat == NVCV_IMAGE_FORMAT_NV12_ER
82-
|| imgFormat == NVCV_IMAGE_FORMAT_NV21 || imgFormat == NVCV_IMAGE_FORMAT_NV21_ER)
83-
{
84-
int height420 = (imgHeight * 3) / 2;
85-
if (height420 % 3 != 0 || imgWidth % 2 != 0)
86-
{
87-
throw std::invalid_argument("Invalid height");
88-
}
89-
90-
return nvcv::Tensor(numImages, {imgWidth, height420}, nvcv::ImageFormat(NVCV_IMAGE_FORMAT_Y8));
91-
}
92-
else
93-
{
94-
return nvcv::Tensor(numImages, {imgWidth, imgHeight}, imgFormat);
95-
}
96-
}
97-
98-
template<typename BT>
99-
inline void CvtColor(nvbench::state &state, nvbench::type_list<BT>)
24+
template<typename T>
25+
inline void CvtColor(nvbench::state &state, nvbench::type_list<T>)
10026
try
10127
{
10228
long3 shape = benchutils::GetShape<3>(state.get_string("shape"));
10329
long varShape = state.get_int64("varShape");
104-
std::tuple<NVCVColorConversionCode, NVCVImageFormat, NVCVImageFormat> formats
105-
= StringToFormats(state.get_string("code"));
10630

107-
NVCVColorConversionCode code = std::get<0>(formats);
108-
nvcv::ImageFormat inFormat{std::get<1>(formats)};
109-
nvcv::ImageFormat outFormat{std::get<2>(formats)};
31+
using BT = typename nvcv::cuda::BaseType<T>;
32+
33+
int ch = nvcv::cuda::NumElements<T>;
11034

111-
state.add_global_memory_reads(shape.x * shape.y * shape.z * BytesPerPixel<BT>(inFormat));
112-
state.add_global_memory_writes(shape.x * shape.y * shape.z * BytesPerPixel<BT>(outFormat));
35+
NVCVColorConversionCode code = ch == 3 ? NVCV_COLOR_BGR2RGB : NVCV_COLOR_BGRA2RGBA;
36+
37+
state.add_global_memory_reads(shape.x * shape.y * shape.z * sizeof(T));
38+
state.add_global_memory_writes(shape.x * shape.y * shape.z * sizeof(T));
11339

11440
cvcuda::CvtColor op;
11541

42+
// clang-format off
43+
11644
if (varShape < 0) // negative var shape means use Tensor
11745
{
118-
nvcv::Tensor src = CreateTensor(shape.x, shape.z, shape.y, inFormat);
119-
nvcv::Tensor dst = CreateTensor(shape.x, shape.z, shape.y, outFormat);
46+
nvcv::Tensor src({{shape.x, shape.y, shape.z, ch}, "NHWC"}, benchutils::GetDataType<BT>());
47+
nvcv::Tensor dst({{shape.x, shape.y, shape.z, ch}, "NHWC"}, benchutils::GetDataType<BT>());
12048

12149
benchutils::FillTensor<BT>(src, benchutils::RandomValues<BT>());
12250

123-
state.exec(nvbench::exec_tag::sync,
124-
[&op, &src, &dst, &code](nvbench::launch &launch) { op(launch.get_stream(), src, dst, code); });
51+
state.exec(nvbench::exec_tag::sync, [&op, &src, &dst, &code](nvbench::launch &launch)
52+
{
53+
op(launch.get_stream(), src, dst, code);
54+
});
12555
}
12656
else // zero and positive var shape means use ImageBatchVarShape
12757
{
128-
if (inFormat.chromaSubsampling() != nvcv::ChromaSubsampling::CSS_444
129-
|| outFormat.chromaSubsampling() != nvcv::ChromaSubsampling::CSS_444)
130-
{
131-
state.skip("Skipping formats that have subsampled planes for the varshape benchmark");
132-
}
133-
134-
std::vector<nvcv::Image> imgSrc;
135-
std::vector<nvcv::Image> imgDst;
136-
nvcv::ImageBatchVarShape src(shape.x);
137-
nvcv::ImageBatchVarShape dst(shape.x);
138-
std::vector<std::vector<uint8_t>> srcVec(shape.x);
58+
nvcv::ImageBatchVarShape src(shape.x);
59+
nvcv::ImageBatchVarShape dst(shape.x);
13960

140-
auto randomValuesU8 = benchutils::RandomValues<uint8_t>();
61+
benchutils::FillImageBatch<T>(src, long2{shape.z, shape.y}, long2{varShape, varShape},
62+
benchutils::RandomValues<T>());
63+
dst.pushBack(src.begin(), src.end());
14164

142-
for (int i = 0; i < shape.x; i++)
65+
state.exec(nvbench::exec_tag::sync, [&op, &src, &dst, &code](nvbench::launch &launch)
14366
{
144-
imgSrc.emplace_back(nvcv::Size2D{(int)shape.z, (int)shape.y}, inFormat);
145-
imgDst.emplace_back(nvcv::Size2D{(int)shape.z, (int)shape.y}, outFormat);
146-
147-
int srcRowStride = imgSrc[i].size().w * inFormat.planePixelStrideBytes(0);
148-
int srcBufSize = imgSrc[i].size().h * srcRowStride;
149-
srcVec[i].resize(srcBufSize);
150-
for (int idx = 0; idx < srcBufSize; idx++)
151-
{
152-
srcVec[i][idx] = randomValuesU8();
153-
}
154-
155-
auto imgData = imgSrc[i].exportData<nvcv::ImageDataStridedCuda>();
156-
CUDA_CHECK_ERROR(cudaMemcpy2D(imgData->plane(0).basePtr, imgData->plane(0).rowStride, srcVec[i].data(),
157-
srcRowStride, srcRowStride, imgSrc[i].size().h, cudaMemcpyHostToDevice));
158-
}
159-
src.pushBack(imgSrc.begin(), imgSrc.end());
160-
dst.pushBack(imgDst.begin(), imgDst.end());
161-
162-
state.exec(nvbench::exec_tag::sync,
163-
[&op, &src, &dst, &code](nvbench::launch &launch) { op(launch.get_stream(), src, dst, code); });
67+
op(launch.get_stream(), src, dst, code);
68+
});
16469
}
16570
}
16671
catch (const std::exception &err)
16772
{
16873
state.skip(err.what());
16974
}
17075

171-
using BaseTypes = nvbench::type_list<uint8_t>;
76+
// clang-format on
77+
78+
using CvtColorTypes = nvbench::type_list<uchar3, uchar4>;
17279

173-
NVBENCH_BENCH_TYPES(CvtColor, NVBENCH_TYPE_AXES(BaseTypes))
174-
.set_type_axes_names({"BaseType"})
175-
.add_string_axis("shape", {"1x1080x1920", "64x720x1280"})
176-
.add_string_axis("code", {"RGB2BGR", "RGB2RGBA", "RGBA2RGB", "RGB2GRAY", "GRAY2RGB", "RGB2HSV", "HSV2RGB",
177-
"RGB2YUV", "YUV2RGB", "RGB2YUV_NV12", "YUV2RGB_NV12"})
80+
NVBENCH_BENCH_TYPES(CvtColor, NVBENCH_TYPE_AXES(CvtColorTypes))
81+
.set_type_axes_names({"InOutDataType"})
82+
.add_string_axis("shape", {"1x1080x1920"})
17883
.add_int64_axis("varShape", {-1, 0});

cmake/ConfigCUDA.cmake

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@ set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
3232
# Compress kernels to generate smaller executables
3333
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=--compress-all")
3434

35-
# Enable device lambdas
36-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
37-
3835
if(NOT USE_CMAKE_CUDA_ARCHITECTURES)
3936
set(CMAKE_CUDA_ARCHITECTURES "$ENV{CUDAARCHS}")
4037

docs/sphinx/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ Copyright
123123
:maxdepth: 1
124124
:hidden:
125125

126+
v0.10.1-beta <relnotes/v0.10.1-beta>
126127
v0.10.0-beta <relnotes/v0.10.0-beta>
127128
v0.9.0-beta <relnotes/v0.9.0-beta>
128129
v0.8.0-beta <relnotes/v0.8.0-beta>

docs/sphinx/relnotes/v0.10.1-beta.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
..
2+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
.. _v0.10.1-beta:
18+
19+
v0.10.1-beta
20+
============
21+
22+
Release Highlights
23+
------------------
24+
25+
CV-CUDA v0.10.1 reverts the OpCvtColor performance improvements introduced in v0.10.0 because bugs were discovered in them.
26+
These optimizations will be reintroduced, with consolidated testing, in a future release.
27+
28+
License
29+
-------
30+
31+
CV-CUDA is licensed under the `Apache 2.0 <https://github.com/CVCUDA/CV-CUDA/blob/main/LICENSE.md>`_ license.
32+
33+
Resources
34+
---------
35+
36+
1. `CV-CUDA GitHub <https://github.com/CVCUDA/CV-CUDA>`_
37+
2. `Increasing Throughput and Reducing Costs for AI-Based Computer Vision with CV-CUDA <https://developer.nvidia.com/blog/increasing-throughput-and-reducing-costs-for-computer-vision-with-cv-cuda/>`_
38+
3. `NVIDIA Announces Microsoft, Tencent, Baidu Adopting CV-CUDA for Computer Vision AI <https://blogs.nvidia.com/blog/2023/03/21/cv-cuda-ai-computer-vision/>`_
39+
4. `CV-CUDA helps Tencent Cloud audio and video PaaS platform achieve full-process GPU acceleration for video enhancement AI <https://developer.nvidia.com/zh-cn/blog/cv-cuda-high-performance-image-processing/>`_
40+
41+
Acknowledgements
42+
----------------
43+
44+
CV-CUDA is developed jointly by NVIDIA and the ByteDance Machine Learning team.

0 commit comments

Comments
 (0)