Skip to content

Commit c6923df

Browse files
authored
Add ceil_ilog2 (NVIDIA#4485)
1 parent e11380a commit c6923df

File tree

3 files changed

+46
-10
lines changed

3 files changed

+46
-10
lines changed

docs/libcudacxx/extended_api/math/ilog.rst

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,34 @@
33
``cuda::ilog2`` and ``cuda::ilog10``
44
====================================
55

6-
.. code:: cpp
6+
.. code:: cuda
77
88
template <typename T>
99
[[nodiscard]] __host__ __device__ inline constexpr
1010
int ilog2(T value) noexcept;
1111
12+
.. code:: cuda
13+
14+
template <typename T>
15+
[[nodiscard]] __host__ __device__ inline constexpr
16+
int ceil_ilog2(T value) noexcept;
17+
1218
.. code:: cuda
1319
1420
template <typename T>
1521
[[nodiscard]] __host__ __device__ inline constexpr
1622
int ilog10(T value) noexcept;
1723
18-
The functions compute the logarithm to the base 2 and 10 respectively of an integer value.
24+
The functions compute the logarithm to the base 2 and 10 of an integer value.
1925

2026
**Parameters**
2127

2228
- ``value``: The input value.
2329

2430
**Return value**
2531

26-
- The logarithm to the base 2 and 10 respectively, rounded down to the nearest integer.
32+
- ``ilog2``, ``ceil_ilog2``: The logarithm to the base 2, rounded down and up to the nearest integer respectively.
33+
- ``ilog10``: The logarithm to the base 10, rounded down to the nearest integer.
2734

2835
**Constraints**
2936

@@ -38,6 +45,7 @@ The functions compute the logarithm to the base 2 and 10 respectively of an inte
3845
The function performs the following operations in device code:
3946

4047
- ``ilog2``: ``FLO``
48+
- ``ceil_ilog2``: ``FLO``, ``POPC``, ``ADD``, comparison
4149
- ``ilog10``: ``FLO``, ``FMUL``, ``F2I``, constant memory lookup, ``SEL`` + ``IADD`` only if ``T == uint32_t`` or ``T == __uint128_t``
4250

4351
Example
@@ -50,7 +58,9 @@ Example
5058
5159
__global__ void ilog_kernel() {
5260
assert(cuda::ilog2(20) == 4);
61+
assert(cuda::ceil_ilog2(20) == 5);
5362
assert(cuda::ilog2(32) == 5);
63+
assert(cuda::ceil_ilog2(32) == 5);
5464
assert(cuda::ilog10(100) == 2);
5565
assert(cuda::ilog10(2000) == 3);
5666
}
@@ -61,4 +71,4 @@ Example
6171
return 0;
6272
}
6373
64-
`See it on Godbolt 🔗 <https://godbolt.org/z/nndYnTWer>`_
74+
`See it on Godbolt 🔗 <https://godbolt.org/z/nqrYvrGTq>`_

libcudacxx/include/cuda/__cmath/ilog.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# pragma system_header
2222
#endif // no system header
2323

24+
#include <cuda/std/__bit/has_single_bit.h>
2425
#include <cuda/std/__bit/integral.h>
2526
#include <cuda/std/__cmath/rounding_functions.h>
2627
#include <cuda/std/__concepts/concept_macros.h>
@@ -41,9 +42,17 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr int ilog2(_Tp __t) noexcept
4142
{
4243
using _Up = _CUDA_VSTD::make_unsigned_t<_Tp>;
4344
_CCCL_ASSERT(__t > 0, "ilog2() argument must be strictly positive");
44-
auto __log10_approx = _CUDA_VSTD::__bit_log2(static_cast<_Up>(__t));
45-
_CCCL_ASSUME(__log10_approx <= _CUDA_VSTD::numeric_limits<_Tp>::digits);
46-
return __log10_approx;
45+
auto __log2_approx = _CUDA_VSTD::__bit_log2(static_cast<_Up>(__t));
46+
_CCCL_ASSUME(__log2_approx <= _CUDA_VSTD::numeric_limits<_Tp>::digits);
47+
return __log2_approx;
48+
}
49+
50+
_CCCL_TEMPLATE(typename _Tp)
51+
_CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::__cccl_is_cv_integer, _Tp))
52+
_LIBCUDACXX_HIDE_FROM_ABI constexpr int ceil_ilog2(_Tp __t) noexcept
53+
{
54+
using _Up = _CUDA_VSTD::make_unsigned_t<_Tp>;
55+
return ::cuda::ilog2(__t) + !_CUDA_VSTD::has_single_bit(static_cast<_Up>(__t));
4756
}
4857

4958
[[nodiscard]] _LIBCUDACXX_HIDE_FROM_ABI constexpr _CUDA_VSTD::array<uint32_t, 10> __power_of_10_32bit() noexcept

libcudacxx/test/libcudacxx/cuda/cmath/ilog.pass.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include <cuda/std/limits>
1515
#include <cuda/std/type_traits>
1616

17-
#include "cuda/std/__type_traits/is_constant_evaluated.h"
1817
#include "test_macros.h"
1918

2019
template <class T>
@@ -24,16 +23,33 @@ __host__ __device__ constexpr void test_log2()
2423
for (T value = 1; value <= cuda::std::numeric_limits<T>::max() / 2; value *= 2)
2524
{
2625
assert(cuda::ilog2(value) == i);
27-
if (i >= 1)
26+
if (value > 1)
2827
{
2928
assert(cuda::ilog2(static_cast<T>(value - 1)) == i - 1);
30-
assert(cuda::ilog2(static_cast<T>(value + 1)) == i); // not true if value == 1
29+
assert(cuda::ilog2(static_cast<T>(value + 1)) == i);
3130
}
3231
i++;
3332
}
3433
assert(cuda::ilog2(cuda::std::numeric_limits<T>::max()) == cuda::std::numeric_limits<T>::digits - 1);
3534
}
3635

36+
template <class T>
37+
__host__ __device__ constexpr void test_ceil_log2()
38+
{
39+
int i = 0;
40+
for (T value = 1; value <= cuda::std::numeric_limits<T>::max() / 2; value *= 2)
41+
{
42+
assert(cuda::ceil_ilog2(value) == i);
43+
assert(cuda::ceil_ilog2(static_cast<T>(value + 1)) == i + 1);
44+
if (value > 2)
45+
{
46+
assert(cuda::ceil_ilog2(static_cast<T>(value - 1)) == i);
47+
}
48+
i++;
49+
}
50+
assert(cuda::ceil_ilog2(cuda::std::numeric_limits<T>::max()) == cuda::std::numeric_limits<T>::digits);
51+
}
52+
3753
template <class T>
3854
__host__ __device__ constexpr void test_log10()
3955
{
@@ -62,6 +78,7 @@ template <class T>
6278
__host__ __device__ constexpr void test()
6379
{
6480
test_log2<T>();
81+
test_ceil_log2<T>();
6582
test_log10<T>();
6683
}
6784

0 commit comments

Comments
 (0)