From a5148ec3c08fa5b06f769b170af7bb8543f2b9b6 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 25 Mar 2025 03:44:59 -0400 Subject: [PATCH 01/12] Optimize ranges::{for_each, for_each_n} for segmented iterators --- libcxx/include/__algorithm/ranges_for_each.h | 14 ++++-- .../include/__algorithm/ranges_for_each_n.h | 15 ++++-- .../nonmodifying/for_each_n.bench.cpp | 2 +- .../alg.foreach/ranges.for_each.pass.cpp | 46 +++++++++++++++++-- .../alg.foreach/ranges.for_each_n.pass.cpp | 46 ++++++++++++++++++- 5 files changed, 108 insertions(+), 15 deletions(-) diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index de39bc5522753..475f85366188e 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H +#include <__algorithm/for_each.h> #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> @@ -41,9 +42,16 @@ struct __for_each { template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - for (; __first != __last; ++__first) - std::invoke(__func, std::invoke(__proj, *__first)); - return {std::move(__first), std::move(__func)}; + if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) { + auto __n = __last - __first; + auto __end = __first + __n; + std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }); + return {std::move(__end), std::move(__func)}; + } else { + for (; __first != __last; ++__first) + std::invoke(__func, std::invoke(__proj, *__first)); + return {std::move(__first), std::move(__func)}; + } } public: diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 603cb723233c8..3108d66001295 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H +#include <__algorithm/for_each.h> #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> @@ -40,11 +41,17 @@ struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { - while (__count-- > 0) { - std::invoke(__func, std::invoke(__proj, *__first)); - ++__first; + if constexpr (random_access_iterator<_Iter>) { + auto __last = __first + __count; + std::for_each(__first, __last, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }); + return {std::move(__last), std::move(__func)}; + } else { + while (__count-- > 0) { + std::invoke(__func, std::invoke(__proj, *__first)); + ++__first; + } + return {std::move(__first), std::move(__func)}; } - return {std::move(__first), std::move(__func)}; } }; diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index 784708c7e01eb..0de291395463a 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -21,7 +21,7 @@ int main(int argc, char** argv) { auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); }; - // std::for_each_n + // {std,ranges}::for_each_n { auto bm = [](std::string name, auto for_each_n) { using ElemType = typename Container::value_type; diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp index 8b9b6e82cbcb2..2f4bfb9db6dba 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp @@ -20,7 +20,10 @@ #include #include +#include +#include #include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -30,7 +33,7 @@ struct Callable { }; template -concept HasForEachIt = requires (Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); }; +concept HasForEachIt = requires(Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); }; static_assert(HasForEachIt); static_assert(!HasForEachIt); @@ -47,7 +50,7 @@ static_assert(!HasForEachItFunc); static_assert(!HasForEachItFunc); template -concept HasForEachR = requires (Range range) { std::ranges::for_each(range, Callable{}); }; +concept HasForEachR = requires(Range range) { std::ranges::for_each(range, Callable{}); }; static_assert(HasForEachR>); static_assert(!HasForEachR); @@ -68,7 +71,7 @@ constexpr void test_iterator() { { // simple test { auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + int a[] = {1, 6, 3, 4}; std::same_as> decltype(auto) ret = std::ranges::for_each(Iter(a), Sent(Iter(a + 4)), func); assert(a[0] == 1); @@ -81,8 +84,8 @@ constexpr void test_iterator() { assert(i == 4); } { - auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + auto func = [i = 0](int& a) mutable { a += i++; }; + int a[] = {1, 6, 3, 4}; auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 4))); std::same_as> decltype(auto) ret = std::ranges::for_each(range, func); @@ -110,6 +113,30 @@ constexpr void test_iterator() { } } +struct deque_test { + std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +/*TEST_CONSTEXPR_CXX23*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr + // check that segmented iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::ranges::for_each(d, deque_test(d, index)); + } +} + constexpr bool test() { test_iterator, sentinel_wrapper>>(); test_iterator, sentinel_wrapper>>(); @@ -146,6 +173,15 @@ constexpr bool test() { } } + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr + test_segmented_deque_iterator(); + + { + std::vector> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}}; + auto v = vec | std::views::join; + std::ranges::for_each(v, [i = 0](int x) mutable { assert(x == 2 * i++); }, [](int x) { return 2 * x; }); + } + return true; } diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp index d4b2d053d08ce..ad1447b7348f5 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp @@ -17,7 +17,12 @@ #include #include +#include +#include +#include #include +#include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -27,7 +32,7 @@ struct Callable { }; template -concept HasForEachN = requires (Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); }; +concept HasForEachN = requires(Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); }; static_assert(HasForEachN); static_assert(!HasForEachN); @@ -45,7 +50,7 @@ template constexpr void test_iterator() { { // simple test auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + int a[] = {1, 6, 3, 4}; std::same_as> auto ret = std::ranges::for_each_n(Iter(a), 4, func); assert(a[0] == 1); @@ -64,6 +69,30 @@ constexpr void test_iterator() { } } +struct deque_test { + std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +/*TEST_CONSTEXPR_CXX23*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr + // check that segmented iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::ranges::for_each_n(d.begin(), d.size(), deque_test(d, index)); + } +} + constexpr bool test() { test_iterator>(); test_iterator>(); @@ -89,6 +118,19 @@ constexpr bool test() { assert(a[2].other == 6); } + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr + test_segmented_deque_iterator(); + + { + std::vector> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}}; + auto v = vec | std::views::join; + std::ranges::for_each_n( + v.begin(), + std::ranges::distance(v), + [i = 0](int x) mutable { assert(x == 2 * i++); }, + [](int x) { return 2 * x; }); + } + return true; } From 90c826b3dd2a4a1bcf36bc486e61da8468a90d56 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 25 Mar 2025 21:29:27 -0400 Subject: [PATCH 02/12] Address ldionne's review comments --- libcxx/include/__algorithm/for_each.h | 1 + libcxx/include/__algorithm/ranges_for_each.h | 4 +++- libcxx/include/__algorithm/ranges_for_each_n.h | 4 +++- .../alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp | 2 +- .../alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp | 2 +- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index b6c2c7c056edd..2a44c1dc60704 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -11,6 +11,7 @@ #define _LIBCPP___ALGORITHM_FOR_EACH_H #include <__algorithm/for_each_segment.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index 475f85366188e..5d27befd9619f 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -11,6 +11,7 @@ #include <__algorithm/for_each.h> #include <__algorithm/in_fun_result.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -45,7 +46,8 @@ struct __for_each { if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) { auto __n = __last - __first; auto __end = __first + __n; - std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }); + auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; + std::__for_each<_RangeAlgPolicy>(__first, __end, __f); return {std::move(__end), std::move(__func)}; } else { for (; __first != __last; ++__first) diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 3108d66001295..8384ba3bb14e6 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -11,6 +11,7 @@ #include <__algorithm/for_each.h> #include <__algorithm/in_fun_result.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -43,7 +44,8 @@ struct __for_each_n { operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { if constexpr (random_access_iterator<_Iter>) { auto __last = __first + __count; - std::for_each(__first, __last, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }); + auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; + std::__for_each<_RangeAlgPolicy>(__first, __last, __f); return {std::move(__last), std::move(__func)}; } else { while (__count-- > 0) { diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp index 2f4bfb9db6dba..14be4a42f667c 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp @@ -127,7 +127,7 @@ struct deque_test { /*TEST_CONSTEXPR_CXX23*/ void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr - // check that segmented iterators work properly + // check that segmented deque iterators work properly int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; for (const int size : sizes) { std::deque d(size); diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp index ad1447b7348f5..ac073d3052170 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp @@ -83,7 +83,7 @@ struct deque_test { /*TEST_CONSTEXPR_CXX23*/ void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr - // check that segmented iterators work properly + // check that segmented deque iterators work properly int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; for (const int size : sizes) { std::deque d(size); From fae4de0486d87661286337560d4af55fc2b0dbca Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 25 Mar 2025 23:11:34 -0400 Subject: [PATCH 03/12] Fix test and ADL call --- .../alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp | 6 +++--- .../alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp index 14be4a42f667c..a6d0afde3186a 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp @@ -125,8 +125,8 @@ struct deque_test { } }; -/*TEST_CONSTEXPR_CXX23*/ -void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr +/*TEST_CONSTEXPR_CXX26*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr // check that segmented deque iterators work properly int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; for (const int size : sizes) { @@ -173,7 +173,7 @@ constexpr bool test() { } } - if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr test_segmented_deque_iterator(); { diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp index ac073d3052170..1578763694231 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp @@ -81,8 +81,8 @@ struct deque_test { } }; -/*TEST_CONSTEXPR_CXX23*/ -void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX23 once std::deque is constexpr +/*TEST_CONSTEXPR_CXX26*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr // check that segmented deque iterators work properly int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; for (const int size : sizes) { @@ -118,7 +118,7 @@ constexpr bool test() { assert(a[2].other == 6); } - if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED when std::deque is made constexpr + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr test_segmented_deque_iterator(); { From 37d68a31684a7717738d41d2523c667f1acda610 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 26 Mar 2025 11:10:37 -0400 Subject: [PATCH 04/12] Make for_each segmented iterator optimization valid for C++03 --- libcxx/include/__algorithm/for_each_n.h | 1 + libcxx/include/__algorithm/ranges_for_each_n.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h index 29351ec39f4e7..169de84b4d95f 100644 --- a/libcxx/include/__algorithm/for_each_n.h +++ b/libcxx/include/__algorithm/for_each_n.h @@ -14,6 +14,7 @@ #include <__algorithm/for_each_n_segment.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 8384ba3bb14e6..a5c81868c2062 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -18,6 +18,7 @@ #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/projected.h> #include <__ranges/concepts.h> #include <__utility/move.h> @@ -42,8 +43,8 @@ struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { - if constexpr (random_access_iterator<_Iter>) { - auto __last = __first + __count; + if constexpr (forward_iterator<_Iter>) { + auto __last = std::ranges::next(__first, __count); auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; std::__for_each<_RangeAlgPolicy>(__first, __last, __f); return {std::move(__last), std::move(__func)}; From 2a83548375a78def38a0060b5b2a0dd64c271d16 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Thu, 27 Mar 2025 11:50:12 -0400 Subject: [PATCH 05/12] Allow transitive include of in affected headers --- libcxx/include/experimental/iterator | 1 + libcxx/include/mutex | 1 + libcxx/include/shared_mutex | 1 + 3 files changed, 3 insertions(+) diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index d92613845a662..565bb83903ac3 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -127,6 +127,7 @@ _LIBCPP_POP_MACROS # if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/include/mutex b/libcxx/include/mutex index e058b3113073e..f616bad3ac171 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -504,6 +504,7 @@ _LIBCPP_POP_MACROS # include # include # include +# include # include # include # include diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index e6759e413dfef..6469c02ca5874 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -457,6 +457,7 @@ _LIBCPP_POP_MACROS # endif // _LIBCPP_HAS_THREADS # if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) From 5cc4af82cc65eb8bf00c5bb394e4bb3739503a53 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Thu, 27 Mar 2025 12:00:54 -0400 Subject: [PATCH 06/12] Remove unnecessary _AlgoPolicy template parameter --- libcxx/include/__algorithm/for_each.h | 1 - libcxx/include/__algorithm/ranges_for_each.h | 3 +-- libcxx/include/__algorithm/ranges_for_each_n.h | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index 2a44c1dc60704..b6c2c7c056edd 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -11,7 +11,6 @@ #define _LIBCPP___ALGORITHM_FOR_EACH_H #include <__algorithm/for_each_segment.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index 5d27befd9619f..096e60683e39d 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -11,7 +11,6 @@ #include <__algorithm/for_each.h> #include <__algorithm/in_fun_result.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -47,7 +46,7 @@ struct __for_each { auto __n = __last - __first; auto __end = __first + __n; auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; - std::__for_each<_RangeAlgPolicy>(__first, __end, __f); + std::__for_each(__first, __end, __f); return {std::move(__end), std::move(__func)}; } else { for (; __first != __last; ++__first) diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index a5c81868c2062..9c6c2b97a2ad1 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -11,7 +11,6 @@ #include <__algorithm/for_each.h> #include <__algorithm/in_fun_result.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -46,7 +45,7 @@ struct __for_each_n { if constexpr (forward_iterator<_Iter>) { auto __last = std::ranges::next(__first, __count); auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; - std::__for_each<_RangeAlgPolicy>(__first, __last, __f); + std::__for_each(__first, __last, __f); return {std::move(__last), std::move(__func)}; } else { while (__count-- > 0) { From b74e1881ca0ea9d3b6b01474046a85ed60fbc987 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Fri, 28 Mar 2025 20:26:31 -0400 Subject: [PATCH 07/12] Apply optimization for join_view segmented iterators --- libcxx/docs/ReleaseNotes/21.rst | 6 + .../include/__algorithm/ranges_for_each_n.h | 5 +- .../nonmodifying/for_each.bench.cpp | 23 +++- .../nonmodifying/for_each_join_view.bench.cpp | 122 ++++++++++++++++++ .../nonmodifying/for_each_n.bench.cpp | 14 ++ 5 files changed, 165 insertions(+), 5 deletions(-) create mode 100644 libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 6cbc0baf29487..b652ed2f4eb1e 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -64,11 +64,17 @@ Improvements and New Features - The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x. +<<<<<<< HEAD - The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance up to 10x, depending on type of sorted elements and the initial state of the sorted array. - The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available in C++23 and later. +======= +- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators, + resulting in performance improvements of up to 21.2x for ``std::deque::iterator`` segmented inputs and 17.9x for + ``join_view`` of ``vector>``. +>>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators) - The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque`` iterators, and up to 13.9x for ``std::join_view>>`` iterators. diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 9c6c2b97a2ad1..b92eeb6fa8d7c 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -9,7 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H -#include <__algorithm/for_each.h> +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> @@ -43,9 +43,8 @@ struct __for_each_n { _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { if constexpr (forward_iterator<_Iter>) { - auto __last = std::ranges::next(__first, __count); auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; - std::__for_each(__first, __last, __f); + auto __last = std::for_each_n(__first, __count, __f); return {std::move(__last), std::move(__func)}; } else { while (__count-- > 0) { diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp index 760accbe4d929..1e33cf70f8487 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp @@ -23,6 +23,7 @@ int main(int argc, char** argv) { // {std,ranges}::for_each { auto bm = [](std::string name, auto for_each) { + using ElemType = typename Container::value_type; benchmark::RegisterBenchmark( name, [for_each](auto& st) { @@ -33,16 +34,34 @@ int main(int argc, char** argv) { for ([[maybe_unused]] auto _ : st) { benchmark::DoNotOptimize(c); - auto result = for_each(first, last, [](int& x) { x = std::clamp(x, 10, 100); }); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); benchmark::DoNotOptimize(result); } }) ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two + ->Arg(1024) + ->Arg(4096) ->Arg(8192) - ->Arg(1 << 20); + ->Arg(1 << 14) + ->Arg(1 << 16) + ->Arg(1 << 18); }; + bm.operator()>("std::for_each(vector)", std_for_each); + bm.operator()>("std::for_each(deque)", std_for_each); + bm.operator()>("std::for_each(list)", std_for_each); + bm.operator()>("rng::for_each(vector)", std::ranges::for_each); + bm.operator()>("rng::for_each(deque)", std::ranges::for_each); + bm.operator()>("rng::for_each(list)", std::ranges::for_each); + + bm.operator()>("std::for_each(vector)", std_for_each); + bm.operator()>("std::for_each(deque)", std_for_each); + bm.operator()>("std::for_each(list)", std_for_each); + bm.operator()>("rng::for_each(vector)", std::ranges::for_each); + bm.operator()>("rng::for_each(deque)", std::ranges::for_each); + bm.operator()>("rng::for_each(list)", std::ranges::for_each); + bm.operator()>("std::for_each(vector)", std_for_each); bm.operator()>("std::for_each(deque)", std_for_each); bm.operator()>("std::for_each(list)", std_for_each); diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp new file mode 100644 index 0000000000000..28398ac988bf7 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp @@ -0,0 +1,122 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include +#include +#include +#include +#include +#include +#include + +#include + +int main(int argc, char** argv) { + auto std_for_each = [](auto first, auto last, auto f) { return std::for_each(first, last, f); }; + auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); }; + + // {std,ranges}::for_each + { + auto bm = [](std::string name, auto for_each) { + using C1 = typename Container::value_type; + using ElemType = typename C1::value_type; + + benchmark::RegisterBenchmark( + name, + [for_each](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + Container c(segments); + for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { + c[i].resize(std::min(seg_size, n), ElemType(1)); + } + + auto view = c | std::views::join; + auto first = view.begin(); + auto last = view.end(); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(32) + ->Arg(50) // non power-of-two + ->Arg(1024) + ->Arg(4096) + ->Arg(8192) + ->Arg(1 << 14) + ->Arg(1 << 16) + ->Arg(1 << 18); + }; + bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); + bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); + bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); + bm.operator()>>( + "rng::for_each(join_view(vector>)", std::ranges::for_each); + bm.operator()>>( + "rng::for_each(join_view(vector>)", std::ranges::for_each); + bm.operator()>>("rng::for_each(join_view(vector>)", std::ranges::for_each); + } + + // {std,ranges}::for_each_n + { + auto bm = [](std::string name, auto for_each_n) { + using C1 = typename Container::value_type; + using ElemType = typename C1::value_type; + benchmark::RegisterBenchmark( + name, + [for_each_n](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + Container c(segments); + for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { + c[i].resize(std::min(seg_size, n), ElemType(1)); + } + + auto view = c | std::views::join; + auto first = view.begin(); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp(x, 10, 100); }); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(32) + ->Arg(50) // non power-of-two + ->Arg(1024) + ->Arg(4096) + ->Arg(8192) + ->Arg(1 << 14) + ->Arg(1 << 16) + ->Arg(1 << 18); + }; + bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>>( + "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); + bm.operator()>>( + "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); + bm.operator()>>( + "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); + } + + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); + return 0; +} diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index 0de291395463a..f0dcc30a39e14 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -48,6 +48,20 @@ int main(int argc, char** argv) { ->Arg(1 << 16) ->Arg(1 << 18); }; + bm.operator()>("std::for_each_n(vector)", std_for_each_n); + bm.operator()>("std::for_each_n(deque)", std_for_each_n); + bm.operator()>("std::for_each_n(list)", std_for_each_n); + bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); + + bm.operator()>("std::for_each_n(vector)", std_for_each_n); + bm.operator()>("std::for_each_n(deque)", std_for_each_n); + bm.operator()>("std::for_each_n(list)", std_for_each_n); + bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); + bm.operator()>("std::for_each_n(vector)", std_for_each_n); bm.operator()>("std::for_each_n(deque)", std_for_each_n); bm.operator()>("std::for_each_n(list)", std_for_each_n); From 1f7ad3453b2b390019575a52d10fb237593d5d70 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sat, 29 Mar 2025 11:21:07 -0400 Subject: [PATCH 08/12] Consistently extend segmented iterator optimization to ranges::for_each --- libcxx/docs/ReleaseNotes/21.rst | 2 +- libcxx/include/__algorithm/ranges_for_each.h | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index b652ed2f4eb1e..a3bbc59b9bd2b 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -72,7 +72,7 @@ Improvements and New Features in C++23 and later. ======= - The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators, - resulting in performance improvements of up to 21.2x for ``std::deque::iterator`` segmented inputs and 17.9x for + resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for ``join_view`` of ``vector>``. >>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators) diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index 096e60683e39d..961f7558149a3 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -10,7 +10,9 @@ #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H #include <__algorithm/for_each.h> +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> +#include <__concepts/assignable.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -42,11 +44,14 @@ struct __for_each { template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) { - auto __n = __last - __first; - auto __end = __first + __n; - auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; - std::__for_each(__first, __end, __f); + if constexpr (std::assignable_from<_Iter&, _Sent>) { + _Iter __end = std::move(__last); + std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }); + return {std::move(__end), std::move(__func)}; + } else if constexpr (sized_sentinel_for<_Sent, _Iter>) { + auto __end = std::for_each_n(__first, __last - __first, [&](auto&& __val) { + std::invoke(__func, std::invoke(__proj, __val)); + }); return {std::move(__end), std::move(__func)}; } else { for (; __first != __last; ++__first) From ca54b95bcd525c6c26f2bb264f0bca1d157edb4f Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 2 Apr 2025 23:15:57 -0400 Subject: [PATCH 09/12] Fix review comments --- libcxx/docs/ReleaseNotes/21.rst | 5 + libcxx/include/__algorithm/for_each.h | 18 ++- libcxx/include/__algorithm/for_each_n.h | 19 ++- .../include/__algorithm/for_each_n_segment.h | 6 + libcxx/include/__algorithm/ranges_for_each.h | 16 +-- .../include/__algorithm/ranges_for_each_n.h | 14 +- .../nonmodifying/for_each.bench.cpp | 56 ++++++-- .../nonmodifying/for_each_join_view.bench.cpp | 122 ------------------ .../nonmodifying/for_each_n.bench.cpp | 54 ++++++-- 9 files changed, 124 insertions(+), 186 deletions(-) delete mode 100644 libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index a3bbc59b9bd2b..49c188ebac420 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -72,9 +72,14 @@ Improvements and New Features in C++23 and later. ======= - The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators, +<<<<<<< HEAD resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for ``join_view`` of ``vector>``. >>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators) +======= + resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of + ``vector>``. +>>>>>>> 590136ba0d9f (Fix review comments) - The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque`` iterators, and up to 13.9x for ``std::join_view>>`` iterators. diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index b6c2c7c056edd..01ddad761bb57 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -12,6 +12,8 @@ #include <__algorithm/for_each_segment.h> #include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> @@ -21,21 +23,24 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __for_each(_InputIterator __first, _Sent __last, _Func& __f) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) { for (; __first != __last; ++__first) - __f(*__first); + std::invoke(__f, std::invoke(__proj, *__first)); + return __first; } #ifndef _LIBCPP_CXX03_LANG template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) { +__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __func); + std::__for_each(__lfirst, __llast, __func, __proj); }); } #endif // !_LIBCPP_CXX03_LANG @@ -43,7 +48,8 @@ __for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __f template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function for_each(_InputIterator __first, _InputIterator __last, _Function __f) { - std::__for_each(__first, __last, __f); + __identity __proj; + std::__for_each(__first, __last, __f, __proj); return __f; } diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h index 169de84b4d95f..953662afd6310 100644 --- a/libcxx/include/__algorithm/for_each_n.h +++ b/libcxx/include/__algorithm/for_each_n.h @@ -13,8 +13,9 @@ #include <__algorithm/for_each.h> #include <__algorithm/for_each_n_segment.h> #include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> @@ -34,16 +35,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value && _Or< _Not<__is_segmented_iterator<_InputIterator> >, _Not<__has_random_access_local_iterator<_InputIterator> > >::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) { +__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; _IntegralSize __n = __orig_n; while (__n > 0) { - __f(*__first); + std::invoke(__f, std::invoke(__proj, *__first)); ++__first; --__n; } @@ -53,12 +55,13 @@ __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) { template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter __for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) { typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n; auto __last = __first + __n; - std::__for_each(__first, __last, __f); + std::__for_each(__first, __last, __f, __proj); return std::move(__last); } @@ -66,16 +69,17 @@ __for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) { template ::value && __is_segmented_iterator<_SegmentedIterator>::value && __has_random_access_iterator_category< typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator -__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) { +__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __f); + std::__for_each(__lfirst, __llast, __f, __proj); }); } #endif // !_LIBCPP_CXX03_LANG @@ -85,7 +89,8 @@ __for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) { template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) { - return std::__for_each_n(__first, __orig_n, __f); + __identity __proj; + return std::__for_each_n(__first, __orig_n, __f, __proj); } #endif // _LIBCPP_STD_VER >= 17 diff --git a/libcxx/include/__algorithm/for_each_n_segment.h b/libcxx/include/__algorithm/for_each_n_segment.h index 1b522fb373eee..6c257dbcdc3ea 100644 --- a/libcxx/include/__algorithm/for_each_n_segment.h +++ b/libcxx/include/__algorithm/for_each_n_segment.h @@ -10,7 +10,13 @@ #define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H #include <__config> +<<<<<<< HEAD #include <__iterator/iterator_traits.h> +======= +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +>>>>>>> 4a86118918e8 (Fix review comments) #include <__iterator/segmented_iterator.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index 961f7558149a3..ed0dcde688406 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -44,19 +44,13 @@ struct __for_each { template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - if constexpr (std::assignable_from<_Iter&, _Sent>) { - _Iter __end = std::move(__last); - std::for_each(__first, __end, [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }); - return {std::move(__end), std::move(__func)}; - } else if constexpr (sized_sentinel_for<_Sent, _Iter>) { - auto __end = std::for_each_n(__first, __last - __first, [&](auto&& __val) { - std::invoke(__func, std::invoke(__proj, __val)); - }); + if constexpr (!std::assignable_from<_Iter&, _Sent> && sized_sentinel_for<_Sent, _Iter>) { + auto __n = __last - __first; + auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj); return {std::move(__end), std::move(__func)}; } else { - for (; __first != __last; ++__first) - std::invoke(__func, std::invoke(__proj, *__first)); - return {std::move(__first), std::move(__func)}; + auto __end = std::__for_each(std::move(__first), std::move(__last), __func, __proj); + return {std::move(__end), std::move(__func)}; } } diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index b92eeb6fa8d7c..ebcd38a8eef6f 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -17,7 +17,6 @@ #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> #include <__iterator/projected.h> #include <__ranges/concepts.h> #include <__utility/move.h> @@ -42,17 +41,8 @@ struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { - if constexpr (forward_iterator<_Iter>) { - auto __f = [&](auto&& __val) { std::invoke(__func, std::invoke(__proj, __val)); }; - auto __last = std::for_each_n(__first, __count, __f); - return {std::move(__last), std::move(__func)}; - } else { - while (__count-- > 0) { - std::invoke(__func, std::invoke(__proj, *__first)); - ++__first; - } - return {std::move(__first), std::move(__func)}; - } + auto __last = std::__for_each_n(std::move(__first), __count, __func, __proj); + return {std::move(__last), std::move(__func)}; } }; diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp index 1e33cf70f8487..9151ca19c7862 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -48,20 +49,6 @@ int main(int argc, char** argv) { ->Arg(1 << 16) ->Arg(1 << 18); }; - bm.operator()>("std::for_each(vector)", std_for_each); - bm.operator()>("std::for_each(deque)", std_for_each); - bm.operator()>("std::for_each(list)", std_for_each); - bm.operator()>("rng::for_each(vector)", std::ranges::for_each); - bm.operator()>("rng::for_each(deque)", std::ranges::for_each); - bm.operator()>("rng::for_each(list)", std::ranges::for_each); - - bm.operator()>("std::for_each(vector)", std_for_each); - bm.operator()>("std::for_each(deque)", std_for_each); - bm.operator()>("std::for_each(list)", std_for_each); - bm.operator()>("rng::for_each(vector)", std::ranges::for_each); - bm.operator()>("rng::for_each(deque)", std::ranges::for_each); - bm.operator()>("rng::for_each(list)", std::ranges::for_each); - bm.operator()>("std::for_each(vector)", std_for_each); bm.operator()>("std::for_each(deque)", std_for_each); bm.operator()>("std::for_each(list)", std_for_each); @@ -70,6 +57,47 @@ int main(int argc, char** argv) { bm.operator()>("rng::for_each(list)", std::ranges::for_each); } + // {std,ranges}::for_each for join_view + { + auto bm = [](std::string name, auto for_each) { + using C1 = typename Container::value_type; + using ElemType = typename C1::value_type; + + benchmark::RegisterBenchmark( + name, + [for_each](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + Container c(segments); + for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { + c[i].resize(std::min(seg_size, n), ElemType(1)); + } + + auto view = c | std::views::join; + auto first = view.begin(); + auto last = view.end(); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(32) + ->Arg(50) // non power-of-two + ->Arg(1024) + ->Arg(4096) + ->Arg(8192) + ->Arg(1 << 14) + ->Arg(1 << 16) + ->Arg(1 << 18); + }; + bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); + bm.operator()>>("rng::for_each(join_view(vector>)", std::ranges::for_each); + } + benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp deleted file mode 100644 index 28398ac988bf7..0000000000000 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_join_view.bench.cpp +++ /dev/null @@ -1,122 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include -#include - -#include - -int main(int argc, char** argv) { - auto std_for_each = [](auto first, auto last, auto f) { return std::for_each(first, last, f); }; - auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); }; - - // {std,ranges}::for_each - { - auto bm = [](std::string name, auto for_each) { - using C1 = typename Container::value_type; - using ElemType = typename C1::value_type; - - benchmark::RegisterBenchmark( - name, - [for_each](auto& st) { - std::size_t const size = st.range(0); - std::size_t const seg_size = 256; - std::size_t const segments = (size + seg_size - 1) / seg_size; - Container c(segments); - for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { - c[i].resize(std::min(seg_size, n), ElemType(1)); - } - - auto view = c | std::views::join; - auto first = view.begin(); - auto last = view.end(); - - for ([[maybe_unused]] auto _ : st) { - benchmark::DoNotOptimize(c); - auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); - benchmark::DoNotOptimize(result); - } - }) - ->Arg(8) - ->Arg(32) - ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); - }; - bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); - bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); - bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); - bm.operator()>>( - "rng::for_each(join_view(vector>)", std::ranges::for_each); - bm.operator()>>( - "rng::for_each(join_view(vector>)", std::ranges::for_each); - bm.operator()>>("rng::for_each(join_view(vector>)", std::ranges::for_each); - } - - // {std,ranges}::for_each_n - { - auto bm = [](std::string name, auto for_each_n) { - using C1 = typename Container::value_type; - using ElemType = typename C1::value_type; - benchmark::RegisterBenchmark( - name, - [for_each_n](auto& st) { - std::size_t const size = st.range(0); - std::size_t const seg_size = 256; - std::size_t const segments = (size + seg_size - 1) / seg_size; - Container c(segments); - for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { - c[i].resize(std::min(seg_size, n), ElemType(1)); - } - - auto view = c | std::views::join; - auto first = view.begin(); - - for ([[maybe_unused]] auto _ : st) { - benchmark::DoNotOptimize(c); - auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp(x, 10, 100); }); - benchmark::DoNotOptimize(result); - } - }) - ->Arg(8) - ->Arg(32) - ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); - }; - bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); - bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); - bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); - bm.operator()>>( - "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); - bm.operator()>>( - "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); - bm.operator()>>( - "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); - } - - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index f0dcc30a39e14..3ace25a6052b6 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -48,20 +48,6 @@ int main(int argc, char** argv) { ->Arg(1 << 16) ->Arg(1 << 18); }; - bm.operator()>("std::for_each_n(vector)", std_for_each_n); - bm.operator()>("std::for_each_n(deque)", std_for_each_n); - bm.operator()>("std::for_each_n(list)", std_for_each_n); - bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); - bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); - bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); - - bm.operator()>("std::for_each_n(vector)", std_for_each_n); - bm.operator()>("std::for_each_n(deque)", std_for_each_n); - bm.operator()>("std::for_each_n(list)", std_for_each_n); - bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); - bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); - bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); - bm.operator()>("std::for_each_n(vector)", std_for_each_n); bm.operator()>("std::for_each_n(deque)", std_for_each_n); bm.operator()>("std::for_each_n(list)", std_for_each_n); @@ -105,6 +91,46 @@ int main(int argc, char** argv) { bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); } + // {std,ranges}::for_each_n for join_view + { + auto bm = [](std::string name, auto for_each_n) { + using C1 = typename Container::value_type; + using ElemType = typename C1::value_type; + benchmark::RegisterBenchmark( + name, + [for_each_n](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + Container c(segments); + for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { + c[i].resize(std::min(seg_size, n), ElemType(1)); + } + + auto view = c | std::views::join; + auto first = view.begin(); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp(x, 10, 100); }); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(32) + ->Arg(50) // non power-of-two + ->Arg(1024) + ->Arg(4096) + ->Arg(8192) + ->Arg(1 << 14) + ->Arg(1 << 16) + ->Arg(1 << 18); + }; + bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>>( + "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); + } + benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); From 100521b7bade2343e292eb0c5434eea9c2fb9de9 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Fri, 4 Apr 2025 21:17:22 -0400 Subject: [PATCH 10/12] Fix invoke call by using std::__invoke --- libcxx/include/__algorithm/for_each.h | 12 ++++++------ libcxx/include/__algorithm/for_each_n.h | 9 +++++---- libcxx/include/__algorithm/ranges_for_each.h | 1 - libcxx/include/__algorithm/ranges_for_each_n.h | 1 - 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index 01ddad761bb57..a3cee6783154f 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -13,9 +13,9 @@ #include <__algorithm/for_each_segment.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -27,13 +27,13 @@ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator __for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) { for (; __first != __last; ++__first) - std::invoke(__f, std::invoke(__proj, *__first)); + std::__invoke(__f, std::__invoke(__proj, *__first)); return __first; } #ifndef _LIBCPP_CXX03_LANG template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void @@ -45,9 +45,9 @@ __for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __f } #endif // !_LIBCPP_CXX03_LANG -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function -for_each(_InputIterator __first, _InputIterator __last, _Function __f) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func +for_each(_InputIterator __first, _InputIterator __last, _Func __f) { __identity __proj; std::__for_each(__first, __last, __f, __proj); return __f; diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h index 953662afd6310..fb0f14fae49ae 100644 --- a/libcxx/include/__algorithm/for_each_n.h +++ b/libcxx/include/__algorithm/for_each_n.h @@ -14,12 +14,13 @@ #include <__algorithm/for_each_n_segment.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__iterator/next.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> #include <__type_traits/negation.h> +#include <__type_traits/invoke.h> #include <__utility/convert_to_integral.h> #include <__utility/move.h> @@ -45,7 +46,7 @@ __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; _IntegralSize __n = __orig_n; while (__n > 0) { - std::invoke(__f, std::invoke(__proj, *__first)); + std::__invoke(__f, std::__invoke(__proj, *__first)); ++__first; --__n; } @@ -86,9 +87,9 @@ __for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __pr #if _LIBCPP_STD_VER >= 17 -template +template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) { +for_each_n(_InputIterator __first, _Size __orig_n, _Func __f) { __identity __proj; return std::__for_each_n(__first, __orig_n, __f, __proj); } diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index ed0dcde688406..1b11b52798dd6 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -15,7 +15,6 @@ #include <__concepts/assignable.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/projected.h> #include <__ranges/access.h> diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index ebcd38a8eef6f..3aab1b79c10a1 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -13,7 +13,6 @@ #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> From 05161a1637eee204ad275f75c4356a2559730e5a Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sat, 5 Apr 2025 16:45:17 -0400 Subject: [PATCH 11/12] Refactor to simplify logic of for_each_n_segment.h --- libcxx/docs/ReleaseNotes/21.rst | 15 ++----- libcxx/include/__algorithm/for_each.h | 11 ++++- libcxx/include/__algorithm/for_each_n.h | 7 ++-- .../include/__algorithm/for_each_n_segment.h | 6 --- .../nonmodifying/for_each_n.bench.cpp | 41 ++----------------- 5 files changed, 19 insertions(+), 61 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 49c188ebac420..9f1a32a222f0d 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -64,22 +64,11 @@ Improvements and New Features - The ``num_put::do_put`` integral overloads have been optimized, resulting in a performance improvement of up to 2.4x. -<<<<<<< HEAD - The ``std::stable_sort`` algorithm uses radix sort for floating-point types now, which can improve the performance up to 10x, depending on type of sorted elements and the initial state of the sorted array. - The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available in C++23 and later. -======= -- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators, -<<<<<<< HEAD - resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` segmented inputs and 24.9x for - ``join_view`` of ``vector>``. ->>>>>>> 50ac206d4a13 (Apply optimization for join_view segmented iterators) -======= - resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of - ``vector>``. ->>>>>>> 590136ba0d9f (Fix review comments) - The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque`` iterators, and up to 13.9x for ``std::join_view>>`` iterators. @@ -87,6 +76,10 @@ Improvements and New Features - The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively. +- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators, + resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of + ``vector>``. + Deprecations and Removals ------------------------- diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index a3cee6783154f..4167eec3506e4 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -16,11 +16,15 @@ #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template @@ -36,12 +40,13 @@ template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func, _Proj& __proj) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator +__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { std::__for_each(__lfirst, __llast, __func, __proj); }); + return __last; } #endif // !_LIBCPP_CXX03_LANG @@ -55,4 +60,6 @@ for_each(_InputIterator __first, _InputIterator __last, _Func __f) { _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_FOR_EACH_H diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h index fb0f14fae49ae..9a6c6bb5175d6 100644 --- a/libcxx/include/__algorithm/for_each_n.h +++ b/libcxx/include/__algorithm/for_each_n.h @@ -15,12 +15,11 @@ #include <__config> #include <__functional/identity.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> -#include <__type_traits/negation.h> #include <__type_traits/invoke.h> +#include <__type_traits/negation.h> #include <__utility/convert_to_integral.h> #include <__utility/move.h> @@ -59,11 +58,11 @@ template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter -__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) { +__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n; auto __last = __first + __n; std::__for_each(__first, __last, __f, __proj); - return std::move(__last); + return __last; } #ifndef _LIBCPP_CXX03_LANG diff --git a/libcxx/include/__algorithm/for_each_n_segment.h b/libcxx/include/__algorithm/for_each_n_segment.h index 6c257dbcdc3ea..1b522fb373eee 100644 --- a/libcxx/include/__algorithm/for_each_n_segment.h +++ b/libcxx/include/__algorithm/for_each_n_segment.h @@ -10,13 +10,7 @@ #define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H #include <__config> -<<<<<<< HEAD #include <__iterator/iterator_traits.h> -======= -#include <__iterator/distance.h> -#include <__iterator/iterator_traits.h> -#include <__iterator/next.h> ->>>>>>> 4a86118918e8 (Fix review comments) #include <__iterator/segmented_iterator.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index 3ace25a6052b6..e6624bd304447 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -51,44 +51,9 @@ int main(int argc, char** argv) { bm.operator()>("std::for_each_n(vector)", std_for_each_n); bm.operator()>("std::for_each_n(deque)", std_for_each_n); bm.operator()>("std::for_each_n(list)", std_for_each_n); - } - - // std::for_each_n for join_view - { - auto bm = [](std::string name, auto for_each_n) { - using C1 = typename Container::value_type; - using ElemType = typename C1::value_type; - benchmark::RegisterBenchmark( - name, - [for_each_n](auto& st) { - std::size_t const size = st.range(0); - std::size_t const seg_size = 256; - std::size_t const segments = (size + seg_size - 1) / seg_size; - Container c(segments); - for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { - c[i].resize(std::min(seg_size, n), ElemType(1)); - } - - auto view = c | std::views::join; - auto first = view.begin(); - - for ([[maybe_unused]] auto _ : st) { - benchmark::DoNotOptimize(c); - auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp(x, 10, 100); }); - benchmark::DoNotOptimize(result); - } - }) - ->Arg(8) - ->Arg(32) - ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); - }; - bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); } // {std,ranges}::for_each_n for join_view From 18bf207d08c796bf580a44e4b76e932d9efde7b5 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sat, 7 Jun 2025 07:56:55 -0400 Subject: [PATCH 12/12] Address ldionne's comments --- libcxx/docs/ReleaseNotes/21.rst | 9 +++------ libcxx/include/__algorithm/ranges_for_each.h | 5 ++++- .../algorithms/nonmodifying/for_each.bench.cpp | 14 ++------------ .../algorithms/nonmodifying/for_each_n.bench.cpp | 14 ++------------ 4 files changed, 11 insertions(+), 31 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 9f1a32a222f0d..349c37ae625f1 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -70,16 +70,13 @@ Improvements and New Features - The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available in C++23 and later. -- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of - up to 17.7x for ``std::deque`` iterators, and up to 13.9x for ``std::join_view>>`` iterators. +- The ``std::for_each_n``, ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for + segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque`` iterators, and up + to 13.9x for ``std::join_view>>`` iterators. - The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively. -- The ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for segmented iterators, - resulting in performance improvements of up to 21.3x for ``std::deque::iterator`` and 24.9x for ``join_view`` of - ``vector>``. - Deprecations and Removals ------------------------- diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index 1b11b52798dd6..e9c84e8583f87 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -43,7 +43,10 @@ struct __for_each { template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - if constexpr (!std::assignable_from<_Iter&, _Sent> && sized_sentinel_for<_Sent, _Iter>) { + // In the case where we have different iterator and sentinel types, the segmented iterator optimization + // in std::for_each will not kick in. Therefore, we prefer std::for_each_n in that case (whenever we can + // obtain the `n`). + if constexpr (!std::assignable_from<_Iter&, _Sent> && std::sized_sentinel_for<_Sent, _Iter>) { auto __n = __last - __first; auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj); return {std::move(__end), std::move(__func)}; diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp index 9151ca19c7862..f58f336f8b892 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp @@ -42,12 +42,7 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>("std::for_each(vector)", std_for_each); bm.operator()>("std::for_each(deque)", std_for_each); @@ -87,12 +82,7 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); bm.operator()>>("rng::for_each(join_view(vector>)", std::ranges::for_each); diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index e6624bd304447..e643e647722cb 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -41,12 +41,7 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>("std::for_each_n(vector)", std_for_each_n); bm.operator()>("std::for_each_n(deque)", std_for_each_n); @@ -84,12 +79,7 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); bm.operator()>>(