Skip to content

Commit ce93998

Browse files
committed
Optimizing NFC algorithm
1 parent 732204c commit ce93998

File tree

1 file changed

+41
-50
lines changed

1 file changed

+41
-50
lines changed

lib/inc/sys_string/impl/unicode/algorithms.h

Lines changed: 41 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -956,74 +956,64 @@ namespace sysstr
956956
return dest;
957957

958958
stack_or_heap_buffer<char32_t, 32> buffer;
959-
960-
auto status = get_nfc_qc_status(*first);
961959

960+
auto cur = first;
962961
for ( ; ; )
963962
{
964-
auto conv_range = find_conversion_range(status, first, last);
965-
for ( ; first != conv_range.begin(); ++first)
966-
dest = write_unsafe<OutEnc>(*first, dest);
967-
if (conv_range.empty())
968-
{
969-
assert(first == last);
970-
break;
971-
}
963+
auto status = get_nfc_qc_status(*cur);
972964

973-
if constexpr (std::ranges::sized_range<decltype(conv_range)>)
974-
buffer.reserve(conv_range.size());
975-
nfd<utf32>()(conv_range, std::back_inserter(buffer));
976-
dest = convert(buffer, dest);
977-
first = conv_range.end();
978-
if (first == last)
979-
break;
980-
status = nfc_qc_status::stable;
981-
buffer.clear();
982-
}
983-
984-
return dest;
985-
}
986-
987-
private:
988-
template<std::forward_iterator It, std::sentinel_for<It> EndIt>
989-
requires(std::is_same_v<std::iter_value_t<It>, char32_t>)
990-
inline auto find_conversion_range(nfc_qc_status first_status,
991-
It first, EndIt last) -> std::ranges::subrange<It>
992-
{
993-
using namespace util;
994-
using namespace util::unicode;
995-
996-
auto status = first_status;
997-
It start = first;
998-
for ( ; ; )
999-
{
1000965
if (status == nfc_qc_status::bad)
1001966
{
1002-
for (++first; first != last; ++first)
967+
for (++cur; ; ++cur)
1003968
{
1004-
status = get_nfc_qc_status(*first);
969+
if (cur == last)
970+
return convert_slow(buffer, first, cur, dest);
971+
status = get_nfc_qc_status(*cur);
1005972
if (status == nfc_qc_status::stable)
1006973
break;
1007974
}
1008-
return {start, first};
975+
dest = convert_slow(buffer, first, cur, dest);
976+
buffer.clear();
977+
first = cur;
978+
if (++cur == last)
979+
return write_unsafe<OutEnc>(*first, dest);
1009980
}
1010-
if (status == nfc_qc_status::stable)
981+
else if (status == nfc_qc_status::stable)
1011982
{
1012-
start = first;
1013-
if (++first == last)
1014-
return {first, first};
983+
for ( ; first != cur; ++first)
984+
dest = write_unsafe<OutEnc>(*first, dest);
985+
for (++cur; ; ++cur)
986+
{
987+
if (cur == last)
988+
return write_unsafe<OutEnc>(*first, dest);
989+
status = get_nfc_qc_status(*cur);
990+
if (status != nfc_qc_status::stable)
991+
break;
992+
write_unsafe<OutEnc>(*first, dest);
993+
++first;
994+
}
1015995
}
1016996
else
1017997
{
1018-
if (++first == last)
1019-
return {start, first};
998+
if (++cur == last)
999+
return convert_slow(buffer, first, cur, dest);
10201000
}
1021-
1022-
status = get_nfc_qc_status(*first);
10231001
}
1024-
return {first, first}; // == {last, last}
10251002
}
1003+
1004+
private:
1005+
template<std::forward_iterator It, std::sentinel_for<It> EndIt, std::output_iterator<utf_char_of<OutEnc>> OutIt>
1006+
requires(std::is_same_v<std::iter_value_t<It>, char32_t>)
1007+
static auto convert_slow(util::stack_or_heap_buffer<char32_t, 32> & buffer,It first, EndIt last, OutIt dest) -> OutIt
1008+
{
1009+
if constexpr (std::sized_sentinel_for<It, EndIt>)
1010+
buffer.reserve(last - first);
1011+
nfd<utf32>()(std::ranges::subrange{first, last}, std::back_inserter(buffer));
1012+
return convert(buffer, dest);
1013+
}
1014+
10261015

1016+
SYS_STRING_FORCE_INLINE
10271017
static auto get_nfc_qc_status(char32_t c) -> nfc_qc_status
10281018
{
10291019
using namespace util;
@@ -1042,7 +1032,7 @@ namespace sysstr
10421032

10431033
template<std::ranges::forward_range Range, std::output_iterator<utf_char_of<OutEnc>> OutIt>
10441034
requires(utf_encoding_of<std::ranges::range_value_t<Range>> == utf32)
1045-
inline auto convert(const Range & range, OutIt dest) -> OutIt
1035+
static inline auto convert(const Range & range, OutIt dest) -> OutIt
10461036
{
10471037
using namespace util;
10481038
using namespace util::unicode;
@@ -1145,6 +1135,7 @@ namespace sysstr
11451135

11461136
}
11471137

1138+
SYS_STRING_FORCE_INLINE
11481139
static auto find_composition(uint32_t val , const uint32_t * compositions) -> uint32_t
11491140
{
11501141
for ( ; ; )

0 commit comments

Comments
 (0)