From ab07c55cc0a3800cb6e4a05e793c6072ade41ee6 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Mon, 7 Jul 2025 21:45:33 +0000 Subject: [PATCH 1/6] [libc] Cleaned up wcsspn and wcscspn --- libc/src/wchar/CMakeLists.txt | 12 +++++++ libc/src/wchar/wchar_utils.h | 33 +++++++++++++++++++ libc/src/wchar/wcscspn.cpp | 26 ++++++++------- libc/src/wchar/wcsspn.cpp | 28 ++++++++-------- .../llvm-project-overlay/libc/BUILD.bazel | 13 ++++++++ 5 files changed, 87 insertions(+), 25 deletions(-) create mode 100644 libc/src/wchar/wchar_utils.h diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index f2f4b1d38f0f3..85d582d9ccc76 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -1,3 +1,13 @@ +add_header_library( + wchar_utils + HDRS + wchar_utils.h + DEPENDS + libc.hdr.types.size_t + libc.hdr.types.wchar_t + libc.src.__support.common +) + add_entrypoint_object( wcslen SRCS @@ -211,6 +221,7 @@ add_entrypoint_object( DEPENDS libc.hdr.wchar_macros libc.hdr.types.size_t + libc.src.wchar.wchar_utils ) add_entrypoint_object( @@ -222,6 +233,7 @@ add_entrypoint_object( DEPENDS libc.hdr.wchar_macros libc.hdr.types.size_t + libc.src.wchar.wchar_utils ) add_entrypoint_object( diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h new file mode 100644 index 0000000000000..2fc3ac34469e7 --- /dev/null +++ b/libc/src/wchar/wchar_utils.h @@ -0,0 +1,33 @@ +//===-- wchar utils ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCHAR_UTILS_H +#define LLVM_LIBC_SRC_WCHAR_WCHAR_UTILS_H + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +template <typename Check> +LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, Check check) { + size_t i = 0; + for (; s1[i]; ++i) { + if (!check(s1[i])) + return i; + } + return i; +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCHAR_UTILS_H diff --git a/libc/src/wchar/wcscspn.cpp b/libc/src/wchar/wcscspn.cpp index 8869d84cdfdee..afd3eb20e51ef 100644 --- a/libc/src/wchar/wcscspn.cpp +++ b/libc/src/wchar/wcscspn.cpp @@ -12,23 +12,25 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { -bool check(wchar_t c, const wchar_t *s2) { - for (int n = 0; s2[n]; ++n) { - if (s2[n] == c) - return false; +struct CheckCSpan { + const wchar_t *str; + CheckCSpan(const wchar_t *w) { str = w; } + bool operator()(wchar_t c) { + for (int n = 0; str[n]; ++n) { + if (str[n] == c) + return false; + } + return true; } - return true; -} +}; + LLVM_LIBC_FUNCTION(size_t, wcscspn, (const wchar_t *s1, const wchar_t *s2)) { - size_t i = 0; - for (; s1[i]; ++i) { - if (!check(s1[i], s2)) - return i; - } - return i; + CheckCSpan check(s2); + return internal::inline_wcsspn(s1, check); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcsspn.cpp b/libc/src/wchar/wcsspn.cpp index 23de381a2d954..f3c4be99f7e50 100644 --- a/libc/src/wchar/wcsspn.cpp +++ b/libc/src/wchar/wcsspn.cpp @@ -12,23 +12,25 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include
"src/__support/macros/config.h" +#include "wchar_utils.h" + +struct CheckSpan { + const wchar_t *str; + CheckSpan(const wchar_t *w) { str = w; } + bool operator()(wchar_t c) { + for (int n = 0; str[n]; ++n) { + if (str[n] == c) + return true; + } + return false; + } +}; namespace LIBC_NAMESPACE_DECL { -bool check(wchar_t c, const wchar_t *s2) { - for (int n = 0; s2[n]; ++n) { - if (s2[n] == c) - return true; - } - return false; -} LLVM_LIBC_FUNCTION(size_t, wcsspn, (const wchar_t *s1, const wchar_t *s2)) { - size_t i = 0; - for (; s1[i]; ++i) { - if (!check(s1[i], s2)) - return i; - } - return i; + CheckSpan check(s2); + return internal::inline_wcsspn(s1, check); } } // namespace LIBC_NAMESPACE_DECL diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index d3fc6912cd4e4..675263778b530 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5729,6 +5729,17 @@ libc_function( ############################## wchar targets ############################### +libc_support_library( + name = "wchar_utils", + hdrs = ["src/wchar/wchar_utils.h"], + deps = [ + ":__support_common", + ":__support_macros_attributes", + ":types_size_t", + ":types_wchar_t", + ], +) + libc_function( name = "btowc", srcs = ["src/wchar/btowc.cpp"], @@ -5826,6 +5837,7 @@ libc_function( ":__support_macros_config", ":types_size_t", ":types_wchar_t", + ":wchar_utils", ], ) @@ -5911,6 +5923,7 @@ libc_function( ":__support_macros_config", ":types_size_t", ":types_wchar_t", + ":wchar_utils", ], ) From 259f77621e809d9a3f7bd988d5714dbc9ad6924f Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Mon, 7 Jul 2025 22:44:16 +0000 Subject: [PATCH 2/6] moved check function to wchar_utils --- libc/src/wchar/wchar_utils.h | 18 +++++++++++++++--- libc/src/wchar/wcscspn.cpp | 15 +-------------- libc/src/wchar/wcsspn.cpp | 15 +-------------- 3 files changed, 17 insertions(+), 31 deletions(-) diff 
--git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index 2fc3ac34469e7..0415499dfe6ff 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -14,14 +14,26 @@ #include "src/__support/common.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE +bool check_span(wchar_t c, const wchar_t *str) { + for (int n = 0; str[n]; ++n) { + if (str[n] == c) + return true; + } + return false; +} + namespace LIBC_NAMESPACE_DECL { namespace internal { -template <typename Check> -LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, Check check) { +// To avoid duplicated code, call this with true for wcscspn and call with false +// for wcsspn +LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, const wchar_t *s2, + bool invert) { size_t i = 0; for (; s1[i]; ++i) { - if (!check(s1[i])) + bool check = check_span(s1[i], s2); + check = invert ? !check : check; + if (!check) return i; } return i; diff --git a/libc/src/wchar/wcscspn.cpp b/libc/src/wchar/wcscspn.cpp index afd3eb20e51ef..d734b91e063c9 100644 --- a/libc/src/wchar/wcscspn.cpp +++ b/libc/src/wchar/wcscspn.cpp @@ -16,21 +16,8 @@ namespace LIBC_NAMESPACE_DECL { -struct CheckCSpan { - const wchar_t *str; - CheckCSpan(const wchar_t *w) { str = w; } - bool operator()(wchar_t c) { - for (int n = 0; str[n]; ++n) { - if (str[n] == c) - return false; - } - return true; - } -}; - LLVM_LIBC_FUNCTION(size_t, wcscspn, (const wchar_t *s1, const wchar_t *s2)) { - CheckCSpan check(s2); - return internal::inline_wcsspn(s1, check); + return internal::inline_wcsspn(s1, s2, true); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcsspn.cpp b/libc/src/wchar/wcsspn.cpp index f3c4be99f7e50..2d3fca05996ad 100644 --- a/libc/src/wchar/wcsspn.cpp +++ b/libc/src/wchar/wcsspn.cpp @@ -14,23 +14,10 @@ #include "src/__support/macros/config.h" #include "wchar_utils.h" -struct CheckSpan { - const wchar_t *str; - CheckSpan(const wchar_t *w) { str = w; } - bool operator()(wchar_t c) { - for (int n = 0; str[n]; ++n)
{ - if (str[n] == c) - return true; - } - return false; - } -}; - namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcsspn, (const wchar_t *s1, const wchar_t *s2)) { - CheckSpan check(s2); - return internal::inline_wcsspn(s1, check); + return internal::inline_wcsspn(s1, s2, false); } } // namespace LIBC_NAMESPACE_DECL From d43e66d588152b75ee4e47462f024fc5d2222809 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Mon, 7 Jul 2025 23:34:45 +0000 Subject: [PATCH 3/6] moved check_span into namespaces --- libc/src/wchar/wchar_utils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index 0415499dfe6ff..ff0c11366a2b7 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -14,6 +14,9 @@ #include "src/__support/common.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE +namespace LIBC_NAMESPACE_DECL { +namespace internal { + bool check_span(wchar_t c, const wchar_t *str) { for (int n = 0; str[n]; ++n) { if (str[n] == c) @@ -22,9 +25,6 @@ bool check_span(wchar_t c, const wchar_t *str) { return false; } -namespace LIBC_NAMESPACE_DECL { -namespace internal { - // To avoid duplicated code, call this with true for wcscspn and call with false // for wcsspn LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, const wchar_t *s2, From ef10e8e230e54e560782cc55a0a5e097fd0be2d3 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Tue, 8 Jul 2025 19:00:11 +0000 Subject: [PATCH 4/6] renamed function --- libc/src/wchar/wchar_utils.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index ff0c11366a2b7..976bd21dcab9d 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -17,7 +17,8 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -bool check_span(wchar_t c, const wchar_t *str) { +// returns true if the character exists in the string +LIBC_INLINE bool 
internal_wcschr(wchar_t c, const wchar_t *str) { for (int n = 0; str[n]; ++n) { if (str[n] == c) return true; @@ -31,7 +32,7 @@ LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, const wchar_t *s2, bool invert) { size_t i = 0; for (; s1[i]; ++i) { - bool check = check_span(s1[i], s2); + bool check = internal_wcschr(s1[i], s2); check = invert ? !check : check; if (!check) return i; From a01af7035b3fbed27ecd48703eac90e3c7d83f88 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Wed, 9 Jul 2025 21:09:33 +0000 Subject: [PATCH 5/6] simplified logic --- libc/src/wchar/wchar_utils.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index 976bd21dcab9d..f83b48ac40dd4 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -29,12 +29,11 @@ LIBC_INLINE bool internal_wcschr(wchar_t c, const wchar_t *str) { // To avoid duplicated code, call this with true for wcscspn and call with false // for wcsspn LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, const wchar_t *s2, - bool invert) { + bool match_set) { size_t i = 0; for (; s1[i]; ++i) { - bool check = internal_wcschr(s1[i], s2); - check = invert ? 
!check : check; - if (!check) + bool in_set = internal_wcschr(s1[i], s2); + if (in_set == match_set) return i; } return i; From f2a5e8a9fedcb9d23c3b74c2c1855c3f6e54d7c3 Mon Sep 17 00:00:00 2001 From: Sriya Pratipati Date: Wed, 9 Jul 2025 22:25:00 +0000 Subject: [PATCH 6/6] added more descriptive comment --- libc/src/wchar/wchar_utils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index f83b48ac40dd4..4880093a2514e 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -26,14 +26,14 @@ LIBC_INLINE bool internal_wcschr(wchar_t c, const wchar_t *str) { return false; } -// To avoid duplicated code, call this with true for wcscspn and call with false -// for wcsspn +// bool should be true for wcscspn for complementary span +// should be false for wcsspn since we want it to span LIBC_INLINE size_t inline_wcsspn(const wchar_t *s1, const wchar_t *s2, - bool match_set) { + bool not_match_set) { size_t i = 0; for (; s1[i]; ++i) { bool in_set = internal_wcschr(s1[i], s2); - if (in_set == match_set) + if (in_set == not_match_set) return i; } return i;