Skip to content

Commit d93cc7a

Browse files
authored
[libc] WCS to integer internal function (#147857)
Duplicated str_to_integer.h and modified it to work with widechars. A future patch will implement the public functions (wcstol, wcstoll, etc) by calling this internal function.
1 parent ef24b4b commit d93cc7a

File tree

4 files changed

+418
-0
lines changed

4 files changed

+418
-0
lines changed

libc/src/__support/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,19 @@ add_header_library(
180180
libc.src.__support.common
181181
)
182182

183+
# Header library target `wcs_to_integer`, exposing wcs_to_integer.h together
# with the DEPENDS entries listed below.
# NOTE(review): wcs_to_integer.h also includes src/__support/big_int.h and
# src/__support/uint128.h, which have no matching entry in DEPENDS here --
# confirm whether they need to be listed.
add_header_library(
184+
wcs_to_integer
185+
HDRS
186+
wcs_to_integer.h
187+
DEPENDS
188+
.wctype_utils
189+
.str_to_num_result
190+
libc.hdr.errno_macros
191+
libc.src.__support.CPP.limits
192+
libc.src.__support.CPP.type_traits
193+
libc.src.__support.common
194+
)
195+
183196
add_header_library(
184197
integer_to_string
185198
HDRS

libc/src/__support/wcs_to_integer.h

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
//===-- Widechar string to integer conversion utils -------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
10+
#define LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
11+
12+
#include "hdr/errno_macros.h" // For ERANGE
13+
#include "src/__support/CPP/limits.h"
14+
#include "src/__support/CPP/type_traits.h"
15+
#include "src/__support/CPP/type_traits/make_unsigned.h"
16+
#include "src/__support/big_int.h"
17+
#include "src/__support/common.h"
18+
#include "src/__support/macros/config.h"
19+
#include "src/__support/str_to_num_result.h"
20+
#include "src/__support/uint128.h"
21+
#include "src/__support/wctype_utils.h"
22+
23+
namespace LIBC_NAMESPACE_DECL {
24+
namespace internal {
25+
26+
// Returns the idx of the first character in src that is not a whitespace
27+
// character (as determined by iswspace())
28+
LIBC_INLINE size_t
29+
first_non_whitespace(const wchar_t *__restrict src,
30+
size_t src_len = cpp::numeric_limits<size_t>::max()) {
31+
size_t src_cur = 0;
32+
while (src_cur < src_len && internal::iswspace(src[src_cur])) {
33+
++src_cur;
34+
}
35+
return src_cur;
36+
}
37+
38+
// checks if the next 3 characters of the string pointer are the start of a
39+
// hexadecimal number. Does not advance the string pointer.
40+
LIBC_INLINE bool
41+
is_hex_start(const wchar_t *__restrict src,
42+
size_t src_len = cpp::numeric_limits<size_t>::max()) {
43+
if (src_len < 3)
44+
return false;
45+
return *src == L'0' && towlower(*(src + 1)) == L'x' && iswalnum(*(src + 2)) &&
46+
b36_wchar_to_int(*(src + 2)) < 16;
47+
}
48+
49+
// Takes the address of the string pointer and parses the base from the start of
50+
// it.
51+
LIBC_INLINE int infer_base(const wchar_t *__restrict src, size_t src_len) {
52+
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
53+
// sequence of the decimal digits and the letters a (or A) through f (or F)
54+
// with values 10 through 15 respectively." (C standard 6.4.4.1)
55+
if (is_hex_start(src, src_len))
56+
return 16;
57+
// An octal number is defined as "the prefix 0 optionally followed by a
58+
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
59+
// number that starts with 0, including just 0, is an octal number.
60+
if (src_len > 0 && src[0] == L'0')
61+
return 8;
62+
// A decimal number is defined as beginning "with a nonzero digit and
63+
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
64+
return 10;
65+
}
66+
67+
template <class T>
68+
LIBC_INLINE StrToNumResult<T>
69+
wcstointeger(const wchar_t *__restrict src, int base,
70+
const size_t src_len = cpp::numeric_limits<size_t>::max()) {
71+
using ResultType = make_integral_or_big_int_unsigned_t<T>;
72+
73+
ResultType result = 0;
74+
75+
bool is_number = false;
76+
size_t src_cur = 0;
77+
int error_val = 0;
78+
79+
if (src_len == 0)
80+
return {0, 0, 0};
81+
82+
if (base < 0 || base == 1 || base > 36)
83+
return {0, 0, EINVAL};
84+
85+
src_cur = first_non_whitespace(src, src_len);
86+
87+
wchar_t result_sign = L'+';
88+
if (src[src_cur] == L'+' || src[src_cur] == L'-') {
89+
result_sign = src[src_cur];
90+
++src_cur;
91+
}
92+
93+
if (base == 0)
94+
base = infer_base(src + src_cur, src_len - src_cur);
95+
96+
if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
97+
src_cur = src_cur + 2;
98+
99+
constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
100+
const bool is_positive = (result_sign == L'+');
101+
102+
ResultType constexpr NEGATIVE_MAX =
103+
!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
104+
: cpp::numeric_limits<T>::max();
105+
ResultType const abs_max =
106+
(is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
107+
ResultType const abs_max_div_by_base =
108+
abs_max / static_cast<ResultType>(base);
109+
110+
while (src_cur < src_len && iswalnum(src[src_cur])) {
111+
int cur_digit = b36_wchar_to_int(src[src_cur]);
112+
if (cur_digit >= base)
113+
break;
114+
115+
is_number = true;
116+
++src_cur;
117+
118+
// If the number has already hit the maximum value for the current type then
119+
// the result cannot change, but we still need to advance src to the end of
120+
// the number.
121+
if (result == abs_max) {
122+
error_val = ERANGE;
123+
continue;
124+
}
125+
126+
if (result > abs_max_div_by_base) {
127+
result = abs_max;
128+
error_val = ERANGE;
129+
} else {
130+
result = result * static_cast<ResultType>(base);
131+
}
132+
if (result > abs_max - static_cast<ResultType>(cur_digit)) {
133+
result = abs_max;
134+
error_val = ERANGE;
135+
} else {
136+
result = result + static_cast<ResultType>(cur_digit);
137+
}
138+
}
139+
140+
ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0;
141+
142+
if (error_val == ERANGE) {
143+
if (is_positive || IS_UNSIGNED)
144+
return {cpp::numeric_limits<T>::max(), str_len, error_val};
145+
else // T is signed and there is a negative overflow
146+
return {cpp::numeric_limits<T>::min(), str_len, error_val};
147+
}
148+
149+
return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
150+
}
151+
152+
} // namespace internal
153+
} // namespace LIBC_NAMESPACE_DECL
154+
155+
#endif // LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H

libc/test/src/__support/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,17 @@ add_libc_test(
141141
libc.src.__support.str_to_integer
142142
)
143143

144+
# Test target `wcs_to_integer_test` in the libc-support-tests suite, built
# from wcs_to_integer_test.cpp with the DEPENDS entries listed below.
add_libc_test(
145+
wcs_to_integer_test
146+
SUITE
147+
libc-support-tests
148+
SRCS
149+
wcs_to_integer_test.cpp
150+
DEPENDS
151+
libc.src.__support.integer_literals
152+
libc.src.__support.wcs_to_integer
153+
)
154+
144155
add_libc_test(
145156
integer_to_string_test
146157
SUITE

0 commit comments

Comments
 (0)