|
19 | 19 | namespace LIBC_NAMESPACE_DECL {
|
20 | 20 | namespace internal {
|
21 | 21 |
|
| 22 | +// Number of payload bits in each utf-8 byte other than the first byte |
| 23 | +constexpr size_t ENCODED_BITS_PER_UTF8 = 6; |
| 24 | +// Mask selecting the bits of a utf-8 byte that actually encode character |
| 25 | +// information, not metadata (the trailing bits, excluding the byte header) |
| 26 | +constexpr uint32_t MASK_ENCODED_BITS = |
| 27 | + mask_trailing_ones<uint32_t, ENCODED_BITS_PER_UTF8>(); |
| 28 | + |
22 | 29 | CharacterConverter::CharacterConverter(mbstate *mbstate) { state = mbstate; }
|
23 | 30 |
|
24 | 31 | void CharacterConverter::clear() {
|
@@ -61,10 +68,8 @@ int CharacterConverter::push(char8_t utf8_byte) {
|
61 | 68 | }
|
62 | 69 | // Any subsequent push
|
63 | 70 | // Adding 6 more bits so need to left shift
|
64 |
| - constexpr size_t ENCODED_BITS_PER_UTF8 = 6; |
65 | 71 | if (num_ones == 1 && !isComplete()) {
|
66 |
| - char32_t byte = |
67 |
| - utf8_byte & mask_trailing_ones<uint32_t, ENCODED_BITS_PER_UTF8>(); |
| 72 | + char32_t byte = utf8_byte & MASK_ENCODED_BITS; |
68 | 73 | state->partial = state->partial << ENCODED_BITS_PER_UTF8;
|
69 | 74 | state->partial |= byte;
|
70 | 75 | state->bytes_processed++;
|
@@ -117,12 +122,6 @@ ErrorOr<char8_t> CharacterConverter::pop_utf8() {
|
117 | 122 | constexpr char8_t FIRST_BYTE_HEADERS[] = {0, 0xC0, 0xE0, 0xF0};
|
118 | 123 | constexpr char8_t CONTINUING_BYTE_HEADER = 0x80;
|
119 | 124 |
|
120 |
| - // the number of bits per utf-8 byte that actually encode character |
121 |
| - // information not metadata (# of bits excluding the byte headers) |
122 |
| - constexpr size_t ENCODED_BITS_PER_UTF8 = 6; |
123 |
| - constexpr int MASK_ENCODED_BITS = |
124 |
| - mask_trailing_ones<unsigned int, ENCODED_BITS_PER_UTF8>(); |
125 |
| - |
126 | 125 | char32_t output;
|
127 | 126 |
|
128 | 127 | // Shift to get the next 6 bits from the utf32 encoding
|
|
0 commit comments