Skip to content

Commit ac3e757

Browse files
committed
utf8.c: Add comments; move decls closer to use
1 parent a66489b commit ac3e757

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

utf8.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,8 @@ S_does_utf8_overflow(const U8 * const s,
601601
const U8 * e,
602602
const bool consider_overlongs)
603603
{
604+
PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
605+
604606
/* Returns an int indicating whether or not the UTF-8 sequence from 's' to
605607
* 'e' - 1 would overflow an IV on this platform; that is if it represents
606608
* a code point larger than the highest representable code point. It
@@ -623,9 +625,6 @@ S_does_utf8_overflow(const U8 * const s,
623625
const STRLEN len = e - s;
624626
const U8 *x;
625627
const U8 * y = (const U8 *) HIGHEST_REPRESENTABLE_UTF;
626-
int is_overlong = 0;
627-
628-
PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
629628

630629
for (x = s; x < e; x++, y++) {
631630

@@ -654,7 +653,7 @@ S_does_utf8_overflow(const U8 * const s,
654653
* there's not enough information to tell */
655654
return (len >= STRLENs(HIGHEST_REPRESENTABLE_UTF)) ? 0 : -1;
656655

657-
overflows_if_not_overlong:
656+
overflows_if_not_overlong: ;
658657

659658
/* Here, a well-formed sequence overflows. If we are assuming
660659
* well-formedness, return that it overflows. */
@@ -666,7 +665,7 @@ S_does_utf8_overflow(const U8 * const s,
666665
* overflow if you were to calculate it out.
667666
*
668667
* See if it actually is overlong */
669-
is_overlong = is_utf8_overlong(s, len);
668+
int is_overlong = is_utf8_overlong(s, len);
670669

671670
/* If it isn't overlong, it is well-formed, so overflows */
672671
if (is_overlong == 0) {
@@ -678,7 +677,9 @@ S_does_utf8_overflow(const U8 * const s,
678677
return -1;
679678
}
680679

681-
/* Here, it appears to overflow, but it is also overlong */
680+
/* Here, it appears to overflow, but it is also overlong. That overlong
681+
* may evaluate to something that doesn't overflow; or it may evaluate to
682+
* something that does. Figure it out */
682683

683684
#if 6 * UTF_CONTINUATION_BYTE_INFO_BITS <= IVSIZE * CHARBITS
684685

@@ -699,9 +700,10 @@ S_does_utf8_overflow(const U8 * const s,
699700
*
700701
* FE consists of 7 bytes total; the FE start byte contributes 0 bits of
701702
* information (the high 7 bits, all ones, say that the sequence is 7 bytes
702-
* long, and the bottom, zero, bit is s placeholder. That leaves the 6
703-
* continuation bytes to contribute UTF_CONTINUATION_BYTE_INFO_BITS each.
704-
If that number of bits doesn't exceed the word size, it can't overflow. */
703+
* long, and the bottom, zero, bit is 0, so doesn't add anything). That
704+
* leaves the 6 continuation bytes to contribute
705+
* UTF_CONTINUATION_BYTE_INFO_BITS each. If that number of bits doesn't
706+
* exceed the word size, it can't overflow. */
705707

706708
return 0;
707709

0 commit comments

Comments
 (0)