@@ -601,6 +601,8 @@ S_does_utf8_overflow(const U8 * const s,
601
601
const U8 * e ,
602
602
const bool consider_overlongs )
603
603
{
604
+ PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW ;
605
+
604
606
/* Returns an int indicating whether or not the UTF-8 sequence from 's' to
605
607
* 'e' - 1 would overflow an IV on this platform; that is if it represents
606
608
* a code point larger than the highest representable code point. It
@@ -623,9 +625,6 @@ S_does_utf8_overflow(const U8 * const s,
623
625
const STRLEN len = e - s ;
624
626
const U8 * x ;
625
627
const U8 * y = (const U8 * ) HIGHEST_REPRESENTABLE_UTF ;
626
- int is_overlong = 0 ;
627
-
628
- PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW ;
629
628
630
629
for (x = s ; x < e ; x ++ , y ++ ) {
631
630
@@ -654,7 +653,7 @@ S_does_utf8_overflow(const U8 * const s,
654
653
* there's not enough information to tell */
655
654
return (len >= STRLENs (HIGHEST_REPRESENTABLE_UTF )) ? 0 : -1 ;
656
655
657
- overflows_if_not_overlong :
656
+ overflows_if_not_overlong : ;
658
657
659
658
/* Here, a well-formed sequence overflows. If we are assuming
660
659
* well-formedness, return that it overflows. */
@@ -666,7 +665,7 @@ S_does_utf8_overflow(const U8 * const s,
666
665
* overflow if you were to calculate it out.
667
666
*
668
667
* See if it actually is overlong */
669
- is_overlong = is_utf8_overlong (s , len );
668
+ int is_overlong = is_utf8_overlong (s , len );
670
669
671
670
/* If it isn't overlong, is well-formed, so overflows */
672
671
if (is_overlong == 0 ) {
@@ -678,7 +677,9 @@ S_does_utf8_overflow(const U8 * const s,
678
677
return -1 ;
679
678
}
680
679
681
- /* Here, it appears to overflow, but it is also overlong */
680
+ /* Here, it appears to overflow, but it is also overlong. That overlong
681
+ * may evaluate to something that doesn't overflow; or it may evaluate to
682
+ * something that does. Figure out which of those is the case */
682
683
683
684
#if 6 * UTF_CONTINUATION_BYTE_INFO_BITS <= IVSIZE * CHARBITS
684
685
@@ -699,9 +700,10 @@ S_does_utf8_overflow(const U8 * const s,
699
700
*
700
701
* FE consists of 7 bytes total; the FE start byte contributes 0 bits of
701
702
* information (the high 7 bits, all ones, say that the sequence is 7 bytes
702
- * long, and the bottom, zero, bit is s placeholder. That leaves the 6
703
- * continuation bytes to contribute UTF_CONTINUATION_BYTE_INFO_BITS each.
704
- If that number of bits doesn't exceed the word size, it can't overflow. */
703
+ * long, and the bottom bit is 0, so it doesn't add anything). That
704
+ * leaves the 6 continuation bytes to contribute
705
+ * UTF_CONTINUATION_BYTE_INFO_BITS each. If that number of bits doesn't
706
+ * exceed the word size, it can't overflow. */
705
707
706
708
return 0 ;
707
709
0 commit comments