@@ -1008,12 +1008,6 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1008
1008
bool result_needs_to_be_utf8 = FALSE;
1009
1009
bool left_utf8 = FALSE;
1010
1010
bool right_utf8 = FALSE;
1011
- U8 * left_non_downgraded = NULL ;
1012
- U8 * right_non_downgraded = NULL ;
1013
- Size_t left_non_downgraded_len = 0 ;
1014
- Size_t right_non_downgraded_len = 0 ;
1015
- char * non_downgraded = NULL ;
1016
- Size_t non_downgraded_len = 0 ;
1017
1011
1018
1012
PERL_ARGS_ASSERT_DO_VOP ;
1019
1013
@@ -1031,34 +1025,25 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1031
1025
/* This needs to come after SvPV to ensure that string overloading has
1032
1026
fired off. */
1033
1027
1034
- /* Create downgraded temporaries of any UTF-8 encoded operands */
1028
+ /* Create downgraded temporaries of any UTF-8 encoded operands. As of
1029
+ * perl-5.32, we no longer accept above-FF code points at all */
1035
1030
if (DO_UTF8 (left )) {
1036
- const U8 * save_lc = (U8 * ) lc ;
1037
-
1038
1031
left_utf8 = TRUE;
1039
1032
result_needs_to_be_utf8 = TRUE;
1040
1033
1041
- left_non_downgraded_len = leftlen ;
1042
- lc = (char * ) bytes_from_utf8_loc ((const U8 * ) lc , & leftlen ,
1043
- & left_utf8 ,
1044
- (const U8 * * ) & left_non_downgraded );
1045
- /* Calculate the number of trailing unconvertible bytes. This quantity
1046
- * is the original length minus the length of the converted portion. */
1047
- left_non_downgraded_len -= left_non_downgraded - save_lc ;
1048
- SAVEFREEPV (lc );
1034
+ lc = (char * ) bytes_from_utf8 ((const U8 * ) lc , & leftlen , & left_utf8 );
1035
+ if (! left_utf8 ) {
1036
+ SAVEFREEPV (lc );
1037
+ }
1049
1038
}
1050
1039
if (DO_UTF8 (right )) {
1051
- const U8 * save_rc = (U8 * ) rc ;
1052
-
1053
1040
right_utf8 = TRUE;
1054
1041
result_needs_to_be_utf8 = TRUE;
1055
1042
1056
- right_non_downgraded_len = rightlen ;
1057
- rc = (char * ) bytes_from_utf8_loc ((const U8 * ) rc , & rightlen ,
1058
- & right_utf8 ,
1059
- (const U8 * * ) & right_non_downgraded );
1060
- right_non_downgraded_len -= right_non_downgraded - save_rc ;
1061
- SAVEFREEPV (rc );
1043
+ rc = (char * ) bytes_from_utf8 ((const U8 * ) rc , & rightlen , & right_utf8 );
1044
+ if (! right_utf8 ) {
1045
+ SAVEFREEPV (rc );
1046
+ }
1062
1047
}
1063
1048
1064
1049
/* We set 'len' to the length that the operation actually operates on. The
@@ -1069,9 +1054,7 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1069
1054
* on zeros without having to do it. In the case of '&', the result is
1070
1055
* zero, and the dangling portion is simply discarded. For '|' and '^', the
1071
1056
* result is the same as the other operand, so the dangling part is just
1072
- * appended to the final result, unchanged. As of perl-5.32, we no longer
1073
- * accept above-FF code points in the dangling portion.
1074
- */
1057
+ * appended to the final result, unchanged. */
1075
1058
if (left_utf8 || right_utf8 ) {
1076
1059
Perl_croak (aTHX_ FATAL_ABOVE_FF_MSG , PL_op_desc [optype ]);
1077
1060
}
@@ -1173,29 +1156,11 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
1173
1156
sv_catpvn_nomg (sv , lsave + len , leftlen - len );
1174
1157
}
1175
1158
* SvEND (sv ) = '\0' ;
1176
-
1177
- /* If there is trailing stuff that couldn't be converted from UTF-8, it
1178
- * is appended as-is for the ^ and | operators. This preserves
1179
- * backwards compatibility */
1180
- if (right_non_downgraded ) {
1181
- non_downgraded = (char * ) right_non_downgraded ;
1182
- non_downgraded_len = right_non_downgraded_len ;
1183
- }
1184
- else if (left_non_downgraded ) {
1185
- non_downgraded = (char * ) left_non_downgraded ;
1186
- non_downgraded_len = left_non_downgraded_len ;
1187
- }
1188
-
1189
1159
break ;
1190
1160
}
1191
1161
1192
1162
if (result_needs_to_be_utf8 ) {
1193
1163
sv_utf8_upgrade_nomg (sv );
1194
-
1195
- /* Append any trailing UTF-8 as-is. */
1196
- if (non_downgraded ) {
1197
- sv_catpvn_nomg (sv , non_downgraded , non_downgraded_len );
1198
- }
1199
1164
}
1200
1165
1201
1166
SvTAINT (sv );
0 commit comments