Skip to content

Commit 3be0722

Browse files
committed
refcounted_he_(new|fetch)_pvn: Don't roll our own code
The new function utf8_to_bytes_new_pv() does a better job than these code sections, which are near-duplicates of each other. It is better for several reasons; for example, for long keys it checks a word at a time whether the key is downgradable. This is a follow-on to #22638, which was closed without the original commits that this one replaces.
1 parent b9c32dc commit 3be0722

File tree

1 file changed

+17
-66
lines changed

1 file changed

+17
-66
lines changed

hv.c

Lines changed: 17 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3684,47 +3684,21 @@ SV *
36843684
Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
36853685
const char *keypv, STRLEN keylen, U32 hash, U32 flags)
36863686
{
3687-
U8 utf8_flag;
36883687
PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN;
36893688

3689+
U8 utf8_flag;
3690+
U8 * free_me = NULL;
3691+
36903692
if (flags & ~(REFCOUNTED_HE_KEY_UTF8|REFCOUNTED_HE_EXISTS))
36913693
Perl_croak(aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf,
36923694
(UV)flags);
36933695
if (!chain)
36943696
goto ret;
3695-
if (flags & REFCOUNTED_HE_KEY_UTF8) {
3696-
/* For searching purposes, canonicalise to Latin-1 where possible. */
3697-
const char *keyend = keypv + keylen, *p;
3698-
STRLEN nonascii_count = 0;
3699-
for (p = keypv; p != keyend; p++) {
3700-
if (! UTF8_IS_INVARIANT(*p)) {
3701-
if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
3702-
goto canonicalised_key;
3703-
}
3704-
nonascii_count++;
3705-
p++;
3706-
}
3707-
}
3708-
if (nonascii_count) {
3709-
char *q;
3710-
const char *p = keypv, *keyend = keypv + keylen;
3711-
keylen -= nonascii_count;
3712-
Newx(q, keylen, char);
3713-
SAVEFREEPV(q);
3714-
keypv = q;
3715-
for (; p != keyend; p++, q++) {
3716-
U8 c = (U8)*p;
3717-
if (UTF8_IS_INVARIANT(c)) {
3718-
*q = (char) c;
3719-
}
3720-
else {
3721-
p++;
3722-
*q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
3723-
}
3724-
}
3725-
}
3697+
/* For searching purposes, canonicalise to Latin-1 where possible. */
3698+
if ( flags & REFCOUNTED_HE_KEY_UTF8
3699+
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
3700+
{
37263701
flags &= ~REFCOUNTED_HE_KEY_UTF8;
3727-
canonicalised_key: ;
37283702
}
37293703
utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8) ? HVhek_UTF8 : 0;
37303704
if (!hash)
@@ -3744,6 +3718,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
37443718
utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8)
37453719
#endif
37463720
) {
3721+
Safefree(free_me);
37473722
if (flags & REFCOUNTED_HE_EXISTS)
37483723
return (chain->refcounted_he_data[0] & HVrhek_typemask)
37493724
== HVrhek_delete
@@ -3752,6 +3727,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
37523727
}
37533728
}
37543729
ret:
3730+
Safefree(free_me);
37553731
return flags & REFCOUNTED_HE_EXISTS ? NULL : &PL_sv_placeholder;
37563732
}
37573733

@@ -3836,14 +3812,16 @@ struct refcounted_he *
38363812
Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
38373813
const char *keypv, STRLEN keylen, U32 hash, SV *value, U32 flags)
38383814
{
3815+
PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN;
3816+
38393817
STRLEN value_len = 0;
38403818
const char *value_p = NULL;
38413819
bool is_pv;
38423820
char value_type;
38433821
char hekflags;
38443822
STRLEN key_offset = 1;
38453823
struct refcounted_he *he;
3846-
PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN;
3824+
U8 * free_me = NULL;
38473825

38483826
if (!value || value == &PL_sv_placeholder) {
38493827
value_type = HVrhek_delete;
@@ -3867,39 +3845,11 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
38673845
}
38683846
hekflags = value_type;
38693847

3870-
if (flags & REFCOUNTED_HE_KEY_UTF8) {
3871-
/* Canonicalise to Latin-1 where possible. */
3872-
const char *keyend = keypv + keylen, *p;
3873-
STRLEN nonascii_count = 0;
3874-
for (p = keypv; p != keyend; p++) {
3875-
if (! UTF8_IS_INVARIANT(*p)) {
3876-
if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, keyend)) {
3877-
goto canonicalised_key;
3878-
}
3879-
nonascii_count++;
3880-
p++;
3881-
}
3882-
}
3883-
if (nonascii_count) {
3884-
char *q;
3885-
const char *p = keypv, *keyend = keypv + keylen;
3886-
keylen -= nonascii_count;
3887-
Newx(q, keylen, char);
3888-
SAVEFREEPV(q);
3889-
keypv = q;
3890-
for (; p != keyend; p++, q++) {
3891-
U8 c = (U8)*p;
3892-
if (UTF8_IS_INVARIANT(c)) {
3893-
*q = (char) c;
3894-
}
3895-
else {
3896-
p++;
3897-
*q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
3898-
}
3899-
}
3900-
}
3848+
/* Canonicalise to Latin-1 where possible. */
3849+
if ( (flags & REFCOUNTED_HE_KEY_UTF8)
3850+
&& utf8_to_bytes_new_pv(&keypv, &keylen, &free_me))
3851+
{
39013852
flags &= ~REFCOUNTED_HE_KEY_UTF8;
3902-
canonicalised_key: ;
39033853
}
39043854
if (flags & REFCOUNTED_HE_KEY_UTF8)
39053855
hekflags |= HVhek_UTF8;
@@ -3939,6 +3889,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
39393889
he->refcounted_he_data[0] = hekflags;
39403890
he->refcounted_he_refcnt = 1;
39413891

3892+
Safefree(free_me);
39423893
return he;
39433894
}
39443895

0 commit comments

Comments
 (0)