@@ -3684,47 +3684,21 @@ SV *
3684
3684
Perl_refcounted_he_fetch_pvn (pTHX_ const struct refcounted_he * chain ,
3685
3685
const char * keypv , STRLEN keylen , U32 hash , U32 flags )
3686
3686
{
3687
- U8 utf8_flag ;
3688
3687
PERL_ARGS_ASSERT_REFCOUNTED_HE_FETCH_PVN ;
3689
3688
3689
+ U8 utf8_flag ;
3690
+ U8 * free_me = NULL ;
3691
+
3690
3692
if (flags & ~(REFCOUNTED_HE_KEY_UTF8 |REFCOUNTED_HE_EXISTS ))
3691
3693
Perl_croak (aTHX_ "panic: refcounted_he_fetch_pvn bad flags %" UVxf ,
3692
3694
(UV )flags );
3693
3695
if (!chain )
3694
3696
goto ret ;
3695
- if (flags & REFCOUNTED_HE_KEY_UTF8 ) {
3696
- /* For searching purposes, canonicalise to Latin-1 where possible. */
3697
- const char * keyend = keypv + keylen , * p ;
3698
- STRLEN nonascii_count = 0 ;
3699
- for (p = keypv ; p != keyend ; p ++ ) {
3700
- if (! UTF8_IS_INVARIANT (* p )) {
3701
- if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE (p , keyend )) {
3702
- goto canonicalised_key ;
3703
- }
3704
- nonascii_count ++ ;
3705
- p ++ ;
3706
- }
3707
- }
3708
- if (nonascii_count ) {
3709
- char * q ;
3710
- const char * p = keypv , * keyend = keypv + keylen ;
3711
- keylen -= nonascii_count ;
3712
- Newx (q , keylen , char );
3713
- SAVEFREEPV (q );
3714
- keypv = q ;
3715
- for (; p != keyend ; p ++ , q ++ ) {
3716
- U8 c = (U8 )* p ;
3717
- if (UTF8_IS_INVARIANT (c )) {
3718
- * q = (char ) c ;
3719
- }
3720
- else {
3721
- p ++ ;
3722
- * q = (char ) EIGHT_BIT_UTF8_TO_NATIVE (c , * p );
3723
- }
3724
- }
3725
- }
3697
+ /* For searching purposes, canonicalise to Latin-1 where possible. */
3698
+ if ( flags & REFCOUNTED_HE_KEY_UTF8
3699
+ && utf8_to_bytes_new_pv (& keypv , & keylen , & free_me ))
3700
+ {
3726
3701
flags &= ~REFCOUNTED_HE_KEY_UTF8 ;
3727
- canonicalised_key : ;
3728
3702
}
3729
3703
utf8_flag = (flags & REFCOUNTED_HE_KEY_UTF8 ) ? HVhek_UTF8 : 0 ;
3730
3704
if (!hash )
@@ -3744,6 +3718,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
3744
3718
utf8_flag == (HEK_FLAGS (chain -> refcounted_he_hek ) & HVhek_UTF8 )
3745
3719
#endif
3746
3720
) {
3721
+ Safefree (free_me );
3747
3722
if (flags & REFCOUNTED_HE_EXISTS )
3748
3723
return (chain -> refcounted_he_data [0 ] & HVrhek_typemask )
3749
3724
== HVrhek_delete
@@ -3752,6 +3727,7 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain,
3752
3727
}
3753
3728
}
3754
3729
ret :
3730
+ Safefree (free_me );
3755
3731
return flags & REFCOUNTED_HE_EXISTS ? NULL : & PL_sv_placeholder ;
3756
3732
}
3757
3733
@@ -3836,14 +3812,16 @@ struct refcounted_he *
3836
3812
Perl_refcounted_he_new_pvn (pTHX_ struct refcounted_he * parent ,
3837
3813
const char * keypv , STRLEN keylen , U32 hash , SV * value , U32 flags )
3838
3814
{
3815
+ PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN ;
3816
+
3839
3817
STRLEN value_len = 0 ;
3840
3818
const char * value_p = NULL ;
3841
3819
bool is_pv ;
3842
3820
char value_type ;
3843
3821
char hekflags ;
3844
3822
STRLEN key_offset = 1 ;
3845
3823
struct refcounted_he * he ;
3846
- PERL_ARGS_ASSERT_REFCOUNTED_HE_NEW_PVN ;
3824
+ U8 * free_me = NULL ;
3847
3825
3848
3826
if (!value || value == & PL_sv_placeholder ) {
3849
3827
value_type = HVrhek_delete ;
@@ -3867,39 +3845,11 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
3867
3845
}
3868
3846
hekflags = value_type ;
3869
3847
3870
- if (flags & REFCOUNTED_HE_KEY_UTF8 ) {
3871
- /* Canonicalise to Latin-1 where possible. */
3872
- const char * keyend = keypv + keylen , * p ;
3873
- STRLEN nonascii_count = 0 ;
3874
- for (p = keypv ; p != keyend ; p ++ ) {
3875
- if (! UTF8_IS_INVARIANT (* p )) {
3876
- if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE (p , keyend )) {
3877
- goto canonicalised_key ;
3878
- }
3879
- nonascii_count ++ ;
3880
- p ++ ;
3881
- }
3882
- }
3883
- if (nonascii_count ) {
3884
- char * q ;
3885
- const char * p = keypv , * keyend = keypv + keylen ;
3886
- keylen -= nonascii_count ;
3887
- Newx (q , keylen , char );
3888
- SAVEFREEPV (q );
3889
- keypv = q ;
3890
- for (; p != keyend ; p ++ , q ++ ) {
3891
- U8 c = (U8 )* p ;
3892
- if (UTF8_IS_INVARIANT (c )) {
3893
- * q = (char ) c ;
3894
- }
3895
- else {
3896
- p ++ ;
3897
- * q = (char ) EIGHT_BIT_UTF8_TO_NATIVE (c , * p );
3898
- }
3899
- }
3900
- }
3848
+ /* Canonicalise to Latin-1 where possible. */
3849
+ if ( (flags & REFCOUNTED_HE_KEY_UTF8 )
3850
+ && utf8_to_bytes_new_pv (& keypv , & keylen , & free_me ))
3851
+ {
3901
3852
flags &= ~REFCOUNTED_HE_KEY_UTF8 ;
3902
- canonicalised_key : ;
3903
3853
}
3904
3854
if (flags & REFCOUNTED_HE_KEY_UTF8 )
3905
3855
hekflags |= HVhek_UTF8 ;
@@ -3939,6 +3889,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent,
3939
3889
he -> refcounted_he_data [0 ] = hekflags ;
3940
3890
he -> refcounted_he_refcnt = 1 ;
3941
3891
3892
+ Safefree (free_me );
3942
3893
return he ;
3943
3894
}
3944
3895
0 commit comments