diff --git a/embed.fnc b/embed.fnc index a1e3f4de9983..4d8fd6855178 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3825,6 +3825,16 @@ p |void |dump_sv_child |NN SV *sv CRTip |unsigned int|variant_byte_number \ |PERL_UINTMAX_T word #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) +Cp |void |category_lock |const UV mask \ + |NN const char *file \ + |const line_t caller_line +Cp |void |category_unlock|const UV mask \ + |NN const char *file \ + |const line_t caller_line +Cip |int |posix_LC_foo_ |const int c \ + |const U8 classnum +#endif #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) ARdp |I32 |my_chsize |int fd \ |Off_t length @@ -4517,6 +4527,13 @@ RS |char * |my_setlocale_debug_string_i \ |NULLOK const char *retval \ |const line_t line # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +S |void |update_PL_curlocales_i \ + |const locale_category_index index \ + |NN const char *new_locale \ + |const line_t caller_line +# endif # if defined(HAS_LOCALECONV) && \ ( defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC) ) S |void |populate_hash_from_localeconv \ @@ -4577,14 +4594,8 @@ S |const char *|querylocale_2008_i \ |const locale_category_index index \ |const line_t line S |locale_t|use_curlocale_scratch -# if !defined(USE_QUERYLOCALE) -S |void |update_PL_curlocales_i \ - |const locale_category_index index \ - |NN const char *new_locale \ - |const line_t caller_line -# endif -# elif defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) S |bool |less_dicey_bool_setlocale_r \ |const int cat \ |NN const char *locale diff --git a/embed.h b/embed.h index 44e3b7f8773a..8eafc2e074c3 100644 --- a/embed.h +++ b/embed.h @@ -825,6 +825,11 @@ # if !defined(EBCDIC) # define variant_byte_number Perl_variant_byte_number # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) +# define category_lock(a,b,c) Perl_category_lock(aTHX_ a,b,c) +# define category_unlock(a,b,c) Perl_category_unlock(aTHX_ a,b,c) +# define posix_LC_foo_(a,b) Perl_posix_LC_foo_(aTHX_ a,b) +# endif # if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) # define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b) # endif @@ -1343,6 +1348,10 @@ # if defined(DEBUGGING) # define my_setlocale_debug_string_i(a,b,c,d) S_my_setlocale_debug_string_i(aTHX_ a,b,c,d) # endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) +# endif # if defined(HAS_LOCALECONV) && \ ( defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC) ) # define populate_hash_from_localeconv(a,b,c,d,e) S_populate_hash_from_localeconv(aTHX_ a,b,c,d,e) @@ -1375,12 +1384,8 @@ # define bool_setlocale_2008_i(a,b,c) S_bool_setlocale_2008_i(aTHX_ a,b,c) # define querylocale_2008_i(a,b) S_querylocale_2008_i(aTHX_ a,b) # define use_curlocale_scratch() S_use_curlocale_scratch(aTHX) -# if !defined(USE_QUERYLOCALE) -# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c) -# endif -# elif defined(USE_LOCALE_THREADS) && \ - !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) # define less_dicey_bool_setlocale_r(a,b) S_less_dicey_bool_setlocale_r(aTHX_ a,b) # define less_dicey_setlocale_r(a,b) S_less_dicey_setlocale_r(aTHX_ a,b) # endif diff --git a/embedvar.h b/embedvar.h index b70dd1745db1..693c226b6eef 100644 --- a/embedvar.h +++ b/embedvar.h @@ -200,6 +200,7 @@ # define PL_numeric_name (vTHX->Inumeric_name) # define PL_numeric_radix_sv (vTHX->Inumeric_radix_sv) # define PL_numeric_standard (vTHX->Inumeric_standard) +# define PL_NUMERIC_toggle_depth (vTHX->INUMERIC_toggle_depth) # define PL_numeric_underlying (vTHX->Inumeric_underlying) # define PL_numeric_underlying_is_standard (vTHX->Inumeric_underlying_is_standard) # define PL_ofsgv (vTHX->Iofsgv) @@ -223,6 +224,7 @@ # define PL_parser (vTHX->Iparser) # define PL_patchlevel (vTHX->Ipatchlevel) # define PL_peepp (vTHX->Ipeepp) +# define PL_perl_controls_locale (vTHX->Iperl_controls_locale) # define PL_perl_destruct_level (vTHX->Iperl_destruct_level) # define PL_perldb (vTHX->Iperldb) # define PL_perlio (vTHX->Iperlio) @@ -250,6 +252,8 @@ # define PL_replgv (vTHX->Ireplgv) # define PL_restartjmpenv (vTHX->Irestartjmpenv) # define PL_restartop (vTHX->Irestartop) +# define PL_restore_locale (vTHX->Irestore_locale) +# define PL_restore_locale_depth (vTHX->Irestore_locale_depth) # define PL_rpeepp (vTHX->Irpeepp) # define PL_rs (vTHX->Irs) # define PL_runops (vTHX->Irunops) diff --git a/handy.h b/handy.h index f2837a3bf45a..e4e461dbacf4 100644 --- a/handy.h +++ b/handy.h @@ -1555,9 +1555,16 @@ or casts # define HIGHEST_REGCOMP_DOT_H_SYNC_ CC_VERTSPACE_ -/* The members of the third group below do not need to be coordinated with data - * structures in regcomp.[ch] and regexec.c. */ -# define CC_IDFIRST_ 16 +/* These three follow immediately after the final function that has a version + * defined by C, like isascii(), so they overlap with anything else. They are + * used in the 'PL_libc_char_fcns' data structure, along with the ones above + * them */ +# define CC_IDFIRST_ 16 +# define CC_TOLOWER_ (CC_IDFIRST_ + 1) +# define CC_TOUPPER_ (CC_TOLOWER_ + 1) + +/* The members of the fourth group below do not need to be coordinated with + * data structures in regcomp.[ch] and regexec.c. */ # define CC_CHARNAME_CONT_ 17 # define CC_NONLATIN1_FOLD_ 18 # define CC_NONLATIN1_SIMPLE_FOLD_ 19 @@ -2025,7 +2032,7 @@ END_EXTERN_C # define is_posix_XDIGIT(c) isxdigit((U8) (c)) #endif -/* Below is the next level up, which currently expands to nothing more +/* Below is the next level up, which on most platforms expands to nothing more * than the previous layer. These are the macros to use if you really need * something whose input domain is a byte, and the locale isn't UTF-8; that is, * where you normally would have to use things like bare isalnum(). @@ -2037,7 +2044,13 @@ END_EXTERN_C * (Note, proper general operation of the bare libc functions requires you to * cast to U8. These do that for you automatically.) */ +/* In this one circumstance, the macro is implemented with a lock; otherwise it + * expands to just the layer below */ +#ifdef EMULATE_THREAD_SAFE_LOCALES +# define WRAP_U8_LC_(c, classnum, posix) posix_LC_foo_((c), (classnum)) +#else # define WRAP_U8_LC_(c, classnum, posix) posix(c) +#endif #define isU8_ALPHANUMERIC_LC(c) \ WRAP_U8_LC_((c), CC_ALPHANUMERIC_, is_posix_ALPHANUMERIC) diff --git a/inline.h b/inline.h index 9af51b2c5ef6..88efc52d46b8 100644 --- a/inline.h +++ b/inline.h @@ -318,6 +318,49 @@ S_PerlEnv_putenv(pTHX_ char * str) #endif +/* ------------------------------- handy.h ------------------------------- */ + +#ifdef EMULATE_THREAD_SAFE_LOCALES + +PERL_STATIC_INLINE int +Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum) { + int result; + + LC_CTYPE_LOCK; + + /* All calls to this (so far) are with a 'classnum' known at compile time, + * so the compiler should constant fold this down to a single assignment */ + switch (classnum) { + case CC_ALPHANUMERIC_:result = (bool) is_posix_ALPHANUMERIC(c); break; + case CC_ALPHA_: result = (bool) is_posix_ALPHA(c); break; + case CC_ASCII_: result = (bool) is_posix_ASCII(c); break; + case CC_BLANK_: result = (bool) is_posix_BLANK(c); break; + case CC_CASED_: result = (bool) is_posix_CASED(c); break; + case CC_CNTRL_: result = (bool) is_posix_CNTRL(c); break; + case CC_DIGIT_: result = (bool) is_posix_DIGIT(c); break; + case CC_GRAPH_: result = (bool) is_posix_GRAPH(c); break; + case CC_LOWER_: result = (bool) is_posix_LOWER(c); break; + case CC_PRINT_: result = (bool) is_posix_PRINT(c); break; + case CC_PUNCT_: result = (bool) is_posix_PUNCT(c); break; + case CC_SPACE_: result = (bool) is_posix_SPACE(c); break; + case CC_UPPER_: result = (bool) is_posix_UPPER(c); break; + case CC_WORDCHAR_: result = (bool) is_posix_WORDCHAR(c); break; + case CC_XDIGIT_: result = (bool) is_posix_XDIGIT(c); break; + case CC_IDFIRST_: result = (bool) is_posix_IDFIRST(c); break; + case CC_TOLOWER_: result = to_posix_LOWER(c); break; + case CC_TOUPPER_: result = to_posix_UPPER(c); break; + + default: + LC_CTYPE_UNLOCK; + locale_panic_(Perl_form(aTHX_ "Unknown charclass %d", classnum)); + } + + LC_CTYPE_UNLOCK; + return result; +} + +#endif + /* ------------------------------- mg.h ------------------------------- */ #if defined(PERL_CORE) || defined(PERL_EXT) diff --git a/intrpvar.h b/intrpvar.h index e17ce3fb5c56..20f2399d5e5b 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -757,7 +757,15 @@ PERLVARI(I, cur_locale_obj, locale_t, LC_GLOBAL_LOCALE) * is almost always toggled into the C locale, and the locale it nominally is * is stored as PL_numeric_name. */ PERLVARA(I, curlocales, LOCALE_CATEGORIES_COUNT_ + 1, const char *) +#endif +#ifdef EMULATE_THREAD_SAFE_LOCALES +PERLVARA(I, restore_locale, LOCALE_CATEGORIES_COUNT_, const char *) +PERLVARA(I, restore_locale_depth, LOCALE_CATEGORIES_COUNT_, Size_t) +PERLVARI(I, NUMERIC_toggle_depth, int, 0) +#endif +#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE)) +PERLVARI(I, perl_controls_locale, bool, true) #endif #ifdef USE_PL_CUR_LC_ALL PERLVARI(I, cur_LC_ALL, const char *, NULL) diff --git a/locale.c b/locale.c index 60ffde2ebb4a..015f50387ddd 100644 --- a/locale.c +++ b/locale.c @@ -40,13 +40,6 @@ * platform than it actually is. This allows you to make changes and catch * some errors without having access to those other platforms. * - * This code now has multi-thread-safe locale handling on systems that support - * that. This is completely transparent to most XS code. On earlier systems, - * it would be possible to emulate thread-safe locales, but this likely would - * involve a lot of locale switching, and would require XS code changes. - * Macros could be written so that the code wouldn't have to know which type of - * system is being used. - * * Table-driven code is used for simplicity and clarity, as many operations * differ only in which category is being worked on. However the system * categories need not be small contiguous integers, so do not lend themselves @@ -67,6 +60,16 @@ * bool_setlocale_2008_i() function is used to hide the different API from the * outside. This makes it completely transparent to most XS code. * + * On other threaded-systems, the code here, in conjunction with other code in + * the system, emulates thread-safe locales by using mutexes to lock other + * threads out, and change the global locale to the desired per-thread value + * just before operations that care about it. All such operations must declare + * their need before executing, or it won't work. All of the Perl core does + * this, which makes pure Perl code locale thread-safe. XS code can be + * extended to work by using the macros for the purpose in perl.h. The need + * for mutexes means that in these platforms, much of the code in this file + * must be done while in critical sections. + * * A huge complicating factor is that the LC_NUMERIC category is normally held * in the C locale, except during those relatively rare times when it needs to * be in the underlying locale. There is a bunch of code to accomplish this, @@ -77,7 +80,7 @@ * opportunities for avoiding work. We don't have to necessarily create a safe * copy to return if no return is desired. * - * There are 3.5 major implementations here; which one chosen depends on what + * There are 4.5 major implementations here; which one chosen depends on what * the platform has available, and Configuration options. * * 1) Raw posix_setlocale(). This implementation is basically the libc @@ -100,7 +103,9 @@ * * 2) An implementation that adds a minimal layer above implementation 1), * making that implementation uninterruptible and returning a - * per-thread/per-category value. + * per-thread/per-category value. Currently, this is for threaded perls + * on platforms where layers 3a and 3b are not available, and where layer 4 + * has not been selected. * * 3a and 3b) An implementation of POSIX 2008 thread-safe locale handling, * hiding from the programmer the completely different API for this. @@ -117,7 +122,32 @@ * are buggy, in one way or another. There are workarounds encoded here, * where feasible, for platforms where the bugs are amenable to that * (glibc, for example). But other platforms instead don't use this - * implementation. + * implementation, but the next one below. + * + * 4) A thread-safe emulation implementation that, in conjunction with changes + * to C code, makes locale handling thread-safe. Those changes are simply + * to wrap locale-dependent system calls with macros that delimit a critical + * section in which they change the global locale to the one the thread + * expects. The perl core has made those changes, so pure perl programs + * become thread-safe. Well-behaved XS code also keeps things thread-safe, + * either by not using locale-dependent system calls, or by changing to use + * the wrapper macros. This layer is not chosen if the platform has native + * thread-safe locale handling. Also, currently perl must have been + * Configured with "-Accflags=-DEMULATE_THREAD_SAFE_LOCALES". + * + * This implementation is based on the observation that the underlying + * locale matters only to relatively few libc calls, and only during their + * execution. It can be anything at all at any other time. What the proper + * locale should be for each category is kept in the array PL_curlocales[]. + * Each locale-dependent operation must be wrapped in mutex lock/unlock + * operations. The lock additionally compares what libc knows the locale to + * be, and what it should be for this thread at this time, and changes the + * actual locale to the proper value if necessary. That's all that is + * needed. However additionally, the unlock restores the locale to what it + * was at the time of the lock. This improves the chances that a thread not + * under perl's control (such as Gtk) will still work. (If mutex calls are + * added to lock out that thread from running when the other threads are + * using locale-dependent functions, then things should completely work.) * * z/OS (os390) is an outlier. Locales really don't work under threads when * either the radix character isn't a dot, or attempts are made to change @@ -225,10 +255,10 @@ * crippled locale implementation. * * -Accflags=-DNO_THREAD_SAFE_LOCALE - * Even if thread-safe operations are available on this platform and - * would otherwise be used (because this is a perl with multiplicity), - * perl is compiled to not use them. This could be useful on - * platforms where the libc is buggy. + * Don't use the thread-safe operations on this platform (should they + * be available) nor try to emulate them (if they are not available) + * even on a threaded perl. This could be useful on platforms where + * the libc is buggy, or the emulation runs into problems. * * -Accflags=-DNO_POSIX_2008_LOCALE * Even if the libc locale operations specified by the Posix 2008 @@ -269,6 +299,13 @@ * these have no effect. Otherwise they cause perl to be compiled to * always keep the named category(ies) in the C locale. * + * -Accflags=-DEMULATE_THREAD_SAFE_LOCALES + * This has no effect on unthreaded perls, nor when perl thinks that + * the platform has thread-safe locale handling. But otherwise, it + * enables the code to emulate thread-safe locale handling. + * Effectively it chooses implementation 4) instead of implementation + * 2) from the list above. + * * -Accflags=-DHAS_BROKEN_SETLOCALE_QUERY_LC_ALL * This would be set in a hints file to tell perl that doing a libc * setlocale(LC_ALL, NULL) @@ -1946,7 +1983,8 @@ S_setlocale_i(pTHX_ const int category, const char * locale) /*===========================================================================*/ #elif defined(USE_LOCALE_THREADS) \ - && ! defined(USE_THREAD_SAFE_LOCALE) + && ! defined(USE_THREAD_SAFE_LOCALE) \ + && ! defined(EMULATE_THREAD_SAFE_LOCALES) /* Here, there are threads, and there is no support for thread-safe * operation. This is a dangerous situation, which perl is documented as @@ -2035,6 +2073,440 @@ S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char * locale) # define TOGGLE_LOCK(i) POSIX_SETLOCALE_LOCK # define TOGGLE_UNLOCK(i) POSIX_SETLOCALE_UNLOCK +/*===========================================================================*/ +#elif defined(EMULATE_THREAD_SAFE_LOCALES) + +/* Here, use our emulation of thread safe locales. PL_curlocales[] keeps what + * the name of the locale should be for each category in the current thread. + * And so, S_bool_setlocale_emulate_safe_r() wraps each call to the system's + * setlocale() with saving the return into PL_curlocales. + * + * The locale is changed to the one specified by PL_curlocales[] just before + * any libc call affected by it, and restored just afterwards. */ + +# define querylocale_i(i) S_querylocale_emulate_safe_i(aTHX_ i, __LINE__) +# define querylocale_c(cat) querylocale_i(cat##_INDEX_) +# define querylocale_r(cat) querylocale_i(get_category_index(cat)) + +STATIC const char * +S_querylocale_emulate_safe_i(pTHX_ const locale_category_index cat_index, + const line_t caller_line) +{ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + " querylocale_emulate_safe_i(%u: %s);" + " called from %" LINE_Tf "\n", + cat_index, category_names[cat_index], caller_line)); + +# ifdef LC_ALL + + /* It can be somewhat expensive to calculate LC_ALL from its constituent + * categories, and the value might change many times before it is actually + * used. Therefore, it is only done as needed. This is such a place */ + if ( cat_index == LC_ALL_INDEX_ + && PL_curlocales[LC_ALL_INDEX_] == NULL) + { + /* Call just for its side effect */ + (void) calculate_LC_ALL_string(PL_curlocales, INTERNAL_FORMAT, + WANT_TEMP_PV, + caller_line); + } + +# endif + + return mortalized_pv_copy(PL_curlocales[cat_index]); +} + +/*---------------------------------------------------------------------------*/ + +# define bool_setlocale_r(cat, locale) \ + S_bool_setlocale_emulate_safe_r(aTHX_ cat, locale, __LINE__) +# define bool_setlocale_i(i, locale) \ + bool_setlocale_r(categories[i], locale) +# define bool_setlocale_c(cat, locale) bool_setlocale_r(cat, locale) + +STATIC bool +S_bool_setlocale_emulate_safe_r(pTHX_ + const int category, + const char * wanted_locale, + const line_t caller_line) +{ + /* Set the locale to 'wanted_locale' for the category given by our internal + * index number, and save the result for later use. */ + + assert(wanted_locale); + + STDIZED_SETLOCALE_LOCK; + const char * new_locale = savepv(stdized_setlocale(category, + wanted_locale)); + STDIZED_SETLOCALE_UNLOCK; + + if (! new_locale) { + SET_EINVAL; + return false; + } + + update_PL_curlocales_i(get_category_index(category), + new_locale, caller_line); + Safefree(new_locale); + + return true; +} +/*---------------------------------------------------------------------------*/ + +# define void_setlocale_r_with_caller(cat, locale, file, line) \ + STMT_START { \ + if (! bool_setlocale_r(cat, locale)) \ + setlocale_failure_panic_via_i(get_category_index(cat), \ + NULL, locale, __LINE__, 0, \ + file, line); \ + } STMT_END + +# define void_setlocale_c_with_caller(cat, locale, file, line) \ + void_setlocale_r_with_caller(cat, locale, file, line) + +# define void_setlocale_i_with_caller(i, locale, file, line) \ + void_setlocale_r_with_caller(categories[i], locale, file, line) + +# define void_setlocale_r(cat, locale) \ + void_setlocale_r_with_caller(cat, locale, __FILE__, __LINE__) +# define void_setlocale_c(cat, locale) void_setlocale_r(cat, locale) +# define void_setlocale_i(i, locale) void_setlocale_r(categories[i], locale) + +/* When the locale is toggled in this file, automatically enter a critical + * section and call category_lock() to make sure the category is in the proper + * locale for this thread. LC_NUMERIC is handled specially, so + * PL_NUMERIC_toggle_depth is used */ +# define TOGGLING_LOCKS 1 +# define DEBUG_TOGGLE(i) \ + DEBUG_Lv(PerlIO_printf(Perl_debug_log, \ + "new depth=%d, index=%d, caller=%" LINE_Tf \ + "\n", PL_NUMERIC_toggle_depth, i, __LINE__)) +# ifdef USE_LOCALE_NUMERIC +# define TOGGLE_NUMERIC(i) \ + STMT_START { \ + if (i == LC_NUMERIC_INDEX_) PL_NUMERIC_toggle_depth++; \ + } STMT_END +# define UNTOGGLE_NUMERIC(i) \ + STMT_START { \ + if (i == LC_NUMERIC_INDEX_) { \ + if (PL_NUMERIC_toggle_depth <= 0) { \ + locale_panic_("toggling down failed"); \ + } \ + PL_NUMERIC_toggle_depth--; \ + } \ + } STMT_END +# else +# define TOGGLE_NUMERIC(i) +# define UNTOGGLE_NUMERIC(i) +# endif +# define TOGGLE_LOCK(i) \ + STMT_START { \ + TOGGLE_NUMERIC(i); \ + DEBUG_TOGGLE(i); \ + LC_CATEGORY_LOCK_i_(i); \ + } STMT_END +# define TOGGLE_UNLOCK(i) \ + STMT_START { \ + LC_CATEGORY_UNLOCK_i_(i); \ + UNTOGGLE_NUMERIC(i); \ + DEBUG_TOGGLE(i); \ + } STMT_END +/*---------------------------------------------------------------------------*/ +/* utility functions for emulating thread-safe locales. + * + * When emulating thread-safe locales, our per-thread data structures get set + * up as normal, but the actual locale is global to all threads. All functions + * that depend on the locale need to be protected by critical sections + * surrounded by these two functions that lock, and then unlock after the + * operation is completed. The first function does a lock and then changes the + * locale to the desired one for this thread, based on the per-thread data + * structures. The restore function restores to what the locale on original + * entry was, and unlocks. This is effectively a just-in-time locale setting + * scheme. + * + * There are two mechanisms to accommodate the need for more than one category + * being used by a function. + * 1) Its parameter is a mask of all the categories this call is to set. The + * bit position in the mask corresponds to our category index. The + * function loops over all the set bits in the mask. + * 2) In case that the control flow doesn't allow for all categories to be + * known at the same call, this also implements a stack. The lock is + * called for each needed category. Since the locks are general + * semaphores, only the first call results in an actual lock. Each call + * thus changes the locale for its category(ies) to the desired one(s), + * pushing onto the stack what it should be restored to afterwards. The + * paired unlock calls unwind the stack until the final one causes the + * mutex to be released. Most libc call require one or two categories. + * + * One could argue that there is no reason to restore afterwards, that the next + * just-in-time call will set the locale to the correct one. But doing the + * restore allows this scheme to work like truly thread-safe implementations + * when one thread is in the global locale. By restoring, we leave code not + * under this scheme to have the global thread for itself. There is a big + * caveat here, though. That thread must run in a critical section. This + * isn't the case with the other thread-safe implementations. */ + +void +Perl_category_lock(pTHX_ const UV mask, + const char * file, + const line_t caller_line) +{ + PERL_ARGS_ASSERT_CATEGORY_LOCK; + + /* The highest set bit needs to correspond to a legal index; LC_ALL_INDEX_ + * is the biggest such one */ + assert(mask != 0); + assert((mask & ~(LC_INDEX_TO_BIT_(LC_ALL_INDEX_) - 1)) == 0); + + dSAVE_ERRNO; + +# ifndef DEBUGGING + PERL_UNUSED_ARG(file); +# endif + + if (UNLIKELY(! PL_perl_controls_locale)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_lock but outside of perl" + " controlling the locale: nothing done;" + " called from %s: %d\n", file, caller_line)); + RESTORE_ERRNO; + return; + } + + DEBLvG_U(PerlIO_printf(Perl_debug_log, + "Entering category_lock; mask=%" UVxf + ", called from %s: %d\n", mask, file, caller_line)); + + LOCALE_LOCK; + + UV working_mask = mask; + + /* Unlikely, but handle the case of being called with LC_ALL by changing + * the mask to all ones for the individual categories. This works because + * LC_ALL_INDEX_ is 1 greater than the highest individual category index, + * and they are all consecutive */ + if (UNLIKELY(working_mask & LC_INDEX_TO_BIT_(LC_ALL_INDEX_))) { + working_mask = LC_INDEX_TO_BIT_(LC_ALL_INDEX_) - 1; + } + + while (working_mask) { + + /* Get the bit position of the next lowest set bit. That is our + * internal index for the category being locked this iteration. + * Turn the bit off so we don't work on this category again in this + * function call. */ + const locale_category_index cat_index + = (locale_category_index) lsbit_pos(working_mask); + working_mask &= ~ (1 << cat_index); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "category_lock; processing %s\n", + category_names[cat_index])); + + /* The term 'toggled' is overloaded here. Sorry. Alone of the + * categories, LC_NUMERIC is kept in a locale indistinguishable from + * the C locale with respect to numeric operations. This is primarily + * so that XS code doesn't have to consider that the radix character + * could be a comma. But there are times when it is toggled to the + * real underlying locale for the program. For the purposes of these + * comments, this will be referred to as toggling_1. + * + * Independently, code in this file may temporarily switch the locale + * for any category to some other locale. This is also called + * toggling. For the purposes of these comments, this will be referred + * to as toggling_2. + * + * For this emulation implementation, what the locale for + * any category should be is kept in PL_curlocales[]. The toggling_2 + * functions simply change the appropriate array elements, and this + * function makes sure to change to it for the duration of a libc call + * that needs it to be in the correct locale. Afterwards, the element + * is restored to its previous value. + * + * But it's more complicated for LC_NUMERIC. When toggled by one of + * the toggling_2 functions in this file, LC_NUMERIC is also given by + * PL_curlocales[]. PL_NUMERIC_toggle_depth > 0 indicates this. + * Otherwise, the toggling_1 mechanism is in effect. Comments at + * S_new_numeric describe this */ +# ifndef USE_LOCALE_NUMERIC + const char * wanted = PL_curlocales[cat_index]; +# else + const char * wanted; + if (cat_index != LC_NUMERIC_INDEX_ || PL_NUMERIC_toggle_depth > 0) { + wanted = PL_curlocales[cat_index]; + } + else if (PL_numeric_underlying) { + wanted = PL_numeric_name; + } + else /* Here we want an LC_NUMERIC locale equivalent to C. This + next conditional may save us from having to toggle below */ + if (PL_numeric_underlying_is_standard) + { + wanted = PL_numeric_name; + } + else { + wanted = "C"; + } + +# endif + + assert(wanted); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: wanted=%s\n", + category_names[cat_index], wanted)); + + /* Get the category desired, and what its current locale is */ + const int cat = categories[cat_index]; + const char * currently = stdized_setlocale(cat, NULL); + + /* If we aren't in the desired locale, change to it, saving a copy of + * the one we actually are in before the change */ + if (strNE(currently, wanted)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: Calling setlocale(%d, %s)\n", file, caller_line, + cat, wanted)); + if (stdized_setlocale(cat, wanted) == NULL) { + setlocale_failure_panic_i(cat_index, currently, wanted, + __LINE__, caller_line); + NOT_REACHED; /* NOTREACHED */ + } + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: %d: Category %d already was %s\n", + file, caller_line, cat, wanted)); + } + + /* Here, we have toggled to the desired locale, so 'currently' eq + * 'wanted' + * + * This may be a recursive call. Everything remains locked during the + * recursion. We restore to the original locale after the recursion + * gets unwound. The intermediate values aren't needed. */ + if (PL_restore_locale_depth[cat_index] == 0) { + + /* Only need to change what's there if no current value or differs + * from the new one */ + if ( PL_restore_locale[cat_index] == NULL + || strNE(wanted, PL_restore_locale[cat_index])) + { + Safefree(PL_restore_locale[cat_index]); + PL_restore_locale[cat_index] = savepv(wanted); + } + } + + /* Indicate our new recursion depth */ + PL_restore_locale_depth[cat_index]++; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: PL_restore is now %s," + " recursion depth=%zu\n", + file, caller_line, PL_restore_locale[cat_index], + PL_restore_locale_depth[cat_index])); + } + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "Leaving category_lock\n")); + + RESTORE_ERRNO; +} + +void +Perl_category_unlock(pTHX_ const UV mask, + const char * file, + const line_t caller_line) +{ + PERL_ARGS_ASSERT_CATEGORY_UNLOCK; + assert(mask != 0); + assert((mask & ~(LC_INDEX_TO_BIT_(LC_ALL_INDEX_) - 1)) == 0); + + /* Undoes a matching category_lock(). Note that must be locked on input. + * Will unlock when recursion entirely gets unwound */ + + dSAVE_ERRNO; + +# ifndef DEBUGGING + PERL_UNUSED_ARG(file); +# endif + + if (UNLIKELY(! PL_perl_controls_locale)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_unlock but outside of perl" + " controlling the locale: nothing done;" + " called from %s: %d\n", file, caller_line)); + RESTORE_ERRNO; + return; + } + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "Entering category_unlock; mask=%" UVxf + ", called from %s: %d\n", mask, file, caller_line)); + + unsigned int working_mask = mask; + if (UNLIKELY(working_mask & LC_INDEX_TO_BIT_(LC_ALL_INDEX_))) { + working_mask = LC_INDEX_TO_BIT_(LC_ALL_INDEX_) - 1; + } + + while (working_mask) { + + /* Get the bit position of the next lowest set bit. That is our + * internal index for the category being unlocked this iteration. + * Turn the bit off so we don't work on this category again in this + * function call. */ + const locale_category_index cat_index + = (locale_category_index) lsbit_pos(working_mask); + working_mask &= ~ (1 << cat_index); + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "category_unlock; processing %s\n", + category_names[cat_index])); + + const int cat = categories[cat_index]; + + /* Un-recursing */ + PL_restore_locale_depth[cat_index]--; + + /* Only restore when the depth gets back to 0 */ + if (PL_restore_locale_depth[cat_index] == 0) { + + /* What we currently are */ + const char * currently = stdized_setlocale(cat, NULL); + + /* And what we need to be changed to */ + const char * wanted = PL_restore_locale[cat_index]; + + /* If we need to change, do it */ + if (strNE(currently, wanted)) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s:%d: Calling setlocale(%d, %s)\n", + file, caller_line, cat, wanted)); + if (stdized_setlocale(cat, wanted) == NULL) { + setlocale_failure_panic_via_i(cat_index, + currently, + wanted, + __LINE__, 0, + file, caller_line); + } + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "%s: %d: Category %d already was %s\n", + file, caller_line, cat, wanted)); + } + + Safefree(wanted); + PL_restore_locale[cat_index] = NULL; + } + } + + /* Doesn't actually unlock until recursion fully unwound */ + LOCALE_UNLOCK; + + DEBUG_Lv(PerlIO_printf(Perl_debug_log, "Leaving category_unlock\n")); + + RESTORE_ERRNO; +} + /*===========================================================================*/ #elif defined(USE_POSIX_2008_LOCALE) @@ -2760,26 +3232,48 @@ S_update_PL_curlocales_i(pTHX_ const char * new_locale, const line_t caller_line) { - /* Update PL_curlocales[], which is parallel to the other ones indexed by - * our mapping of libc category number to our internal equivalents. */ - PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I; + /* There are two implementations that use PL_curlocales[], an array + * parallel to the other ones indexed by our mapping of libc category + * number to our internal equivalents. + * + * This function updates the 'index'th element to be 'new_locale'. It + * knows about the requirements of each implementation. In the POSIX 2008 + * case, everything is already calculated, so never does an element have to + * be checked for needing to stay in the "C" locale. In the thread-safe + * emulation case, no checking has yet been done, so this routine needs to + * do it */ +# ifndef LC_ALL + PERL_UNUSED_ARG(caller_line); +# else + if (index == LC_ALL_INDEX_) { /* For LC_ALL, we change all individual categories to correspond, * including the LC_ALL element */ for (unsigned int i = 0; i <= LC_ALL_INDEX_; i++) { Safefree(PL_curlocales[i]); - PL_curlocales[i] = NULL; } + /* In the POSIX 2008 case, everything is already calculated, so never + * does an element have to be checked for needing to stay in the "C" + * locale. In the thread-safe emulation case, no checking has yet been + * done, so this routine needs to do it */ + +# if ! defined(HAS_IGNORED_LOCALE_CATEGORIES_) || defined(USE_POSIX_2008) + + const parse_LC_ALL_STRING_action action = no_override; + +# else + + const parse_LC_ALL_STRING_action action = override_if_ignored; + +# endif + switch (parse_LC_ALL_string(new_locale, (const char **) &PL_curlocales, - check_that_overridden, /* things should - have already - been overridden - */ + action, true, /* Always fill array */ true, /* Panic if fails, as to get here it earlier had to have succeeded @@ -2799,7 +3293,11 @@ S_update_PL_curlocales_i(pTHX_ PL_curlocales[LC_ALL_INDEX_] = savepv(new_locale); } } - else { /* Not LC_ALL */ + else + +# endif + + { /* Not LC_ALL */ /* Update the single category's record */ Safefree(PL_curlocales[index]); @@ -3473,7 +3971,13 @@ S_new_numeric(pTHX_ const char *newnum, bool force) * handling, so that all the libc functions that are affected by LC_NUMERIC * will work as expected. This can be skipped if we already know that the * locale is indistinguishable from the C locale. */ - if (! force && strEQ(PL_numeric_name, newnum)) { + if ( (! force && strEQ(PL_numeric_name, newnum)) + +# if defined(USE_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + || ! PL_perl_controls_locale +# endif + + ) { if (! PL_numeric_underlying_is_standard) { set_numeric_standard(__FILE__, __LINE__); } @@ -3693,6 +4197,8 @@ S_new_ctype(pTHX_ const char *newctype, bool force) * Turkic. Make sure these two are the only anomalies. (We don't * require towupper and towlower because they aren't in C89.) */ + LC_CTYPE_LOCK; + # if defined(HAS_TOWUPPER) && defined (HAS_TOWLOWER) if (towupper('i') == 0x130 && towlower('I') == 0x131) @@ -3708,6 +4214,8 @@ S_new_ctype(pTHX_ const char *newctype, bool force) check_for_problems = TRUE; maybe_utf8_turkic = TRUE; } + + LC_CTYPE_UNLOCK; } else { /* Not a canned locale we know the values for. Compute them */ @@ -3812,7 +4320,9 @@ S_new_ctype(pTHX_ const char *newctype, bool force) * locale requires more than one byte, there are going to be BIG problems. * */ + LC_CTYPE_LOCK; const int mb_cur_max = MB_CUR_MAX; + LC_CTYPE_UNLOCK; if (mb_cur_max > 1 && ! PL_in_utf8_CTYPE_locale @@ -5153,7 +5663,8 @@ be dealt with immediately. #if defined(USE_THREADS) \ && ( ! defined(LOCALECONV_IS_THREAD_SAFE) \ || ! defined(USE_THREAD_SAFE_LOCALE) \ - || defined(TS_W32_BROKEN_LOCALECONV)) + || defined(TS_W32_BROKEN_LOCALECONV) \ + || defined(USE_THREAD_SAFE_EMULATION)) # define LOCALECONV_NEEDS_CRITICAL_SECTION #endif @@ -5823,7 +6334,6 @@ S_populate_hash_from_localeconv(pTHX_ HV * hv, restore_toggled_locale_c(LC_MONETARY, orig_MONETARY_locale);\ } \ } STMT_END - # endif } @@ -6996,6 +7506,7 @@ S_emulate_langinfo(pTHX_ const PERL_INTMAX_T item, const char * orig_CTYPE_locale; orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, locale); + Perl_sv_setpvf(aTHX_ sv, CODE_PAGE_FORMAT, CODE_PAGE_FUNCTION); retval_type = RETVAL_IN_sv; @@ -8548,7 +9059,8 @@ S_give_perl_locale_control(pTHX_ # endif # if ! defined(USE_THREAD_SAFE_LOCALE) \ - && ! defined(USE_POSIX_2008_LOCALE) + && ! defined(USE_POSIX_2008_LOCALE) \ + && ! defined(EMULATE_THREAD_SAFE_LOCALES) # if defined(LC_ALL) PERL_UNUSED_ARG(lc_all_string); # else @@ -8569,6 +9081,13 @@ S_give_perl_locale_control(pTHX_ } # endif +# endif +# if defined(USE_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + + /* This routine converts Perl to controlling the locale, and we need to + * tell this before calling new_LC_ALL() */ + PL_perl_controls_locale = true; + # endif /* Finally, update our remaining records. 'true' => force recalculation. @@ -8805,6 +9324,11 @@ Perl_init_i18nl10n(pTHX_ int printwarn) PL_cur_LC_ALL = savepv("C"); # endif +# ifdef EMULATE_THREAD_SAFE_LOCALES + for (unsigned int i = 0; i < LC_ALL_INDEX_; i++) { + PL_restore_locale[i] = NULL; + } +# endif # if ! defined(PERL_LC_ALL_USES_NAME_VALUE_PAIRS) && defined(LC_ALL) LOCALE_LOCK; @@ -9845,12 +10369,14 @@ Perl_mem_collxfrm_(pTHX_ const char *input_string, /* Then the transformation of the input. We loop until successful, or we * give up */ for (;;) { + LC_CATEGORY_LOCK_2_c_(LC_CTYPE, LC_COLLATE); errno = 0; *xlen = my_strxfrm(xbuf + COLLXFRM_HDR_LEN, s, xAlloc - COLLXFRM_HDR_LEN); + LC_CATEGORY_UNLOCK_2_c_(LC_CTYPE, LC_COLLATE); /* If the transformed string occupies less space than we told strxfrm() * was available, it means it transformed the whole string. */ @@ -10131,13 +10657,20 @@ Perl_strxfrm(pTHX_ SV * src) * qualifies), these yield the correct one */ #if defined(USE_LOCALE_CTYPE) # define WHICH_LC_INDEX LC_CTYPE_INDEX_ +# define WHICH_LOCK LC_CTYPE_LOCK +# define WHICH_UNLOCK LC_CTYPE_UNLOCK #elif defined(USE_LOCALE_MESSAGES) # define WHICH_LC_INDEX LC_MESSAGES_INDEX_ +# define WHICH_LOCK LC_MESSAGES_LOCK +# define WHICH_UNLOCK LC_MESSAGES_UNLOCK #endif /*===========================================================================*/ /* First set of implementations, when have strerror_l() */ +#define MY_STRERROR_LOCK LC_MESSAGES_LOCK +#define MY_STRERROR_UNLOCK LC_MESSAGES_UNLOCK + #if defined(USE_POSIX_2008_LOCALE) && defined(HAS_STRERROR_L) # if ! defined(USE_LOCALE_CTYPE) && ! defined(USE_LOCALE_MESSAGES) @@ -10236,7 +10769,12 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) DEBUG_STRERROR_ENTER(errnum, 0); + gwLOCALE_LOCK; + const char *errstr = savepv(Strerror(errnum)); + + gwLOCALE_UNLOCK; + *utf8ness = UTF8NESS_IMMATERIAL; DEBUG_STRERROR_RETURN(errstr, utf8ness); @@ -10261,7 +10799,12 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) const char *errstr; if (IN_LC(categories[WHICH_LC_INDEX])) { + WHICH_LOCK; + gwLOCALE_LOCK; errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; + WHICH_UNLOCK; + *utf8ness = get_locale_string_utf8ness_i(errstr, LOCALE_UTF8NESS_UNKNOWN, NULL, WHICH_LC_INDEX); @@ -10272,7 +10815,9 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) const char * orig_locale = toggle_locale_i(WHICH_LC_INDEX, "C"); + gwLOCALE_LOCK; errstr = savepv(Strerror(errnum)); + gwLOCALE_UNLOCK; restore_toggled_locale_i(WHICH_LC_INDEX, orig_locale); @@ -10306,6 +10851,7 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) /* XXX Can fail on z/OS */ LOCALE_LOCK; + gwLOCALE_LOCK; const char* orig_CTYPE_locale = toggle_locale_c(LC_CTYPE, desired_locale); @@ -10316,6 +10862,7 @@ Perl_my_strerror(pTHX_ const int errnum, utf8ness_t * utf8ness) restore_toggled_locale_c(LC_MESSAGES, orig_MESSAGES_locale); restore_toggled_locale_c(LC_CTYPE, orig_CTYPE_locale); + gwLOCALE_UNLOCK; LOCALE_UNLOCK; *utf8ness = get_locale_string_utf8ness_i(errstr, LOCALE_UTF8NESS_UNKNOWN, @@ -10372,10 +10919,21 @@ To return to Perl control, and restart the gotcha prevention services, call C>. Behavior is undefined for any pure Perl code that executes while the switch is in effect. -The global locale and the per-thread locales are independent. As long as just -one thread converts to the global locale, everything works smoothly. But if -more than one does, they can easily interfere with each other, and races are -likely. On Windows systems prior to Visual Studio 15 (at which point Microsoft +On perls without per thread-locales, there is only the global locale; so +calling this function effectively just disables the gotcha prevention services. + +On perls with per-thread locales, they are independent from the global locale. +As long as just one thread converts to the global locale, everything works +smoothly. But if more than one does, they can easily interfere with each +other, and races are likely. + +On perls that emulate per-thread locales, there is, behind the scenes, actually +just the global locale. Unlike the native per-thread locale platforms, any +thread that calls this function is likely to have races with the remaining +threads when calling locale-dependent libc functions, unless appropriate +mutexes have been inserted. + +On Windows systems prior to Visual Studio 15 (at which point Microsoft fixed a bug), races can occur (even if only one thread has been converted to the global locale), but only if you use the following operations: @@ -10426,6 +10984,9 @@ handle all cases of single- vs multi-thread, POSIX 2008-supported or not. freelocale(old_locale); \ } \ } STMT_END +#elif defined(EMULATE_THREAD_SAFE_LOCALES) +# define CHANGE_SYSTEM_LOCALE_TO_GLOBAL \ + PL_perl_controls_locale = false #else # define CHANGE_SYSTEM_LOCALE_TO_GLOBAL #endif @@ -10455,7 +11016,7 @@ Perl_switch_to_global_locale(pTHX) # else - const bool perl_controls = false; + const bool perl_controls = PL_perl_controls_locale; # endif @@ -10464,7 +11025,12 @@ Perl_switch_to_global_locale(pTHX) return; } -# ifdef LC_ALL +# if ! defined(USE_POSIX_2008_LOCALE) && ! defined(USE_THREAD_SAFE_LOCALE) + + PL_perl_controls_locale = false; + +# else +# ifdef LC_ALL const char * thread_locale = calculate_LC_ALL_string(NULL, EXTERNAL_FORMAT_FOR_SET, @@ -10473,7 +11039,7 @@ Perl_switch_to_global_locale(pTHX) CHANGE_SYSTEM_LOCALE_TO_GLOBAL; posix_setlocale(LC_ALL, thread_locale); -# else /* Must be USE_POSIX_2008_LOCALE) */ +# else const char * cur_thread_locales[LC_ALL_INDEX_]; @@ -10491,6 +11057,7 @@ Perl_switch_to_global_locale(pTHX) } POSIX_SETLOCALE_UNLOCK; +# endif # endif # ifdef USE_LOCALE_NUMERIC @@ -10511,9 +11078,9 @@ Perl_switch_to_global_locale(pTHX) This function copies the state of the program global locale into the calling thread, and converts that thread to using per-thread locales, if it wasn't -already, and the platform supports them. The LC_NUMERIC locale is toggled into -the standard state (using the C locale's conventions), if not within the -lexical scope of S>. +already, and the platform supports them or perl is Configured to emulate them. +The LC_NUMERIC locale is toggled into the standard state (using the C locale's +conventions), if not within the lexical scope of S>. Perl will now consider itself to have control of the locale. @@ -10534,8 +11101,11 @@ multi-threaded systems that don't have multi-thread safe locale operations. Using the libc L> function should be avoided. Nevertheless, certain non-Perl libraries called from XS, do call it, and their behavior may not be able to be changed. This function, along with -C>, can be used to get seamless behavior in these -circumstances, as long as only one thread is involved. +C>, can be used to get seamless behavior on systems +with per-thread locale handling, as long as only one thread is involved. +To get seamless behavior on platforms where perl emulates per-thread locale +handling, mutexes would have to be added to wrap that thread's locale-dependent +functions. If the library has an option to turn off its locale manipulation, doing that is preferable to using this mechanism. C is such a library. @@ -10556,9 +11126,18 @@ Perl_sync_locale(pTHX) #else - bool was_in_global = TRUE; + /* First, switch to the global locale, and note if we were already there */ -# ifdef USE_THREAD_SAFE_LOCALE + bool was_in_global; + +# if ! defined(USE_THREAD_SAFE_LOCALE) + + /* When not using thread-safe locales, as far as the system is concerned, + * there only is the global locale. */ + + was_in_global = PL_perl_controls_locale; + +# else /* Below is thread-safe */ # if defined(WIN32) int config_return = _configthreadlocale(_DISABLE_PER_THREAD_LOCALE); @@ -10567,7 +11146,7 @@ Perl_sync_locale(pTHX) } was_in_global = (config_return == _DISABLE_PER_THREAD_LOCALE); -# elif defined(USE_POSIX_2008_LOCALE) +# elif defined(USE_POSIX_2008_LOCALE) /* Thread-safe POSIX 2008 */ was_in_global = (LC_GLOBAL_LOCALE == uselocale(LC_GLOBAL_LOCALE)); @@ -10622,7 +11201,10 @@ Perl_switch_locale_context(pTHX) * * There are two implementations where this is an issue. For the other * implementations, it doesn't matter because libc is using global values - * that all threads know about. + * that all threads know about. This is true even for the thread-safe + * emulation, as everything to libc is still a global, and we use + * PL_curlocales (for example) to know what the correct locale(s) should + * be, and this variable is under control of aTHX. * * The two implementations are where libc keeps thread-specific information * on its own. These are @@ -10660,9 +11242,19 @@ Perl_switch_locale_context(pTHX) # elif defined(WIN32) - if (! bool_setlocale_c(LC_ALL, PL_cur_LC_ALL)) { - locale_panic_(Perl_form(aTHX_ "Can't setlocale(%s)", PL_cur_LC_ALL)); + if (! PL_perl_controls_locale) { + return; + } + + if (_configthreadlocale(_ENABLE_PER_THREAD_LOCALE) == -1) { + locale_panic_("_configthreadlocale returned an error"); + } + + const char * lc_all_copy = savepv(PL_cur_LC_ALL); + if (! bool_setlocale_c(LC_ALL, lc_all_copy)) { + locale_panic_(Perl_form(aTHX_ "Can't setlocale(%s)", lc_all_copy)); } + Safefree(lc_all_copy); # endif @@ -10674,8 +11266,12 @@ void Perl_thread_locale_init(pTHX) { -#ifdef USE_THREAD_SAFE_LOCALE -# ifdef USE_POSIX_2008_LOCALE +#if defined(USE_LOCALE) +# ifndef USE_THREAD_SAFE_LOCALE + + PL_perl_controls_locale = TRUE; + +# elif defined(USE_POSIX_2008_LOCALE) /* Called from a thread on startup. * @@ -10702,12 +11298,18 @@ Perl_thread_locale_init(pTHX) PL_cur_locale_obj = PL_C_locale_obj; # endif -# elif defined(WIN32) +# else +# ifdef WIN32 /* On Windows, make sure new thread has per-thread locales enabled */ if (_configthreadlocale(_ENABLE_PER_THREAD_LOCALE) == -1) { locale_panic_("_configthreadlocale returned an error"); } + + PL_perl_controls_locale = true; + +# endif + void_setlocale_c(LC_ALL, "C"); # endif @@ -10743,6 +11345,11 @@ Perl_thread_locale_term(pTHX) PL_cur_locale_obj = LC_GLOBAL_LOCALE; +#endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) + + assert(aTHX == 0 || PL_NUMERIC_toggle_depth == 0); + #endif #ifdef WIN32_USE_FAKE_OLD_MINGW_LOCALES diff --git a/makedef.pl b/makedef.pl index abd61996b7fd..126493c12b7b 100644 --- a/makedef.pl +++ b/makedef.pl @@ -68,6 +68,7 @@ BEGIN } use constant PLATFORM => $ARGS{PLATFORM}; +use constant LIBC => uc $Config{libc} =~ s/^-l//r; # This makes us able to use, e.g., $define{WIN32}, so you don't have to # remember what things came from %ARGS. @@ -167,8 +168,8 @@ BEGIN if ($define{USE_LOCALE_THREADS} && ! $define{NO_THREAD_SAFE_LOCALE}) { if ( $define{USE_POSIX_2008_LOCALE} - || ($define{WIN32} && ( $cctype !~ /\D/ - && $cctype >= 80))) + || ($define{WIN32} && ( ($cctype eq "GCC" && LIBC eq "UCRT") + || ($cctype !~ /\D/ && $cctype >= 80)))) { $define{USE_THREAD_SAFE_LOCALE} = 1; } @@ -195,7 +196,7 @@ BEGIN $define{USE_PL_CUR_LC_ALL} = 1; } - if ($cctype < 140) { + if ($cctype =~ /\D/ || $cctype < 140) { $define{TS_W32_BROKEN_LOCALECONV} = 1; } } @@ -476,6 +477,25 @@ sub readvar { ); } +unless ($define{USE_LOCALE} && ( $define{WIN32} + || ! $define{USE_THREAD_SAFE_LOCALE})) +{ + ++$skip{$_} foreach qw( + PL_perl_controls_locale + ); +} + +unless ($define{EMULATE_THREAD_SAFE_LOCALES}) +{ + ++$skip{$_} foreach qw( + PL_restore_locale + PL_restore_locale_depth + PL_NUMERIC_toggle_depth + Perl_category_lock + Perl_category_unlock + ); +} + unless ($define{USE_PERL_SWITCH_LOCALE_CONTEXT}) { ++$skip{$_} foreach qw( diff --git a/mg.c b/mg.c index 4b6d4ab62266..82c6299dd26e 100644 --- a/mg.c +++ b/mg.c @@ -1096,12 +1096,11 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) else if (strEQ(remaining, "AFE_LOCALES")) { #if ! defined(USE_ITHREADS) || defined(USE_THREAD_SAFE_LOCALE) - sv_setuv(sv, (UV) 1); - +#elif defined(EMULATE_THREAD_SAFE_LOCALES) + sv_setuv(sv, (UV) 2); #else sv_setuv(sv, (UV) 0); - #endif } diff --git a/perl.c b/perl.c index 1d1598af8abe..caad64aa0834 100644 --- a/perl.c +++ b/perl.c @@ -1127,6 +1127,12 @@ perl_destruct(pTHXx) PL_curlocales[i] = NULL; } #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_restore_locale); i++) { + Safefree(PL_restore_locale[i]); + PL_restore_locale[i] = NULL; + } +#endif #if defined(USE_POSIX_2008_LOCALE) && defined(USE_THREADS) { /* This also makes sure we aren't using a locale object that gets freed diff --git a/perl.h b/perl.h index fbf94fe1b882..730392fbdeab 100644 --- a/perl.h +++ b/perl.h @@ -58,9 +58,6 @@ # undef _UCRT # ifdef USE_LOCALE # define TS_W32_BROKEN_LOCALECONV -# ifdef USE_THREADS -# define EMULATE_THREAD_SAFE_LOCALES -# endif # endif #endif @@ -1277,8 +1274,11 @@ typedef enum { # endif /* POSIX 2008 has no means of finding out the current locale without a - * querylocale; so must keep track of it ourselves */ -# if (defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) + * querylocale; so must keep track of it ourselves. And for thread-safe + * emulation, we keep track because the system doesn't have per-thread + * information */ +# if (defined(USE_POSIX_2008_LOCALE) && ! defined(USE_QUERYLOCALE)) \ + || defined(EMULATE_THREAD_SAFE_LOCALES) # define USE_PL_CURLOCALES # endif @@ -7269,19 +7269,92 @@ the plain locale pragma without a parameter (S>) is in effect. #define LOCALE_READ_LOCK LOCALE_LOCK #define LOCALE_READ_UNLOCK LOCALE_UNLOCK +#ifndef EMULATE_THREAD_SAFE_LOCALES +# define LC_CATEGORY_LOCK_2_c_(i1, i2) +# define LC_CATEGORY_UNLOCK_2_c_(i1, i2) +#else -#ifndef LC_NUMERIC_LOCK -# define LC_NUMERIC_LOCK(cond) NOOP -# define LC_NUMERIC_UNLOCK NOOP -#endif +/* The macros for the individual categories are defined in terms of +* these two sets. */ +# define LC_INDEX_TO_BIT_(i) (1 << (i)) +# define LC_CATEGORY_LOCK_i_(i) \ + category_lock(LC_INDEX_TO_BIT_(i), __FILE__, __LINE__) +# define LC_CATEGORY_UNLOCK_i_(i) \ + category_unlock(LC_INDEX_TO_BIT_(i), __FILE__, __LINE__) + +# define LC_CATEGORY_LOCK_c_(cat) LC_CATEGORY_LOCK_i_(cat##_INDEX_) +# define LC_CATEGORY_UNLOCK_c_(cat) LC_CATEGORY_UNLOCK_i_(cat##_INDEX_) + +# define LC_CATEGORY_LOCK_2_i_(i1, i2) \ + category_lock((LC_INDEX_TO_BIT_(i1) | LC_INDEX_TO_BIT_(i2)), \ + __FILE__, __LINE__) +# define LC_CATEGORY_UNLOCK_2_i_(i1, i2) \ + category_unlock((LC_INDEX_TO_BIT_(i1) | LC_INDEX_TO_BIT_(i2)), \ + __FILE__, __LINE__) + +# define LC_CATEGORY_LOCK_2_c_(cat1, cat2) \ + LC_CATEGORY_LOCK_2_i_(cat1##_INDEX_, cat2##_INDEX_) +# define LC_CATEGORY_UNLOCK_2_c_(cat1, cat2) \ + LC_CATEGORY_UNLOCK_2_i_(cat1##_INDEX_, cat2##_INDEX_) + +# ifdef LC_COLLATE +# define LC_COLLATE_LOCK LC_CATEGORY_LOCK_c_(LC_COLLATE) +# define LC_COLLATE_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_COLLATE) +# endif +# ifdef LC_CTYPE +# define LC_CTYPE_LOCK LC_CATEGORY_LOCK_c_(LC_CTYPE) +# define LC_CTYPE_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_CTYPE) +# endif +# ifdef LC_MESSAGES +# define LC_MESSAGES_LOCK LC_CATEGORY_LOCK_c_(LC_MESSAGES) +# define LC_MESSAGES_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_MESSAGES) +# endif +# ifdef LC_MONETARY +# define LC_MONETARY_LOCK LC_CATEGORY_LOCK_c_(LC_MONETARY) +# define LC_MONETARY_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_MONETARY) +# endif +# ifdef LC_NUMERIC + + /* This is the one category we may already have defined. It needs to be + * overwritten. We ignore the parameter in this case, since in this + * thread-safe emulation, all the threads are jumbled together */ +# undef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + LC_CATEGORY_LOCK_c_(LC_NUMERIC) +# undef LC_NUMERIC_UNLOCK +# define LC_NUMERIC_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_NUMERIC) +# endif +# ifdef LC_TIME +# define LC_TIME_LOCK LC_CATEGORY_LOCK_c_(LC_TIME) +# define LC_TIME_UNLOCK LC_CATEGORY_UNLOCK_c_(LC_TIME) +# endif +#endif + +/* Below are lock definitions for individual functions that Perl uses. All + * such need to be in terms of the locale category(ies) that affect them, plus + * gwLOCALE_LOCK if they read/write global space. It is best to create a + * definition for each function to hide those details, and allow it to be more + * easily maintained. */ +#ifdef LC_CTYPE_LOCK +# define MBLEN_LOCK_ LC_CTYPE_LOCK +# define MBLEN_UNLOCK_ LC_CTYPE_UNLOCK +# define MBRLEN_LOCK_ LC_CTYPE_LOCK +# define MBRLEN_UNLOCK_ LC_CTYPE_UNLOCK +# define MBTOWC_LOCK_ LC_CTYPE_LOCK +# define MBTOWC_UNLOCK_ LC_CTYPE_UNLOCK +# define MBRTOWC_LOCK_ LC_CTYPE_LOCK +# define MBRTOWC_UNLOCK_ LC_CTYPE_UNLOCK +# define WCTOMB_LOCK_ LC_CTYPE_LOCK +# define WCTOMB_UNLOCK_ LC_CTYPE_UNLOCK +# define WCRTOMB_LOCK_ LC_CTYPE_LOCK +# define WCRTOMB_UNLOCK_ LC_CTYPE_UNLOCK +#else /* These non-reentrant versions use global space */ # define MBLEN_LOCK_ gwLOCALE_LOCK # define MBLEN_UNLOCK_ gwLOCALE_UNLOCK - # define MBTOWC_LOCK_ gwLOCALE_LOCK # define MBTOWC_UNLOCK_ gwLOCALE_UNLOCK - # define WCTOMB_LOCK_ gwLOCALE_LOCK # define WCTOMB_UNLOCK_ gwLOCALE_UNLOCK @@ -7295,13 +7368,52 @@ the plain locale pragma without a parameter (S>) is in effect. # define WCRTOMB_LOCK_ NOOP # define WCRTOMB_UNLOCK_ NOOP +# define LC_CTYPE_LOCK LOCALE_LOCK +# define LC_CTYPE_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_COLLATE_LOCK) # define LC_COLLATE_LOCK LOCALE_LOCK # define LC_COLLATE_UNLOCK LOCALE_UNLOCK +#endif + +#if ! defined(LC_MESSAGES_LOCK) +# define LC_MESSAGES_LOCK LOCALE_LOCK +# define LC_MESSAGES_UNLOCK LOCALE_UNLOCK +#endif +#if ! defined(LC_MONETARY_LOCK) +# define LC_MONETARY_LOCK LOCALE_LOCK +# define LC_MONETARY_UNLOCK LOCALE_UNLOCK +#endif + + /* Ifndef one or the other */ +#ifdef LC_TIME_LOCK +# define STRFTIME_LOCK \ + STMT_START { \ + LC_CATEGORY_LOCK_2_c_(LC_CTYPE, LC_TIME); \ + ENV_READ_LOCK; \ + PERL_ASSERT_CATEGORY_EQ_CTYPE(LC_TIME); \ + } STMT_END +# define STRFTIME_UNLOCK \ + STMT_START { \ + ENV_READ_UNLOCK; \ + LC_CATEGORY_UNLOCK_2_c_(LC_CTYPE, LC_TIME); \ + } STMT_END +#else # define STRFTIME_LOCK ENV_LOCK # define STRFTIME_UNLOCK ENV_UNLOCK +# define LC_TIME_LOCK LOCALE_LOCK +# define LC_TIME_UNLOCK LOCALE_UNLOCK +#endif + #ifdef USE_LOCALE_NUMERIC +# ifndef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond_to_panic_if_already_locked) \ + LOCALE_LOCK_(cond_to_panic_if_already_locked) +# define LC_NUMERIC_UNLOCK LOCALE_UNLOCK_ +# endif /* These macros are for toggling between the underlying locale (UNDERLYING or * LOCAL) and the C locale (STANDARD). (Actually we don't have to use the C @@ -7626,6 +7738,11 @@ cannot have changed since the precalculation. #endif /* !USE_LOCALE_NUMERIC */ +#ifndef LC_NUMERIC_LOCK +# define LC_NUMERIC_LOCK(cond) NOOP +# define LC_NUMERIC_UNLOCK NOOP +#endif + #ifdef USE_LOCALE_THREADS # define ENV_LOCK PERL_WRITE_LOCK(&PL_env_mutex) # define ENV_UNLOCK PERL_WRITE_UNLOCK(&PL_env_mutex) diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 53eb40feb31e..88da75374958 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -2560,10 +2560,11 @@ This variable was added in Perl v5.8.0. =item ${^SAFE_LOCALES} X<${^SAFE_LOCALES}> -Reflects if safe locale operations are available to this perl (when the -value is 1) or not (the value is 0). This variable is always 1 if the -perl has been compiled without threads. It is also 1 if this perl is -using thread-safe locale operations. Note that an individual thread may +Reflects if safe locale operations are available to this perl or not. +This variable is always 1 if the perl has been compiled without threads. +It is also 1 if this perl is using thread-safe locale operations. And +it is 2 if this perl is emulating thread-safe locale operations. Note +that an individual thread may choose to use the global locale (generally unsafe) by calling L. This variable currently is still set to 1 in such threads. diff --git a/proto.h b/proto.h index c040ff33e5eb..f396820b12c6 100644 --- a/proto.h +++ b/proto.h @@ -5447,6 +5447,24 @@ Perl_variant_byte_number(PERL_UINTMAX_T word) # endif #endif +#if defined(EMULATE_THREAD_SAFE_LOCALES) +PERL_CALLCONV void +Perl_category_lock(pTHX_ const UV mask, const char *file, const line_t caller_line); +# define PERL_ARGS_ASSERT_CATEGORY_LOCK \ + assert(file) + +PERL_CALLCONV void +Perl_category_unlock(pTHX_ const UV mask, const char *file, const line_t caller_line); +# define PERL_ARGS_ASSERT_CATEGORY_UNLOCK \ + assert(file) + +# if !defined(PERL_NO_INLINE_FUNCTIONS) +PERL_STATIC_INLINE int +Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum); +# define PERL_ARGS_ASSERT_POSIX_LC_FOO_ + +# endif +#endif /* defined(EMULATE_THREAD_SAFE_LOCALES) */ #if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE) PERL_CALLCONV I32 Perl_my_chsize(pTHX_ int fd, Off_t length) @@ -7118,6 +7136,14 @@ S_my_setlocale_debug_string_i(pTHX_ const locale_category_index cat_index, const __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_MY_SETLOCALE_DEBUG_STRING_I +# endif +# if defined(EMULATE_THREAD_SAFE_LOCALES) || \ + ( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) ) +STATIC void +S_update_PL_curlocales_i(pTHX_ const locale_category_index index, const char *new_locale, const line_t caller_line); +# define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ + assert(new_locale) + # endif # if defined(HAS_LOCALECONV) && \ ( defined(USE_LOCALE_MONETARY) || defined(USE_LOCALE_NUMERIC) ) @@ -7207,16 +7233,9 @@ STATIC locale_t S_use_curlocale_scratch(pTHX); # define PERL_ARGS_ASSERT_USE_CURLOCALE_SCRATCH -# if !defined(USE_QUERYLOCALE) -STATIC void -S_update_PL_curlocales_i(pTHX_ const locale_category_index index, const char *new_locale, const line_t caller_line); -# define PERL_ARGS_ASSERT_UPDATE_PL_CURLOCALES_I \ - assert(new_locale) - -# endif -# elif defined(USE_LOCALE_THREADS) && \ - !defined(USE_THREAD_SAFE_LOCALE) && \ - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) /* && +# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \ + defined(USE_LOCALE_THREADS) && \ + !defined(USE_THREAD_SAFE_LOCALE) /* && !defined(USE_POSIX_2008_LOCALE) */ STATIC bool S_less_dicey_bool_setlocale_r(pTHX_ const int cat, const char *locale); @@ -7227,10 +7246,10 @@ STATIC const char * S_less_dicey_setlocale_r(pTHX_ const int category, const char *locale); # define PERL_ARGS_ASSERT_LESS_DICEY_SETLOCALE_R -# endif /* defined(USE_LOCALE_THREADS) && +# endif /* !defined(EMULATE_THREAD_SAFE_LOCALES) && + defined(USE_LOCALE_THREADS) && !defined(USE_POSIX_2008_LOCALE) && - !defined(USE_THREAD_SAFE_LOCALE) && - !defined(USE_THREAD_SAFE_LOCALE_EMULATION) */ + !defined(USE_THREAD_SAFE_LOCALE) */ # if defined(WIN32) || defined(WIN32_USE_FAKE_OLD_MINGW_LOCALES) STATIC wchar_t * S_Win_byte_string_to_wstring(const UINT code_page, const char *byte_string); diff --git a/sv.c b/sv.c index 94774f12af68..c2e0573f7e75 100644 --- a/sv.c +++ b/sv.c @@ -16111,6 +16111,10 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_subname = sv_dup_inc(proto_perl->Isubname, param); +/* The new locale starts in the global C locale. */ +#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE)) + PL_perl_controls_locale = false; +#endif #ifdef USE_PL_CURLOCALES for (i = 0; i < (int) C_ARRAY_LENGTH(PL_curlocales); i++) { PL_curlocales[i] = SAVEPV("C"); @@ -16119,6 +16123,13 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_PL_CUR_LC_ALL PL_cur_LC_ALL = SAVEPV("C"); #endif +# ifdef EMULATE_THREAD_SAFE_LOCALES + for (i = 0; i < (int) C_ARRAY_LENGTH(PL_restore_locale); i++) { + PL_restore_locale[i] = NULL; + PL_restore_locale_depth[i] = 0; + } + PL_NUMERIC_toggle_depth = 0; +#endif #ifdef USE_LOCALE_CTYPE Copy(PL_fold, PL_fold_locale, 256, U8);