Skip to content

Commit 61fb53d

Browse files
committed
Add ability to emulate thread-safe locale operations
Locale information was originally global for an entire process. Later, it was realized that different threads could want to be running in different locales. Windows added this ability, and POSIX 2008 followed suit (though using a completely different API). When available, perl automatically uses these capabilities. But many platforms have neither, or their implementation, such as on Darwin, is buggy. This commit adds the capability for Perl programs to operate as if the platform were thread-safe. This implementation is based on the observation that the underlying locale matters only to relatively few libc calls, and only during their execution. It can be anything at all at any other time. perl keeps what the proper locale should be for each category in a a per-thread array. Each locale-dependent operation must be wrapped in mutex lock/unlock operations. The lock additionally compares what libc knows the locale to be, and what it should be for this thread at this time, and changes the actual locale to the proper value if necessary. That's all that is needed. This commit adds macros to perl.h, for example "MBTOWC_LOCK_", that expand to do the mutex lock, and change the global locale to the expected value. On perls built without this emulation capability, they are no-ops. All code in the perl core (unless I've missed something), are changed to use these macros (there weren't actually many places that needed this). Thus, any pure perl program will automatically become locale-thread-safe under this Configuration. In order for XS code to also become locale-thread-safe, it must use these macros to wrap calls to locale-dependent functions. Relatively few modules call such functions. For example, the only one I found that ships with the perl core is Time::Piece, and it has more fundamental issues with running under threads than this. I am preparing pull requests for it. Thus, this is not completely transparent to code like native-thread-safe locale handling is. Therefore ${^SAFE_LOCALES} returns 2 (instead of 1) for this type of thread-safety. Another deficiency compared to the native thread safety is when a thread calls a non-perl library that accesses the locale. The typical example is Gtk (though this particular application can be configured to not be problematic). With the native safe threads, everything works as long as only one such thread is used per Perl program. That thread would then be the only one operating in the global locale, hence there are no conflicts. With this emulation, all threads are operating in the global locale, and mutexes would have to be used to prevent conflicts. To minimize those, the code added in this commit restores the global locale when through to the state it was in when started. A major concern is the performance impact. This is after all trading speed for accuracy. lib/locale_threads.t is noticeably slower when this is being used. But that is doing multiple threads constantly using locale-dependent operations. I don't notice any change with the rest of the test suite. In pure perl, this only comes into play while in the scope of 'use locale' or when using some of the few POSIX:: functions that are locale-dependent. And to some extent when formatting, but the regular overhead there should dwarf what this adds. This commit leaves this feature off by default. The next commit changes that for the next few 5.39 development releases, so we can see if there is actually an issue.
1 parent 4367557 commit 61fb53d

14 files changed

+967
-101
lines changed

embed.fnc

+19-8
Original file line numberDiff line numberDiff line change
@@ -3825,6 +3825,16 @@ p |void |dump_sv_child |NN SV *sv
38253825
CRTip |unsigned int|variant_byte_number \
38263826
|PERL_UINTMAX_T word
38273827
#endif
3828+
#if defined(EMULATE_THREAD_SAFE_LOCALES)
3829+
Cp |void |category_lock |const UV mask \
3830+
|NN const char *file \
3831+
|const line_t caller_line
3832+
Cp |void |category_unlock|const UV mask \
3833+
|NN const char *file \
3834+
|const line_t caller_line
3835+
Cip |int |posix_LC_foo_ |const int c \
3836+
|const U8 classnum
3837+
#endif
38283838
#if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
38293839
ARdp |I32 |my_chsize |int fd \
38303840
|Off_t length
@@ -4510,6 +4520,13 @@ RS |char * |my_setlocale_debug_string_i \
45104520
|NULLOK const char *retval \
45114521
|const line_t line
45124522
# endif
4523+
# if defined(EMULATE_THREAD_SAFE_LOCALES) || \
4524+
( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) )
4525+
S |void |update_PL_curlocales_i \
4526+
|const locale_category_index index \
4527+
|NN const char *new_locale \
4528+
|const line_t caller_line
4529+
# endif
45134530
# if defined(HAS_NL_LANGINFO)
45144531
S |const char *|langinfo_sv_i \
45154532
|const nl_item item \
@@ -4569,14 +4586,8 @@ S |const char *|querylocale_2008_i \
45694586
|const locale_category_index index \
45704587
|const line_t line
45714588
S |locale_t|use_curlocale_scratch
4572-
# if !defined(USE_QUERYLOCALE)
4573-
S |void |update_PL_curlocales_i \
4574-
|const locale_category_index index \
4575-
|NN const char *new_locale \
4576-
|const line_t caller_line
4577-
# endif
4578-
# elif defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE) && \
4579-
!defined(USE_THREAD_SAFE_LOCALE_EMULATION)
4589+
# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \
4590+
defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE)
45804591
S |bool |less_dicey_bool_setlocale_r \
45814592
|const int cat \
45824593
|NN const char *locale

embed.h

+11-6
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,11 @@
825825
# if !defined(EBCDIC)
826826
# define variant_byte_number Perl_variant_byte_number
827827
# endif
828+
# if defined(EMULATE_THREAD_SAFE_LOCALES)
829+
# define category_lock(a,b,c) Perl_category_lock(aTHX_ a,b,c)
830+
# define category_unlock(a,b,c) Perl_category_unlock(aTHX_ a,b,c)
831+
# define posix_LC_foo_(a,b) Perl_posix_LC_foo_(aTHX_ a,b)
832+
# endif
828833
# if defined(F_FREESP) && !defined(HAS_CHSIZE) && !defined(HAS_TRUNCATE)
829834
# define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b)
830835
# endif
@@ -1339,6 +1344,10 @@
13391344
# if defined(DEBUGGING)
13401345
# define my_setlocale_debug_string_i(a,b,c,d) S_my_setlocale_debug_string_i(aTHX_ a,b,c,d)
13411346
# endif
1347+
# if defined(EMULATE_THREAD_SAFE_LOCALES) || \
1348+
( defined(USE_POSIX_2008_LOCALE) && !defined(USE_QUERYLOCALE) )
1349+
# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c)
1350+
# endif
13421351
# if defined(HAS_NL_LANGINFO)
13431352
# define langinfo_sv_i(a,b,c,d,e) S_langinfo_sv_i(aTHX_ a,b,c,d,e)
13441353
# endif
@@ -1370,12 +1379,8 @@
13701379
# define bool_setlocale_2008_i(a,b,c) S_bool_setlocale_2008_i(aTHX_ a,b,c)
13711380
# define querylocale_2008_i(a,b) S_querylocale_2008_i(aTHX_ a,b)
13721381
# define use_curlocale_scratch() S_use_curlocale_scratch(aTHX)
1373-
# if !defined(USE_QUERYLOCALE)
1374-
# define update_PL_curlocales_i(a,b,c) S_update_PL_curlocales_i(aTHX_ a,b,c)
1375-
# endif
1376-
# elif defined(USE_LOCALE_THREADS) && \
1377-
!defined(USE_THREAD_SAFE_LOCALE) && \
1378-
!defined(USE_THREAD_SAFE_LOCALE_EMULATION)
1382+
# elif !defined(EMULATE_THREAD_SAFE_LOCALES) && \
1383+
defined(USE_LOCALE_THREADS) && !defined(USE_THREAD_SAFE_LOCALE)
13791384
# define less_dicey_bool_setlocale_r(a,b) S_less_dicey_bool_setlocale_r(aTHX_ a,b)
13801385
# define less_dicey_setlocale_r(a,b) S_less_dicey_setlocale_r(aTHX_ a,b)
13811386
# endif

embedvar.h

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

handy.h

+17-4
Original file line numberDiff line numberDiff line change
@@ -1555,9 +1555,16 @@ or casts
15551555

15561556
# define HIGHEST_REGCOMP_DOT_H_SYNC_ CC_VERTSPACE_
15571557

1558-
/* The members of the third group below do not need to be coordinated with data
1559-
* structures in regcomp.[ch] and regexec.c. */
1560-
# define CC_IDFIRST_ 16
1558+
/* These three follow immediately after the final function that has a version
1559+
* defined by C, like isascii(), so they overlap with anything else. They are
1560+
* used in the 'PL_libc_char_fcns' data structure, along with the ones above
1561+
* them */
1562+
# define CC_IDFIRST_ 16
1563+
# define CC_TOLOWER_ (CC_IDFIRST_ + 1)
1564+
# define CC_TOUPPER_ (CC_TOLOWER_ + 1)
1565+
1566+
/* The members of the fourth group below do not need to be coordinated with
1567+
* data structures in regcomp.[ch] and regexec.c. */
15611568
# define CC_CHARNAME_CONT_ 17
15621569
# define CC_NONLATIN1_FOLD_ 18
15631570
# define CC_NONLATIN1_SIMPLE_FOLD_ 19
@@ -2025,7 +2032,7 @@ END_EXTERN_C
20252032
# define is_posix_XDIGIT(c) isxdigit((U8) (c))
20262033
#endif
20272034

2028-
/* Below is the next level up, which currently expands to nothing more
2035+
/* Below is the next level up, which on most platforms expands to nothing more
20292036
* than the previous layer. These are the macros to use if you really need
20302037
* something whose input domain is a byte, and the locale isn't UTF-8; that is,
20312038
* where you normally would have to use things like bare isalnum().
@@ -2037,7 +2044,13 @@ END_EXTERN_C
20372044
* (Note, proper general operation of the bare libc functions requires you to
20382045
* cast to U8. These do that for you automatically.) */
20392046

2047+
/* In this one circumstance, the macro is implemented with a lock; otherwise it
2048+
* expands to just the layer below */
2049+
#ifdef EMULATE_THREAD_SAFE_LOCALES
2050+
# define WRAP_U8_LC_(c, classnum, posix) posix_LC_foo_((c), (classnum))
2051+
#else
20402052
# define WRAP_U8_LC_(c, classnum, posix) posix(c)
2053+
#endif
20412054

20422055
#define isU8_ALPHANUMERIC_LC(c) \
20432056
WRAP_U8_LC_((c), CC_ALPHANUMERIC_, is_posix_ALPHANUMERIC)

inline.h

+43
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,49 @@ S_PerlEnv_putenv(pTHX_ char * str)
318318

319319
#endif
320320

321+
/* ------------------------------- handy.h ------------------------------- */
322+
323+
#ifdef EMULATE_THREAD_SAFE_LOCALES
324+
325+
PERL_STATIC_INLINE int
326+
Perl_posix_LC_foo_(pTHX_ const int c, const U8 classnum) {
327+
int result;
328+
329+
LC_CTYPE_LOCK;
330+
331+
/* All calls to this (so far) are with a 'classnum' known at compile time,
332+
* so the compiler should constant fold this down to a single assignment */
333+
switch (classnum) {
334+
case CC_ALPHANUMERIC_:result = (bool) is_posix_ALPHANUMERIC(c); break;
335+
case CC_ALPHA_: result = (bool) is_posix_ALPHA(c); break;
336+
case CC_ASCII_: result = (bool) is_posix_ASCII(c); break;
337+
case CC_BLANK_: result = (bool) is_posix_BLANK(c); break;
338+
case CC_CASED_: result = (bool) is_posix_CASED(c); break;
339+
case CC_CNTRL_: result = (bool) is_posix_CNTRL(c); break;
340+
case CC_DIGIT_: result = (bool) is_posix_DIGIT(c); break;
341+
case CC_GRAPH_: result = (bool) is_posix_GRAPH(c); break;
342+
case CC_LOWER_: result = (bool) is_posix_LOWER(c); break;
343+
case CC_PRINT_: result = (bool) is_posix_PRINT(c); break;
344+
case CC_PUNCT_: result = (bool) is_posix_PUNCT(c); break;
345+
case CC_SPACE_: result = (bool) is_posix_SPACE(c); break;
346+
case CC_UPPER_: result = (bool) is_posix_UPPER(c); break;
347+
case CC_WORDCHAR_: result = (bool) is_posix_WORDCHAR(c); break;
348+
case CC_XDIGIT_: result = (bool) is_posix_XDIGIT(c); break;
349+
case CC_IDFIRST_: result = (bool) is_posix_IDFIRST(c); break;
350+
case CC_TOLOWER_: result = to_posix_LOWER(c); break;
351+
case CC_TOUPPER_: result = to_posix_UPPER(c); break;
352+
353+
default:
354+
LC_CTYPE_UNLOCK;
355+
locale_panic_(Perl_form(aTHX_ "Unknown charclass %d", classnum));
356+
}
357+
358+
LC_CTYPE_UNLOCK;
359+
return result;
360+
}
361+
362+
#endif
363+
321364
/* ------------------------------- mg.h ------------------------------- */
322365

323366
#if defined(PERL_CORE) || defined(PERL_EXT)

intrpvar.h

+8
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,15 @@ PERLVARI(I, cur_locale_obj, locale_t, LC_GLOBAL_LOCALE)
757757
* is almost always toggled into the C locale, and the locale it nominally is
758758
* is stored as PL_numeric_name. */
759759
PERLVARA(I, curlocales, LOCALE_CATEGORIES_COUNT_ + 1, const char *)
760+
#endif
761+
#ifdef EMULATE_THREAD_SAFE_LOCALES
762+
PERLVARA(I, restore_locale, LOCALE_CATEGORIES_COUNT_, const char *)
763+
PERLVARA(I, restore_locale_depth, LOCALE_CATEGORIES_COUNT_, Size_t)
764+
PERLVARI(I, NUMERIC_toggle_depth, int, 0)
765+
#endif
760766

767+
#if defined(USE_LOCALE) && (defined(WIN32) || ! defined(USE_THREAD_SAFE_LOCALE))
768+
PERLVARI(I, perl_controls_locale, bool, true)
761769
#endif
762770
#ifdef USE_PL_CUR_LC_ALL
763771
PERLVARI(I, cur_LC_ALL, const char *, NULL)

0 commit comments

Comments
 (0)