Skip to content

Commit ee3ff3c

Browse files
committed
ensure libc is using a UTF8-compatible encoding
Some basic functionality in libc (such as printf) might be broken if the string encoding does not match our Cstring expectations.
1 parent 3d75d1e commit ee3ff3c

File tree

6 files changed

+108
-70
lines changed

6 files changed

+108
-70
lines changed

src/support/libsupportinit.c

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,47 @@
77
extern "C" {
88
#endif
99

10-
static int isInitialized = 0;
10+
// Return a pointer to the first occurrence of `c` in `s`, or, when `c` does not
// occur, a pointer to the terminating NUL of `s`. The result is never NULL, so
// callers can subtract `s` from it to get a prefix length (this mirrors the
// behaviour of the GNU `strchrnul` extension).
static const char *jl_strchrnul(const char *s, int c)
{
    // keep the pointer const-qualified: `s` is read-only, and assigning the
    // result of strchr() on a const string to a plain `char *` silently drops
    // the qualifier (rejected by C++, diagnosed by C23's generic strchr)
    const char *p = strchr(s, c);
    if (p)
        return p;
    return s + strlen(s);
}
1117

1218
void libsupport_init(void)
1319
{
20+
static int isInitialized = 0;
1421
if (!isInitialized) {
15-
16-
setlocale(LC_ALL, ""); // set to user locale
17-
setlocale(LC_NUMERIC, "C"); // use locale-independent numeric formats
18-
1922
ios_init_stdstreams();
20-
21-
isInitialized=1;
23+
isInitialized = 1;
24+
25+
// adopt the user's locale for most formatting
26+
setlocale(LC_ALL, "");
27+
// but use locale-independent numeric formats (for parsing)
28+
setlocale(LC_NUMERIC, "C");
29+
// and try to specify ASCII or UTF-8 (preferred) for our Libc and Cstring functions
30+
char *ctype = setlocale(LC_CTYPE, NULL);
31+
if (ctype) {
32+
size_t codeset = jl_strchrnul(ctype, '.') - ctype;
33+
if (strncmp(ctype + codeset, ".UTF-8", strlen(".UTF-8")) == 0 ||
34+
strncmp(ctype + codeset, ".utf-8", strlen(".utf-8")) == 0 ||
35+
strncmp(ctype + codeset, ".utf8", strlen(".utf8")) == 0)
36+
return; // already UTF-8
37+
ctype = (char*)memcpy(malloc_s(codeset + sizeof(".UTF-8")), ctype, codeset);
38+
strcpy(ctype + codeset, ".UTF-8");
39+
}
40+
setlocale(LC_CTYPE, "C"); // ASCII
41+
#ifndef _OS_WINDOWS_
42+
if (setlocale(LC_CTYPE, "C.UTF-8") == NULL && // Linux/FreeBSD name
43+
setlocale(LC_CTYPE, "en_US.UTF-8") == NULL && // Common name
44+
setlocale(LC_CTYPE, "UTF-8") == NULL && // Apple name
45+
(ctype == NULL || setlocale(LC_CTYPE, ctype) == NULL)) { // attempt to form it manually
46+
ios_puts("WARNING: failed to select UTF-8 encoding, using ASCII\n", ios_stderr);
47+
}
48+
#endif
49+
if (ctype)
50+
free(ctype);
2251
}
2352
}
2453

stdlib/Dates/test/io.jl

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ module IOTests
55
using Test
66
using Dates
77

8+
const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
9+
include(joinpath(BASE_TEST_PATH, "testhelpers", "withlocales.jl"))
10+
811
@testset "string/show representation of Date" begin
912
@test string(Dates.Date(1, 1, 1)) == "0001-01-01" # January 1st, 1 AD/CE
1013
@test sprint(show, Dates.Date(1, 1, 1)) == "Dates.Date(\"0001-01-01\")"
@@ -515,36 +518,31 @@ end
515518
end
516519

517520
@testset "AM/PM" begin
518-
# get the current locale
519-
LC_TIME = 2
520-
time_locale = ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, C_NULL)
521-
try
522-
# set the locale
523-
ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, "C")
524-
525-
for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"),
526-
("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59"))
527-
d = DateTime("2018-01-01T$t24:00")
528-
t = Time("$t24:00")
529-
for HH in ("HH","II")
530-
@test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d
531-
@test Time("$t12","$HH:MMp") == t
532-
end
533-
tmstruct = Libc.strptime("%I:%M%p", t12)
534-
@test Time(tmstruct) == t
535-
@test uppercase(t12) == Dates.format(t, "II:MMp") ==
536-
Dates.format(d, "II:MMp") ==
537-
Libc.strftime("%I:%M%p", tmstruct)
521+
for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"),
522+
("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59"))
523+
d = DateTime("2018-01-01T$t24:00")
524+
t = Time("$t24:00")
525+
for HH in ("HH","II")
526+
@test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d
527+
@test Time("$t12","$HH:MMp") == t
538528
end
539-
for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ")
540-
@eval @test_throws ArgumentError Time($bad, "II:MMp")
529+
local tmstruct, strftime
530+
withlocales(["C"]) do
531+
# test am/pm comparison handling
532+
tmstruct = Libc.strptime("%I:%M%p", t12)
533+
strftime = Libc.strftime("%I:%M%p", tmstruct)
534+
nothing
541535
end
542-
# if am/pm is missing, defaults to 24-hour clock
543-
@eval Time("13:24", "II:MMp") == Time("13:24", "HH:MM")
544-
finally
545-
# recover the locale
546-
ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, time_locale)
536+
@test Time(tmstruct) == t
537+
@test uppercase(t12) == Dates.format(t, "II:MMp") ==
538+
Dates.format(d, "II:MMp") ==
539+
strftime
540+
end
541+
for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ")
542+
@test_throws ArgumentError Time(bad, "II:MMp")
547543
end
544+
# if am/pm is missing, defaults to 24-hour clock
545+
@test Time("13:24", "II:MMp") == Time("13:24", "HH:MM")
548546
end
549547

550548
end

stdlib/Dates/test/types.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ end
257257

258258
@testset "issue #31524" begin
259259
dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
260-
dt2 = Base.Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
260+
dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
261261

262262
time = Time(dt1)
263263
@test typeof(time) == Time

test/ccall.jl

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,15 +1710,14 @@ end
17101710
@test str == "hi+1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-1.1-2.2-3.3-4.4-5.5-6.6-7.7-8.8-9.9\n"
17111711
end
17121712

1713+
17131714
@testset "Cwstring" begin
1714-
n = 100
1715-
buffer = Array{Cwchar_t}(undef, n)
1716-
if Sys.iswindows()
1717-
# sprintf throws an error on Windows, see https://github.com/JuliaLang/julia/pull/36040#issuecomment-634774055
1718-
len = @ccall swprintf_s(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
1719-
else
1720-
len = @ccall swprintf(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
1721-
end
1715+
buffer = Array{Cwchar_t}(undef, 100)
1716+
len = @static if Sys.iswindows()
1717+
@ccall swprintf_s(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
1718+
else
1719+
@ccall swprintf(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
1720+
end
17221721
Libc.systemerror("swprintf", len < 0)
17231722
str = GC.@preserve buffer unsafe_string(pointer(buffer), len)
17241723
@test str == "α+β=15"

test/misc.jl

Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

33
isdefined(Main, :FakePTYs) || @eval Main include("testhelpers/FakePTYs.jl")
4+
include("testhelpers/withlocales.jl")
45

56
# Tests that do not really go anywhere else
67

@@ -697,36 +698,19 @@ end
697698

698699
# issue #27239
699700
@testset "strftime tests issue #27239" begin
700-
701-
# save current locales
702-
locales = Dict()
703-
for cat in 0:9999
704-
cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL)
705-
if cstr != C_NULL
706-
locales[cat] = unsafe_string(cstr)
707-
end
708-
end
709-
710701
# change to non-Unicode Korean
711-
for (cat, _) in locales
712-
korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"]
713-
for lc in korloc
714-
cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
715-
end
702+
korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"]
703+
timestrs = String[]
704+
withlocales(korloc) do
705+
# system dependent formats
706+
push!(timestrs, Libc.strftime(0.0))
707+
push!(timestrs, Libc.strftime("%a %A %b %B %p %Z", 0))
716708
end
717-
718-
# system dependent formats
719-
timestr_c = Libc.strftime(0.0)
720-
timestr_aAbBpZ = Libc.strftime("%a %A %b %B %p %Z", 0)
721-
722-
# recover locales
723-
for (cat, lc) in locales
724-
cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
725-
end
726-
727709
# tests
728-
@test isvalid(timestr_c)
729-
@test isvalid(timestr_aAbBpZ)
710+
isempty(timestrs) && @warn "skipping stftime tests: no locale found for testing"
711+
for s in timestrs
712+
@test isvalid(s)
713+
end
730714
end
731715

732716

test/testhelpers/withlocales.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
"""
    withlocales(f, newlocales)

Call `f()` once for each locale name in `newlocales` that `setlocale` accepts for
every locale category recorded at entry, then restore the recorded locales.

A locale name that `setlocale` rejects for at least one category is skipped without
calling `f`. The recorded locales are restored even when `f` throws. The return
values of `f` are discarded; `withlocales` returns `nothing`.
"""
function withlocales(f, newlocales)
    # save current locales: probe a wide range of category numbers and record
    # only those for which `setlocale` reports a value (non-NULL result)
    # NOTE(review): presumably the range is this wide because the numeric
    # values of the LC_* categories differ between platforms -- confirm
    locales = Dict{Int,String}()
    for cat in 0:9999
        cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL)
        if cstr != C_NULL
            locales[cat] = unsafe_string(cstr)
        end
    end
    try
        # change to each of given locales
        for lc in newlocales
            set = true
            # `&=` does not short-circuit, so every category is attempted even
            # after one of them has been rejected
            for cat in keys(locales)
                set &= ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) != C_NULL
            end
            # only run `f` when the locale was applied to all categories
            set && f()
        end
    finally
        # recover locales
        for (cat, lc) in locales
            ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
        end
    end
    return nothing
end

0 commit comments

Comments
 (0)