Skip to content

Commit 17de527

Browse files
authored
Unicode-compliant islower/uppercase (#38574)
* Unicode-compliant islower/uppercase
* don't test isletter for non-L* letters
* include titlecase in alphas test
* add news
1 parent d20ca48 commit 17de527

File tree

3 files changed

+14
-15
lines changed

3 files changed

+14
-15
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ New library features
3636
Standard library changes
3737
------------------------
3838

39+
* `islowercase` and `isuppercase` are now compliant with the Unicode lower/uppercase categories ([#38574]).
3940

4041
#### Package Manager
4142

base/strings/unicode.jl

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,8 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO
280280
"""
281281
islowercase(c::AbstractChar) -> Bool
282282
283-
Tests whether a character is a lowercase letter.
284-
A character is classified as lowercase if it belongs to Unicode category Ll,
285-
Letter: Lowercase.
283+
Tests whether a character is a lowercase letter (according to the Unicode
284+
standard's `Lowercase` derived property).
286285
287286
See also: [`isuppercase`](@ref).
288287
@@ -298,16 +297,15 @@ julia> islowercase('❤')
298297
false
299298
```
300299
"""
301-
islowercase(c::AbstractChar) = category_code(c) == UTF8PROC_CATEGORY_LL
300+
islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c)))
302301

303302
# true for characters with the Unicode `Uppercase` derived property (titlecase letters are not included)
304303

305304
"""
306305
isuppercase(c::AbstractChar) -> Bool
307306
308-
Tests whether a character is an uppercase letter.
309-
A character is classified as uppercase if it belongs to Unicode category Lu,
310-
Letter: Uppercase, or Lt, Letter: Titlecase.
307+
Tests whether a character is an uppercase letter (according to the Unicode
308+
standard's `Uppercase` derived property).
311309
312310
See also: [`islowercase`](@ref).
313311
@@ -323,10 +321,7 @@ julia> isuppercase('❤')
323321
false
324322
```
325323
"""
326-
function isuppercase(c::AbstractChar)
327-
cat = category_code(c)
328-
cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT
329-
end
324+
isuppercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c)))
330325

331326
"""
332327
iscased(c::AbstractChar) -> Bool

stdlib/Unicode/test/runtests.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,25 +93,28 @@ end
9393
@testset "#5939 uft8proc character predicates" begin
9494
alower=['a', 'd', 'j', 'y', 'z']
9595
ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
96-
for c in vcat(alower,ulower)
96+
for c in vcat(alower,ulower,['ª'])
9797
@test islowercase(c) == true
9898
@test isuppercase(c) == false
9999
@test isdigit(c) == false
100100
@test isnumeric(c) == false
101101
end
102102

103103
aupper=['A', 'D', 'J', 'Y', 'Z']
104-
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'ǅ', 'Ж', 'Д']
104+
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Ж', 'Д']
105105

106-
for c in vcat(aupper,uupper)
106+
for c in vcat(aupper,uupper,['Ⓐ'])
107107
@test islowercase(c) == false
108108
@test isuppercase(c) == true
109109
@test isdigit(c) == false
110110
@test isnumeric(c) == false
111111
end
112112

113+
@test !isuppercase('ǅ') # titlecase is not uppercase
114+
@test Base.Unicode.iscased('ǅ') # but is "cased"
115+
113116
nocase=['א','ﺵ']
114-
alphas=vcat(alower,ulower,aupper,uupper,nocase)
117+
alphas=vcat(alower,ulower,aupper,uupper,nocase,['ǅ'])
115118

116119
for c in alphas
117120
@test isletter(c) == true

0 commit comments

Comments
 (0)