Skip to content

Commit 295b098

Browse files
authored
Merge pull request #25021 from JuliaLang/nl/unicode
Move Unicode-related functions to new Unicode stdlib package
2 parents 87c1d4f + 756936a commit 295b098

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+551
-445
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ The steps required to add a new docstring are listed below:
174174
Examples written within docstrings can be used as testcases known as "doctests" by annotating code blocks with `jldoctest`.
175175

176176
```jldoctest
177-
julia> uppercase("Docstring test")
177+
julia> Unicode.uppercase("Docstring test")
178178
"DOCSTRING TEST"
179179
```
180180

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,16 @@ Deprecated or removed
738738
* The `sum_kbn` and `cumsum_kbn` functions have been moved to the
739739
[KahanSummation](https://github.com/JuliaMath/KahanSummation.jl) package ([#24869]).
740740

741+
* Unicode-related string functions have been moved to the new `Unicode` standard
742+
library module ([#25021]). This applies to `normalize_string`, `graphemes`,
743+
`is_assigned_char`, `textwidth`, `isascii`, `islower`, `isupper`, `isalpha`,
744+
`isdigit`, `isxdigit`, `isnumber`, `isalnum`, `iscntrl`, `ispunct`, `isspace`,
745+
`isprint`, `isgraph`, `lowercase`, `uppercase`, `titlecase`, `lcfirst` and `ucfirst`.
746+
747+
* `isnumber` has been deprecated in favor of `isnumeric`, `is_assigned_char`
748+
in favor of `isassigned`, and `normalize_string` in favor of `normalize`; all three
749+
replacements live in the new `Unicode` standard library module ([#25021]).
750+
741751
Command-line option changes
742752
---------------------------
743753

@@ -1711,3 +1721,4 @@ Command-line option changes
17111721
[#24413]: https://github.com/JuliaLang/julia/issues/24413
17121722
[#24653]: https://github.com/JuliaLang/julia/issues/24653
17131723
[#24869]: https://github.com/JuliaLang/julia/issues/24869
1724+
[#25021]: https://github.com/JuliaLang/julia/issues/25021

base/arrayshow.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
166166
screenwidth -= length(pre) + length(post)
167167
presp = repeat(" ", length(pre)) # indent each row to match pre string
168168
postsp = ""
169-
@assert textwidth(hdots) == textwidth(ddots)
169+
@assert Unicode.textwidth(hdots) == Unicode.textwidth(ddots)
170170
sepsize = length(sep)
171171
rowsA, colsA = indices(X,1), indices(X,2)
172172
m, n = length(rowsA), length(colsA)

base/char.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ function show(io::IO, c::Char)
6464
return
6565
end
6666
end
67-
if isprint(c)
67+
if Unicode.isprint(c)
6868
write(io, 0x27, c, 0x27)
6969
else
7070
u = UInt32(c)
@@ -81,6 +81,6 @@ end
8181
function show(io::IO, ::MIME"text/plain", c::Char)
8282
show(io, c)
8383
u = UInt32(c)
84-
print(io, ": ", isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4))
85-
print(io, " (category ", UTF8proc.category_abbrev(c), ": ", UTF8proc.category_string(c), ")")
84+
print(io, ": ", Unicode.isascii(c) ? "ASCII/" : "", "Unicode U+", hex(u, u > 0xffff ? 6 : 4))
85+
print(io, " (category ", Unicode.category_abbrev(c), ": ", Unicode.category_string(c), ")")
8686
end

base/client.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ function load_machine_file(path::AbstractString)
359359
s = split(line, '*'; keep = false)
360360
map!(strip, s, s)
361361
if length(s) > 1
362-
cnt = isnumber(s[1]) ? parse(Int,s[1]) : Symbol(s[1])
362+
cnt = all(isdigit, s[1]) ? parse(Int,s[1]) : Symbol(s[1])
363363
push!(machines,(s[2], cnt))
364364
else
365365
push!(machines,line)

base/deprecated.jl

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,13 +1072,6 @@ function Matrix()
10721072
return Matrix(uninitialized, 0, 0)
10731073
end
10741074

1075-
for name in ("alnum", "alpha", "cntrl", "digit", "number", "graph",
1076-
"lower", "print", "punct", "space", "upper", "xdigit")
1077-
f = Symbol("is",name)
1078-
@eval import .UTF8proc: $f
1079-
@eval @deprecate ($f)(s::AbstractString) all($f, s)
1080-
end
1081-
10821075
# TODO: remove warning for using `_` in parse_input_line in base/client.jl
10831076

10841077
# Special functions have been moved to a package
@@ -1512,7 +1505,7 @@ export hex2num
15121505
@deprecate convert(::Type{Symbol}, s::AbstractString) Symbol(s)
15131506
@deprecate convert(::Type{String}, s::Symbol) String(s)
15141507
@deprecate convert(::Type{String}, v::Vector{UInt8}) String(v)
1515-
@deprecate convert(::Type{S}, g::UTF8proc.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)
1508+
@deprecate convert(::Type{S}, g::Unicode.GraphemeIterator) where {S<:AbstractString} convert(S, g.s)
15161509

15171510
# Issue #19923
15181511
@deprecate ror circshift
@@ -2972,6 +2965,28 @@ end
29722965
@deprecate_moved sum_kbn "KahanSummation"
29732966
@deprecate_moved cumsum_kbn "KahanSummation"
29742967

2968+
# PR #25021
2969+
@deprecate_moved normalize_string "Unicode" true true
2970+
@deprecate_moved graphemes "Unicode" true true
2971+
@deprecate_moved is_assigned_char "Unicode" true true
2972+
@deprecate_moved textwidth "Unicode" true true
2973+
@deprecate_moved islower "Unicode" true true
2974+
@deprecate_moved isupper "Unicode" true true
2975+
@deprecate_moved isalpha "Unicode" true true
2976+
@deprecate_moved isdigit "Unicode" true true
2977+
@deprecate_moved isnumber "Unicode" true true
2978+
@deprecate_moved isalnum "Unicode" true true
2979+
@deprecate_moved iscntrl "Unicode" true true
2980+
@deprecate_moved ispunct "Unicode" true true
2981+
@deprecate_moved isspace "Unicode" true true
2982+
@deprecate_moved isprint "Unicode" true true
2983+
@deprecate_moved isgraph "Unicode" true true
2984+
@deprecate_moved lowercase "Unicode" true true
2985+
@deprecate_moved uppercase "Unicode" true true
2986+
@deprecate_moved titlecase "Unicode" true true
2987+
@deprecate_moved lcfirst "Unicode" true true
2988+
@deprecate_moved ucfirst "Unicode" true true
2989+
29752990
# END 0.7 deprecations
29762991

29772992
# BEGIN 1.0 deprecations

base/dict.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

33
function _truncate_at_width_or_chars(str, width, chars="", truncmark="")
4-
truncwidth = textwidth(truncmark)
4+
truncwidth = Unicode.textwidth(truncmark)
55
(width <= 0 || width < truncwidth) && return ""
66

77
wid = truncidx = lastidx = 0
88
idx = start(str)
99
while !done(str, idx)
1010
lastidx = idx
1111
c, idx = next(str, idx)
12-
wid += textwidth(c)
12+
wid += Unicode.textwidth(c)
1313
wid >= width - truncwidth && truncidx == 0 && (truncidx = lastidx)
1414
(wid >= width || c in chars) && break
1515
end

base/distributed/Distributed.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ using Base: Process, Semaphore, JLOptions, AnyDict, buffer_writes, wait_connecte
1515
binding_module, notify_error, atexit, julia_exename, julia_cmd,
1616
AsyncGenerator, display_error, acquire, release, invokelatest, warn_once,
1717
shell_escape_posixly, uv_error
18+
using Base.Unicode: isascii, isdigit, isnumeric
1819

1920
# NOTE: clusterserialize.jl imports additional symbols from Base.Serializer for use
2021

base/docs/utils.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Text / HTML objects
44

55
import Base: print, show, ==, hash
6+
using Base.Unicode
67

78
export HTML, @html_str
89

@@ -231,7 +232,8 @@ function matchinds(needle, haystack; acronym = false)
231232
for (i, char) in enumerate(haystack)
232233
isempty(chars) && break
233234
while chars[1] == ' ' shift!(chars) end # skip spaces
234-
if lowercase(char) == lowercase(chars[1]) && (!acronym || !isalpha(lastc))
235+
if Unicode.lowercase(char) == Unicode.lowercase(chars[1]) &&
236+
(!acronym || !Unicode.isalpha(lastc))
235237
push!(is, i)
236238
shift!(chars)
237239
end

base/exports.jl

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -725,40 +725,22 @@ export
725725
eachmatch,
726726
endswith,
727727
escape_string,
728-
graphemes,
729728
hex,
730729
hex2bytes,
731730
hex2bytes!,
732731
ind2chr,
733732
info,
734-
is_assigned_char,
735-
isalnum,
736-
isalpha,
737-
isascii,
738-
iscntrl,
739-
isdigit,
740-
isgraph,
741-
islower,
742733
ismatch,
743-
isnumber,
744-
isprint,
745-
ispunct,
746-
isspace,
747-
isupper,
748734
isvalid,
749-
isxdigit,
750735
join,
751-
lcfirst,
752736
logging,
753-
lowercase,
754737
lpad,
755738
lstrip,
756739
match,
757740
matchall,
758741
ncodeunits,
759742
ndigits,
760743
nextind,
761-
normalize_string,
762744
oct,
763745
prevind,
764746
print,
@@ -785,13 +767,9 @@ export
785767
string,
786768
strip,
787769
summary,
788-
textwidth,
789770
thisind,
790-
titlecase,
791771
transcode,
792-
ucfirst,
793772
unescape_string,
794-
uppercase,
795773
warn,
796774

797775
# random numbers

0 commit comments

Comments
 (0)