@@ -27,17 +27,67 @@ function fill_refs!(refs::AbstractArray, X::AbstractArray,
27
27
end
28
28
end
29
29
30
# Precompiled `Printf` format whose precision is supplied at call time (`%.*g`),
# shared by the label-formatting code. It is only defined on Julia 1.10 and later;
# code paths for older versions build a `Printf.Format` with the precision
# interpolated into the format string instead.
# NOTE(review): presumably gated on 1.10 because that is when `Printf` gained
# support for `*` precision -- confirm against the Julia release notes.
@static if VERSION >= v"1.10"
    const CUT_FMT = Printf.Format("%.*g")
end
33
+
30
34
"""
31
- default_formatter(from, to, i; leftclosed, rightclosed)
35
+ CategoricalArrays.default_formatter(from, to, i::Integer;
36
+ leftclosed::Bool, rightclosed::Bool,
37
+ sigdigits::Integer)
32
38
33
- Provide the default label format for the `cut(x, breaks)` method.
39
+ Provide the default label format for the `cut(x, breaks)` method,
40
+ which is `"[from, to)"` if `leftclosed` is `true` and `"(from, to)"` otherwise.
41
+
42
+ If they are floating-point values, breaks are turned into strings using
43
+ `@sprintf("%.*g", sigdigits, break)`
44
+ (applied in the same way to both the `from` and `to` boundaries).
34
45
"""
35
- default_formatter (from, to, i; leftclosed, rightclosed) =
36
- string (leftclosed ? " [" : " (" , from, " , " , to, rightclosed ? " ]" : " )" )
46
function default_formatter(from, to, i::Integer;
                           leftclosed::Bool, rightclosed::Bool,
                           sigdigits::Integer)
    # Render one interval boundary: floating-point values are printed with
    # `sigdigits` significant digits using the `%g` conversion, anything else
    # goes through `string` unchanged.
    render = function (bound)
        bound isa AbstractFloat || return string(bound)
        @static if VERSION >= v"1.10"
            # Reuse the shared format object, passing the precision as an argument
            return Printf.format(CUT_FMT, sigdigits, bound)
        else
            # Older versions: build a format with the precision interpolated
            return Printf.format(Printf.Format("%.$(sigdigits)g"), bound)
        end
    end

    # The group index `i` is accepted for interface compatibility but is not
    # part of the label produced here.
    opening = leftclosed ? "[" : "("
    closing = rightclosed ? "]" : ")"
    return string(opening, render(from), ", ", render(to), closing)
end
66
+
67
+ """
68
+ CategoricalArrays.numbered_formatter(from, to, i::Integer;
69
+ leftclosed::Bool, rightclosed::Bool,
70
+ sigdigits::Integer)
71
+
72
+ Provide the default label format for the `cut(x, ngroups)` method
73
+ when `allowempty=true`, which is `"i: [from, to)"` if `leftclosed`
74
+ is `true` and `"i: (from, to)"` otherwise.
75
+
76
+ If they are floating-point values, breaks are turned into strings using
77
+ `@sprintf("%.*g", sigdigits, breaks)`
78
+ (applied in the same way to both the `from` and `to` boundaries).
79
+ """
80
function numbered_formatter(from, to, i::Integer;
                            leftclosed::Bool, rightclosed::Bool,
                            sigdigits::Integer)
    # Build the plain interval label first, then prefix it with the group index
    interval = default_formatter(from, to, i;
                                 leftclosed=leftclosed, rightclosed=rightclosed,
                                 sigdigits=sigdigits)
    return string(i, ": ", interval)
end
37
86
38
87
@doc raw """
39
88
cut(x::AbstractArray, breaks::AbstractVector;
40
89
labels::Union{AbstractVector,Function},
90
+ sigdigits::Integer=3,
41
91
extend::Union{Bool,Missing}=false, allowempty::Bool=false)
42
92
43
93
Cut a numeric array into intervals at values `breaks`
@@ -49,15 +99,25 @@ the last interval, which is closed on both ends, i.e. `[lower, upper]`.
49
99
If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array will
50
100
also accept them.
51
101
102
+ !!! note
103
+ For floating point data, breaks may be rounded to `sigdigits` significant digits
104
+ when generating interval labels, meaning that they may not reflect exactly the cutpoints
105
+ used.
106
+
52
107
# Keyword arguments
53
108
* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
54
109
in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
55
110
all values in `x`; when `missing`, values outside of the breaks generate `missing` entries.
56
111
* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
57
- or numbers giving the names to use for
58
- the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
112
+ or numbers giving the names to use for the intervals; or a function
113
+ `f(from, to, i::Integer; leftclosed::Bool, rightclosed::Bool, sigdigits::Integer)` that generates
59
114
the labels from the left and right interval boundaries and the group index. Defaults to
60
- `"[from, to)"` (or `"[from, to]"` for the rightmost interval if `extend == true`).
115
+ [`CategoricalArrays.default_formatter`](@ref), giving `"[from, to)"` (or `"[from, to]"`
116
+ for the rightmost interval if `extend == true`).
117
+ * `sigdigits::Integer=3`: the minimum number of significant digits to use in labels.
118
+ This value is increased automatically if necessary so that rounded breaks are unique.
119
+ Only used for floating point types and when `labels` is a function, in which case it
120
+ is passed to it as a keyword argument.
61
121
* `allowempty::Bool=false`: when `false`, an error is raised if some breaks other than
62
122
the last one appear multiple times, generating empty intervals; when `true`,
63
123
duplicate breaks are allowed and the intervals they generate are kept as
@@ -69,19 +129,19 @@ julia> using CategoricalArrays
69
129
70
130
julia> cut(-1:0.5:1, [0, 1], extend=true)
71
131
5-element CategoricalArray{String,1,UInt32}:
72
- "[-1.0, 0. 0)"
73
- "[-1.0, 0. 0)"
74
- "[0.0 , 1.0 ]"
75
- "[0.0 , 1.0 ]"
76
- "[0.0 , 1.0 ]"
132
+ "[-1, 0)"
133
+ "[-1, 0)"
134
+ "[0, 1]"
135
+ "[0, 1]"
136
+ "[0, 1]"
77
137
78
138
julia> cut(-1:0.5:1, 2)
79
139
5-element CategoricalArray{String,1,UInt32}:
80
- "Q1: [-1.0, 0. 0)"
81
- "Q1: [-1.0, 0. 0)"
82
- "Q2: [0.0 , 1.0 ]"
83
- "Q2: [0.0 , 1.0 ]"
84
- "Q2: [0.0 , 1.0 ]"
140
+ "[-1, 0)"
141
+ "[-1, 0)"
142
+ "[0 , 1]"
143
+ "[0 , 1]"
144
+ "[0 , 1]"
85
145
86
146
julia> cut(-1:0.5:1, 2, labels=["A", "B"])
87
147
5-element CategoricalArray{String,1,UInt32}:
@@ -114,15 +174,17 @@ julia> cut(-1:0.5:1, 3, labels=fmt)
114
174
@inline function cut(x::AbstractArray, breaks::AbstractVector;
                     extend::Union{Bool, Missing}=false,
                     labels::Union{AbstractVector{<:SupportedTypes},Function}=default_formatter,
                     sigdigits::Integer=3,
                     allowempty::Bool=false)
    # Keyword front end only: every option is forwarded positionally to `_cut`,
    # which does the actual work.
    _cut(x, breaks, extend, labels, sigdigits, allowempty)
end
120
181
121
182
# Separate function for inferability (thanks to inlining of cut)
122
183
function _cut (x:: AbstractArray{T, N} , breaks:: AbstractVector ,
123
184
extend:: Union{Bool, Missing} ,
124
185
labels:: Union{AbstractVector{<:SupportedTypes},Function} ,
125
- allowempty:: Bool = false ) where {T, N}
186
+ sigdigits:: Integer ,
187
+ allowempty:: Bool ) where {T, N}
126
188
if ! issorted (breaks)
127
189
breaks = sort (breaks)
128
190
end
@@ -179,21 +241,60 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
179
241
end
180
242
end
181
243
244
# Find minimal number of digits so that distinct breaks remain so
if eltype(breaks) <: AbstractFloat
    # Rescan every pair of adjacent breaks after each increase of `sigdigits`, and
    # stop only once a complete pass finds no two distinct breaks that are rendered
    # identically. This also covers a collision on the last pair, which must be
    # rechecked like any other. When there are fewer than two breaks the range is
    # empty and nothing is compared, so the length check that follows can report
    # the problem.
    collision = true
    while collision
        collision = false
        for i in 2:lastindex(breaks)
            b1 = breaks[i-1]
            b2 = breaks[i]
            # Duplicated breaks can never be told apart, whatever the precision
            isequal(b1, b2) && continue

            # `CUT_FMT` is only defined on Julia 1.10 and later, so the version
            # test here has to match the one guarding its definition
            @static if VERSION >= v"1.10"
                b1_str = Printf.format(CUT_FMT, sigdigits, b1)
                b2_str = Printf.format(CUT_FMT, sigdigits, b2)
            else
                b1_str = Printf.format(Printf.Format("%.$(sigdigits)g"), b1)
                b2_str = Printf.format(Printf.Format("%.$(sigdigits)g"), b2)
            end
            if b1_str == b2_str
                # Two distinct breaks look the same: use one more digit and
                # start over from the first pair
                sigdigits += 1
                collision = true
                break
            end
        end
    end
end
182
268
n = length (breaks)
183
269
n >= 2 || throw (ArgumentError (" at least two breaks must be provided when extend is not true" ))
184
270
if labels isa Function
185
271
from = breaks[1 : n- 1 ]
186
272
to = breaks[2 : n]
187
- firstlevel = labels (from[1 ], to[1 ], 1 ,
188
- leftclosed= ! isequal (breaks[1 ], breaks[2 ]), rightclosed= false )
273
+ local firstlevel
274
+ try
275
+ firstlevel = labels (from[1 ], to[1 ], 1 ,
276
+ leftclosed= ! isequal (breaks[1 ], breaks[2 ]), rightclosed= false ,
277
+ sigdigits= sigdigits)
278
+ catch
279
+ # Support functions defined before v1.0, where sigdigits did not exist
280
+ Base. depwarn (" `labels` function is now required to accept a `sigdigits` keyword argument" ,
281
+ :cut )
282
+ labels_orig = labels
283
+ labels = (from, to, i; leftclosed, rightclosed, sigdigits) ->
284
+ labels_orig (from, to, i; leftclosed, rightclosed)
285
+ firstlevel = labels_orig (from[1 ], to[1 ], 1 ,
286
+ leftclosed= ! isequal (breaks[1 ], breaks[2 ]), rightclosed= false )
287
+ end
189
288
levs = Vector {typeof(firstlevel)} (undef, n- 1 )
190
289
levs[1 ] = firstlevel
191
290
for i in 2 : n- 2
192
291
levs[i] = labels (from[i], to[i], i,
193
- leftclosed= ! isequal (breaks[i], breaks[i+ 1 ]), rightclosed= false )
292
+ leftclosed= ! isequal (breaks[i], breaks[i+ 1 ]), rightclosed= false ,
293
+ sigdigits= sigdigits)
194
294
end
195
295
levs[end ] = labels (from[end ], to[end ], n- 1 ,
196
- leftclosed= true , rightclosed= true )
296
+ leftclosed= true , rightclosed= true ,
297
+ sigdigits= sigdigits)
197
298
else
198
299
length (labels) == n- 1 ||
199
300
throw (ArgumentError (" labels must be of length $(n- 1 ) , but got length $(length (labels)) " ))
@@ -213,14 +314,6 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
213
314
CategoricalArray {S, N} (refs, pool)
214
315
end
215
316
216
- """
217
- quantile_formatter(from, to, i; leftclosed, rightclosed)
218
-
219
- Provide the default label format for the `cut(x, ngroups)` method.
220
- """
221
- quantile_formatter (from, to, i; leftclosed, rightclosed) =
222
- string (" Q" , i, " : " , leftclosed ? " [" : " (" , from, " , " , to, rightclosed ? " ]" : " )" )
223
-
224
317
"""
225
318
Find first value in (sorted) `v` which is greater than or equal to each quantile
226
319
in (sorted) `qs`.
247
340
"""
248
341
cut(x::AbstractArray, ngroups::Integer;
249
342
labels::Union{AbstractVector{<:AbstractString},Function},
343
+ sigdigits::Integer=3,
250
344
allowempty::Bool=false)
251
345
252
346
Cut a numeric array into `ngroups` quantiles.
@@ -257,19 +351,32 @@ but breaks are taken from actual data values instead of estimated quantiles.
257
351
If `x` contains `missing` values, they are automatically skipped when computing
258
352
quantiles.
259
353
354
+ !!! note
355
+ For floating point data, breaks may be rounded to `sigdigits` significant digits
356
+ when generating interval labels, meaning that they may not reflect exactly the cutpoints
357
+ used.
358
+
260
359
# Keyword arguments
261
360
* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
262
- or numbers giving the names to use for
263
- the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
361
+ or numbers giving the names to use for the intervals; or a function
362
+ `f(from, to, i::Integer; leftclosed::Bool, rightclosed::Bool, sigdigits::Integer)` that generates
264
363
the labels from the left and right interval boundaries and the group index. Defaults to
265
- `"Qi: [from, to)"` (or `"Qi: [from, to]"` for the rightmost interval).
364
+ [`CategoricalArrays.default_formatter`](@ref), giving `"[from, to)"` (or `"[from, to]"`
365
+ for the rightmost interval) if `allowempty=false`, otherwise to
366
+ [`CategoricalArrays.numbered_formatter`](@ref), which prefixes the label with the quantile
367
+ number to ensure uniqueness.
368
+ * `sigdigits::Integer=3`: the minimum number of significant digits to use when rounding
369
+ breaks for inclusion in generated labels. This value is increased automatically if necessary
370
+ so that rounded breaks are unique. Only used for floating point types and when `labels` is a
371
+ function, in which case it is passed to it as a keyword argument.
266
372
* `allowempty::Bool=false`: when `false`, an error is raised if some quantiles breakpoints
267
373
other than the last one are equal, generating empty intervals;
268
374
when `true`, duplicate breaks are allowed and the intervals they generate are kept as
269
375
unused levels (but duplicate labels are not allowed).
270
376
"""
271
377
function cut (x:: AbstractArray , ngroups:: Integer ;
272
- labels:: Union{AbstractVector{<:SupportedTypes},Function} = quantile_formatter,
378
+ labels:: Union{AbstractVector{<:SupportedTypes},Function,Nothing} = nothing ,
379
+ sigdigits:: Integer = 3 ,
273
380
allowempty:: Bool = false )
274
381
ngroups >= 1 || throw (ArgumentError (" ngroups must be strictly positive (got $ngroups )" ))
275
382
sorted_x = eltype (x) >: Missing ? sort! (collect (skipmissing (x))) : sort (x)
@@ -286,5 +393,8 @@ function cut(x::AbstractArray, ngroups::Integer;
286
393
" Pass `allowempty=true` to allow empty quantiles or " *
287
394
" choose a lower value for `ngroups`." ))
288
395
end
289
- cut (x, breaks; labels= labels, allowempty= allowempty)
396
+ if labels === nothing
397
+ labels = allowempty ? numbered_formatter : default_formatter
398
+ end
399
+ return cut (x, breaks; labels= labels, sigdigits= sigdigits, allowempty= allowempty)
290
400
end
0 commit comments