Skip to content

Commit 8e2da16

Browse files
bkamins authored and nalimilan committed
Correctly handle recoding pair with value range and source containing missings (JuliaData#106)
1 parent e5bc8f0 commit 8e2da16

File tree

2 files changed

+66
-12
lines changed

2 files changed

+66
-12
lines changed

src/recode.jl

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,21 @@ const ≅ = isequal
66
Fill `dest` with elements from `src`, replacing those matching a key of `pairs`
77
with the corresponding value.
88
9-
For each `Pair` in `pairs`, if the element is equal to (according to [`isequal`](@ref))
10-
or [`in`](@ref) the key (first item of the pair), then the corresponding value
11-
(second item) is copied to `dest`.
9+
For each `Pair` in `pairs`, if the element is equal to (according to [`isequal`](@ref))
10+
the key (first item of the pair) or to one of its entries if it is a collection,
11+
then the corresponding value (second item) is copied to `dest`.
1212
If the element matches no key and `default` is not provided or `nothing`, it is copied as-is;
1313
if `default` is specified, it is used in place of the original element.
1414
`dest` and `src` must be of the same length, but not necessarily of the same type.
1515
Elements of `src` as well as values from `pairs` will be `convert`ed when possible
1616
on assignment.
1717
If an element matches more than one key, the first match is used.
1818
19+
recode!(dest::CategoricalArray, src::AbstractArray[, default::Any], pairs::Pair...)
20+
21+
If `dest` is a `CategoricalArray` then the ordering of resulting levels is determined
22+
by the order of passed `pairs` and `default` will be the last level if provided.
23+
1924
recode!(dest::AbstractArray, src::AbstractArray{>:Missing}[, default::Any], pairs::Pair...)
2025
2126
If `src` contains missing values, they are never replaced with `default`:
@@ -36,8 +41,8 @@ function recode!(dest::AbstractArray{T}, src::AbstractArray, default::Any, pairs
3641

3742
for j in 1:length(pairs)
3843
p = pairs[j]
39-
if (!isa(p.first, Union{AbstractArray, Tuple}) && x ≅ p.first) ||
40-
(isa(p.first, Union{AbstractArray, Tuple}) && x in p.first)
44+
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(x ≅ y for y in p.first)) ||
45+
x ≅ p.first)
4146
dest[i] = p.second
4247
@goto nextitem
4348
end
@@ -89,8 +94,8 @@ function recode!(dest::CategoricalArray{T}, src::AbstractArray, default::Any, pa
8994

9095
for j in 1:length(pairs)
9196
p = pairs[j]
92-
if (!isa(p.first, Union{AbstractArray, Tuple}) && x ≅ p.first) ||
93-
(isa(p.first, Union{AbstractArray, Tuple}) && x in p.first)
97+
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(x ≅ y for y in p.first)) ||
98+
x ≅ p.first)
9499
drefs[i] = dupvals ? pairmap[j] : j
95100
@goto nextitem
96101
end
@@ -146,7 +151,7 @@ function recode!(dest::CategoricalArray{T}, src::CategoricalArray, default::Any,
146151

147152
for l in srclevels
148153
if !(any(x -> x ≅ l, firsts) ||
149-
any(f -> isa(f, Union{AbstractArray, Tuple}) && l in f, firsts))
154+
any(f -> isa(f, Union{AbstractArray, Tuple}) && any(l ≅ y for y in f), firsts))
150155
try
151156
push!(keptlevels, l)
152157
catch err
@@ -176,7 +181,8 @@ function recode!(dest::CategoricalArray{T}, src::CategoricalArray, default::Any,
176181
# For missing values (0 if no missing in pairs' keys)
177182
indexmap[1] = 0
178183
for p in pairs
179-
if ismissing(p.first)
184+
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(ismissing, p.first)) ||
185+
ismissing(p.first))
180186
indexmap[1] = get(dest.pool, p.second)
181187
break
182188
end
@@ -189,8 +195,8 @@ function recode!(dest::CategoricalArray{T}, src::CategoricalArray, default::Any,
189195
@inbounds for (i, l) in enumerate(srcindex)
190196
for j in 1:length(pairs)
191197
p = pairs[j]
192-
if (!isa(p.first, Union{AbstractArray, Tuple}) && l ≅ p.first) ||
193-
(isa(p.first, Union{AbstractArray, Tuple}) && l in p.first)
198+
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(l ≅ y for y in p.first)) ||
199+
l ≅ p.first)
194200
indexmap[i+1] = pairmap[j]
195201
@goto nextitem
196202
end
@@ -268,6 +274,11 @@ If the element matches no key and `default` is not provided or `nothing`, it is
268274
if `default` is specified, it is used in place of the original element.
269275
If an element matches more than one key, the first match is used.
270276
277+
recode(a::CategoricalArray[, default::Any], pairs::Pair...)
278+
279+
If `a` is a `CategoricalArray` then the ordering of resulting levels is determined
280+
by the order of passed `pairs` and `default` will be the last level if provided.
281+
271282
# Examples
272283
```jldoctest
273284
julia> using CategoricalArrays

test/16_recode.jl

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ end
123123

124124
@testset "Recoding from $(typeof(x)) to categorical array with missing values" for
125125
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"]))
126+
126127
# check that error is thrown
127128
y = Vector{String}(4)
128129
@test_throws MissingException recode!(y, x, "a", "c"=>"b")
@@ -159,7 +160,21 @@ end
159160
end
160161
end
161162

162-
@testset "Recoding array with missings, no default and with missing as a key pair from $(typeof(x)) to $(typeof(y))" for
163+
@testset "Collection in LHS recoding array with missings and no default from $(typeof(x)) to $(typeof(y))" for
164+
x in (["1", missing, "3", "4", "5"], CategoricalArray(["1", missing, "3", "4", "5"])),
165+
y in (similar(x), Array{Union{String, Missing}}(size(x)),
166+
CategoricalArray{Union{String, Missing}}(size(x)), x)
167+
168+
z = @inferred recode!(y, x, ["3","4"]=>"2")
169+
@test y === z
170+
@test y ≅ ["1", missing, "2", "2", "5"]
171+
if isa(y, CategoricalArray)
172+
@test levels(y) == ["1", "5", "2"]
173+
@test !isordered(y)
174+
end
175+
end
176+
177+
@testset "Recoding array with missings, default and with missing as a key pair from $(typeof(x)) to $(typeof(y))" for
163178
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
164179
y in (similar(x), Array{Union{String, Missing}}(size(x)),
165180
CategoricalArray{Union{String, Missing}}(size(x)), x)
@@ -173,6 +188,20 @@ end
173188
end
174189
end
175190

191+
@testset "Collection with missing in LHS recoding array with missings, default from $(typeof(x)) to $(typeof(y))" for
192+
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
193+
y in (similar(x), Array{Union{String, Missing}}(size(x)),
194+
CategoricalArray{Union{String, Missing}}(size(x)), x)
195+
196+
z = @inferred recode!(y, x, "a", [missing, "c"]=>"b")
197+
@test y === z
198+
@test y == ["a", "b", "b", "a"]
199+
if isa(y, CategoricalArray)
200+
@test levels(y) == ["b", "a"]
201+
@test !isordered(y)
202+
end
203+
end
204+
176205
@testset "Recoding array with missings, no default and with missing as a key pair from $(typeof(x)) to $(typeof(y))" for
177206
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
178207
y in (similar(x), Array{Union{String, Missing}}(size(x)),
@@ -187,6 +216,20 @@ end
187216
end
188217
end
189218

219+
@testset "Collection with missing in LHS recoding array with missings, no default from $(typeof(x)) to $(typeof(y))" for
220+
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
221+
y in (similar(x), Array{Union{String, Missing}}(size(x)),
222+
CategoricalArray{Union{String, Missing}}(size(x)), x)
223+
224+
z = @inferred recode!(y, x, ["c", missing]=>"b")
225+
@test y === z
226+
@test y == ["a", "b", "b", "d"]
227+
if isa(y, CategoricalArray)
228+
@test levels(y) == ["a", "d", "b"]
229+
@test !isordered(y)
230+
end
231+
end
232+
190233
@testset "Recoding into an array of incompatible size from $(typeof(x)) to $(typeof(y))" for
191234
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
192235
y in (similar(x, 0), Array{Union{String, Missing}}(0),

0 commit comments

Comments
 (0)