Skip to content

Commit 59bf7f2

Browse files
gustafsson authored and nalimilan committed
Explicit ordered levels (JuliaData#125)
Throw an error on assignment when level does not already exist and pool is ordered.
1 parent 65ee706 commit 59bf7f2

File tree

8 files changed

+129
-17
lines changed

8 files changed

+129
-17
lines changed

src/CategoricalArrays.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ module CategoricalArrays
66
export AbstractMissingCategoricalArray, AbstractMissingCategoricalVector,
77
AbstractMissingCategoricalMatrix,
88
MissingCategoricalArray, MissingCategoricalVector, MissingCategoricalMatrix
9-
export LevelsException
9+
export LevelsException, OrderedLevelsException
1010

1111
export categorical, compress, decompress, droplevels!, levels, levels!, isordered, ordered!
1212
export cut, recode, recode!

src/array.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -655,9 +655,9 @@ function Base.resize!(A::CategoricalVector, n::Integer)
655655
end
656656

657657
function Base.push!(A::CategoricalVector, item)
658-
resize!(A.refs, length(A.refs) + 1)
659-
A[end] = item
660-
return A
658+
r = get!(A.pool, item)
659+
push!(A.refs, r)
660+
A
661661
end
662662

663663
function Base.append!(A::CategoricalVector, B::CategoricalArray)

src/missingarray.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import Base: getindex, setindex!, similar, in, collect
1+
import Base: getindex, setindex!, push!, similar, in, collect
22

33
@inline function getindex(A::CategoricalArray{T}, I...) where {T>:Missing}
44
@boundscheck checkbounds(A, I...)
@@ -22,6 +22,11 @@ end
2222
@inbounds A.refs[I...] = 0
2323
end
2424

25+
@inline function push!(A::CategoricalVector{>:Missing}, v::Missing)
26+
push!(A.refs, 0)
27+
A
28+
end
29+
2530
Base.fill!(A::CategoricalArray{>:Missing}, ::Missing) = (fill!(A.refs, 0); A)
2631

2732
in(x::Missing, y::CategoricalArray) = false

src/pool.jl

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,24 +88,41 @@ Base.getindex(pool::CategoricalPool, i::Integer) = pool.valindex[i]
8888
Base.get(pool::CategoricalPool, level::Any) = pool.invindex[level]
8989
Base.get(pool::CategoricalPool, level::Any, default::Any) = get(pool.invindex, level, default)
9090

91-
@inline function Base.get!(pool::CategoricalPool{T, R}, level::Any) where {T, R}
91+
"""
92+
add the returned value to pool.invindex, this function doesn't do this itself to
93+
avoid doing a dict lookup twice
94+
"""
95+
@inline function push_level!(pool::CategoricalPool{T, R}, level) where {T, R}
96+
x = convert(T, level)
97+
n = length(pool)
98+
if n >= typemax(R)
99+
throw(LevelsException{T, R}([level]))
100+
end
101+
102+
i = R(n + 1)
103+
push!(pool.index, x)
104+
push!(pool.order, i)
105+
push!(pool.levels, x)
106+
push!(pool.valindex, catvalue(i, pool))
107+
i
108+
end
109+
110+
@inline function Base.get!(pool::CategoricalPool, level::Any)
92111
get!(pool.invindex, level) do
93-
x = convert(T, level)
94-
n = length(pool)
95-
if n >= typemax(R)
96-
throw(LevelsException{T, R}([level]))
112+
if isordered(pool)
113+
throw(OrderedLevelsException(level, pool.levels))
97114
end
98115

99-
i = R(n + 1)
100-
push!(pool.index, x)
101-
push!(pool.order, i)
102-
push!(pool.levels, x)
103-
push!(pool.valindex, catvalue(i, pool))
104-
i
116+
push_level!(pool, level)
105117
end
106118
end
107119

108-
Base.push!(pool::CategoricalPool, level) = (get!(pool, level); pool)
120+
@inline function Base.push!(pool::CategoricalPool, level)
121+
get!(pool.invindex, level) do
122+
push_level!(pool, level)
123+
end
124+
return pool
125+
end
109126

110127
# TODO: optimize for multiple additions
111128
function Base.append!(pool::CategoricalPool, levels)
@@ -188,3 +205,9 @@ function Base.showerror(io::IO, err::LevelsException{T, R}) where {T, R}
188205
levs = join(repr.(err.levels), ", ", " and ")
189206
print(io, "cannot store level(s) $levs since reference type $R can only hold $(typemax(R)) levels. Use the decompress function to make room for more levels.")
190207
end
208+
209+
210+
# OrderedLevelsException
211+
function Base.showerror(io::IO, err::OrderedLevelsException)
212+
print(io, "cannot add new level $(err.newlevel) since ordered pools cannot be extended implicitly. Use the levels! function to set new levels, or the ordered! function to mark the pool as unordered.")
213+
end

src/typedefs.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ struct LevelsException{T, R} <: Exception
4242
levels::Vector{T}
4343
end
4444

45+
struct OrderedLevelsException{T, S} <: Exception
46+
newlevel::S
47+
levels::Vector{T}
48+
end
49+
4550
## Values
4651

4752
"""

test/11_array.jl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,12 @@ using CategoricalArrays: DefaultRefType, catvaluetype, leveltype
200200
@test x[1] == x[end]
201201
@test levels(x) == ["e", "a", "b", "c", "zz"]
202202

203+
x2 = deepcopy(x)
204+
@test_throws MethodError push!(x, 1)
205+
@test x == x2
206+
@test x.pool.index == x2.pool.index
207+
@test x.pool.invindex == x2.pool.invindex
208+
203209
append!(x, x)
204210
@test length(x) == 12
205211
@test x == ["c", "b", "b", "a", "zz", "c", "c", "b", "b", "a", "zz", "c"]
@@ -351,13 +357,21 @@ using CategoricalArrays: DefaultRefType, catvaluetype, leveltype
351357
@test x[4] === x.pool.valindex[4]
352358
@test levels(x) == unique(a)
353359

360+
if ordered
361+
@test_throws OrderedLevelsException x[1:2] = -1
362+
levels!(x, [levels(x); -1])
363+
end
354364
x[1:2] = -1
355365
@test x[1] === x.pool.valindex[5]
356366
@test x[2] === x.pool.valindex[5]
357367
@test x[3] === x.pool.valindex[3]
358368
@test x[4] === x.pool.valindex[4]
359369
@test levels(x) == vcat(unique(a), -1)
360370

371+
if ordered
372+
@test_throws OrderedLevelsException push!(x, 2.0)
373+
levels!(x, [levels(x); 2.0])
374+
end
361375
push!(x, 2.0)
362376
@test length(x) == 5
363377
@test x[end] == 2.0
@@ -517,6 +531,10 @@ using CategoricalArrays: DefaultRefType, catvaluetype, leveltype
517531
@test_throws BoundsError x[1:1, -1:1]
518532
@test_throws BoundsError x[4, :]
519533

534+
if ordered
535+
@test_throws OrderedLevelsException x[1] = "z"
536+
levels!(x, [levels(x); "z"])
537+
end
520538
x[1] = "z"
521539
@test x[1] === x.pool.valindex[4]
522540
@test x[2] === x.pool.valindex[2]
@@ -586,12 +604,20 @@ using CategoricalArrays: DefaultRefType, catvaluetype, leveltype
586604
@test_throws UndefRefError x2[2]
587605
@test levels(x2) == []
588606

607+
if ordered
608+
@test_throws OrderedLevelsException x[1] = "c"
609+
levels!(x, [levels(x); "c"])
610+
end
589611
x[1] = "c"
590612
@test x[1] === x.pool.valindex[1]
591613
@test !isassigned(x, 2) && isdefined(x, 2)
592614
@test_throws UndefRefError x[2]
593615
@test levels(x) == ["c"]
594616

617+
if ordered
618+
@test_throws OrderedLevelsException x[1] = "a"
619+
levels!(x, [levels(x); "a"])
620+
end
595621
x[1] = "a"
596622
@test x[1] === x.pool.valindex[2]
597623
@test !isassigned(x, 2) && isdefined(x, 2)
@@ -604,6 +630,10 @@ using CategoricalArrays: DefaultRefType, catvaluetype, leveltype
604630
@test x[2] === x.pool.valindex[1]
605631
@test levels(x) == ["c", "a"]
606632

633+
if ordered
634+
@test_throws OrderedLevelsException x[1] = "b"
635+
levels!(x, [levels(x); "b"])
636+
end
607637
x[1] = "b"
608638
@test x[1] === x.pool.valindex[3]
609639
@test x[2] === x.pool.valindex[1]

test/12_missingarray.jl

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,10 @@ const ≅ = isequal
365365
@test x[2] === x.pool.valindex[2]
366366
@test x[3] === missing
367367

368+
if ordered
369+
@test_throws OrderedLevelsException x[3] = "c"
370+
levels!(x, [levels(x); "c"])
371+
end
368372
x[3] = "c"
369373
@test x[1] === x.pool.valindex[2]
370374
@test x[2] === x.pool.valindex[2]
@@ -533,13 +537,21 @@ const ≅ = isequal
533537
@test x[4] === x.pool.valindex[4]
534538
@test levels(x) == unique(a)
535539

540+
if ordered
541+
@test_throws OrderedLevelsException x[1:2] = -1
542+
levels!(x, [levels(x); -1])
543+
end
536544
x[1:2] = -1
537545
@test x[1] === x.pool.valindex[5]
538546
@test x[2] === x.pool.valindex[5]
539547
@test x[3] === x.pool.valindex[3]
540548
@test x[4] === x.pool.valindex[4]
541549
@test levels(x) == vcat(unique(a), -1)
542550

551+
if ordered
552+
@test_throws OrderedLevelsException push!(x, 2.0)
553+
levels!(x, [levels(x); 2.0])
554+
end
543555
push!(x, 2.0)
544556
@test length(x) == 5
545557
@test x == [-1.0, -1.0, 1.0, 1.5, 2.0]
@@ -682,6 +694,10 @@ const ≅ = isequal
682694
@test x[1:2,1] == ["a", "b"]
683695
@test isa(x[1:2,1], CategoricalVector{Union{String, Missing}, R})
684696

697+
if ordered
698+
@test_throws OrderedLevelsException x[1] = "z"
699+
levels!(x, [levels(x); "z"])
700+
end
685701
x[1] = "z"
686702
@test x[1] === x.pool.valindex[4]
687703
@test x[2] === x.pool.valindex[2]
@@ -837,6 +853,10 @@ const ≅ = isequal
837853
@test_throws BoundsError x[1:1, -1:1]
838854
@test_throws BoundsError x[4, :]
839855

856+
if ordered
857+
@test_throws OrderedLevelsException x[1] = "z"
858+
levels!(x, [levels(x); "z"])
859+
end
840860
x[1] = "z"
841861
@test x[1] === x.pool.valindex[4]
842862
@test x[2] === x.pool.valindex[2]
@@ -940,11 +960,19 @@ const ≅ = isequal
940960
@test isordered(x2) === isordered(x)
941961
@test levels(x2) == []
942962

963+
if ordered
964+
@test_throws OrderedLevelsException x[1] = "c"
965+
levels!(x, [levels(x); "c"])
966+
end
943967
x[1] = "c"
944968
@test x[1] === x.pool.valindex[1]
945969
@test ismissing(x[2])
946970
@test levels(x) == ["c"]
947971

972+
if ordered
973+
@test_throws OrderedLevelsException x[1] = "a"
974+
levels!(x, [levels(x); "a"])
975+
end
948976
x[1] = "a"
949977
@test x[1] === x.pool.valindex[2]
950978
@test ismissing(x[2])
@@ -955,6 +983,10 @@ const ≅ = isequal
955983
@test x[2] === missing
956984
@test levels(x) == ["c", "a"]
957985

986+
if ordered
987+
@test_throws OrderedLevelsException x[1] = "b"
988+
levels!(x, [levels(x); "b"])
989+
end
958990
x[1] = "b"
959991
@test x[1] === x.pool.valindex[3]
960992
@test x[2] === missing

test/13_arraycommon.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,4 +821,21 @@ end
821821
@test z ≅ x
822822
end
823823

824+
@testset "new levels can't be added through assignment when levels are ordered" begin
825+
x = categorical([1,2,3])
826+
ordered!(x, true)
827+
lev = copy(levels(x))
828+
res = @test_throws OrderedLevelsException{Int, Float64} x[1] = 4.0
829+
@test res.value.newlevel == 4
830+
@test sprint(showerror, res.value) ==
831+
"cannot add new level 4.0 since ordered pools cannot be extended implicitly. " *
832+
"Use the levels! function to set new levels, or the ordered! function to mark the pool as unordered."
833+
@test lev == levels(x)
834+
835+
# Assignment works after adding the level to the pool
836+
levels!(x, [3,4,1,2])
837+
x[1] = 4
838+
@test x == [4,2,3]
839+
end
840+
824841
end

0 commit comments

Comments
 (0)