From 4b12853fbdc4811c73d31727af418e0f79abc3af Mon Sep 17 00:00:00 2001
From: Milan Bouchet-Valat <nalimilan@club.fr>
Date: Mon, 26 May 2025 21:38:32 +0200
Subject: [PATCH 1/2] Make `levels` return a `CategoricalArray`

Having `levels` preserve the eltype of the input is sometimes useful
to write generic code. This is only slightly breaking as the result
still compares equal to the previous behavior returning unwrapped values.
---
 benchmark/benchmarks.jl                |  4 +-
 docs/src/using.md                      | 16 +++---
 ext/CategoricalArraysArrowExt.jl       |  2 +
 ext/CategoricalArraysRecipesBaseExt.jl |  2 +-
 src/array.jl                           | 56 +++++++++++----------
 src/pool.jl                            | 29 ++++++-----
 src/recode.jl                          | 10 ++--
 src/typedefs.jl                        |  5 +-
 src/value.jl                           |  8 +--
 test/01_value.jl                       |  7 ++-
 test/07_levels.jl                      | 68 ++++++++++++++++++--------
 test/11_array.jl                       |  2 +-
 test/12_missingarray.jl                |  2 +-
 test/13_arraycommon.jl                 |  8 +--
 test/14_view.jl                        |  3 +-
 15 files changed, 134 insertions(+), 88 deletions(-)

diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
index 5c2ae42b..bf12f7c9 100644
--- a/benchmark/benchmarks.jl
+++ b/benchmark/benchmarks.jl
@@ -55,11 +55,11 @@ SUITE["many levels"]["CategoricalArray(::Vector{String})"] =
 a = rand([@sprintf("id%010d", k) for k in 1:1000], 10000)
 ca = CategoricalArray(a)
 
-levs = levels(ca)
+levs = unwrap.(levels(ca))
 SUITE["many levels"]["levels! with original levels"] =
     @benchmarkable levels!(ca, levs)
 
-levs = reverse(levels(ca))
+levs = reverse(unwrap.(levels(ca)))
 SUITE["many levels"]["levels! with resorted levels"] =
     @benchmarkable levels!(ca, levs)
 
diff --git a/docs/src/using.md b/docs/src/using.md
index 9790e8cf..aaa6e36c 100644
--- a/docs/src/using.md
+++ b/docs/src/using.md
@@ -20,7 +20,7 @@ By default, the levels are lexically sorted, which is clearly not correct in our
 
 ```jldoctest using
 julia> levels(x)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "Middle"
  "Old"
  "Young"
@@ -68,7 +68,7 @@ To get rid of the `"Old"` group, just call the [`droplevels!`](@ref) function:
 
 ```jldoctest using
 julia> levels(x)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "Young"
  "Middle"
  "Old"
@@ -81,7 +81,7 @@ julia> droplevels!(x)
  "Young"
 
 julia> levels(x)
-2-element Vector{String}:
+2-element CategoricalArray{String,1,UInt32}:
  "Young"
  "Middle"
 
@@ -139,7 +139,7 @@ Levels still need to be reordered manually:
 
 ```jldoctest using
 julia> levels(y)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "Middle"
  "Old"
  "Young"
@@ -263,7 +263,7 @@ true
 Likewise, assigning a `CategoricalValue` from `y` to an entry in `x` expands the levels of `x` with all levels from `y`, *respecting the ordering of levels of both vectors if possible*:
 ```jldoctest using
 julia> levels(x)
-2-element Vector{String}:
+2-element CategoricalArray{String,1,UInt32}:
  "Middle"
  "Old"
 
@@ -271,7 +271,7 @@ julia> x[1] = y[1]
 CategoricalValue{String, UInt32} "Young" (1/2)
 
 julia> levels(x)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "Young"
  "Middle"
  "Old"
@@ -296,7 +296,7 @@ julia> ab = vcat(a, b)
  "c"
 
 julia> levels(ab)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "a"
  "b"
  "c"
@@ -320,7 +320,7 @@ julia> ab2 = vcat(a, b)
  "c"
 
 julia> levels(ab2)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "a"
  "b"
  "c"
diff --git a/ext/CategoricalArraysArrowExt.jl b/ext/CategoricalArraysArrowExt.jl
index 3e764122..811870d2 100644
--- a/ext/CategoricalArraysArrowExt.jl
+++ b/ext/CategoricalArraysArrowExt.jl
@@ -7,6 +7,8 @@ import Arrow: ArrowTypes
 const CATARRAY_ARROWNAME = Symbol("JuliaLang.CategoricalArrays.CategoricalArray")
 ArrowTypes.arrowname(::Type{<:CategoricalValue}) = CATARRAY_ARROWNAME
 ArrowTypes.arrowmetadata(::Type{CategoricalValue{T, R}}) where {T, R} = string(R)
+ArrowTypes.ArrowType(::Type{<:CategoricalValue{T}}) where {T} = T
+ArrowTypes.toarrow(x::CategoricalValue) = unwrap(x)
 
 ArrowTypes.arrowname(::Type{Union{<:CategoricalValue, Missing}}) = CATARRAY_ARROWNAME
 ArrowTypes.arrowmetadata(::Type{Union{CategoricalValue{T, R}, Missing}}) where {T, R} =
diff --git a/ext/CategoricalArraysRecipesBaseExt.jl b/ext/CategoricalArraysRecipesBaseExt.jl
index 2642f838..656f3e3d 100644
--- a/ext/CategoricalArraysRecipesBaseExt.jl
+++ b/ext/CategoricalArraysRecipesBaseExt.jl
@@ -9,7 +9,7 @@ else
 end
 
 RecipesBase.@recipe function f(::Type{T}, v::T) where T <: CategoricalValue
-    level_strings = [map(string, levels(v)); missing]
+    level_strings = [map(string, CategoricalArrays._levels(v)); missing]
     ticks --> eachindex(level_strings)
     v -> ismissing(v) ? length(level_strings) : Int(CategoricalArrays.refcode(v)),
     i -> level_strings[Int(i)]
diff --git a/src/array.jl b/src/array.jl
index c462e7d4..a2103e2a 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -239,7 +239,7 @@ function CategoricalArray{T, N, R}(A::CategoricalArray{S, N, Q};
         catch err
             err isa LevelsException || rethrow(err)
             throw(ArgumentError("encountered value(s) not in specified `levels`: " *
-                                "$(setdiff(CategoricalArrays.levels(res), levels))"))
+                                "$(setdiff(_levels(res), levels))"))
         end
     end
     return res
@@ -358,18 +358,18 @@ function _convert(::Type{CategoricalArray{T, N, R}}, A::AbstractArray{S, N};
     copyto!(res, A)
 
     if levels !== nothing
-        CategoricalArrays.levels(res) == levels ||
+        _levels(res) == levels ||
             throw(ArgumentError("encountered value(s) not in specified `levels`: " *
-                                "$(setdiff(CategoricalArrays.levels(res), levels))"))
+                                "$(setdiff(_levels(res), levels))"))
     else
         # if order is defined for level type, automatically apply it
         L = leveltype(res)
         if Base.OrderStyle(L) isa Base.Ordered
-            levels!(res, sort(CategoricalArrays.levels(res)))
+            levels!(res, sort(_levels(res)))
         elseif hasmethod(isless, (L, L))
             # isless may throw an error, e.g. for AbstractArray{T} of unordered T
             try
-                levels!(res, sort(CategoricalArrays.levels(res)))
+                levels!(res, sort(_levels(res)))
             catch e
                  e isa MethodError || rethrow(e)
             end
@@ -382,7 +382,7 @@ end
 # From CategoricalArray (preserve levels, ordering and R)
 function convert(::Type{CategoricalArray{T, N, R}}, A::CategoricalArray{S, N}) where {S, T, N, R}
     if length(A.pool) > typemax(R)
-        throw(LevelsException{T, R}(levels(A)[typemax(R)+1:end]))
+        throw(LevelsException{T, R}(_levels(A)[typemax(R)+1:end]))
     end
 
     if !(T >: Missing) && S >: Missing && any(iszero, A.refs)
@@ -460,7 +460,7 @@ size(A::CategoricalArray) = size(A.refs)
 Base.IndexStyle(::Type{<:CategoricalArray}) = IndexLinear()
 
 function update_refs!(A::CategoricalArray, newlevels::AbstractVector)
-    oldlevels = levels(A)
+    oldlevels = _levels(A)
     levelsmap = similar(A.refs, length(oldlevels)+1)
     # 0 maps to a missing value
     levelsmap[1] = 0
@@ -478,7 +478,7 @@ function merge_pools!(A::CatArrOrSub,
                       updaterefs::Bool=true,
                       updatepool::Bool=true)
     newlevels, ordered = merge_pools(pool(A), pool(B))
-    oldlevels = levels(A)
+    oldlevels = _levels(A)
     pA = A isa SubArray ? parent(A) : A
     ordered!(pA, ordered)
     # If A's levels are an ordered superset of new (merged) pool, no need to recompute refs
@@ -537,8 +537,8 @@ function copyto!(dest::CatArrOrSub{T, N, R}, dstart::Integer,
 
     # try converting src to dest type to avoid partial copy corruption of dest
     # in the event that the src cannot be copied into dest
-    slevs = convert(Vector{T}, levels(src))
-    dlevs = levels(dest)
+    slevs = convert(Vector{T}, _levels(src))
+    dlevs = _levels(dest)
     if eltype(src) >: Missing && !(eltype(dest) >: Missing) && !all(x -> x > 0, srefs)
         throw(MissingException("cannot copy array with missing values to an array with element type $T"))
     end
@@ -591,7 +591,7 @@ function copyto!(dest::CatArrOrSub{T1, N, R}, dstart::Integer,
         return invoke(copyto!, Tuple{AbstractArray, Integer, AbstractArray, Integer, Integer},
                       dest, dstart, src, sstart, n)
     end
-    newdestlevs = destlevs = copy(levels(dest)) # copy since we need original levels below
+    newdestlevs = destlevs = copy(_levels(dest)) # copy since we need original levels below
     srclevsnm = T2 >: Missing ? setdiff(srclevs, [missing]) : srclevs
     if !(srclevsnm ⊆ destlevs)
         # if order is defined for level type, automatically apply it
@@ -701,7 +701,7 @@ While this will reduce memory use, this function is type-unstable, which can aff
 performance inside the function where the call is made. Therefore, use it with caution.
 """
 function compress(A::CategoricalArray{T, N}) where {T, N}
-    R = reftype(length(levels(A.pool)))
+    R = reftype(length(_levels(A.pool)))
     convert(CategoricalArray{T, N, R}, A)
 end
 
@@ -719,11 +719,11 @@ decompress(A::CategoricalArray{T, N}) where {T, N} =
     convert(CategoricalArray{T, N, DefaultRefType}, A)
 
 function vcat(A::CategoricalArray...)
-    ordered = any(isordered, A) && all(a->isordered(a) || isempty(levels(a)), A)
-    newlevels, ordered = mergelevels(ordered, map(levels, A)...)
+    ordered = any(isordered, A) && all(a->isordered(a) || isempty(_levels(a)), A)
+    newlevels, ordered = mergelevels(ordered, map(_levels, A)...)
 
     refsvec = map(A) do a
-        ii = convert(Vector{Int}, indexin(levels(a.pool), newlevels))
+        ii = convert(Vector{Int}, indexin(_levels(a.pool), newlevels))
         [x==0 ? 0 : ii[x] for x in a.refs]::Array{Int,ndims(a)}
     end
 
@@ -761,23 +761,25 @@ This may include levels which do not actually appear in the data
 `missing` will be included only if it appears in the data and
 `skipmissing=false` is passed.
 
-The returned vector is an internal field of `x` which must not be mutated
+The returned vector is owned by `x` and must not be mutated
 as doing so would corrupt it.
 """
-@inline function DataAPI.levels(A::CatArrOrSub{T}; skipmissing::Bool=true) where T
+@inline function DataAPI.levels(A::CatArrOrSub; skipmissing::Bool=true)
     if eltype(A) >: Missing && !skipmissing
         if any(==(0), refs(A))
-            T[levels(pool(A)); missing]
+            eltype(A)[levels(pool(A)); missing]
         else
-            convert(Vector{T}, levels(pool(A)))
+            levels_missing(pool(A))
         end
     else
         levels(pool(A))
     end
 end
 
+_levels(A::CatArrOrSub) = _levels(pool(A))
+
 """
-    levels!(A::CategoricalArray, newlevels::Vector; allowmissing::Bool=false)
+    levels!(A::CategoricalArray, newlevels::AbstractVector; allowmissing::Bool=false)
 
 Set the levels categorical array `A`. The order of appearance of levels will be respected
 by [`levels`](@ref DataAPI.levels), which may affect display of results in some operations; if `A` is
@@ -791,7 +793,7 @@ Else, `newlevels` must include all levels which appear in the data.
 """
 function levels!(A::CategoricalArray{T, N, R}, newlevels::AbstractVector;
                  allowmissing::Bool=false) where {T, N, R}
-    (levels(A) == newlevels) && return A # nothing to do
+    (_levels(A) == newlevels) && return A # nothing to do
 
     # map each new level to its ref code
     newlv2ref = Dict{eltype(newlevels), Int}()
@@ -806,7 +808,7 @@ function levels!(A::CategoricalArray{T, N, R}, newlevels::AbstractVector;
     end
 
     # map each old ref code to new ref code (or 0 if no such level)
-    oldlevels = levels(pool(A))
+    oldlevels = _levels(pool(A))
     oldref2newref = fill(0, length(oldlevels) + 1)
     for (i, lv) in enumerate(oldlevels)
         oldref2newref[i + 1] = get(newlv2ref, lv, 0)
@@ -867,7 +869,7 @@ end
 function _uniquerefs(A::CatArrOrSub{T}) where T
     arefs = refs(A)
     res = similar(arefs, 0)
-    nlevels = length(levels(A))
+    nlevels = length(_levels(A))
     maxunique = nlevels + (T >: Missing ? 1 : 0)
     seen = fill(false, nlevels + 1) # always +1 for 0 (missing ref)
     @inbounds for ref in arefs
@@ -900,7 +902,7 @@ returned by [`levels`](@ref DataAPI.levels)).
 """
 function droplevels!(A::CategoricalArray)
     arefs = refs(A)
-    nlevels = length(levels(A)) + 1 # +1 for missing
+    nlevels = length(_levels(A)) + 1 # +1 for missing
     seen = fill(false, nlevels)
     seen[1] = true # assume that missing is always observed to simplify checks
     nseen = 1
@@ -913,7 +915,7 @@ function droplevels!(A::CategoricalArray)
     end
 
     # replace the pool
-    A.pool = typeof(pool(A))(@inbounds(levels(A)[view(seen, 2:nlevels)]), isordered(A))
+    A.pool = typeof(pool(A))(@inbounds(_levels(A)[view(seen, 2:nlevels)]), isordered(A))
     # recode refs to keep only the seen ones (optimized version of update_refs!())
     seen[1] = false # to start levelsmap from 0
     levelsmap = cumsum(seen)
@@ -1030,7 +1032,7 @@ end
                              ordered=_isordered(A),
                              compress::Bool=false) where {T, N, R}
     # @inline is needed so that return type is inferred when compress is not provided
-    RefType = compress ? reftype(length(CategoricalArrays.levels(A))) : R
+    RefType = compress ? reftype(length(_levels(A))) : R
     CategoricalArray{T, N, RefType}(A, levels=levels, ordered=ordered)
 end
 
@@ -1043,7 +1045,7 @@ function in(x::CategoricalValue, y::CategoricalArray{T, N, R}) where {T, N, R}
     if x.pool === y.pool
         return refcode(x) in y.refs
     else
-        ref = get(y.pool, levels(x.pool)[refcode(x)], zero(R))
+        ref = get(y.pool, _levels(x.pool)[refcode(x)], zero(R))
         return ref != 0 ? ref in y.refs : false
     end
 end
diff --git a/src/pool.jl b/src/pool.jl
index 9753a76d..2df7e345 100644
--- a/src/pool.jl
+++ b/src/pool.jl
@@ -21,8 +21,8 @@ Base.convert(::Type{CategoricalPool{S}}, pool::CategoricalPool{T, R}) where {S,
     convert(CategoricalPool{S, R}, pool)
 
 function Base.convert(::Type{CategoricalPool{T, R}}, pool::CategoricalPool) where {T, R <: Integer}
-    if length(levels(pool)) > typemax(R)
-        throw(LevelsException{T, R}(levels(pool)[typemax(R)+1:end]))
+    if length(pool.levels) > typemax(R)
+        throw(LevelsException{T, R}(pool.levels[typemax(R)+1:end]))
     end
 
     levelsT = convert(Vector{T}, pool.levels)
@@ -37,10 +37,10 @@ Base.copy(pool::CategoricalPool{T, R}) where {T, R} =
 function Base.show(io::IO, pool::CategoricalPool{T, R}) where {T, R}
     @static if VERSION >= v"1.6.0"
         @printf(io, "%s{%s, %s}([%s])", CategoricalPool, T, R,
-                join(map(repr, levels(pool)), ", "))
+                join(map(repr, pool.levels), ", "))
     else
         @printf(io, "%s{%s,%s}([%s])", CategoricalPool, T, R,
-                join(map(repr, levels(pool)), ", "))
+                join(map(repr, pool.levels), ", "))
     end
 
     pool.ordered && print(io, " with ordered levels")
@@ -65,6 +65,7 @@ it doesn't do this itself to avoid doing a dict lookup twice
 
     i = R(n + 1)
     push!(pool.levels, x)
+    push!(pool.levelsinds, i)
     pool_hash = pool.hash
     if pool_hash !== nothing
         pool.hash = hash(x, pool_hash)
@@ -185,10 +186,10 @@ function merge_pools(a::CategoricalPool{T}, b::CategoricalPool) where {T}
         newlevs = T[]
         ordered = isordered(a)
     elseif length(a) == 0
-        newlevs = Vector{T}(levels(b))
+        newlevs = Vector{T}(b.levels)
         ordered = isordered(b)
     elseif length(b) == 0
-        newlevs = copy(levels(a))
+        newlevs = copy(a.levels)
         ordered = isordered(a)
     else
         ordered = isordered(a) && (isordered(b) || b ⊆ a)
@@ -200,7 +201,7 @@ end
 
 @inline function Base.hash(pool::CategoricalPool, h::UInt)
     if pool.hash === nothing
-        pool.hash = hashlevels(levels(pool))
+        pool.hash = hashlevels(pool.levels)
     end
     hash(pool.hash, h)
 end
@@ -246,9 +247,9 @@ end
 
 # Contrary to the CategoricalArray one, this method only allows adding new levels at the end
 # so that existing CategoricalValue objects still point to the same value
-function levels!(pool::CategoricalPool{S, R}, newlevels::Vector;
+function levels!(pool::CategoricalPool{S, R}, newlevels::AbstractVector;
                  checkunique::Bool=true) where {S, R}
-    levs = convert(Vector{S}, newlevels)
+    levs = newlevels isa CategoricalVector{S} ? newlevels : convert(Vector{S}, newlevels)
     if checkunique && !allunique(levs)
         throw(ArgumentError(string("duplicated levels found in levs: ",
                                    join(unique(filter(x->sum(levs.==x)>1, levs)), ", "))))
@@ -259,24 +260,30 @@ function levels!(pool::CategoricalPool{S, R}, newlevels::Vector;
     n = length(levs)
 
     if n > typemax(R)
-        throw(LevelsException{S, R}(setdiff(levs, levels(pool))[typemax(R)-length(levels(pool))+1:end]))
+        throw(LevelsException{S, R}(setdiff(levs, pool.levels)[typemax(R)-length(pool.levels)+1:end]))
     end
 
     empty!(pool.invindex)
     resize!(pool.levels, n)
+    resize!(pool.levelsinds, n)
     pool.hash = nothing
     pool.equalto = C_NULL
     pool.subsetof = C_NULL
     for i in 1:n
         v = levs[i]
         pool.levels[i] = v
+        pool.levelsinds[i] = i
         pool.invindex[v] = i
     end
 
     return pool
 end
 
-DataAPI.levels(pool::CategoricalPool) = pool.levels
+DataAPI.levels(pool::CategoricalPool{T}) where {T} =
+    CategoricalVector{T}(pool.levelsinds, pool)
+levels_missing(pool::CategoricalPool{T}) where {T} =
+    CategoricalVector{Union{T, Missing}}(pool.levelsinds, pool)
+_levels(pool::CategoricalPool) = pool.levels
 
 isordered(pool::CategoricalPool) = pool.ordered
 ordered!(pool::CategoricalPool, ordered) = (pool.ordered = ordered; pool)
diff --git a/src/recode.jl b/src/recode.jl
index 141f9967..ff258e60 100644
--- a/src/recode.jl
+++ b/src/recode.jl
@@ -111,7 +111,7 @@ function _recode!(dest::CategoricalArray{T, <:Any, R}, src::AbstractArray, defau
     levels!(dest.pool, filter!(!ismissing, unique(vals)))
     # In the absence of duplicated recoded values, we do not need to lookup the reference
     # for each pair in the loop, which is more efficient (with loop unswitching)
-    dupvals = length(vals) != length(levels(dest.pool))
+    dupvals = length(vals) != length(_levels(dest.pool))
 
     drefs = dest.refs
     pairmap = [ismissing(v) ? zero(R) : get(dest.pool, v) for v in vals]
@@ -150,7 +150,7 @@ function _recode!(dest::CategoricalArray{T, <:Any, R}, src::AbstractArray, defau
 
     # Put existing levels first, and sort them if possible
     # for consistency with CategoricalArray
-    oldlevels = setdiff(levels(dest), vals)
+    oldlevels = setdiff(_levels(dest), vals)
     filter!(!ismissing, oldlevels)
     L = eltype(oldlevels)
     if Base.OrderStyle(L) isa Base.Ordered
@@ -163,7 +163,7 @@ function _recode!(dest::CategoricalArray{T, <:Any, R}, src::AbstractArray, defau
             e isa MethodError || rethrow(e)
         end
     end
-    levels!(dest, union(oldlevels, levels(dest)))
+    levels!(dest, union(oldlevels, _levels(dest)))
 
     dest
 end
@@ -174,7 +174,7 @@ function _recode!(dest::CategoricalArray{T, N, R}, src::CategoricalArray,
     vals = T[p.second for p in pairs]
              
     if default === nothing
-        srclevels = levels(src)
+        srclevels = _levels(src)
 
         # Remove recoded levels as they won't appear in result
         keptlevels = Vector{T}(undef, 0)
@@ -201,7 +201,7 @@ function _recode!(dest::CategoricalArray{T, N, R}, src::CategoricalArray,
         ordered = false
     end
 
-    srclevels = src.pool === dest.pool ? copy(levels(src.pool)) : levels(src.pool)
+    srclevels = src.pool === dest.pool ? copy(_levels(src.pool)) : _levels(src.pool)
     if length(levs) > length(srclevels) && view(levs, 1:length(srclevels)) == srclevels
         levels!(dest.pool, levs)
     else
diff --git a/src/typedefs.jl b/src/typedefs.jl
index 0f9aa414..238bb995 100644
--- a/src/typedefs.jl
+++ b/src/typedefs.jl
@@ -8,6 +8,7 @@ const SupportedTypes = Union{AbstractString, AbstractChar, Number}
 # * `R` integer type for referencing category levels
 mutable struct CategoricalPool{T <: SupportedTypes, R <: Integer}
     levels::Vector{T}          # category levels ordered by their reference codes
+    levelsinds::Vector{R}      # set to 1:length(levels), used by `levels(p)`
     invindex::Dict{T, R}       # map from category levels to their reference codes
     ordered::Bool              # whether levels can be compared using <
     hash::Union{UInt, Nothing} # hash of levels
@@ -45,8 +46,8 @@ mutable struct CategoricalPool{T <: SupportedTypes, R <: Integer}
                                    invindex::Dict{T, R},
                                    ordered::Bool,
                                    hash::Union{UInt, Nothing}=nothing) where {T, R}
-        pool = new(levels, invindex, ordered, hash, C_NULL, C_NULL)
-        return pool
+        return new(levels, 1:length(levels), invindex,
+                   ordered, hash, C_NULL, C_NULL)
     end
 end
 
diff --git a/src/value.jl b/src/value.jl
index ae962adb..a1633204 100644
--- a/src/value.jl
+++ b/src/value.jl
@@ -27,6 +27,8 @@ reftype(x::Any) = reftype(typeof(x))
 pool(x::CategoricalValue) = x.pool
 refcode(x::CategoricalValue) = x.ref
 isordered(x::CategoricalValue) = isordered(x.pool)
+DataAPI.levels(x::CategoricalValue) = levels(pool(x))
+_levels(x::CategoricalValue) = _levels(pool(x))
 
 # extract the type of the original value from array eltype `T`
 unwrap_catvaluetype(::Type{T}) where {T} = T
@@ -42,7 +44,7 @@ unwrap_catvaluetype(::Type{T}) where {T <: CategoricalValue} = leveltype(T)
 
 Get the value wrapped by categorical value `x`. If `x` is `Missing` return `missing`.
 """
-DataAPI.unwrap(x::CategoricalValue) = levels(x)[refcode(x)]
+DataAPI.unwrap(x::CategoricalValue) = _levels(x)[refcode(x)]
 
 """
     levelcode(x::CategoricalValue)
@@ -59,10 +61,8 @@ Return `missing`.
 """
 levelcode(x::Missing) = missing
 
-DataAPI.levels(x::CategoricalValue) = levels(pool(x))
-
 function cat_promote_type(::Type{S}, ::Type{T}) where {S, T}
-    U = promote_type(S, T)
+    U = promote_type(unwrap_catvaluetype(S), unwrap_catvaluetype(T))
     U <: Union{SupportedTypes, Missing} ?
         U : typeintersect(Union{SupportedTypes, Missing}, Union{S, T})
 end
diff --git a/test/01_value.jl b/test/01_value.jl
index 39f58b67..8c60ae7f 100644
--- a/test/01_value.jl
+++ b/test/01_value.jl
@@ -22,6 +22,8 @@ end
     for i in 1:3
         x = CategoricalValue(pool, i)
 
+        @test levels(x) == levels(pool)
+        @test levels(x) isa CategoricalVector{String, UInt32}
         @test leveltype(x) === String
         @test leveltype(typeof(x)) === String
         @test reftype(x) === DefaultRefType
@@ -48,6 +50,8 @@ end
     for i in 1:3
         x = CategoricalValue(pool, i)
 
+        @test levels(x) == levels(pool)
+        @test levels(x) isa CategoricalVector{String, UInt8}
         @test leveltype(x) === String
         @test leveltype(typeof(x)) === String
         @test reftype(x) === UInt8
@@ -68,7 +72,8 @@ end
     for x in (CategoricalValue(pool, 1), arr, view(arr, 2:3))
         for (i, v) in enumerate(levels(pool))
             @test CategoricalValue(v, x) ===
-                CategoricalValue(float(v), x) ===
+                CategoricalValue(unwrap(v), x) ===
+                CategoricalValue(float(unwrap(v)), x) ===
                 CategoricalValue(CategoricalValue(pool, i), x) ===
                 CategoricalValue(pool, i)
         end
diff --git a/test/07_levels.jl b/test/07_levels.jl
index 25c54be0..b54e4d52 100644
--- a/test/07_levels.jl
+++ b/test/07_levels.jl
@@ -1,15 +1,16 @@
 module TestLevels
 using Test
 using CategoricalArrays
-using CategoricalArrays: DefaultRefType, levels!, hashlevels
+using CategoricalArrays: DefaultRefType, levels!, hashlevels, _levels
 
 @testset "CategoricalPool{Int} updates levels and order correctly" begin
     pool = CategoricalPool([2, 1, 3])
 
-    @test isa(levels(pool), Vector{Int})
+    @test isa(levels(pool), CategoricalVector{Int, DefaultRefType})
     @test length(pool) === 3
-    @test levels(pool) == [2, 1, 3]
-    @test all([levels(CategoricalValue(pool, i)) for i in 1:3] .=== Ref(levels(pool)))
+    @test levels(pool) == _levels(pool) == [2, 1, 3]
+    @test all([levels(CategoricalValue(pool, i)) for i in 1:3] .== Ref(levels(pool)))
+    @test pool.levelsinds == 1:3
     @test pool.invindex == Dict(2=>1, 1=>2, 3=>3)
     @test pool.hash === nothing
     @test pool.equalto == C_NULL
@@ -20,7 +21,8 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
         @test isa(pool.levels, Vector{Int})
         @test length(pool) === 4
-        @test levels(pool) == [2, 1, 3, 4]
+        @test levels(pool) == _levels(pool) == [2, 1, 3, 4]
+        @test pool.levelsinds == 1:4
         @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4)
         @test pool.hash === nothing
         @test pool.equalto == C_NULL
@@ -34,7 +36,8 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
         @test isa(pool.levels, Vector{Int})
         @test length(pool) === 5
-        @test levels(pool) == [2, 1, 3, 4, 0]
+        @test levels(pool) == _levels(pool) == [2, 1, 3, 4, 0]
+        @test pool.levelsinds == 1:5
         @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5)
         @test pool.hash === nothing
         @test pool.equalto == C_NULL
@@ -48,7 +51,8 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
         @test isa(pool.levels, Vector{Int})
         @test length(pool) === 7
-        @test levels(pool) == [2, 1, 3, 4, 0, 10, 11]
+        @test levels(pool) == _levels(pool) == [2, 1, 3, 4, 0, 10, 11]
+        @test pool.levelsinds == 1:7
         @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7)
         @test pool.hash === nothing
         @test pool.equalto == C_NULL
@@ -64,7 +68,8 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
         @test isa(pool.levels, Vector{Int})
         @test length(pool) === 9
-        @test levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13]
+        @test levels(pool) == _levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13]
+        @test pool.levelsinds == 1:9
         @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7, 12=>8, 13=>9)
         @test pool.hash === nothing
         @test pool.equalto == C_NULL
@@ -84,15 +89,10 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
     # Adding levels while preserving existing ones
     levs = [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14]
     @test levels!(pool, levs) === pool
-    @test levels(pool) == levs
-    @test levels(pool) !== levs
-    @test pool.hash === nothing
-    @test pool.equalto == C_NULL
-    @test pool.subsetof == C_NULL
-
+    @test levels(pool) == _levels(pool) == levs
+    @test pool.levels !== levs
     @test isa(pool.levels, Vector{Int})
-    @test length(pool) === 11
-    @test levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14]
+    @test pool.levelsinds == 1:11
     @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7, 12=>8, 13=>9,
                                 15=>10, 14=>11)
     @test pool.hash === nothing
@@ -109,7 +109,10 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
     @test isa(pool.levels, Vector{Int})
     @test length(pool) == 12
-    @test levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20]
+    @test levels(pool) ==
+        _levels(pool) ==
+        [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20]
+    @test pool.levelsinds == 1:12
     @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7, 12=>8, 13=>9,
                                 15=>10, 14=>11, 20=>12)
     @test pool.hash === nothing
@@ -128,7 +131,10 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
     @test isa(pool.levels, Vector{Int})
     @test length(pool) == 14
-    @test levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20, 100, 99]
+    @test levels(pool) ==
+        _levels(pool) ==
+        [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20, 100, 99]
+    @test pool.levelsinds == 1:14
     @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7, 12=>8, 13=>9,
                                 15=>10, 14=>11, 20=>12, 100=>13, 99=>14)
     @test pool.hash === nothing
@@ -143,7 +149,10 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
     @test isa(pool.levels, Vector{Int})
     @test length(pool) == 14
-    @test levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20, 100, 99]
+    @test levels(pool) ==
+        _levels(pool) ==
+        [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20, 100, 99]
+    @test pool.levelsinds == 1:14
     @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7, 12=>8, 13=>9,
                                 15=>10, 14=>11, 20=>12, 100=>13, 99=>14)
     @test pool.hash === CategoricalArrays.hashlevels(levels(pool))
@@ -155,7 +164,10 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
 
     @test isa(pool.levels, Vector{Int})
     @test length(pool) == 14
-    @test levels(pool) == [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20, 100, 99]
+    @test levels(pool) ==
+        _levels(pool) ==
+        [2, 1, 3, 4, 0, 10, 11, 12, 13, 15, 14, 20, 100, 99]
+    @test pool.levelsinds == 1:14
     @test pool.invindex == Dict(2=>1, 1=>2, 3=>3, 4=>4, 0=>5, 10=>6, 11=>7, 12=>8, 13=>9,
                                 15=>10, 14=>11, 20=>12, 100=>13, 99=>14)
     @test pool.hash === CategoricalArrays.hashlevels(levels(pool))
@@ -178,6 +190,22 @@ using CategoricalArrays: DefaultRefType, levels!, hashlevels
     @test !isordered(p2)
 end
 
+@testset "levels!(::CategoricalPool, ::CategoricalVector)" begin
+    pool = CategoricalPool([2, 1, 3])
+    levels!(pool, categorical([2, 1, 3, 4]))
+    @test levels(pool) == [2, 1, 3, 4]
+
+    pool = CategoricalPool([2, 1, 3])
+    levels!(pool, categorical([2.0, 1.0, 3.0, 4.0]))
+    @test levels(pool) == [2, 1, 3, 4]
+
+    pool = CategoricalPool([2, 1, 3])
+    @test_throws ArgumentError levels!(pool, categorical([2, 2, 1, 3, 4]))
+
+    pool = CategoricalPool([2, 1, 3])
+    @test_throws ArgumentError levels!(pool, categorical(1:3))
+end
+
 @testset "overflow of reftype is detected and doesn't corrupt levels" begin
     res = @test_throws LevelsException{Int, UInt8} CategoricalPool{Int, UInt8}(collect(256:-1:1))
     @test res.value.levels == [1]
diff --git a/test/11_array.jl b/test/11_array.jl
index b474cfe1..4f332640 100644
--- a/test/11_array.jl
+++ b/test/11_array.jl
@@ -746,7 +746,7 @@ end
     @test y == unique(x)
 
     x = CategoricalArray(String[])
-    @test isa(levels(x), Vector{String}) && isempty(levels(x))
+    @test isa(levels(x), CategoricalVector{String, DefaultRefType}) && isempty(levels(x))
     @test isa(unique(x), typeof(x)) && isempty(unique(x))
     @test levels!(x, ["Young", "Middle", "Old"]) === x
     @test levels(x) == ["Young", "Middle", "Old"]
diff --git a/test/12_missingarray.jl b/test/12_missingarray.jl
index a2204e40..5c2ed3a9 100644
--- a/test/12_missingarray.jl
+++ b/test/12_missingarray.jl
@@ -1160,7 +1160,7 @@ end
     @test unique(x) ≅ ["Old", "Young", "Middle", missing]
 
     x = CategoricalArray((Union{String, Missing})[missing])
-    @test isa(levels(x), Vector{String}) && isempty(levels(x))
+    @test isa(levels(x), CategoricalVector{String, DefaultRefType}) && isempty(levels(x))
     @test unique(x) ≅ [missing]
     @test levels!(x, ["Young", "Middle", "Old"]) === x
     @test levels(x) == ["Young", "Middle", "Old"]
diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl
index 02b51bd7..ac5fd424 100644
--- a/test/13_arraycommon.jl
+++ b/test/13_arraycommon.jl
@@ -2326,18 +2326,18 @@ end
               view(categorical(Union{String, Missing}[missing, "b", "a"], levels=["b", "c", "a"]), 2:3))
         @test @inferred(levels(x)) == ["b", "c", "a"]
         @test levels(x, skipmissing=true) == ["b", "c", "a"]
-        @test levels(x, skipmissing=true) isa Vector{String}
+        @test levels(x, skipmissing=true) isa CategoricalVector{String}
         @test levels(x, skipmissing=false) == ["b", "c", "a"]
-        @test levels(x, skipmissing=false) isa Vector{Union{String, Missing}}
+        @test levels(x, skipmissing=false) isa CategoricalVector{Union{String, Missing}}
     end
 
     for x in (categorical(Union{String, Missing}["a", "b", missing], levels=["b", "c", "a"]),
               view(categorical(Union{String, Missing}["c", "b", missing], levels=["b", "c", "a"]), 2:3))
         @test @inferred(levels(x)) == ["b", "c", "a"]
         @test levels(x, skipmissing=true) == ["b", "c", "a"]
-        @test levels(x, skipmissing=true) isa Vector{String}
+        @test levels(x, skipmissing=true) isa CategoricalVector{String}
         @test levels(x, skipmissing=false) ≅ ["b", "c", "a", missing]
-        @test levels(x, skipmissing=false) isa Vector{Union{String, Missing}}
+        @test levels(x, skipmissing=false) isa CategoricalVector{Union{String, Missing}}
     end
 end
 
diff --git a/test/14_view.jl b/test/14_view.jl
index 79b20812..11853853 100644
--- a/test/14_view.jl
+++ b/test/14_view.jl
@@ -11,7 +11,8 @@ const ≅ = isequal
 
     x = CategoricalArray{Union{T, eltype(a)}}(a, ordered=order)
     v = view(x, inds)
-    @test levels(v) === levels(x)
+    @test levels(x) isa CategoricalVector{nonmissingtype(eltype(a))}
+    @test levels(v) == levels(x)
     @test unique(v) == (ndims(v) > 0 ? unique(a[inds]) : [a[inds]])
     @test isordered(v) === isordered(x)
 end

From 87b50fc4fcade15c3c8aad1c92ac4c5f7cff9973 Mon Sep 17 00:00:00 2001
From: Milan Bouchet-Valat <nalimilan@club.fr>
Date: Tue, 27 May 2025 23:03:22 +0200
Subject: [PATCH 2/2] Fix doctests

---
 docs/src/using.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/using.md b/docs/src/using.md
index aaa6e36c..24c452b0 100644
--- a/docs/src/using.md
+++ b/docs/src/using.md
@@ -251,7 +251,7 @@ julia> xy = vcat(x, y)
  "Middle"
 
 julia> levels(xy)
-3-element Vector{String}:
+3-element CategoricalArray{String,1,UInt32}:
  "Young"
  "Middle"
  "Old"