Skip to content

Commit 901d270

Browse files
authored
Support missing values in fpsort! (#27817)
Use the fast floating-point sorting algorithm even in the presence of missing values, by adapting the existing code that handles NaN. NaN and missing values are first moved together to the end; a second pass over them then places missing after NaN.
1 parent 93b89b9 commit 901d270

File tree

2 files changed

+79
-14
lines changed

2 files changed

+79
-14
lines changed

base/sort.jl

Lines changed: 53 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1135,7 +1135,7 @@ end
11351135
module Float
11361136
using ..Sort
11371137
using ...Order
1138-
using ..Base: @inbounds, AbstractVector, Vector, last, axes
1138+
using ..Base: @inbounds, AbstractVector, Vector, last, axes, Missing
11391139

11401140
import Core.Intrinsics: slt_int
11411141
import ..Sort: sort!
@@ -1156,31 +1156,43 @@ lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x)
11561156
lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y)
11571157

11581158
isnan(o::DirectOrdering, x::Floats) = (x!=x)
1159+
isnan(o::DirectOrdering, x::Missing) = false
11591160
isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i])
11601161

1161-
function nans2left!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
1162+
ismissing(o::DirectOrdering, x::Floats) = false
1163+
ismissing(o::DirectOrdering, x::Missing) = true
1164+
ismissing(o::Perm, i::Int) = ismissing(o.order,o.data[i])
1165+
1166+
allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing
1167+
allowsmissing(::AbstractVector{Int},
1168+
::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} =
1169+
T >: Missing
1170+
1171+
function specials2left!(testf::Function, v::AbstractVector, o::Ordering,
1172+
lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
11621173
i = lo
1163-
@inbounds while i <= hi && isnan(o,v[i])
1174+
@inbounds while i <= hi && testf(o,v[i])
11641175
i += 1
11651176
end
11661177
j = i + 1
11671178
@inbounds while j <= hi
1168-
if isnan(o,v[j])
1179+
if testf(o,v[j])
11691180
v[i], v[j] = v[j], v[i]
11701181
i += 1
11711182
end
11721183
j += 1
11731184
end
11741185
return i, hi
11751186
end
1176-
function nans2right!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
1187+
function specials2right!(testf::Function, v::AbstractVector, o::Ordering,
1188+
lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
11771189
i = hi
1178-
@inbounds while lo <= i && isnan(o,v[i])
1190+
@inbounds while lo <= i && testf(o,v[i])
11791191
i -= 1
11801192
end
11811193
j = i - 1
11821194
@inbounds while lo <= j
1183-
if isnan(o,v[j])
1195+
if testf(o,v[j])
11841196
v[i], v[j] = v[j], v[i]
11851197
i -= 1
11861198
end
@@ -1189,17 +1201,42 @@ function nans2right!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1)
11891201
return lo, i
11901202
end
11911203

1192-
nans2end!(v::AbstractVector, o::ForwardOrdering) = nans2right!(v,o)
1193-
nans2end!(v::AbstractVector, o::ReverseOrdering) = nans2left!(v,o)
1194-
nans2end!(v::AbstractVector{<:Integer}, o::Perm{<:ForwardOrdering}) = nans2right!(v,o)
1195-
nans2end!(v::AbstractVector{<:Integer}, o::Perm{<:ReverseOrdering}) = nans2left!(v,o)
1204+
function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering)
1205+
lo, hi = first(axes(v,1)), last(axes(v,1))
1206+
if allowsmissing(v, o)
1207+
i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
1208+
sort!(v, lo, i-1, a, o)
1209+
return i, hi
1210+
else
1211+
return specials2left!(isnan, v, o, lo, hi)
1212+
end
1213+
end
1214+
function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering)
1215+
lo, hi = first(axes(v,1)), last(axes(v,1))
1216+
if allowsmissing(v, o)
1217+
_, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
1218+
sort!(v, i+1, hi, a, o)
1219+
return lo, i
1220+
else
1221+
return specials2right!(isnan, v, o, lo, hi)
1222+
end
1223+
end
1224+
1225+
specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) =
1226+
specials2right!(v, a, o)
1227+
specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) =
1228+
specials2left!(v, a, o)
1229+
specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) =
1230+
specials2right!(v, a, o)
1231+
specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) =
1232+
specials2left!(v, a, o)
11961233

11971234
issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x))
11981235
issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x))
11991236
issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i])
12001237

12011238
function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering)
1202-
i, j = lo, hi = nans2end!(v,o)
1239+
i, j = lo, hi = specials2end!(v,a,o)
12031240
@inbounds while true
12041241
while i <= j && issignleft(o,v[i]); i += 1; end
12051242
while i <= j && !issignleft(o,v[j]); j -= 1; end
@@ -1216,8 +1253,10 @@ end
12161253
fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) =
12171254
sort!(v, first(axes(v,1)), last(axes(v,1)), a, o)
12181255

1219-
sort!(v::AbstractVector{<:Floats}, a::Algorithm, o::DirectOrdering) = fpsort!(v,a,o)
1220-
sort!(v::Vector{Int}, a::Algorithm, o::Perm{<:DirectOrdering,<:Vector{<:Floats}}) = fpsort!(v,a,o)
1256+
sort!(v::AbstractVector{<:Union{Floats, Missing}}, a::Algorithm, o::DirectOrdering) =
1257+
fpsort!(v,a,o)
1258+
sort!(v::Vector{Int}, a::Algorithm, o::Perm{<:DirectOrdering,<:Vector{<:Union{Floats, Missing}}}) =
1259+
fpsort!(v,a,o)
12211260

12221261
end # module Sort.Float
12231262

test/missing.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,3 +552,29 @@ end
552552
me = try missing(1) catch e e end
553553
@test sprint(showerror, me) == "MethodError: objects of type Missing are not callable"
554554
end
555+
556+
@testset "sort and sortperm with $(eltype(X))" for (X, P, RP) in
557+
(([2, missing, -2, 5, missing], [3, 1, 4, 2, 5], [2, 5, 4, 1, 3]),
558+
([NaN, missing, 5, -0.0, NaN, missing, Inf, 0.0, -Inf],
559+
[9, 4, 8, 3, 7, 1, 5, 2, 6], [2, 6, 1, 5, 7, 3, 8, 4, 9]),
560+
([missing, "a", "c", missing, "b"], [2, 5, 3, 1, 4], [1, 4, 3, 5, 2]))
561+
@test sortperm(X) == P
562+
@test sortperm(X, alg=QuickSort) == P
563+
@test sortperm(X, alg=MergeSort) == P
564+
565+
XP = X[P]
566+
@test isequal(sort(X), XP)
567+
@test isequal(sort(X, alg=QuickSort), XP)
568+
@test isequal(sort(X, alg=MergeSort), XP)
569+
570+
@test sortperm(X, rev=true) == RP
571+
@test sortperm(X, alg=QuickSort, rev=true) == RP
572+
@test sortperm(X, alg=MergeSort, rev=true) == RP
573+
574+
XRP = X[RP]
575+
@test isequal(sort(X, rev=true), XRP)
576+
@test isequal(sort(X, alg=QuickSort, rev=true), XRP)
577+
@test isequal(sort(X, alg=MergeSort, rev=true), XRP)
578+
end
579+
580+
sortperm(reverse([NaN, missing, NaN, missing]))

0 commit comments

Comments
 (0)