Skip to content

Commit 581161e

Browse files
committed
Add skipmissing() to skip missing values
1 parent 1767963 commit 581161e

File tree

7 files changed

+167
-1
lines changed

7 files changed

+167
-1
lines changed

base/exports.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,7 @@ export
886886
# missing values
887887
ismissing,
888888
missing,
889+
skipmissing,
889890

890891
# time
891892
sleep,

base/missing.jl

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ end
1818
showerror(io::IO, ex::MissingException) =
1919
print(io, "MissingException: ", ex.msg)
2020

21+
# nonmissingtype(T): map a type to the same type with `Missing` removed.
#   Union{T, Missing} -> T
#   Missing           -> Union{}
#   any other T       -> T (returned unchanged)
# `Any` has a dedicated method that returns `Any`.
nonmissingtype(::Type{Union{T, Missing}}) where {T} = T
22+
nonmissingtype(::Type{Missing}) = Union{}
23+
nonmissingtype(::Type{T}) where {T} = T
24+
nonmissingtype(::Type{Any}) = Any
25+
2126
promote_rule(::Type{Missing}, ::Type{T}) where {T} = Union{T, Missing}
2227
promote_rule(::Type{Union{S,Missing}}, ::Type{T}) where {T,S} = Union{promote_type(T, S), Missing}
2328
promote_rule(::Type{Any}, ::Type{T}) where {T} = Any
@@ -116,3 +121,81 @@ function float(A::AbstractArray{Union{T, Missing}}) where {T}
116121
convert(AbstractArray{Union{U, Missing}}, A)
117122
end
118123
float(A::AbstractArray{Missing}) = A
124+
125+
"""
126+
skipmissing(itr)
127+
128+
Return an iterator over the elements in `itr` skipping [`missing`](@ref) values.
129+
130+
Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values in
131+
`itr`. Note that even if `itr` is a multidimensional array, the result will always
132+
be a `Vector` since it is not possible to remove missings while preserving dimensions
133+
of the input.

The returned iterator has an unknown length, and its `eltype` is the element type of
`itr` with `Missing` removed.
134+
135+
# Examples
136+
```jldoctest
137+
julia> sum(skipmissing([1, missing, 2]))
138+
3
139+
140+
julia> collect(skipmissing([1, missing, 2]))
141+
2-element Array{Int64,1}:
142+
1
143+
2
144+
145+
julia> collect(skipmissing([1 missing; 2 missing]))
146+
2-element Array{Int64,1}:
147+
1
148+
2
149+
150+
```
151+
"""
152+
skipmissing(itr) = SkipMissing(itr)
153+
154+
# Iterator wrapper constructed by `skipmissing`; the wrapped iterable is stored,
# with its concrete type `T`, in field `x`.
struct SkipMissing{T}
155+
x::T
156+
end
157+
# Every `SkipMissing` type reports an unknown size: how many elements remain after
# skipping cannot be determined without iterating.
iteratorsize(::Type{<:SkipMissing}) = SizeUnknown()
158+
# Forward the element-type trait of the wrapped iterable type `T`.
iteratoreltype(::Type{SkipMissing{T}}) where {T} = iteratoreltype(T)
159+
# Element type of the iterator: the wrapped iterable's element type with `Missing`
# removed. Defined on the iterator *type* so that `eltype(typeof(itr))` is correct;
# `eltype(itr)` on an instance reaches this method through the generic
# `eltype(x) = eltype(typeof(x))` fallback.
eltype(::Type{SkipMissing{T}}) where {T} = nonmissingtype(eltype(T))
160+
# Fallback implementation for general iterables: we cannot access a value twice,
161+
# so after finding the next non-missing element in start() or next(), we have to
162+
# pass it in the iterator state, which introduces a type instability since the value
163+
# is missing if the input does not contain any non-missing element.
164+
# Generic `start`: consume `itr.x` until a non-missing value is found or the input is
# exhausted. Returns the state `(v, s)`, where `v` is the value found (still `missing`
# if there was none) and `s` is the state of `itr.x` after the last element consumed.
@inline function Base.start(itr::SkipMissing)
165+
s = start(itr.x)
166+
v = missing
167+
@inbounds while !done(itr.x, s) && v isa Missing
168+
v, s = next(itr.x, s)
169+
end
170+
(v, s)
171+
end
172+
# Generic `done`: finished only when no value is buffered in the state (`state[1]` is
# missing) and the wrapped iterator is itself done at `state[2]`.
@inline Base.done(itr::SkipMissing, state) = ismissing(state[1]) && done(itr.x, state[2])
173+
# Generic `next`: return the value buffered in the state (`v1`) and look ahead in
# `itr.x` for the following non-missing value (`v2`), which is buffered in the new
# state; `v2` stays `missing` if the input runs out first.
@inline function Base.next(itr::SkipMissing, state)
174+
v1, s = state
175+
v2 = missing
176+
@inbounds while !done(itr.x, s) && v2 isa Missing
177+
v2, s = next(itr.x, s)
178+
end
179+
(v1, (v2, s))
180+
end
181+
# Optimized implementation for AbstractArray, relying on the ability to access x[i] twice:
182+
# once in done() to find the next non-missing entry, and once in next() to return it.
183+
# This works around the type instability problem of the generic fallback.
184+
# Starting from state `s` of `eachindex(x)`, skip over indices whose entry in `x` is
# `missing`. Returns the first state whose next index refers to a non-missing entry,
# or a state for which `eachindex(x)` is done if no such entry remains.
@inline function _next_nonmissing_ind(x::AbstractArray, s)
185+
idx = eachindex(x)
186+
@inbounds while !done(idx, s)
187+
i, new_s = next(idx, s)
188+
x[i] isa Missing || break
189+
s = new_s
190+
end
191+
s
192+
end
193+
# Array `start`: the state is a state of `eachindex(itr.x)`, advanced past any leading
# missing entries.
@inline Base.start(itr::SkipMissing{<:AbstractArray}) =
194+
_next_nonmissing_ind(itr.x, start(eachindex(itr.x)))
195+
# Array `done`: finished exactly when the index iterator is done at `state`.
@inline Base.done(itr::SkipMissing{<:AbstractArray}, state) =
196+
done(eachindex(itr.x), state)
197+
# Array `next`: read the entry at the next index, then advance the state past any
# following missing entries so that `done` only needs to check the index iterator.
@inline function Base.next(itr::SkipMissing{<:AbstractArray}, state)
198+
i, state = next(eachindex(itr.x), state)
199+
# The type assertion narrows the value to the iterator's (non-missing) eltype.
@inbounds v = itr.x[i]::eltype(itr)
200+
(v, _next_nonmissing_ind(itr.x, state))
201+
end

doc/src/manual/missing.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,46 @@ since type constructors fall back to convert methods.
296296
Stacktrace:
297297
[...]
298298
```
299+
## Skipping Missing Values
300+
301+
Since `missing` values propagate with standard mathematical operators, reduction
302+
functions return `missing` when called on arrays which contain missing values:
303+
```jldoctest
304+
julia> sum([1, missing])
305+
missing
306+
307+
```
308+
309+
In this situation, use the [`skipmissing`](@ref) function to skip missing values:
310+
```jldoctest
311+
julia> sum(skipmissing([1, missing]))
312+
1
313+
314+
```
315+
316+
This convenience function returns an iterator which filters out `missing` values
317+
efficiently. It can therefore be used with any function which supports iterators:
318+
```jldoctest
319+
julia> maximum(skipmissing([3, missing, 2, 1]))
320+
3
321+
322+
julia> mean(skipmissing([3, missing, 2, 1]))
323+
2.0
324+
325+
julia> mapreduce(sqrt, +, skipmissing([3, missing, 2, 1]))
326+
4.146264369941973
327+
328+
```
329+
330+
Use [`collect`](@ref) to extract non-`missing` values and store them in an array:
331+
```jldoctest
332+
julia> collect(skipmissing([3, missing, 2, 1]))
333+
3-element Array{Int64,1}:
334+
3
335+
2
336+
1
337+
338+
```
299339

300340
## Logical Operations on Arrays
301341

doc/src/manual/noteworthy-differences.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,9 @@ For users coming to Julia from R, these are some noteworthy differences:
183183
* Julia does not support the `NULL` type. The closest equivalent is [`nothing`](@ref), but it
184184
behaves like a scalar value rather than like a list. Use `x === nothing` instead of `is.null(x)`.
185185
* In Julia, missing values are represented by the [`missing`](@ref) object rather than by `NA`.
186-
Use [`ismissing(x)`](@ref) instead of `isna(x)`.
186+
Use [`ismissing(x)`](@ref) instead of `is.na(x)`. The [`skipmissing`](@ref) function is generally
187+
used instead of `na.rm=TRUE` (though in some particular cases functions take a `skipmissing`
188+
argument).
187189
* Julia lacks the equivalent of R's `assign` or `get`.
188190
* In Julia, `return` does not require parentheses.
189191
* In R, an idiomatic way to remove unwanted values is to use logical indexing, like in the expression

doc/src/stdlib/base.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ Base.unsafe_get
226226
Base.Missing
227227
Base.missing
228228
Base.ismissing
229+
Base.skipmissing
229230
```
230231

231232
## System

test/ambiguous.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ end
287287
pop!(need_to_handle_undef_sparam, which(Base.zero, Tuple{Type{Union{Missing, T}} where T}))
288288
pop!(need_to_handle_undef_sparam, which(Base.one, Tuple{Type{Union{Missing, T}} where T}))
289289
pop!(need_to_handle_undef_sparam, which(Base.oneunit, Tuple{Type{Union{Missing, T}} where T}))
290+
pop!(need_to_handle_undef_sparam, which(Base.nonmissingtype, Tuple{Type{Union{Missing, T}} where T}))
290291
@test need_to_handle_undef_sparam == Set()
291292
end
292293
end

test/missing.jl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44
@test sprint(showerror, MissingException("test")) == "MissingException: test"
55
end
66

7+
# Checks the three kinds of input handled by `Base.nonmissingtype`: a union with
# `Missing`, `Any`, and `Missing` itself.
@testset "nonmissingtype" begin
8+
@test Base.nonmissingtype(Union{Int, Missing}) == Int
9+
@test Base.nonmissingtype(Any) == Any
10+
@test Base.nonmissingtype(Missing) == Union{}
11+
end
12+
713
@testset "convert" begin
814
@test convert(Union{Int, Missing}, 1) === 1
915
@test convert(Union{Int, Missing}, 1.0) === 1
@@ -246,3 +252,35 @@ end
246252
@test isequal(float([missing]), [missing])
247253
@test float([missing]) isa Vector{Missing}
248254
end
255+
256+
# Each case checks the element type, the collected values and the type of the
# collected array.
@testset "skipmissing" begin
257+
# Vector with one missing value
x = skipmissing([1, 2, missing, 4])
258+
@test eltype(x) === Int
259+
@test collect(x) == [1, 2, 4]
260+
@test collect(x) isa Vector{Int}
261+
262+
# Matrix input: the collected result is a flat vector
x = skipmissing([1 2; missing 4])
263+
@test eltype(x) === Int
264+
@test collect(x) == [1, 2, 4]
265+
@test collect(x) isa Vector{Int}
266+
267+
# Input containing only missing values (note: `x` is the collected array here)
x = collect(skipmissing([missing]))
268+
@test eltype(x) === Union{}
269+
@test isempty(collect(x))
270+
@test collect(x) isa Vector{Union{}}
271+
272+
# Empty input with a `Union{Int, Missing}` element type (`x` is the collected array)
x = collect(skipmissing(Union{Int, Missing}[]))
273+
@test eltype(x) === Int
274+
@test isempty(collect(x))
275+
@test collect(x) isa Vector{Int}
276+
277+
# Leading, consecutive and trailing missing values
x = skipmissing([missing, missing, 1, 2, missing, 4, missing, missing])
278+
@test eltype(x) === Int
279+
@test collect(x) == [1, 2, 4]
280+
@test collect(x) isa Vector{Int}
281+
282+
# Generator input: exercises the generic (non-array) iteration methods
x = skipmissing(v for v in [missing, 1, missing, 2, 4])
283+
@test eltype(x) === Any
284+
@test collect(x) == [1, 2, 4]
285+
@test collect(x) isa Vector{Int}
286+
end

0 commit comments

Comments
 (0)