Skip to content

Commit 170fff1

Browse files
committed
timetable wip
1 parent 15c3bd7 commit 170fff1

File tree

9 files changed

+330
-10
lines changed

9 files changed

+330
-10
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ authors = ["JuliaStats <https://github.com/JuliaStats>"]
44
version = "0.22.1"
55

66
[deps]
7+
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
78
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
89
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
910
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
1011
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
12+
PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"
1113
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
1214
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1315
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

src/TimeSeries.jl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ using Dates
55
using DelimitedFiles
66
using Statistics
77
# third-party
8+
using DataStructures
89
using DocStringExtensions: SIGNATURES
10+
using PaddedViews
911
using RecipesBase
1012
using Reexport
1113
using Tables
@@ -21,6 +23,9 @@ export TimeArray, AbstractTimeSeries,
2123
# modify.jl
2224
export rename, rename!
2325

26+
# timetable.jl
27+
export TimeTable
28+
2429
###############################################################################
2530
# Submodule
2631
###############################################################################
@@ -33,7 +38,9 @@ include("timeaxis/TimeAxis.jl")
3338
###############################################################################
3439

3540
include(".timeseriesrc.jl")
41+
include("ats.jl")
3642
include("timearray.jl")
43+
include("timetable.jl")
3744
include("utilities.jl")
3845
include("tables.jl")
3946
include("split.jl")

src/adt.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"""
2+
Abstract data types
3+
"""

src/ats.jl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
###############################################################################
2+
# AbstractTimeSeries
3+
###############################################################################
4+
5+
"""
6+
AbstractTimeSeries{T}
7+
8+
An `AbstractTimeSeries{T}` is a table-like data structure with a time index and
9+
named columns,
10+
where `T` denotes the type of the time index.
11+
12+
When multiple columns form a compound index, `T <: Tuple`.
13+
For instance, `T = Tuple{Date,Time}` implies that two columns
14+
together form the time index.
15+
16+
# Interfaces
17+
18+
19+
## Dimension and size
20+
21+
- `length`
22+
- `ndims`
23+
- `size`
24+
- `axes`
25+
26+
- `copy`
27+
- `deepcopy`
28+
- `similar`
29+
30+
- `names`
31+
- `rename`
32+
- `rename!`
33+
34+
- `hcat`
35+
- `vcat`
36+
37+
"""
38+
abstract type AbstractTimeSeries{T} end
39+
40+
# Column names of a time series, read directly from its `names` field.
# NOTE(review): this assumes every subtype stores a `names` field; `TimeTable`
# (src/timetable.jl) declares only `ta`, `vecs` and `n` — TODO confirm.
Base.names(ats::AbstractTimeSeries) = getfield(ats, :names)
41+
42+
43+
# Tables.jl integration: every `AbstractTimeSeries` subtype declares itself a table.
Tables.istable(::Type{<:AbstractTimeSeries}) = true
44+
45+
# Column access is declared, and the series itself is returned as the column container.
Tables.columnaccess(::Type{<:AbstractTimeSeries}) = true
46+
Tables.columns(ats::AbstractTimeSeries) = ats
47+
48+
# Row access is declared as well; the generic `Tables.rows` method is still a TODO.
Tables.rowaccess(::Type{<:AbstractTimeSeries}) = true
49+
# TODO
50+
# Tables.rows(x::AbstractTimeSeries)
51+
52+
# Schema of the table: the column names come from `names(ats)`. The column types
# are not tracked yet, so `nothing` (types unknown) is passed explicitly instead
# of calling `Tables.Schema` with a single positional argument.
# TODO: supply the real column types.
Tables.schema(ats::AbstractTimeSeries) = Tables.Schema(names(ats), nothing)

src/tables.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ function Base.iterate(x::TableIter, i::Integer = 1)
4747
x[i], i + 1
4848
end
4949

50-
Tables.istable(::Type{<:AbstractTimeSeries}) = true
5150
Tables.rowaccess(::Type{<:TimeArray}) = true
5251
Tables.rows(ta::TimeArray) = Tables.rows(Tables.columntable(ta))
5352
Tables.columnaccess(::Type{<:TimeArray}) = true
@@ -58,10 +57,10 @@ Tables.getcolumn(ta::TimeArray, i::Int) = Tables.getcolumn(TableIter(ta), i)
5857
Tables.getcolumn(ta::TimeArray, nm::Symbol) = Tables.getcolumn(TableIter(ta), nm)
5958
Tables.getcolumn(i::TableIter, n::Int) = i[n]
6059
Tables.getcolumn(i::TableIter, nm::Symbol) = getproperty(i, nm)
61-
Tables.schema(ta::AbstractTimeSeries{T,N,D}) where {T,N,D} = Tables.schema(TableIter(ta))
60+
Tables.schema(ta::TimeArray) = Tables.schema(TableIter(ta))
6261
Tables.schema(i::TableIter{T,S}) where {T,S} = Tables.Schema(S, coltypes(data(i)))
6362

64-
coltypes(x::AbstractTimeSeries{T,N,D}) where {T,N,D} = (D, (T for _ 1:size(x, 2))...)
63+
# Column types for the table schema: the timestamp type `D` first, followed by
# the element type `T` repeated once per value column (`size(x, 2)` times).
coltypes(x::TimeArray{T,N,D}) where {T,N,D} = (D, (T for _ in 1:size(x, 2))...)
6564

6665

6766
###############################################################################

src/timearray.jl

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ import Base: convert, copy, length, show, getindex, iterate,
66
lastindex, size, eachindex, ==, isequal, hash, ndims,
77
getproperty, propertynames, values
88

9-
abstract type AbstractTimeSeries{T,N,D} end
10-
119
"""
1210
TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T}
1311
@@ -51,7 +49,7 @@ The third constructor builds a `TimeArray` from a `NamedTuple`.
5149
ta = TimeArray(data; timestamp = :datetime, meta = "Example")
5250
5351
"""
54-
struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,N,D}
52+
struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T}
5553

5654
timestamp::Vector{D}
5755
values::A
@@ -81,6 +79,8 @@ struct TimeArray{T,N,D<:TimeType,A<:AbstractArray{T,N}} <: AbstractTimeSeries{T,
8179
timestamp_r, reverse(values, dims = 1), replace_dupes!(colnames), meta)
8280

8381
throw(ArgumentError("timestamps must be monotonic"))
82+
83+
# TODO: padded array design?
8484
end
8585
end
8686

@@ -131,11 +131,13 @@ size(ta::TimeArray, dim) = size(values(ta), dim)
131131

132132
###### ndims #####################
133133

134-
ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
134+
# ndims(ta::AbstractTimeSeries{T,N}) where {T,N} = N
135+
ndims(::AbstractTimeSeries) = 2
136+
ndims(::TimeArray{T,N}) where {T,N} = N
135137

136138
###### iteration protocol ########
137139

138-
@generated function iterate(ta::AbstractTimeSeries{T,N}, i = 1) where {T,N}
140+
@generated function iterate(ta::TimeArray{T,N}, i = 1) where {T,N}
139141
val = (N == 1) ? :(values(ta)[i]) : :(values(ta)[i, :])
140142

141143
quote
@@ -182,8 +184,8 @@ hash(x::TimeArray, h::UInt) =
182184

183185
###### eltype #####################
184186

185-
Base.eltype(::AbstractTimeSeries{T,1,D}) where {T,D} = Tuple{D,T}
186-
Base.eltype(::AbstractTimeSeries{T,2,D}) where {T,D} = Tuple{D,Vector{T}}
187+
Base.eltype(::TimeArray{T,1,D}) where {T,D} = Tuple{D,T}
188+
Base.eltype(::TimeArray{T,2,D}) where {T,D} = Tuple{D,Vector{T}}
187189

188190
###### show #####################
189191

src/timetable.jl

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
###############################################################################
2+
# Type
3+
###############################################################################
4+
5+
# TODO: consider constraining T<:AbstractTimeAxis
6+
# A table made of a time axis `ta` and an ordered collection of named column
# vectors `vecs`.
#
# Fields
# - `ta`:   the time axis (of type `T`).
# - `vecs`: column name => column vector, in insertion order.
# - `n`:    number of rows.
#
# The inner constructor computes `n` and pads short columns:
# - if `T` has a length, `n = length(ta)` and no column may be longer than the
#   time axis, otherwise a `DimensionMismatch` is thrown;
# - otherwise `n` is the length of the longest column.
# Columns whose length differs from `n` are replaced, inside the `vecs` object
# that was passed in, by a copy padded with `missing` up to length `n`.
mutable struct TimeTable{T} <: AbstractTimeSeries{T}
    ta::T
    vecs::OrderedDict{Symbol,AbstractVector}
    n::Int  # length, in case of infinite time axis

    function TimeTable{T}(ta::T, vecs) where {T}
        # `init = 0` keeps the reduction defined when there are no columns at all
        m = mapreduce(length, max, values(vecs); init = 0)
        n = if Base.haslength(T)
            n′ = length(ta)
            (n′ >= m) || throw(DimensionMismatch(
                "The vector length should less or equal than the one of time axis"))
            n′
        else
            m
        end

        # note that it will copy, if the length of a col is shorter than `n`
        for (k, v) in vecs
            (length(v) == n) && continue
            vecs[k] = collect(PaddedView(missing, v, (n,)))
        end

        new(ta, vecs, n)
    end
    # other design style:
    #   colnames::Vector{Symbol}
    #   cols::Vector{AbstractVector}
end
34+
35+
# Outer constructor: take the type parameter `T` from the time axis `ta` and
# forward to the inner constructor.
function TimeTable(ta::T, vecs::OrderedDict{Symbol}) where T
    return TimeTable{T}(ta, vecs)
end
36+
# Keyword constructor: each keyword argument becomes a column, inserted in the
# iteration order of `kw`, e.g. `TimeTable(ta; a = v1, b = v2)`.
function TimeTable(ta::T; kw...) where T
    vecs = OrderedDict{Symbol,AbstractVector}()
    for (k, v) in kw
        vecs[k] = v
    end
    return TimeTable(ta, vecs)
end
43+
44+
const TimeTableTimeCol = :time
45+
46+
# One row of a `TimeTable`, as built by `getindex(tt::TimeTable, i::Int)`.
struct TimeTableRow{T,V}
47+
i::Int  # integer position of the row
48+
t::T  # element of the time axis at position `i`
49+
v::V  # the values of every column at position `i`
50+
end
51+
52+
53+
###############################################################################
54+
# Iterator interfaces
55+
###############################################################################
56+
57+
# Shape of the table: `(number of rows, number of data columns)`.
function Base.size(tt::TimeTable)
    nrow = length(tt)
    ncol = length(keys(_vecs(tt)))
    return (nrow, ncol)
end

# Extent along `dim`: rows for 1, data columns for 2, and 1 for any other `dim`.
function Base.size(tt::TimeTable, dim)
    if dim == 1
        return length(tt)
    elseif dim == 2
        return length(keys(_vecs(tt)))
    else
        return 1
    end
end
62+
63+
# Number of rows: the stored `n` field, read with `getfield`.
@inline function Base.length(tt::TimeTable)
    return getfield(tt, :n)
end
64+
65+
66+
###############################################################################
67+
# Indexing
68+
###############################################################################
69+
70+
# Index of the last row, i.e. the stored row count `n`.
function Base.lastindex(tt::TimeTable)
    return getfield(tt, :n)
end
71+
72+
# `i` is a valid row index when it lies within `1:lastindex(tt)`.
Base.checkindex(::Type{Bool}, tt::TimeTable, i::Int) = (1 <= i <= lastindex(tt))
73+
74+
# Column access by name: the reserved name `TimeTableTimeCol` returns the time
# axis, any other symbol is looked up among the data columns.
Base.getindex(tt::TimeTable, s::Symbol) =
    (s === TimeTableTimeCol) ? getfield(tt, :ta) : getvec(tt, s)
76+
77+
# Row access by integer position: returns a `TimeTableRow` holding the position,
# the time-axis element at that position and the value of every column there.
function Base.getindex(tt::TimeTable, i::Int)
    @boundscheck checkbounds(tt, i)
    stamp = _ta(tt)[i]
    rowvals = map(col -> col[i], values(_vecs(tt)))
    return TimeTableRow(i, stamp, rowvals)
end
81+
82+
# Row access by time: translate `t` to a row position, then index by position.
function Base.getindex(tt::TimeTable, t::TimeType)
    idx = time2idx(tt, t)
    return tt[idx]
end
83+
# Cell access by row position and column name. The reserved name
# `TimeTableTimeCol` reads from the time axis instead of a data column.
function Base.getindex(tt::TimeTable, i::Int, s::Symbol)
    @boundscheck checkbounds(tt, i)
    return (s === TimeTableTimeCol) ? _ta(tt)[i] : _vecs(tt)[s][i]
end
85+
# Cell access by time and column name: resolve `t` to a row position first.
function Base.getindex(tt::TimeTable, t::TimeType, s::Symbol)
    idx = time2idx(tt, t)
    return tt[idx, s]
end
86+
87+
# Generate `Base.findfirst(f, tt)` and `Base.findlast(f, tt)`: the search runs on
# the time axis, and a hit is reported only when it falls within the table's
# stored length `n`; otherwise `nothing` is returned.
for func in [:findfirst, :findlast]
    @eval function Base.$func(f::Function, tt::TimeTable)
        i = $func(f, _ta(tt))
        isnothing(i) && return nothing
        ifelse(i > getfield(tt, :n), nothing, i)
    end

    # TODO: handle case of infinite timegrid for findlast
end
96+
97+
# Generate `Base.findprev(f, tt, j)` and `Base.findnext(f, tt, j)`: the search
# runs on the time axis starting from position `j`, and a hit is reported only
# when it falls within the table's stored length `n`; otherwise `nothing`.
for func in [:findprev, :findnext]
    @eval function Base.$func(f::Function, tt::TimeTable, j::Int)
        i = $func(f, _ta(tt), j)
        isnothing(i) && return nothing
        ifelse(i > getfield(tt, :n), nothing, i)
    end
end
104+
105+
# Positional access to the three fields of a row: 1 => `i`, 2 => `t`, 3 => `v`.
# Any other position throws a `BoundsError`.
function Base.getindex(r::TimeTableRow, i::Int)
    if i == 1
        return r.i
    elseif i == 2
        return r.t
    elseif i == 3
        return r.v
    end
    throw(BoundsError(r, i))
end
111+
112+
###############################################################################
113+
# Value modification
114+
###############################################################################
115+
116+
# `tt.name = x` for a vector `x`: store `x` as the column `name`.
# The vector must have exactly `length(tt)` elements, otherwise a
# `DimensionMismatch` is thrown. Returns `x`.
function Base.setproperty!(tt::TimeTable, name::Symbol, x::AbstractVector)
    if length(tt) != length(x)
        throw(DimensionMismatch("length unmatched"))
    end
    cols = _vecs(tt)
    cols[name] = x
    return x
end
120+
121+
# TODO: support time axis modification
122+
# `tt[i, s] = v`: write `v` into row `i` of the data column `s`. Returns `v`.
function Base.setindex!(tt::TimeTable, v, i::Int, s::Symbol)
    @boundscheck checkbounds(tt, i)
    col = _vecs(tt)[s]
    col[i] = v
    return v
end
124+
# `tt[t, s] = v`: resolve the time `t` to a row position, then assign by position.
function Base.setindex!(tt::TimeTable, v, t::TimeType, s::Symbol)
    idx = time2idx(tt, t)
    tt[idx, s] = v
    return v
end
125+
126+
# Resize the table to `n′` rows by resizing every column vector in place and
# updating the stored length. A no-op when the length already matches.
# The time axis itself is not touched here. Returns `tt`.
function Base.resize!(tt::TimeTable, n′::Int)
    n = length(tt)
    (n == n′) && return tt

    for v in values(_vecs(tt))
        resize!(v, n′)
    end
    setfield!(tt, :n, n′)
    return tt
end
136+
137+
# Append one row to a table whose time axis is a `TimeGrid`.
#
# `x` must provide exactly one value per data column, keyed by column name.
# Throws `DimensionMismatch` when the number of entries differs from the number
# of columns, and `ArgumentError` when `x` carries a name that is not a column.
# Returns `tt`.
function Base.push!(tt::TimeTable{<:TimeGrid}, x::NamedTuple)
    d = _vecs(tt)
    (size(tt, 2) == length(x)) || throw(DimensionMismatch("input length unmatched"))

    # validate all names before mutating any column
    ks = keys(d)
    for k in keys(x)
        (k in ks) || throw(ArgumentError("unknown column $k"))
    end

    for (k, v) in d
        push!(v, x[k])
    end

    # keep the stored length and the time axis in step with the grown columns
    n = length(tt) + 1
    setfield!(tt, :n, n)
    resize!(_ta(tt), n)

    return tt
end
156+
157+
158+
###############################################################################
159+
# Time axis modification
160+
###############################################################################
161+
162+
# TODO: add a `shrink` kwarg for shrinking length after lag/lead
163+
# Build a table whose time axis is `_ta(tt) + n`; the column dictionary of `tt`
# is passed on as is.
function lag(tt::TimeTable{<:TimeGrid}, n::Int)
    shifted = _ta(tt) + n
    return TimeTable(shifted, _vecs(tt))
end
164+
# Build a table whose time axis is `_ta(tt) - n`; the column dictionary of `tt`
# is passed on as is.
function lead(tt::TimeTable{<:TimeGrid}, n::Int)
    shifted = _ta(tt) - n
    return TimeTable(shifted, _vecs(tt))
end
165+
166+
# TODO: reindex ?
167+
168+
169+
###############################################################################
170+
# Join
171+
###############################################################################
172+
173+
# TODO: after DataAPI.jl v0.17 released, import method from it
174+
175+
# TODO: support `on` kwarg
176+
# Inner join of two `TimeGrid`-backed tables on their time axes.
#
# Row positions are paired from the result of `findall(tax, tay)`: entry `i` of
# that result gives the matching position in `y`, or `missing` when there is no
# match (presumably provided by the time-axis submodule — verify there).
# The result holds the matched rows of `x` followed by the matched rows of `y`;
# a column of `y` whose name already exists in `x` gets a trailing underscore.
# The columns of the result are copies.
function innerjoin(x::TimeTable{<:TimeGrid}, y::TimeTable{<:TimeGrid})
    dx = _vecs(x)
    dy = _vecs(y)
    dz = OrderedDict{Symbol,AbstractVector}()

    tax = _ta(x)
    tay = _ta(y)

    idxx = Int[]
    idxy = Int[]
    # at most `length(x)` pairs can be produced; hint both index vectors
    sizehint!(idxx, length(x))
    sizehint!(idxy, length(x))
    for (i, j) in enumerate(findall(tax, tay))
        ismissing(j) && continue
        push!(idxx, i)
        push!(idxy, j)
    end

    for (k, v) in dx
        dz[k] = v[idxx]  # this will copy
    end

    ks = keys(dx)
    for (k, v) in dy
        # disambiguate a name that clashes with a column of `x`
        k′ = ifelse(k in ks, Symbol(k, :_), k)
        dz[k′] = v[idxy]
    end

    ta′ = [tax[i] for i in idxx]
    return TimeTable(ta′, dz)
end
207+
208+
209+
###############################################################################
210+
# Private utils
211+
###############################################################################
212+
213+
214+
# Bounds check for integer row indices: throws `BoundsError` when `i` is not a
# valid row position, returns `nothing` otherwise.
# Defined as a method of `Base.checkbounds` (like `Base.checkindex` above), so
# the module keeps using Base's generic function instead of shadowing it with a
# new function that only knows about `TimeTable`.
Base.checkbounds(tt::TimeTable, i::Int) =
    (checkindex(Bool, tt, i) || throw(BoundsError(tt, i)); nothing)
216+
217+
# Look up the data column named `s`.
@inline function getvec(tt::TimeTable, s::Symbol)
    return _vecs(tt)[s]
end
218+
# Raw access to the column dictionary, read with `getfield`.
@inline function _vecs(tt::TimeTable)
    return getfield(tt, :vecs)
end
219+
# Raw access to the time axis, read with `getfield`.
@inline function _ta(tt::TimeTable)
    return getfield(tt, :ta)
end
220+
221+
# Translate a time `t` into a row position by indexing the time axis with it.
@inline function time2idx(tt::TimeTable, t::TimeType)
    axis = _ta(tt)
    return axis[t]
end

0 commit comments

Comments
 (0)