Skip to content

Commit 5bce041

Browse files
author
Jouni Helske
committed
improvements to predict for non-gaussian models with varying u
1 parent 49a12b3 commit 5bce041

File tree

3 files changed

+120
-86
lines changed

3 files changed

+120
-86
lines changed

ChangeLog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ Changes from version 1.5.1 to 1.6.0
33
users to abstract the functionality of `SSMcustom` via their own bespoke
44
function using, e.g., `SSMbespoke(myfun()) in the model formula. Thank you
55
Matteo Pelagatti for the suggestion.
6+
* For non-Gaussian models, `predict` no longer checks component `u` when
7+
using n.ahead with `type=`link`.
8+
* Added argument `u_new` to `predict`, which can be used together with
9+
`n.ahead` to define constant (series-specific) `u` for future predictions,
10+
thus avoiding the need for the `newdata` if model is otherwise
11+
time-invariant.
612

713
Changes from version 1.5.0 to 1.5.1
814
* Added explicit alias for KFAS-package due to changes in roxygen2.

R/predict.SSModel.R

Lines changed: 109 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363
#' of observations in the equations, leading to results which match with \code{glm} (where applicable).
6464
#' The latter case was the default behaviour of KFAS before version 1.3.8.
6565
#' Essentially this is the difference between observed and expected information in GLM context.
66+
#' @param u_new For non-Gaussian models, optional vector of length matching the
67+
#' number of observation series. This defines the 'u' component to be used
68+
#' together with \code{n.ahead} argument.
6669
#' @param \dots Ignored.
6770
#' @return A matrix or list of matrices containing the predictions, and
6871
#' optionally standard errors.
@@ -82,10 +85,12 @@
8285
#' pred <- predict(model,n.ahead=10,interval="prediction",level=0.9)
8386
#' pred
8487
predict.SSModel <- function(object, newdata, n.ahead,
85-
interval = c("none", "confidence", "prediction"), level = 0.95,
86-
type = c("response", "link"), states = NULL, se.fit = FALSE, nsim = 0,
87-
prob = TRUE, maxiter = 50, filtered = FALSE, expected = FALSE, ...) {
88-
88+
interval = c("none", "confidence", "prediction"),
89+
level = 0.95, type = c("response", "link"),
90+
states = NULL, se.fit = FALSE, nsim = 0,
91+
prob = TRUE, maxiter = 50, filtered = FALSE,
92+
expected = FALSE, u_new, ...) {
93+
8994
interval <- match.arg(interval)
9095
type <- match.arg(type)
9196
# Check that the model object is of proper form
@@ -102,8 +107,8 @@ predict.SSModel <- function(object, newdata, n.ahead,
102107
stop("Vector states should contain the indices or types of the states which are combined.")
103108
} else {
104109
states <- match.arg(arg = states, choices = c("all", "arima", "custom", "level","slope",
105-
"cycle", "seasonal", "trend", "regression"),
106-
several.ok = TRUE)
110+
"cycle", "seasonal", "trend", "regression"),
111+
several.ok = TRUE)
107112
if ("all" %in% states) {
108113
states <- as.integer(1:attr(object, "m"))
109114
} else {
@@ -131,50 +136,68 @@ predict.SSModel <- function(object, newdata, n.ahead,
131136
n <- attr(object, "n") <- no + nn
132137
timespan <- (no + 1):n
133138
object$y <- ts(rbind(object$y, newdata$y),
134-
start = start(object$y), frequency = frequency(object$y))
139+
start = start(object$y), frequency = frequency(object$y))
135140
endtime <- end(object$y)
136141
tvo <- attr(object, "tv")
137142
tvn <- attr(newdata, "tv")
138143
same <- function(x, y) isTRUE(all.equal(x, y, tolerance = 0,
139-
check.attributes = FALSE))
144+
check.attributes = FALSE))
140145
if (tvo[1] || tvn[1] || !same(object$Z, newdata$Z)) {
141146
object$Z <- array(data = c(array(object$Z, dim = c(m, p, no)),
142-
array(newdata$Z, dim = c(m, p, nn))), dim = c(p, m, n))
147+
array(newdata$Z, dim = c(m, p, nn))), dim = c(p, m, n))
143148
attr(object, "tv")[1] <- 1L
144149
}
145150
if (gaussianmodel && (tvo[2] || tvn[2] || !same(object$H, newdata$H))) {
146151
object$H <- array(data = c(array(object$H, dim = c(p, p, no)),
147-
array(newdata$H, dim = c(p, p, nn))), dim = c(p, p, n))
152+
array(newdata$H, dim = c(p, p, nn))), dim = c(p, p, n))
148153
attr(object, "tv")[2] <- 1L
149154
} else if(!gaussianmodel) object$u <- rbind(object$u, matrix(newdata$u, nn, p))
150-
155+
151156
if (tvo[3] || tvn[3] || !same(object$T, newdata$T)) {
152157
object$T <- array(data = c(array(object$T, dim = c(m, m, no)),
153-
array(newdata$T, dim = c(m, m, nn))), dim = c(m, m, n))
158+
array(newdata$T, dim = c(m, m, nn))), dim = c(m, m, n))
154159
attr(object, "tv")[3] <- 1L
155160
}
156161
if (tvo[4] || tvn[4] || !same(object$R, newdata$R)) {
157162
object$R <- array(data = c(array(object$R, dim = c(m, k, no)),
158-
array(newdata$R, dim = c(m, k, nn))), dim = c(m, k, n))
163+
array(newdata$R, dim = c(m, k, nn))), dim = c(m, k, n))
159164
attr(object, "tv")[4] <- 1L
160165
}
161166
if (tvo[5] || tvn[5] || !same(object$Q, newdata$Q)) {
162167
object$Q <- array(data = c(array(data = object$Q, dim = c(k, k, no)),
163-
array(data = newdata$Q, dim = c(k, k, nn))), dim = c(k, k, n))
168+
array(data = newdata$Q, dim = c(k, k, nn))), dim = c(k, k, n))
164169
attr(object, "tv")[5] <- 1L
165170
}
166171
} else {
167172
if (!missing(n.ahead) && !is.null(n.ahead)) {
168173
tv <- attr(object, "tv")
169-
if(ifelse(gaussianmodel,any(tv), any(c(apply(object$u, 2, function(x) length(unique(x)) > 1))) || any(tv[-2])))
174+
ok <- TRUE
175+
if (gaussianmodel) {
176+
ok <- !any(tv)
177+
} else {
178+
ok <- !any(tv[-2])
179+
if (type == "response") {
180+
varying_u <- !any(c(apply(object$u, 2, function(x) length(unique(x)) > 1)))
181+
if (missing(u_new) && ok && !varying_u) {
182+
stop("Component 'u' is time-varying. Either use 'newdata' instead of 'n.ahead', or use 'u_new' together with 'n.ahead'.")
183+
}
184+
}
185+
if (!missing(u_new)) {
186+
u_new <- rep(u_new, ncol(object$u))
187+
} else {
188+
u_new <- object$u[1L, ]
189+
}
190+
}
191+
if (!ok) {
170192
stop("Model contains time varying system matrices, cannot use argument 'n.ahead'. Use 'newdata' instead.")
193+
}
171194
timespan <- attr(object, "n") + 1:n.ahead
172195
n <- attr(object, "n") <- attr(object, "n") + as.integer(n.ahead)
173196
endtime<-end(object$y) + c(0, n.ahead)
174197
object$y <- window(object$y, end = endtime, extend = TRUE)
175198
if (any(object$distribution != "gaussian"))
176-
object$u <- rbind(object$u, matrix(object$u[1, ], nrow = n.ahead,
177-
ncol = ncol(object$u), byrow = TRUE))
199+
object$u <- rbind(object$u, matrix(u_new, nrow = n.ahead,
200+
ncol = ncol(object$u), byrow = TRUE))
178201
} else {
179202
timespan <- 1:attr(object, "n")
180203
endtime <- end(object$y)
@@ -193,7 +216,7 @@ predict.SSModel <- function(object, newdata, n.ahead,
193216
out <- KFS(model = object, filtering = "mean", smoothing = "none")
194217
names(out)[match(c("m", "P_mu"), names(out))] <- c("muhat", "V_mu")
195218
if (out$d > 0) {
196-
out$V_mu[,,1:out$d] <- Inf #diffuse phase
219+
out$V_mu[,,1:out$d] <- Inf #diffuse phase
197220
}
198221
} else {
199222
out <- KFS(model = object, filtering = "none", smoothing = "mean")
@@ -205,25 +228,25 @@ predict.SSModel <- function(object, newdata, n.ahead,
205228
out <- signal(out, states = states, filtered = TRUE)
206229
names(out) <- c("muhat", "V_mu")
207230
if (d > 0) {
208-
out$V_mu[,,1:d] <- Inf #diffuse phase
231+
out$V_mu[,,1:d] <- Inf #diffuse phase
209232
}
210233
} else {
211234
out <- signal(KFS(model = object, filtering = "none", smoothing = "state"),
212-
states = states)
235+
states = states)
213236
names(out) <- c("muhat", "V_mu")
214237
}
215-
238+
216239
}
217240
for (i in 1:p) {
218241
pred[[i]] <- cbind(fit = out$muhat[timespan, i],
219-
switch(interval, none = NULL,
220-
confidence = out$muhat[timespan, i] +
221-
qnorm((1 - level)/2) * sqrt(out$V_mu[i, i, timespan]) %o% c(1, -1),
222-
prediction = out$muhat[timespan, i] + qnorm((1 - level)/2) *
223-
sqrt(out$V_mu[i, i, timespan] +
224-
object$H[i, i, if (dim(object$H)[3] > 1) timespan else 1]) %o% c(1, -1)),
225-
se.fit = if (se.fit)
226-
sqrt(out$V_mu[i, i, timespan]))
242+
switch(interval, none = NULL,
243+
confidence = out$muhat[timespan, i] +
244+
qnorm((1 - level)/2) * sqrt(out$V_mu[i, i, timespan]) %o% c(1, -1),
245+
prediction = out$muhat[timespan, i] + qnorm((1 - level)/2) *
246+
sqrt(out$V_mu[i, i, timespan] +
247+
object$H[i, i, if (dim(object$H)[3] > 1) timespan else 1]) %o% c(1, -1)),
248+
se.fit = if (se.fit)
249+
sqrt(out$V_mu[i, i, timespan]))
227250
if (interval != "none")
228251
colnames(pred[[i]])[2:3] <- c("lwr", "upr")
229252
}
@@ -232,41 +255,41 @@ predict.SSModel <- function(object, newdata, n.ahead,
232255
if (identical(states, as.integer(1:m))) {
233256
if (filtered) {
234257
out <- KFS(model = object, filtering = "signal", smoothing = "none",
235-
maxiter = maxiter, expected = expected)
258+
maxiter = maxiter, expected = expected)
236259
names(out)[match(c("t", "P_theta"), names(out))] <- c("thetahat", "V_theta")
237260
if (out$d > 0) {
238-
out$V_theta[,,1:out$d] <- Inf #diffuse phase
261+
out$V_theta[,,1:out$d] <- Inf #diffuse phase
239262
}
240263
} else {
241264
out <- KFS(model = object, smoothing = "signal", maxiter = maxiter, expected = expected)
242265
}
243266
} else {
244267
if (filtered) {
245268
out <- KFS(model = object, filtering = "state", smoothing = "none",
246-
maxiter = maxiter, expected = expected)
269+
maxiter = maxiter, expected = expected)
247270
d <- out$d
248271
out <- signal(out, states = states, filtered = TRUE)
249272
names(out) <- c("thetahat", "V_theta")
250273
if (d > 0) {
251-
out$V_theta[,,1:d] <- Inf #diffuse phase
274+
out$V_theta[,,1:d] <- Inf #diffuse phase
252275
}
253276
} else {
254277
out <- signal(KFS(model = object, smoothing = "state",
255-
maxiter = maxiter, expected = expected), states = states)
278+
maxiter = maxiter, expected = expected), states = states)
256279
names(out) <- c("thetahat", "V_theta")
257280
}
258-
281+
259282
}
260-
283+
261284
for (i in 1:p) {
262285
pred[[i]] <- cbind(fit = out$thetahat[timespan, i] +
263-
(if (object$distribution[i] == "poisson") log(object$u[timespan, i]) else 0),
264-
switch(interval, none = NULL,
265-
out$thetahat[timespan, i] +
266-
(if (object$distribution[i] == "poisson") log(object$u[timespan, i]) else 0) +
267-
qnorm((1 - level)/2) * sqrt(out$V_theta[i, i, timespan]) %o%
268-
c(1, -1)), se.fit = if (se.fit)
269-
sqrt(out$V_theta[i, i, timespan]))
286+
(if (object$distribution[i] == "poisson") log(object$u[timespan, i]) else 0),
287+
switch(interval, none = NULL,
288+
out$thetahat[timespan, i] +
289+
(if (object$distribution[i] == "poisson") log(object$u[timespan, i]) else 0) +
290+
qnorm((1 - level)/2) * sqrt(out$V_theta[i, i, timespan]) %o%
291+
c(1, -1)), se.fit = if (se.fit)
292+
sqrt(out$V_theta[i, i, timespan]))
270293
if (interval == "confidence")
271294
colnames(pred[[i]])[2:3] <- c("lwr", "upr")
272295
}
@@ -275,84 +298,84 @@ predict.SSModel <- function(object, newdata, n.ahead,
275298
tmp <- which(colnames(pred[[1]]) == "se.fit")
276299
for (i in 1:p) {
277300
pred[[i]][, "se.fit"] <- switch(object$distribution[i],
278-
gaussian = pred[[i]][,"se.fit"],
279-
poisson = pred[[i]][, "se.fit"] * exp(pred[[i]][, 1]),
280-
binomial = pred[[i]][, "se.fit"] *
281-
(if (!prob) object$u[timespan, i] else 1) *
282-
exp(pred[[i]][, 1])/(1 + exp(pred[[i]][, 1]))^2,
283-
gamma = pred[[i]][, "se.fit"] * exp(pred[[i]][, 1]),
284-
`negative binomial` = pred[[i]][, "se.fit"] * exp(pred[[i]][, 1]))
301+
gaussian = pred[[i]][,"se.fit"],
302+
poisson = pred[[i]][, "se.fit"] * exp(pred[[i]][, 1]),
303+
binomial = pred[[i]][, "se.fit"] *
304+
(if (!prob) object$u[timespan, i] else 1) *
305+
exp(pred[[i]][, 1])/(1 + exp(pred[[i]][, 1]))^2,
306+
gamma = pred[[i]][, "se.fit"] * exp(pred[[i]][, 1]),
307+
`negative binomial` = pred[[i]][, "se.fit"] * exp(pred[[i]][, 1]))
285308
pred[[i]][, -tmp] <- switch(object$distribution[i],
286-
gaussian = pred[[i]][, -tmp],
287-
poisson = exp(pred[[i]][, -tmp]),
288-
binomial = (if (!prob) object$u[timespan, i] else 1) *
289-
exp(pred[[i]][, -tmp])/(1 + exp(pred[[i]][, -tmp])),
290-
gamma = exp(pred[[i]][, -tmp]),
291-
`negative binomial` = exp(pred[[i]][, -tmp]))
309+
gaussian = pred[[i]][, -tmp],
310+
poisson = exp(pred[[i]][, -tmp]),
311+
binomial = (if (!prob) object$u[timespan, i] else 1) *
312+
exp(pred[[i]][, -tmp])/(1 + exp(pred[[i]][, -tmp])),
313+
gamma = exp(pred[[i]][, -tmp]),
314+
`negative binomial` = exp(pred[[i]][, -tmp]))
292315
}
293316
} else {
294317
for (i in 1:p) pred[[i]] <- switch(object$distribution[i],
295-
gaussian = pred[[i]],
296-
poisson = exp(pred[[i]]),
297-
binomial = (if (!prob) object$u[timespan, i] else 1) *
298-
exp(pred[[i]])/(1 + exp(pred[[i]])),
299-
gamma = exp(pred[[i]]),
300-
`negative binomial` = exp(pred[[i]]))
318+
gaussian = pred[[i]],
319+
poisson = exp(pred[[i]]),
320+
binomial = (if (!prob) object$u[timespan, i] else 1) *
321+
exp(pred[[i]])/(1 + exp(pred[[i]])),
322+
gamma = exp(pred[[i]]),
323+
`negative binomial` = exp(pred[[i]]))
301324
}
302325
}
303-
326+
304327
} else {# with importance sampling
305328
if (filtered) {
306329
d <- KFS(approxSSM(object, maxiter = maxiter, expected = expected), smoothing = "none")$d
307330
}
308-
331+
309332
if (interval == "none") {
310333
imp <- importanceSSM(object,
311-
ifelse(identical(states, as.integer(1:m)), "signal", "states"),
312-
nsim = nsim, antithetics = TRUE, maxiter = maxiter, filtered = filtered,
313-
expected = expected)
334+
ifelse(identical(states, as.integer(1:m)), "signal", "states"),
335+
nsim = nsim, antithetics = TRUE, maxiter = maxiter, filtered = filtered,
336+
expected = expected)
314337
nsim <- as.integer(4 * nsim)
315338
if (!identical(states, as.integer(1:m))) {
316339
imp$samples <- .Fortran(fzalpha, NAOK = TRUE, as.integer(dim(object$Z)[3] > 1),
317-
object$Z, imp$samples, signal = array(0, c(n, p, nsim)),
318-
p, m, n, nsim, length(states), states)$signal
340+
object$Z, imp$samples, signal = array(0, c(n, p, nsim)),
341+
p, m, n, nsim, length(states), states)$signal
319342
}
320-
343+
321344
w <- imp$weights/sum(imp$weights)
322345
if (type == "response") {
323346
for (i in 1:p) {
324347
imp$samples[timespan, i, ] <- switch(object$distribution[i],
325-
gaussian = imp$samples[timespan, i, ],
326-
poisson = object$u[timespan, i] * exp(imp$samples[timespan, i, ]),
327-
binomial = (if (!prob) object$u[timespan, i] else 1) *
328-
exp(imp$samples[timespan, i, ])/(1 + exp(imp$samples[timespan, i, ])),
329-
gamma = exp(imp$samples[timespan, i, ]),
330-
`negative binomial` = exp(imp$samples[timespan, i, ]))
348+
gaussian = imp$samples[timespan, i, ],
349+
poisson = object$u[timespan, i] * exp(imp$samples[timespan, i, ]),
350+
binomial = (if (!prob) object$u[timespan, i] else 1) *
351+
exp(imp$samples[timespan, i, ])/(1 + exp(imp$samples[timespan, i, ])),
352+
gamma = exp(imp$samples[timespan, i, ]),
353+
`negative binomial` = exp(imp$samples[timespan, i, ]))
331354
}
332355
} else {
333356
for (i in 1:p) if (object$distribution[i] == "poisson")
334357
imp$samples[timespan, i, ] <- imp$samples[timespan, i, ] + log(object$u[timespan,
335-
i])
358+
i])
336359
}
337360
varmean <- .Fortran(fvarmeanw, NAOK = TRUE, imp$samples[timespan, , , drop = FALSE], w,
338-
p, length(timespan),
339-
nsim, mean = array(0, c(length(timespan), p)),
340-
var = array(0, c(length(timespan), p)), as.integer(se.fit))
341-
361+
p, length(timespan),
362+
nsim, mean = array(0, c(length(timespan), p)),
363+
var = array(0, c(length(timespan), p)), as.integer(se.fit))
364+
342365
if (se.fit) {
343366
if (filtered && d > 0) {
344367
varmean$var[1:d, ] <- Inf #diffuse phase
345368
}
346369
pred <- lapply(1:p, function(j) cbind(fit = varmean$mean[, j],
347-
se.fit = sqrt(varmean$var[, j])))
348-
370+
se.fit = sqrt(varmean$var[, j])))
371+
349372
} else {
350373
pred <- lapply(1:p, function(j) varmean$mean[, j])
351374
}
352375
} else {
353376
pred <- interval(object, interval = interval, level = level, type = type,
354-
states = states, nsim = nsim, se.fit = se.fit, timespan = timespan,
355-
prob = prob, maxiter = maxiter, filtered = filtered, expected = expected)
377+
states = states, nsim = nsim, se.fit = se.fit, timespan = timespan,
378+
prob = prob, maxiter = maxiter, filtered = filtered, expected = expected)
356379
if (filtered && d > 0) {
357380
for (i in 1:p) {
358381
pred[[i]][1:d, "lwr"] <- -Inf
@@ -361,7 +384,7 @@ predict.SSModel <- function(object, newdata, n.ahead,
361384
pred[[i]][1:d, "se.fit"] <- Inf #diffuse phase
362385
}
363386
}
364-
387+
365388
}
366389
}
367390
}

man/predict.SSModel.Rd

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)