-
Notifications
You must be signed in to change notification settings - Fork 12
Description
For models with many votes (1000+), the summary of item-parameters is somewhat slow. To speed it up, the below code just replaces lapply with parallel::mclapply(). It also adds number of cores as the last argument, defaults to 1. I also changed data_frame to tibble at the end to avoid warning. This corresponds to lines 640 - 783 in Generics.R
#' Posterior Summaries for fitted \code{idealstan} object
#'
#' This function produces quantiles and standard deviations for the posterior samples of \code{idealstan} objects.
#'
#' @param object An \code{idealstan} object fitted by \code{\link{id_estimate}}
#' @param pars Either \code{'ideal_pts'} for person ideal points,
#' \code{'items'} for items/bills difficulty and discrimination parameters,
#' and \code{'all'} for all parameters in the model, including incidental parameters.
#' @param high_limit A number between 0 and 1 reflecting the upper limit of the
#' uncertainty interval (defaults to 0.95).
#' @param low_limit A number between 0 and 1 reflecting the lower limit of the
#' uncertainty interval (defaults to 0.05).
#' @param aggregated Whether to return summaries of the posterior values or the
#' full posterior samples. Defaults to \code{TRUE}.
#' @param use_chain ID of a specific MCMC chain to use. Default (NULL) is all the chains
#' and is recommended.
#' @param cores number of cores for parallel execution. Default is 1 (no parallelization).
#' @return A \code{\link[dplyr]{tibble}} data frame with parameters as rows and descriptive statistics as columns
#' @importFrom parallel mclapply
#' @export
setMethod('summary',signature(object='idealstan'),
function(object,pars='ideal_pts',
high_limit=0.95,
low_limit=0.05,
aggregated=TRUE,
use_chain=NULL,
cores = 1) {
options(tibble.print_max=1000,
tibble.print_min=100)
if(pars=='ideal_pts') {
ideal_pts <- .prepare_legis_data(object,
high_limit=high_limit,
low_limit=low_limit,
aggregated=aggregated,
use_chain=use_chain)
if(is.null(ideal_pts$time_id)) {
ideal_pts$time_id=1
}
if(aggregated) {
ideal_pts <- select(ideal_pts,
Person=person_id,
Group=group_id,
Time_Point=time_id,
`Low Posterior Interval`=low_pt,
`Posterior Median`=median_pt,
`High Posterior Interval`=high_pt,
`Parameter Name`=legis)
} else {
# add in iteration numbers
ideal_pts <- group_by(ideal_pts,person_id,time_id) %>%
mutate(Iteration=1:n())
ideal_pts <- select(ideal_pts,
Person=person_id,
Group=group_id,
Time_Point=time_id,
Ideal_Points=ideal_pts,
Iteration,
`Parameter Name`=legis)
}
return(ideal_pts)
}
if(pars=='items') {
# a bit trickier with item points
item_plot <- levels(object@score_data@score_matrix$item_id)
if(object@model_type %in% c(1,2) || (object@model_type>6 && object@model_type<13)) {
# binary models and continuous
item_points <- parallel::mclapply(item_plot,.item_plot_binary,object=object,
low_limit=low_limit,
high_limit=high_limit,
all=T,
aggregated=aggregated,
use_chain=use_chain,
mc.cores = cores) %>% bind_rows()
} else if(object@model_type %in% c(3,4)) {
# rating scale
item_points <- parallel::mclapply(item_plot,.item_plot_ord_rs,object=object,
low_limit=low_limit,
high_limit=high_limit,
all=T,
aggregated=aggregated,
use_chain=use_chain,
mc.cores = cores) %>% bind_rows()
} else if(object@model_type %in% c(5,6)) {
# grm
item_points <- parallel::mclapply(item_plot,.item_plot_ord_grm,object=object,
low_limit=low_limit,
high_limit=high_limit,
all=T,
aggregated=aggregated,
use_chain=use_chain,
mc.cores = cores) %>% bind_rows()
} else if(object@model_type %in% c(13,14)) {
# latent space
item_points <- parallel::mclappy(lyitem_plot,.item_plot_ls,object=object,
low_limit=low_limit,
high_limit=high_limit,
all=T,
aggregated=aggregated,
use_chain=use_chain,
mc.cores = cores) %>% bind_rows()
}
return(item_points)
}
if(pars=='all') {
return(object@summary)
}
if(pars %in% c('person_cov','discrim_reg_cov','discrim_infl_cov')) {
param_name <- switch(pars,person_cov='legis_x',
discrim_reg_cov='sigma_reg_x',
discrim_infl_cov='sigma_abs_x')
to_sum <- object@stan_samples$draws(param_name)
# reset names of parameters
new_names <- switch(pars,person_cov=object@score_data@person_cov,
discrim_reg=object@score_data@item_cov,
discrim_abs=object@score_data@item_cov_miss)
attributes(to_sum)$dimnames$variable <- new_names
if(!aggregated) {
return(to_sum)
} else {
out_d <- tibble(Covariate=new_names,
`Posterior Median`= apply(to_sum,3,median),
`Posterior High Interval`= apply(to_sum,3,quantile,high_limit),
`Posterior Low Interval`= apply(to_sum,3,quantile,low_limit),
Parameter=param_name)
return(out_d)
}
}
})