Skip to content

Speeding up summarizing of item-parameters #37

@bjornhoyland

Description

@bjornhoyland

For models with many votes (1000+), the summary of item-parameters is somewhat slow. To speed it up, the below code just replaces lapply with parallel::mclapply(). It also adds number of cores as the last argument, defaults to 1. I also changed data_frame to tibble at the end to avoid warning. This corresponds to lines 640 - 783 in Generics.R

#' Posterior Summaries for fitted \code{idealstan} object
#'
#' This function produces quantiles and standard deviations for the posterior samples of \code{idealstan} objects.
#'
#' @param object An \code{idealstan} object fitted by \code{\link{id_estimate}}
#' @param pars Either \code{'ideal_pts'} for person ideal points,
#' \code{'items'} for items/bills difficulty and discrimination parameters,
#' and \code{'all'} for all parameters in the model, including incidental parameters.
#' @param high_limit A number between 0 and 1 reflecting the upper limit of the
#' uncertainty interval (defaults to 0.95).
#' @param low_limit A number between 0 and 1 reflecting the lower limit of the
#' uncertainty interval (defaults to 0.05).
#' @param aggregated Whether to return summaries of the posterior values or the
#' full posterior samples. Defaults to \code{TRUE}.
#' @param use_chain ID of a specific MCMC chain to use. Default (NULL) is all the chains
#' and is recommended.
#' @param cores number of cores for parallel execution. Default is 1 (no parallelization).
#' @return A \code{\link[dplyr]{tibble}} data frame with parameters as rows and descriptive statistics as columns
#' @importFrom parallel mclapply
#' @export
setMethod('summary',signature(object='idealstan'),
function(object,pars='ideal_pts',
high_limit=0.95,
low_limit=0.05,
aggregated=TRUE,
use_chain=NULL,
cores = 1) {

        options(tibble.print_max=1000,
                tibble.print_min=100)
        
        
        if(pars=='ideal_pts') {
          ideal_pts <- .prepare_legis_data(object,
                                           high_limit=high_limit,
                                           low_limit=low_limit,
                                           aggregated=aggregated,
                                           use_chain=use_chain)
          if(is.null(ideal_pts$time_id)) {
            ideal_pts$time_id=1
          }
          if(aggregated) {
            ideal_pts <- select(ideal_pts,
                                Person=person_id,
                                Group=group_id,
                                Time_Point=time_id,
                                `Low Posterior Interval`=low_pt,
                                `Posterior Median`=median_pt,
                                `High Posterior Interval`=high_pt,
                                `Parameter Name`=legis)
          } else {
            # add in iteration numbers
            ideal_pts <- group_by(ideal_pts,person_id,time_id) %>% 
              mutate(Iteration=1:n())
            ideal_pts <- select(ideal_pts,
                                Person=person_id,
                                Group=group_id,
                                Time_Point=time_id,
                                Ideal_Points=ideal_pts,
                                Iteration,
                                `Parameter Name`=legis)
          }
          return(ideal_pts)
        }
        
        if(pars=='items') {
          
          # a bit trickier with item points
          item_plot <- levels(object@score_data@score_matrix$item_id)
          if(object@model_type %in% c(1,2) || (object@model_type>6 && object@model_type<13)) {
            # binary models and continuous
            item_points <- parallel::mclapply(item_plot,.item_plot_binary,object=object,
                                              low_limit=low_limit,
                                              high_limit=high_limit,
                                              all=T,
                                              aggregated=aggregated,
                                              use_chain=use_chain,
                                              mc.cores = cores) %>% bind_rows()
          } else if(object@model_type %in% c(3,4)) {
            # rating scale
            item_points <- parallel::mclapply(item_plot,.item_plot_ord_rs,object=object,
                                              low_limit=low_limit,
                                              high_limit=high_limit,
                                              all=T,
                                              aggregated=aggregated,
                                              use_chain=use_chain,
                                              mc.cores = cores) %>% bind_rows()
          } else if(object@model_type %in% c(5,6)) {
            # grm
            item_points <- parallel::mclapply(item_plot,.item_plot_ord_grm,object=object,
                                              low_limit=low_limit,
                                              high_limit=high_limit,
                                              all=T,
                                              aggregated=aggregated,
                                              use_chain=use_chain,
                                              mc.cores = cores) %>% bind_rows()
          } else if(object@model_type %in% c(13,14)) {
            # latent space
            item_points <- parallel::mclappy(lyitem_plot,.item_plot_ls,object=object,
                                            low_limit=low_limit,
                                            high_limit=high_limit,
                                            all=T,
                                            aggregated=aggregated,
                                            use_chain=use_chain, 
                                            mc.cores = cores) %>% bind_rows()
          }
          return(item_points)
        }
        
        
        if(pars=='all') {
          
          return(object@summary)
        }
        
        if(pars %in% c('person_cov','discrim_reg_cov','discrim_infl_cov')) {
          
          param_name <- switch(pars,person_cov='legis_x',
                               discrim_reg_cov='sigma_reg_x',
                               discrim_infl_cov='sigma_abs_x')
          
          to_sum <- object@stan_samples$draws(param_name)
          
          # reset names of parameters
          new_names <- switch(pars,person_cov=object@score_data@person_cov,
                              discrim_reg=object@score_data@item_cov,
                              discrim_abs=object@score_data@item_cov_miss)
          
          attributes(to_sum)$dimnames$variable <- new_names
          
          if(!aggregated) {
            return(to_sum)
          } else {
            out_d <- tibble(Covariate=new_names,
                                `Posterior Median`= apply(to_sum,3,median),
                                `Posterior High Interval`= apply(to_sum,3,quantile,high_limit),
                                `Posterior Low Interval`= apply(to_sum,3,quantile,low_limit),
                                Parameter=param_name)
            return(out_d)
          }
          
        }
        
      })

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions