Speeding up summarizing of item-parameters

For models with many votes (1000+), the summary of item-parameters is somewhat slow. To speed it up, the below code just replaces lapply with parallel::mclapply(). It also adds number of cores as the last argument, defaults to 1.  I also changed data_frame to tibble at the end to avoid warning. This corresponds to lines 640 - 783 in Generics.R

#' Posterior Summaries for fitted \code{idealstan} object
#' 
#' This function produces quantiles and standard deviations for the posterior samples of \code{idealstan} objects.
#' 
#' @param object An \code{idealstan} object fitted by \code{\link{id_estimate}}
#' @param pars Either \code{'ideal_pts'} for person ideal points, 
#' \code{'items'} for items/bills difficulty and discrimination parameters,
#' and \code{'all'} for all parameters in the model, including incidental parameters.
#' @param high_limit A number between 0 and 1 reflecting the upper limit of the 
#' uncertainty interval (defaults to 0.95).
#' @param low_limit A number between 0 and 1 reflecting the lower limit of the 
#' uncertainty interval (defaults to 0.05).
#' @param aggregated Whether to return summaries of the posterior values or the 
#' full posterior samples. Defaults to \code{TRUE}.
#' @param use_chain ID of a specific MCMC chain to use. Default (NULL) is all the chains
#' and is recommended.
#' @param cores number of cores for parallel execution. Default is 1 (no parallelization). 
#' @return A \code{\link[dplyr]{tibble}} data frame with parameters as rows and descriptive statistics as columns
#' @importFrom parallel mclapply
#' @export
setMethod('summary',signature(object='idealstan'),
          function(object,pars='ideal_pts',
                   high_limit=0.95,
                   low_limit=0.05,
                   aggregated=TRUE,
                   use_chain=NULL,
                   cores = 1) {
            
            options(tibble.print_max=1000,
                    tibble.print_min=100)
            
            
            if(pars=='ideal_pts') {
              ideal_pts <- .prepare_legis_data(object,
                                               high_limit=high_limit,
                                               low_limit=low_limit,
                                               aggregated=aggregated,
                                               use_chain=use_chain)
              if(is.null(ideal_pts$time_id)) {
                ideal_pts$time_id=1
              }
              if(aggregated) {
                ideal_pts <- select(ideal_pts,
                                    Person=person_id,
                                    Group=group_id,
                                    Time_Point=time_id,
                                    `Low Posterior Interval`=low_pt,
                                    `Posterior Median`=median_pt,
                                    `High Posterior Interval`=high_pt,
                                    `Parameter Name`=legis)
              } else {
                # add in iteration numbers
                ideal_pts <- group_by(ideal_pts,person_id,time_id) %>% 
                  mutate(Iteration=1:n())
                ideal_pts <- select(ideal_pts,
                                    Person=person_id,
                                    Group=group_id,
                                    Time_Point=time_id,
                                    Ideal_Points=ideal_pts,
                                    Iteration,
                                    `Parameter Name`=legis)
              }
              return(ideal_pts)
            }
            
            if(pars=='items') {
              
              # a bit trickier with item points
              item_plot <- levels(object@score_data@score_matrix$item_id)
              if(object@model_type %in% c(1,2) || (object@model_type>6 && object@model_type<13)) {
                # binary models and continuous
                item_points <- parallel::mclapply(item_plot,.item_plot_binary,object=object,
                                                  low_limit=low_limit,
                                                  high_limit=high_limit,
                                                  all=T,
                                                  aggregated=aggregated,
                                                  use_chain=use_chain,
                                                  mc.cores = cores) %>% bind_rows()
              } else if(object@model_type %in% c(3,4)) {
                # rating scale
                item_points <- parallel::mclapply(item_plot,.item_plot_ord_rs,object=object,
                                                  low_limit=low_limit,
                                                  high_limit=high_limit,
                                                  all=T,
                                                  aggregated=aggregated,
                                                  use_chain=use_chain,
                                                  mc.cores = cores) %>% bind_rows()
              } else if(object@model_type %in% c(5,6)) {
                # grm
                item_points <- parallel::mclapply(item_plot,.item_plot_ord_grm,object=object,
                                                  low_limit=low_limit,
                                                  high_limit=high_limit,
                                                  all=T,
                                                  aggregated=aggregated,
                                                  use_chain=use_chain,
                                                  mc.cores = cores) %>% bind_rows()
              } else if(object@model_type %in% c(13,14)) {
                # latent space
                item_points <- parallel::mclappy(lyitem_plot,.item_plot_ls,object=object,
                                                low_limit=low_limit,
                                                high_limit=high_limit,
                                                all=T,
                                                aggregated=aggregated,
                                                use_chain=use_chain, 
                                                mc.cores = cores) %>% bind_rows()
              }
              return(item_points)
            }
            
            
            if(pars=='all') {
              
              return(object@summary)
            }
            
            if(pars %in% c('person_cov','discrim_reg_cov','discrim_infl_cov')) {
              
              param_name <- switch(pars,person_cov='legis_x',
                                   discrim_reg_cov='sigma_reg_x',
                                   discrim_infl_cov='sigma_abs_x')
              
              to_sum <- object@stan_samples$draws(param_name)
              
              # reset names of parameters
              new_names <- switch(pars,person_cov=object@score_data@person_cov,
                                  discrim_reg=object@score_data@item_cov,
                                  discrim_abs=object@score_data@item_cov_miss)
              
              attributes(to_sum)$dimnames$variable <- new_names
              
              if(!aggregated) {
                return(to_sum)
              } else {
                out_d <- tibble(Covariate=new_names,
                                    `Posterior Median`= apply(to_sum,3,median),
                                    `Posterior High Interval`= apply(to_sum,3,quantile,high_limit),
                                    `Posterior Low Interval`= apply(to_sum,3,quantile,low_limit),
                                    Parameter=param_name)
                return(out_d)
              }
              
            }
            
          })


 

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Speeding up summarizing of item-parameters #37

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Speeding up summarizing of item-parameters #37

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions