Merge pull request #239 from Crunch-io/develop

1beb · web-flow · commit 41e978bc861f · 2021-01-11T11:24:36.000-05:00
WIP: Finalizing tracking reports
diff --git a/R/asToplineCategoricalArray.R b/R/asToplineCategoricalArray.R
@@ -12,14 +12,12 @@
 #' @param labels A character vector of labels that will be displayed in the
 #' resulting crunchtabs output. Should match the number of results objects
 as.ToplineCategoricalArray <- function(questions, question_alias = NULL, labels) {
-
   if (length(questions) != length(labels))
     stop("Number of labels provided does not match number of result sets")
   
   # Use the first result item as a skeleton
   obj <- questions[[1]]
   
-  is_mr <- questions[[1]]$type == "multiple_response"
   is_catarray <- questions[[1]]$type == "categorical_array"
   
   if(is_catarray) {
diff --git a/R/trackingReports.R b/R/trackingReports.R
@@ -5,43 +5,105 @@
 #' @param dataset_list A list of two or more crunch datasets. Datasets should be 
 #' provided in time order. From oldest to youngest. (i.e, wave 1, wave 2, 
 #' ..., wave n)
-#' @param vars A character vector of question aliases to be included in the report
+#' @param vars A character vector of question aliases to be included in the 
+#' report this may include aliases that are available in at least one of the 
+#' datasets specified in dataset_list
 #' @param weight NULL to accept each dataset's current weight or a single alias
-#' that is available in all datasets as a string.
+#' that is available in all datasets as a string. Multiple weights is not 
+#' recommended in a tracking report. 
 #' @param labels The labels for each wave. Should be of a length that
 #' matches the number of datasets.
 tracking_report <- function(dataset_list, vars, labels = NULL, weight = NULL) {
-  # topline tabbooks
   tabs <- tracking_report_tabs(dataset_list, vars, weight)
   
   if (is.null(labels))
     labels <- paste0("Wave ", seq_len(length(dataset_list)))
   
-  # Use the first result item as a skeleton
-  rebuilt_results <- tabs[[1]] 
+  # In previous iterations we used the first item of tabs as a skeleton
+  # However, what if there is an alias that is not included in the first 
+  # variable? Instead we build the rebuilt_results object piece by piece 
+  # using the first available result for each alias to create a skeleton
+  
+  rebuilt_results <- list()
+  class(rebuilt_results) <- c("Toplines", "CrunchTabs")
+  rebuilt_results$results <- lapply(vars, function(x) NULL)
+  rebuilt_results$metadata <- tabs[[1]]$metadata
+  names(rebuilt_results$results) <- vars
+  rebuilt_results$banner <- NULL
+  
+  # Loop through each element of tabs, suck out the first result available
+  # per alias and use that result as part of the skeleton. If there is more 
+  # than one result but less than n results, we need to denote that for future 
+  # use.
+  # 
+  # For example, if someone has a survey where "q1" was asked in waves 1 and 3
+  # but not 2 - we need a good way to identify this. 
+  
+  for (v in vars) {
+    var_results <- lapply(tabs, function(x) return(x$results[[v]]))
+    results_available <-which(!unlist(lapply(var_results, is.null))) 
+    first_var_result <- which(!unlist(lapply(var_results, is.null)))[1]
+    rebuilt_results$results[[v]] <- var_results[[first_var_result]]
+    rebuilt_results$results[[v]]$available_at <- results_available
+
+    
+    # For each alias, we set an attribute that identifies it's availability 
+    # across all the datasets: "all", "partial", and "single"
+    # - "all" means it is available in every dataset
+    # - "partial" means it is available in only some datasets
+    # - "single" means it is available in exactly one dataset
+    
+    # Because we use subsetting at the list level, "all" and "partial" 
+    # would follow a typical path that labeling was adjusted appropriately 
+    # for presentation in the resulting pdf "single" should act as a simple 
+    # passthrough where no additional formatting or manipulation takes place 
+    # on the result.
+    
+    # The single case
+    if(length(results_available) == 1) {
+      rebuilt_results$results[[v]]$availability <- "single"
+    } else {
+      rebuilt_results$results[[v]]$availability <- "general"
+    }
+
+  }
+
+  # Now that we have an attribute that identifies availability we can use it as
+  # a trigger for logic that allows us to customize the result of each 
+  # condition.
+  # 
+  # We wil loop over each variable and either combine those elements that are 
+  # setup for tracking, or passthrough those that are singles. As singles 
+  # represent the simplest case, we will deal with them first. 
   
   for (v in vars) {
-    message("Preparing: ",v)
+    if (rebuilt_results$results[[v]]$availability == "single") {
+      next
+    }
+    
+    available_at <- rebuilt_results$results[[v]]$available_at
+    
+    message("Preparing: ",v) # TODO: Delete me after feature dev
     result_list <- lapply(tabs, function(x) x$results[[v]])
     if(rebuilt_results$results[[v]]$type == "categorical_array") {
       rebuilt_results$results <- c( 
         catArrayToCategoricals(
-          result_list, 
+          result_list[available_at], 
           question_alias=v, 
-          labels=labels
+          labels=labels[available_at]
           ),
         rebuilt_results$results
       )
       rebuilt_results$results[[v]] <- NULL
             
-      # Fix the class!
+      # We must fake the class of the object
       class(rebuilt_results$results) <- c("ToplineResults", "CrosstabsResults")
     } else {
       rebuilt_results$results[[v]] <- as.ToplineCategoricalArray(
-        result_list, 
+        result_list[available_at], 
         question_alias = v, 
-        labels = labels
-      )
+        labels = labels[available_at]
+       )
     }
 
   }
@@ -59,7 +121,8 @@ tracking_report_tabs <- function(datasets, vars, weight = NULL) {
       if(is.null(weight)) {
         weight = weight(x)
       }
-      crosstabs(x, vars, weight, include_numeric = TRUE)
+      adj_vars = vars[vars %in% names(x)]
+      crosstabs(x, adj_vars, weight, include_numeric = TRUE)
     }
   )
 }
diff --git a/R/writeCodeBookLatex.R b/R/writeCodeBookLatex.R
@@ -28,6 +28,13 @@ writeCodeBookLatex <- function(
   preamble = NULL, suppress_zero_counts = FALSE, appendix = TRUE, logo = NULL,
   position = NULL, path = NULL, filename = NULL, logging = FALSE,
   ...) {
+  
+  if(!is.null(crunch::weight(ds))) 
+    stop(paste(
+      "Codebooks are designed to work with whole numbers. Your dataset is",
+      "weighted and the resulting codebook will either not run or have",
+      "breaking display issues."
+    ))
 
   options("crunchtabs.codebook.suppress.zeros" = suppress_zero_counts)
 
diff --git a/README.md b/README.md
@@ -98,8 +98,9 @@ While recontact reports are designed for questions asked in the same dataset, we
     )
     
     writeLatex(ct, pdf = TRUE, theme = theme)
-    
-![Tracking Report Example - Flipped grids](example-016-tracking-report.png "Tracking reports")  
+
+
+![Tracking Report Example - Flipped grids](vignettes/example-016-tracking-report.png)  
 
 ### Create a Cross Tabulation
 
diff --git a/dev-misc/tracking_reports_setup.R b/dev-misc/tracking_reports_setup.R
@@ -32,8 +32,25 @@ weight(ds3) <- ds3$weight1
 ds1 <- loadDataset("Example dataset W1")
 ds2 <- loadDataset("Example dataset W2")
 ds3 <- loadDataset("Example dataset W3")
-
-
-tema <- themeNew(default_theme = themeDefaultLatex(), latex_flip_grids = TRUE, one_per_sheet = FALSE)
-ct <- tracking_report(list(ds1, ds2, ds3), vars = c("allpets", "q1", "petloc"))
-writeLatex(ct, pdf = TRUE,theme = tema)
+ds1$only_wave1 <- factor(sample(letters[1:5], 20, replace = T))
+ds2$only_wave2 <- factor(sample(letters[1:5], 20, replace = T))
+ds3$only_wave3 <- factor(sample(letters[1:5], 20, replace = T))
+ds1$avail_wave13 <- factor(sample(letters[1:5], 20, replace = T))
+ds3$avail_wave13 <- factor(sample(letters[1:5], 20, replace = T))
+description(ds2$only_wave2) <- "This question is only available in wave 2"
+description(ds1$avail_wave13) <- "This question is only available in waves 1 and 3"
+description(ds1$avail_wave13) <- "This question is only available in waves 1 and 3"
+
+thema <- themeNew(default_theme = themeDefaultLatex(), latex_flip_grids = FALSE, one_per_sheet = FALSE)
+ct <- tracking_report(list(ds1, ds2, ds3), vars = c("allpets", "q1", "only_wave2", "avail_wave13"))
+writeLatex(ct, pdf = TRUE,theme = thema, title = "Data from 3 Example Datasets")
+
+
+# Clean up
+
+datasets() %>% 
+  as.data.frame() %>% 
+  filter(grepl("Example", name)) %>% 
+  pull(id) %>% 
+  lapply(function(x) 
+    with_consent(deleteDataset(sprintf("https://app.crunch.io/datasets/%s",x))))
diff --git a/tests/testthat/test-banner.R b/tests/testthat/test-banner.R
@@ -151,16 +151,28 @@ test_that("Single banner with one variable, recodes - categories rename, else",
   expect_identical(banner_data[["Results"]][["age5"]][["categories"]], c("Under 25", "Over 54"))
 })
 
-# TODO: Figure these out, maybe never.
-# with_test_authentication({
-#   ds <- loadDataset("https://app.crunch.io/api/datasets/868e8b3e01834c45b73e56e80160d3c3/")
-#   test_that("Error handling - banner", {
-#     expect_warning(banner(ds, list(c(), "A"="art3")),
-#                    "No variables found in 'Banner1' in `vars`. 'Banner1' will be ignored.")
-#
-#     expect_error(banner(ds, list(Results = c("profile_gender")), recodes = list(profile_gender = list("Male2"="Man"))),
-#                  "Responses in `recodes` must be included in variable responses. This is not true for 'Male2' in 'profile_gender'.")
-#     expect_error(banner(ds, list(Results = c("profile_gender")), recodes = list(profile_gender = list("Male"="Man", "Female"="Man"))),
-#                  "Combining categories is not currently supported. Please check 'profile_gender' recodes.")
-#   })
-# })
+context("getBannerInfo")
+
+test_that("Returns default banner", {
+  expect_equal(getBannerInfo(NULL), default_banner)
+})
+
+context("removeInserts")
+
+test_that("Adjustments for subtotals", {
+  var <- list()
+  theme <- list()
+  theme$format_subtotals <- NULL
+  theme$format_headers <- NULL
+  var$inserts_obj <- list()
+  var$inserts_obj$test <- "Fake Object of class Subtotal"
+  class(var$inserts_obj$test) <- "Subtotal"
+  var$inserts_obj$other <- "Fake Object of class Headers"
+  class(var$inserts_obj$other) <- "Headers"
+  
+  expect_equal(
+    removeInserts(var, theme),
+    list(inserts_obj = structure(list(), .Names = character(0)), 
+       inserts = structure(list(), .Names = character(0)))
+  )
+})
diff --git a/tests/testthat/test-write-latex.R b/tests/testthat/test-write-latex.R
@@ -85,6 +85,8 @@ with_temp_dir({
         writeLatex(cs, theme = theme, pdf = TRUE)
         theme <- themeNew(default_theme = theme, format_weighted_n=list(latex_add_parenthesis = TRUE))
         writeLatex(cs, theme = theme, pdf = TRUE)
+        expect_true(file.remove("Example Dataset with Nets.pdf"))
+        expect_true(file.remove("Example Dataset with Nets.tex"))
     })
 
     test_that("Write Latex toplines", {
@@ -109,6 +111,8 @@ with_temp_dir({
         ts$results[[1]]$description <- bad_description
         writeLatex(ts, pdf = TRUE, file = "topline2")
         expect_true(file.exists("topline2.pdf"))
+        expect_true(file.remove("Example Dataset with Nets.pdf"))
+        expect_true(file.remove("Example Dataset with Nets.tex"))
     })
 })
 
@@ -179,4 +183,9 @@ test_that("Adds nonTabBookSummary as expected", {
     any(grepl("clearpage$", res))
   )
   
+})
+
+test_that("Clean up", {
+  expect_true(file.remove("Example Dataset with Nets.pdf"))
+  expect_true(file.remove("Example Dataset with Nets.tex"))
 })
diff --git a/tests/testthat/test-writeCodeBookLatex.R b/tests/testthat/test-writeCodeBookLatex.R
@@ -3,6 +3,11 @@ context("writeCodeBookLatex")
 test_that("End to end writeCodeBookLatex", {
   ds <- readRDS(test_path("fixtures/example_dataset.rds"))
   
+  mockery::stub(
+    writeCodeBookLatex,
+    "crunch::weight", NULL
+  )
+  
   mockery::stub(
     writeCodeBookLatex, 
     "codeBookItemBody", 
@@ -19,7 +24,9 @@ test_that("End to end writeCodeBookLatex", {
       sample_desc = "US Voting Adults",
       logo = "yougov",
       pdf = TRUE)
+    
   )
+  
   tex <- readLines("Example-dataset.tex")
   expect_equal(res, NULL)
   expect_equal(length(tex), 149)
@@ -54,12 +61,19 @@ test_that("End to end writeCodeBookLatex", {
 test_that("Dataset name as title if title not specified", {
   ds <- readRDS(test_path("fixtures/example_dataset.rds"))
   
+  mockery::stub(
+    writeCodeBookLatex,
+    "crunch::weight", NULL
+  )
+  
   mockery::stub(
     writeCodeBookLatex, 
     "codeBookItemBody", 
     readRDS(test_path("fixtures/codeBookItem_allpets.rds"))
   )
   
+  
+  
   mockery::stub(
     writeCodeBookLatex,
     "crunch::name",
@@ -87,6 +101,11 @@ test_that("Dataset name as title if title not specified", {
 test_that("Dataset name as title if title not specified", {
   ds <- readRDS(test_path("fixtures/example_dataset.rds"))
   
+  mockery::stub(
+    writeCodeBookLatex,
+    "crunch::weight", NULL
+  )
+  
   mockery::stub(
     writeCodeBookLatex, 
     "codeBookItemBody", 
@@ -131,6 +150,11 @@ test_that("Appendices are positioned as expected", {
   #   dput %>% 
   #   saveRDS("tests/testthat/fixtures/codeBookItem_inputregstate")
   
+  mockery::stub(
+    writeCodeBookLatex,
+    "crunch::weight", NULL
+  )
+  
   mockery::stub(
     writeCodeBookLatex, 
     "codeBookItemBody", 
@@ -174,6 +198,10 @@ test_that("Position functions as expected", {
   # ds <- loadDataset("Example dataset")
   # codeBookItemBody(ds$allpets) %>% dput() %>% 
   #   saveRDS("tests/testthat/fixtures/codeBookItem_allpets.rds")
+  mockery::stub(
+    writeCodeBookLatex,
+    "crunch::weight", NULL
+  )
   
   mockery::stub(
     writeCodeBookLatex, 
@@ -242,3 +270,30 @@ test_that("default yg logo returns normal path", {
   p <- default_yg_logo()
   expect_equal(p, system.file("YouGov.png", package = "crunchtabs"))
 })
+
+test_that("Expect a stop if dataset is weighted", {
+  ds <- readRDS(test_path("fixtures/example_dataset.rds"))
+  
+  mockery::stub(
+    writeCodeBookLatex,
+    "crunch::weight", "weight_alias"
+  )
+  
+  mockery::stub(
+    writeCodeBookLatex, 
+    "codeBookItemBody", 
+    readRDS(test_path("fixtures/codeBookItem_allpets.rds"))
+  )
+  
+  mockery::stub(writeCodeBookLatex, "file.open", NULL)
+  
+  expect_error(suppressWarnings(
+    writeCodeBookLatex(
+      ds[c("allpets")],
+      title = "Hello",
+      subtitle = "Goodbye",
+      sample_desc = "US Voting Adults",
+      logo = "yougov",
+      pdf = TRUE)
+  ), "Codebooks are designed to work with whole numbers")
+})
diff --git a/vignettes/Tracking-Recontact-Reports.Rmd b/vignettes/Tracking-Recontact-Reports.Rmd
@@ -133,7 +133,7 @@ There are a number of important elements here:
 
 4.  Many defaults are extracted from the first dataset in the `dataset_list`.
 
-    ![](example-016-tracking-report.png "Tracking reports")
+![Example tracking report](example-016-tracking-report.png "Tracking reports")
 
 ### Tracking Reports for Other Stacking Data
 

Original file line number	Diff line number	Diff line change
`@@ -98,8 +98,9 @@ While recontact reports are designed for questions asked in the same dataset, we`
`98`	`98`	`)`
`99`	`99`
`100`	`100`	`writeLatex(ct, pdf = TRUE, theme = theme)`
`101`		`-`
`102`		`-![Tracking Report Example - Flipped grids](example-016-tracking-report.png "Tracking reports")`
	`101`	`+`
	`102`	`+`
	`103`	`+![Tracking Report Example - Flipped grids](vignettes/example-016-tracking-report.png)`
`103`	`104`
`104`	`105`	`### Create a Cross Tabulation`
`105`	`106`