Testing comparison

etpeterson · etpeterson · commit a5c5f9b18e20 · 2023-12-21T22:41:04.000-08:00
diff --git a/.github/workflows/analysis.yml b/.github/workflows/analysis.yml
@@ -145,3 +145,26 @@ jobs:
               durations.pdf
               curve_plot.pdf
               fitted_curves.pdf
+
+  compare:
+    runs-on: ubuntu-latest
+    needs: merge
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+      - name: Install R dependencies
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          packages: |
+            any::tidyverse
+            any::stats
+            any::assertr
+      - name: Download artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: Data  # presumably provides test_output.csv; confirm against the merge job
+      - name: Test against previous results  # the reference CSV is committed next to compare.r
+        run: Rscript --vanilla tests/IVIMmodels/unit_tests/compare.r test_output.csv test_reference.csv tests/IVIMmodels/unit_tests/reference_output.csv test_results.csv
diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
@@ -23,6 +23,7 @@ jobs:
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
+          cache: 'pip'
       # You can test your matrix by printing the current Python version
       - name: Display Python version
         run: python -c "import sys; print(sys.version)"
diff --git a/tests/IVIMmodels/unit_tests/compare.r b/tests/IVIMmodels/unit_tests/compare.r
@@ -0,0 +1,210 @@
+#!/usr/bin/env Rscript
+
+# Run from the repository root like this:
+# Rscript --vanilla tests/IVIMmodels/unit_tests/compare.r test_output.csv test_reference.csv tests/IVIMmodels/unit_tests/reference_output.csv test_results.csv
+
+# Positional command-line arguments, in order: test_file,
+# test_reference_file, reference_file, test_result_file.
+# Any argument that is not supplied keeps the default shown below.
+cli_args <- commandArgs(trailingOnly = TRUE)
+n_cli_args <- length(cli_args)
+
+# Fitted values to evaluate (read)
+test_file <- if (n_cli_args >= 1) cli_args[1] else "test_output.csv"
+
+# Per-group summary of test_file (written)
+test_reference_file <-
+  if (n_cli_args >= 2) cli_args[2] else "test_reference.csv"
+
+# Earlier summary to compare against (read); with the empty default the
+# script stops right after writing test_reference_file
+reference_file <- if (n_cli_args >= 3) cli_args[3] else ""
+
+# Outcome of the comparison (written)
+test_result_file <-
+  if (n_cli_args >= 4) cli_args[4] else "test_results.csv"
+
+
+# Load required libraries
+library(tidyverse)
+library(stats)
+# library(testit)
+library(assertr)
+
+# Threshold for each one-sided tail probability below; be sensitive to changes
+alpha <- 0.45
+# Columns to keep from the reference summary and from the test output
+keep_columns_reference <- c("Algorithm", "Region", "SNR", "f", "Dp", "D", "f_mu", "Dp_mu", "D_mu", "f_alpha", "Dp_alpha", "D_alpha", "f_std", "Dp_std", "D_std", "f_df", "Dp_df", "D_df")
+keep_columns_test <- c("Algorithm", "Region", "SNR", "index", "f", "Dp", "D", "f_fitted", "Dp_fitted", "D_fitted")
+
+# Load the test output with Algorithm and Region as factors. all_of() marks
+# keep_columns_test as an external character vector; bare use is deprecated.
+test <- read_csv(test_file) %>%
+  select(all_of(keep_columns_test)) %>%
+  mutate(Algorithm = as.factor(Algorithm), Region = as.factor(Region))
+
+# One group per distinct (Algorithm, Region, SNR, f, Dp, D) combination
+grouped_data <- group_by(test, Algorithm, Region, SNR, f, Dp, D)
+
+# Combine data for easier comparison
+# combined_data <- inner_join(reference, test, join_by(Algorithm, Region, SNR, f, Dp, D, index))
+
+# Summarise the fitted values of every group; the statistical tests run later
+summary_data <- summarize(
+  grouped_data,
+
+  # Group means
+  f_mu = mean(f_fitted),
+  Dp_mu = mean(Dp_fitted),
+  D_mu = mean(D_fitted),
+
+  # The alpha in force, stored alongside the statistics
+  f_alpha = alpha,
+  Dp_alpha = alpha,
+  D_alpha = alpha,
+
+  # Group standard deviations
+  f_std = sd(f_fitted),
+  Dp_std = sd(Dp_fitted),
+  D_std = sd(D_fitted),
+
+  # Degrees of freedom: group size minus one
+  f_df = n() - 1,
+  Dp_df = n() - 1,
+  D_df = n() - 1
+
+  # Disabled: per-row comparison with the reference (needs joined .x/.y columns)
+  # f_fitted_equal = all(all.equal(f_fitted.x, f_fitted.y)),
+  # Dp_fitted_equal = all(all.equal(Dp_fitted.x, Dp_fitted.y)),
+  # D_fitted_equal = all(all.equal(D_fitted.x, D_fitted.y)),
+  # f_fitted_p = t.test(f_fitted.x, f_fitted.y, paired = TRUE)$p.value,
+  # Dp_fitted_p = t.test(Dp_fitted.x, Dp_fitted.y, paired = TRUE)$p.value,
+  # D_fitted_p = t.test(D_fitted.x, D_fitted.y, paired = TRUE)$p.value
+)
+
+# Always save the summary, so it is available even without a reference
+write.csv(summary_data, file = test_reference_file, row.names = TRUE)
+
+# Without a reference file there is nothing to compare against, so the
+# script ends here with an error
+if (nchar(reference_file) == 0) {
+  stop("No reference file defined, stopping without testing.")
+}
+
+
+# Load the reference summary with Algorithm and Region as factors. all_of() marks
+# keep_columns_reference as an external character vector; bare use is deprecated.
+reference <- read_csv(reference_file) %>%
+  select(all_of(keep_columns_reference)) %>%
+  mutate(Algorithm = as.factor(Algorithm), Region = as.factor(Region))
+# Pair new and reference rows; clashing columns get .x (new) and .y (reference)
+reference_combined <- inner_join(summary_data, reference, join_by(Algorithm, Region, SNR)) %>%
+  group_by(Algorithm, Region, SNR)
+
+# Tail probabilities comparing the new summary (.x) with the reference (.y)
+test_results <- reference_combined %>%
+  summarize(
+    # F statistic: ratio of new to reference variance, lower and upper tails
+    f_ftest_lower = pf(f_std.x^2 / f_std.y^2, f_df.x, f_df.y, lower.tail=TRUE),
+    f_ftest_upper = pf(f_std.x^2 / f_std.y^2, f_df.x, f_df.y, lower.tail=FALSE),
+    Dp_ftest_lower = pf(Dp_std.x^2 / Dp_std.y^2, Dp_df.x, Dp_df.y, lower.tail=TRUE),
+    Dp_ftest_upper = pf(Dp_std.x^2 / Dp_std.y^2, Dp_df.x, Dp_df.y, lower.tail=FALSE),
+    D_ftest_lower = pf(D_std.x^2 / D_std.y^2, D_df.x, D_df.y, lower.tail=TRUE),
+    D_ftest_upper = pf(D_std.x^2 / D_std.y^2, D_df.x, D_df.y, lower.tail=FALSE),
+    # t statistic: mean difference over the new standard error, std / sqrt(df + 1)
+    # NOTE(review): df is taken from the reference (.y) while std and n are from .x; confirm
+    f_ttest_lower = pt((f_mu.x - f_mu.y) / (f_std.x / sqrt(f_df.x + 1)), df=f_df.y, lower.tail=TRUE),
+    f_ttest_upper = pt((f_mu.x - f_mu.y) / (f_std.x / sqrt(f_df.x + 1)), df=f_df.y, lower.tail=FALSE),
+    Dp_ttest_lower = pt((Dp_mu.x - Dp_mu.y) / (Dp_std.x / sqrt(Dp_df.x + 1)), df=Dp_df.y, lower.tail=TRUE),
+    Dp_ttest_upper = pt((Dp_mu.x - Dp_mu.y) / (Dp_std.x / sqrt(Dp_df.x + 1)), df=Dp_df.y, lower.tail=FALSE),
+    D_ttest_lower = pt((D_mu.x - D_mu.y) / (D_std.x / sqrt(D_df.x + 1)), df=D_df.y, lower.tail=TRUE),
+    D_ttest_upper = pt((D_mu.x - D_mu.y) / (D_std.x / sqrt(D_df.x + 1)), df=D_df.y, lower.tail=FALSE),
+  )
+
+
+# Turn every tail probability into a pass/fail flag: TRUE when the
+# probability is at least alpha (null hypothesis kept), FALSE otherwise.
+# Each flag is named after its probability column plus "_null", for
+# example f_ftest_lower -> f_ftest_lower_null, and the flags are appended
+# in the same order as the probability columns.
+test_results <- mutate(
+  test_results,
+  across(
+    # Only the probability columns end in "_lower" or "_upper"
+    matches("_(lower|upper)$"),
+    # Compare against the global threshold
+    function(tail_prob) tail_prob >= alpha,
+    # Keep the probability columns and add one "<name>_null" column for each
+    .names = "{.col}_null"
+  )
+)
+
+
+# Save all probabilities and flags before running the checks
+write.csv(test_results, file = test_result_file, row.names = TRUE)
+# Fail if any flag is FALSE: the variance (F) checks come first,
+# followed by the mean (t) checks
+verify(test_results, f_ftest_lower_null)
+verify(test_results, f_ftest_upper_null)
+verify(test_results, Dp_ftest_lower_null)
+verify(test_results, Dp_ftest_upper_null)
+verify(test_results, D_ftest_lower_null)
+verify(test_results, D_ftest_upper_null)
+verify(test_results, f_ttest_lower_null)
+verify(test_results, f_ttest_upper_null)
+verify(test_results, Dp_ttest_lower_null)
+verify(test_results, Dp_ttest_upper_null)
+verify(test_results, D_ttest_lower_null)
+verify(test_results, D_ttest_upper_null)
+
+
+
+
+
+
+# # Combine data for easier comparison
+# reference_combined <- inner_join(grouped_data, reference, join_by(Algorithm, Region, SNR)) %>%
+#   group_by(Algorithm, Region, SNR)
+
+# # Run t-tests
+# t_tests <- reference_combined %>%
+#   summarize(
+#     # Perform paired t-test for each value
+#     f_fitted_p = t.test(f_fitted, mu = f_mu[1])$p.value,
+#     Dp_fitted_p = t.test(Dp_fitted, mu = Dp_mu[1])$p.value,
+#     D_fitted_p = t.test(D_fitted, mu = D_mu[1])$p.value
+#   )
+
+# # Extract p-values and assess significance, true is accept the null, false is reject
+# t_tests <- t_tests %>%
+#   mutate(
+#     f_fitted_null = f_fitted_p >= alpha,
+#     Dp_fitted_null = Dp_fitted_p >= alpha,
+#     D_fitted_null = D_fitted_p >= alpha
+#   )
+
+# # Write the t-test file
+# write.csv(t_tests, test_result_file, row.names=TRUE)
+
+# # Fail if we had failures
+# t_tests %>% verify(f_fitted_null)
+# t_tests %>% verify(Dp_fitted_null)
+# t_tests %>% verify(D_fitted_null)
+
+
+# # Fail if we had failures (fallback)
+# # failed_tests <- t_tests[!t_tests$f_fitted_null,]
+# # print(failed_tests)
+# # testit::assert(nrow(failed_tests) == 0)
+# # failed_tests <- t_tests[!t_tests$Dp_fitted_null,]
+# # print(failed_tests)
+# # testit::assert(nrow(failed_tests) == 0)
+# # failed_tests <- t_tests[!t_tests$D_fitted_null,]
+# # print(failed_tests)
+# # testit::assert(nrow(failed_tests) == 0)
+
+# # TODO:
+# # Could we plot the per-group comparison somehow?
+# # The data would need to be melted (reshaped to long format) first,
+# # e.g. starting from something like:
+# # grouped_plots <- grouped_data %>% do(plots=ggplot(data=.) + geom_boxplot(aes(f_fitted.x, f_fitted.y)))
diff --git a/tests/IVIMmodels/unit_tests/reference_output.csv b/tests/IVIMmodels/unit_tests/reference_output.csv