|
| 1 | +#!/usr/bin/env Rscript |
| 2 | + |
| 3 | +#Run like this: |
| 4 | +#Rscript --vanilla tests/IVIMmodels/unit_tests/compare.r test_output.csv test_reference.csv reference_output.csv test_results.csv |
| 5 | + |
| 6 | +args = commandArgs(trailingOnly=TRUE) |
| 7 | +# Define file paths |
| 8 | +test_file <- "test_output.csv" |
| 9 | +test_reference_file <- "test_reference.csv" |
| 10 | +reference_file <- "" #"reference_output.csv" |
| 11 | +test_result_file <- "test_results.csv" |
| 12 | + |
| 13 | + |
| 14 | +if (length(args)>=1) { |
| 15 | + test_file = args[1] |
| 16 | +} |
| 17 | +if (length(args)>=2) { |
| 18 | + test_reference_file = args[2] |
| 19 | +} |
| 20 | +if (length(args)>=3) { |
| 21 | + reference_file = args[3] |
| 22 | +} |
| 23 | +if (length(args)>=4) { |
| 24 | + test_result_file = args[4] |
| 25 | +} |
| 26 | + |
| 27 | + |
| 28 | +# Load required libraries |
| 29 | +library(tidyverse) |
| 30 | +library(stats) |
| 31 | +# library(testit) |
| 32 | +library(assertr) |
| 33 | + |
| 34 | +alpha <- 0.45 # be sensitive to changes |
| 35 | + |
| 36 | +# Define desired columns to keep |
| 37 | +keep_columns_reference <- c("Algorithm", "Region", "SNR", "f", "Dp", "D", "f_mu", "Dp_mu", "D_mu", "f_alpha", "Dp_alpha", "D_alpha", "f_std", "Dp_std", "D_std", "f_df", "Dp_df", "D_df") |
| 38 | +keep_columns_test <- c("Algorithm", "Region", "SNR", "index", "f", "Dp", "D", "f_fitted", "Dp_fitted", "D_fitted") |
| 39 | + |
| 40 | +test <- read_csv(test_file) %>% |
| 41 | + select(keep_columns_test) %>% |
| 42 | + # Convert Algorithm and Region to factors |
| 43 | + mutate(Algorithm = as.factor(Algorithm), Region = as.factor(Region)) |
| 44 | + |
| 45 | +# Group data by relevant factors |
| 46 | +grouped_data <- test %>% |
| 47 | + group_by(Algorithm, Region, SNR, f, Dp, D) |
| 48 | + |
| 49 | +# Combine data for easier comparison |
| 50 | +# combined_data <- inner_join(reference, test, join_by(Algorithm, Region, SNR, f, Dp, D, index)) |
| 51 | + |
| 52 | +# Perform t-test for each value |
| 53 | +summary_data <- grouped_data %>% |
| 54 | + summarize( |
| 55 | + # Calculate group means |
| 56 | + f_mu = mean(f_fitted), |
| 57 | + Dp_mu = mean(Dp_fitted), |
| 58 | + D_mu = mean(D_fitted), |
| 59 | + |
| 60 | + # Also insert alpha values here |
| 61 | + f_alpha = alpha, |
| 62 | + Dp_alpha = alpha, |
| 63 | + D_alpha = alpha, |
| 64 | + |
| 65 | + # Calculate group standard deviations |
| 66 | + f_std = sd(f_fitted), |
| 67 | + Dp_std = sd(Dp_fitted), |
| 68 | + D_std = sd(D_fitted), |
| 69 | + |
| 70 | + # Degrees of freedom |
| 71 | + f_df = length(f_fitted) - 1, |
| 72 | + Dp_df = length(Dp_fitted) - 1, |
| 73 | + D_df = length(D_fitted) - 1, |
| 74 | + |
| 75 | + # Calculate group equivalence |
| 76 | + # f_fitted_equal = all(all.equal(f_fitted.x, f_fitted.y)), |
| 77 | + # Dp_fitted_equal = all(all.equal(Dp_fitted.x, Dp_fitted.y)), |
| 78 | + # D_fitted_equal = all(all.equal(D_fitted.x, D_fitted.y)), |
| 79 | + |
| 80 | + # Perform paired t-test for each value |
| 81 | + # f_fitted_p = t.test(f_fitted.x, f_fitted.y, paired = TRUE)$p.value, |
| 82 | + # Dp_fitted_p = t.test(Dp_fitted.x, Dp_fitted.y, paired = TRUE)$p.value, |
| 83 | + # D_fitted_p = t.test(D_fitted.x, D_fitted.y, paired = TRUE)$p.value |
| 84 | + ) |
| 85 | + |
| 86 | +# If no reference file, just report the test results and fail |
| 87 | +write.csv(summary_data, test_reference_file, row.names=TRUE) |
| 88 | + |
| 89 | +# Exit at this point if we don't have a reference file |
| 90 | +if (nchar(reference_file) == 0) { |
| 91 | + stop("No reference file defined, stopping without testing.") |
| 92 | +} |
| 93 | + |
| 94 | + |
| 95 | +# Read data from CSV files and select only relevant columns |
| 96 | +reference <- read_csv(reference_file) %>% |
| 97 | + select(keep_columns_reference) %>% |
| 98 | + # Convert Algorithm and Region to factors |
| 99 | + mutate(Algorithm = as.factor(Algorithm), Region = as.factor(Region)) |
| 100 | + |
| 101 | +reference_combined <- inner_join(summary_data, reference, join_by(Algorithm, Region, SNR)) %>% |
| 102 | + group_by(Algorithm, Region, SNR) |
| 103 | + |
| 104 | +# Run tests |
| 105 | +test_results <- reference_combined %>% |
| 106 | + summarize( |
| 107 | + # f-tests |
| 108 | + f_ftest_lower = pf(f_std.x^2 / f_std.y^2, f_df.x, f_df.y, lower.tail=TRUE), |
| 109 | + f_ftest_upper = pf(f_std.x^2 / f_std.y^2, f_df.x, f_df.y, lower.tail=FALSE), |
| 110 | + Dp_ftest_lower = pf(Dp_std.x^2 / Dp_std.y^2, Dp_df.x, Dp_df.y, lower.tail=TRUE), |
| 111 | + Dp_ftest_upper = pf(Dp_std.x^2 / Dp_std.y^2, Dp_df.x, Dp_df.y, lower.tail=FALSE), |
| 112 | + D_ftest_lower = pf(D_std.x^2 / D_std.y^2, D_df.x, D_df.y, lower.tail=TRUE), |
| 113 | + D_ftest_upper = pf(D_std.x^2 / D_std.y^2, D_df.x, D_df.y, lower.tail=FALSE), |
| 114 | + |
| 115 | + # t-tests |
| 116 | + f_ttest_lower = pt((f_mu.x - f_mu.y) / (f_std.x / sqrt(f_df.x + 1)), df=f_df.y, lower.tail=TRUE), |
| 117 | + f_ttest_upper = pt((f_mu.x - f_mu.y) / (f_std.x / sqrt(f_df.x + 1)), df=f_df.y, lower.tail=FALSE), |
| 118 | + Dp_ttest_lower = pt((Dp_mu.x - Dp_mu.y) / (Dp_std.x / sqrt(Dp_df.x + 1)), df=Dp_df.y, lower.tail=TRUE), |
| 119 | + Dp_ttest_upper = pt((Dp_mu.x - Dp_mu.y) / (Dp_std.x / sqrt(Dp_df.x + 1)), df=Dp_df.y, lower.tail=FALSE), |
| 120 | + D_ttest_lower = pt((D_mu.x - D_mu.y) / (D_std.x / sqrt(D_df.x + 1)), df=D_df.y, lower.tail=TRUE), |
| 121 | + D_ttest_upper = pt((D_mu.x - D_mu.y) / (D_std.x / sqrt(D_df.x + 1)), df=D_df.y, lower.tail=FALSE), |
| 122 | + ) |
| 123 | + |
| 124 | + |
| 125 | +test_results <- test_results %>% |
| 126 | + mutate( |
| 127 | + f_ftest_lower_null = f_ftest_lower >= alpha, |
| 128 | + f_ftest_upper_null = f_ftest_upper >= alpha, |
| 129 | + Dp_ftest_lower_null = Dp_ftest_lower >= alpha, |
| 130 | + Dp_ftest_upper_null = Dp_ftest_upper >= alpha, |
| 131 | + D_ftest_lower_null = D_ftest_lower >= alpha, |
| 132 | + D_ftest_upper_null = D_ftest_upper >= alpha, |
| 133 | + |
| 134 | + f_ttest_lower_null = f_ttest_lower >= alpha, |
| 135 | + f_ttest_upper_null = f_ttest_upper >= alpha, |
| 136 | + Dp_ttest_lower_null = Dp_ttest_lower >= alpha, |
| 137 | + Dp_ttest_upper_null = Dp_ttest_upper >= alpha, |
| 138 | + D_ttest_lower_null = D_ttest_lower >= alpha, |
| 139 | + D_ttest_upper_null = D_ttest_upper >= alpha, |
| 140 | + ) |
| 141 | + |
| 142 | + |
| 143 | + # Write the t-test file |
| 144 | +write.csv(test_results, test_result_file, row.names=TRUE) |
| 145 | + |
| 146 | +# Fail if we had failures |
| 147 | +test_results %>% verify(f_ftest_lower_null) |
| 148 | +test_results %>% verify(f_ftest_upper_null) |
| 149 | +test_results %>% verify(Dp_ftest_lower_null) |
| 150 | +test_results %>% verify(Dp_ftest_upper_null) |
| 151 | +test_results %>% verify(D_ftest_lower_null) |
| 152 | +test_results %>% verify(D_ftest_upper_null) |
| 153 | +test_results %>% verify(f_ttest_lower_null) |
| 154 | +test_results %>% verify(f_ttest_upper_null) |
| 155 | +test_results %>% verify(Dp_ttest_lower_null) |
| 156 | +test_results %>% verify(Dp_ttest_upper_null) |
| 157 | +test_results %>% verify(D_ttest_lower_null) |
| 158 | +test_results %>% verify(D_ttest_upper_null) |
| 159 | + |
| 160 | + |
| 161 | + |
| 162 | + |
| 163 | + |
| 164 | + |
| 165 | +# # Combine data for easier comparison |
| 166 | +# reference_combined <- inner_join(grouped_data, reference, join_by(Algorithm, Region, SNR)) %>% |
| 167 | +# group_by(Algorithm, Region, SNR) |
| 168 | + |
| 169 | +# # Run t-tests |
| 170 | +# t_tests <- reference_combined %>% |
| 171 | +# summarize( |
| 172 | +# # Perform paired t-test for each value |
| 173 | +# f_fitted_p = t.test(f_fitted, mu = f_mu[1])$p.value, |
| 174 | +# Dp_fitted_p = t.test(Dp_fitted, mu = Dp_mu[1])$p.value, |
| 175 | +# D_fitted_p = t.test(D_fitted, mu = D_mu[1])$p.value |
| 176 | +# ) |
| 177 | + |
| 178 | +# # Extract p-values and assess significance, true is accept the null, false is reject |
| 179 | +# t_tests <- t_tests %>% |
| 180 | +# mutate( |
| 181 | +# f_fitted_null = f_fitted_p >= alpha, |
| 182 | +# Dp_fitted_null = Dp_fitted_p >= alpha, |
| 183 | +# D_fitted_null = D_fitted_p >= alpha |
| 184 | +# ) |
| 185 | + |
| 186 | +# # Write the t-test file |
| 187 | +# write.csv(t_tests, test_result_file, row.names=TRUE) |
| 188 | + |
| 189 | +# # Fail if we had failures |
| 190 | +# t_tests %>% verify(f_fitted_null) |
| 191 | +# t_tests %>% verify(Dp_fitted_null) |
| 192 | +# t_tests %>% verify(D_fitted_null) |
| 193 | + |
| 194 | + |
| 195 | +# # Fail if we had failures (fallback) |
| 196 | +# # failed_tests <- t_tests[!t_tests$f_fitted_null,] |
| 197 | +# # print(failed_tests) |
| 198 | +# # testit::assert(nrow(failed_tests) == 0) |
| 199 | +# # failed_tests <- t_tests[!t_tests$Dp_fitted_null,] |
| 200 | +# # print(failed_tests) |
| 201 | +# # testit::assert(nrow(failed_tests) == 0) |
| 202 | +# # failed_tests <- t_tests[!t_tests$D_fitted_null,] |
| 203 | +# # print(failed_tests) |
| 204 | +# # testit::assert(nrow(failed_tests) == 0) |
| 205 | + |
| 206 | +# # TODO: |
| 207 | +# # Could |
| 208 | +# # Could plot somehow? |
| 209 | +# # Need to melt this data somehow to plot |
| 210 | +# # grouped_plots <- grouped_data %>% do(plots=ggplot(data=.) + geom_boxplot(aes(f_fitted.x, f_fitted.y))) |
0 commit comments