Skip to content

Commit 1e76e70

Browse files
Fixes to handle 2-group datasets and rarefaction read depth
[NF-AmpIllumina] Update pipeline doc code
2 parents 28baa1b + ef00761 commit 1e76e70

File tree

1 file changed

+131
-34
lines changed

1 file changed

+131
-34
lines changed

Amplicon/Illumina/Pipeline_GL-DPPD-7104_Versions/GL-DPPD-7104-C.md

Lines changed: 131 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2012,6 +2012,63 @@ output_prefix <- ""
20122012
distance_methods <- c("euclidean", "bray")
20132013
normalization_methods <- c("vst", "rarefy")
20142014

2015+
# Check and adjust rarefaction depth to preserve at least 2 groups
2016+
library_sizes <- colSums(feature_table)
2017+
min_lib_size <- min(library_sizes)
2018+
max_lib_size <- max(library_sizes)
2019+
2020+
# Check group-wise library sizes
2021+
metadata_with_libsizes <- metadata
2022+
metadata_with_libsizes$library_size <- library_sizes[rownames(metadata)]
2023+
2024+
group_lib_stats <- metadata_with_libsizes %>%
2025+
group_by(!!sym(groups_colname)) %>%
2026+
summarise(
2027+
n_samples = n(),
2028+
min_lib = min(library_size),
2029+
max_lib = max(library_size),
2030+
median_lib = median(library_size),
2031+
.groups = 'drop'
2032+
)
2033+
2034+
# Find max depth that preserves at least 2 groups
2035+
groups_surviving_at_depth <- function(depth) {
2036+
sum(group_lib_stats$min_lib >= depth)
2037+
}
2038+
2039+
if(groups_surviving_at_depth(rarefaction_depth) < 2) {
2040+
2041+
# Find the depth that preserves exactly 2 groups (use the 2nd highest group minimum)
2042+
group_mins <- sort(group_lib_stats$min_lib, decreasing = TRUE)
2043+
if(length(group_mins) >= 2) {
2044+
adjusted_depth <- group_mins[2] # Use 2nd highest group minimum directly
2045+
} else {
2046+
adjusted_depth <- max(10, floor(min_lib_size * 0.8))
2047+
}
2048+
2049+
warning_msg <- c(
2050+
paste("Original rarefaction depth:", rarefaction_depth),
2051+
paste("Total groups in data:", nrow(group_lib_stats)),
2052+
"",
2053+
"Group-wise library size stats:",
2054+
paste(capture.output(print(group_lib_stats, row.names = FALSE)), collapse = "\n"),
2055+
"",
2056+
paste("WARNING: Rarefaction depth", rarefaction_depth, "would preserve only",
2057+
groups_surviving_at_depth(rarefaction_depth), "group(s)"),
2058+
paste("Beta diversity analysis requires at least 2 groups for statistical tests."),
2059+
"",
2060+
paste("Automatically adjusted rarefaction depth to:", adjusted_depth),
2061+
paste("This should preserve", groups_surviving_at_depth(adjusted_depth), "groups for analysis.")
2062+
)
2063+
2064+
writeLines(warning_msg, glue("{beta_diversity_out_dir}/{output_prefix}rarefaction_depth_warning.txt"))
2065+
message("WARNING: Rarefaction depth adjusted from ", rarefaction_depth, " to ", adjusted_depth,
2066+
" to preserve at least 2 groups - see ", output_prefix, "rarefaction_depth_warning.txt")
2067+
2068+
# Update the rarefaction depth
2069+
rarefaction_depth <- adjusted_depth
2070+
}
2071+
20152072
options(warn=-1) # ignore warnings
20162073
# Run the analysis
20172074
walk2(.x = normalization_methods, .y = distance_methods,
@@ -2835,44 +2892,71 @@ output <- ancombc2(data = tse,
28352892
iter_control = list(tol = 1e-5, max_iter = 20,
28362893
verbose = FALSE),
28372894
mdfdr_control = list(fwer_ctrl_method = "fdr", B = 100),
2838-
lme_control = NULL)
2839-
2895+
lme_control = NULL, trend_control = NULL)
28402896

2897+
# For 2-group comparisons, use res instead of mapping across pairwise results in res_pair
2898+
is_two_group <- length(unique(tse[[group]])) == 2
28412899

28422900
# Create new column names - the original column names given by ANCOMBC are
28432901
# difficult to understand
28442902
tryCatch({
2845-
new_colnames <- map_chr(output$res_pair %>% colnames,
2846-
function(colname) {
2847-
# Columns comparing a group to the reference group
2848-
if(str_count(colname,groups_colname) == 1){
2849-
str_replace_all(string=colname,
2850-
pattern=glue("(.+)_{groups_colname}(.+)"),
2851-
replacement=glue("\\1_(\\2)v({ref_group})")) %>%
2852-
str_replace(pattern = "^lfc_", replacement = "lnFC_") %>%
2853-
str_replace(pattern = "^se_", replacement = "lnfcSE_") %>%
2854-
str_replace(pattern = "^W_", replacement = "Wstat_") %>%
2855-
str_replace(pattern = "^p_", replacement = "pvalue_") %>%
2856-
str_replace(pattern = "^q_", replacement = "qvalue_")
2857-
2858-
# Columns with normal two groups comparison
2859-
} else if(str_count(colname,groups_colname) == 2){
2860-
2861-
str_replace_all(string=colname,
2862-
pattern=glue("(.+)_{groups_colname}(.+)_{groups_colname}(.+)"),
2863-
replacement=glue("\\1_(\\2)v(\\3)")) %>%
2864-
str_replace(pattern = "^lfc_", replacement = "lnFC_") %>%
2865-
str_replace(pattern = "^se_", replacement = "lnfcSE_") %>%
2866-
str_replace(pattern = "^W_", replacement = "Wstat_") %>%
2867-
str_replace(pattern = "^p_", replacement = "pvalue_") %>%
2868-
str_replace(pattern = "^q_", replacement = "qvalue_")
2869-
2870-
# Feature/ ASV column
2871-
} else{
2872-
2873-
return(colname)
2874-
}
2875-
} )
2903+
# Check if this is a 2-group comparison (using res instead of res_pair)
2904+
if(is_two_group) {
2905+
# For 2-group comparisons, use the group-specific columns
2906+
group_cols <- colnames(output$res)[grepl(paste0("^[a-zA-Z_]+_", group), colnames(output$res))]
2907+
if(length(group_cols) > 0) {
2908+
# Extract group name from the first group-specific column
2909+
group_name <- str_replace(group_cols[1], paste0("^[a-zA-Z_]+_", group), "")
2910+
# Create comparison name
2911+
comparison_name <- glue("({ref_group})v({group_name})")
2912+
2913+
new_colnames <- c(
2914+
feature, # Keep the feature column name
2915+
glue("lnFC_{comparison_name}"),
2916+
glue("lnfcSE_{comparison_name}"),
2917+
glue("Wstat_{comparison_name}"),
2918+
glue("pvalue_{comparison_name}"),
2919+
glue("qvalue_{comparison_name}"),
2920+
glue("diff_{comparison_name}"),
2921+
glue("passed_ss_{comparison_name}")
2922+
)
2923+
} else {
2924+
stop("Could not identify group-specific column for 2-group comparison")
2925+
}
2926+
} else {
2927+
# Multi-group comparisons
2928+
new_colnames <- map_chr(output$res_pair %>% colnames,
2929+
function(colname) {
2930+
# Columns comparing a group to the reference group
2931+
if(str_count(colname,group) == 1){
2932+
str_replace_all(string=colname,
2933+
pattern=glue("(.+)_{group}(.+)"),
2934+
replacement=glue("\\1_(\\2)v({ref_group})")) %>%
2935+
str_replace(pattern = "^lfc_", replacement = "lnFC_") %>%
2936+
str_replace(pattern = "^se_", replacement = "lnfcSE_") %>%
2937+
str_replace(pattern = "^W_", replacement = "Wstat_") %>%
2938+
str_replace(pattern = "^p_", replacement = "pvalue_") %>%
2939+
str_replace(pattern = "^q_", replacement = "qvalue_")
2940+
2941+
# Columns with normal two groups comparison
2942+
} else if(str_count(colname,group) == 2){
2943+
2944+
str_replace_all(string=colname,
2945+
pattern=glue("(.+)_{group}(.+)_{group}(.+)"),
2946+
replacement=glue("\\1_(\\2)v(\\3)")) %>%
2947+
str_replace(pattern = "^lfc_", replacement = "lnFC_") %>%
2948+
str_replace(pattern = "^se_", replacement = "lnfcSE_") %>%
2949+
str_replace(pattern = "^W_", replacement = "Wstat_") %>%
2950+
str_replace(pattern = "^p_", replacement = "pvalue_") %>%
2951+
str_replace(pattern = "^q_", replacement = "qvalue_")
2952+
2953+
# Feature/ ASV column
2954+
} else{
2955+
2956+
return(colname)
2957+
}
2958+
} )
2959+
}
28762960
}, error = function(e) {
28772961
writeLines(c("ANCOMBC2 script failed at res_pair processing:", e$message,
28782962
"\n\nDiagnostics:",
@@ -2890,7 +2974,20 @@ new_colnames[match("taxon", new_colnames)] <- feature
28902974

28912975

28922976
# Rename columns
2893-
paired_stats_df <- output$res_pair %>% set_names(new_colnames)
2977+
if(is_two_group) {
2978+
# For 2-group comparisons, we need to select the group-specific columns and rename them
2979+
# The columns are named like "lfc_groupsGround Control", "se_groupsGround Control", etc.
2980+
2981+
group_specific_cols <- colnames(output$res)[grepl(paste0("^[a-zA-Z_]+_", group), colnames(output$res))]
2982+
2983+
# Create a new data frame with the selected columns
2984+
paired_stats_df <- output$res %>%
2985+
select(taxon, all_of(group_specific_cols)) %>%
2986+
set_names(new_colnames)
2987+
} else {
2988+
# Multi-group comparisons
2989+
paired_stats_df <- output$res_pair %>% set_names(new_colnames)
2990+
}
28942991

28952992
# Get the unique comparison names
28962993
uniq_comps <- str_replace_all(new_colnames, ".+_(\\(.+\\))", "\\1") %>% unique()

0 commit comments

Comments
 (0)