@@ -2012,6 +2012,63 @@ output_prefix <- ""
2012
2012
distance_methods <- c(" euclidean" , " bray" )
2013
2013
normalization_methods <- c(" vst" , " rarefy" )
2014
2014
2015
+ # Check and adjust rarefaction depth to preserve at least 2 groups
2016
+ library_sizes <- colSums(feature_table )
2017
+ min_lib_size <- min(library_sizes )
2018
+ max_lib_size <- max(library_sizes )
2019
+
2020
+ # Check group-wise library sizes
2021
+ metadata_with_libsizes <- metadata
2022
+ metadata_with_libsizes $ library_size <- library_sizes [rownames(metadata )]
2023
+
2024
+ group_lib_stats <- metadata_with_libsizes %> %
2025
+ group_by(!! sym(groups_colname )) %> %
2026
+ summarise(
2027
+ n_samples = n(),
2028
+ min_lib = min(library_size ),
2029
+ max_lib = max(library_size ),
2030
+ median_lib = median(library_size ),
2031
+ .groups = ' drop'
2032
+ )
2033
+
2034
+ # Find max depth that preserves at least 2 groups
2035
+ groups_surviving_at_depth <- function (depth ) {
2036
+ sum(group_lib_stats $ min_lib > = depth )
2037
+ }
2038
+
2039
+ if (groups_surviving_at_depth(rarefaction_depth ) < 2 ) {
2040
+
2041
+ # Find the depth that preserves exactly 2 groups (use the 2nd highest group minimum)
2042
+ group_mins <- sort(group_lib_stats $ min_lib , decreasing = TRUE )
2043
+ if (length(group_mins ) > = 2 ) {
2044
+ adjusted_depth <- group_mins [2 ] # Use 2nd highest group minimum directly
2045
+ } else {
2046
+ adjusted_depth <- max(10 , floor(min_lib_size * 0.8 ))
2047
+ }
2048
+
2049
+ warning_msg <- c(
2050
+ paste(" Original rarefaction depth:" , rarefaction_depth ),
2051
+ paste(" Total groups in data:" , nrow(group_lib_stats )),
2052
+ " " ,
2053
+ " Group-wise library size stats:" ,
2054
+ paste(capture.output(print(group_lib_stats , row.names = FALSE )), collapse = " \n " ),
2055
+ " " ,
2056
+ paste(" WARNING: Rarefaction depth" , rarefaction_depth , " would preserve only" ,
2057
+ groups_surviving_at_depth(rarefaction_depth ), " group(s)" ),
2058
+ paste(" Beta diversity analysis requires at least 2 groups for statistical tests." ),
2059
+ " " ,
2060
+ paste(" Automatically adjusted rarefaction depth to:" , adjusted_depth ),
2061
+ paste(" This should preserve" , groups_surviving_at_depth(adjusted_depth ), " groups for analysis." )
2062
+ )
2063
+
2064
+ writeLines(warning_msg , glue(" {beta_diversity_out_dir}/{output_prefix}rarefaction_depth_warning.txt" ))
2065
+ message(" WARNING: Rarefaction depth adjusted from " , rarefaction_depth , " to " , adjusted_depth ,
2066
+ " to preserve at least 2 groups - see " , output_prefix , " rarefaction_depth_warning.txt" )
2067
+
2068
+ # Update the rarefaction depth
2069
+ rarefaction_depth <- adjusted_depth
2070
+ }
2071
+
2015
2072
options(warn = - 1 ) # ignore warnings
2016
2073
# Run the analysis
2017
2074
walk2(.x = normalization_methods , .y = distance_methods ,
@@ -2835,44 +2892,71 @@ output <- ancombc2(data = tse,
2835
2892
iter_control = list (tol = 1e-5 , max_iter = 20 ,
2836
2893
verbose = FALSE ),
2837
2894
mdfdr_control = list (fwer_ctrl_method = " fdr" , B = 100 ),
2838
- lme_control = NULL )
2839
-
2895
+ lme_control = NULL , trend_control = NULL )
2840
2896
2897
+ # For 2-group comparisons, use res instead of mapping across pairwise results in res_pair
2898
+ is_two_group <- length(unique(tse [[group ]])) == 2
2841
2899
2842
2900
# Create new column names - the original column names given by ANCOMBC are
2843
2901
# difficult to understand
2844
2902
tryCatch({
2845
- new_colnames <- map_chr(output $ res_pair %> % colnames ,
2846
- function (colname ) {
2847
- # Columns comparing a group to the reference group
2848
- if (str_count(colname ,groups_colname ) == 1 ){
2849
- str_replace_all(string = colname ,
2850
- pattern = glue(" (.+)_{groups_colname}(.+)" ),
2851
- replacement = glue(" \\ 1_(\\ 2)v({ref_group})" )) %> %
2852
- str_replace(pattern = " ^lfc_" , replacement = " lnFC_" ) %> %
2853
- str_replace(pattern = " ^se_" , replacement = " lnfcSE_" ) %> %
2854
- str_replace(pattern = " ^W_" , replacement = " Wstat_" ) %> %
2855
- str_replace(pattern = " ^p_" , replacement = " pvalue_" ) %> %
2856
- str_replace(pattern = " ^q_" , replacement = " qvalue_" )
2857
-
2858
- # Columns with normal two groups comparison
2859
- } else if (str_count(colname ,groups_colname ) == 2 ){
2860
-
2861
- str_replace_all(string = colname ,
2862
- pattern = glue(" (.+)_{groups_colname}(.+)_{groups_colname}(.+)" ),
2863
- replacement = glue(" \\ 1_(\\ 2)v(\\ 3)" )) %> %
2864
- str_replace(pattern = " ^lfc_" , replacement = " lnFC_" ) %> %
2865
- str_replace(pattern = " ^se_" , replacement = " lnfcSE_" ) %> %
2866
- str_replace(pattern = " ^W_" , replacement = " Wstat_" ) %> %
2867
- str_replace(pattern = " ^p_" , replacement = " pvalue_" ) %> %
2868
- str_replace(pattern = " ^q_" , replacement = " qvalue_" )
2869
-
2870
- # Feature/ ASV column
2871
- } else {
2872
-
2873
- return (colname )
2874
- }
2875
- } )
2903
+ # Check if this is a 2-group comparison (using res instead of res_pair)
2904
+ if (is_two_group ) {
2905
+ # For 2-group comparisons, use the group-specific columns
2906
+ group_cols <- colnames(output $ res )[grepl(paste0(" ^[a-zA-Z_]+_" , group ), colnames(output $ res ))]
2907
+ if (length(group_cols ) > 0 ) {
2908
+ # Extract group name from the first group-specific column
2909
+ group_name <- str_replace(group_cols [1 ], paste0(" ^[a-zA-Z_]+_" , group ), " " )
2910
+ # Create comparison name
2911
+ comparison_name <- glue(" ({ref_group})v({group_name})" )
2912
+
2913
+ new_colnames <- c(
2914
+ feature , # Keep the feature column name
2915
+ glue(" lnFC_{comparison_name}" ),
2916
+ glue(" lnfcSE_{comparison_name}" ),
2917
+ glue(" Wstat_{comparison_name}" ),
2918
+ glue(" pvalue_{comparison_name}" ),
2919
+ glue(" qvalue_{comparison_name}" ),
2920
+ glue(" diff_{comparison_name}" ),
2921
+ glue(" passed_ss_{comparison_name}" )
2922
+ )
2923
+ } else {
2924
+ stop(" Could not identify group-specific column for 2-group comparison" )
2925
+ }
2926
+ } else {
2927
+ # Multi-group comparisons
2928
+ new_colnames <- map_chr(output $ res_pair %> % colnames ,
2929
+ function (colname ) {
2930
+ # Columns comparing a group to the reference group
2931
+ if (str_count(colname ,group ) == 1 ){
2932
+ str_replace_all(string = colname ,
2933
+ pattern = glue(" (.+)_{group}(.+)" ),
2934
+ replacement = glue(" \\ 1_(\\ 2)v({ref_group})" )) %> %
2935
+ str_replace(pattern = " ^lfc_" , replacement = " lnFC_" ) %> %
2936
+ str_replace(pattern = " ^se_" , replacement = " lnfcSE_" ) %> %
2937
+ str_replace(pattern = " ^W_" , replacement = " Wstat_" ) %> %
2938
+ str_replace(pattern = " ^p_" , replacement = " pvalue_" ) %> %
2939
+ str_replace(pattern = " ^q_" , replacement = " qvalue_" )
2940
+
2941
+ # Columns with normal two groups comparison
2942
+ } else if (str_count(colname ,group ) == 2 ){
2943
+
2944
+ str_replace_all(string = colname ,
2945
+ pattern = glue(" (.+)_{group}(.+)_{group}(.+)" ),
2946
+ replacement = glue(" \\ 1_(\\ 2)v(\\ 3)" )) %> %
2947
+ str_replace(pattern = " ^lfc_" , replacement = " lnFC_" ) %> %
2948
+ str_replace(pattern = " ^se_" , replacement = " lnfcSE_" ) %> %
2949
+ str_replace(pattern = " ^W_" , replacement = " Wstat_" ) %> %
2950
+ str_replace(pattern = " ^p_" , replacement = " pvalue_" ) %> %
2951
+ str_replace(pattern = " ^q_" , replacement = " qvalue_" )
2952
+
2953
+ # Feature/ ASV column
2954
+ } else {
2955
+
2956
+ return (colname )
2957
+ }
2958
+ } )
2959
+ }
2876
2960
}, error = function (e ) {
2877
2961
writeLines(c(" ANCOMBC2 script failed at res_pair processing:" , e $ message ,
2878
2962
" \n\n Diagnostics:" ,
@@ -2890,7 +2974,20 @@ new_colnames[match("taxon", new_colnames)] <- feature
2890
2974
2891
2975
2892
2976
# Rename columns
2893
- paired_stats_df <- output $ res_pair %> % set_names(new_colnames )
2977
+ if (is_two_group ) {
2978
+ # For 2-group comparisons, we need to select the group-specific columns and rename them
2979
+ # The columns are named like "lfc_groupsGround Control", "se_groupsGround Control", etc.
2980
+
2981
+ group_specific_cols <- colnames(output $ res )[grepl(paste0(" ^[a-zA-Z_]+_" , group ), colnames(output $ res ))]
2982
+
2983
+ # Create a new data frame with the selected columns
2984
+ paired_stats_df <- output $ res %> %
2985
+ select(taxon , all_of(group_specific_cols )) %> %
2986
+ set_names(new_colnames )
2987
+ } else {
2988
+ # Multi-group comparisons
2989
+ paired_stats_df <- output $ res_pair %> % set_names(new_colnames )
2990
+ }
2894
2991
2895
2992
# Get the unique comparison names
2896
2993
uniq_comps <- str_replace_all(new_colnames , " .+_(\\ (.+\\ ))" , " \\ 1" ) %> % unique()
0 commit comments