@@ -149,14 +149,15 @@ pan_mat_to_gene_vec_tibble <- function(pan_mat){
149149# ' @param pan_mat a presence absence matrix of 1/0, rows are genomes, columns are genes
150150# ' @param desired_coverage proportion of the pangenome's gene content you want the reduced set to contain (.95)
151151# ' @param SEED random seed to use when selecting the first genome of the collection.
152+ # ' @param verbose logical (TRUE/FALSE); if TRUE, progress updates are printed while genomes are selected
152153# '
153154# ' @return a list of length 3: (1) names of the selected genomes, (2) the score at each iteration, (3) the proportion of the pangenome covered at each iteration
154155# ' @export
155156# '
156157# ' @examples #get_pangenome_representatives(pan_mat)
157158# ' @importFrom rlang .data
158159get_pangenome_representatives  <- 
159-   function (pan_mat , desired_coverage = .95 , SEED = 3 ){
160+   function (pan_mat , desired_coverage = .95 , SEED = 3 ,  verbose = FALSE ){
160161    #  hopefully get smallest set of genomes that gives desired coverage of pangenome
161162    #  browser()
162163    genomes  <-  pan_mat_to_gene_vec_tibble(pan_mat )
@@ -173,35 +174,47 @@ get_pangenome_representatives <-
173174
174175    #  best score = total number of genes in pangenome
175176    best_score  <-  base :: nrow(pan_mat )
176-     tot_genomes  <-  base :: col (pan_mat )
177+     tot_genomes  <-  base :: ncol (pan_mat )
177178    desired_score  <-  best_score  *  desired_coverage 
178179
179-     print(base :: paste(tot_genomes , ' total genomes' 
180-     print(base :: paste(best_score , ' = best possible score' 
181-     print(base :: paste(desired_score , '  = desired score' 
182- 
183180    score  <-  base :: length(cumulative_pan )
184181    scores  <-  base :: c(score )
185-     print(base :: paste0(' starting score = ' score ))
182+ 
183+     if  (verbose ) {
184+       print(base :: paste(tot_genomes , ' total genomes' 
185+       print(base :: paste(best_score , ' = best possible score' 
186+       print(base :: paste(desired_score , '  = desired score' 
187+       print(base :: paste0(' starting score = ' score ))
188+     }
189+ 
186190    while  (score  <  desired_score ){
187191
188192      #  calculates the number of new genes each genome would contribute to the cumulative pangenome
193+ 
194+       genomes  <- 
195+         genomes  | > 
196+         dplyr :: mutate(num_new = purrr :: map_int(.x  =  .data $ gene_vec , .f =  ~ (base :: sum(! (base :: is.element(.x , cumulative_pan )))))) | > 
197+         dplyr :: filter(.data $ num_new  >  0 ) #  removes genomes that do not contribute new information
198+ 
189199      #  filters the genomes to only those that contain the max number of new genes for that iteration
190-       #  selects the first one and adds it to  the cumulative pangenome. 
200+       #  selects a random genome from those that contribute  the max number of new genes 
191201      best_addition_genome  <- 
192202        genomes  | > 
193-         dplyr :: mutate(num_new = purrr :: map_int(.x  =  .data $ gene_vec , .f =  ~ (base :: sum(! (base :: is.element(.x , cumulative_pan )))))) | > 
194203        dplyr :: filter(.data $ num_new  ==  max(.data $ num_new )) | > 
195-         #  dplyr::arrange(dplyr::desc(.data$num_new)) |>
196204        dplyr :: slice_sample(n  =  1 )
197205
198206      cumulative_pan  <-  base :: c(cumulative_pan , best_addition_genome $ gene_vec [[1 ]]) | >  base :: unique()
199207      cumulative_genomes  <-  base :: c(cumulative_genomes , best_addition_genome $ genome_name [[1 ]])
200208      score  <-  base :: length(cumulative_pan )
201209      scores  <-  base :: c(scores , score )
202-       base :: print(base :: paste0(' new score = ' score ))
203210      proportion_coverages  <-  scores / best_score 
204-       print(base :: paste0(' proportion covered = ' score / best_score ))
211+ 
212+       if  (verbose ){
213+ 
214+         base :: print(base :: paste0(' new score = ' score ))
215+         base :: print(base :: paste0(' proportion covered = ' score / best_score ))
216+ 
217+       }
205218    }
206219    return (base :: list (cumulative_genomes , scores , proportion_coverages ))
207220  }
0 commit comments