Skip to content

Commit 8a1cac7

Browse files
committed
add R/r_repos_data_cast.R
1 parent 42aa8a2 commit 8a1cac7

19 files changed

+245
-56
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ Imports:
3535
stringr,
3636
testthat (>= 3.0.0),
3737
RCurl,
38-
rvest
38+
rvest,
39+
parallel
3940
Suggests:
4041
markdown,
4142
rmarkdown,

NAMESPACE

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ importFrom(data.table,setnames)
3333
importFrom(gh,gh)
3434
importFrom(gh,gh_token)
3535
importFrom(methods,is)
36-
importFrom(parallel,detectCores)
3736
importFrom(parallel,mclapply)
3837
importFrom(stats,setNames)
3938
importFrom(stringr,str_split)

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
## Bug fixes
1919

2020
* Elevate `rvest` to *Imports*.
21+
* Add `parallel` to *Imports*.
2122

2223
# echogithub 0.99.0
2324

R/check_pkgs.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
check_pkgs <- function(pkgs){
2+
if(is.character(pkgs)) pkgs <- data.table::data.table(package=pkgs)
3+
return(pkgs)
4+
}

R/description_extract.R

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,58 @@
33
#' Extract information from an R package's DESCRIPTION file.
44
#' @param fields Fields to extract.
55
#' @param add_html Add HTML styling to certain fields (e.g "authors").
6+
#' @param as_datatable Convert the results into a \link[data.table]{data.table}.
67
#' @inheritParams description_find
78
#' @inheritParams github_files
9+
#' @returns A named list or \link[data.table]{data.table}.
10+
#'
811
#' @export
12+
#' @importFrom data.table as.data.table
913
#' @examples
1014
#'\dontrun{
1115
#' res <- description_extract(repo="echolocatoR")
1216
#'}
13-
description_extract <- function(desc_file,
17+
description_extract <- function(desc_file = NULL,
1418
repo = NULL,
1519
fields = c("owner",
1620
"repo",
1721
"authors"),
1822
add_html = FALSE,
23+
as_datatable = FALSE,
1924
verbose = TRUE){
2025
#### Find or read DESCRIPTION file ####
21-
# desc_file <- description_find(repo = repo,
22-
# desc_file = desc_file,
23-
# verbose = verbose)
26+
if(is.null(desc_file)){
27+
desc_file <- description_find(repo = repo,
28+
desc_file = desc_file,
29+
verbose = verbose)
30+
}
2431
force(desc_file)
2532
if(is.null(desc_file)) {
2633
stopper("desc_file is required for description_extract")
2734
}
35+
if(is.null(fields)) {
36+
fields <- c("owner","repo",names(desc_file))
37+
}
38+
fields <- unique(fields)
39+
fields <- fields[fields %in% c("owner","repo",names(desc_file))]
2840
#### Extract info ####
41+
messager("Extracting",length(fields),"field(s).",v=verbose)
2942
res <- lapply(stats::setNames(fields,
3043
fields),
3144
function(f){
32-
messager("Inferring",f,"from DESCRIPTION file.",v=verbose)
45+
# messager("Inferring",f,"from DESCRIPTION file.",v=verbose)
3346
#### Check fields ####
3447
if(f=="owner") {
3548
i <- 2
3649
} else if(f=="repo") {
3750
i <- 1
38-
} else if(f=="authors") {
51+
} else if(f %in% c("authors","Authors@R")) {
3952
authors <- description_authors(desc_file = desc_file,
40-
add_html = add_html)
41-
messager("+ Inferred authors:",authors)
53+
add_html = add_html)
4254
return(authors)
4355
} else if(f %in% names(desc_file)){
4456
return(desc_file[[f]])
45-
}else {
57+
} else {
4658
stp <- paste("fields must be one of:",
4759
paste("\n -",c(
4860
eval(formals(description_extract)$fields),
@@ -61,9 +73,13 @@ description_extract <- function(desc_file,
6173
stop(stp)
6274
}
6375
info <- rev(strsplit(URL,"/")[[1]])[i]
64-
messager(paste0("+ Inferred ",f,":"),info,v=verbose)
76+
# messager(paste0("+ ",f,":"),info,v=verbose)
6577
return(info)
6678
})
6779
#### Return ####
68-
return(res)
80+
if(isTRUE(as_datatable)){
81+
return(data.table::as.data.table(res))
82+
} else {
83+
return(res)
84+
}
6985
}

R/description_extract_multi.R

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
description_extract_multi <- function(pkgs,
2+
nThread=1,
3+
verbose=TRUE){
4+
requireNamespace("parallel")
5+
6+
pkgs <- check_pkgs(pkgs = pkgs)
7+
meta_desc <- parallel::mclapply(pkgs$package,
8+
function(p){
9+
tryCatch({
10+
description_extract(repo = p,
11+
fields = NULL,
12+
as_datatable = TRUE,
13+
verbose = FALSE)
14+
}, error=function(e){messager(e,v=verbose);NULL})
15+
}, mc.cores = nThread) |>
16+
data.table::rbindlist(fill = TRUE) |>
17+
data.table::setnames("Package","package",
18+
skip_absent = TRUE)
19+
if(nrow(meta_desc)==0){
20+
messager("WARNING: No metadata retrieved from any DESCRIPTION files.",
21+
v=verbose)
22+
} else {
23+
data.table::setkeyv(meta_desc,"package")
24+
}
25+
return(meta_desc)
26+
}

R/description_find.R

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#' desc_file <- description_find(repo="data.table")
1818
description_find <- function(desc_file = NULL,
1919
owner = NULL,
20-
repo = NULL,
20+
repo = NULL,
2121
use_github = FALSE,
2222
verbose = TRUE){
2323

@@ -56,12 +56,14 @@ description_find <- function(desc_file = NULL,
5656
return(dfile)
5757
#### From local file ####
5858
} else if(!is.null(desc_file) &&
59-
file.exists(desc_file)) {
59+
file.exists(desc_file) &&
60+
any(description_read(dcf = desc_file)$Package==repo)) {
6061
messager("Getting DESCRIPTION file from a local file.",v=verbose)
6162
dfile <- description_read(dcf = desc_file)
6263
return(dfile)
6364
#### From remote file ####
64-
} else if(file.exists("DESCRIPTION")) {
65+
} else if(file.exists("DESCRIPTION") &&
66+
any(description_read(dcf = desc_file)$Package==repo)) {
6567
messager("Getting DESCRIPTION file from a local file in the",
6668
"current working directory.",v=verbose)
6769
dfile <- description_read(dcf = "DESCRIPTION")
@@ -73,7 +75,8 @@ description_find <- function(desc_file = NULL,
7375
dfile <- description_read(dcf = file)
7476
return(dfile)
7577
#### From GitHub Repo ####
76-
} else if (!is.null(owner) && !is.null(repo)){
78+
} else if (!is.null(owner) &&
79+
!is.null(repo)){
7780
messager("Getting DESCRIPTION file from GitHub repository.",v=verbose)
7881
dt <- github_files(owner = owner,
7982
repo = repo,

R/github_dependencies.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,17 @@ github_dependencies <- function(owner,
6969
workflow_url=workflow_url,
7070
d)
7171
}) |> data.table::rbindlist(fill = TRUE)
72+
#### Add owner/repo for each action ####
73+
dt <- cbind(dt,
74+
data.table::data.table(
75+
stringr::str_split(dt$action,"/", simplify = TRUE)
76+
)|> `colnames<-`(c("owner","repo")))
7277
## Unsure why some rows have the branch name instead of a number.
7378
#### Report ####
7479
messager("Found",formatC(nrow(dt),big.mark = ","),
7580
"dependencies across",
76-
formatC(dt$workflow,big.mark = ","),"workflows.",v=verbose)
81+
formatC(length(unique(dt$workflow)),big.mark = ","),
82+
"workflows.",v=verbose)
7783
#### Return ####
7884
return(dt)
7985
}

R/github_files_download.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#'
88
#' @export
99
#' @importFrom stringr str_split
10-
#' @importFrom parallel detectCores mclapply
10+
#' @importFrom parallel mclapply
1111
#' @importFrom utils download.file
1212
#' @examples
1313
#' dt <- github_files(owner = "RajLabMSSM",

R/r_repos_data.R

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,35 @@
22
#'
33
#' Gather data on which repositories R packages are distributed through
44
#' (e.g. CRAN, Bioc, rOpenSci, and/or GitHub).
5+
#' @param include A subset of packages to return data for.
56
#' @param add_downloads Add the number of downloads from each repository.
7+
#' @param add_descriptions Add metadata extracted from \emph{DESCRIPTION} files.
8+
#' @param cast Cast the results to wide format
9+
#' so that each package only appears in one row.
10+
#' @param nThread Number of threads to parallelise \code{add_descriptions}
11+
#' step across.
612
#' @param verbose Print messages.
713
#' @inheritParams r_repos
814
#' @inheritParams BiocManager::repositories
915
#' @returns data.table
1016
#'
1117
#' @export
1218
#' @importFrom utils installed.packages available.packages
19+
#' @importFrom data.table merge.data.table
1320
#' @examples
1421
#' pkgs <- r_repos_data()
15-
r_repos_data <- function(add_downloads=FALSE,
22+
r_repos_data <- function(include=NULL,
23+
add_downloads=FALSE,
24+
add_descriptions=FALSE,
1625
which=r_repos_opts(),
26+
cast=FALSE,
1727
version=NULL,
28+
nThread=1,
1829
verbose=TRUE){
1930
requireNamespace("rvest")
2031
requireNamespace("BiocManager")
2132
requireNamespace("githubinstall")
33+
installed <- package <- NULL;
2234

2335
which <- tolower(which)
2436
res <- list()
@@ -79,16 +91,42 @@ r_repos_data <- function(add_downloads=FALSE,
7991
github <- githubinstall::gh_list_packages()
8092
res[["GitHub"]] <- data.table::data.table(package=github$package_name)
8193
}
94+
#### GitHub ####
95+
if("local" %in% which){
96+
messager("Gathering R packages: local",v=verbose)
97+
# githubinstall::gh_update_package_list()
98+
local <- utils::installed.packages()
99+
res[["local"]] <- data.table::data.table(package=rownames(local))
100+
}
82101
#### Merge all repos ####
83102
pkgs <- data.table::rbindlist(res,
84103
fill = TRUE,
85104
use.names = TRUE,
86105
idcol = "r_repo")
106+
if(!is.null(include)) pkgs <- pkgs[package %in% include]
107+
#### Add installed info ####
108+
pkgs[,installed:=package %in% rownames(utils::installed.packages())]
87109
#### Add downloads ####
88110
if(isTRUE(add_downloads)){
89111
pkgs <- r_repos_downloads(pkgs = pkgs,
90112
which = which,
91113
verbose = verbose)
92114
}
115+
#### Cast wider ####
116+
if(isTRUE(cast)){
117+
pkgs <- r_repos_data_cast(pkgs = pkgs,
118+
verbose = verbose)
119+
120+
}
121+
#### Add DESRIPTION metadata ####
122+
if(isTRUE(add_descriptions)){
123+
meta_desc <- description_extract_multi(pkgs = unique(pkgs$package),
124+
nThread = nThread,
125+
verbose = verbose)
126+
pkgs <- data.table::merge.data.table(meta_desc,
127+
pkgs,
128+
all = TRUE,
129+
by="package")
130+
}
93131
return(pkgs)
94132
}

0 commit comments

Comments
 (0)