Skip to content

Commit 87df6da

Browse files
committed
add row_min, row_max, minor cleanup elsewhere
1 parent 4ad02fe commit 87df6da

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1617
-1577
lines changed

DESCRIPTION

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: tidyext
22
Type: Package
33
Title: Tidy Extensions for Data Processing
4-
Version: 0.3.5
4+
Version: 0.3.6
55
Authors@R: person("Michael", "Clark", role = c("aut", "cre"), email = "micl@umich.edu")
66
Maintainer: Michael Clark <micl@umich.edu>
77
Description: Common data processing and summary functions to extend your tidy ways.
@@ -16,10 +16,8 @@ Depends:
1616
Imports:
1717
dplyr (>= 1.0.0),
1818
purrr,
19-
magrittr,
2019
rlang,
2120
scales,
22-
tibble,
2321
tidyr (>= 1.0.0)
2422
Suggests:
2523
ggplot2,
@@ -28,5 +26,6 @@ Suggests:
2826
stringi,
2927
stringr,
3028
testthat,
29+
tibble,
3130
covr
3231
RoxygenNote: 7.1.1

NAMESPACE

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export("%>%")
43
export(cat_by)
54
export(combn_2_col)
65
export(create_prediction_data)
@@ -15,17 +14,16 @@ export(onehot)
1514
export(pre_process)
1615
export(rnd)
1716
export(row_apply)
17+
export(row_max)
1818
export(row_means)
19+
export(row_min)
1920
export(row_sums)
2021
export(spread2)
2122
export(sum_NA)
2223
export(sum_NaN)
2324
export(sum_blank)
24-
export(vars)
2525
import(dplyr)
2626
importFrom(dplyr,quo_name)
27-
importFrom(dplyr,vars)
28-
importFrom(magrittr,"%>%")
2927
importFrom(purrr,map)
3028
importFrom(purrr,map_df)
3129
importFrom(purrr,map_dfr)
@@ -39,7 +37,6 @@ importFrom(stats,model.matrix)
3937
importFrom(stats,na.omit)
4038
importFrom(stats,quantile)
4139
importFrom(stats,sd)
42-
importFrom(tibble,rowid_to_column)
4340
importFrom(tidyr,gather)
4441
importFrom(tidyr,nest)
4542
importFrom(tidyr,unnest)

NEWS.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
# tidyext 0.3.6
2+
3+
Added `row_min` and `row_max`. Removed some dependencies.
4+
15
# tidyext 0.3.5
26

3-
Cleanup and update for R 4.0 and dplyr 1.0. Deprecate gather_multi and spread2.
7+
Cleanup and update for R 4.0 and dplyr 1.0. Deprecate `gather_multi` and `spread2`.
48

59
# tidyext 0.3.1
610

R/combn_2_col.R

Lines changed: 37 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -47,92 +47,57 @@
4747
#' only the indicator columns.
4848
#' @examples
4949
#' library(tidyext)
50-
#' d = data.frame(id = 1:4,
51-
#' labs = c('A/B', 'B/C/D/E', 'A/E', 'D/E'))
52-
#' test = combn_2_col(data=d, var='labs', max_m=3)
50+
#'
51+
#' d = data.frame(id = 1:4, labs = c('A/B', 'B/C/D/E', 'A/E', 'D/E'))
52+
#' test = combn_2_col(data = d, var = 'labs', max_m = 3)
5353
#' test
5454
#' str(test)
55+
#'
5556
#' d$labs = c('A B', 'B C D E', 'A E', 'D E')
56-
#' combn_2_col(data=d, var='labs', max_m=1)
57+
#' combn_2_col(data = d, var = 'labs', max_m = 1)
58+
#'
5759
#' d$labs = c('Tom, Dick & Harriet', "J'Sean", "OBG, Andreas", NA)
58-
#' combn_2_col(data=d, var='labs', sep=',', max_m=2, collapse='-')
59-
#'
60-
#' \dontrun{
61-
#' # requires at least tidytext
62-
#' tidy_dtm <- function(data, var, sep='-', max_m=3) {
63-
#' init = stringr::str_split(data[[var]], pattern = sep) # creates a list of separated letters
64-
#'
65-
#' # the following gets the combos with a dot separating drugs in a given combo
66-
#' # this first lapply could be parallelized if need be and is probably slowest
67-
#' # probably want to change to m = min(c(4, m)) so as to only limit to 4
68-
#' # see also, combinat::combn which is slightly faster than base R below
69-
#' observation_combos = init %>%
70-
#' lapply(function(x)
71-
#' sapply(seq_along(x), function(m)
72-
#' utils::combn(x, min(max_m, m), FUN=paste, collapse = '_')))
73-
#'
74-
#' # now we have a standard text analysis problem in need of a document term
75-
#' matrix
76-
#' documents = observation_combos %>% lapply(unlist)
77-
#'
78-
#' # create a 'tidy' form of documents and terms; each term (i.e. combo) only
79-
#' occurs once in a document
80-
#' doc_df = data.frame(id=rep(data$id, sapply(documents, length)),
81-
#' combos=unlist(documents),
82-
#' count=1) # each term only occurs once in the document
83-
#' doc_df %>%
84-
#' tidytext::cast_dfm(document=id, term=combos, value=count)
85-
#' }
86-
#'
87-
#' # requires at least text2vec
88-
#' ttv <- function(data, var, sep='-', max_m=3) {
89-
#' docs = sapply(stringr::str_split(data[[var]], pattern=sep),
90-
#' function(str_vec)
91-
#' sapply(seq_along(str_vec),
92-
#' function(m)
93-
#' combn(str_vec,
94-
#' m = min(max_m, m),
95-
#' FUN = paste,
96-
#' collapse = '_')
97-
#' ) %>% unlist()
98-
#' )
99-
#'
100-
#' toks = itoken(docs, progressbar = FALSE)
101-
#' vocab = create_vocabulary(toks)
102-
#' create_dtm(toks, vectorizer = vocab_vectorizer(vocab), progressbar = FALSE) %>%
103-
#' as.matrix() %>%
104-
#' cbind(data,.)
105-
#' }
106-
#'
107-
#' }
60+
#'
61+
#' combn_2_col(
62+
#' data = d,
63+
#' var = 'labs',
64+
#' sep = ',',
65+
#' max_m = 2,
66+
#' collapse = '-'
67+
#' )
68+
#'
10869
#'
10970
#'
11071
#'
11172
#' @export
112-
combn_2_col <- function(data,
113-
var,
114-
sep='[^[:alnum:]]+',
115-
max_m=1,
116-
collapse = '_',
117-
toInteger=FALSE,
118-
sparse=FALSE) {
73+
combn_2_col <- function(
74+
data,
75+
var,
76+
sep = '[^[:alnum:]]+',
77+
max_m = 1,
78+
collapse = '_',
79+
toInteger = FALSE,
80+
sparse = FALSE
81+
) {
11982

12083
if (is.null(data) | is.null(var))
12184
stop('Need data and variable name to continue.')
12285

12386
if (max_m < 1) stop('Need positive value for max_m.')
12487

12588
data$combo <-
126-
map(stringr::str_split(data[[var]], pattern=sep),
127-
function(str_vec)
128-
map(seq_along(str_vec),
129-
function(m)
130-
combn(str_vec,
131-
m = min(max_m, m),
132-
FUN = paste,
133-
collapse = collapse)
134-
) %>% unlist()
89+
map(stringr::str_split(data[[var]], pattern = sep),
90+
function(str_vec)
91+
map(seq_along(str_vec),
92+
function(m)
93+
combn(str_vec,
94+
m = min(max_m, m),
95+
FUN = paste,
96+
collapse = collapse)
97+
) %>%
98+
unlist()
13599
)
100+
136101
combo_cols <- unique(unlist(data$combo))
137102

138103
if (sparse) {
@@ -142,7 +107,7 @@ combn_2_col <- function(data,
142107
do.call(rbind,.) %>%
143108
Matrix::Matrix(sparse = TRUE,
144109
dimnames = list(rownames(data), combo_cols))
145-
)
110+
)
146111
}
147112

148113
if (toInteger) {
@@ -156,6 +121,7 @@ combn_2_col <- function(data,
156121
map(function(x) combo_cols %in% x) %>%
157122
do.call(rbind,.)
158123
}
124+
159125
data
160126
}
161127

R/gather_multi.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ gather_multi <- function(data,
132132
na.rm = na.rm,
133133
convert = convert,
134134
factor_key = factor_key) %>%
135-
rowid_to_column()
135+
mutate(rowid = 1:nrow(.)) %>% # changed to get rid of tibble requirement while deprecated
136136

137137
for (i in 2:length(varlist)) {
138138
data_long <- data %>%
@@ -144,7 +144,7 @@ gather_multi <- function(data,
144144
na.rm = na.rm,
145145
convert = convert,
146146
factor_key = factor_key) %>%
147-
rowid_to_column()%>%
147+
mutate(rowid = 1:nrow(.)) %>% # changed to get rid of tibble requirement while deprecated
148148
select(rowid, !!values[[i]]) %>%
149149
left_join(data_long, ., by='rowid')
150150
}

R/globals.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ utils::globalVariables(c(".", 'Max', 'Mean', 'Median', 'Min', 'Missing', 'N',
33
'Q1', 'Q3', 'SD', 'Variable', 'X1st.Qu.', 'X3rd.Qu.',
44
'perc', 'result', 'results', 'target', 'value',
55
'x.Freq', 'x.x', 'y.Freq', 'y.x', '%', 'rowid',
6-
'Group', 'Frequency'))
6+
'Group', 'Frequency', 'rn'))

R/head_tail.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
#' @examples
1212
#'
1313
#' library(tidyext)
14-
#' as.matrix(mtcars) %>%
15-
#' head_tail(6)
14+
#'
15+
#' head_tail(mtcars)
1616
#'
1717
head_tail = function(data, n_slice = 6) {
1818
# initial checks

R/num_summary.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ num_summary <- function(x, digits = 1, extra = FALSE) {
3535

3636
x <- as.numeric(x)
3737

38-
d <- tibble(
38+
d <- dplyr::tibble(
3939
N = length(na.omit(x)),
4040
data.frame(t(c(summary(x)))),
4141
SD = sd(x, na.rm = TRUE),

R/pre_process.R

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@
3333
#' them.
3434
#' @importFrom scales rescale
3535
#' @return A data frame that has been processed
36-
#' @export
3736
#'
3837
#' @examples
3938
#' library(tidyext)
39+
#' library(dplyr)
40+
#'
4041
#' pre_process(mtcars)
41-
#' pre_process(mtcars, log_vars=vars(mpg, wt))
42-
#' pre_process(mtcars, zero_start=vars(cyl, gear))
43-
#' pre_process(mtcars, zero_one=vars(mpg))
42+
#' pre_process(mtcars, log_vars = vars(mpg, wt))
43+
#' pre_process(mtcars, zero_start = vars(cyl, gear))
44+
#' pre_process(mtcars, zero_one = vars(mpg))
45+
#'
46+
#' @export
4447
pre_process <- function(
4548
data,
4649
std = TRUE,

R/row_sums.R

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#' @param ... The columns to sum, take the mean of, etc. \emph{Required}.
88
#' @param .fun The function to apply.
99
#' @param na_rm Whether to remove \code{NA} values or not. Default is \code{FALSE}.
10-
#' @param varname The column name of the sums means etc.
10+
#' @param varname The name of the new column holding the sums, means, etc., given as a character string.
1111
#'
1212
#' @details Simple wrappers for applying rowwise operations only for selected
1313
#' columns within the tidyverse approach to data processing. The
@@ -28,6 +28,9 @@
2828
#' d %>%
2929
#' row_means(matches('x|z'))
3030
#'
31+
#' d %>%
32+
#' row_max(matches('x|y'))
33+
#'
3134
#' row_apply(
3235
#' d ,
3336
#' everything(),
@@ -37,6 +40,8 @@
3740
#'
3841
#' @export
3942
row_sums <- function(data, ..., na_rm = FALSE, varname = 'sum') {
  # Adds one column, named by `varname`, holding the row sums of the columns
  # picked out by `...`; all existing columns of `data` are kept.
  #
  # Design note: dplyr 1.0 ships rowwise operations, but they showed no clear
  # benefit for this family of helpers apart from the min and max variants,
  # so the vectorised base rowSums() is used here.
  dplyr::mutate(
    .data = data,
    !!varname := rowSums(x = select(data, ...), na.rm = na_rm)
  )
}
4247

@@ -46,6 +51,28 @@ row_means <- function(data, ..., na_rm = FALSE, varname = 'mean') {
4651
dplyr::mutate(data, !!varname := rowMeans(select(data, ...), na.rm = na_rm))
4752
}
4853

54+
#' @export
55+
#' @rdname row_sums
56+
row_min <- function(data, ..., na_rm = FALSE, varname = 'min') {
  # Append the row-wise minimum of the selected columns to `data`.
  #
  # data:    a data frame.
  # ...:     tidyselect column specifications; several may be supplied.
  # na_rm:   passed to min() as its na.rm argument.
  # varname: name of the result column, as a character string.
  #
  # Returns `data` with every original column kept plus the `varname` column,
  # matching what row_sums() and row_means() return.

  # Narrow to the requested columns first, so that everything() below means
  # exactly "the columns the caller selected". c_across() accepts a single
  # selection, so forwarding `...` to it fails once two or more are given.
  chosen <- dplyr::select(data, ...)

  # rowwise() with no key needs no helper index column, which avoids both
  # 1:nrow() on empty input and clobbering a user column of the same name.
  by_row <- dplyr::mutate(
    dplyr::rowwise(chosen),
    !!varname := min(dplyr::c_across(dplyr::everything()), na.rm = na_rm)
  )

  # Inject the computed vector with !! so a column of `data` can never mask it.
  row_values <- by_row[[varname]]
  dplyr::mutate(data, !!varname := !!row_values)
}
64+
65+
#' @export
66+
#' @rdname row_sums
67+
row_max <- function(data, ..., na_rm = FALSE, varname = 'max') {
  # Append the row-wise maximum of the selected columns to `data`.
  #
  # data:    a data frame.
  # ...:     tidyselect column specifications; several may be supplied.
  # na_rm:   passed to max() as its na.rm argument.
  # varname: name of the result column, as a character string.
  #
  # Returns `data` with every original column kept plus the `varname` column,
  # matching what row_sums() and row_means() return.

  # Narrow to the requested columns first, so that everything() below means
  # exactly "the columns the caller selected". c_across() accepts a single
  # selection, so forwarding `...` to it fails once two or more are given.
  chosen <- dplyr::select(data, ...)

  # rowwise() with no key needs no helper index column, which avoids both
  # 1:nrow() on empty input and clobbering a user column of the same name.
  by_row <- dplyr::mutate(
    dplyr::rowwise(chosen),
    !!varname := max(dplyr::c_across(dplyr::everything()), na.rm = na_rm)
  )

  # Inject the computed vector with !! so a column of `data` can never mask it.
  row_values <- by_row[[varname]]
  dplyr::mutate(data, !!varname := !!row_values)
}
75+
4976
#' @export
5077
#' @rdname row_sums
5178
row_apply <- function(data, ..., .fun, varname = 'var') {

R/spread2.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
#'
3030
#' @return A data frame with 'wide' format.
3131
#' @seealso \code{\link[tidyr]{spread}}
32-
#' @importFrom tibble rowid_to_column
3332
#' @examples
3433
#' \dontrun{
3534
#' library(tidyext); library(tidyr)
@@ -90,7 +89,7 @@ spread2 <- function(data,
9089
} else {
9190
data <- data %>%
9291
bind_cols(data %>%
93-
tibble::rowid_to_column() %>%
92+
mutate(rowid = 1:nrow(.)) %>% # changed to get rid of tibble requirement while deprecated
9493
select(rowid)
9594
)
9695
}

R/utils.R

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1 @@
1-
#' @importFrom magrittr %>%
2-
#' @export
3-
magrittr::`%>%`
4-
5-
6-
7-
#' @importFrom dplyr vars
8-
#' @export
9-
dplyr::vars
1+
#' @importFrom dplyr vars `%>%` tibble

_pkgdown.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,16 @@ reference:
4040
- create_prediction_data
4141
- gather_multi
4242
- onehot
43-
# - pre_process
43+
- pre_process
4444
# - spread2
4545
- row_sums
4646
- title: "Miscellaneous"
4747
desc: >
4848
Miscellaneous functions.
4949
contents:
5050
- select_not
51+
- head_tail
52+
- rnd
5153
- tidyext
5254
figures:
5355
dev: svglite::svglite

0 commit comments

Comments
 (0)