Skip to content

Commit 4247db6

Browse files
Add files via upload
1 parent 09f9141 commit 4247db6

29 files changed

+365
-230
lines changed

CRAN-RELEASE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
This package was submitted to CRAN on 2020-03-24.
2-
Once it is accepted, delete this file and tag the release (commit 8797722950).
1+
This package was submitted to CRAN on 2020-04-02.
2+
Once it is accepted, delete this file and tag the release (commit fbccc9581e).

DESCRIPTION

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: tidyfst
22
Title: Tidy Verbs for Fast Data Manipulation
3-
Version: 0.9.3
3+
Version: 0.9.4
44
Depends: R (>= 3.3.0)
55
Authors@R:
66
person(given = "Tian-Yuan",
@@ -28,5 +28,6 @@ Suggests:
2828
rmarkdown,
2929
nycflights13,
3030
pryr,
31-
tidyr
31+
tidyr,
32+
ggplot2
3233
VignetteBuilder: knitr

NAMESPACE

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,22 @@
22

33
export("%>%")
44
export(add_count_dt)
5+
export(ajoin)
56
export(anti_join_dt)
67
export(arrange_dt)
78
export(as.data.table)
89
export(as_dt)
910
export(as_fst)
1011
export(chop_dt)
12+
export(col_rn)
1113
export(complete_dt)
1214
export(copy)
1315
export(count_dt)
1416
export(cummean)
1517
export(data.table)
1618
export(delete_na_cols)
1719
export(delete_na_rows)
20+
export(df_mat)
1821
export(distinct_dt)
1922
export(drop_na_dt)
2023
export(dummy_dt)
@@ -24,6 +27,7 @@ export(fill_na_dt)
2427
export(filter_dt)
2528
export(filter_fst)
2629
export(fintersect)
30+
export(fjoin)
2731
export(fread)
2832
export(frollapply)
2933
export(fsetdiff)
@@ -34,6 +38,7 @@ export(fwrite)
3438
export(group_by_dt)
3539
export(group_dt)
3640
export(group_exe_dt)
41+
export(ijoin)
3742
export(import_fst)
3843
export(in_dt)
3944
export(inner_join_dt)
@@ -42,7 +47,9 @@ export(lag_dt)
4247
export(lead_dt)
4348
export(left_join_dt)
4449
export(like)
50+
export(ljoin)
4551
export(longer_dt)
52+
export(mat_df)
4653
export(mutate_dt)
4754
export(mutate_vars)
4855
export(mutate_when)
@@ -56,6 +63,8 @@ export(rename_dt)
5663
export(replace_dt)
5764
export(replace_na_dt)
5865
export(right_join_dt)
66+
export(rjoin)
67+
export(rn_col)
5968
export(rowwise_dt)
6069
export(sample_dt)
6170
export(sample_frac_dt)
@@ -65,16 +74,8 @@ export(select_fst)
6574
export(select_mix)
6675
export(semi_join_dt)
6776
export(separate_dt)
68-
export(set_add_count)
69-
export(set_arrange)
70-
export(set_dt)
71-
export(set_fill_na)
72-
export(set_in_dt)
73-
export(set_mutate)
74-
export(set_relocate)
75-
export(set_rename)
76-
export(set_replace_na)
7777
export(shift_fill)
78+
export(sjoin)
7879
export(slice_dt)
7980
export(slice_fst)
8081
export(squeeze_dt)
@@ -94,6 +95,7 @@ export(uncount_dt)
9495
export(uniqueN)
9596
export(unite_dt)
9697
export(unnest_dt)
98+
export(utf8_encoding)
9799
export(wider_dt)
98100
import(data.table)
99101
import(fst)

R/arrange_dt.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#' @param ... Arrange by what group? Minus symbol means arrange by
66
#' descending order.
77
#' @return data.table
8-
#' @seealso \code{\link[dplyr]{arrange}},\code{\link[maditr]{dt_arrange}}
8+
#' @seealso \code{\link[dplyr]{arrange}}
99
#' @examples
1010
#'
1111
#' iris %>% arrange_dt(Sepal.Length)

R/complete.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,17 @@
3131
#' df %>% complete_dt(item_id=1:3)
3232
#' df %>% complete_dt(item_id=1:3,group=1:2)
3333
#' df %>% complete_dt(item_id=1:3,group=1:3,item_name=c("a","b","c"))
34+
#'
3435

3536
#' @export
37+
3638
complete_dt = function(.data,...,fill = NA){
3739
dt = as_dt(.data)
3840

3941
if(
4042
substitute(list(...)) %>%
41-
deparse() %>%
42-
str_detect("=")
43+
deparse() %>%
44+
str_detect("=")
4345
) {
4446
list(...) %>%
4547
lapply(unique) %>%

R/count_dt.R

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#' iris %>% add_count_dt(Species,.name = "N")
1919
#'
2020
#' mtcars %>% count_dt(cyl,vs)
21+
#' mtcars %>% count_dt(cyl,vs,.name = "N",sort = FALSE)
2122
#' mtcars %>% add_count_dt(cyl,vs)
2223
#'
2324
#' @rdname count
@@ -26,21 +27,21 @@
2627
count_dt = function(.data,...,sort = TRUE,.name = "n"){
2728
dt = as_dt(.data)
2829
dot_string = substitute(list(...))
29-
if(sort == TRUE) dt[,.(n = .N),by = dot_string][order(-n)] -> dt
30-
else dt[,.(n = .N),by = dot_string] -> dt
31-
if(.name != "n") setnames(dt,old = "n",new = .name)
32-
as.data.table(dt)
30+
if(sort)
31+
eval(parse(text =
32+
str_glue("dt[,.({.name} = .N),by = dot_string][order(-{.name})]")))
33+
else
34+
eval(parse(text = str_glue("dt[,.({.name} = .N),by = dot_string]")))
3335
}
3436

3537
#' @rdname count
3638
#' @export
3739
add_count_dt = function(.data,...,.name = "n"){
3840
dt = as_dt(.data)
3941
dot_string = substitute(list(...))
40-
dt[,mutate_dt(.SD,n = .N),by = dot_string] -> dt
41-
if(.name != "n") setnames(dt,old = "n",new = .name)
42-
as.data.table(dt)
42+
dt[,(.name):=.N,by = dot_string][]
4343
}
4444

4545

46-
globalVariables("n")
46+
47+

R/dt.R

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,8 @@ in_dt = function(.data,...){
2525

2626
#' @rdname dt
2727
#' @export
28-
2928
as_dt = function (.data) {
3029
if (is.data.frame(.data) || ("fst_table" %chin% class(.data)))
3130
as.data.table(.data)
32-
else
33-
stop("Only a data.frame or fst_table could be received.")
31+
else stop("Only a data.frame or fst_table could be received.")
3432
}
35-
36-
# optional as_dt for future
37-
# as_dt = function(.data){
38-
# fifelse(
39-
# is.data.table(.data),
40-
# data,
41-
# fifelse(is.data.frame(.data),
42-
# as.data.table(.data),
43-
# stop("Only a data.frame could be received."))
44-
# )
45-
# }
46-
47-
48-
49-

R/dummy_dt.R

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,7 @@ dummy_dt = function(.data,...,longname = TRUE){
3838

3939
globalVariables("id_")
4040

41-
dummy_col = function(.data,col_name,longname){
42-
dt = as_dt(.data)
43-
#substitute(col) %>% deparse() -> col_name
41+
dummy_col = function(dt,col_name,longname){
4442
dt[, `:=`(one_=1,id_=1:.N) ]
4543

4644
if(longname){
@@ -60,7 +58,3 @@ dummy_col = function(.data,col_name,longname){
6058

6159

6260

63-
64-
65-
66-

R/fst.R

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
#' ft %>% select_fst("Se")
3535
#' ft %>% select_fst("nothing")
3636
#' ft %>% select_fst("Se|Sp")
37-
#' ft %>% select_fst(names(iris)[2:3])
37+
#' ft %>% select_fst(cols = names(iris)[2:3])
3838
#'
3939
#' ft %>% filter_fst(Sepal.Width > 3)
4040
#' ft %>% filter_fst(Sepal.Length > 6 , Species == "virginica")
@@ -44,10 +44,8 @@
4444
#' }
4545

4646

47-
4847
globalVariables(c("."))
4948

50-
5149
#' @rdname fst
5250
#' @export
5351
parse_fst = function(path){
@@ -59,53 +57,37 @@ parse_fst = function(path){
5957
#' @export
6058

6159
slice_fst = function(ft,row_no){
62-
ft[row_no,] %>% as.data.table()
60+
setDT(ft[row_no,])[]
6361
}
6462

6563
#' @rdname fst
6664
#' @export
6765
select_fst = function(ft,...){
68-
substitute(list(...)) %>%
69-
deparse() %>%
70-
str_extract("\\(.+\\)") %>%
71-
str_sub(2,-2)-> dot_string
72-
if(dot_string %like% "^[0-9]+$")
73-
eval(parse(text = str_glue("ft[{dot_string}] %>% as.data.table()")))
74-
else if(str_detect(dot_string,"^\"") | str_detect(dot_string,"^[a-zA-Z0-9_.]+$")){
75-
dot_string = str_remove_all(dot_string,"\"")
76-
str_detect(names(ft),dot_string) -> logical_vec
77-
if(all(logical_vec == FALSE)) {
78-
warning("No matched columns,try other patterns. Names of the `fst_table` are listed.")
79-
names(ft)
80-
} else
81-
ft[,logical_vec] %>% as.data.table()
82-
}
83-
else if(str_detect(dot_string,"^[0-9]") &
84-
str_detect(dot_string,"[0-9]$"))
85-
eval(parse(text = str_glue("ft[,c({dot_string})] %>% as.data.table()")))
86-
else if(str_detect(dot_string,",")){
87-
dot_string %>%
88-
str_split(",",simplify = TRUE) %>%
89-
str_trim() %>%
90-
str_c("'",.,"'") %>%
91-
str_c(collapse = ",") %>%
92-
str_c("c(",.,")") -> dot_string
93-
eval(parse(text = str_glue("ft[,{dot_string}] %>% as.data.table()")))
94-
} else
95-
eval(parse(text = str_glue("ft[,{dot_string}] %>% as.data.table()")))
66+
67+
setDT(ft[1,])[0] %>% select_dt(...) %>% names() -> sel_names
68+
setDT(ft[names(ft) %chin% sel_names])[]
69+
9670
}
9771

72+
select_fst = function(ft,...){
73+
74+
setDT(ft[1,])[0] %>% select_dt(...) %>% names() -> sel_names
75+
names(ft) %chin% sel_names -> logical_vec
76+
if(all(logical_vec == FALSE)) {
77+
warning("No matched columns,try other patterns. Names of the `fst_table` are listed.")
78+
names(ft)
79+
} else setDT(ft[logical_vec])[]
80+
81+
}
9882

9983
#' @rdname fst
10084
#' @export
10185

10286
filter_fst = function(ft,...){
10387
substitute(list(...)) %>%
104-
deparse() %>%
105-
paste0(collapse = "") %>%
106-
trimws() %>%
107-
str_extract("(?<=\\().+?(?=\\))") %>%
108-
gsub(",","&",.)-> dot_string
88+
lapply(deparse) %>%
89+
.[-1] %>%
90+
str_c(collapse = " & ")-> dot_string
10991
names(ft) -> ft_names
11092
ft_names[str_detect(dot_string,ft_names)] -> old
11193
paste0("ft$",old) -> new

R/group_by.R

Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,12 @@
1010
#' implemented on a data.frame for \code{group_exe_dt}.
1111
#' It can receive what \code{select_dt} receives.
1212
#' @param cols A character vector of column names to group by.
13-
#' @param inplace Should the grouping implemented by reference?
14-
#' (Modify the original data.frame) Default uses \code{FALSE}.
15-
#' @return A data.table
13+
#' @return A data.table with keys
1614
#' @details \code{group_by_dt} and \code{group_exe_dt} are a pair of functions
1715
#' to be used in combination. It utilizes the feature of key setting in data.table,
1816
#' which provides high performance for group operations, especially when you have
1917
#' to operate by specific groups frequently.
2018
#' @examples
21-
#' # group by Species in iris data set
22-
#' as.data.table(iris) -> a
23-
#' key(a)
24-
#' group_by_dt(a,Species,inplace = FALSE)
25-
#' key(a)
26-
#'
27-
#' # use inplace operation to group by reference
28-
#' as.data.table(iris) -> a
29-
#' key(a)
30-
#' group_by_dt(a,Species,inplace = TRUE)
31-
#' key(a)
3219
#'
3320
#' # aggregation after grouping using group_exe_dt
3421
#' as.data.table(iris) -> a
@@ -58,10 +45,8 @@
5845
#' @rdname group_by
5946
#' @export
6047

61-
group_by_dt = function(.data,...,cols = NULL,inplace = FALSE){
62-
63-
if(inplace) setDT(.data)
64-
else .data = as_dt(.data)
48+
group_by_dt = function(.data,...,cols = NULL){
49+
.data = as_dt(.data)
6550

6651
if(!is.null(cols)) setkeyv(.data,cols)
6752
else {
@@ -72,16 +57,6 @@ group_by_dt = function(.data,...,cols = NULL,inplace = FALSE){
7257
.data
7358
}
7459

75-
# group_by_dt = function(data,...,cols = NULL,inplace = FALSE){
76-
#
77-
# if(inplace) setDT(data)
78-
# else data = as_dt(data)
79-
#
80-
# if(!is.null(cols)) setkeyv(data,cols)
81-
# else eval(substitute(setkey(data,...)))
82-
#
83-
# }
84-
8560
#' @rdname group_by
8661
#' @export
8762
group_exe_dt = function(.data,...){
@@ -97,9 +72,3 @@ group_exe_dt = function(.data,...){
9772
}
9873

9974

100-
101-
102-
103-
104-
105-

0 commit comments

Comments
 (0)