Skip to content

Commit c864c38

Browse files
Add files via upload
1 parent bfdff2d commit c864c38

File tree

11 files changed

+362
-29
lines changed

11 files changed

+362
-29
lines changed

CRAN-RELEASE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
This package was submitted to CRAN on 2020-05-02.
1+
This package was submitted to CRAN on 2020-05-28.
22
Once it is accepted, delete this file and tag the release (commit fbccc9581e).

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: tidyfst
22
Title: Tidy Verbs for Fast Data Manipulation
3-
Version: 0.9.6
3+
Version: 0.9.7
44
Depends: R (>= 3.3.0)
55
Authors@R:
66
person(given = "Tian-Yuan",

NAMESPACE

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
export("%>%")
44
export(CJ)
55
export(add_count_dt)
6-
export(ajoin)
76
export(anti_join_dt)
87
export(arrange_dt)
98
export(as.data.table)
@@ -28,7 +27,6 @@ export(fill_na_dt)
2827
export(filter_dt)
2928
export(filter_fst)
3029
export(fintersect)
31-
export(fjoin)
3230
export(fread)
3331
export(frollapply)
3432
export(fsetdiff)
@@ -39,7 +37,6 @@ export(fwrite)
3937
export(group_by_dt)
4038
export(group_dt)
4139
export(group_exe_dt)
42-
export(ijoin)
4340
export(import_fst)
4441
export(impute_dt)
4542
export(in_dt)
@@ -50,7 +47,6 @@ export(lag_dt)
5047
export(lead_dt)
5148
export(left_join_dt)
5249
export(like)
53-
export(ljoin)
5450
export(longer_dt)
5551
export(mat_df)
5652
export(mutate_dt)
@@ -67,7 +63,6 @@ export(rename_dt)
6763
export(replace_dt)
6864
export(replace_na_dt)
6965
export(right_join_dt)
70-
export(rjoin)
7166
export(rn_col)
7267
export(rowwise_dt)
7368
export(sample_dt)
@@ -81,9 +76,13 @@ export(separate_dt)
8176
export(setdiff_dt)
8277
export(setequal_dt)
8378
export(shift_fill)
84-
export(sjoin)
8579
export(slice_dt)
8680
export(slice_fst)
81+
export(slice_head_dt)
82+
export(slice_max_dt)
83+
export(slice_min_dt)
84+
export(slice_sample_dt)
85+
export(slice_tail_dt)
8786
export(squeeze_dt)
8887
export(summarise_dt)
8988
export(summarise_vars)
@@ -113,4 +112,6 @@ import(stringr)
113112
importFrom(stats,median)
114113
importFrom(stats,na.omit)
115114
importFrom(stats,setNames)
115+
importFrom(utils,head)
116116
importFrom(utils,object.size)
117+
importFrom(utils,tail)

R/_global_setting.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#' @import fst
44
#' @import stringr
55
#' @importFrom stats na.omit setNames median
6-
#' @importFrom utils object.size
6+
#' @importFrom utils object.size head tail
77

88
#' @export
99
stringr::`%>%`

R/complete.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ complete_dt = function(.data,...,fill = NA){
4141
if(
4242
substitute(list(...)) %>%
4343
deparse() %>%
44+
.[1] %>%
4445
str_detect("=")
4546
) {
4647
list(...) %>%

R/join.R

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
2+
#' @title Join tables
3+
#' @name join
4+
#' @description The mutating joins add columns from `y` to `x`,
5+
#' matching rows based on the keys:
6+
#'
7+
#' * `inner_join_dt()`: includes all rows in `x` and `y`.
8+
#' * `left_join_dt()`: includes all rows in `x`.
9+
#' * `right_join_dt()`: includes all rows in `y`.
10+
#' * `full_join_dt()`: includes all rows in `x` or `y`.
11+
#' @description
12+
#' Filtering joins filter rows from `x` based on the presence or absence
13+
#' of matches in `y`:
14+
#'
15+
#' * `semi_join_dt()` return all rows from `x` with a match in `y`.
16+
#' * `anti_join_dt()` return all rows from `x` without a match in `y`.
17+
#' @param x A data.table
18+
#' @param y A data.table
19+
#' @param by (Optional) A character vector of variables to join by.
20+
#'
21+
#' If `NULL`, the default, `*_join_dt()` will perform a natural join, using all
22+
#' variables in common across `x` and `y`. A message lists the variables so that you
23+
#' can check they're correct; suppress the message by supplying `by` explicitly.
24+
#'
25+
#' To join by different variables on `x` and `y`, use a named vector.
26+
#' For example, `by = c("a" = "b")` will match `x$a` to `y$b`.
27+
#'
28+
#' To join by multiple variables, use a vector with length > 1.
29+
#' For example, `by = c("a", "b")` will match `x$a` to `y$a` and `x$b` to
30+
#' `y$b`. Use a named vector to match different variables in `x` and `y`.
31+
#' For example, `by = c("a" = "b", "c" = "d")` will match `x$a` to `y$b` and
32+
#' `x$c` to `y$d`.
33+
#' @param on (Optional)
34+
#' Indicate which columns in x should be joined with which columns in y.
35+
#' Examples included:
36+
#' 1.\code{.by = c("a","b")} (this is a must for \code{set_full_join_dt});
37+
#' 2.\code{.by = c(x1="y1", x2="y2")};
38+
#' 3.\code{.by = c("x1==y1", "x2==y2")};
39+
#' 4.\code{.by = c("a", V2="b")};
40+
#' 5.\code{.by = .(a, b)};
41+
#' 6.\code{.by = c("x>=a", "y<=b")} or \code{.by = .(x>=a, y<=b)}.
42+
#' @param suffix If there are non-joined duplicate variables in x and y, these
43+
#' suffixes will be added to the output to disambiguate them. Should be a
44+
#' character vector of length 2.
45+
#' @return A data.table
46+
#' @examples
47+
#'
48+
#' workers = fread("
49+
#' name company
50+
#' Nick Acme
51+
#' John Ajax
52+
#' Daniela Ajax
53+
#' ")
54+
#'
55+
#' positions = fread("
56+
#' name position
57+
#' John designer
58+
#' Daniela engineer
59+
#' Cathie manager
60+
#' ")
61+
#'
62+
#' workers %>% inner_join_dt(positions)
63+
#' workers %>% left_join_dt(positions)
64+
#' workers %>% right_join_dt(positions)
65+
#' workers %>% full_join_dt(positions)
66+
#'
67+
#' # filtering joins
68+
#' workers %>% anti_join_dt(positions)
69+
#' workers %>% semi_join_dt(positions)
70+
#'
71+
#' # To suppress the message, supply 'by' argument
72+
#' workers %>% left_join_dt(positions, by = "name")
73+
#'
74+
#' # Use a named 'by' if the join variables have different names
75+
#' positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
76+
#' workers %>% inner_join_dt(positions2, by = c("name" = "worker"))
77+
#'
78+
#' # the syntax of 'on' could be a bit different
79+
#' workers %>% inner_join_dt(positions2,on = "name==worker")
80+
#'
81+
#'
82+
83+
#' @rdname join
84+
#' @export
85+
inner_join_dt = function(x,y,by = NULL, on = NULL,suffix = c(".x",".y")){
86+
x = as_dt(x)
87+
y = as_dt(y)
88+
on_ = substitute(on) %>% deparse()
89+
by_ = substitute(by) %>% deparse()
90+
if(on_ != "NULL") x[y, nomatch = 0L, on = on]
91+
else if(by_ == "NULL"){
92+
by = intersect(names(x), names(y))
93+
by_name = str_c(by, collapse = ",")
94+
message(str_glue("Joining by: {by_name}\n\n"))
95+
merge.data.table(x,y,by = by,suffixes = suffix)
96+
}else if(is.null(names(by))) merge.data.table(x,y,by = by,suffixes = suffix)
97+
else merge.data.table(x,y,by.x = names(by),by.y = by,suffixes = suffix)
98+
}
99+
100+
#' @rdname join
101+
#' @export
102+
left_join_dt = function(x,y,by = NULL, on = NULL,suffix = c(".x",".y")){
103+
x = as_dt(x)
104+
y = as_dt(y)
105+
on_ = substitute(on) %>% deparse()
106+
by_ = substitute(by) %>% deparse()
107+
if(on_ != "NULL") y[x, on = on]
108+
else if(by_ == "NULL"){
109+
by = intersect(names(x), names(y))
110+
by_name = str_c(by, collapse = ",")
111+
message(str_glue("Joining by: {by_name}\n\n"))
112+
merge.data.table(x,y,by = by,all.x = TRUE,suffixes = suffix)
113+
}else if(is.null(names(by))) merge.data.table(x,y,by = by,all.x = TRUE,suffixes = suffix)
114+
else merge.data.table(x,y,by.x = names(by),by.y = by,all.x = TRUE,suffixes = suffix)
115+
}
116+
117+
#' @rdname join
118+
#' @export
119+
right_join_dt = function(x,y,by = NULL, on = NULL,suffix = c(".x",".y")){
120+
x = as_dt(x)
121+
y = as_dt(y)
122+
on_ = substitute(on) %>% deparse()
123+
by_ = substitute(by) %>% deparse()
124+
if(on_ != "NULL") x[y, on = on]
125+
else if(by_ == "NULL"){
126+
by = intersect(names(x), names(y))
127+
by_name = str_c(by, collapse = ",")
128+
message(str_glue("Joining by: {by_name}\n\n"))
129+
merge.data.table(x,y,by = by,all.y = TRUE,suffixes = suffix)
130+
}else if(is.null(names(by))) merge.data.table(x,y,by = by,all.y = TRUE,suffixes = suffix)
131+
else merge.data.table(x,y,by.x = names(by),by.y = by,all.y = TRUE,suffixes = suffix)
132+
}
133+
134+
#' @rdname join
135+
#' @export
136+
full_join_dt = function(x,y,by = NULL, on = NULL,suffix = c(".x",".y")){
137+
x = as_dt(x)
138+
y = as_dt(y)
139+
on_ = substitute(on) %>% deparse()
140+
by_ = substitute(by) %>% deparse()
141+
if(on_ != "NULL") {
142+
if(by_!="null"){
143+
rbind(x[, .SD, .SDcols = by],
144+
y[, .SD, .SDcols = by]) %>%
145+
unique()-> unique_keys
146+
y[x[.(unique_keys), on = on], on = on]
147+
}else{
148+
rbind(x[, .SD, .SDcols = on],
149+
y[, .SD, .SDcols = on]) %>%
150+
unique()-> unique_keys
151+
y[x[.(unique_keys), on = on], on = on]
152+
}
153+
} else if(by_ == "NULL"){
154+
by = intersect(names(x), names(y))
155+
by_name = str_c(by, collapse = ",")
156+
message(str_glue("Joining by: {by_name}\n\n"))
157+
merge.data.table(x,y,by = by,all = TRUE,suffixes = suffix)
158+
}else if(is.null(names(by))) merge.data.table(x,y,by = by,all = TRUE,suffixes = suffix)
159+
else merge.data.table(x,y,by.x = names(by),by.y = by,all = TRUE,suffixes = suffix)
160+
}
161+
162+
#' @rdname join
163+
#' @export
164+
anti_join_dt = function(x,y,by = NULL, on = NULL){
165+
x = as_dt(x)
166+
y = as_dt(y)
167+
on_ = substitute(on) %>% deparse()
168+
by_ = substitute(by) %>% deparse()
169+
if(on_ != "NULL") x[!y, on = on]
170+
else if(by_ == "NULL"){
171+
by = intersect(names(x), names(y))
172+
by_name = str_c(by, collapse = ",")
173+
message(str_glue("Joining by: {by_name}\n\n"))
174+
x[!y, on = by]
175+
}else x[!y, on = by]
176+
}
177+
178+
#' @rdname join
179+
#' @export
180+
semi_join_dt = function(x,y,by = NULL, on = NULL){
181+
x = as_dt(x)
182+
y = as_dt(y)
183+
on_ = substitute(on) %>% deparse()
184+
by_ = substitute(by) %>% deparse()
185+
if(on_ != "NULL") {
186+
w = unique(x[y, on = on, nomatch = 0L, which = TRUE, allow.cartesian = TRUE])
187+
x[w]
188+
}
189+
else{
190+
if(by_ == "NULL"){
191+
by = intersect(names(x), names(y))
192+
by_name = str_c(by, collapse = ",")
193+
message(str_glue("Joining by: {by_name}\n\n"))
194+
}
195+
w = unique(x[y, on = by, nomatch = 0L, which = TRUE, allow.cartesian = TRUE])
196+
x[w]
197+
}
198+
}

R/nest_dt.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ unnest_dt = function(.data,...){
145145
if(length(col_names) == 1) unnest_col(dt,...)
146146
else
147147
lapply(col_names,function(x) unnest_col(dt,cols = x)) %>%
148-
Reduce(x = ., f = function(x,y) merge(x,y))
148+
Reduce(x = ., f = function(x,y) merge(x,y,all = TRUE))
149149
}
150150

151151
unnest_col = function(.data,...){

R/separate.R

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,40 +15,57 @@
1515
#' df %>% separate_dt(x, c("A", "B"))
1616
#' # equals to
1717
#' df %>% separate_dt("x", c("A", "B"))
18+
#'
19+
#' # If you just want the second variable:
20+
#' df %>% separate_dt(x,into = c(NA,"B"))
1821

1922
#' @export
2023
separate_dt = function(.data,separated_colname,into,
21-
sep = "[^[:alnum:]]+",
22-
remove = TRUE){
24+
sep = "[^[:alnum:]]+",
25+
remove = TRUE){
2326
dt = as.data.table(.data)
2427
substitute(separated_colname) %>% deparse() -> parse_name
2528
if(!str_detect(parse_name,"^\"")) separated_colname = parse_name
2629

30+
if(anyNA(into)) into[is.na(into)] = "NA_COL_"
31+
2732
dt[[separated_colname]] %>%
2833
tstrsplit(split = sep) %>%
2934
setDT() %>%
30-
setnames(names(.),into) -> split_columns
35+
setnames(names(.),into) %>%
36+
select_dt(-"NA_COL_")-> split_columns
3137
if(remove)
3238
dt[,(separated_colname):=NULL][,names(split_columns):=split_columns][]
3339
else dt[,names(split_columns):=split_columns][]
3440

3541
}
3642

37-
#' separate_dt = function(.data,separated_colname,into,
38-
#' sep = "[^[:alnum:]]+",
39-
#' remove = TRUE){
40-
#' dt = as_dt(.data)
41-
#' substitute(separated_colname) %>% deparse() -> parse_name
42-
#' if(!str_detect(parse_name,"^\"")) separated_colname = parse_name
43-
#'
44-
#' dt[[separated_colname]] %>%
45-
#' tstrsplit(split = sep) %>%
46-
#' setNames(into) %>%
47-
#' as.data.table() -> split_columns
48-
#' if(remove) cbind(dt[,.SD,.SDcols = -separated_colname],split_columns)
49-
#' else cbind(dt,split_columns)
50-
#'
51-
#' }
43+
44+
# separate_dt = function(.data,separated_colname,into,
45+
# sep = "[^[:alnum:]]+",
46+
# remove = TRUE){
47+
# dt = as.data.table(.data)
48+
# substitute(separated_colname) %>% deparse() -> parse_name
49+
# if(!str_detect(parse_name,"^\"")) separated_colname = parse_name
50+
#
51+
# dt[[separated_colname]] %>%
52+
# tstrsplit(split = sep) %>%
53+
# setDT() %>%
54+
# setnames(names(.),into) -> split_columns
55+
# if(remove)
56+
# dt[,(separated_colname):=NULL][,names(split_columns):=split_columns][]
57+
# else dt[,names(split_columns):=split_columns][]
58+
#
59+
# }
60+
61+
62+
63+
64+
65+
66+
67+
68+
5269

5370

5471

0 commit comments

Comments
 (0)