Skip to content

Commit 65293d2

Browse files
Add files via upload
1 parent c34f068 commit 65293d2

13 files changed

+239
-42
lines changed

CRAN-RELEASE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
This package was submitted to CRAN on 2020-04-10.
1+
This package was submitted to CRAN on 2020-05-02.
22
Once it is accepted, delete this file and tag the release (commit fbccc9581e).

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: tidyfst
22
Title: Tidy Verbs for Fast Data Manipulation
3-
Version: 0.9.5
3+
Version: 0.9.6
44
Depends: R (>= 3.3.0)
55
Authors@R:
66
person(given = "Tian-Yuan",

NAMESPACE

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export("%>%")
4+
export(CJ)
45
export(add_count_dt)
56
export(ajoin)
67
export(anti_join_dt)
@@ -40,8 +41,10 @@ export(group_dt)
4041
export(group_exe_dt)
4142
export(ijoin)
4243
export(import_fst)
44+
export(impute_dt)
4345
export(in_dt)
4446
export(inner_join_dt)
47+
export(intersect_dt)
4548
export(key)
4649
export(lag_dt)
4750
export(lead_dt)
@@ -75,6 +78,8 @@ export(select_fst)
7578
export(select_mix)
7679
export(semi_join_dt)
7780
export(separate_dt)
81+
export(setdiff_dt)
82+
export(setequal_dt)
7883
export(shift_fill)
7984
export(sjoin)
8085
export(slice_dt)
@@ -96,6 +101,7 @@ export(top_n_dt)
96101
export(transmute_dt)
97102
export(unchop_dt)
98103
export(uncount_dt)
104+
export(union_dt)
99105
export(uniqueN)
100106
export(unite_dt)
101107
export(unnest_dt)
@@ -104,6 +110,7 @@ export(wider_dt)
104110
import(data.table)
105111
import(fst)
106112
import(stringr)
113+
importFrom(stats,median)
107114
importFrom(stats,na.omit)
108115
importFrom(stats,setNames)
109116
importFrom(utils,object.size)

R/_global_setting.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#' @import data.table
33
#' @import fst
44
#' @import stringr
5-
#' @importFrom stats na.omit setNames
5+
#' @importFrom stats na.omit setNames median
66
#' @importFrom utils object.size
77

88
#' @export
@@ -56,4 +56,5 @@ data.table::copy
5656
#' @export
5757
data.table::key
5858

59-
59+
#' @export
60+
data.table::CJ

R/impute_dt.R

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
2+
#' @title Impute missing values with mean, median or mode
3+
#' @description Impute the missing values in the columns of a data.frame with their mean, median or mode.
4+
#' @param .data A data.frame
5+
#' @param ... Columns to select
6+
#' @param .func Character, "mode" (default), "mean" or "median".
7+
#' A user-defined function can also be supplied.
8+
#' @return A data.table
9+
#' @examples
10+
#'
11+
#' Pclass <- c(3, 1, 3, 1, 3, 2, 2, 3, NA, NA)
12+
#' Sex <- c('male', 'male', 'female', 'female', 'female',
13+
#' 'female', NA, 'male', 'female', NA)
14+
#' Age <- c(22, 38, 26, 35, NA,
15+
#' 45, 25, 39, 28, 40)
16+
#' SibSp <- c(0, 1, 3, 1, 2, 3, 2, 2, NA, 0)
17+
#' Fare <- c(7.25, 71.3, 7.92, NA, 8.05, 8.46, 51.9, 60, 32, 15)
18+
#' Embarked <- c('S', NA, 'S', 'Q', 'Q', 'S', 'C', 'S', 'C', 'S')
19+
#' data <- data.frame('Pclass' = Pclass,
20+
#' 'Sex' = Sex, 'Age' = Age, 'SibSp' = SibSp,
21+
#' 'Fare' = Fare, 'Embarked' = Embarked)
22+
#'
23+
#' data
24+
#' data %>% impute_dt() # default uses "mode" as `.func`
25+
#' data %>% impute_dt(is.numeric,.func = "mean")
26+
#' data %>% impute_dt(is.numeric,.func = "median")
27+
#'
28+
#' # use user defined function
29+
#' my_fun = function(x){
30+
#' (max(x,na.rm = TRUE) - min(x,na.rm = TRUE))/2
31+
#' }
32+
#' data %>% impute_dt(is.numeric,.func = my_fun)
33+
#'
34+
35+
#' @export
36+
impute_dt = function(.data,...,.func = "mode"){
37+
38+
dt = as.data.table(.data)
39+
if (substitute(list(...)) %>% deparse() == "list()")
40+
sel_cols = names(dt)
41+
else
42+
dt[0] %>% select_dt(...) %>% names() -> sel_cols
43+
44+
if(!is.function(.func)){
45+
if(.func == "mode")
46+
dt[,(sel_cols):=lapply(.SD,
47+
function(x){
48+
uniqv = unique(x)
49+
x_i = uniqv[which.max(tabulate(match(x, uniqv)))]
50+
x[which(is.na(x))] = x_i
51+
x
52+
}),
53+
.SDcols = sel_cols][]
54+
else if(.func == "mean"){
55+
dt[,(sel_cols):=lapply(.SD,
56+
function(x){
57+
x_i = mean(x,na.rm = TRUE)
58+
x[which(is.na(x))] = x_i
59+
x
60+
}),
61+
.SDcols = sel_cols][]
62+
}else if(.func == "median"){
63+
dt[,(sel_cols):=lapply(.SD,
64+
function(x){
65+
x_i = median(x,na.rm = TRUE)
66+
x[which(is.na(x))] = x_i
67+
x
68+
}),
69+
.SDcols = sel_cols][]
70+
}
71+
}else dt[,(sel_cols):=lapply(.SD,.func),.SDcols = sel_cols][]
72+
}
73+
74+
75+
76+
# impute_dt = function(.data,...,.func = "mode"){
77+
#
78+
# dt = as.data.table(.data)
79+
# if (substitute(list(...)) %>% deparse() == "list()")
80+
# sel_cols = names(dt)
81+
# else
82+
# dt[0] %>% select_dt(...) %>% names() -> sel_cols
83+
#
84+
# if(!is.function(.func)){
85+
# if(.func == "mode") .FUN = function(v) {
86+
# uniqv <- unique(v)
87+
# uniqv[which.max(tabulate(match(v, uniqv)))]
88+
# }else if(.func == "mean"){
89+
# .FUN = function(x) mean(x,na.rm = TRUE)
90+
# }else if(.func == "median"){
91+
# .FUN = function(x) median(x,na.rm = TRUE)
92+
# }
93+
# }else .FUN = .func
94+
#
95+
# .func = function(x){
96+
# x[which(is.na(x))] = .FUN(x)
97+
# x
98+
# }
99+
#
100+
# dt[,(sel_cols):=lapply(.SD,.func),.SDcols = sel_cols][]
101+
#
102+
# }
103+
104+
105+
106+

R/join_dt.R

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
21
#' @title Join table by common keys
32
#' @description Join operations.
4-
#'
3+
#' @name join
54
#' @param x data.frame
65
#' @param y data.frame
76
#' @param by
@@ -270,8 +269,3 @@ sjoin = function(x,y,by = NULL){
270269
x[w]
271270
}
272271

273-
274-
275-
276-
277-

R/on_attach.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
.onAttach = function(...) {
2-
options("datatable.print.class" = TRUE, # print class in data.table
2+
options(
3+
"datatable.print.class" = TRUE, # print class in data.table
34
# "datatable.print.trunc.cols" = TRUE,
4-
"datatable.print.keys" = TRUE)
5+
"datatable.print.keys" = TRUE
6+
)
57
hints = c(
68
"Life's short, use R."
79
)

R/setops.R

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
2+
#' @title Set operations for data frames
3+
#' @description Wrappers of set operations in \pkg{data.table}.
4+
#' The only difference is that they can be applied to non-data.table data frames by
5+
#' recognizing and coercing them to data.table automatically.
6+
#' @param x A data.frame
7+
#' @param y A data.frame
8+
#' @param all Logical. When \code{FALSE} (default),
9+
#' removes duplicate rows from the result.
10+
#' @return A data.table
11+
#' @seealso \code{\link[data.table]{setops}}
12+
#' @examples
13+
#'
14+
#' x = iris[c(2,3,3,4),]
15+
#' x2 = iris[2:4,]
16+
#' y = iris[c(3:5),]
17+
#'
18+
#' intersect_dt(x, y) # intersect
19+
#' intersect_dt(x, y, all=TRUE) # intersect all
20+
#' setdiff_dt(x, y) # except
21+
#' setdiff_dt(x, y, all=TRUE) # except all
22+
#' union_dt(x, y) # union
23+
#' union_dt(x, y, all=TRUE) # union all
24+
#' setequal_dt(x, x2, all=FALSE) # setequal
25+
#' setequal_dt(x, x2) # setequal all
26+
#'
27+
28+
#' @rdname setops
29+
#' @export
30+
31+
intersect_dt = function(x,y,all = FALSE){
32+
x = as_dt(x)
33+
y = as_dt(y)
34+
fintersect(x,y,all)
35+
}
36+
37+
#' @rdname setops
38+
#' @export
39+
40+
union_dt = function(x,y,all = FALSE){
41+
x = as_dt(x)
42+
y = as_dt(y)
43+
funion(x,y,all)
44+
}
45+
46+
#' @rdname setops
47+
#' @export
48+
49+
setdiff_dt = function(x,y,all = FALSE){
50+
x = as_dt(x)
51+
y = as_dt(y)
52+
fsetdiff(x,y,all)
53+
}
54+
55+
#' @rdname setops
56+
#' @export
57+
58+
setequal_dt = function(x,y,all = TRUE){
59+
x = as_dt(x)
60+
y = as_dt(y)
61+
fsetequal(x,y,all)
62+
}

R/transpose.R

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,23 @@
1313

1414
#' @export
1515
t_dt = function(.data){
16-
dt = as_dt(.data)
17-
dt = transpose(dt) %>% as.data.frame()
16+
dt = transpose(.data) %>% setDF()
1817
rownames(dt) = colnames(.data)
1918
if(setequal(rownames(.data),as.character(1:nrow(.data))))
2019
colnames(dt) = paste0("V",rownames(.data))
2120
else
2221
colnames(dt) = rownames(.data)
2322
dt
2423
}
24+
25+
# t_dt = function(.data){
26+
# dt = as_dt(.data)
27+
# dt = transpose(dt) %>% as.data.frame()
28+
# rownames(dt) = colnames(.data)
29+
# if(setequal(rownames(.data),as.character(1:nrow(.data))))
30+
# colnames(dt) = paste0("V",rownames(.data))
31+
# else
32+
# colnames(dt) = rownames(.data)
33+
# dt
34+
# }
35+

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# tidyfst: Tidy Verbs for Fast Data Manipulation<img src="man/figures/logo.png" align="right" alt="" width="120" />
22

3-
[![](https://www.r-pkg.org/badges/version/tidyfst?color=orange)](https://cran.r-project.org/package=tidyfst) [![](https://img.shields.io/badge/devel%20version-0.9.5-green.svg)](https://github.com/hope-data-science/tidyfst) ![](https://img.shields.io/badge/lifecycle-maturing-purple.svg) [![](https://img.shields.io/github/last-commit/hope-data-science/tidyfst.svg)](https://github.com/hope-data-science/tidyfst/commits/master) [![downloads](http://cranlogs.r-pkg.org/badges/grand-total/tidyfst?color=yellow)](https://r-pkg.org/pkg/tidyfst)
3+
[![](https://www.r-pkg.org/badges/version/tidyfst?color=orange)](https://cran.r-project.org/package=tidyfst) [![](https://img.shields.io/badge/devel%20version-0.9.6-purple.svg)](https://github.com/hope-data-science/tidyfst) ![](https://img.shields.io/badge/lifecycle-stable-deepgreen.svg) [![downloads](http://cranlogs.r-pkg.org/badges/grand-total/tidyfst?color=yellow)](https://r-pkg.org/pkg/tidyfst)
44

55
[![download](https://cranlogs.r-pkg.org/badges/tidyfst?color=red)](https://rdrr.io/cran/tidyfst/) [![downloads](https://cranlogs.r-pkg.org/badges/last-day/tidyfst?color=9cf)](https://crantastic.org/packages/tidyfst) [![DOI](https://zenodo.org/badge/240626994.svg)](https://zenodo.org/badge/latestdoi/240626994)
66

@@ -53,8 +53,10 @@ iris %>%
5353
#> 3: virginica 3.8
5454

5555
mtcars %>%
56-
group_dt(by =.(vs,am),
57-
summarise_dt(avg = mean(mpg)))
56+
group_dt(
57+
by =.(vs,am),
58+
summarise_dt(avg = mean(mpg))
59+
)
5860
#> vs am avg
5961
#> <num> <num> <num>
6062
#> 1: 0 1 19.75000

0 commit comments

Comments
 (0)