Skip to content

Commit ff584f4

Browse files
authored
[read] Document changes to reading date/datetime (#1335)
* [write] allow writing date vectors horizontally * [read] convert dates in column names and row names * [doc] document various types of date conversion * [misc] cleanup
1 parent 74192d0 commit ff584f4

File tree

5 files changed

+129
-47
lines changed

5 files changed

+129
-47
lines changed

R/helper-functions.R

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,10 +1315,15 @@ fits_in_dims <- function(x, dims, startCol, startRow) {
13151315
dims
13161316
}
13171317

1318-
# transpose single column or row data frames to wide/long. keeps attributes and class
1318+
# transpose single column or row data frames to wide/long. keeps attributes and
1319+
# class.
1320+
# The magic of t(). A Date can be something like a numeric with a
1321+
# format attached. After t(x) it will be a string "yyyy-mm-dd".
1322+
# Therefore unclass first and apply the class afterwards.
13191323
transpose_df <- function(x) {
13201324
attribs <- attr(x, "c_cm")
13211325
classes <- class(x[[1]])
1326+
x[] <- lapply(x[], unclass)
13221327
x <- as.data.frame(t(x), stringsAsFactors = FALSE)
13231328
for (i in seq_along(x)) {
13241329
class(x[[i]]) <- classes

R/read.R

Lines changed: 76 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,46 @@
1+
# Internal function to convert data frame from character to whatever is required
2+
convert_df <- function(z, types, date_conv, datetime_conv, hms_conv, as_character = FALSE) {
3+
sel <- !is.na(names(types))
4+
5+
if (any(sel)) {
6+
nums <- names(which(types[sel] == 1))
7+
dtes <- names(which(types[sel] == 2))
8+
poxs <- names(which(types[sel] == 3))
9+
logs <- names(which(types[sel] == 4))
10+
difs <- names(which(types[sel] == 5))
11+
fmls <- names(which(types[sel] == 6))
12+
# convert "#NUM!" to "NaN" -- then converts to NaN
13+
# maybe consider this an option to instead return NA?
14+
15+
if (as_character) {
16+
date_conv_c <- function(...) as.character(date_conv(...))
17+
datetime_conv_c <- function(...) as.character(datetime_conv(...))
18+
hms_conv_c <- function(...) as.character(hms_conv(...))
19+
20+
if (length(nums)) z[nums] <- lapply(z[nums], function(i) as.character(as.numeric(replace(i, i == "#NUM!", "NaN"))))
21+
if (length(dtes)) z[dtes] <- lapply(z[dtes], date_conv_c)
22+
if (length(poxs)) z[poxs] <- lapply(z[poxs], datetime_conv_c)
23+
if (length(logs)) z[logs] <- lapply(z[logs], function(i) as.character(as.logical(i)))
24+
if (isNamespaceLoaded("hms")) z[difs] <- lapply(z[difs], hms_conv_c)
25+
} else {
26+
if (length(nums)) z[nums] <- lapply(z[nums], function(i) as.numeric(replace(i, i == "#NUM!", "NaN")))
27+
if (length(dtes)) z[dtes] <- lapply(z[dtes], date_conv)
28+
if (length(poxs)) z[poxs] <- lapply(z[poxs], datetime_conv)
29+
if (length(logs)) z[logs] <- lapply(z[logs], as.logical)
30+
if (isNamespaceLoaded("hms")) z[difs] <- lapply(z[difs], hms_conv)
31+
}
32+
33+
for (i in seq_along(z)) { # convert df to class formula
34+
if (names(z)[i] %in% fmls) class(z[[i]]) <- c(class(z[[i]]), "formula")
35+
}
36+
37+
} else {
38+
warning("could not convert. All missing in row used for variable names")
39+
}
40+
41+
z
42+
}
43+
144
# `wb_to_df()` ----------------------------------------
245
#' Create a data frame from a Workbook
346
#'
@@ -34,9 +77,17 @@
3477
#' Opening, saving and closing the file in a spreadsheet software will resolve
3578
#' this.
3679
#'
37-
#' Prior to release `1.15`, datetime variables (yyyy-mm-dd hh:mm:ss) were
38-
#' imported in the users current timezone (`Sys.timezone()`). This was
39-
#' changed. All datetime variables are now imported with timezone `"UTC"`.
80+
#' Before release 1.15, datetime variables (in 'yyyy-mm-dd hh:mm:ss' format)
81+
#' were imported using the user's local system timezone (`Sys.timezone()`).
82+
#' This behavior has been updated. Now, all datetime variables are imported
83+
#' with the timezone set to "UTC".
84+
#' If automatic date detection and conversion are enabled but the conversion
85+
#' is unsuccessful (for instance, in a column containing a mix of data types
86+
#' like strings, numbers, and dates), the dates will be displayed as a Unix
87+
#' timestamp. These can be transformed using the [as.POSIXct()] function.
88+
#' If date detection is disabled, dates will show up as a spreadsheet date
89+
#' format. To convert these, you can use the functions [convert_date()],
90+
#' [convert_datetime()], or [convert_hms()].
4091
#'
4192
#' @seealso [wb_get_named_regions()]
4293
#'
@@ -596,39 +647,34 @@ wb_to_df <- function(
596647
xlsx_cols_names <- colnames(z)
597648
names(xlsx_cols_names) <- xlsx_cols_names
598649

599-
# if colNames, then change tt too
650+
651+
date_conv <- function(x) as.Date(.POSIXct(as.double(x), "UTC"), tz = "UTC", origin = "1970-01-01")
652+
datetime_conv <- function(x) .POSIXct(as.double(x), "UTC")
653+
hms_conv <- convert_hms
654+
655+
# if colNames, then change tt too. rownames will be converted later. If column name row
656+
# is in z/tt, the column name guessing will fail below
600657
if (col_names) {
601658
# select first row as colnames, but do not yet assign. it might contain
602659
# missing values and if assigned, convert below might break with unambiguous
603660
# names.
661+
604662
nams <- names(xlsx_cols_names)
605-
xlsx_cols_names <- z[1, ]
663+
if (convert)
664+
xlsx_cols_names <- convert_df(z[1, , drop = FALSE], guess_col_type(tt[1, , drop = FALSE]), date_conv, datetime_conv, hms_conv, as_character = TRUE)
665+
else
666+
xlsx_cols_names <- z[1, , drop = FALSE]
606667
names(xlsx_cols_names) <- nams
607668

608669
z <- z[-1, , drop = FALSE]
609670
tt <- tt[-1, , drop = FALSE]
610671
}
611672

612-
if (row_names) {
613-
rownames(z) <- z[, 1]
614-
rownames(tt) <- z[, 1]
615-
xlsx_cols_names <- xlsx_cols_names[-1]
616-
617-
z <- z[, -1, drop = FALSE]
618-
tt <- tt[, -1, drop = FALSE]
619-
}
620-
621673
# # faster guess_col_type alternative? to avoid tt
622674
# types <- ftable(cc$row_r ~ cc$c_r ~ cc$typ)
623675

624-
date_conv <- NULL
625-
datetime_conv <- NULL
626-
hms_conv <- convert_hms
627-
628676
if (missing(types)) {
629677
types <- guess_col_type(tt)
630-
date_conv <- function(x) as.Date(.POSIXct(as.double(x), "UTC"), tz = "UTC", origin = "1970-01-01")
631-
datetime_conv <- function(x) .POSIXct(as.double(x), "UTC")
632678
} else {
633679
# TODO check if guessing only if !all() is possible
634680
if (any(xlsx_cols_names %in% names(types))) {
@@ -659,36 +705,24 @@ wb_to_df <- function(
659705
stop("no variable from `types` found in data")
660706
}
661707

708+
# avoid multiple conversion
662709
date_conv <- function(x) convert_date(x, origin = origin)
663710
datetime_conv <- function(x) convert_datetime(x, origin = origin)
664711
}
665712

666713
# could make it optional or explicit
667714
if (convert) {
668-
sel <- !is.na(names(types))
669-
670-
if (any(sel)) {
671-
nums <- names(which(types[sel] == 1))
672-
dtes <- names(which(types[sel] == 2))
673-
poxs <- names(which(types[sel] == 3))
674-
logs <- names(which(types[sel] == 4))
675-
difs <- names(which(types[sel] == 5))
676-
fmls <- names(which(types[sel] == 6))
677-
# convert "#NUM!" to "NaN" -- then converts to NaN
678-
# maybe consider this an option to instead return NA?
679-
if (length(nums)) z[nums] <- lapply(z[nums], function(i) as.numeric(replace(i, i == "#NUM!", "NaN")))
680-
if (length(dtes)) z[dtes] <- lapply(z[dtes], date_conv)
681-
if (length(poxs)) z[poxs] <- lapply(z[poxs], datetime_conv)
682-
if (length(logs)) z[logs] <- lapply(z[logs], as.logical)
683-
if (isNamespaceLoaded("hms")) z[difs] <- lapply(z[difs], hms_conv)
715+
z <- convert_df(z, types, date_conv, datetime_conv, hms_conv)
716+
}
684717

685-
for (i in seq_along(z)) { # convert df to class formula
686-
if (names(z)[i] %in% fmls) class(z[[i]]) <- c(class(z[[i]]), "formula")
687-
}
718+
# column names were picked earlier
719+
if (row_names) {
720+
rownames(z) <- z[, 1]
721+
rownames(tt) <- z[, 1]
722+
xlsx_cols_names <- xlsx_cols_names[-1]
688723

689-
} else {
690-
warning("could not convert. All missing in row used for variable names")
691-
}
724+
z <- z[, -1, drop = FALSE]
725+
tt <- tt[, -1, drop = FALSE]
692726
}
693727

694728
if (col_names) {

man/wb_to_df.Rd

Lines changed: 11 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-date_time_conversion.R

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,38 @@ test_that("date 1904 works as expected", {
159159
# ignore rounding differences
160160
expect_equal(as.Date(df$pos), as.Date(got[["pos"]], tz = "UTC"))
161161
})
162+
163+
test_that("date conversion works", {
164+
165+
wb <- wb_workbook()$add_worksheet()
166+
wb$add_data(dims = "A1:D1", x = as.Date(paste0("2025-0", 1:4, "-01")), col_names = FALSE)
167+
wb$add_data(dims = "A2:D4", x = matrix(1:12, 3, 4), col_names = FALSE)
168+
169+
# column name is converted date, column is numeric
170+
df <- wb$to_df()
171+
expect_true(is.numeric(df$`2025-01-01`))
172+
173+
# column name is converted date, column is character
174+
df <- wb$to_df(convert = FALSE)
175+
expect_true(is.character(df$`2025-01-01`))
176+
177+
# column name is spreadsheet date, column is numeric
178+
df <- wb$to_df(convert = TRUE, detect_dates = FALSE)
179+
expect_true(is.numeric(df$`45658`))
180+
181+
# column name is spreadsheet date, column is character
182+
df <- wb$to_df(convert = FALSE, detect_dates = FALSE)
183+
expect_true(is.character(df$`45658`))
184+
185+
# conversion works for rownames
186+
df <- data.frame(
187+
x = as.Date(paste0("2025-0", 1:4, "-01")),
188+
y = 1:4
189+
)
190+
wb <- wb_workbook()$add_worksheet()
191+
wb$add_data(x = df)
192+
df <- wb$to_df(row_names = TRUE)
193+
exp <- c("2025-01-01", "2025-02-01", "2025-03-01", "2025-04-01")
194+
got <- rownames(df)
195+
expect_equal(exp, got)
196+
})

tests/testthat/test-named_regions.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ test_that("Maintaining Named Regions on Load", {
4848

4949
# nonsense
5050
# df1 is a single value and this single value is now used as rowName
51-
expect_warning(df1 <- read_xlsx(wb, named_region = "region1", row_names = TRUE))
51+
df1 <- read_xlsx(wb, named_region = "region1", row_names = TRUE)
5252
expect_s3_class(df1, "data.frame")
5353
expect_equal(nrow(df1), 0)
5454
expect_equal(ncol(df1), 0)

0 commit comments

Comments
 (0)