|
| 1 | +# Internal function to convert data frame from character to whatever is required |
| 2 | +convert_df <- function(z, types, date_conv, datetime_conv, hms_conv, as_character = FALSE) { |
| 3 | + sel <- !is.na(names(types)) |
| 4 | + |
| 5 | + if (any(sel)) { |
| 6 | + nums <- names(which(types[sel] == 1)) |
| 7 | + dtes <- names(which(types[sel] == 2)) |
| 8 | + poxs <- names(which(types[sel] == 3)) |
| 9 | + logs <- names(which(types[sel] == 4)) |
| 10 | + difs <- names(which(types[sel] == 5)) |
| 11 | + fmls <- names(which(types[sel] == 6)) |
| 12 | + # convert "#NUM!" to "NaN" -- then converts to NaN |
| 13 | + # maybe consider this an option to instead return NA? |
| 14 | + |
| 15 | + if (as_character) { |
| 16 | + date_conv_c <- function(...) as.character(date_conv(...)) |
| 17 | + datetime_conv_c <- function(...) as.character(datetime_conv(...)) |
| 18 | + hms_conv_c <- function(...) as.character(hms_conv(...)) |
| 19 | + |
| 20 | + if (length(nums)) z[nums] <- lapply(z[nums], function(i) as.character(as.numeric(replace(i, i == "#NUM!", "NaN")))) |
| 21 | + if (length(dtes)) z[dtes] <- lapply(z[dtes], date_conv_c) |
| 22 | + if (length(poxs)) z[poxs] <- lapply(z[poxs], datetime_conv_c) |
| 23 | + if (length(logs)) z[logs] <- lapply(z[logs], function(i) as.character(as.logical(i))) |
| 24 | + if (isNamespaceLoaded("hms")) z[difs] <- lapply(z[difs], hms_conv_c) |
| 25 | + } else { |
| 26 | + if (length(nums)) z[nums] <- lapply(z[nums], function(i) as.numeric(replace(i, i == "#NUM!", "NaN"))) |
| 27 | + if (length(dtes)) z[dtes] <- lapply(z[dtes], date_conv) |
| 28 | + if (length(poxs)) z[poxs] <- lapply(z[poxs], datetime_conv) |
| 29 | + if (length(logs)) z[logs] <- lapply(z[logs], as.logical) |
| 30 | + if (isNamespaceLoaded("hms")) z[difs] <- lapply(z[difs], hms_conv) |
| 31 | + } |
| 32 | + |
| 33 | + for (i in seq_along(z)) { # convert df to class formula |
| 34 | + if (names(z)[i] %in% fmls) class(z[[i]]) <- c(class(z[[i]]), "formula") |
| 35 | + } |
| 36 | + |
| 37 | + } else { |
| 38 | + warning("could not convert. All missing in row used for variable names") |
| 39 | + } |
| 40 | + |
| 41 | + z |
| 42 | +} |
| 43 | + |
1 | 44 | # `wb_to_df()` ----------------------------------------
|
2 | 45 | #' Create a data frame from a Workbook
|
3 | 46 | #'
|
|
34 | 77 | #' Opening, saving and closing the file in a spreadsheet software will resolve
|
35 | 78 | #' this.
|
36 | 79 | #'
|
37 |
| -#' Prior to release `1.15`, datetime variables (yyyy-mm-dd hh:mm:ss) were |
38 |
| -#' imported in the users current timezone (`Sys.timezone()`). This was |
39 |
| -#' changed. All datetime variables are now imported with timezone `"UTC"`. |
| 80 | +#' Before release 1.15, datetime variables (in 'yyyy-mm-dd hh:mm:ss' format) |
| 81 | +#' were imported using the user's local system timezone (`Sys.timezone()`). |
| 82 | +#' This behavior has been updated. Now, all datetime variables are imported |
| 83 | +#' with the timezone set to "UTC". |
| 84 | +#' If automatic date detection and conversion are enabled but the conversion |
| 85 | +#' is unsuccessful (for instance, in a column containing a mix of data types |
| 86 | +#' like strings, numbers, and dates), the dates will be displayed as a Unix |
| 87 | +#' timestamp. These can be transformed using the [as.POSIXct()] function. |
| 88 | +#' If date detection is disabled, dates will show up as a spreadsheet date |
| 89 | +#' format. To convert these, you can use the functions [convert_date()], |
| 90 | +#' [convert_datetime()], or [convert_hms()]. |
40 | 91 | #'
|
41 | 92 | #' @seealso [wb_get_named_regions()]
|
42 | 93 | #'
|
@@ -596,39 +647,34 @@ wb_to_df <- function(
|
596 | 647 | xlsx_cols_names <- colnames(z)
|
597 | 648 | names(xlsx_cols_names) <- xlsx_cols_names
|
598 | 649 |
|
599 |
| - # if colNames, then change tt too |
| 650 | + |
| 651 | + date_conv <- function(x) as.Date(.POSIXct(as.double(x), "UTC"), tz = "UTC", origin = "1970-01-01") |
| 652 | + datetime_conv <- function(x) .POSIXct(as.double(x), "UTC") |
| 653 | + hms_conv <- convert_hms |
| 654 | + |
| 655 | + # if colNames, then change tt too. rownames will be converted later. If column name row |
| 656 | + # is in z/tt, the column name guessing will fail below |
600 | 657 | if (col_names) {
|
601 | 658 | # select first row as colnames, but do not yet assign. it might contain
|
602 | 659 | # missing values and if assigned, convert below might break with unambiguous
|
603 | 660 | # names.
|
| 661 | + |
604 | 662 | nams <- names(xlsx_cols_names)
|
605 |
| - xlsx_cols_names <- z[1, ] |
| 663 | + if (convert) |
| 664 | + xlsx_cols_names <- convert_df(z[1, , drop = FALSE], guess_col_type(tt[1, , drop = FALSE]), date_conv, datetime_conv, hms_conv, as_character = TRUE) |
| 665 | + else |
| 666 | + xlsx_cols_names <- z[1, , drop = FALSE] |
606 | 667 | names(xlsx_cols_names) <- nams
|
607 | 668 |
|
608 | 669 | z <- z[-1, , drop = FALSE]
|
609 | 670 | tt <- tt[-1, , drop = FALSE]
|
610 | 671 | }
|
611 | 672 |
|
612 |
| - if (row_names) { |
613 |
| - rownames(z) <- z[, 1] |
614 |
| - rownames(tt) <- z[, 1] |
615 |
| - xlsx_cols_names <- xlsx_cols_names[-1] |
616 |
| - |
617 |
| - z <- z[, -1, drop = FALSE] |
618 |
| - tt <- tt[, -1, drop = FALSE] |
619 |
| - } |
620 |
| - |
621 | 673 | # # faster guess_col_type alternative? to avoid tt
|
622 | 674 | # types <- ftable(cc$row_r ~ cc$c_r ~ cc$typ)
|
623 | 675 |
|
624 |
| - date_conv <- NULL |
625 |
| - datetime_conv <- NULL |
626 |
| - hms_conv <- convert_hms |
627 |
| - |
628 | 676 | if (missing(types)) {
|
629 | 677 | types <- guess_col_type(tt)
|
630 |
| - date_conv <- function(x) as.Date(.POSIXct(as.double(x), "UTC"), tz = "UTC", origin = "1970-01-01") |
631 |
| - datetime_conv <- function(x) .POSIXct(as.double(x), "UTC") |
632 | 678 | } else {
|
633 | 679 | # TODO check if guessing only if !all() is possible
|
634 | 680 | if (any(xlsx_cols_names %in% names(types))) {
|
@@ -659,36 +705,24 @@ wb_to_df <- function(
|
659 | 705 | stop("no variable from `types` found in data")
|
660 | 706 | }
|
661 | 707 |
|
| 708 | + # avoid multiple conversion |
662 | 709 | date_conv <- function(x) convert_date(x, origin = origin)
|
663 | 710 | datetime_conv <- function(x) convert_datetime(x, origin = origin)
|
664 | 711 | }
|
665 | 712 |
|
666 | 713 | # could make it optional or explicit
|
667 | 714 | if (convert) {
|
668 |
| - sel <- !is.na(names(types)) |
669 |
| - |
670 |
| - if (any(sel)) { |
671 |
| - nums <- names(which(types[sel] == 1)) |
672 |
| - dtes <- names(which(types[sel] == 2)) |
673 |
| - poxs <- names(which(types[sel] == 3)) |
674 |
| - logs <- names(which(types[sel] == 4)) |
675 |
| - difs <- names(which(types[sel] == 5)) |
676 |
| - fmls <- names(which(types[sel] == 6)) |
677 |
| - # convert "#NUM!" to "NaN" -- then converts to NaN |
678 |
| - # maybe consider this an option to instead return NA? |
679 |
| - if (length(nums)) z[nums] <- lapply(z[nums], function(i) as.numeric(replace(i, i == "#NUM!", "NaN"))) |
680 |
| - if (length(dtes)) z[dtes] <- lapply(z[dtes], date_conv) |
681 |
| - if (length(poxs)) z[poxs] <- lapply(z[poxs], datetime_conv) |
682 |
| - if (length(logs)) z[logs] <- lapply(z[logs], as.logical) |
683 |
| - if (isNamespaceLoaded("hms")) z[difs] <- lapply(z[difs], hms_conv) |
| 715 | + z <- convert_df(z, types, date_conv, datetime_conv, hms_conv) |
| 716 | + } |
684 | 717 |
|
685 |
| - for (i in seq_along(z)) { # convert df to class formula |
686 |
| - if (names(z)[i] %in% fmls) class(z[[i]]) <- c(class(z[[i]]), "formula") |
687 |
| - } |
| 718 | + # column names were picked earlier |
| 719 | + if (row_names) { |
| 720 | + rownames(z) <- z[, 1] |
| 721 | + rownames(tt) <- z[, 1] |
| 722 | + xlsx_cols_names <- xlsx_cols_names[-1] |
688 | 723 |
|
689 |
| - } else { |
690 |
| - warning("could not convert. All missing in row used for variable names") |
691 |
| - } |
| 724 | + z <- z[, -1, drop = FALSE] |
| 725 | + tt <- tt[, -1, drop = FALSE] |
692 | 726 | }
|
693 | 727 |
|
694 | 728 | if (col_names) {
|
|
0 commit comments