v1.2.0-RC

rmendels · rmendels · commit a8019bf40cfa · 2024-12-10T12:24:06.000-08:00
diff --git a/.Rapp.history b/.Rapp.history
@@ -1,4 +1,3 @@
-str(test)
 units(test)
 60*60*10
 library(ncdf4)
@@ -419,3 +418,4 @@ rerddap::global_search('jplOscar')
 library(sos)
 getwd()
 devtools::check()
+devtools::build()
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,12 @@
 Package: rerddap
-Title: General Purpose Client for 'ERDDAP' Servers
-Description: General purpose R client for 'ERDDAP' servers. Includes
+Title: General Purpose Client for 'ERDDAP™' Servers
+Description: General purpose R client for 'ERDDAP™' servers. Includes
     functions to search for 'datasets', get summary information on
     'datasets', and fetch 'datasets', in either 'csv' or 'netCDF' format.
-    'ERDDAP' information: 
+    'ERDDAP™' information: 
     <https://upwell.pfeg.noaa.gov/erddap/information.html>.
-Version: 1.1.0
-Date: 2024-1-12
+Version: 1.2.0
+Date: 2024-12-13
 License: MIT + file LICENSE
 Authors@R: c(
     person("Scott", "Chamberlain", role = "aut"),
@@ -31,6 +31,7 @@ Imports:
     jsonlite (>= 1.6),
     lubridate,
     methods,
+    nanoparquet,
     ncdf4 (>= 1.16),
     tibble,
     utils,
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+rerddap 1.2.0
+=============
+*  tabledap() requests can now be downloaded as a parquet file, making for a much smaller download
+*  units have been added to tabledap() output
+*  griddap() bug fixed when a coordinate has a very large value, such as for some projected data.
+*  browse() now returns the URL if base::interactive() is FALSE, as the documentation states
+
 rerddap 1.1.0
 =============
 * 'tabledap()' responses now have the datatype given in the file .dds
diff --git a/R/browse.R b/R/browse.R
@@ -1,13 +1,10 @@
 #' Browse a dataset webpage.
 #'
-#' Note that it is an error to call this when `base::interactive()`
-#' returns `FALSE`
-#'
 #' @export
 #'
 #' @param x  datasetid or an object associated with a datasetid such
 #' [info()], [griddap()] or [tabledap()]
-#' @param url A URL for an ERDDAP server. Default:
+#' @param url A URL for an ERDDAP™ server. Default:
 #' https://upwell.pfeg.noaa.gov/erddap/ - See [eurl()] for 
 #' more information
 #' @param ... Further args passed on to `utils::browseURL`
@@ -33,39 +30,40 @@ browse <- function(x, url = eurl(), ...){
   UseMethod("browse", x)
 }
 
+
 #' @export
 browse.character <- function(x, url = eurl(), ...){
-  stopifnot(interactive())
   if (missing(x)) stop("datasetid is required")
   uri <- sprintf(paste0(url, 'info/%s/index.html'), x)
-  utils::browseURL(uri)
+  
+  if (interactive()) {
+    utils::browseURL(uri)
+  } else {
+    message("URL: ", uri)
+    return(uri)
+  }
 }
 
 #' @export
 browse.info <- function(x, url = eurl(), ...){
-  stopifnot(interactive())
   datasetid <- attr(x, "datasetid")
   browse(datasetid, ...)
 }
 
 #' @export
 browse.tabledap <- function(x, url = eurl(), ...){
-  stopifnot(interactive())
   datasetid <- attr(x, "datasetid")
   browse(datasetid, ...)
 }
 
 #' @export
 browse.griddap_nc <- function(x, url = eurl(), ...){
-  stopifnot(interactive())
   datasetid <- attr(x, "datasetid")
   browse(datasetid, ...)
 }
 
-
 #' @export
 browse.griddap_csv <- function(x, url = eurl(), ...){
-  stopifnot(interactive())
   datasetid <- attr(x, "datasetid")
   browse(datasetid, ...)
 }
diff --git a/R/grid.R b/R/grid.R
@@ -1,4 +1,4 @@
-#' Get ERDDAP gridded data
+#' Get ERDDAP™ gridded data
 #'
 #' @export
 #' @template griddap_params
@@ -275,7 +275,7 @@ parse_args <- function(.info, dim, s, dimargs, wname = FALSE){
   }
   tmp <- format(tmp, scientific = FALSE)
   if (length(s) > 1) {
-    if (!length(s) == length(dimvars(.info))) stop("Your stride vector must equal length of dimension variables", call. = FALSE)
+     if (!length(s) == length(dimvars(.info))) stop("Your stride vector must equal length of dimension variables", call. = FALSE)
     names(s) <- dimvars(.info)
     if (!wname) {
       sprintf('[(%s):%s:(%s)]', tmp[1], s[[dim]], tmp[2])
@@ -285,16 +285,16 @@ parse_args <- function(.info, dim, s, dimargs, wname = FALSE){
   } else {
     if (!wname) {
       if (length(tmp) == 1) {
-        tmp
+          tmp
       } else {
         sprintf('[(%s):%s:(%s)]', tmp[1], s, tmp[2])
       }
     } else {
       if (length(tmp) == 1) {
-        tmp
+           tmp
       } else {
         sprintf('%s[(%s):%s:(%s)]', dim, tmp[1], s, tmp[2])
-      }
+       }
     }
   }
 }
diff --git a/R/table.R b/R/table.R
@@ -1,4 +1,4 @@
-#' Get ERDDAP tabledap data.
+#' Get ERDDAP™ tabledap data.
 #'
 #' @export
 #'
@@ -8,13 +8,13 @@
 #' @param ... Any number of key-value pairs in quotes as query constraints.
 #' See Details & examples
 #' @param fields Columns to return, as a character vector
-#' @param distinct If `TRUE` ERDDAP will sort all of the rows in the results
+#' @param distinct If `TRUE` ERDDAP™ will sort all of the rows in the results
 #' table (starting with the first requested variable, then using the second
 #' requested variable if the first variable has a tie, ...), then remove all
-#' non-unique rows of data. In many situations, ERDDAP can return distinct
-#' values quickly and efficiently. But in some cases, ERDDAP must look through
+#' non-unique rows of data. In many situations, ERDDAP™ can return distinct
+#' values quickly and efficiently. But in some cases, ERDDAP™ must look through
 #' all rows of the source dataset.
-#' @param orderby If used, ERDDAP will sort all of the rows in the results
+#' @param orderby If used, ERDDAP™ will sort all of the rows in the results
 #' table (starting with the first variable, then using the second variable
 #' if the first variable has a tie, ...). Normally, the rows of data in the
 #' response table are in the order they arrived from the data source. orderBy
@@ -23,7 +23,7 @@
 #' sorted by stationID, then time. The orderby variables MUST be included in
 #' the list of requested variables in the fields parameter.
 #' @param orderbymax Give a vector of one or more fields, that must be included
-#' in the fields parameter as well. Gives back data given constraints. ERDDAP
+#' in the fields parameter as well. Gives back data given constraints. ERDDAP™
 #' will sort all of the rows in the results table (starting with the first
 #' variable, then using the second variable if the first variable has a
 #' tie, ...) and then just keeps the rows where the value of the last sort
@@ -33,10 +33,11 @@
 #' @param orderbyminmax Same as `orderbymax` parameter, except returns
 #' two rows for every combination of the n-1 variables: one row with the
 #' minimum value, and one row with the maximum value.
+#' @param fmt Format of the download, either 'csv' (default) or 'parquet'
 #' @param units One of 'udunits' (units will be described via the UDUNITS
 #' standard (e.g.,degrees_C)) or 'ucum' (units will be described via the
 #' UCUM standard (e.g., Cel)).
-#' @param url A URL for an ERDDAP server.
+#' @param url A URL for an ERDDAP™ server.
 #' Default: https://upwell.pfeg.noaa.gov/erddap/ - See [eurl()] for 
 #' more information
 #' @param store One of `disk` (default) or `memory`. You can pass
@@ -165,28 +166,49 @@
 #' ## memory
 #' tabledap('erdCinpKfmBT', store = memory())
 #'
-#' # use a different ERDDAP server
+#' # use a different ERDDAP™ server
 #' ## NOAA IOOS NERACOOS
 #' url <- "http://www.neracoos.org/erddap/"
 #' tabledap("E01_optics_hist", url = url)
 #' }
 
 tabledap <- function(x, ..., fields=NULL, distinct=FALSE, orderby=NULL,
-  orderbymax=NULL, orderbymin=NULL, orderbyminmax=NULL, units=NULL,
+  orderbymax=NULL, orderbymin=NULL, orderbyminmax=NULL, units=NULL, fmt = 'csv',
   url = eurl(), store = disk(), callopts=list()) {
+  
 
   if (inherits(x, "info")) {
     url <- x$base_url
     message("info() output passed to x; setting base url to: ", url)
   }
   x <- as.info(x, url)
+  
+  # if fmt is parquet,  check the ERDDAP version
+  
+  if (fmt == 'parquet') {
+    url_version <- version(url)
+    url_version <- as.numeric(sub(".*=", "", url_version))
+    if (url_version < 2.25)  {
+      print(paste0('Selected ERDDAP is version ', url_version))
+      stop('ERDDAP version greater than 2.25 is required for parquet - program stops')
+    }
+  }
+  
   fields <- paste(fields, collapse = ",")
   lenURL <- nchar(url)
   if (substr(url, lenURL, lenURL) != '/') {
     url <- paste0(url, '/')
   }
-  url <- sprintf(paste0(url, "tabledap/%s.csv?%s"), attr(x, "datasetid"),
-                 fields)
+  if (fmt == 'csv') {
+    url <- sprintf(paste0(url, "tabledap/%s.csv?%s"), attr(x, "datasetid"),
+                   fields)
+  } else if (fmt == 'parquet') {
+    url <- sprintf(paste0(url, "tabledap/%s.parquetWMeta?%s"), attr(x, "datasetid"),
+                   fields)
+  } else {
+     print(paste0('format given is ', fmt))
+     stop('fmt must be either csv or parquet')
+  }
   args <- list(...)
   distinct <- if (distinct) 'distinct()' else NULL
   units <- if (!is.null(units)) {
@@ -206,33 +228,60 @@ tabledap <- function(x, ..., fields=NULL, distinct=FALSE, orderby=NULL,
   if (!nchar(args[[1]]) == 0) {
     url <- paste0(url, '&', args)
   }
-  resp <- erd_tab_GET(url, dset = attr(x, "datasetid"), store, callopts)
+  resp <- erd_tab_GET(url, dset = attr(x, "datasetid"), store, fmt, callopts)
   loc <- if (store$store == "disk") resp else "memory"
-  temp_table <- read_table(resp)
-  # change response type
-  dds_url <- sub('csv', 'dds', url)
-  # strip off constraints
-  amp_location <- regexpr("&", dds_url)
-  if (amp_location[1] > 0) {
-    dds_url <- substr(dds_url, 1, amp_location[1] - 1)
+  temp_table <- read_table(resp, fmt)
+  # change response type if csv
+  if (fmt == 'csv'){
+    dds_url <- sub('csv', 'dds', url)
+    # strip off constraints
+    amp_location <- regexpr("&", dds_url)
+    if (amp_location[1] > 0) {
+      dds_url <- substr(dds_url, 1, amp_location[1] - 1)
+    }
+    dds <- try(suppressWarnings(utils::read.table(dds_url)), silent = TRUE)
+    # if (class(dds) == 'try-error') {
+    if (methods::is(dds, 'try-error')) {
+      print('failed to get variable datatype information')
+      print('will leave units unchanged')
+    } else{
+      temp_table <- set_units(temp_table, dds)
+    }
   }
-  dds <- try(suppressWarnings(utils::read.table(dds_url)), silent = TRUE)
-  # if (class(dds) == 'try-error') {
-  if (methods::is(dds, 'try-error')) {
-    print('failed to get variable datatype information')
-    print('will leave units unchanged')
-  } else{
-    temp_table <- set_units(temp_table, dds)
+  
+  # go through the columns and get each one's units to add as an attribute
+  # if parquet file also set missing value to NA
+  temp_table_names <- colnames(temp_table)
+  icount = 0
+  for (myName in temp_table_names){
+    icount <- icount + 1
+    units_loc <- which(x$alldata[[myName]]$attribute_name == 'units')
+    if (length(units_loc) > 0) {
+      temp_units <- x$alldata[[myName]]$value[[units_loc]]
+      if (icount == 1){
+        temp_table_units <- temp_units
+      } else {
+        temp_table_units <- c(temp_table_units, temp_units)
+      }
+    } else {
+      if (icount == 1){
+        temp_table_units <- NA
+      } else {
+        temp_table_units <- c(temp_table_units, NA)
+      }
+    }
+    fillLoc <- which(x$alldata[[myName]]$attribute_name == '_FillValue')
   }
- 
+   
     
   structure(
     #read_table(resp),
     temp_table,
     class = c("tabledap", "data.frame"),
     datasetid = attr(x, "datasetid"),
     path = loc,
-    url = url
+    url = url,
+    units = temp_table_units
   )
 }
 
@@ -250,11 +299,15 @@ print.tabledap <- function(x, ...) {
   print(tibble::as_tibble(x))
 }
 
-erd_tab_GET <- function(url, dset, store, callopts) {
+erd_tab_GET <- function(url, dset, store, fmt, callopts) {
   cli <- crul::HttpClient$new(url = url, opts = callopts)
   if (store$store == "disk") {
     # store on disk
-    key <- gen_key(url, NULL, "csv")
+    if (fmt == 'csv') {
+      key <- gen_key(url, NULL, "csv")
+    } else {
+      key <- gen_key(url, NULL, "parquet")
+    }
     if ( file.exists(file.path(store$path, key)) ) {
       file.path(store$path, key)
     } else {
diff --git a/R/zzz.r b/R/zzz.r
@@ -63,17 +63,30 @@ read_all <- function(x, fmt, read) {
   )
 }
 
-read_table <- function(x){
+read_table <- function(x, fmt){
   if (inherits(x, "HttpResponse")) {
     txt <- gsub('\n$', '', x$parse("UTF-8"))
     read.csv(text = txt, sep = ",", stringsAsFactors = FALSE,
              blank.lines.skip = FALSE)[-1, , drop = FALSE]
   } else {
-    read.delim(x, sep = ",", stringsAsFactors = FALSE,
-               blank.lines.skip = FALSE)[-1, , drop = FALSE]
+    if (fmt =='csv') {
+      read.delim(x, sep = ",", stringsAsFactors = FALSE,
+                 blank.lines.skip = FALSE)[-1, , drop = FALSE]
+    }  else {
+      temp_data <- nanoparquet::read_parquet(x)[-1, , drop = FALSE]
+    }
   }
 }
 
+replace_value_with_na <- function(x, fillValue) {
+  if (is.numeric(x)) {
+    test_value <- as.numeric(fillValue)
+    x[x == test_value] <- NA
+  }
+  return(x)
+}
+
+
 pu <- function(x) sub("/$|//$", "", x)
 
 strect <- function (str, pattern) regmatches(str, regexpr(pattern, str))
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 rerddap
 =====
 
+
 <!-- badges: start -->
 [![cran checks](https://cranchecks.info/badges/worst/rerddap)](https://cranchecks.info/pkgs/rerddap)
   [![R-CMD-check](https://github.com/ropensci/rerddap/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ropensci/rerddap/actions/workflows/R-CMD-check.yaml)
diff --git a/man/browse.Rd b/man/browse.Rd
diff --git a/man/griddap.Rd b/man/griddap.Rd
diff --git a/man/tabledap.Rd b/man/tabledap.Rd
diff --git a/vignettes/Using_rerddap.Rmd b/vignettes/Using_rerddap.Rmd

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-str(test)`
`2`	`1`	`units(test)`
`3`	`2`	`60*60*10`
`4`	`3`	`library(ncdf4)`
`@@ -419,3 +418,4 @@ rerddap::global_search('jplOscar')`
`419`	`418`	`library(sos)`
`420`	`419`	`getwd()`
`421`	`420`	`devtools::check()`
	`421`	`+devtools::build()`