1
- # ' Get ERDDAP tabledap data.
1
+ # ' Get ERDDAP™ tabledap data.
2
2
# '
3
3
# ' @export
4
4
# '
8
8
# ' @param ... Any number of key-value pairs in quotes as query constraints.
9
9
# ' See Details & examples
10
10
# ' @param fields Columns to return, as a character vector
11
- # ' @param distinct If `TRUE` ERDDAP will sort all of the rows in the results
11
+ # ' @param distinct If `TRUE` ERDDAP™ will sort all of the rows in the results
12
12
# ' table (starting with the first requested variable, then using the second
13
13
# ' requested variable if the first variable has a tie, ...), then remove all
14
- # ' non-unique rows of data. In many situations, ERDDAP can return distinct
15
- # ' values quickly and efficiently. But in some cases, ERDDAP must look through
14
+ # ' non-unique rows of data. In many situations, ERDDAP™ can return distinct
15
+ # ' values quickly and efficiently. But in some cases, ERDDAP™ must look through
16
16
# ' all rows of the source dataset.
17
- # ' @param orderby If used, ERDDAP will sort all of the rows in the results
17
+ # ' @param orderby If used, ERDDAP™ will sort all of the rows in the results
18
18
# ' table (starting with the first variable, then using the second variable
19
19
# ' if the first variable has a tie, ...). Normally, the rows of data in the
20
20
# ' response table are in the order they arrived from the data source. orderBy
23
23
# ' sorted by stationID, then time. The orderby variables MUST be included in
24
24
# ' the list of requested variables in the fields parameter.
25
25
# ' @param orderbymax Give a vector of one or more fields, that must be included
26
- # ' in the fields parameter as well. Gives back data given constraints. ERDDAP
26
+ # ' in the fields parameter as well. Gives back data given constraints. ERDDAP™
27
27
# ' will sort all of the rows in the results table (starting with the first
28
28
# ' variable, then using the second variable if the first variable has a
29
29
# ' tie, ...) and then just keeps the rows where the value of the last sort
33
33
# ' @param orderbyminmax Same as `orderbymax` parameter, except returns
34
34
# ' two rows for every combination of the n-1 variables: one row with the
35
35
# ' minimum value, and one row with the maximum value.
36
+ # ' @param fmt whether download should be as '.csv' (default) or '.parquet'
36
37
# ' @param units One of 'udunits' (units will be described via the UDUNITS
37
38
# ' standard (e.g.,degrees_C)) or 'ucum' (units will be described via the
38
39
# ' UCUM standard (e.g., Cel)).
39
- # ' @param url A URL for an ERDDAP server.
40
+ # ' @param url A URL for an ERDDAP™ server.
40
41
# ' Default: https://upwell.pfeg.noaa.gov/erddap/ - See [eurl()] for
41
42
# ' more information
42
43
# ' @param store One of `disk` (default) or `memory`. You can pass
165
166
# ' ## memory
166
167
# ' tabledap('erdCinpKfmBT', store = memory())
167
168
# '
168
- # ' # use a different ERDDAP server
169
+ # ' # use a different ERDDAP™ server
169
170
# ' ## NOAA IOOS NERACOOS
170
171
# ' url <- "http://www.neracoos.org/erddap/"
171
172
# ' tabledap("E01_optics_hist", url = url)
172
173
# ' }
173
174
174
175
tabledap <- function (x , ... , fields = NULL , distinct = FALSE , orderby = NULL ,
175
- orderbymax = NULL , orderbymin = NULL , orderbyminmax = NULL , units = NULL ,
176
+ orderbymax = NULL , orderbymin = NULL , orderbyminmax = NULL , units = NULL , fmt = ' csv ' ,
176
177
url = eurl(), store = disk(), callopts = list ()) {
178
+
177
179
178
180
if (inherits(x , " info" )) {
179
181
url <- x $ base_url
180
182
message(" info() output passed to x; setting base url to: " , url )
181
183
}
182
184
x <- as.info(x , url )
185
+
186
+ # if fmt is parquet, check the ERDDAP version
187
+
188
+ if (fmt == ' parquet' ) {
189
+ url_version <- version(url )
190
+ url_version <- as.numeric(sub(" .*=" , " " , url_version ))
191
+ if (url_version < 2.25 ) {
192
+ print(paste0(' Selected ERDDAP is version ' , url_version ))
193
+ stop(' ERDDAP version greater than 2.25 is required for parquet - program stops' )
194
+ }
195
+ }
196
+
183
197
fields <- paste(fields , collapse = " ," )
184
198
lenURL <- nchar(url )
185
199
if (substr(url , lenURL , lenURL ) != ' /' ) {
186
200
url <- paste0(url , ' /' )
187
201
}
188
- url <- sprintf(paste0(url , " tabledap/%s.csv?%s" ), attr(x , " datasetid" ),
189
- fields )
202
+ if (fmt == ' csv' ) {
203
+ url <- sprintf(paste0(url , " tabledap/%s.csv?%s" ), attr(x , " datasetid" ),
204
+ fields )
205
+ } else if (fmt == ' parquet' ) {
206
+ url <- sprintf(paste0(url , " tabledap/%s.parquetWMeta?%s" ), attr(x , " datasetid" ),
207
+ fields )
208
+ } else {
209
+ print(paste0(' format given is ' , fmt ))
210
+ stop(' fmt must be either csv or parquet' )
211
+ }
190
212
args <- list (... )
191
213
distinct <- if (distinct ) ' distinct()' else NULL
192
214
units <- if (! is.null(units )) {
@@ -206,33 +228,60 @@ tabledap <- function(x, ..., fields=NULL, distinct=FALSE, orderby=NULL,
206
228
if (! nchar(args [[1 ]]) == 0 ) {
207
229
url <- paste0(url , ' &' , args )
208
230
}
209
- resp <- erd_tab_GET(url , dset = attr(x , " datasetid" ), store , callopts )
231
+ resp <- erd_tab_GET(url , dset = attr(x , " datasetid" ), store , fmt , callopts )
210
232
loc <- if (store $ store == " disk" ) resp else " memory"
211
- temp_table <- read_table(resp )
212
- # change response type
213
- dds_url <- sub(' csv' , ' dds' , url )
214
- # strip off constraints
215
- amp_location <- regexpr(" &" , dds_url )
216
- if (amp_location [1 ] > 0 ) {
217
- dds_url <- substr(dds_url , 1 , amp_location [1 ] - 1 )
233
+ temp_table <- read_table(resp , fmt )
234
+ # change response type if csv
235
+ if (fmt == ' csv' ){
236
+ dds_url <- sub(' csv' , ' dds' , url )
237
+ # strip off constraints
238
+ amp_location <- regexpr(" &" , dds_url )
239
+ if (amp_location [1 ] > 0 ) {
240
+ dds_url <- substr(dds_url , 1 , amp_location [1 ] - 1 )
241
+ }
242
+ dds <- try(suppressWarnings(utils :: read.table(dds_url )), silent = TRUE )
243
+ # if (class(dds) == 'try-error') {
244
+ if (methods :: is(dds , ' try-error' )) {
245
+ print(' failed to get variable datatype information' )
246
+ print(' will leave units unchanged' )
247
+ } else {
248
+ temp_table <- set_units(temp_table , dds )
249
+ }
218
250
}
219
- dds <- try(suppressWarnings(utils :: read.table(dds_url )), silent = TRUE )
220
- # if (class(dds) == 'try-error') {
221
- if (methods :: is(dds , ' try-error' )) {
222
- print(' failed to get variable datatype information' )
223
- print(' will leave units unchanged' )
224
- } else {
225
- temp_table <- set_units(temp_table , dds )
251
+
252
+ # go through columns get units to add as an attribute
253
+ # if parquet file also set missing value to NA
254
+ temp_table_names <- colnames(temp_table )
255
+ icount = 0
256
+ for (myName in temp_table_names ){
257
+ icount <- icount + 1
258
+ units_loc <- which(x $ alldata [[myName ]]$ attribute_name == ' units' )
259
+ if (length(units_loc ) > 0 ) {
260
+ temp_units <- x $ alldata [[myName ]]$ value [[units_loc ]]
261
+ if (icount == 1 ){
262
+ temp_table_units <- temp_units
263
+ } else {
264
+ temp_table_units <- c(temp_table_units , temp_units )
265
+ }
266
+ } else {
267
+ if (icount == 1 ){
268
+ temp_table_units <- NA
269
+ } else {
270
+ temp_table_units <- c(temp_table_units , NA )
271
+ }
272
+ }
273
+ fillLoc <- which(x $ alldata [[myName ]]$ attribute_name == ' _FillValue' )
226
274
}
227
-
275
+
228
276
229
277
structure(
230
278
# read_table(resp),
231
279
temp_table ,
232
280
class = c(" tabledap" , " data.frame" ),
233
281
datasetid = attr(x , " datasetid" ),
234
282
path = loc ,
235
- url = url
283
+ url = url ,
284
+ units = temp_table_units
236
285
)
237
286
}
238
287
@@ -250,11 +299,15 @@ print.tabledap <- function(x, ...) {
250
299
print(tibble :: as_tibble(x ))
251
300
}
252
301
253
- erd_tab_GET <- function (url , dset , store , callopts ) {
302
+ erd_tab_GET <- function (url , dset , store , fmt , callopts ) {
254
303
cli <- crul :: HttpClient $ new(url = url , opts = callopts )
255
304
if (store $ store == " disk" ) {
256
305
# store on disk
257
- key <- gen_key(url , NULL , " csv" )
306
+ if (fmt == ' csv' ) {
307
+ key <- gen_key(url , NULL , " csv" )
308
+ } else {
309
+ key <- gen_key(url , NULL , " parquet" )
310
+ }
258
311
if ( file.exists(file.path(store $ path , key )) ) {
259
312
file.path(store $ path , key )
260
313
} else {
0 commit comments