diff --git a/.Rbuildignore b/.Rbuildignore index 91be94e2..a4686bfb 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -90,6 +90,7 @@ vignettes/Status.Rmd vignettes/long_to_wide.Rmd vignettes/join_by_closest.Rmd vignettes/wqx3_development_plan.Rmd +vignettes/read_waterdata_functions.Rmd vignettes/dataretrieval_discrete_changes_images/* ^ci$ ^public$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index b6aa1e55..15254f9b 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -24,6 +24,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + API_USGS_PAT: ${{ secrets.API_USGS_PAT }} R_KEEP_PKG_SOURCE: yes CUSTOM_DR_UA: 'GitHub_CI' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 7c20a4cb..ac3360ad 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -28,6 +28,7 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + API_USGS_PAT: ${{ secrets.API_USGS_PAT }} CUSTOM_DR_UA: 'GitHub_CI' steps: - uses: actions/checkout@581d62f320f2a4043a1ea6ac77290c60d27485cc diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index ce05609b..98e4b9e6 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -14,6 +14,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + API_USGS_PAT: ${{ secrets.API_USGS_PAT }} R_KEEP_PKG_SOURCE: yes CUSTOM_DR_UA: 'GitHub_CI' diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9112a7f2..fc5dec06 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -31,6 +31,7 @@ variables: NOT_CRAN: "true" PAGES_OUTDIR: "$CI_PROJECT_DIR/public" CUSTOM_DR_UA: "GitLab_CI" + API_USGS_PAT: "${API_USGS_PAT}" build-image: stage: build @@ -98,6 +99,7 @@ pages: cache: [] script: - Rscript -e 'pkgdown::build_site(override = list(destination = "public"))' + - Rscript -e 'file.copy(from = "./public/articles/logo.png", to = "./public/reference/logo.png")' artifacts: paths: - $PAGES_OUTDIR diff --git a/DESCRIPTION b/DESCRIPTION index 52258002..1bdf2b4a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data -Version: 2.7.18.9002 +Version: 2.7.19 Authors@R: c( person("Laura", "DeCicco", role = c("aut","cre"), email = "ldecicco@usgs.gov", @@ -26,12 +26,20 @@ Authors@R: c( comment=c(ORCID = "0000-0003-2521-5043")), person("Lee", "Stanish", role="ctb", email = "lstanish@usgs.gov", - comment=c(ORCID = "0000-0002-9775-6861"))) + comment=c(ORCID = "0000-0002-9775-6861")), + person("Joeseph", "Zemmels", role="ctb", + email = "jzemmels@usgs.gov", + comment=c(ORCID = "0009-0008-1463-6313")), + person("Elise", "Hinman", role="ctb", + email = "ehinman@usgs.gov", + comment=c(ORCID = "0000-0001-5396-1583")), + person("Michael", "Mahoney", role="ctb", + email = "mjmahoney@usgs.gov", + comment=c(ORCID = "0000-0003-2402-304X")) + ) Description: Collection of functions to help retrieve U.S. Geological Survey and U.S. Environmental Protection Agency water quality and - hydrology data from web services. Data are discovered from - National Water Information System <https://waterservices.usgs.gov/> and <https://waterdata.usgs.gov/nwis>. - Water quality data are obtained from the Water Quality Portal <https://www.waterqualitydata.us/>. + hydrology data from web services.
License: CC0 Copyright: This software is in the public domain because it contains materials that originally came from the United States Geological Survey, an agency of @@ -39,23 +47,25 @@ Copyright: This software is in the public domain because it contains materials Depends: R (>= 4.1.0) Imports: - curl, + curl (>= 6.0.0), lubridate (>= 1.5.0), stats, utils, xml2, readr (>= 1.4.0), jsonlite, - httr2 + httr2, + whisker, + sf Suggests: covr, dplyr, knitr, rmarkdown, - sf, testthat Encoding: UTF-8 BuildVignettes: true VignetteBuilder: knitr BugReports: https://github.com/DOI-USGS/dataRetrieval/issues RoxygenNote: 7.3.2 +Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index d3130088..c396b414 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,11 +3,13 @@ export(addWaterYear) export(calcWaterYear) export(checkWQPdates) -export(check_param) +export(check_OGC_requests) +export(check_waterdata_sample_params) export(constructNWISURL) export(constructUseURL) export(constructWQPURL) -export(construct_USGS_sample_request) +export(construct_api_requests) +export(construct_waterdata_sample_request) export(countyCd) export(countyCdLookup) export(create_NWIS_bib) @@ -43,11 +45,17 @@ export(readWQPdata) export(readWQPqw) export(readWQPsummary) export(read_USGS_samples) +export(read_waterdata) +export(read_waterdata_daily) +export(read_waterdata_monitoring_location) +export(read_waterdata_samples) +export(read_waterdata_ts_meta) export(renameNWISColumns) export(setAccess) export(stateCd) export(stateCdLookup) export(summarize_USGS_samples) +export(summarize_waterdata_samples) export(whatNWISdata) export(whatNWISsites) export(whatWQPdata) diff --git a/NEWS b/NEWS index 624a63fd..2e656a3a 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,12 @@ +dataRetrieval 2.7.19 +=================== +* Added read_waterdata_daily, read_waterdata_monitoring_location, read_waterdata_ts_meta to access +new USGS web services. +* Added whisker and sf as dependencies. +* Renamed read_USGS_samples to read_waterdata_samples. +* Renamed summarize_USGS_samples to summarize_waterdata_samples. +* Added warning to setAccess for non-public endpoints. + dataRetrieval 2.7.18 =================== * Switched from httr to httr2 diff --git a/R/AAA.R b/R/AAA.R index 762bff68..32340d30 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -4,6 +4,7 @@ pkg.env <- new.env() suppressMessages(setAccess("public")) pkg.env$nldi_base <- "https://api.water.usgs.gov/nldi/linked-data/" pkg.env$local_sf <- requireNamespace("sf", quietly = TRUE) + options("dataRetrieval" = list("api_version" = "v0")) } @@ -51,3 +52,11 @@ discrete water quality data newer than March 11, 2024. For additional details, see: https://doi-usgs.github.io/dataRetrieval/articles/Status.html") } + +new_nwis_message <- function(){ + return("ALERT: All NWIS services are slated for decommission +and new dataRetrieval functions will be added. +For up-to-date information, see: +https://doi-usgs.github.io/dataRetrieval/articles/Status.html") +} + diff --git a/R/checkWQPdates.R b/R/checkWQPdates.R index 99abf198..3033cb2e 100644 --- a/R/checkWQPdates.R +++ b/R/checkWQPdates.R @@ -1,7 +1,7 @@ #' Date Check for Water Quality Portal #' -#' Checks date format for inputs to the Water Quality Portal. Used in \code{readWQPqw} -#' and \code{readWQPdata}. +#' Checks date format for inputs to the Water Quality Portal. Used in `readWQPqw` +#' and `readWQPdata`. 
#' #' @param values named list with arguments to send to the Water Quality Portal #' @return values named list with corrected arguments to send to the Water Quality Portal diff --git a/R/citations.R b/R/citations.R index aedbd5ba..b6d5b21d 100644 --- a/R/citations.R +++ b/R/citations.R @@ -2,7 +2,7 @@ #' #' Uses attributes from the NWIS functions to create data citations. #' -#' See \code{?bibentry} for more information. +#' See `?bibentry` for more information. #' #' @param x Any data returned from an NWIS function, must #' include "queryTime" and "url" attributes, which should @@ -48,7 +48,7 @@ create_NWIS_bib <- function(x){ #' #' Uses attributes from the WQP functions to create data citations. #' -#' See \code{?bibentry} for more information. +#' See `?bibentry` for more information. #' #' @param x Any data returned from an NWIS function, must #' include "queryTime" and "url" attributes, which should diff --git a/R/constructNWISURL.R b/R/constructNWISURL.R index 98b5e094..fedb8edb 100644 --- a/R/constructNWISURL.R +++ b/R/constructNWISURL.R @@ -22,21 +22,21 @@ #' momentary problem with the internet connection). It is possible to safely use the "tsv" option, #' but the user must carefully check the results to see if the data returns matches #' what is expected. The default is therefore "xml". -#' @param expanded logical defaults to \code{TRUE}. If \code{TRUE}, retrieves additional +#' @param expanded logical defaults to `TRUE`. If `TRUE`, retrieves additional #' information, only applicable for qw data. #' @param ratingType can be "base", "corr", or "exsa". Only applies to rating curve data. #' @param statReportType character Only used for statistics service requests. Time #' division for statistics: daily, monthly, or annual. Default is daily. #' Note that daily provides statistics for each calendar day over the specified #' range of water years, i.e. no more than 366 data points will be returned for -#' each site/parameter. Use \code{readNWISdata} or \code{readNWISdv} for daily averages. +#' each site/parameter. Use `readNWISdata` or `readNWISdv` for daily averages. #' Also note that "annual" returns statistics for the calendar year. Use -#' \code{readNWISdata} for water years. Monthly and yearly +#' `readNWISdata` for water years. Monthly and yearly #' provide statistics for each month and year within the range individually. #' @param statType character Only used for statistics service requests. Type(s) #' of statistics to output for daily values. Default is mean, which is the only #' option for monthly and yearly report types. See the statistics service documentation -#' at \url{https://waterservices.usgs.gov/docs/statistics/} for a +#' at <https://waterservices.usgs.gov/docs/statistics/> for a #' full list of codes. #' @keywords data import USGS web service #' @return url string @@ -304,7 +304,7 @@ constructNWISURL <- function(siteNumbers, #' Construct WQP url for data retrieval #' -#' Construct WQP url for data retrieval. This function gets the data from here: \url{https://www.waterqualitydata.us} +#' Construct WQP url for data retrieval. This function gets the data from here: <https://www.waterqualitydata.us> #' #' @param siteNumbers string or vector of strings USGS site number. #' @param parameterCd string or vector of USGS parameter code. This is usually an 5 digit number.
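For orientation, a minimal sketch of the constructWQPURL() call documented above (the sites and characteristic codes are illustrative; empty start and end dates request the full period of record):

# Build, without performing, a WQP request for two USGS sites
# and two USGS parameter codes:
site_ids <- paste("USGS", c("04024430", "04024000"), sep = "-")
wqp_url <- constructWQPURL(siteNumbers = site_ids,
                           parameterCd = c("34247", "30234"),
                           startDate = "",
                           endDate = "")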
@@ -433,7 +433,7 @@ constructWQPURL <- function(siteNumbers, #' Construct URL for NWIS water use data service #' -#' Reconstructs URLs to retrieve data from here: \url{https://waterdata.usgs.gov/nwis/wu} +#' Reconstructs URLs to retrieve data from here: <https://waterdata.usgs.gov/nwis/wu> #' #' @param years integer Years for data retrieval. Must be years ending in 0 or 5, #' or "ALL", which retrieves all available years. @@ -490,4 +490,4 @@ constructUseURL <- function(years, stateCd, countyCd, categories) { .multi = "comma") return(baseURL) -} \ No newline at end of file +} diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R new file mode 100644 index 00000000..3d2279f2 --- /dev/null +++ b/R/construct_api_requests.R @@ -0,0 +1,578 @@ +#' Create API url +#' +#' Main documentation: <https://api.waterdata.usgs.gov/ogcapi/v0/>, +#' Swagger docs: <https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html>. +#' +#' @export +#' @param service Which service available on <https://api.waterdata.usgs.gov/ogcapi/v0/>. +#' @param ... Extra parameters from the specific services. +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). +#' @param properties The properties that should be included for each feature. The +#' parameter value is a comma-separated list of property names which depend on the +#' service being called. +#' @param skipGeometry This option can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information. +#' @param limit The optional limit parameter limits the number of items that are +#' presented in the response document. Only items are counted that are on the +#' first level of the collection in the response document. Nested objects +#' contained within the explicitly requested items shall not be counted. +#' @keywords internal +#' @examples +#' site <- "USGS-02238500" +#' pcode <- "00060" +#' req_dv <- construct_api_requests("daily", +#' monitoring_location_id = site, +#' parameter_code = "00060") +#' +#' req_dv <- construct_api_requests("daily", +#' monitoring_location_id = site, +#' parameter_code = c("00060", "00065")) +#' +#' sites <- c("USGS-01491000", "USGS-01645000") +#' start_date <- "2018-01-01" +#' end_date <- "2022-01-01" +#' req_dv <- construct_api_requests("daily", +#' monitoring_location_id = sites, +#' parameter_code = c("00060", "00065"), +#' datetime = c(start_date, end_date)) +#' +construct_api_requests <- function(service, + properties = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + skipGeometry = FALSE, + ...){ + + baseURL <- setup_api(service) + + POST <- FALSE + + single_params <- c("datetime", "last_modified", "begin", "end", "time") + + full_list <- list(...)
+ + if(all(is.na(full_list)) & all(is.na(bbox))){ + warning("No filtering arguments specified.") + } + + get_list <- full_list[names(full_list) %in% single_params] + + get_list[["skipGeometry"]] <- skipGeometry + + if(is.na(limit)){ + if(!is.na(max_results)){ + get_list[["limit"]] <- max_results + } else { + get_list[["limit"]] <- 10000 + } + } else { + if(!is.na(max_results)){ + if(limit > max_results) stop("limit cannot be greater than max_results") + } + get_list[["limit"]] <- limit + } + + post_list <- full_list[!names(full_list) %in% single_params] + + post_params <- explode_post(post_list) + + if(length(post_params) > 0){ + POST <- TRUE + } + + time_periods <- c("last_modified", "datetime", "time", "begin", "end") + if(any(time_periods %in% names(get_list))){ + for(i in time_periods){ + get_list[[i]] <- format_api_dates(get_list[[i]]) + full_list[[i]] <- format_api_dates(full_list[[i]]) + } + } + + baseURL <- explode_query(baseURL, POST = FALSE, get_list) + + if(all(!is.na(bbox))){ + baseURL <- httr2::req_url_query(baseURL, + bbox = as.numeric(bbox), + .multi = "comma") + } + + if(!all(is.na(properties))){ + baseURL <- httr2::req_url_query(baseURL, + properties = properties, + .multi = "comma") + } + + if(POST){ + baseURL <- baseURL |> + httr2::req_headers(`Content-Type` = "application/query-cql-json") + + post_params <- list( + "params" = unname(post_params) + ) + + template_path_post <- system.file("templates/post.CQL2", package = "dataRetrieval") + template_post <- readChar(template_path_post, file.info(template_path_post)$size) + + x <- whisker::whisker.render(template_post, post_params) + baseURL <- httr2::req_body_raw(baseURL, x) + + } else { + baseURL <- explode_query(baseURL, POST = FALSE, full_list) + } + + return(baseURL) +} + +#' Setup the request for the OGC API requests +#' +#' @noRd +#' @return httr2 request +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' request <- dataRetrieval:::base_url() +#' request +#' } base_url <- function(){ + + httr2::request("https://api.waterdata.usgs.gov/ogcapi/") |> + httr2::req_url_path_append(getOption("dataRetrieval")$api_version) } + +#' Setup the request for a particular endpoint collection +#' +#' @noRd +#' @return httr2 request +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' request <- dataRetrieval:::setup_api("daily") +#' request +#' } setup_api <- function(service){ + + baseURL <- base_url() |> + httr2::req_url_path_append("collections") |> + httr2::req_url_path_append(service, "items") |> + basic_request() + } + +#' Switch endpoint id arg +#' +#' @noRd +#' @return list +#' @examples +#' +#' l1 <- list("id" = "1234") +#' dataRetrieval:::switch_arg_id(l1, +#' id_name = "monitoring_location_id", +#' service = "monitoring-locations") +#' +#' l2 <- list("monitoring_location_id" = "1234") +#' dataRetrieval:::switch_arg_id(l2, +#' id_name = "monitoring_location_id", +#' service = "monitoring-locations") +#' +#' l3 <- list("monitoring_locations_id" = "1234") +#' dataRetrieval:::switch_arg_id(l3, +#' id_name = "monitoring_location_id", +#' service = "monitoring-locations") +#' switch_arg_id <- function(ls, id_name, service){ + + service_id <- paste0(gsub("-", "_", service), "_id") + if(!"id" %in% names(ls)){ + if(service_id %in% names(ls)){ + ls[["id"]] <- ls[[service_id]] + } else { + ls[["id"]] <- ls[[id_name]] + } + } + + ls[[service_id]] <- NULL + ls[[id_name]] <- NULL + return(ls) +} + +#' Switch properties id +#' +#' @noRd +#' @return list +#' @examples +#' +#' properties <- c("id", "state_name",
"country_name") +#' dataRetrieval:::switch_properties_id(properties, +#' id_name = "monitoring_location_id", +#' service = "monitoring-locations") +#' +#' properties2 <- c("monitoring_location_id", "state_name", "country_name") +#' dataRetrieval:::switch_properties_id(properties2, +#' id_name = "monitoring_location_id", +#' service = "monitoring-locations") +#' +#' properties3 <- c("monitoring_locations_id", "state_name", "country_name") +#' dataRetrieval:::switch_properties_id(properties3, +#' id_name = "monitoring_location_id", +#' service = "monitoring-locations") +switch_properties_id <- function(properties, id_name, service){ + + service_id <- paste0(gsub("-", "_", service), "_id") + + last_letter <- substr(service, nchar(service), nchar(service)) + if(last_letter == "s"){ + service_singluar <- substr(service,1, nchar(service)-1) + service_id_singular <- paste0(gsub("-", "_", service_singluar), "_id") + } else { + service_id_singular <- "" + } + + if(!"id" %in% properties){ + if(service_id %in% properties){ + properties[properties == service_id] <- "id" + + } else if(service_id_singular %in% properties) { + properties[properties == service_id_singular] <- "id" + } else { + properties[properties == id_name] <- "id" + } + } + + schema <- check_OGC_requests(endpoint = service, + type = "schema") + all_properties <- names(schema$properties) + + if(all(all_properties[!all_properties %in% c("id", "geometry")] %in% properties)) { + # Cleans up URL if we're asking for everything + properties <- NA_character_ + } else { + if(all(!is.na(properties))){ + properties <- gsub("-", "_", properties) + properties <- properties[!properties %in% c("id", + "geometry", + paste0(gsub("-", "_", service), "_id"))] + } + } + + if(!all(is.na(properties))){ + match.arg(properties, choices = all_properties, + several.ok = TRUE) + } + + return(properties) +} + + +#' Format the date request +#' +#' Users will want to give either start/end dates or +#' period requests. +#' +#' +#' @noRd +#' @return character vector with a length of either 1 or 2. 
+#' @examples +#' +#' start_end <- c("2021-01-01", "2022-01-01") +#' dataRetrieval:::format_api_dates(start_end) +#' +#' period <- "P7D" +#' dataRetrieval:::format_api_dates(period) +#' +#' start <- c("2021-01-01", NA) +#' dataRetrieval:::format_api_dates(start) +#' +#' end <- c(NA, "2021-01-01") +#' dataRetrieval:::format_api_dates(end) +#' +#' start_end <- as.POSIXct(c("2021-01-01 12:15:00", "2022-01-01 16:45")) +#' dataRetrieval:::format_api_dates(start_end) +#' +#' start_end2 <- c("2021-01-01 12:15:00", "") +#' dataRetrieval:::format_api_dates(start_end2) +#' format_api_dates <- function(datetime){ + + if(is.character(datetime)){ + datetime[datetime == ""] <- NA + } + + if(!any(isTRUE(is.na(datetime)) | isTRUE(is.null(datetime)))){ + if(length(datetime) == 1){ + if(grepl("P", datetime, ignore.case = TRUE) | + grepl("/", datetime)){ + return(datetime) + } else { + datetime <- format(datetime, format = "%Y-%m-%dT%H:%M:%SZ") + } + } else if (length(datetime) == 2) { + datetime <- as.POSIXct(datetime) + datetime <- paste0(vapply(datetime, FUN = function(x) { + format(x, format = "%Y-%m-%dT%H:%M:%SZ")}, + FUN.VALUE = c(NA_character_) + ), collapse = "/") + datetime <- gsub("NA", "..", datetime) + } else { + stop("datetime should only include 1-2 values") + } + } + return(datetime) +} + +#' Turn request list into POST body CQL +#' +#' @noRd +#' @return character vector of CQL filters +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' +#' query_list <- list(monitoring_location_id = c("USGS-01491000", +#' "USGS-01645000"), +#' parameter_code = c("00060", "00010")) +#' +#' dataRetrieval:::explode_post(query_list) +#' +#' } explode_post <- function(ls){ + + ls <- Filter(Negate(anyNA), ls) + params <- NULL + + if(length(ls) > 0){ + if(max(lengths(ls)) > 1) { + + for(i in seq_along(ls)){ + params[names(ls[i])] <- cql2_param(ls[i]) + } + + if(length(params) > 1){ + params[seq_along(1:(length(params)-1))] <- paste0(params[seq_along(1:(length(params)-1))], ",") + } + } + } + return(params) +} + +#' Create CQL parameters +#' +#' Renders a single named parameter and its values into a CQL2 filter using the package's param.CQL2 template.
+#' +#' @param parameter named vector +#' @noRd +#' @return list +#' @examples +#' +#' parameter <- list("monitoring_location_id" = c("USGS-02238500", +#' "USGS-01491000")) +#' dataRetrieval:::cql2_param(parameter) +#' +cql2_param <- function(parameter){ + template_path <- system.file("templates/param.CQL2", package = "dataRetrieval") + template <- readChar(template_path, file.info(template_path)$size) + + parameters <- paste0(unlist(parameter), collapse = '", "') + parameters <- paste0('"', parameters, '"') + parameter_list <- list("property" = names(parameter), + "parameter" = parameters) + return(whisker::whisker.render(template, parameter_list)) +} + +#' Check OGC requests +#' +#' @param endpoint Character, can be any existing collection +#' @param type Character, can be "queryables", "schema" +#' @export +#' @keywords internal +#' @return list +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' +#' dv_queryables <- check_OGC_requests(endpoint = "daily", +#' type = "queryables") +#' dv_schema <- check_OGC_requests(endpoint = "daily", +#' type = "schema") +#' ts_meta_queryables <- check_OGC_requests(endpoint = "time-series-metadata", +#' type = "queryables") +#' ts_meta_schema <- check_OGC_requests(endpoint = "time-series-metadata", +#' type = "schema") +#' } +check_OGC_requests <- function(endpoint = "daily", + type = "queryables"){ + + match.arg(type, c("queryables", "schema")) + + query_ret <- get_collection() + + services <- sapply(query_ret$tags, function(x) x[["name"]]) + + match.arg(endpoint, services) + + req <- base_url() |> + httr2::req_url_path_append("collections") |> + httr2::req_url_path_append(endpoint) |> + httr2::req_url_path_append(type) |> + basic_request() + + query_ret <- req |> + httr2::req_perform() |> + httr2::resp_body_json() + + return(query_ret) + +} + +#' Custom Error Messages +#' +#' Helps to give more informative messages on some errors. +#' +#' @param resp httr2 response +#' @return list +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' check_collections <- dataRetrieval:::base_url() |> +#' httr2::req_url_path_append("openapi?f=html#/server/getCollections") +#' +#' collect_request <- dataRetrieval:::basic_request(check_collections) +#' query_ret <- httr2::req_perform(collect_request) +#' dataRetrieval:::error_body(query_ret) +#' } +#' +error_body <- function(resp) { + status <- httr2::resp_status(resp) + if(status == 429){ + x <- httr2::resp_body_json(resp)$error + return(x[["message"]]) + } else if (status == 403){ + return("Query request denied. Possible reasons include query exceeding server limits.") + } +} + + +#' Basic request to API services +#' +#' Automatically includes json format, gzip encoding, dataRetrieval +#' user agents, and the X-Api-Key token if available. 
+#' +#' @param url_base httr2 request +#' @return httr2 request +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' check_collections <- dataRetrieval:::base_url() |> +#' httr2::req_url_path_append("openapi?f=html#/server/getCollections") +#' collect_request <- dataRetrieval:::basic_request(check_collections) +#' collect_request +#' } +#' basic_request <- function(url_base){ + + req <- url_base |> + httr2::req_user_agent(default_ua()) |> + httr2::req_headers(`Accept-Encoding` = c("compress", "gzip")) |> + httr2::req_url_query(f = "json", + lang = "en-US") |> + httr2::req_error(body = error_body) + + token <- Sys.getenv("API_USGS_PAT") + + if(token != ""){ + req <- req |> + httr2::req_headers_redacted(`X-Api-Key` = token) + } + + return(req) + +} + +#' Create service descriptions dynamically +#' +#' This function populates the service description. +#' +#' @param service Character, can be any of the endpoints +#' @return list +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' ml_desc <- dataRetrieval:::get_description("monitoring-locations") +#' ml_desc +#' } +#' get_description <- function(service){ + + query_ret <- get_collection() + + tags <- query_ret[["tags"]] + + service_index <- which(sapply(tags, function(x){ + x$name == service + })) + + tags[[service_index]][["description"]] + +} + +#' Get collection response +#' +#' +#' @return httr2 response +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' collection <- dataRetrieval:::get_collection() +#' collection +#' } +#' get_collection <- function(){ + + check_collections <- base_url() |> + httr2::req_url_path_append("openapi?f=html#/server/getCollections") + + check_endpoints_req <- basic_request(check_collections) + + query_ret <- httr2::req_perform(check_endpoints_req) |> + httr2::resp_body_json() + + return(query_ret) } + +#' Create parameter descriptions dynamically +#' +#' This function populates the parameter descriptions. +#' +#' @param service Character, can be any of the endpoints +#' @return list +#' @noRd +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' ml <- dataRetrieval:::get_params("monitoring-locations") +#' ml$national_aquifer_code +#' } +#' get_params <- function(service){ + + check_queryables_req <- base_url() |> + httr2::req_url_path_append("collections") |> + httr2::req_url_path_append(service) |> + httr2::req_url_path_append("schema") |> + basic_request() + + query_ret <- httr2::req_perform(check_queryables_req) |> + httr2::resp_body_json() + + params <- sapply(query_ret$properties, function(x) x[["description"]]) + } + diff --git a/R/dataRetrievals-package.R b/R/dataRetrievals-package.R index 3afb244a..cd205de7 100644 --- a/R/dataRetrievals-package.R +++ b/R/dataRetrievals-package.R @@ -1,8 +1,14 @@ .onAttach <- function(libname, pkgname) { if (!interactive()) return() - dataRetrieval_version = utils::packageVersion("dataRetrieval") + dataRetrieval_version <- utils::packageVersion("dataRetrieval") + token_message <- "" + if(Sys.getenv("API_USGS_PAT") == ""){ + token_message <- "\nConsider adding an API_USGS_PAT for new USGS functions.
See: https://api.waterdata.usgs.gov/signup" + } packageStartupMessage("dataRetrieval ", dataRetrieval_version," -Extended Documentation: https://doi-usgs.github.io/dataRetrieval") +Extended Documentation: https://doi-usgs.github.io/dataRetrieval", + token_message) } #' Retrieval functions for USGS and EPA data @@ -15,13 +21,13 @@ Extended Documentation: https://doi-usgs.github.io/dataRetrieval") #' that originally came from the United States Geological Survey, an agency of #' the United States Department of Interior. For more information, see the #' official USGS copyright policy at -#' \url{https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits}\cr +#' <https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits>\cr #' LazyLoad: \tab yes\cr #' } #' #' Retrieval functions for USGS and EPA hydrologic and water quality data. #' -#' Please see \url{https://doi-usgs.github.io/dataRetrieval/} for more information. +#' Please see <https://doi-usgs.github.io/dataRetrieval/> for more information. #' #' @name dataRetrieval #' @aliases dataRetrieval-package @@ -60,7 +66,7 @@ NULL #' Data to convert USGS parameter code to characteristic names #' #' Data pulled from Water Quality Portal on December 20, 2021. The data was pulled from -#' \url{https://www.waterqualitydata.us/Codes/public_srsnames/?mimeType=csv}. +#' <https://www.waterqualitydata.us/Codes/public_srsnames/?mimeType=csv>. #' #' @name pCodeToName #' @return pCodeToName data frame with information about USGS parameters and how they @@ -90,7 +96,7 @@ NULL #' US State Code Lookup Table #' #' Classic lookup table for states. Has been replaced in functions with -#' \code{check_param("states")}. +#' `check_waterdata_sample_params("states")`. #' #' @name stateCd #' @return stateCd data frame. @@ -112,7 +118,7 @@ NULL #' US County Code Lookup Table #' #' Classic lookup table for counties. Has been replaced in functions with -#' \code{check_param("counties")}. +#' `check_waterdata_sample_params("counties")`. #' #' @name countyCd #' @return countyCd data frame. diff --git a/R/findNLDI.R b/R/findNLDI.R index fb109aef..c32e0d1b 100644 --- a/R/findNLDI.R +++ b/R/findNLDI.R @@ -91,15 +91,14 @@ get_nldi <- function(url, type = "", use_sf = FALSE, warn = TRUE) { # If successful ... if (res$status_code == 200) { # Interpret as text - d <- httr2::resp_body_string(res) - - if (d == "") { - + if(length(res$body) > 0){ + d <- httr2::resp_body_string(res) + } else { if(warn){ warning("No data returned for: ", url, call. = FALSE) } - return(NULL) + return(NULL) } if (use_sf) { diff --git a/R/importNGWMN_wml2.R b/R/importNGWMN_wml2.R index 2f0541f1..a9782fa9 100644 --- a/R/importNGWMN_wml2.R +++ b/R/importNGWMN_wml2.R @@ -4,12 +4,12 @@ #' but the general functionality is correct. #' #' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML. -#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character +#' @param asDateTime logical, if `TRUE` returns date and time as POSIXct, if `FALSE`, character #' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the #' date times to UTC, properly accounting for daylight savings times based on the data's provided time zone offset. #' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla".
See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. #' @return mergedDF a data frame source, time, value, uom, uomTitle, comment, gmlID #' @export @@ -138,7 +138,7 @@ importNGWMN <- function(input, asDateTime = FALSE, tz = "UTC") { #' Anything defined as a default, is returned as an attribute of that data frame. #' #' @param input XML with only the wml2:MeasurementTimeseries node and children -#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character +#' @param asDateTime logical, if `TRUE` returns date and time as POSIXct, if `FALSE`, character #' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the #' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset). #' Possible values are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", diff --git a/R/importRDB1.R b/R/importRDB1.R index 36419be4..0fcdfe4f 100644 --- a/R/importRDB1.R +++ b/R/importRDB1.R @@ -6,14 +6,14 @@ #' recommended to use the RDB format for importing multi-site data. #' #' @param obs_url character containing the url for the retrieval or a file path to the data file. -#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date +#' @param asDateTime logical, if `TRUE` returns date and time as POSIXct, if `FALSE`, Date #' @param tz character to set timezone attribute of datetime. Default converts the datetimes to UTC #' (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Recommended US values include "UTC", "America/New_York", "America/Chicago", "America/Denver", #' "America/Los_Angeles", "America/Anchorage", "America/Honolulu", "America/Jamaica", "America/Managua", #' "America/Phoenix", and "America/Metlakatla". -#' For a complete list, see \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones} +#' For a complete list, see <https://en.wikipedia.org/wiki/List_of_tz_database_time_zones> +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the #' function will convert the data to dates, datetimes, #' numerics based on a standard algorithm.
If false, everything is returned as a character #' @return A data frame with the following columns: @@ -22,7 +22,7 @@ #' agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr #' site_no \tab character \tab The USGS site number \cr #' datetime \tab POSIXct \tab The date and time of the value converted to -#' UTC (if asDateTime = \code{TRUE}), \cr +#' UTC (if asDateTime = `TRUE`), \cr #' \tab character \tab or raw character string (if asDateTime = FALSE) \cr #' tz_cd \tab character \tab The time zone code for datetime \cr #' code \tab character \tab Any codes that qualify the corresponding value\cr @@ -99,6 +99,11 @@ importRDB1 <- function(obs_url, tz <- match.arg(tz, OlsonNames()) + if (is.character(obs_url) && + grepl("(https)://[^ /$.?#].[^\\s]*", obs_url)){ + obs_url <- httr2::request(obs_url) + } + if(inherits(obs_url, "httr2_request")){ doc <- getWebServiceData(obs_url) diff --git a/R/importWQP.R b/R/importWQP.R index c245afb0..570f5131 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -8,17 +8,17 @@ #' Possible values include "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", #' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua", #' "America/Phoenix", and "America/Metlakatla" -#' @param csv logical. Is the data coming back with a csv or tsv format. Default is \code{FALSE}. +#' @param csv logical. Is the data coming back with a csv or tsv format. Default is `FALSE`. #' Currently, the summary service does not support tsv, for other services tsv is the safer choice. -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character. #' @return retval dataframe raw data returned from the Water Quality Portal. Additionally, #' a POSIXct dateTime column is supplied for #' start and end times, and converted to UTC. See -#' \url{https://www.waterqualitydata.us/portal_userguide/} for more information. +#' <https://www.waterqualitydata.us/portal_userguide/> for more information. #' @export -#' @seealso \code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}} +#' @seealso [readWQPdata()], [readWQPqw()], [whatWQPsites()] #' @examplesIf is_dataRetrieval_user() #' # These examples require an internet connection to run #' @@ -45,18 +45,24 @@ importWQP <- function(obs_url, tz = "UTC", tz <- "UTC" } + if (is.character(obs_url) && + grepl("(https)://[^ /$.?#].[^\\s]*", obs_url)){ + obs_url <- httr2::request(obs_url) + } + if (inherits(obs_url, "httr2_request")) { doc <- getWebServiceData(obs_url) if (is.null(doc)) { return(invisible(NULL)) } headerInfo <- attr(doc, "headerInfo") - + } else { doc <- obs_url } last_chars <- as.character(substr(doc, nchar(doc)-1, nchar(doc))) + if(last_chars != c("\n")){ doc <- paste0(doc, "\n") } diff --git a/R/importWaterML1.R b/R/importWaterML1.R index da9ba20b..2ad98d46 100644 --- a/R/importWaterML1.R +++ b/R/importWaterML1.R @@ -5,13 +5,13 @@ #' #' @param obs_url character or raw, containing the url for the retrieval or a #' file path to the data file, or raw XML. -#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date +#' @param asDateTime logical, if `TRUE` returns date and time as POSIXct, if `FALSE`, Date #' @param tz character to set timezone attribute of datetime.
Default converts the datetimes to UTC #' (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Recommended US values include "UTC", "America/New_York", "America/Chicago", "America/Denver", #' "America/Los_Angeles", "America/Anchorage", "America/Honolulu", "America/Jamaica", "America/Managua", #' "America/Phoenix", and "America/Metlakatla". -#' For a complete list, see \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones} +#' For a complete list, see <https://en.wikipedia.org/wiki/List_of_tz_database_time_zones> #' @return A data frame with the following columns: #' \tabular{lll}{ #' Name \tab Type \tab Description \cr #' @@ -39,7 +39,7 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' -#' @seealso \code{\link{renameNWISColumns}} +#' @seealso [renameNWISColumns()] #' @export #' @examplesIf is_dataRetrieval_user() #' site_id <- "02177000" diff --git a/R/readNGWMNdata.R b/R/readNGWMNdata.R index 5c283e81..bc8704bf 100644 --- a/R/readNGWMNdata.R +++ b/R/readNGWMNdata.R @@ -2,14 +2,14 @@ #' #' Only water level data and site locations and names are currently available through the web service. #' @param service char Service for the request - "observation" and "featureOfInterest" are implemented. -#' @param \dots Other parameters to supply, namely \code{siteNumbers} or \code{bbox} -#' @param asDateTime logical if \code{TRUE}, will convert times to POSIXct format. Currently defaults to -#' \code{FALSE} since time zone information is not included. +#' @param \dots Other parameters to supply, namely `siteNumbers` or `bbox` +#' @param asDateTime logical if `TRUE`, will convert times to POSIXct format. Currently defaults to +#' `FALSE` since time zone information is not included. #' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the #' date times to UTC, properly accounting for daylight savings times based on the data's provided time zone offset. #' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. #' @export #' @examplesIf is_dataRetrieval_user() @@ -98,15 +98,15 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = "UTC") { #' Retrieve groundwater levels from the National Ground Water Monitoring Network. #' #' @param siteNumbers character Vector of feature IDs formatted with agency code and site number -#' separated by a period or semicolon, e.g. \code{USGS.404159100494601}. +#' separated by a period or semicolon, e.g. `USGS.404159100494601`. #' @param asDateTime logical Should dates and times be converted to date/time objects, -#' or returned as character? Defaults to \code{TRUE}. Must be set to \code{FALSE} if a site +#' or returned as character? Defaults to `TRUE`. Must be set to `FALSE` if a site #' contains non-standard dates. #' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the #' date times to UTC, properly accounting for daylight savings times based on the data's provided time zone offset.
#' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. #' @export #' @@ -139,7 +139,7 @@ readNGWMNlevels <- function(siteNumbers, asDateTime = TRUE, tz = "UTC") { #' Retrieve site data from the National Ground Water Monitoring Network. #' #' @param siteNumbers character Vector of feature IDs formatted with agency code and site number -#' separated by a period or semicolon, e.g. \code{USGS.404159100494601}. +#' separated by a period or semicolon, e.g. `USGS.404159100494601`. #' #' @export #' @return A data frame the following columns: diff --git a/R/readNWISdata.R b/R/readNWISdata.R index 182ae674..d0c47fc4 100644 --- a/R/readNWISdata.R +++ b/R/readNWISdata.R @@ -1,20 +1,20 @@ #' General Data Import from NWIS #' #' Returns data from the NWIS web service. -#' Arguments to the function should be based on \url{https://waterservices.usgs.gov} service calls. +#' Arguments to the function should be based on <https://waterservices.usgs.gov> service calls. #' See examples below for ideas of constructing queries. #' -#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the +#' @param asDateTime logical, if `TRUE` returns date and time as POSIXct, if `FALSE`, Date +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the #' function will convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character #' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the #' date times to UTC, properly accounting for daylight savings times based on the data's provided tz_cd column. #' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. -#' @param \dots see \url{https://waterservices.usgs.gov/docs/site-service/} for +#' @param \dots see <https://waterservices.usgs.gov/docs/site-service/> for #' a complete list of options. A #' list of arguments can also be supplied. One important argument to include is #' "service". Possible values are "iv" #' (for instantaneous), #' "dv" (for daily values), "gwlevels" (for groundwater levels), #' "site" (for site service), "measurement", and "stat" (for #' statistics service). Note: "measurement" calls go to: -#' \url{https://nwis.waterdata.usgs.gov/usa/nwis} for data requests, and use different call requests schemes. +#' <https://nwis.waterdata.usgs.gov/usa/nwis> for data requests, and use different call requests schemes. #' The statistics service has a limited selection of arguments -#' (see \url{https://waterservices.usgs.gov/docs/site-service/}). +#' (see <https://waterservices.usgs.gov/docs/site-service/>). #' #' @details This function requires users to create their own arguments #' based on the NWIS web services.
It is a more complicated function to use -#' compared to other NWIS functions such as \code{\link{readNWISdv}}, \code{\link{readNWISuv}}, -#' \code{\link{readNWISgwl}}, etc. However, this function adds a lot of +#' compared to other NWIS functions such as [readNWISdv()], [readNWISuv()], +#' [readNWISgwl()], etc. However, this function adds a lot of #' flexibility to the possible queries. This function will also behave exactly #' as NWIS when it comes to date queries. NWIS by default will only return the latest #' value for the daily and instantaneous services. So if you do not provide @@ -71,13 +71,12 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' -#' @seealso \code{\link{renameNWISColumns}}, \code{\link{importWaterML1}}, \code{\link{importRDB1}} +#' @seealso [read_waterdata()] #' @export #' @examplesIf is_dataRetrieval_user() #' \donttest{ #' # Examples not run for time considerations #' -#' dataTemp <- readNWISdata(stateCd = "OH", parameterCd = "00010", service = "dv") #' instFlow <- readNWISdata( #' sites = "05114000", service = "iv", #' parameterCd = "00060", @@ -96,26 +95,7 @@ #' service = "iv", parameterCd = "00060" #' ) #' -#' bBoxEx <- readNWISdata(bBox = c(-83, 36.5, -81, 38.5), parameterCd = "00010") #' -#' startDate <- as.Date("2013-10-01") -#' endDate <- as.Date("2014-09-30") -#' waterYear <- readNWISdata( -#' bBox = c(-83, 36.5, -82.5, 36.75), -#' parameterCd = "00010", -#' service = "dv", -#' startDate = startDate, -#' endDate = endDate -#' ) -#' -#' siteInfo <- readNWISdata( -#' stateCd = "WI", parameterCd = "00010", -#' hasDataTypeCd = "iv", service = "site" -#' ) -#' temp <- readNWISdata( -#' bBox = c(-83, 36.5, -82.5, 36.75), parameterCd = "00010", service = "site", -#' seriesCatalogOutput = TRUE -#' ) #' GWL <- readNWISdata(site_no = c("392725077582401", #' "375907091432201"), #' parameterCd = "62610", @@ -160,19 +140,6 @@ #' ) #' allDailyStats_2 <- readNWISdata(arg.list, service = "stat") #' -#' # use county names to get data -#' dailyStaffordVA <- readNWISdata( -#' stateCd = "Virginia", -#' countyCd = "Stafford", -#' parameterCd = "00060", -#' startDate = "2015-01-01", -#' endDate = "2015-01-30" -#' ) -#' va_counties <- c("51001", "51003", "51005", "51007", "51009", "51011", "51013", "51015") -#' va_counties_data <- readNWISdata( -#' startDate = "2015-01-01", endDate = "2015-12-31", -#' parameterCd = "00060", countycode = va_counties -#' ) #' #' site_id <- "01594440" #' rating_curve <- readNWISdata(service = "rating", site_no = site_id, file_type = "base") @@ -221,6 +188,17 @@ Please see vignette('qwdata_changes', package = 'dataRetrieval') for more information. https://cran.r-project.org/web/packages/dataRetrieval/vignettes/qwdata_changes.html" ) + } else if (service == "dv"){ + .Deprecated(new = "read_waterdata_daily", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_daily.") + + } else if (service == "site"){ + .Deprecated(new = "read_waterdata_monitoring_location", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_monitoring_location") + } else { + message(new_nwis_message()) } baseURL <- httr2::request(pkg.env[[service]]) @@ -284,8 +262,8 @@ https://cran.r-project.org/web/packages/dataRetrieval/vignettes/qwdata_changes.h #' State code look up #' -#' Function to simplify finding state and state code definitions. Used in \code{readNWISdata} -#' and \code{readWQPdata}. 
+#' Function to simplify finding state and state code definitions. Used in `readNWISdata` +#' and `readWQPdata`. #' #' @param input could be character (full name, abbreviation, id), or numeric (id) #' @param country description @@ -310,7 +288,7 @@ stateCdLookup <- function(input, outputType <- match.arg(outputType, c("postal", "fullName", "id", "fips")) - states <- check_param("states") + states <- check_waterdata_sample_params("states") country <- match.arg(country, choices = unique(states$countryCode), several.ok = FALSE) states <- states[states$countryCode == country,] @@ -348,8 +326,8 @@ stateCdLookup <- function(input, #' US county code look up #' -#' Function to simplify finding county and county code definitions. Used in \code{readNWISdata} -#' and \code{readNWISuse}. Currently only has US counties. +#' Function to simplify finding county and county code definitions. Used in `readNWISdata` +#' and `readNWISuse`. Currently only has US counties. #' #' @param state could be character (full name, abbreviation, id), or numeric (id) #' @param county could be character (name, with or without "County") or numeric (id) @@ -382,7 +360,7 @@ countyCdLookup <- function(state, county, outputType = "fips") { stop("Only one state allowed in countyCdLookup.") } - counties <- check_param("counties") + counties <- check_waterdata_sample_params("counties") # first turn state into stateCd postal name state_postal <- stateCdLookup(state, @@ -428,7 +406,7 @@ countyCdLookup <- function(state, county, outputType = "fips") { } #' -#' Format and organize NWIS arguments that are passed in as \code{...}. +#' Format and organize NWIS arguments that are passed in as `...`. #' #' @keywords internal readNWISdots <- function(...) { @@ -587,4 +565,4 @@ convertLists <- function(...) { list(...)[sapply(list(...), class) != "list"] ) # get the non-list parts return(matchReturn) -} \ No newline at end of file +} diff --git a/R/readNWISdv.R b/R/readNWISdv.R index 95fb5e76..c67a773c 100644 --- a/R/readNWISdv.R +++ b/R/readNWISdv.R @@ -1,18 +1,18 @@ #' Daily Value USGS NWIS Data Retrieval #' #' Imports data from NWIS daily web service. This function gets the data from here: -#' \url{https://waterservices.usgs.gov/docs/dv-service/daily-values-service-details/} +#' <https://waterservices.usgs.gov/docs/dv-service/daily-values-service-details/> #' Inputs to this function are just USGS site ids, USGS parameter codes, -#' USGS statistic codes, and start and end date. For a more complex query, use \code{\link{readNWISdata}}, +#' USGS statistic codes, and start and end date. For a more complex query, use [readNWISdata()], #' with an argument service = "dv". #' Data coming the daily web services are aggregates of the instantaneous #' (sensor) web services. Not all statistical codes are available for all data. -#' Use the function \code{\link{whatNWISdata}} to discover what data +#' Use the function [whatNWISdata()] to discover what data #' is available for a USGS site. The column data_type_cd with the values "dv" -#' returned from \code{\link{whatNWISdata}}) are available from this service. +#' returned from [whatNWISdata()]) are available from this service. #' #' More information on the web service can be found here: -#' \url{https://waterservices.usgs.gov/test-tools}, choosing the +#' <https://waterservices.usgs.gov/test-tools>, choosing the #' "Daily Value Service". #' #' @param siteNumbers character USGS site number. This is usually an 8 digit number.
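Because readNWISdv() now deprecates in favor of read_waterdata_daily(), a hedged migration sketch may help; monitoring_location_id and parameter_code follow the queryables used by the new daily endpoint elsewhere in this patch, while statistic_id and time are assumptions about the new signature rather than confirmed arguments:

# Old: rawDailyQ <- readNWISdv("04085427", "00060", "2012-01-01", "2012-06-30")
# New (sketch): note the "USGS-" prefix on the monitoring location id.
daily_q <- read_waterdata_daily(monitoring_location_id = "USGS-04085427",
                                parameter_code = "00060",
                                statistic_id = "00003", # daily mean
                                time = c("2012-01-01", "2012-06-30"))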
@@ -49,40 +49,30 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' -#' @seealso \code{\link{renameNWISColumns}}, \code{\link{importWaterML1}} +#' @seealso [read_waterdata_daily()] #' @export #' @keywords data import USGS web service -#' @examplesIf is_dataRetrieval_user() -#' site_id <- "04085427" -#' startDate <- "2012-01-01" -#' endDate <- "2012-06-30" -#' pCode <- "00060" -#' \donttest{ -#' rawDailyQ <- readNWISdv(site_id, pCode, startDate, endDate) -#' rawDailyQAndTempMeanMax <- readNWISdv(site_id, c("00010", "00060"), -#' startDate, endDate, -#' statCd = c("00001", "00003") -#' ) -#' rawDailyQAndTempMeanMax <- renameNWISColumns(rawDailyQAndTempMeanMax) -#' rawDailyMultiSites <- readNWISdv(c("01491000", "01645000"), c("00010", "00060"), -#' startDate, endDate, -#' statCd = c("00001", "00003") -#' ) -#' # Site with no data: -#' x <- readNWISdv("10258500", "00060", "2014-09-08", "2014-09-14") -#' names(attributes(x)) -#' attr(x, "siteInfo") -#' attr(x, "variableInfo") -#' -#' site <- "05212700" -#' notActive <- readNWISdv(site, "00060", "2014-01-01", "2014-01-07") -#' } +#' @examples +#' +#' # see ?read_waterdata_daily +#' +#' #site_id <- "04085427" +#' #startDate <- "2012-01-01" +#' #endDate <- "2012-06-30" +#' #pCode <- "00060" +#' # +#' #rawDailyQ <- readNWISdv(site_id, pCode, startDate, endDate) +#' readNWISdv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", statCd = "00003") { + .Deprecated(new = "read_waterdata_daily", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_daily.") + url <- constructNWISURL( siteNumbers = siteNumbers, parameterCd = parameterCd, diff --git a/R/readNWISpCode.R b/R/readNWISpCode.R index 1bb77bd7..6eb2f80c 100644 --- a/R/readNWISpCode.R +++ b/R/readNWISpCode.R @@ -1,11 +1,11 @@ #' USGS Parameter Data Retrieval #' #' Imports data from NWIS about measured parameter based on user-supplied parameter code or codes. -#' This function gets the data from here: \url{https://nwis.waterdata.usgs.gov/nwis/pmcodes} +#' This function gets the data from here: <https://nwis.waterdata.usgs.gov/nwis/pmcodes> #' -#' @param parameterCd character of USGS parameter codes (or multiple parameter codes). These are 5 digit number codes, -#' more information can be found here: \url{https://help.waterdata.usgs.gov/}. To get a -#' complete list of all current parameter codes in the USGS, use "all" as the input. +#' @param parameterCd character of USGS parameter codes (or multiple parameter codes). +#' These are 5 digit number codes. To get a complete list of all current parameter +#' codes in the USGS, use "all" as the input. #' @keywords data import USGS web service #' @return parameterData data frame with the following information: #' \tabular{lll}{ #' @@ -19,7 +19,7 @@ #' } #' #' @export -#' @seealso \code{\link{importRDB1}} +#' @seealso [importRDB1()] #' @examples #' #' paramINFO <- readNWISpCode(c("01075", "00060", "00931")) #' @@ -31,6 +31,9 @@ #' #' } readNWISpCode <- function(parameterCd) { + + message(new_nwis_message()) + parameterCd.orig <- parameterCd parameterCd <- parameterCd[!is.na(parameterCd)] baseURL <- httr2::request(pkg.env[["pCode"]]) diff --git a/R/readNWISsite.R b/R/readNWISsite.R index 5898ff32..4ab14e63 100644 --- a/R/readNWISsite.R +++ b/R/readNWISsite.R @@ -1,6 +1,6 @@ #' USGS Site File Data Retrieval #' -#' Imports data from USGS site file site. This function gets data from here: \url{https://waterservices.usgs.gov/} +#' Imports data from USGS site file site.
This function gets data from here: <https://waterservices.usgs.gov/> #' #' @param siteNumbers character USGS site number (or multiple sites). This is usually an 8 digit number #' @keywords data import USGS web service @@ -59,14 +59,19 @@ #' comment \tab character \tab Header comments from the RDB file \cr #' } #' @export -#' @examplesIf is_dataRetrieval_user() -#' \donttest{ -#' -#' siteINFO <- readNWISsite("05114000") -#' siteINFOMulti <- readNWISsite(c("05114000", "09423350")) -#' } +#' @seealso [read_waterdata_monitoring_location()] +#' @examples +#' +#' # see ?read_waterdata_monitoring_location +#' # siteINFOMulti <- readNWISsite(c("05114000", "09423350")) +#' readNWISsite <- function(siteNumbers) { + .Deprecated(new = "read_waterdata_monitoring_location", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_monitoring_location") + + baseURL <- httr2::request(pkg.env[["site"]]) urlSitefile <- httr2::req_url_query(baseURL, siteOutput = "Expanded", diff --git a/R/readNWISunit.R b/R/readNWISunit.R index b3fea426..13728aa3 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -1,32 +1,32 @@ #' Instantaneous value data retrieval from USGS (NWIS) #' #' Imports data from NWIS web service. This function gets the data from here: -#' \url{https://waterservices.usgs.gov/docs/instantaneous-values/instantaneous-values-details/} +#' <https://waterservices.usgs.gov/docs/instantaneous-values/instantaneous-values-details/> #' Inputs to this function are just USGS site ids, USGS parameter codes, -#' and start and end date. For a more complex query, use \code{\link{readNWISdata}}, +#' and start and end date. For a more complex query, use [readNWISdata()], #' including an arguement service="uv". #' Not all parameter codes are available for all data. -#' Use the function \code{\link{whatNWISdata}} to discover what data +#' Use the function [whatNWISdata()] to discover what data #' is available for a USGS site. The column data_type_cd with the values "uv" -#' returned from \code{\link{whatNWISdata}}) are available from this service. +#' returned from [whatNWISdata()]) are available from this service. #' #' More information on the web service can be found here: -#' \url{https://waterservices.usgs.gov/test-tools}, choosing the +#' <https://waterservices.usgs.gov/test-tools>, choosing the #' "Instantaneous Value Service". #' #' @param siteNumbers character USGS site number (or multiple sites). This is usually an 8 digit number #' @param parameterCd character USGS parameter code. This is usually an 5 digit number. #' @param startDate character starting date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates #' retrieval for the earliest possible record. Simple date arguments are specified in local time. -#' See more information here: \url{https://waterservices.usgs.gov/docs/instantaneous-values/}. +#' See more information here: <https://waterservices.usgs.gov/docs/instantaneous-values/>. #' @param endDate character ending date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates #' retrieval for the latest possible record. Simple date arguments are specified in local time. -#' See more information here: \url{https://waterservices.usgs.gov/docs/instantaneous-values/}. +#' See more information here: <https://waterservices.usgs.gov/docs/instantaneous-values/>. #' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the #' date times to UTC, properly accounting for daylight savings times based on the data's provided tz_cd column.
#' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. #' @keywords data import USGS web service #' @return A data frame with the following columns: @@ -55,7 +55,7 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' -#' @seealso \code{\link{renameNWISColumns}}, \code{\link{importWaterML1}} +#' @seealso [renameNWISColumns()], [importWaterML1()] #' @export #' @examplesIf is_dataRetrieval_user() #' site_id <- "05114000" @@ -107,7 +107,7 @@ readNWISuv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", t #' Peak flow data from USGS (NWIS) #' #' Reads peak flow from NWISweb. Data is retrieved from -#' \url{https://waterdata.usgs.gov/nwis}. +#' <https://waterdata.usgs.gov/nwis>. #' In some cases, the specific date of the peak data is not know. This function #' will default to #' converting complete dates to a "Date" object, and converting incomplete dates to @@ -123,10 +123,10 @@ readNWISuv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", t #' @param endDate character ending date for data retrieval in the form YYYY-MM-DD. #' Default is "" which indicates #' retrieval for the latest possible record. -#' @param asDateTime logical default to \code{TRUE}. When \code{TRUE}, the peak_dt column is converted -#' to a Date object, and incomplete dates are removed. When \code{FALSE}, no +#' @param asDateTime logical default to `TRUE`. When `TRUE`, the peak_dt column is converted +#' to a Date object, and incomplete dates are removed. When `FALSE`, no #' columns are removed, but no dates are converted. -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates, datetimes, #' numerics based on a standard algorithm.
If false, everything is returned as a character #' @return A data frame with the following columns: @@ -137,7 +137,7 @@ readNWISuv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", t #' peak_dt \tab Date \tab Date of peak streamflow \cr #' peak_tm \tab character \tab Time of peak streamflow as character \cr #' peak_va \tab numeric \tab Annual peak streamflow value in cfs \cr -#' peak_cd \tab character \tab Peak Discharge-Qualification codes (see \code{comment} +#' peak_cd \tab character \tab Peak Discharge-Qualification codes (see `comment` #' for more information) \cr #' gage_ht \tab numeric \tab Gage height for the associated peak streamflow in feet \cr #' gage_ht_cd \tab character \tab Gage height qualification codes \cr @@ -159,7 +159,7 @@ readNWISuv <- function(siteNumbers, parameterCd, startDate = "", endDate = "", t #' comment \tab character \tab Header comments from the RDB file \cr #' siteInfo \tab data.frame \tab A data frame containing information on the requested sites \cr #' } -#' @seealso \code{\link{constructNWISURL}}, \code{\link{importRDB1}} +#' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() #' site_ids <- c("01594440", "040851325") @@ -175,6 +175,8 @@ readNWISpeak <- function(siteNumbers, asDateTime = TRUE, convertType = TRUE) { + message(new_nwis_message()) + # Doesn't seem to be a peak xml service url <- constructNWISURL( siteNumbers = siteNumbers, @@ -207,7 +209,7 @@ readNWISpeak <- function(siteNumbers, } - siteInfo <- suppressMessages(readNWISsite(siteNumbers)) + siteInfo <- suppressWarnings(readNWISsite(siteNumbers)) siteInfo <- merge( x = unique(data[, c("agency_cd", "site_no")]), y = siteInfo, @@ -226,22 +228,22 @@ #' Rating table for an active USGS streamgage retrieval #' #' Reads current rating table for an active USGS streamgage from NWISweb. -#' Data is retrieved from \url{https://waterdata.usgs.gov/nwis}. +#' Data is retrieved from <https://waterdata.usgs.gov/nwis>. #' #' @param siteNumber character USGS site number. This is usually an 8 digit number #' @param type character can be "base", "corr", or "exsa" -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character -#' @return A data frame. If \code{type} is "base, " then the columns are +#' @return A data frame. If `type` is "base, " then the columns are #' INDEP, typically the gage height, in feet; DEP, typically the streamflow, #' in cubic feet per second; and STOR, where "*" indicates that the pair are -#' a fixed point of the rating curve. If \code{type} is "exsa, " then an +#' a fixed point of the rating curve. If `type` is "exsa, " then an #' additional column, SHIFT, is included that indicates the current shift in -#' the rating for that value of INDEP. If \code{type} is "corr, " then the +#' the rating for that value of INDEP. If `type` is "corr, " then the #' columns are INDEP, typically the gage height, in feet; CORR, the correction #' for that value; and CORRINDEP, the corrected value for CORR.\cr -#' If \code{type} is "base, " then the data frame has an attribute called "RATING" +#' If `type` is "base, " then the data frame has an attribute called "RATING" #' that describes the rating curve.
#' #' There are also several useful attributes attached to the data frame: @@ -256,7 +258,7 @@ readNWISpeak <- function(siteNumbers, #' #' @note Not all active USGS streamgages have traditional rating curves that #' relate flow to stage. -#' @seealso \code{\link{constructNWISURL}}, \code{\link{importRDB1}} +#' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() #' site_id <- "01594440" @@ -266,6 +268,7 @@ readNWISpeak <- function(siteNumbers, #' } readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { + message(new_nwis_message()) # No rating xml service url <- constructNWISURL(siteNumber, service = "rating", ratingType = type) @@ -284,7 +287,7 @@ readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { attr(data, "RATING") <- Rat } - siteInfo <- suppressMessages(readNWISsite(siteNumbers = siteNumber)) + siteInfo <- suppressWarnings(readNWISsite(siteNumbers = siteNumber)) attr(data, "siteInfo") <- siteInfo attr(data, "variableInfo") <- NULL @@ -296,8 +299,8 @@ #' Surface-water measurement data retrieval from USGS (NWIS) #' -#' Reads surface-water measurement data from NWISweb. Data is retrieved from \url{https://waterdata.usgs.gov/nwis}. -#' See \url{https://waterdata.usgs.gov/usa/nwis/sw} for details about surface water. +#' Reads surface-water measurement data from NWISweb. Data is retrieved from <https://waterdata.usgs.gov/nwis>. +#' See <https://waterdata.usgs.gov/usa/nwis/sw> for details about surface water. #' #' @param siteNumbers character USGS site number (or multiple sites). This is usually an 8 digit number #' @param startDate character starting date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates @@ -308,10 +311,10 @@ readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { #' date times to UTC, properly accounting for daylight savings times based on the data's provided tz_cd column. #' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. #' @param expanded logical. Whether or not (TRUE or FALSE) to call the expanded data. -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function will #' convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character #' @return A data frame with at least the following columns: @@ -326,8 +329,8 @@ readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { #' tz_cd \tab character \tab The time zone code for the measurement_dt column \cr #' } #' -#' See \url{https://waterdata.usgs.gov/usa/nwis/sw} for details about surface water, and -#' \url{https://waterdata.usgs.gov/nwis/help?output_formats_help} +#' See <https://waterdata.usgs.gov/usa/nwis/sw> for details about surface water, and +#' <https://waterdata.usgs.gov/nwis/help?output_formats_help> #' for help on the columns and codes.
#' #' There are also several useful attributes attached to the data frame: @@ -339,7 +342,7 @@ readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { #' siteInfo \tab data.frame \tab A data frame containing information on the requested sites \cr #' tz_cd_reported \tab The originally reported time zone \cr #' } -#' @seealso \code{\link{constructNWISURL}}, \code{\link{importRDB1}} +#' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() #' site_ids <- c("01594440", "040851325") @@ -357,6 +360,7 @@ readNWISmeas <- function(siteNumbers, expanded = FALSE, convertType = TRUE) { + message(new_nwis_message()) # Doesn't seem to be a WaterML1 format option url <- constructNWISURL( siteNumbers = siteNumbers, @@ -404,7 +408,7 @@ } - siteInfo <- suppressMessages(readNWISsite(siteNumbers)) + siteInfo <- suppressWarnings(readNWISsite(siteNumbers)) siteInfo <- merge( x = unique(data[, c("agency_cd", "site_no")]), y = siteInfo, @@ -427,25 +431,25 @@ #' Groundwater level measurements retrieval from USGS (NWIS) #' #' Imports groundwater level data from NWIS web service. This function gets the data from here: -#' \url{https://waterservices.usgs.gov/docs/groundwater-levels/groundwater-levels-details/} +#' <https://waterservices.usgs.gov/docs/groundwater-levels/groundwater-levels-details/> #' Inputs to this function are just USGS site ids, USGS parameter codes, -#' and start and end date. For a more complex query, use \code{\link{readNWISdata}}, +#' and start and end date. For a more complex query, use [readNWISdata()], #' including an argument service="gwlevels". #' Not all parameter codes are available for all data. -#' Use the function \code{\link{whatNWISdata}} to discover what data +#' Use the function [whatNWISdata()] to discover what data #' is available for a USGS site. The column data_type_cd with the values "gw" -#' returned from \code{\link{whatNWISdata}}) are available from this service. +#' returned from [whatNWISdata()] are available from this service. #' #' More information on the web service can be found here: -#' \url{https://waterservices.usgs.gov/test-tools}, choosing the +#' <https://waterservices.usgs.gov/test-tools>, choosing the #' "Groundwater Levels Value Service". #' #' #' Mixed date/times come back from the service -#' depending on the year that the data was collected. See \url{https://waterdata.usgs.gov/usa/nwis/gw} +#' depending on the year that the data was collected. See <https://waterdata.usgs.gov/usa/nwis/gw> #' for details about groundwater. By default the returned dates are converted to date objects, unless convertType #' is specified as FALSE. Sites with non-standard date formats (i.e. lacking a day) can be affected (see examples). -#' See \url{https://waterservices.usgs.gov/docs/groundwater-levels/} for more information. +#' See <https://waterservices.usgs.gov/docs/groundwater-levels/> for more information. #' #' @param siteNumbers character USGS site number (or multiple sites). This is usually an 8 digit number #' @param startDate character starting date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates @@ -453,14 +457,14 @@ readNWISmeas <- function(siteNumbers, #' @param endDate character ending date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates #' retrieval for the latest possible record. #' @param parameterCd character USGS parameter code. This is usually a 5 digit number. Default is "". -#' @param convertType logical, defaults to \code{TRUE}. +#' @param convertType logical, defaults to `TRUE`.
If `TRUE`, the #' function will convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character #' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the #' date times to UTC, properly accounting for daylight savings times based on the data's provided tz_cd column. #' Possible values to provide are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles", #' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu", -#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also \code{OlsonNames()} +#' "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla". See also `OlsonNames()` #' for more information on time zones. #' @return A data frame with the following columns: #' \tabular{lll}{ @@ -486,7 +490,7 @@ readNWISmeas <- function(siteNumbers, #' siteInfo \tab data.frame \tab A data frame containing information on the requested sites \cr #' } #' -#' @seealso \code{\link{constructNWISURL}}, \code{\link{importRDB1}} +#' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() #' site_id <- "434400121275801" @@ -505,6 +509,9 @@ readNWISgwl <- function(siteNumbers, endDate = "", parameterCd = NA, convertType = TRUE, tz = "UTC") { + + message(new_nwis_message()) + url <- constructNWISURL( siteNumbers = siteNumbers, parameterCd = parameterCd, @@ -534,7 +541,7 @@ data$lev_dt <- as.Date(data$lev_dt) } } - siteInfo <- suppressMessages(readNWISsite(siteNumbers)) + siteInfo <- suppressWarnings(readNWISsite(siteNumbers)) siteInfo <- merge( x = unique(data[, c("agency_cd", "site_no")]), y = siteInfo, @@ -550,7 +557,7 @@ #' Site statistics retrieval from USGS (NWIS) #' #' Retrieves site statistics from the USGS Statistics Web Service beta. -#' See \url{https://waterservices.usgs.gov/docs/statistics/} for more information. +#' See <https://waterservices.usgs.gov/docs/statistics/> for more information. #' #' @param siteNumbers character USGS site number (or multiple sites). This is usually an 8 digit number. #' @param parameterCd character USGS parameter code. This is usually a 5 digit number. @@ -563,7 +570,7 @@ readNWISgwl <- function(siteNumbers, #' which indicates retrieval for the latest possible record. For daily data, this #' indicates the end of the period #' the statistics will be computed over. The same restrictions as startDate apply. -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function will convert the data to #' numerics based on a standard algorithm. Years, months, and days (if applicable) are also returned as numerics #' in separate columns. If convertType is false, everything is returned as a character. #' @param statReportType character time division for statistics: daily, monthly, or annual. Default is daily. @@ -576,7 +583,7 @@ readNWISgwl <- function(siteNumbers, #' @param statType character type(s) of statistics to output for daily values. #' Default is mean, which is the only #' option for monthly and yearly report types. See the statistics service documentation -#' at \url{https://waterservices.usgs.gov/docs/statistics/} for a full list of codes. +#' at <https://waterservices.usgs.gov/docs/statistics/> for a full list of codes.
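# A quick illustrative sketch (site and parameter code reused from the
# examples in this file, not an officially documented snippet): requesting
# annual mean statistics rather than the daily default.
annual_means <- readNWISstat(siteNumbers = "01594440",
                             parameterCd = "00060",
                             statReportType = "annual")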
#' @return A data frame with the following columns: #' \tabular{lll}{ #' Name \tab Type \tab Description \cr @@ -586,7 +593,7 @@ readNWISgwl <- function(siteNumbers, #' #' Other columns will be present depending on statReportType and statType #' } -#' @seealso \code{\link{constructNWISURL}}, \code{\link{importRDB1}} +#' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() #' \donttest{ @@ -616,6 +623,7 @@ readNWISgwl <- function(siteNumbers, readNWISstat <- function(siteNumbers, parameterCd, startDate = "", endDate = "", convertType = TRUE, statReportType = "daily", statType = "mean") { + message(new_nwis_message()) # check for NAs in site numbers if (any(is.na(siteNumbers))) { siteNumbers <- siteNumbers[!is.na(siteNumbers)] @@ -641,7 +649,7 @@ readNWISstat <- function(siteNumbers, parameterCd, startDate = "", endDate = "", convertType = convertType ) - siteInfo <- suppressMessages(readNWISsite(siteNumbers)) + siteInfo <- suppressWarnings(readNWISsite(siteNumbers)) if (nrow(data) > 0) { siteInfo <- merge( @@ -660,13 +668,13 @@ readNWISstat <- function(siteNumbers, parameterCd, startDate = "", endDate = "", #' Water use data retrieval from USGS (NWIS) #' #' Retrieves water use data from USGS Water Use Data for the Nation. See -#' \url{https://waterdata.usgs.gov/nwis/wu} for +#' <https://waterdata.usgs.gov/nwis/wu> for #' more information. All available use categories for the supplied arguments are retrieved. #' #' @param stateCd could be character (full name, abbreviation, id), or numeric (id). #' Only one is accepted per query. #' @param countyCd could be character (name, with or without "County", or "ALL"), -#' numeric (id), or \code{NULL}, which will +#' numeric (id), or `NULL`, which will #' return state or national data depending on the stateCd argument. "ALL" may #' also be supplied, which will return data #' for every county in a state. Can be a vector of counties in the same state. @@ -676,14 +684,14 @@ readNWISstat <- function(siteNumbers, parameterCd, startDate = "", endDate = "", #' Specific categories must be supplied as two- #' letter abbreviations as seen in the URL when using the NWIS water use web interface. Note that #' there are different codes for national and state level data. -#' @param convertType logical defaults to \code{TRUE}. If \code{TRUE}, the function +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to #' numerics based on a standard algorithm. Years, months, and days (if applicable) are #' also returned as numerics #' in separate columns. If convertType is false, everything is returned as a character. #' @param transform logical only intended for use with national data. Defaults to -#' \code{FALSE}, with data being returned as -#' presented by the web service. If \code{TRUE}, data will be transformed and +#' `FALSE`, with data being returned as +#' presented by the web service. If `TRUE`, data will be transformed and #' returned with column names, which will reformat #' national data to be similar to state data.
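# A hedged usage sketch (state/county values are illustrative): county-level
# water-use records, with all available use categories returned by default.
dane_use <- readNWISuse(stateCd = "WI", countyCd = "Dane")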
#' @return A data frame with at least the year of record, and all available @@ -720,6 +728,7 @@ readNWISuse <- function(stateCd, categories = "ALL", convertType = TRUE, transform = FALSE) { + message(new_nwis_message()) countyID <- NULL countyCd <- countyCd[countyCd != ""] diff --git a/R/readWQPdata.R b/R/readWQPdata.R index 841c1010..29b3a9c7 100644 --- a/R/readWQPdata.R +++ b/R/readWQPdata.R @@ -1,7 +1,7 @@ #' General Data Import from Water Quality Portal #' #' Imports data from Water Quality Portal web service. This function gets the data from here: -#' \url{https://www.waterqualitydata.us}. +#' <https://www.waterqualitydata.us>. #' #' This function uses \dots as a query input, which can be very flexible, but also #' has a steeper learning curve. For a quick overview, scroll down to the Examples #' section. @@ -41,7 +41,7 @@ #' } #' #' -#' @param \dots see \url{https://www.waterqualitydata.us/webservices_documentation} for a complete list of options. +#' @param \dots see <https://www.waterqualitydata.us/webservices_documentation> for a complete list of options. #' A list of arguments can also be supplied. For more information see the above #' description for this help file. One way to figure out how to construct a WQP query is to go to the "Advanced" #' form in the Water Quality Portal. Use the form to discover what parameters are available. Once the query is @@ -63,11 +63,11 @@ #' "America/Anchorage", as well as the following which do not use daylight savings #' time: "America/Honolulu", #' "America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla". -#' See also \code{OlsonNames()} +#' See also `OlsonNames()` #' for more information on time zones. #' @param ignore_attributes logical to choose to ignore fetching site and status -#' attributes. Default is \code{FALSE}. -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function +#' attributes. Default is `FALSE`. +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character.
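# Sketch of the convertType behavior described above (site and characteristic
# reused from this file's examples): with convertType = FALSE, every column
# comes back as character.
pH_chr <- readWQPdata(siteid = "USGS-04024315",
                      characteristicName = "pH",
                      convertType = FALSE)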
#' @keywords data import WQP web service @@ -96,9 +96,9 @@ #' #' # WQX3: #' pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", -#' characteristicName = nameToUse, -#' service = "ResultWQX3", -#' dataProfile = "basicPhysChem") +#' characteristicName = nameToUse, +#' service = "ResultWQX3", +#' dataProfile = "basicPhysChem") #' attr(pHData_wqx3, "url") #' #' # More examples: @@ -112,12 +112,12 @@ #' attr(DeWitt, "url") #' #' DeWitt_wqx3 <- readWQPdata( -#' statecode = "Illinois", -#' countycode = "DeWitt", -#' characteristicName = "Nitrogen", -#' service = "ResultWQX3", -#' dataProfile = "basicPhysChem", -#' ignore_attributes = TRUE) +#' statecode = "Illinois", +#' countycode = "DeWitt", +#' characteristicName = "Nitrogen", +#' service = "ResultWQX3", +#' dataProfile = "basicPhysChem", +#' ignore_attributes = TRUE) #' #' attr(DeWitt_wqx3, "url") #' @@ -128,11 +128,11 @@ #' ) #' attr(activity, "url") #' -#' activity_wqx3 <- readWQPdata( -#' siteid = "USGS-04024315", -#' service = "ActivityWQX3" -#' ) -#' attr(activity_wqx3, "url") +#' # activity_wqx3 <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # service = "ActivityWQX3" +#' # ) +#' # attr(activity_wqx3, "url") #' #' Dane_activity <- readWQPdata( #' statecode = "Wisconsin", @@ -143,14 +143,14 @@ #' ) #' attr(Dane_activity, "url") #' -#' Dane_activity_wqx3 <- readWQPdata( -#' statecode = "Wisconsin", -#' countycode = "Dane", -#' startDateLo = "2023-01-01", -#' startDateHi = "2023-12-31", -#' service = "ActivityWQX3" -#' ) -#' attr(Dane_activity_wqx3, "url") +#' # Dane_activity_wqx3 <- readWQPdata( +#' # statecode = "Wisconsin", +#' # countycode = "Dane", +#' # startDateLo = "2023-01-01", +#' # startDateHi = "2023-12-31", +#' # service = "ActivityWQX3" +#' # ) +#' # attr(Dane_activity_wqx3, "url") #' #' ######################################################## #' # Additional examples: @@ -199,11 +199,11 @@ #' dataProfile = "narrowResult" #' ) #' -#' samp_narrow_wqx3 <- readWQPdata( -#' siteid = "USGS-04024315", -#' service = "ResultWQX3", -#' dataProfile = "narrow" -#' ) +#' # samp_narrow_wqx3 <- readWQPdata( +#' # siteid = "USGS-04024315", +#' # service = "ResultWQX3", +#' # dataProfile = "narrow" +#' # ) #' #' #' # Data profiles: "Sampling Activity" @@ -240,10 +240,10 @@ #' service = "Result", #' dataProfile = "narrowResult" ) #' -#' rawPHsites <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), -#' characteristicName = "pH", -#' service = "ResultWQX3", -#' dataProfile = "narrow" ) +#' # rawPHsites <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +#' # characteristicName = "pH", +#' # service = "ResultWQX3", +#' # dataProfile = "narrow" ) #' #' } readWQPdata <- function(..., diff --git a/R/readWQPdots.R b/R/readWQPdots.R index 29c4bfe2..49d49589 100644 --- a/R/readWQPdots.R +++ b/R/readWQPdots.R @@ -1,5 +1,5 @@ #' -#' Format and organize WQP arguments that are passed in as \code{...}. +#' Format and organize WQP arguments that are passed in as `...`. #' #' @keywords internal readWQPdots <- function(..., legacy = TRUE) { diff --git a/R/readWQPqw.R b/R/readWQPqw.R index 647722c0..faaca270 100644 --- a/R/readWQPqw.R +++ b/R/readWQPqw.R @@ -1,7 +1,7 @@ #' Raw Data Import for Water Quality Portal #' #' Imports data from the Water Quality Portal. -#' This function gets the data from here: \url{https://www.waterqualitydata.us}. There +#' This function gets the data from here: . There #' are four required input arguments: siteNumbers, parameterCd, startDate, and endDate. 
#' parameterCd can either be a USGS 5-digit code, or a characteristic name. The sites can be #' either USGS, or other Water Quality Portal offered sites. It is required to use the 'full' @@ -24,15 +24,15 @@ #' "America/Anchorage", as well as the following which do not use daylight savings #' time: "America/Honolulu", #' "America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla". -#' See also \code{OlsonNames()} +#' See also `OlsonNames()` #' for more information on time zones. #' @param querySummary logical to look at number of records and unique sites that #' will be returned from this query. -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates, datetimes, #' numerics based on a standard algorithm. If false, everything is returned as a character. #' @param ignore_attributes logical to choose to ignore fetching site and parameter -#' attributes. Default is \code{FALSE}. +#' attributes. Default is `FALSE`. #' @param legacy Logical. If TRUE, uses legacy WQP services. Default is TRUE. #' Setting legacy = FALSE uses WQX3.0 WQP services, which are in-development, use with caution. #' @keywords data import USGS web service @@ -47,8 +47,8 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' @export -#' @seealso \code{\link{readWQPdata}}, \code{\link{whatWQPsites}}, -#' and \code{\link{importWQP}} +#' @seealso [readWQPdata()], [whatWQPsites()], +#' and [importWQP()] #' @examplesIf is_dataRetrieval_user() #' \donttest{ #' rawPcode <- readWQPqw("USGS-01594440", "01075", "", "") @@ -63,10 +63,10 @@ #' ncol(pHsites_legacy) #' attr(pHsites_legacy, "url") #' -#' pHsites_modern <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), -#' "pH", "", "", legacy = FALSE) -#' ncol(pHsites_modern) -#' attr(pHsites_modern, "url") +#' # pHsites_modern <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +#' # "pH", "", "", legacy = FALSE) +#' # ncol(pHsites_modern) +#' # attr(pHsites_modern, "url") #' #' nwisEx <- readWQPqw("USGS-04024000", c("34247", "30234", "32104", "34220"), "", "2022-12-20") #' diff --git a/R/read_waterdata.R b/R/read_waterdata.R new file mode 100644 index 00000000..b19e9a7a --- /dev/null +++ b/R/read_waterdata.R @@ -0,0 +1,89 @@ +#' Generalized USGS Water Data API retrieval function +#' +#' Function that allows complex CQL queries. +#' See +#' for more information. +#' +#' @export +#' @param service character, can be any existing collection such +#' as "daily", "monitoring-locations", "time-series-metadata" +#' @param CQL A string in a Common Query Language format. +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function +#' will convert the data to dates and qualifier to string vector. +#' @param \dots Additional arguments to send to the request. 
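# A smaller CQL2-JSON teaser before the fuller example that follows. The "="
# operator is standard CQL2, but treat this exact filter as an illustrative
# assumption rather than package-documented usage.
cql_one_site <- '{
  "op": "=",
  "args": [
    { "property": "monitoring_location_id" },
    "USGS-02238500"
  ]
}'
one_site <- read_waterdata(service = "monitoring-locations",
                           CQL = cql_one_site)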
+#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' cql <- '{ +#' "op": "and", +#' "args": [ +#' { +#' "op": "in", +#' "args": [ +#' { "property": "parameter_code" }, +#' [ "00060", "00065" ] +#' ] +#' }, +#' { +#' "op": "in", +#' "args": [ +#' { "property": "monitoring_location_id" }, +#' [ "USGS-07367300", "USGS-03277200" ] +#' ] +#' } +#' ] +#' }' +#' +#' dv_data <- read_waterdata(service = "daily", +#' CQL = cql, +#' time = c("2023-01-01", "2024-01-01")) +#' +#' } read_waterdata <- function(service, + CQL, + ..., + convertType = TRUE){ + + query_req <- get_collection() + + endpoints <- sapply(query_req$tags, function(x) x[["name"]]) + + match.arg(service, endpoints) + + args <- list(...) + args[["service"]] <- service + + if(!"properties" %in% names(args)){ + args[["properties"]] <- NA_character_ + } + + data_req <- suppressWarnings(do.call(construct_api_requests, args)) + + data_req <- data_req |> + httr2::req_headers(`Content-Type` = "application/query-cql-json") |> + httr2::req_body_raw(CQL) + + if("max_results" %in% names(args)){ + max_results <- args[["max_results"]] + } else { + max_results <- NA + } + + return_list <- walk_pages(data_req, max_results) + + return_list <- deal_with_empty(return_list, args[["properties"]], service) + + if(convertType) return_list <- cleanup_cols(return_list) + + # Add other time series services when they come online + if(service %in% c("daily")){ + return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] + } + + return_list <- rejigger_cols(return_list, args[["properties"]], service) + + return(return_list) +} + + + diff --git a/R/read_waterdata_daily.R b/R/read_waterdata_daily.R new file mode 100644 index 00000000..2ac5f01c --- /dev/null +++ b/R/read_waterdata_daily.R @@ -0,0 +1,120 @@ +#' Get USGS Daily Data +#' +#' Description `r get_description("daily")` +#' +#' @export +#' @param monitoring_location_id `r get_params("daily")$monitoring_location_id` +#' @param parameter_code `r get_params("daily")$parameter_code` +#' @param statistic_id `r get_params("daily")$statistic_id` +#' @param time `r get_params("daily")$time` +#' @param value `r get_params("daily")$value` +#' @param unit_of_measure `r get_params("daily")$unit_of_measure` +#' @param approval_status `r get_params("daily")$approval_status` +#' @param last_modified `r get_params("daily")$last_modified` +#' @param time_series_id `r get_params("daily")$time_series_id` +#' @param qualifier `r get_params("daily")$qualifier` +#' @param daily_id `r get_params("daily")$id` +#' @param properties A vector of requested columns to be returned from the query. +#' Available options are: +#' `r schema <- check_OGC_requests(endpoint = "daily", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). +#' @param limit The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 10000.
It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param max_results The optional maximum number of rows to return. This value +#' must be less than the requested limit. +#' @param skipGeometry This option can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information. +#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function +#' will convert the data to dates and qualifier to string vector. +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' site <- "USGS-02238500" +#' pcode <- "00060" +#' dv_data_sf <- read_waterdata_daily(monitoring_location_id = site, +#' parameter_code = "00060", +#' time = c("2021-01-01", "2022-01-01")) +#' +#' dv_data_trim <- read_waterdata_daily(monitoring_location_id = site, +#' parameter_code = "00060", +#' properties = c("monitoring_location_id", +#' "value", +#' "time"), +#' time = c("2021-01-01", "2022-01-01")) +#' +#' dv_data <- read_waterdata_daily(monitoring_location_id = site, +#' parameter_code = "00060", +#' skipGeometry = TRUE) +#' +#' dv_data_period <- read_waterdata_daily(monitoring_location_id = site, +#' parameter_code = "00060", +#' time = "P7D") +#' +#' multi_site <- read_waterdata_daily(monitoring_location_id = c("USGS-01491000", +#' "USGS-01645000"), +#' parameter_code = c("00060", "00010"), +#' limit = 500, +#' time = c("2023-01-01", "2024-01-01")) +#' +#' } +read_waterdata_daily <- function(monitoring_location_id = NA_character_, + parameter_code = NA_character_, + statistic_id = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + daily_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + value = NA, + last_modified = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + convertType = TRUE){ + + service <- "daily" + output_id <- "daily_id" + + args <- mget(names(formals())) + args[["service"]] <- service + + args <- switch_arg_id(args, + id_name = output_id, + service = service) + + args[["properties"]] <- switch_properties_id(properties, + id_name = output_id, + service = service) + + args[["convertType"]] <- NULL + + dv_req <- do.call(construct_api_requests, args) + + return_list <- walk_pages(dv_req, max_results) + + return_list <- deal_with_empty(return_list, properties, service) + + if(convertType) return_list <- cleanup_cols(return_list, + service = "daily") + + return_list <- rejigger_cols(return_list, properties, output_id) + + return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] + + return(return_list) +} + + + diff --git a/R/read_waterdata_monitoring_location.R b/R/read_waterdata_monitoring_location.R new file mode 100644 index 00000000..9489a1bb --- /dev/null +++ b/R/read_waterdata_monitoring_location.R @@ -0,0 +1,161 @@ +#' Get USGS Site Data +#' +#' Description `r get_description("monitoring-locations")` +#' +#' @export +#' @param monitoring_location_id `r get_params("monitoring-locations")$id` +#' @param agency_code `r get_params("monitoring-locations")$agency_code` +#' @param agency_name `r get_params("monitoring-locations")$agency_name` +#' @param monitoring_location_number `r get_params("monitoring-locations")$monitoring_location_number` +#' @param monitoring_location_name `r 
get_params("monitoring-locations")$monitoring_location_name` +#' @param district_code `r get_params("monitoring-locations")$district_code` +#' @param state_name `r get_params("monitoring-locations")$state_name` +#' @param county_code `r get_params("monitoring-locations")$county_code` +#' @param county_name `r get_params("monitoring-locations")$county_name` +#' @param country_code `r get_params("monitoring-locations")$country_code` +#' @param country_name `r get_params("monitoring-locations")$country_name` +#' @param state_code `r get_params("monitoring-locations")$state_code` +#' @param minor_civil_division_code `r get_params("monitoring-locations")$minor_civil_division_code` +#' @param site_type_code `r get_params("monitoring-locations")$site_type_code` +#' @param site_type `r get_params("monitoring-locations")$site_type` +#' @param hydrologic_unit_code `r get_params("monitoring-locations")$hydrologic_unit_code` +#' @param basin_code `r get_params("monitoring-locations")$basin_code` +#' @param altitude `r get_params("monitoring-locations")$altitude` +#' @param altitude_accuracy `r get_params("monitoring-locations")$altitude_accuracy` +#' @param altitude_method_code `r get_params("monitoring-locations")$altitude_method_code` +#' @param altitude_method_name `r get_params("monitoring-locations")$altitude_method_name` +#' @param vertical_datum `r get_params("monitoring-locations")$vertical_datum` +#' @param vertical_datum_name `r get_params("monitoring-locations")$vertical_datum_name` +#' @param horizontal_positional_accuracy_code `r get_params("monitoring-locations")$horizontal_positional_accuracy_code` +#' @param horizontal_positional_accuracy `r get_params("monitoring-locations")$horizontal_positional_accuracy` +#' @param horizontal_position_method_code `r get_params("monitoring-locations")$horizontal_position_method_code` +#' @param horizontal_position_method_name `r get_params("monitoring-locations")$horizontal_position_method_name` +#' @param original_horizontal_datum `r get_params("monitoring-locations")$original_horizontal_datum` +#' @param original_horizontal_datum_name `r get_params("monitoring-locations")$original_horizontal_datum_name` +#' @param drainage_area `r get_params("monitoring-locations")$drainage_area` +#' @param contributing_drainage_area `r get_params("monitoring-locations")$contributing_drainage_area` +#' @param time_zone_abbreviation `r get_params("monitoring-locations")$time_zone_abbreviation` +#' @param uses_daylight_savings `r get_params("monitoring-locations")$uses_daylight_savings` +#' @param construction_date `r get_params("monitoring-locations")$construction_date` +#' @param aquifer_code `r get_params("monitoring-locations")$aquifer_code` +#' @param national_aquifer_code `r get_params("monitoring-locations")$national_aquifer_code` +#' @param aquifer_type_code `r get_params("monitoring-locations")$aquifer_type_code` +#' @param well_constructed_depth `r get_params("monitoring-locations")$well_constructed_depth` +#' @param hole_constructed_depth `r get_params("monitoring-locations")$hole_constructed_depth` +#' @param depth_source_code `r get_params("monitoring-locations")$depth_source_code` +#' @param properties A vector of requested columns to be returned from the query. +#' Available options are: +#' `r schema <- check_OGC_requests(endpoint = "monitoring-locations", type = "schema"); paste(names(schema$properties), collapse = ", ")`. 
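# To browse the available properties interactively (this mirrors the inline
# roxygen chunk above), the schema endpoint can be queried directly:
schema <- check_OGC_requests(endpoint = "monitoring-locations", type = "schema")
names(schema$properties)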
+#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). +#' @param limit The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 10000. It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param max_results The optional maximum number of rows to return. This value +#' must be less than the requested limit. +#' @param skipGeometry This option can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information. +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' site <- "USGS-02238500" +#' site_info <- read_waterdata_monitoring_location(monitoring_location_id = site) +#' +#' site_slim <- read_waterdata_monitoring_location(monitoring_location_id = site, +#' properties = c("monitoring_location_id", +#' "state_name", +#' "country_name")) +#' +#' site_slim_no_sf_slim <- read_waterdata_monitoring_location(monitoring_location_id = site, +#' properties = c("monitoring_location_id", +#' "state_name", +#' "country_name"), +#' skipGeometry = TRUE) +#' +#' site_info_no_sf <- read_waterdata_monitoring_location(monitoring_location_id = site, +#' skipGeometry = TRUE) +#' +#' bbox_vals = c(-94.00, 35.0, -93.5, 35.5) +#' multi_site <- read_waterdata_monitoring_location(bbox = bbox_vals) +#' multi_site_n_100 <- read_waterdata_monitoring_location(bbox = bbox_vals, +#' max_results = 100) +#' multi_site_limit_100 <- read_waterdata_monitoring_location(bbox = bbox_vals, +#' limit = 100) +#' } read_waterdata_monitoring_location <- function(monitoring_location_id = NA_character_, + agency_code = NA_character_, + agency_name = NA_character_, + monitoring_location_number = NA_character_, + monitoring_location_name = NA_character_, + district_code = NA_character_, + country_code = NA_character_, + country_name = NA_character_, + state_code = NA_character_, + state_name = NA_character_, + county_code = NA_character_, + county_name = NA_character_, + minor_civil_division_code = NA_character_, + site_type_code = NA_character_, + site_type = NA_character_, + hydrologic_unit_code = NA_character_, + basin_code = NA_character_, + altitude = NA_character_, + altitude_accuracy = NA_character_, + altitude_method_code = NA_character_, + altitude_method_name = NA_character_, + vertical_datum = NA_character_, + vertical_datum_name = NA_character_, + horizontal_positional_accuracy_code = NA_character_, + horizontal_positional_accuracy = NA_character_, + horizontal_position_method_code = NA_character_, + horizontal_position_method_name = NA_character_, + original_horizontal_datum = NA_character_, + original_horizontal_datum_name = NA_character_, + drainage_area = NA_character_, + contributing_drainage_area = NA_character_, + time_zone_abbreviation = NA_character_, + uses_daylight_savings = NA_character_, + construction_date = NA_character_, + aquifer_code = NA_character_,
+ national_aquifer_code = NA_character_, + aquifer_type_code = NA_character_, + well_constructed_depth = NA_character_, + hole_constructed_depth = NA_character_, + depth_source_code = NA_character_, + properties = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + skipGeometry = NA){ + + service <- "monitoring-locations" + output_id <- "monitoring_location_id" + + args <- mget(names(formals())) + args[["service"]] <- service + + args <- switch_arg_id(args, + id_name = output_id, + service = service) + + args[["properties"]] <- switch_properties_id(properties, + id_name = output_id, + service = service) + + site_req <- do.call(construct_api_requests, args) + + return_list <- walk_pages(site_req, max_results) + + return_list <- deal_with_empty(return_list, properties, service) + + return_list <- rejigger_cols(return_list, properties, output_id) + + return(return_list) +} diff --git a/R/read_USGS_samples.R b/R/read_waterdata_samples.R similarity index 76% rename from R/read_USGS_samples.R rename to R/read_waterdata_samples.R index 446b9ecd..d41f0448 100644 --- a/R/read_USGS_samples.R +++ b/R/read_waterdata_samples.R @@ -1,12 +1,12 @@ #' Construct request for USGS Samples Data #' #' This function creates the call for discrete water quality samples data -#' service described at \url{https://waterdata.usgs.gov/download-samples}. +#' service described at . #' Note: all possible arguments are included, but it is strongly recommended #' to only use the NECESSARY arguments. Leave unnecessary arguments as the default #' NA. #' -#' See also: \url{https://api.waterdata.usgs.gov/samples-data/docs}. +#' See also: . #' #' @param monitoringLocationIdentifier A monitoring location identifier has two parts: the agency code #' and the location number, separated by a dash (-). Location identifiers should be separated with commas, @@ -14,9 +14,9 @@ #' numbers without an agency prefix are assumed to have the prefix USGS. #' @param activityMediaName Sample media refers to the environmental medium that #' was sampled or analyzed. See available options by running -#' \code{check_param("samplemedia")$activityMedia}. +#' `check_waterdata_sample_params("samplemedia")$activityMedia`. #' @param siteTypeCode Site type code query parameter. See available -#' options by running \code{check_param("sitetype")$typeCode}. +#' options by running `check_waterdata_sample_params("sitetype")$typeCode`. #' @param boundingBox North and South are latitude values; East and West are longitude values. #' A vector of 4 (west, south, east, north) is expected. #' An example would be: c(-92.8, 44.2, -88.9, 46.0). @@ -35,40 +35,40 @@ #' records that match the date. #' @param characteristicGroup Characteristic group is a broad category describing the sample. #' See available options by running -#' \code{check_param("characteristicgroup")$characteristicGroup}. +#' `check_waterdata_sample_params("characteristicgroup")$characteristicGroup`. #' @param characteristicUserSupplied Observed property is the USGS term for the #' constituent sampled and the property name gives a detailed description of what #' was sampled. Observed property is mapped to characteristicUserSupplied and replaces #' the parameter name and pcode USGS #' previously used to describe discrete sample data. Find more information in the #' Observed Properties and Parameter Codes section of the Code Dictionary found here: -#' \url{https://waterdata.usgs.gov/code-dictionary/}. +#' . #' @param characteristic Characteristic is a specific category describing the sample. 
#' See available options by running -#' \code{check_param("characteristics")$characteristicName}. +#' `check_waterdata_sample_params("characteristics")$characteristicName`. #' @param stateFips State query parameter. To get a list of available state fips, -#' run \code{check_param("states")}. The "fips" can be created using the function -#' \code{stateCdLookup} - for example: \code{stateCdLookup("WI", "fips")}. +#' run `check_waterdata_sample_params("states")`. The "fips" can be created using the function +#' `stateCdLookup` - for example: `stateCdLookup("WI", "fips")`. #' FIPs codes for states take the format: #' CountryAbbrev:StateNumber, like US:55 for Wisconsin. #' @param countyFips County query parameter. To get a list of available counties, -#' run \code{check_param("counties")}. The "Fips" can be created using the function -#' \code{countyCdLookup} - for example: \code{countyCdLookup("WI", "Dane", "fips")} +#' run `check_waterdata_sample_params("counties")`. The "Fips" can be created using the function +#' `countyCdLookup` - for example: `countyCdLookup("WI", "Dane", "fips")` #' for Dane County, WI. #' FIPs codes for counties take the format: #' CountryAbbrev:StateNumber:CountyNumber, like US:55:025 for Dane County, WI. #' @param countryFips Country query parameter. Do not set redundant parameters. #' If another query parameter contains the country information, leave this parameter -#' set to the default NA. See available options by running \code{check_param("countries")}, +#' set to the default NA. See available options by running `check_waterdata_sample_params("countries")`, #' where the "id" field contains the value to use in the countryFips input. #' @param projectIdentifier Project identifier query parameter. This information #' would be needed from prior project information. #' @param recordIdentifierUserSupplied Record identifier, user supplied identifier. This #' information would be needed from the data supplier. #' @param siteTypeName Site type name query parameter. See available -#' options by running \code{check_param("sitetype")$typeName}. +#' options by running `check_waterdata_sample_params("sitetype")$typeName`. #' @param usgsPCode USGS parameter code. See available options by running -#' \code{check_param("characteristics")$parameterCode}. +#' `check_waterdata_sample_params("characteristics")$parameterCode`. #' @param pointLocationLatitude Latitude for a point/radius query (decimal degrees). Must be used #' with pointLocationLongitude and pointLocationWithinMiles. #' @param pointLocationLongitude Longitude for a point/radius query (decimal degrees). Must be used @@ -85,18 +85,19 @@ #' "project" and "projectmonitoringlocationweight". Options for "organizations" are: #' "organization" and "count". #' @export +#' @keywords internal #' @return data frame returned from web service call.
#' #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ #' req <- construct_waterdata_sample_request( #' monitoringLocationIdentifier = "USGS-04074950", #' characteristicUserSupplied = "pH, water, unfiltered, field") #' rawData <- importWQP(req) #' #' } -construct_USGS_sample_request <- function(monitoringLocationIdentifier = NA, +construct_waterdata_sample_request <- function(monitoringLocationIdentifier = NA, siteTypeCode = NA, boundingBox = NA, hydrologicUnit = NA, @@ -175,36 +176,36 @@ construct_USGS_sample_request <- function(monitoringLocationIdentifier = NA, if(all(!is.na(siteTypeCode))){ siteTypeCode <- match.arg(siteTypeCode, - check_param("sitetype")$typeCode, + check_waterdata_sample_params("sitetype")$typeCode, several.ok = TRUE) } if(all(!is.na(activityMediaName))){ activityMediaName <- match.arg(activityMediaName, - check_param("samplemedia")$activityMedia, + check_waterdata_sample_params("samplemedia")$activityMedia, several.ok = TRUE) } if(all(!is.na(characteristicGroup))){ characteristicGroup <- match.arg(characteristicGroup, - check_param("characteristicgroup")$characteristicGroup, + check_waterdata_sample_params("characteristicgroup")$characteristicGroup, several.ok = TRUE) } if(all(!is.na(countryFips))){ countryFips <- match.arg(countryFips, - check_param("countries")$countryCode, + check_waterdata_sample_params("countries")$countryCode, several.ok = TRUE) } if(all(!is.na(siteTypeName))){ siteTypeName <- match.arg(siteTypeName, - check_param("sitetype")$typeLongName, + check_waterdata_sample_params("sitetype")$typeLongName, several.ok = TRUE) } if(all(!is.na(stateFips))){ - states <- check_param("states") + states <- check_waterdata_sample_params("states") state_codes <- paste(states$countryCode, states$fipsCode, sep = ":") stateFips <- match.arg(stateFips, state_codes, @@ -212,10 +213,10 @@ construct_USGS_sample_request <- function(monitoringLocationIdentifier = NA, } if(all(!is.na(countyFips))){ - states <- check_param("states") + states <- check_waterdata_sample_params("states") state_codes <- paste(states$countryCode, states$fipsCode, sep = ":") - counties <- check_param("counties") + counties <- check_waterdata_sample_params("counties") state_cd <- stats::setNames(states$fipsCode, states$stateAbbrev) county_codes <- paste(counties$countryCode, @@ -317,7 +318,7 @@ explode_query <- function(baseURL, POST = FALSE, x){ #' Check values from codeservice #' #' Call a service to check on values from: -#' \url{https://api.waterdata.usgs.gov/samples-data/codeservice/docs}. +#' <https://api.waterdata.usgs.gov/samples-data/codeservice/docs>.
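# Migration sketch for the check_param() rename: calls keep the same shape,
# only the function name changes.
# old: media <- check_param("samplemedia")$activityMedia
media <- check_waterdata_sample_params("samplemedia")$activityMedia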
#' #' @param service Options are: "characteristicgroup", "states", "counties", #' "countries", "sitetype", "samplemedia", "characteristics", "observedproperty" @@ -328,20 +329,20 @@ explode_query <- function(baseURL, POST = FALSE, x){ #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ -#' groups <- check_param("characteristicgroup") -#' states <- check_param("states") -#' countries <- check_param("countries") -#' counties <- check_param("counties") -#' sitetypes <- check_param("sitetype") -#' samplemedia <- check_param("samplemedia") -#' characteristics <- check_param("characteristics", +#' groups <- check_waterdata_sample_params("characteristicgroup") +#' states <- check_waterdata_sample_params("states") +#' countries <- check_waterdata_sample_params("countries") +#' counties <- check_waterdata_sample_params("counties") +#' sitetypes <- check_waterdata_sample_params("sitetype") +#' samplemedia <- check_waterdata_sample_params("samplemedia") +#' characteristics <- check_waterdata_sample_params("characteristics", #' group = "Biological") -#' observedProperties <- check_param("observedproperty", +#' observedProperties <- check_waterdata_sample_params("observedproperty", #' text = "phosphorus") #' #' } -check_param <- function(service = "characteristicgroup", - ...){ +check_waterdata_sample_params <- function(service = "characteristicgroup", + ...){ service_options <- c("characteristicgroup", "states", "counties", "countries", "sitetype", "samplemedia", @@ -364,9 +365,7 @@ check_param <- function(service = "characteristicgroup", check_group_req <- httr2::req_url_query(check_group_req, !!!params) } - - message("GET: ", check_group_req$url) - + check_group <- httr2::req_perform(check_group_req) |> httr2::resp_body_string() |> jsonlite::fromJSON() @@ -378,40 +377,37 @@ check_param <- function(service = "characteristicgroup", #' USGS Samples Data #' #' This function creates the call and gets the data for discrete water quality samples data -#' service described at \url{https://waterdata.usgs.gov/download-samples}. +#' service described at . #' -#' @inheritParams construct_USGS_sample_request +#' @inheritParams construct_waterdata_sample_request #' @param tz character to set timezone attribute of datetime. Default is UTC #' (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Possible values include "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", #' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua", #' "America/Phoenix", and "America/Metlakatla" -#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function -#' will convert the data to dates, datetimes, -#' numerics based on a standard algorithm. If false, everything is returned as a character. 
#' @export -#' +#' @rdname read_waterdata_samples #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ -#' ph_data <- read_USGS_samples( +#' ph_data <- read_waterdata_samples( #' monitoringLocationIdentifier = "USGS-04074950", #' characteristicUserSupplied = "pH, water, unfiltered, field", #' activityStartDateUpper = "2000-01-01", #' dataProfile = "narrow") #' #' nameToUse <- "pH" -#' pHData <- read_USGS_samples(monitoringLocationIdentifier = "USGS-04024315", +#' pHData <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-04024315", #' characteristic = nameToUse) #' ncol(pHData) #' attr(pHData, "url") #' attr(pHData, "queryTime") #' -#' summary_data <- read_USGS_samples(monitoringLocationIdentifier = "USGS-04024315", +#' summary_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-04024315", #' dataType = "projects") #' #' } -read_USGS_samples <- function(monitoringLocationIdentifier = NA, +read_waterdata_samples <- function(monitoringLocationIdentifier = NA, siteTypeCode = NA, boundingBox = NA, hydrologicUnit = NA, @@ -433,10 +429,9 @@ read_USGS_samples <- function(monitoringLocationIdentifier = NA, pointLocationWithinMiles = NA, dataType = "results", dataProfile = NA, - tz = "UTC", - convertType = TRUE){ + tz = "UTC"){ - request_url <- construct_USGS_sample_request(monitoringLocationIdentifier = monitoringLocationIdentifier, + request_url <- construct_waterdata_sample_request(monitoringLocationIdentifier = monitoringLocationIdentifier, siteTypeCode = siteTypeCode, boundingBox = boundingBox, hydrologicUnit = hydrologicUnit, @@ -459,7 +454,7 @@ read_USGS_samples <- function(monitoringLocationIdentifier = NA, dataType = dataType, dataProfile = dataProfile) - df <- importWQP(request_url, tz = tz, convertType = convertType) + df <- importWQP(request_url, tz = tz) attr(df, "url") <- request_url$url attr(df, "queryTime") <- Sys.time() return(df) @@ -469,7 +464,7 @@ read_USGS_samples <- function(monitoringLocationIdentifier = NA, #' USGS Samples Summary Data #' #' This function creates the call and gets the data for discrete water quality samples summary data -#' service described at \url{https://api.waterdata.usgs.gov/samples-data/docs}. +#' service described at . #' #' @param monitoringLocationIdentifier A monitoring location identifier has two parts, #' separated by a dash (-): the agency code and the location number. Location identifiers should be separated with commas, @@ -477,16 +472,16 @@ read_USGS_samples <- function(monitoringLocationIdentifier = NA, #' numbers without an agency prefix are assumed to have the prefix USGS. 
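# The summary service accepts a single site per call (see the stop() in the
# function body below), so several sites can be handled by iterating; a sketch
# using site IDs that appear elsewhere in this file:
sites <- c("USGS-04074950", "USGS-04024315")
summaries <- lapply(sites, summarize_waterdata_samples)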
#' @export #' @return data frame with summary of data available based on the monitoringLocationIdentifier -#' +#' @rdname summarize_waterdata_samples #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ #' monitoringLocationIdentifier <- "USGS-04074950" #' -#' what_data <- summarize_USGS_samples(monitoringLocationIdentifier) +#' what_data <- summarize_waterdata_samples(monitoringLocationIdentifier) #' #' } -summarize_USGS_samples <- function(monitoringLocationIdentifier){ +summarize_waterdata_samples <- function(monitoringLocationIdentifier){ if(length(monitoringLocationIdentifier) > 1){ stop("Summary service only available for one site at a time.") @@ -509,4 +504,74 @@ summarize_USGS_samples <- function(monitoringLocationIdentifier){ attr(df, "queryTime") <- Sys.time() return(df) -} \ No newline at end of file +} + + +#' @rdname read_waterdata_samples +#' @export +read_USGS_samples <- function(monitoringLocationIdentifier = NA, + siteTypeCode = NA, + boundingBox = NA, + hydrologicUnit = NA, + activityMediaName = NA, + characteristicGroup = NA, + characteristic = NA, + characteristicUserSupplied = NA, + activityStartDateLower = NA, + activityStartDateUpper = NA, + countryFips = NA, + stateFips = NA, + countyFips = NA, + projectIdentifier = NA, + recordIdentifierUserSupplied = NA, + siteTypeName = NA, + usgsPCode = NA, + pointLocationLatitude = NA, + pointLocationLongitude = NA, + pointLocationWithinMiles = NA, + dataType = "results", + dataProfile = NA, + tz = "UTC"){ + + .Deprecated(new = "read_waterdata_samples", + package = "dataRetrieval", + msg = "Function has been renamed. Please begin to migrate to read_waterdata_samples") + + + read_waterdata_samples(monitoringLocationIdentifier = monitoringLocationIdentifier, + siteTypeCode = siteTypeCode, + boundingBox = boundingBox, + hydrologicUnit = hydrologicUnit, + activityMediaName = activityMediaName, + characteristicGroup = characteristicGroup, + characteristic = characteristic, + characteristicUserSupplied = characteristicUserSupplied, + activityStartDateLower = activityStartDateLower, + activityStartDateUpper = activityStartDateUpper, + countryFips = countryFips, + stateFips = stateFips, + countyFips = countyFips, + projectIdentifier = projectIdentifier, + recordIdentifierUserSupplied = recordIdentifierUserSupplied, + siteTypeName = siteTypeName, + usgsPCode = usgsPCode, + pointLocationLatitude = pointLocationLatitude, + pointLocationLongitude = pointLocationLongitude, + pointLocationWithinMiles = pointLocationWithinMiles, + dataType = dataType, + dataProfile = dataProfile, + tz = tz) +} + + +#' @rdname summarize_waterdata_samples +#' @export +summarize_USGS_samples <- function(monitoringLocationIdentifier){ + + .Deprecated(new = "summarize_waterdata_samples", + package = "dataRetrieval", + msg = "Function has been renamed. 
Please begin to migrate to summarize_waterdata_samples") + + summarize_waterdata_samples(monitoringLocationIdentifier) +} + diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R new file mode 100644 index 00000000..1dc701f7 --- /dev/null +++ b/R/read_waterdata_ts_meta.R @@ -0,0 +1,106 @@ +#' Get USGS Time Series Metadata +#' +#' Description `r get_description("time-series-metadata")` +#' +#' @export +#' @param monitoring_location_id `r get_params("time-series-metadata")$monitoring_location_id` +#' @param parameter_code `r get_params("time-series-metadata")$parameter_code` +#' @param parameter_name `r get_params("time-series-metadata")$parameter_name` +#' @param statistic_id `r get_params("time-series-metadata")$statistic_id` +#' @param computation_identifier `r get_params("time-series-metadata")$computation_identifier` +#' @param computation_period_identifier `r get_params("time-series-metadata")$computation_period_identifier` +#' @param sublocation_identifier `r get_params("time-series-metadata")$sublocation_identifier` +#' @param last_modified `r get_params("time-series-metadata")$last_modified` +#' @param begin `r get_params("time-series-metadata")$begin` +#' @param end `r get_params("time-series-metadata")$end` +#' @param thresholds `r get_params("time-series-metadata")$thresholds` +#' @param unit_of_measure `r get_params("time-series-metadata")$unit_of_measure` +#' @param primary `r get_params("time-series-metadata")$primary` +#' @param web_description `r get_params("time-series-metadata")$web_description` +#' @param properties A vector of requested columns to be returned from the query. +#' Available options are: +#' `r schema <- check_OGC_requests(endpoint = "time-series-metadata", type = "schema"); paste(names(schema$properties), collapse = ", ")` +#' @param time_series_id `r get_params("time-series-metadata")$id` +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). +#' @param limit The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 10000. It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param max_results The optional maximum number of rows to return. This value +#' must be less than the requested limit. +#' @param convertType logical, defaults to `FALSE`. If `TRUE`, the function +#' will convert the data to dates and qualifier to string vector. +#' @param skipGeometry This option can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information.
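# A workflow sketch (IDs and period are illustrative): discover available
# series with read_waterdata_ts_meta, then pull one series by time_series_id
# with read_waterdata_daily ("P7D" is the period shorthand used in the daily
# examples).
meta <- read_waterdata_ts_meta(monitoring_location_id = "USGS-02238500",
                               parameter_code = "00060",
                               skipGeometry = TRUE)
dv <- read_waterdata_daily(time_series_id = meta$time_series_id[1],
                           time = "P7D")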
+#' @examplesIf is_dataRetrieval_user()
+#'
+#' \donttest{
+#' site <- "USGS-02238500"
+#' meta_1 <- read_waterdata_ts_meta(monitoring_location_id = site)
+#'
+#' meta_multi <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-01491000",
+#'                                                                 "USGS-01645000"),
+#'                                      parameter_code = c("00060", "00010"),
+#'                                      properties = c("monitoring_location_id",
+#'                                                     "parameter_code",
+#'                                                     "begin",
+#'                                                     "end",
+#'                                                     "time_series_id"),
+#'                                      skipGeometry = TRUE)
+#' }
+read_waterdata_ts_meta <- function(monitoring_location_id = NA_character_,
+                                   parameter_code = NA_character_,
+                                   parameter_name = NA_character_,
+                                   properties = NA_character_,
+                                   statistic_id = NA_character_,
+                                   last_modified = NA_character_,
+                                   begin = NA_character_,
+                                   end = NA_character_,
+                                   unit_of_measure = NA_character_,
+                                   computation_period_identifier = NA_character_,
+                                   computation_identifier = NA_character_,
+                                   thresholds = NA,
+                                   sublocation_identifier = NA_character_,
+                                   primary = NA_character_,
+                                   time_series_id = NA_character_,
+                                   web_description = NA_character_,
+                                   skipGeometry = NA,
+                                   limit = NA,
+                                   max_results = NA,
+                                   bbox = NA,
+                                   convertType = FALSE){
+
+  service <- "time-series-metadata"
+  output_id <- "time_series_id"
+
+  args <- mget(names(formals()))
+  args[["service"]] <- service
+
+  args <- switch_arg_id(args, id_name = output_id, service = service)
+
+  args[["convertType"]] <- NULL
+
+  args[["properties"]] <- switch_properties_id(properties,
+                                               id_name = output_id,
+                                               service = service)
+
+  req_ts_meta <- do.call(construct_api_requests, args)
+
+  return_list <- walk_pages(req_ts_meta, max_results)
+
+  return_list <- deal_with_empty(return_list, properties, service)
+
+  if(convertType) return_list <- cleanup_cols(return_list)
+
+  return_list <- rejigger_cols(return_list, properties, output_id)
+
+  return(return_list)
+
+}
diff --git a/R/renameColumns.R b/R/renameColumns.R
index c0254923..69d58a52 100644
--- a/R/renameColumns.R
+++ b/R/renameColumns.R
@@ -18,8 +18,8 @@
 #' @param p72019 the base name for parameter code 72019.
 #' @param \dots named arguments for the base name for any other parameter code. The
 #' form of the name must be like pXXXXX, where XXXXX is the parameter code.
-#' @return A dataset like \code{data} with selected columns renamed.
-#' @note The following statistics codes are converted by \code{renameNWISColumns}.
+#' @return A dataset like `data` with selected columns renamed.
+#' @note The following statistics codes are converted by `renameNWISColumns`.
 #' \describe{
 #' \item{00000}{Instantaneous Value, suffix: Inst}
 #' \item{00001}{Maximum value, suffix: Max}
@@ -34,7 +34,7 @@
 #' \item{00023}{Tidal high-low value, suffix: HiLoTide}
 #' \item{00024}{Tidal low-low value, suffix: LoLoTide}
 #' }
-#' @seealso \code{\link{readNWISdv}}, \code{\link{readNWISuv}}
+#' @seealso [readNWISdv()], [readNWISuv()]
 #' @keywords manip IO
 #' @export
 #' @examples
diff --git a/R/setAccess.R b/R/setAccess.R
index d5019be1..ac33a195 100644
--- a/R/setAccess.R
+++ b/R/setAccess.R
@@ -1,7 +1,7 @@
 #' Set data endpoint
 #'
 #' access Indicate which dataRetrieval access code
-#' you want to use options: \code{c('public','internal')}
+#' you want to use. Options: `c('public','internal')`
 #'
 #' @param access code for data access. Options are: "public","internal","cooperator", or "USGS".
 #' \itemize{
@@ -27,12 +27,18 @@ setAccess <- function(access = "public") {
   if (access == "internal") {
     pkg.env$access <- "3"
     message("setting access to internal")
+    warning("Internal access is slated for decommission. 
+Please contact comptools@usgs.gov for more information.")
   } else if (access == "cooperator") {
     pkg.env$access <- "1"
     message("setting access to cooperator")
+    warning("Cooperator access is slated for decommission. 
+Please contact comptools@usgs.gov for more information.")
   } else if (access == "USGS") {
     pkg.env$access <- "2"
     message("setting access to all USGS Water Science Centers")
+    warning("Water Science Center access is slated for decommission. 
+Please contact comptools@usgs.gov for more information.")
   } else {
     pkg.env$access <- NULL
     message("setting access to public")
diff --git a/R/walk_pages.R b/R/walk_pages.R
new file mode 100644
index 00000000..8a2d8ed9
--- /dev/null
+++ b/R/walk_pages.R
@@ -0,0 +1,247 @@
+#' Return a data frame if there's an empty response
+#'
+#' @param return_list data frame returned from walk_pages
+#' @param properties A vector of requested columns
+#' @param service character, can be any existing collection such
+#' as "daily", "monitoring-locations", "time-series-metadata"
+#'
+#' @return data.frame
+#' @noRd
+#' @examples
+#'
+#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL),
+#'                                       properties = c("time", "value"),
+#'                                       service = "daily")
+#'
+#' df2 <- dataRetrieval:::deal_with_empty(data.frame(NULL),
+#'                                        properties = NA,
+#'                                        service = "daily")
+#'
+deal_with_empty <- function(return_list, properties, service){
+  if(nrow(return_list) == 0){
+    if(all(is.na(properties))){
+      schema <- check_OGC_requests(endpoint = service, type = "schema")
+      properties <- names(schema$properties)
+    }
+    return_list <- data.frame(matrix(nrow = 0, ncol = length(properties)))
+    names(return_list) <- properties
+  }
+
+  return(return_list)
+}
+
+#' Rejigger and rename
+#'
+#' Reorder columns based on the user's property input.
+#' Add "service" prefix to returned "id" column.
+#' This allows better integration with other endpoints.
+#'
+#' @param df data frame returned from walk_pages
+#' @param properties A vector of requested columns
+#' @param service character, can be any existing collection such
+#' as "daily", "monitoring-locations", "time-series-metadata"
+#'
+#' @return data.frame
+#' @noRd
+#' @examples
+#'
+#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL),
+#'                                       properties = c("state_code", "county_code", "id"),
+#'                                       service = "monitoring-locations")
+#' df2 <- dataRetrieval:::rejigger_cols(df,
+#'                                      properties = c("state_code", "id", "county_code"),
+#'                                      output_id = "monitoring_location_id")
+#'
+#' df3 <- dataRetrieval:::rejigger_cols(df,
+#'                                      properties = c("state_code", "monitoring_location_id", "county_code"),
+#'                                      output_id = "monitoring_location_id")
+#'
+rejigger_cols <- function(df, properties, output_id){
+
+  if(!all(is.na(properties))){
+    if(!"id" %in% properties){
+      if(output_id %in% properties){
+        names(df)[(names(df) == "id")] <- output_id
+      } else {
+        # just in case users become aware of the singular/plural issue
+        # where the endpoint name is plural, but inputs to other endpoints are singular
+        plural <- gsub("_id", "s_id", output_id)
+        if(plural %in% properties){
+          names(df)[(names(df) == "id")] <- plural
+        }
+      }
+    }
+    df <- df[, properties]
+  } else {
+    names(df)[(names(df) == "id")] <- output_id
+  }
+  df
+}
+
+
+#' Convert columns if needed
+#'
+#' These are columns that have caused problems in testing.
+#' Mostly if the columns are empty on one page, but not the next.
+#' The qualifier column also comes back as a list column. This
+#' is fine for many uses, but some prefer a character column.
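+#' As a small illustration of the collapse idiom used below (hypothetical
+#' qualifier values; `vapply()` applies it across the list column):
+#' ```
+#' paste(c("A", "e"), collapse = ", ")
+#' #> [1] "A, e"
+#' ```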
+#'
+#'
+#' @param df data frame returned from walk_pages
+#' @param service character, can be any existing collection such
+#' as "daily"
+#' @return data.frame
+#' @noRd
+#' @examples
+#'
+#' df <- dataRetrieval:::deal_with_empty(data.frame(NULL),
+#'                                       properties = c("time", "value", "id", "qualifier"),
+#'                                       service = "daily")
+#' df2 <- dataRetrieval:::rejigger_cols(df,
+#'                                      properties = c("value", "id", "time", "qualifier"),
+#'                                      output_id = "daily_id")
+#' df3 <- dataRetrieval:::cleanup_cols(df2)
+#'
+cleanup_cols <- function(df, service = "daily"){
+
+  if("qualifier" %in% names(df)){
+    if(!all(is.na(df$qualifier))){
+      df$qualifier <- vapply(X = df$qualifier,
+                             FUN = function(x) paste(x, collapse = ", "),
+                             FUN.VALUE = c(NA_character_))
+    }
+  }
+
+  if("time" %in% names(df)){
+    if(service == "daily"){
+      df$time <- as.Date(df$time)
+    }
+    # leave some room here for POSIXct in the other services.
+  }
+
+  if("value" %in% names(df)){
+    df$value <- as.numeric(df$value)
+  }
+
+  if("contributing_drainage_area" %in% names(df)){
+    df$contributing_drainage_area <- as.numeric(df$contributing_drainage_area)
+  }
+
+  df
+}
+
+#' Next request URL
+#'
+#' Custom function to find the "next" URL from the API response.
+#' @seealso [httr2::req_perform_iterative]
+#'
+#' @param resp httr2 response from last request
+#' @param req httr2 request from last time
+#'
+#' @noRd
+#' @return the url for the next request
+#'
+next_req_url <- function(resp, req) {
+
+  body <- httr2::resp_body_json(resp)
+
+  if(isTRUE(body[["numberReturned"]] == 0)){
+    return(NULL)
+  }
+
+  header_info <- httr2::resp_headers(resp)
+  if(Sys.getenv("API_USGS_PAT") != ""){
+    message("Remaining requests this hour: ", header_info$`x-ratelimit-remaining`)
+  }
+
+  links <- body$links
+  if(any(sapply(links, function(x) x$rel) == "next")){
+    next_index <- which(sapply(links, function(x) x$rel) == "next")
+
+    next_url <- links[[next_index]][["href"]]
+
+    ################################################
+    # This offset check will be going away
+    # offset should be replaced by "cursor" eventually.
+    offset <- as.integer(sub("(?i).*?\\boffset=?\\s*(\\d+).*", "\\1", next_url))
+    if(isTRUE(offset > 40000)){
+      warning("Not all data was returned! Split up the query for best results.")
+      return(NULL)
+    }
+    ################################################
+
+    return(httr2::req_url(req = req, url = next_url))
+  } else {
+    return(NULL)
+  }
+}
+
+#' Get single response data frame
+#'
+#' Uses skipGeometry in the request URL to decide whether to return an
+#' sf object or a plain data frame.
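+#' A minimal sketch of the idea (hypothetical one-feature GeoJSON string):
+#' ```
+#' txt <- '{"type": "FeatureCollection", "features": [{"type": "Feature",
+#'   "properties": {"value": 1},
+#'   "geometry": {"type": "Point", "coordinates": [-89.4, 43.1]}}]}'
+#' df <- sf::read_sf(txt)          # parse the features, geometry included
+#' df <- sf::st_drop_geometry(df)  # drop geometry, as when skipGeometry=true
+#' ```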
+#'
+#' @noRd
+#'
+#' @param resp httr2 response from last request
+#'
+#' @return data.frame
+#'
+get_resp_data <- function(resp) {
+
+  body <- httr2::resp_body_json(resp)
+
+  if(isTRUE(body[["numberReturned"]] == 0)){
+    return(data.frame())
+  }
+
+  use_sf <- !grepl("skipGeometry=true", resp$url, ignore.case = TRUE)
+  return_df <- sf::read_sf(httr2::resp_body_string(resp))
+
+  if(!use_sf){
+    return_df <- sf::st_drop_geometry(return_df)
+  }
+
+  return(return_df)
+
+}
+
+#' Walk through the pages
+#'
+#' @param req httr2 initial request
+#'
+#' @noRd
+#' @return data.frame with attributes
+walk_pages <- function(req, max_results){
+
+  if(is.na(max_results)){
+    resps <- httr2::req_perform_iterative(req,
+                                          next_req = next_req_url,
+                                          max_reqs = Inf)
+    ######################################
+    # So far I haven't tested this because I haven't had
+    # individual failures
+    failures <- resps |>
+      httr2::resps_failures() |>
+      httr2::resps_requests()
+
+    if(length(failures) > 0){
+      message("There were ", length(failures), " failed requests.")
+    }
+
+    return_list <- data.frame()
+    for(resp in resps){
+      df1 <- get_resp_data(resp)
+      return_list <- rbind(return_list, df1)
+    }
+
+    ######################################
+  } else {
+    resps <- httr2::req_perform(req)
+    return_list <- get_resp_data(resps)
+  }
+
+  attr(return_list, "request") <- req
+  attr(return_list, "queryTime") <- Sys.time()
+
+  return(return_list)
+}
diff --git a/R/whatNWISdata.R b/R/whatNWISdata.R
index d909dff4..dfdf1a23 100644
--- a/R/whatNWISdata.R
+++ b/R/whatNWISdata.R
@@ -1,19 +1,19 @@
 #' USGS data availability
 #'
 #' Imports a table of available parameters, period of record, and count. See
-#' \url{https://waterservices.usgs.gov/docs/site-service/}
+#' <https://waterservices.usgs.gov/docs/site-service/>
 #' for more information.
 #'
-#' @param \dots see \url{https://waterservices.usgs.gov/docs/site-service/}
+#' @param \dots see <https://waterservices.usgs.gov/docs/site-service/>
 #' for a complete list of options. A list of arguments can also be supplied.
-#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will
+#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function will
 #' convert the data to dates, datetimes,
 #' numerics based on a standard algorithm. If false, everything is returned as a character
 #' @keywords data import USGS web service
 #'
 #' @details This function requires users to create their own arguments
 #' based on the NWIS web services. It is a more complicated function to use
-#' compared to other NWIS functions such as \code{\link{readNWISdv}}, \code{\link{readNWISuv}},
+#' compared to other NWIS functions such as [readNWISdv()], [readNWISuv()],
 #' etc. However, this function adds a lot of
 #' flexibility to the possible queries. If the "service" argument is included,
 #' the results will be filtered to the proper data_type_cd. 
This is a great
@@ -63,33 +63,30 @@
 #'   queryTime \tab POSIXct \tab The time the data was returned \cr
 #' }
 #' @export
-#' @examplesIf is_dataRetrieval_user()
-#' \donttest{
+#' @seealso [read_waterdata_ts_meta()]
+#' @examples
+#'
+#' # see ?read_waterdata_ts_meta
+#'
+#' #availableData <- whatNWISdata(siteNumber = "05114000")
+#'
+#' # To find just unit value ('instantaneous') data:
+#'
+#' #uvData <- whatNWISdata(siteNumber = "05114000", service = "uv")
 #'
-#' availableData <- whatNWISdata(siteNumber = "05114000")
+#' #flowAndTemp <- whatNWISdata(stateCd = "WI", service = "dv",
+#' #                            parameterCd = c("00060", "00010"))
+#'
 #'
-#' # To find just unit value ('instantaneous') data:
-#' uvData <- whatNWISdata(siteNumber = "05114000",
-#'                        service = "uv")
-#' uvDataMulti <- whatNWISdata(siteNumber = c("05114000", "09423350"),
-#'                             service = c("uv", "dv"))
-#' flowAndTemp <- whatNWISdata(
-#'   stateCd = "WI", service = "dv",
-#'   parameterCd = c("00060", "00010"),
-#'   statCd = "00003"
-#' )
-#' sites <- whatNWISdata(stateCd = "WI",
-#'                       parameterCd = "00060",
-#'                       siteType = "ST",
-#'                       service = "site")
-#'
-#' sites <- whatNWISdata(stateCd = "WI",
-#'                       service = "gwlevels")
-#' }
 whatNWISdata <- function(..., convertType = TRUE) {
   matchReturn <- convertLists(...)
   prewarned <- FALSE
+
+  .Deprecated(new = "read_waterdata_ts_meta",
+              package = "dataRetrieval",
+              msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_ts_meta")
+
   if ("service" %in% names(matchReturn)) {
     service <- matchReturn$service
diff --git a/R/whatNWISsites.R b/R/whatNWISsites.R
index a70c09fb..8ece54c8 100644
--- a/R/whatNWISsites.R
+++ b/R/whatNWISsites.R
@@ -1,10 +1,10 @@
 #' Site Data Import from NWIS
 #'
 #' Returns a list of sites from the NWIS web service. This function gets the data from:
-#' \url{https://waterservices.usgs.gov/docs/site-service/}.
+#' <https://waterservices.usgs.gov/docs/site-service/>.
 #' Mapper format is used
 #'
-#' @param \dots see \url{https://waterservices.usgs.gov/docs/site-service/}
+#' @param \dots see <https://waterservices.usgs.gov/docs/site-service/>
 #' for a complete list of options. A list (or lists) can also be supplied.
 #'
 #' @return A data frame with at least the following columns:
@@ -26,15 +26,21 @@
 #'   queryTime \tab POSIXct \tab The time the data was returned \cr
 #' }
 #' @export
+#' @seealso [read_waterdata_monitoring_location()]
 #'
 #' @examples
-#' \donttest{
-#'
-#' siteListPhos <- whatNWISsites(stateCd = "OH", parameterCd = "00665")
-#' oneSite <- whatNWISsites(sites = "05114000")
-#' }
+#'
+#' # see ?read_waterdata_monitoring_location
+#' #siteListPhos <- whatNWISsites(stateCd = "OH", parameterCd = "00665")
+#' #oneSite <- whatNWISsites(sites = "05114000")
+#'
 whatNWISsites <- function(...) {
+
+  .Deprecated(new = "read_waterdata_monitoring_location",
+              package = "dataRetrieval",
+              msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_monitoring_location")
+
+
   matchReturn <- convertLists(...)
 
   if ("service" %in% names(matchReturn)) {
     service <- matchReturn$service
diff --git a/R/whatWQPdata.R b/R/whatWQPdata.R
index 9c868056..495bf08b 100644
--- a/R/whatWQPdata.R
+++ b/R/whatWQPdata.R
@@ -151,9 +151,9 @@ whatWQPmetrics <- function(...,
 #' Data Available from Water Quality Portal
 #'
 #' Returns a list of sites from the Water Quality Portal web service. This function gets
-#' the data from: \url{https://www.waterqualitydata.us}.
+#' the data from: <https://www.waterqualitydata.us>.
 #' Arguments to the function should be based on
-#' \url{https://www.waterqualitydata.us/webservices_documentation}.
+#' <https://www.waterqualitydata.us/webservices_documentation>.
 #' The information returned from whatWQPdata describes the
 #' available data at the WQP sites, and some metadata on the sites themselves.
 #' For example, a row is returned for each individual site that fulfills this
@@ -161,13 +161,13 @@ whatWQPmetrics <- function(...,
 #' are available for the query. It does not break those results down by any finer
 #' grain. For example, if you ask for "Nutrients" (characteristicGroup), you will
 #' not learn what specific nutrients are available at that site. For that
-#' kind of data discovery see \code{readWQPsummary}.
+#' kind of data discovery see `readWQPsummary`.
 #'
-#' @param \dots see \url{https://www.waterqualitydata.us/webservices_documentation} for
+#' @param \dots see <https://www.waterqualitydata.us/webservices_documentation> for
 #' a complete list of options. A list of arguments can also be supplied.
 #' One way to figure out how to construct a WQP query is to go to the "Advanced"
 #' form in the Water Quality Portal:
-#' \url{https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET}
+#' <https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET>
 #' Use the form to discover what parameters are available. Once the query is
 #' set in the form, scroll down to the "Query URL". You will see the parameters
 #' after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient"
@@ -175,7 +175,7 @@ whatWQPmetrics <- function(...,
 #' in the Query URL. The corresponding argument for dataRetrieval is
 #' characteristicType = "Nutrient". dataRetrieval users do not need to include
 #' mimeType, and providers is optional (these arguments are picked automatically).
-#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function
+#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function
 #' will convert the data to dates, datetimes,
 #' numerics based on a standard algorithm. If false, everything is returned as a character.
 #' @keywords data import WQP web service
diff --git a/R/whatWQPsites.R b/R/whatWQPsites.R
index 88dfedd1..3067c4eb 100644
--- a/R/whatWQPsites.R
+++ b/R/whatWQPsites.R
@@ -1,18 +1,18 @@
 #' Site Data Import from Water Quality Portal
 #'
 #' Returns a list of sites from the Water Quality Portal web service. This function
-#' gets the data from: \url{https://www.waterqualitydata.us}.
+#' gets the data from: <https://www.waterqualitydata.us>.
 #' Arguments to the function should be based on
-#' \url{https://www.waterqualitydata.us/webservices_documentation}. The return from
+#' <https://www.waterqualitydata.us/webservices_documentation>. The return from
 #' this function returns the basic metadata on WQP sites. It is
-#' generally faster than the \code{\link{whatWQPdata}} function, but does
+#' generally faster than the [whatWQPdata()] function, but does
 #' not return information on what data was collected at the site.
 #'
-#' @param \dots see \url{https://www.waterqualitydata.us/webservices_documentation}
+#' @param \dots see <https://www.waterqualitydata.us/webservices_documentation>
 #' for a complete list of options. A list of arguments can also be supplied.
 #' One way to figure out how to construct a WQP query is to go to the "Advanced"
 #' form in the Water Quality Portal:
-#' \url{https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET}
+#' <https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET>
 #' Use the form to discover what parameters are available. Once the query is
 #' set in the form, scroll down to the "Query URL". You will see the parameters
 #' after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient"
@@ -22,7 +22,7 @@
 #' mimeType, and providers is optional (these arguments are picked automatically).
 #' @param legacy Logical. If TRUE, uses legacy WQP services. Default is TRUE. 
 #' Setting legacy = FALSE uses WQX3.0 WQP services, which are in-development, use with caution.
-#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the
+#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the
 #' function will convert the data to dates, datetimes,
 #' numerics based on a standard algorithm. If false, everything is returned as a character.
 #' @keywords data import WQP web service
@@ -96,17 +96,17 @@ whatWQPsites <- function(..., legacy = TRUE, convertType = TRUE) {
 #' Summary of Data Available from Water Quality Portal
 #'
 #' Returns a list of sites with year-by-year information on what data is available.
-#' The function gets the data from: \url{https://www.waterqualitydata.us}.
+#' The function gets the data from: <https://www.waterqualitydata.us>.
 #' Arguments to the function should be based on
-#' \url{https://www.waterqualitydata.us/webservices_documentation}.
+#' <https://www.waterqualitydata.us/webservices_documentation>.
 #' The information returned from this function describes the
 #' available data at the WQP sites, and some metadata on the sites themselves.
 #'
-#' @param \dots see \url{https://www.waterqualitydata.us/webservices_documentation}
+#' @param \dots see <https://www.waterqualitydata.us/webservices_documentation>
 #' for a complete list of options. A list of arguments can also be supplied.
 #' One way to figure out how to construct a WQP query is to go to the "Advanced"
 #' form in the Water Quality Portal:
-#' \url{https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET}
+#' <https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET>
 #' Use the form to discover what parameters are available. Once the query is
 #' set in the form, scroll down to the "Query URL". You will see the parameters
 #' after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient"
diff --git a/README.Rmd b/README.Rmd
index c0509aa8..af74e5a4 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -27,7 +27,7 @@ library(dataRetrieval)
 [![](http://cranlogs.r-pkg.org/badges/dataRetrieval)](https://cran.r-project.org/package=dataRetrieval)
 [![](http://cranlogs.r-pkg.org/badges/grand-total/dataRetrieval)](https://cran.r-project.org/package=dataRetrieval)
 
-The `dataRetrieval` package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrology data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS).
+The `dataRetrieval` package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrology data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS.
 
 # Introduction
 
@@ -37,17 +37,19 @@ If you have additional questions about these changes, email CompTools@usgs.gov.
 
 # What would you like to do?
 
-1. Get instantaneous USGS discharge data. Start here: `?readNWISuv`
+1. Get instantaneous USGS data (for example, discharge sensor data). Start here: `?readNWISuv`
 
-2. Get daily USGS discharge data. Start here: `?readNWISdv`
+2. Get daily USGS data (for example, mean daily discharge). Start here: `?read_waterdata_daily`
 
 3. Get USGS groundwater data. Start here: `?readNWISgwl`
 
-4. 
Get discrete water quality data. Start here: `?readWQPdata`
+4. Get discrete water quality data from a cooperative service that integrates publicly available water-quality data from the USGS, EPA, and over 400 state, federal, tribal, and local agencies. Start here: `?readWQPdata`
 
-4. Discover USGS data (not including discrete water quality data). Start here: `?whatNWISdata`
+5. Get USGS discrete water quality data. Start here: `?read_waterdata_samples`
 
-6. Find Hydro Network-Linked Data Index (NLDI) data. Start here: `?findNLDI`
+6. Get metadata about USGS time series data, including instantaneous and daily data. Start here: `?read_waterdata_ts_meta`
+
+7. Find Hydro Network-Linked Data Index (NLDI) data. Start here: `?findNLDI`
 
 For additional tutorials, see:
 
@@ -70,11 +72,23 @@ install.packages("dataRetrieval")
 
 The USGS is planning to modernize all web services in the near future. For each of these updates, `dataRetrieval` will create a new function to access the new services.
 
+## API Token
+
+You can register an API key for use with USGS water data APIs. There are now limits on how many queries can be requested per IP address per hour. If you find yourself running into limits, you can request an API token here: <https://api.waterdata.usgs.gov/signup/>
+
+Then save your token in your .Renviron file like this:
+
+```
+API_USGS_PAT = "my_super_secret_token"
+```
+
+You can use `usethis::edit_r_environ()` to find and open your .Renviron file. You will need to restart R for that variable to be recognized.
+
 ## New Features
 
 ### Style
 
-New functions will use a "snake case", such as "read_USGS_samples". Older functions use camel case, such as "readNWISdv". The difference is the underscore between words. This should be a handy way to tell the difference between newer modern data access, and the older traditional functions.
+New functions will use a "snake case", such as "read_waterdata_samples". Older functions use camel case, such as "readNWISdv". The difference is the underscore between words. This should be a handy way to tell the difference between newer modern data access and the older traditional functions.
 
 ### Structure
 
@@ -82,7 +96,7 @@ Historically, we allowed users to customize their queries via the `...` argument
 
 ### Dependencies
 
-As we develop functions for the modern USGS web services, we'll continue to explore updating package dependencies.
+As we develop functions for the modern USGS Water Data APIs, we'll continue to explore updating package dependencies.
 
 ### Developmental workflow
 
@@ -160,7 +174,7 @@ Water Quality Portal. Washington (DC): National Water Quality Monitoring Council
 
 # Package Support
 
-The Water Mission Area of the USGS supports the development and maintenance of `dataRetrieval`, and most likely further into the future. Resources are available primarily for maintenance and responding to user questions. Priorities on the development of new features are determined by the `dataRetrieval` development team. This software was last released with USGS record: IP-147158.
+The Water Mission Area of the USGS supports the development and maintenance of `dataRetrieval`, and most likely further into the future. Resources are available primarily for maintenance and responding to user questions. Priorities on the development of new features are determined by the `dataRetrieval` development team.
 
 ```{r disclaimer, child="DISCLAIMER.md", eval=TRUE}
 ```
diff --git a/README.md b/README.md
index 153c822c..5125410f 100644
--- a/README.md
+++ b/README.md
@@ -11,8 +11,7 @@
 retrieve the major data types of U.S. 
Geological Survey (USGS) hydrology
 data that are available on the Web, as well as data from the Water
 Quality Portal (WQP), which currently houses water quality data from the
 Environmental Protection Agency (EPA), U.S. Department of Agriculture
-(USDA), and USGS. Direct USGS data is obtained from a service called the
-National Water Information System (NWIS).
+(USDA), and USGS.
 
 # Introduction
 
@@ -28,18 +27,26 @@ If you have additional questions about these changes, email
 
 # What would you like to do?
 
-1. Get instantaneous USGS discharge data. Start here: `?readNWISuv`
+1. Get instantaneous USGS data (for example, discharge sensor data).
+   Start here: `?readNWISuv`
 
-2. Get daily USGS discharge data. Start here: `?readNWISdv`
+2. Get daily USGS data (for example, mean daily discharge). Start here:
+   `?read_waterdata_daily`
 
 3. Get USGS groundwater data. Start here: `?readNWISgwl`
 
-4. Get discrete water quality data. Start here: `?readWQPdata`
+4. Get discrete water quality data from a cooperative service that
+   integrates publicly available water-quality data from the USGS, EPA,
+   and over 400 state, federal, tribal, and local agencies. Start here:
+   `?readWQPdata`
 
-5. Discover USGS data (not including discrete water quality data).
-   Start here: `?whatNWISdata`
+5. Get USGS discrete water quality data. Start here:
+   `?read_waterdata_samples`
 
-6. Find Hydro Network-Linked Data Index (NLDI) data. Start here:
+6. Get metadata about USGS time series data, including instantaneous
+   and daily data. Start here: `?read_waterdata_ts_meta`
+
+7. Find Hydro Network-Linked Data Index (NLDI) data. Start here:
    `?findNLDI`
 
 For additional tutorials, see:
 
@@ -68,11 +75,26 @@
 The USGS is planning to modernize all web services in the near future.
 For each of these updates, `dataRetrieval` will create a new function to
 access the new services.
 
+## API Token
+
+You can register an API key for use with USGS water data APIs. There are
+now limits on how many queries can be requested per IP address per hour.
+If you find yourself running into limits, you can request an API token
+here: <https://api.waterdata.usgs.gov/signup/>
+
+Then save your token in your .Renviron file like this:
+
+    API_USGS_PAT = "my_super_secret_token"
+
+You can use `usethis::edit_r_environ()` to find and open your
+.Renviron file. You will need to restart R for that variable to be
+recognized.
+
 ## New Features
 
 ### Style
 
-New functions will use a “snake case”, such as “read_USGS_samples”.
+New functions will use a “snake case”, such as “read_waterdata_samples”.
 Older functions use camel case, such as “readNWISdv”. The difference is
 the underscore between words. This should be a handy way to tell the
 difference between newer modern data access, and the older traditional
@@ -94,8 +116,8 @@ able to build up argument lists to pass into the function.
 
 ### Dependencies
 
-As we develop functions for the modern USGS web services, we’ll continue
-to explore updating package dependencies.
+As we develop functions for the modern USGS Water Data APIs, we’ll
+continue to explore updating package dependencies.
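+
+After restarting R, a quick way to confirm your session actually sees an
+`API_USGS_PAT` token (saved as described in the API Token section above)
+is a minimal check like this, which returns `TRUE` once the variable is
+set:
+
+``` r
+Sys.getenv("API_USGS_PAT") != ""
+```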
 ### Developmental workflow
@@ -135,17 +157,17 @@ citation(package = "dataRetrieval")
 #> 
 #> De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M.,
 #> 2025, dataRetrieval: R packages for discovering and retrieving water
-#> data available from Federal hydrologic web services, v.2.7.18,
+#> data available from Federal hydrologic web services, v.2.7.19,
 #> doi:10.5066/P9X4L3GE <https://doi.org/10.5066/P9X4L3GE>
 #> 
 #> A BibTeX entry for LaTeX users is
 #> 
 #> @Manual{,
-#>   author = {Laura DeCicco and Robert Hirsch and David Lorenz and Jordan Read and Jordan Walker and Lindsay Platt and David Watkins and David Blodgett and Mike Johnson and Aliesha Krall and Lee Stanish},
+#>   author = {Laura DeCicco and Robert Hirsch and David Lorenz and Jordan Read and Jordan Walker and Lindsay Platt and David Watkins and David Blodgett and Mike Johnson and Aliesha Krall and Lee Stanish and Joeseph Zemmels and Elise Hinman and Michael Mahoney},
 #>   title = {dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services},
 #>   publisher = {U.S. Geological Survey},
 #>   address = {Reston, VA},
-#>   version = {2.7.18},
+#>   version = {2.7.19},
 #>   institution = {U.S. Geological Survey},
 #>   year = {2025},
 #>   doi = {10.5066/P9X4L3GE},
@@ -170,15 +192,15 @@ NWIScitation <- create_NWIS_bib(dv)
 NWIScitation
 #> U.S. Geological Survey (2025). _National Water Information System data
 #> available on the World Wide Web (USGS Water Data for the Nation)_.
-#> doi:10.5066/F7P55KJN <https://doi.org/10.5066/F7P55KJN>, Accessed Mar
-#> 25, 2025,
+#> doi:10.5066/F7P55KJN <https://doi.org/10.5066/F7P55KJN>, Accessed Jun
+#> 13, 2025,
 #> <https://waterservices.usgs.gov/nwis/dv/?site=09010500&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=1851-01-01>.
 
 print(NWIScitation, style = "Bibtex")
 #> @Manual{,
 #>   title = {National Water Information System data available on the World Wide Web (USGS Water Data for the Nation)},
 #>   author = {{U.S. Geological Survey}},
 #>   doi = {10.5066/F7P55KJN},
-#>   note = {Accessed Mar 25, 2025},
+#>   note = {Accessed Jun 13, 2025},
 #>   year = {2025},
 #>   url = {https://waterservices.usgs.gov/nwis/dv/?site=09010500&format=waterml%2C1.1&ParameterCd=00060&StatCd=00003&startDT=1851-01-01},
 #> }
@@ -202,14 +224,14 @@ WQPcitation <- create_WQP_bib(SC)
 WQPcitation
 #> National Water Quality Monitoring Council (2025). _Water Quality
 #> Portal_. doi:10.5066/P9QRKUVJ <https://doi.org/10.5066/P9QRKUVJ>,
-#> Accessed Mar 25, 2025,
+#> Accessed Jun 13, 2025,
 #> <https://www.waterqualitydata.us/data/Result/search?siteid=USGS-05288705&count=no&pCode=00300&mimeType=csv>.
 
 print(WQPcitation, style = "Bibtex")
 #> @Manual{,
 #>   title = {Water Quality Portal},
 #>   author = {{National Water Quality Monitoring Council}},
 #>   doi = {10.5066/P9QRKUVJ},
-#>   note = {Accessed Mar 25, 2025},
+#>   note = {Accessed Jun 13, 2025},
 #>   year = {2025},
 #>   url = {https://www.waterqualitydata.us/data/Result/search?siteid=USGS-05288705&count=no&pCode=00300&mimeType=csv},
 #> }
@@ -229,8 +251,7 @@ The Water Mission Area of the USGS supports the development and
 maintenance of `dataRetrieval`, and most likely further into the future.
 Resources are available primarily for maintenance and responding to user
 questions. Priorities on the development of new features are determined
-by the `dataRetrieval` development team. This software was last released
-with USGS record: IP-147158.
+by the `dataRetrieval` development team. 
# Disclaimer diff --git a/_pkgdown.yml b/_pkgdown.yml index ed98dac0..20530cc1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -22,16 +22,10 @@ template: pkgdown-nav-height: 125px navbar: left: - - text: Background - href: articles/dataRetrieval.html + - text: Status + href: articles/Status.html - text: Function Help href: reference/index.html - - text: Large Data Pulls - menu: - - text: Scripting Approach - href: articles/wqp_large_pull_script.html - - text: Pipeline Approach - href: articles/wqp_large_pull_targets.html - text: Water Quality Data menu: - text: Samples Data @@ -46,12 +40,18 @@ navbar: menu: - text: Tutorial href: articles/tutorial.html - - text: Changes to QW - href: articles/qwdata_changes.html + - text: USGS Water Data APIs + href: articles/read_waterdata_functions.html + - text: Background + href: articles/dataRetrieval.html - text: Pivot Data href: articles/long_to_wide.html - text: Join by closest date href: articles/join_by_closest.html + - text: Large Request Scripting Approach + href: articles/wqp_large_pull_script.html + - text: Large Request Pipeline Approach + href: articles/wqp_large_pull_targets.html - text: Stat Service href: articles/statsServiceMap.html - text: NLDI Interface @@ -64,8 +64,18 @@ navbar: - icon: fa-github fa-lg href: https://github.com/DOI-USGS/dataRetrieval reference: + - title: USGS new data services + desc: Functions to retrieve USGS data from new services. + contents: + - read_waterdata_samples + - read_waterdata_daily + - read_waterdata_ts_meta + - read_waterdata_monitoring_location + - read_waterdata + - summarize_waterdata_samples + - check_waterdata_sample_params - title: National Water Information System (NWIS) - desc: Functions to retrieve (USGS) NWIS data. These will be slowly phased out and replaced with the read_USGS family of functions. + desc: Functions to retrieve (USGS) NWIS data. These will be slowly phased out and replaced with the read_waterdata family of functions. contents: - readNWISdv - readNWISuv @@ -91,13 +101,6 @@ reference: - whatWQPmetrics - readWQPsummary - wqp_check_status - - title: USGS new data services - desc: Functions to retrieve USGS data from new services. - contents: - - read_USGS_samples - - summarize_USGS_samples - - construct_USGS_sample_request - - check_param - title: National Ground-Water Monitoring Network desc: Functions to retrieve NGWMN data. 
contents: diff --git a/docker/Dockerfile b/docker/Dockerfile index a5fa12e5..404040c3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -24,7 +24,8 @@ RUN apt-get update -qq && apt-get -y --no-install-recommends install \ r-cran-maps \ r-cran-leaflet \ r-cran-readxl \ - && apt-get install -y pandoc \ + r-cran-whisker \ + r-cran-ggplot2 \ && rm -rf /var/lib/apt/lists/* diff --git a/inst/CITATION b/inst/CITATION index dd9cf7c1..0a3803b3 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -6,32 +6,42 @@ bibentry(bibtype = "Manual", email = "ldecicco@usgs.gov", comment=c(ORCID="0000-0002-3915-9487")), person("Robert", "Hirsch", role = c("aut"), - comment=c(ORCID="0000-0002-4534-075X")), + comment=c(ORCID = "0000-0002-4534-075X")), person("David","Lorenz", role=c("aut")), person("Jordan", "Read", role = c("ctb")), person("Jordan", "Walker", role = c("ctb")), person("Lindsay","Platt", role=c("ctb")), person("David","Watkins", role=c("aut"), email = "wwatkins@usgs.gov", - comment=c(ORCID="0000-0002-7544-0700")), + comment=c(ORCID = "0000-0002-7544-0700")), person("David", "Blodgett", role="aut", - comment=c(ORCID="0000-0001-9489-1710"), + comment=c(ORCID = "0000-0001-9489-1710"), email = "dblodgett@usgs.gov"), person("Mike", "Johnson", role=c("aut"), email = "mikecp11@gmail.com", - comment=c(ORCID="0000-0002-5288-8350")), + comment=c(ORCID = "0000-0002-5288-8350")), person("Aliesha", "Krall", role="ctb", email = "akrall@usgs.gov", - comment=c(ORCID="0000-0003-2521-5043")), + comment=c(ORCID = "0000-0003-2521-5043")), person("Lee", "Stanish", role="ctb", email = "lstanish@usgs.gov", - comment=c(ORCID = "0000-0002-9775-6861"))), + comment=c(ORCID = "0000-0002-9775-6861")), + person("Joeseph", "Zemmels", role="ctb", + email = "jzemmels@usgs.gov", + comment=c(ORCID = "0009-0008-1463-6313")), + person("Elise", "Hinman", role="ctb", + email = "ehinman@usgs.gov", + comment=c(ORCID = "0000-0001-5396-1583")), + person("Michael", "Mahoney", role="ctb", + email = "mjmahoney@usgs.gov", + comment=c(ORCID = "0000-0003-2402-304X")) + ), title = "dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services", publisher = "U.S. Geological Survey", address="Reston, VA", - version = "2.7.18", + version = "2.7.19", institution = "U.S. 
Geological Survey", year = 2025, doi = "10.5066/P9X4L3GE", - textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., 2025, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.18, doi:10.5066/P9X4L3GE" + textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., 2025, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.19, doi:10.5066/P9X4L3GE" ) diff --git a/inst/extdata/temperatureAndFlow.RData b/inst/extdata/temperatureAndFlow.RData index 80062797..7e77e642 100644 Binary files a/inst/extdata/temperatureAndFlow.RData and b/inst/extdata/temperatureAndFlow.RData differ diff --git a/inst/templates/param.CQL2 b/inst/templates/param.CQL2 new file mode 100644 index 00000000..305cd380 --- /dev/null +++ b/inst/templates/param.CQL2 @@ -0,0 +1,7 @@ + { + "op": "in", + "args": [ + {"property": "{{{property}}}"}, + [ {{{parameter}}} ] + ] + } \ No newline at end of file diff --git a/inst/templates/post.CQL2 b/inst/templates/post.CQL2 new file mode 100644 index 00000000..c948c51b --- /dev/null +++ b/inst/templates/post.CQL2 @@ -0,0 +1,8 @@ +{ + "op": "and", + "args": [ + {{#params}} + {{{.}}} + {{/params}} + ] +} \ No newline at end of file diff --git a/man/addWaterYear.Rd b/man/addWaterYear.Rd index 0aad52e5..789f974e 100644 --- a/man/addWaterYear.Rd +++ b/man/addWaterYear.Rd @@ -9,13 +9,13 @@ addWaterYear(rawData) \arguments{ \item{rawData}{the daily- or unit-values datset retrieved from NWISweb. Must have at least one of the following columns to add the new water year columns: -`dateTime`, `Date`, `ActivityStartDate`, or `ActivityEndDate`. The date column(s) +\code{dateTime}, \code{Date}, \code{ActivityStartDate}, or \code{ActivityEndDate}. The date column(s) can be character, POSIXct, Date. They cannot be numeric.} } \value{ data.frame with an additional integer column with "WY" appended to the -date column name. For WQP, there will be 2 columns: `ActivityStartDateWY` and -`ActivityEndDateWY`. +date column name. For WQP, there will be 2 columns: \code{ActivityStartDateWY} and +\code{ActivityEndDateWY}. } \description{ Add a column to the dataRetrieval data frame with the water year. 
WQP diff --git a/man/check_OGC_requests.Rd b/man/check_OGC_requests.Rd new file mode 100644 index 00000000..623379b7 --- /dev/null +++ b/man/check_OGC_requests.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/construct_api_requests.R +\name{check_OGC_requests} +\alias{check_OGC_requests} +\title{Check OGC requests} +\usage{ +check_OGC_requests(endpoint = "daily", type = "queryables") +} +\arguments{ +\item{endpoint}{Character, can be any existing collection} + +\item{type}{Character, can be "queryables", "schema"} +} +\value{ +list +} +\description{ +Check OGC requests +} +\examples{ +\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +\donttest{ + +dv_queryables <- check_OGC_requests(endpoint = "daily", + type = "queryables") +dv_schema <- check_OGC_requests(endpoint = "daily", + type = "schema") +ts_meta_queryables <- check_OGC_requests(endpoint = "time-series-metadata", + type = "queryables") +ts_meta_schema <- check_OGC_requests(endpoint = "time-series-metadata", + type = "schema") +} +\dontshow{\}) # examplesIf} +} +\keyword{internal} diff --git a/man/check_param.Rd b/man/check_waterdata_sample_params.Rd similarity index 55% rename from man/check_param.Rd rename to man/check_waterdata_sample_params.Rd index 63deb385..d8b4a9eb 100644 --- a/man/check_param.Rd +++ b/man/check_waterdata_sample_params.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_USGS_samples.R -\name{check_param} -\alias{check_param} +% Please edit documentation in R/read_waterdata_samples.R +\name{check_waterdata_sample_params} +\alias{check_waterdata_sample_params} \title{Check values from codeservice} \usage{ -check_param(service = "characteristicgroup", ...) +check_waterdata_sample_params(service = "characteristicgroup", ...) 
 }
 \arguments{
 \item{service}{Options are: "characteristicgroup", "states", "counties",
@@ -24,15 +24,15 @@ Call a service to check on values from:
 \dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 \donttest{
-groups <- check_param("characteristicgroup")
-states <- check_param("states")
-countries <- check_param("countries")
-counties <- check_param("counties")
-sitetypes <- check_param("sitetype")
-samplemedia <- check_param("samplemedia")
-characteristics <- check_param("characteristics",
+groups <- check_waterdata_sample_params("characteristicgroup")
+states <- check_waterdata_sample_params("states")
+countries <- check_waterdata_sample_params("countries")
+counties <- check_waterdata_sample_params("counties")
+sitetypes <- check_waterdata_sample_params("sitetype")
+samplemedia <- check_waterdata_sample_params("samplemedia")
+characteristics <- check_waterdata_sample_params("characteristics",
 group = "Biological")
-observedProperties <- check_param("observedproperty",
+observedProperties <- check_waterdata_sample_params("observedproperty",
 text = "phosphorus")
 }
diff --git a/man/construct_api_requests.Rd b/man/construct_api_requests.Rd
new file mode 100644
index 00000000..0c56fc39
--- /dev/null
+++ b/man/construct_api_requests.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/construct_api_requests.R
+\name{construct_api_requests}
+\alias{construct_api_requests}
+\title{Create API URL}
+\usage{
+construct_api_requests(
+  service,
+  properties = NA_character_,
+  bbox = NA,
+  limit = NA,
+  max_results = NA,
+  skipGeometry = FALSE,
+  ...
+)
+}
+\arguments{
+\item{service}{Which service available on \url{https://api.waterdata.usgs.gov/ogcapi/v0/}.}
+
+\item{properties}{The properties that should be included for each feature. The
+parameter value is a comma-separated list of property names which depend on the
+service being called.}
+
+\item{bbox}{Only features that have a geometry that intersects the bounding
+box are selected. The bounding box is provided as four or six numbers, depending
+on whether the coordinate reference system includes a vertical axis (height or
+depth).}
+
+\item{limit}{The optional limit parameter limits the number of items that are
+presented in the response document. Only items are counted that are on the
+first level of the collection in the response document. Nested objects
+contained within the explicitly requested items shall not be counted.}
+
+\item{skipGeometry}{This option can be used to skip response geometries for
+each feature. The returned object will be a data frame with no spatial
+information.}
+
+\item{...}{Extra parameters from the specific services.}
+}
+\description{
+Main documentation: \url{https://api.waterdata.usgs.gov/ogcapi/v0/},
+Swagger docs: \url{https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html}. 
+} +\examples{ +site <- "USGS-02238500" +pcode <- "00060" +req_dv <- construct_api_requests("daily", + monitoring_location_id = site, + parameter_code = "00060") + +req_dv <- construct_api_requests("daily", + monitoring_location_id = site, + parameter_code = c("00060", "00065")) + +sites <- c("USGS-01491000", "USGS-01645000") +start_date <- "2018-01-01" +end_date <- "2022-01-01" +req_dv <- construct_api_requests("daily", + monitoring_location_id = sites, + parameter_code = c("00060", "00065"), + datetime = c(start_date, end_date)) + +} +\keyword{internal} diff --git a/man/construct_USGS_sample_request.Rd b/man/construct_waterdata_sample_request.Rd similarity index 84% rename from man/construct_USGS_sample_request.Rd rename to man/construct_waterdata_sample_request.Rd index 21e194fe..6835e7f2 100644 --- a/man/construct_USGS_sample_request.Rd +++ b/man/construct_waterdata_sample_request.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_USGS_samples.R -\name{construct_USGS_sample_request} -\alias{construct_USGS_sample_request} +% Please edit documentation in R/read_waterdata_samples.R +\name{construct_waterdata_sample_request} +\alias{construct_waterdata_sample_request} \title{Construct request for USGS Samples Data} \usage{ -construct_USGS_sample_request( +construct_waterdata_sample_request( monitoringLocationIdentifier = NA, siteTypeCode = NA, boundingBox = NA, @@ -36,14 +36,14 @@ for example: AZ014-320821110580701, CAX01-15304600, USGS-040851385. Location numbers without an agency prefix are assumed to have the prefix USGS.} \item{siteTypeCode}{Site type code query parameter. See available -options by running \code{check_param("sitetype")$typeCode}.} +options by running \code{check_waterdata_sample_params("sitetype")$typeCode}.} \item{boundingBox}{North and South are latitude values; East and West are longitude values. A vector of 4 (west, south, east, north) is expected. An example would be: c(-92.8, 44.2, -88.9, 46.0).} \item{hydrologicUnit}{Hydrologic Unit Codes (HUCs) identify physical areas -within the US that drain to a certain portion of the stream network. +within the US that drain to a certain portion of the stream network. This filter accepts values containing 2, 4, 6, 8, 10 or 12 digits.} \item{activityMediaName}{Sample media refers to the environmental medium that @@ -51,11 +51,11 @@ was sampled or analyzed.} \item{characteristicGroup}{Characteristic group is a broad category describing the sample. See available options by running -\code{check_param("characteristicgroup")$characteristicGroup}.} +\code{check_waterdata_sample_params("characteristicgroup")$characteristicGroup}.} \item{characteristic}{Characteristic is a specific category describing the sample. -See available options by running -\code{check_param("characteristics")$characteristicName}.} +See available options by running +\code{check_waterdata_sample_params("characteristics")$characteristicName}.} \item{characteristicUserSupplied}{Observed property is the USGS term for the constituent sampled and the property name gives a detailed description of what @@ -75,22 +75,22 @@ than the value entered for activityStartDateLower. Can be an R Date object, or a string with format YYYY-MM-DD. The logic is inclusive, i.e. it will also return records that match the date.} -\item{countryFips}{Country query parameter. Do not set redundant parameters. +\item{countryFips}{Country query parameter. Do not set redundant parameters. 
If another query parameter contains the country information, leave this parameter -set to the default NA. See available options by running \code{check_param("countries")}, +set to the default NA. See available options by running \code{check_waterdata_sample_params("countries")}, where the "id" field contains the value to use in the countryFips input.} -\item{stateFips}{State query parameter. To get a list of available state fips, -run \code{check_param("states")}. The "fips" can be created using the function -\code{stateCdLookup} - for example: \code{stateCdLookup("WI", "fips")}. -FIPs codes for states take the format: +\item{stateFips}{State query parameter. To get a list of available state fips, +run \code{check_waterdata_sample_params("states")}. The "fips" can be created using the function +\code{stateCdLookup} - for example: \code{stateCdLookup("WI", "fips")}. +FIPs codes for states take the format: CountryAbbrev:StateNumber, like US:55 for Wisconsin.} \item{countyFips}{County query parameter. To get a list of available counties, -run \code{check_param("counties")}. The "Fips" can be created using the function -\code{countyCdLookup} - for example: \code{countyCdLookup("WI", "Dane", "fips")} +run \code{check_waterdata_sample_params("counties")}. The "Fips" can be created using the function +\code{countyCdLookup} - for example: \code{countyCdLookup("WI", "Dane", "fips")} for Dane County, WI. -FIPs codes for counties take the format: +FIPs codes for counties take the format: CountryAbbrev:StateNumber:CountyNumber, like US:55:025 for Dane County, WI.} \item{projectIdentifier}{Project identifier query parameter. This information @@ -102,8 +102,8 @@ information would be needed from the data supplier.} \item{siteTypeName}{Site type name query parameter. See available options by running \code{check_param("sitetype")$typeName}.} -\item{usgsPCode}{USGS parameter code. See available options by running -\code{check_param("characteristics")$parameterCode}.} +\item{usgsPCode}{USGS parameter code. See available options by running +\code{check_waterdata_sample_params("characteristics")$parameterCode}.} \item{pointLocationLatitude}{Latitude for a point/radius query (decimal degrees). Must be used with pointLocationLongitude and pointLocationWithinMiles.} @@ -117,10 +117,10 @@ with pointLocationLatitude and pointLocationLongitude} \item{dataType}{Options include: "Results", "Monitoring locations", "Activities", "Projects", and "Organizations".} -\item{dataProfile}{Profile depends on type. Options for "results" dataType are: +\item{dataProfile}{Profile depends on type. Options for "results" dataType are: "fullphyschem", "basicphyschem", "fullbio", "basicbio", "narrow", -"resultdetectionquantitationlimit", "labsampleprep", "count". Options for "locations" are: -"site" and "count". Options for "activities" are "sampact", "actmetric", "actgroup", +"resultdetectionquantitationlimit", "labsampleprep", "count". Options for "locations" are: +"site" and "count". Options for "activities" are "sampact", "actmetric", "actgroup", and "count". Options for "projects" are: "project" and "projectmonitoringlocationweight". Options for "organizations" are: "organization" and "count".} @@ -130,7 +130,7 @@ data frame returned from web service call. } \description{ This function creates the call for discrete water quality samples data -service described at \url{https://waterdata.usgs.gov/download-samples}. +service described at \url{https://waterdata.usgs.gov/download-samples/}. 
Note: all possible arguments are included, but it is strongly
 recommended to only use the NECESSARY arguments. Leave unnecessary
 arguments as the default NA.
@@ -142,7 +142,7 @@ See also: \url{https://api.waterdata.usgs.gov/samples-data/docs}.
 \dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 \donttest{
-req <- construct_USGS_sample_request(
+req <- construct_waterdata_sample_request(
   monitoringLocationIdentifier = "USGS-04074950",
   characteristicUserSupplied = "pH, water, unfiltered, field")
 rawData <- importWQP(req)
@@ -150,3 +150,4 @@ rawData <- importWQP(req)
 }
 \dontshow{\}) # examplesIf}
 }
+\keyword{internal}
diff --git a/man/countyCd.Rd b/man/countyCd.Rd
index a7b6e57d..5fcac308 100644
--- a/man/countyCd.Rd
+++ b/man/countyCd.Rd
@@ -18,7 +18,7 @@ COUNTY_ID \tab character \tab County id \cr
 }
 \description{
 Classic lookup table for counties. Has been replaced in functions with
-\code{check_param("counties")}.
+\code{check_waterdata_sample_params("counties")}.
 }
 \examples{
 head(countyCd)
diff --git a/man/findNLDI.Rd b/man/findNLDI.Rd
index d1a6763d..dcc74fa7 100644
--- a/man/findNLDI.Rd
+++ b/man/findNLDI.Rd
@@ -48,8 +48,8 @@ be explicitly requested.}
 \item{distance_km}{numeric. Define how far to look along the navigation path in
 kilometers (default = 100)}
 
-\item{no_sf}{if available, should `sf` be used for parsing,
-defaults to `TRUE` if `sf` is locally installed}
+\item{no_sf}{if available, should \code{sf} be used for parsing,
+defaults to \code{TRUE} if \code{sf} is locally installed}
 
 \item{warn}{(default TRUE) should warnings be printed}
 }
diff --git a/man/importWQP.Rd b/man/importWQP.Rd
index 352aab77..54c7b2ab 100644
--- a/man/importWQP.Rd
+++ b/man/importWQP.Rd
@@ -51,5 +51,5 @@ STORETdata_char <- importWQP(STORETex, convertType = FALSE)
 \dontshow{\}) # examplesIf}
 }
 \seealso{
-\code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}}
+\code{\link[=readWQPdata]{readWQPdata()}}, \code{\link[=readWQPqw]{readWQPqw()}}, \code{\link[=whatWQPsites]{whatWQPsites()}}
 }
diff --git a/man/importWaterML1.Rd b/man/importWaterML1.Rd
index 79ed544e..3f0c5f75 100644
--- a/man/importWaterML1.Rd
+++ b/man/importWaterML1.Rd
@@ -25,7 +25,7 @@ A data frame with the following columns:
 Name \tab Type \tab Description \cr
 agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr
 site_no \tab character \tab The USGS site number \cr
- \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = TRUE), \cr
+\tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = TRUE), \cr
 \tab character \tab or raw character string (if asDateTime = FALSE) \cr
 tz_cd \tab character \tab The time zone code for \cr
 code \tab character \tab Any codes that qualify the corresponding value\cr
@@ -107,5 +107,5 @@ importFile <- importWaterML1(fullPath, TRUE)
 \dontshow{\}) # examplesIf}
 }
 \seealso{
-\code{\link{renameNWISColumns}}
+\code{\link[=renameNWISColumns]{renameNWISColumns()}}
 }
diff --git a/man/pcode_to_name.Rd b/man/pcode_to_name.Rd
index 963802ee..dd3e1a4e 100644
--- a/man/pcode_to_name.Rd
+++ b/man/pcode_to_name.Rd
@@ -7,12 +7,12 @@
 pcode_to_name(parameterCd = "all")
 }
 \arguments{
-\item{parameterCd}{character that contains the code for a character vector
+\item{parameterCd}{character that contains the code for a character vector
 of 5-digit parameter codes. 
Default is "all" which will return a complete list of parameter codes that have been mapped to a characteristic name.} } \value{ -a data frame with columns "parm_cd", "description", +a data frame with columns "parm_cd", "description", "characteristicname", "measureunitcode", "resultsamplefraction", "resulttemperaturebasis", "resultstatisticalbasis", "resulttimebasis", "resultweightbasis", "resultparticlesizebasis", "last_rev_dt" diff --git a/man/readNWISdata.Rd b/man/readNWISdata.Rd index d1ea37e7..1201b6ac 100644 --- a/man/readNWISdata.Rd +++ b/man/readNWISdata.Rd @@ -11,7 +11,7 @@ readNWISdata(..., asDateTime = TRUE, convertType = TRUE, tz = "UTC") a complete list of options. A list of arguments can also be supplied. One important argument to include is "service". Possible values are "iv" -(for instantaneous), +(for instantaneous), "dv" (for daily values), "gwlevels" (for groundwater levels), "site" (for site service), "measurement", and "stat" (for statistics service). Note: "measurement" calls go to: @@ -39,10 +39,10 @@ Name \tab Type \tab Description \cr agency \tab character \tab The NWIS code for the agency reporting the data\cr site \tab character \tab The USGS site number \cr dateTime \tab POSIXct \tab The date and time (if applicable) of the measurement, - converted to UTC for unit value data. R only allows one time zone attribute per column. For unit data - spanning a time zone change, converting the data to UTC solves this problem. For daily data, - the time zone attribute is the time zone of the first returned measurement. - \cr +converted to UTC for unit value data. R only allows one time zone attribute per column. For unit data +spanning a time zone change, converting the data to UTC solves this problem. For daily data, +the time zone attribute is the time zone of the first returned measurement. +\cr tz_cd \tab character \tab The time zone code for dateTime column\cr code \tab character \tab Any codes that qualify the corresponding value\cr value \tab numeric \tab The numeric value for the parameter \cr @@ -71,17 +71,17 @@ See examples below for ideas of constructing queries. \details{ This function requires users to create their own arguments based on the NWIS web services. It is a more complicated function to use -compared to other NWIS functions such as \code{\link{readNWISdv}}, \code{\link{readNWISuv}}, -\code{\link{readNWISgwl}}, etc. However, this function adds a lot of -flexibility to the possible queries. This function will also behave exactly +compared to other NWIS functions such as \code{\link[=readNWISdv]{readNWISdv()}}, \code{\link[=readNWISuv]{readNWISuv()}}, +\code{\link[=readNWISgwl]{readNWISgwl()}}, etc. However, this function adds a lot of +flexibility to the possible queries. This function will also behave exactly as NWIS when it comes to date queries. NWIS by default will only return the latest value for the daily and instantaneous services. So if you do not provide a starting date, you will only get back the latest value. If you want the full period of record, you can use "startDate = '1900-01-01'". Other options for dates are periods, such as "period = 'P7D'" which translates to a period of 7 days. For period, use only a positive ISO-8601 duration format, which should -not be expressed in periods of less than a day, or in increments of months M or years Y. -period returns data for a site generally from now to a time in the past. +not be expressed in periods of less than a day, or in increments of months M or years Y. 
+period returns data for a site generally from now to a time in the past.
Note that when period is used all data up to the most recent value are returned.
}
\examples{
\donttest{
# Examples not run for time considerations
-dataTemp <- readNWISdata(stateCd = "OH", parameterCd = "00010", service = "dv")
instFlow <- readNWISdata(
  sites = "05114000", service = "iv",
  parameterCd = "00060",
@@ -108,26 +107,7 @@ multiSite <- readNWISdata(
  service = "iv",
  parameterCd = "00060"
)
-bBoxEx <- readNWISdata(bBox = c(-83, 36.5, -81, 38.5), parameterCd = "00010")
-startDate <- as.Date("2013-10-01")
-endDate <- as.Date("2014-09-30")
-waterYear <- readNWISdata(
-  bBox = c(-83, 36.5, -82.5, 36.75),
-  parameterCd = "00010",
-  service = "dv",
-  startDate = startDate,
-  endDate = endDate
-)
-
-siteInfo <- readNWISdata(
-  stateCd = "WI", parameterCd = "00010",
-  hasDataTypeCd = "iv", service = "site"
-)
-temp <- readNWISdata(
-  bBox = c(-83, 36.5, -82.5, 36.75), parameterCd = "00010", service = "site",
-  seriesCatalogOutput = TRUE
-)
GWL <- readNWISdata(site_no = c("392725077582401", "375907091432201"),
  parameterCd = "62610",
@@ -172,19 +152,6 @@ arg.list <- list(
)
allDailyStats_2 <- readNWISdata(arg.list, service = "stat")
-# use county names to get data
-dailyStaffordVA <- readNWISdata(
-  stateCd = "Virginia",
-  countyCd = "Stafford",
-  parameterCd = "00060",
-  startDate = "2015-01-01",
-  endDate = "2015-01-30"
-)
-va_counties <- c("51001", "51003", "51005", "51007", "51009", "51011", "51013", "51015")
-va_counties_data <- readNWISdata(
-  startDate = "2015-01-01", endDate = "2015-12-31",
-  parameterCd = "00060", countycode = va_counties
-)
site_id <- "01594440"
rating_curve <- readNWISdata(service = "rating", site_no = site_id, file_type = "base")
@@ -212,5 +179,5 @@ peak_data <- readNWISdata(
\dontshow{\}) # examplesIf}
}
\seealso{
-\code{\link{renameNWISColumns}}, \code{\link{importWaterML1}}, \code{\link{importRDB1}}
+\code{\link[=read_waterdata]{read_waterdata()}}
}
diff --git a/man/readNWISdv.Rd b/man/readNWISdv.Rd
index 8c2f9d80..bd131c90 100644
--- a/man/readNWISdv.Rd
+++ b/man/readNWISdv.Rd
@@ -54,16 +54,16 @@ queryTime \tab POSIXct \tab The time the data was returned \cr
}
}
\description{
-Imports data from NWIS daily web service. This function gets the data from here:
+Imports data from NWIS daily web service. This function gets the data from here:
\url{https://waterservices.usgs.gov/docs/dv-service/daily-values-service-details/}
Inputs to this function are just USGS site ids, USGS parameter codes,
-USGS statistic codes, and start and end date. For a more complex query, use \code{\link{readNWISdata}},
+USGS statistic codes, and start and end date. For a more complex query, use \code{\link[=readNWISdata]{readNWISdata()}},
with an argument service = "dv".
Data coming from the daily web services are aggregates of the
instantaneous (sensor) web services. Not all statistical codes are available
for all data.
-Use the function \code{\link{whatNWISdata}} to discover what data
+Use the function \code{\link[=whatNWISdata]{whatNWISdata()}} to discover what data
is available for a USGS site. The column data_type_cd with the values "dv"
-returned from \code{\link{whatNWISdata}}) are available from this service.
+returned from \code{\link[=whatNWISdata]{whatNWISdata()}} are available from this service.
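As a rough migration sketch (the site, dates, and "USGS-" prefix are illustrative; the argument names come from the read_waterdata_daily() usage added later in this diff, where statistic_id "00003" is documented as the daily mean):

# legacy NWIS daily-value call
rawDailyQ <- readNWISdv("04085427", "00060", "2012-01-01", "2012-06-30")
# approximate waterdata equivalent; note the "USGS-" prefix on the site id
dv_new <- read_waterdata_daily(monitoring_location_id = "USGS-04085427",
                               parameter_code = "00060",
                               statistic_id = "00003",
                               time = c("2012-01-01", "2012-06-30"))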
}
\details{
More information on the web service can be found here:
@@ -71,35 +71,19 @@ More information on the web service can be found here:
"Daily Value Service".
}
\examples{
-\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-site_id <- "04085427"
-startDate <- "2012-01-01"
-endDate <- "2012-06-30"
-pCode <- "00060"
-\donttest{
-rawDailyQ <- readNWISdv(site_id, pCode, startDate, endDate)
-rawDailyQAndTempMeanMax <- readNWISdv(site_id, c("00010", "00060"),
-  startDate, endDate,
-  statCd = c("00001", "00003")
-)
-rawDailyQAndTempMeanMax <- renameNWISColumns(rawDailyQAndTempMeanMax)
-rawDailyMultiSites <- readNWISdv(c("01491000", "01645000"), c("00010", "00060"),
-  startDate, endDate,
-  statCd = c("00001", "00003")
-)
-# Site with no data:
-x <- readNWISdv("10258500", "00060", "2014-09-08", "2014-09-14")
-names(attributes(x))
-attr(x, "siteInfo")
-attr(x, "variableInfo")
-site <- "05212700"
-notActive <- readNWISdv(site, "00060", "2014-01-01", "2014-01-07")
-}
-\dontshow{\}) # examplesIf}
+# see ?read_waterdata_daily
+
+#site_id <- "04085427"
+#startDate <- "2012-01-01"
+#endDate <- "2012-06-30"
+#pCode <- "00060"
+#
+#rawDailyQ <- readNWISdv(site_id, pCode, startDate, endDate)
+
}
\seealso{
-\code{\link{renameNWISColumns}}, \code{\link{importWaterML1}}
+\code{\link[=read_waterdata_daily]{read_waterdata_daily()}}
}
\keyword{USGS}
\keyword{data}
diff --git a/man/readNWISgwl.Rd b/man/readNWISgwl.Rd
index c354b76c..00d19b10 100644
--- a/man/readNWISgwl.Rd
+++ b/man/readNWISgwl.Rd
@@ -64,19 +64,18 @@ siteInfo \tab data.frame \tab A data frame containing information on the request
Imports groundwater level data from NWIS web service. This function gets the data from here:
\url{https://waterservices.usgs.gov/docs/groundwater-levels/groundwater-levels-details/}
Inputs to this function are just USGS site ids, USGS parameter codes,
-and start and end date. For a more complex query, use \code{\link{readNWISdata}},
+and start and end date. For a more complex query, use \code{\link[=readNWISdata]{readNWISdata()}},
including an argument service="gwlevels".
Not all parameter codes are available for all data.
-Use the function \code{\link{whatNWISdata}} to discover what data
+Use the function \code{\link[=whatNWISdata]{whatNWISdata()}} to discover what data
is available for a USGS site. The column data_type_cd with the values "gw"
-returned from \code{\link{whatNWISdata}}) are available from this service.
+returned from \code{\link[=whatNWISdata]{whatNWISdata()}} are available from this service.
}
\details{
More information on the web service can be found here:
\url{https://waterservices.usgs.gov/test-tools}, choosing the
"Groundwater Levels Value Service".
-
Mixed date/times come back from the service depending on the year that the data was collected.
See \url{https://waterdata.usgs.gov/usa/nwis/gw} for details about groundwater.
By default the returned dates are converted to date objects, unless convertType
@@ -99,5 +98,5 @@ data5 <- readNWISgwl("263819081585801", parameterCd = "72019")
\dontshow{\}) # examplesIf}
}
\seealso{
-\code{\link{constructNWISURL}}, \code{\link{importRDB1}}
+\code{\link[=constructNWISURL]{constructNWISURL()}}, \code{\link[=importRDB1]{importRDB1()}}
}
diff --git a/man/readNWISmeas.Rd b/man/readNWISmeas.Rd
index 01872efb..ba99378c 100644
--- a/man/readNWISmeas.Rd
+++ b/man/readNWISmeas.Rd
@@ -79,5 +79,5 @@ Meas07227500.exRaw <- readNWISmeas("07227500", expanded = TRUE, convertType = FA
\dontshow{\}) # examplesIf}
}
\seealso{
-\code{\link{constructNWISURL}}, \code{\link{importRDB1}}
+\code{\link[=constructNWISURL]{constructNWISURL()}}, \code{\link[=importRDB1]{importRDB1()}}
}
diff --git a/man/readNWISpCode.Rd b/man/readNWISpCode.Rd
index 8e608816..ebeef88b 100644
--- a/man/readNWISpCode.Rd
+++ b/man/readNWISpCode.Rd
@@ -7,20 +7,20 @@ readNWISpCode(parameterCd)
}
\arguments{
-\item{parameterCd}{character of USGS parameter codes (or multiple parameter codes). These are 5 digit number codes,
-more information can be found here: \url{https://help.waterdata.usgs.gov/}. To get a
-complete list of all current parameter codes in the USGS, use "all" as the input.}
+\item{parameterCd}{character of USGS parameter codes (or multiple parameter codes).
+These are 5 digit number codes. To get a complete list of all current parameter
+codes in the USGS, use "all" as the input.}
}
\value{
parameterData data frame with the following information:
\tabular{lll}{
- Name \tab Type \tab Description\cr
- parameter_cd \tab character \tab 5-digit USGS parameter code \cr
- parameter_group_nm \tab character \tab USGS parameter group name\cr
- parameter_nm \tab character \tab USGS parameter name\cr
- casrn \tab character \tab Chemical Abstracts Service (CAS) Registry Number\cr
- srsname \tab character \tab Substance Registry Services Name\cr
- parameter_units \tab character \tab Parameter units\cr
+Name \tab Type \tab Description\cr
+parameter_cd \tab character \tab 5-digit USGS parameter code \cr
+parameter_group_nm \tab character \tab USGS parameter group name\cr
+parameter_nm \tab character \tab USGS parameter name\cr
+casrn \tab character \tab Chemical Abstracts Service (CAS) Registry Number\cr
+srsname \tab character \tab Substance Registry Services Name\cr
+parameter_units \tab character \tab Parameter units\cr
}
}
\description{
@@ -39,7 +39,7 @@ one_extra <- readNWISpCode(c("01075", "12345"))
}
}
\seealso{
-\code{\link{importRDB1}}
+\code{\link[=importRDB1]{importRDB1()}}
}
\keyword{USGS}
\keyword{data}
diff --git a/man/readNWISpeak.Rd b/man/readNWISpeak.Rd
index 63ae13d5..d875785c 100644
--- a/man/readNWISpeak.Rd
+++ b/man/readNWISpeak.Rd
@@ -71,7 +71,7 @@ In some cases, the specific date of the peak data is not known.
This function will default to converting complete dates to
a "Date" object, and converting incomplete dates to "NA". If those incomplete dates are
-needed, set the `asDateTime` argument to FALSE. No dates will be converted to
+needed, set the \code{asDateTime} argument to FALSE. No dates will be converted to
R Date objects.
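A minimal sketch of the incomplete-date handling described above (the station number is illustrative, borrowed from the readNWISdata rating example; both calls assume network access):

# default: incomplete peak dates are coerced to NA Date values
peaks <- readNWISpeak("01594440")
# keep the raw character dates instead
peaks_raw <- readNWISpeak("01594440", asDateTime = FALSE)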
} \examples{ @@ -86,5 +86,5 @@ peakdata <- readNWISpeak(stations, convertType = FALSE) \dontshow{\}) # examplesIf} } \seealso{ -\code{\link{constructNWISURL}}, \code{\link{importRDB1}} +\code{\link[=constructNWISURL]{constructNWISURL()}}, \code{\link[=importRDB1]{importRDB1()}} } diff --git a/man/readNWISrating.Rd b/man/readNWISrating.Rd index 60541ad7..9166cb2f 100644 --- a/man/readNWISrating.Rd +++ b/man/readNWISrating.Rd @@ -55,5 +55,5 @@ attr(data, "RATING") \dontshow{\}) # examplesIf} } \seealso{ -\code{\link{constructNWISURL}}, \code{\link{importRDB1}} +\code{\link[=constructNWISURL]{constructNWISURL()}}, \code{\link[=importRDB1]{importRDB1()}} } diff --git a/man/readNWISsite.Rd b/man/readNWISsite.Rd index 11696f47..4447faab 100644 --- a/man/readNWISsite.Rd +++ b/man/readNWISsite.Rd @@ -15,46 +15,46 @@ A data frame with at least the following columns: Name \tab Type \tab Description \cr agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr site_no \tab character \tab The USGS site number \cr - station_nm \tab character \tab Site name \cr - site_tp_cd \tab character \tab Site type \cr - lat_va \tab numeric \tab DMS latitude \cr - long_va \tab numeric \tab DMS longitude \cr - dec_lat_va \tab numeric \tab Decimal latitude \cr - dec_long_va \tab numeric \tab Decimal longitude \cr - coord_meth_cd \tab character \tab Latitude-longitude method \cr - coord_acy_cd \tab character \tab Latitude-longitude accuracy \cr - coord_datum_cd \tab character \tab Latitude-longitude datum \cr - dec_coord_datum_cd \tab character \tab Decimal Latitude-longitude datum \cr - district_cd \tab character \tab District code \cr - state_cd \tab character \tab State code \cr - county_cd \tab character \tab County code \cr - country_cd \tab character \tab Country code \cr - land_net_ds \tab character \tab Land net location description \cr - map_nm \tab character \tab Name of location map \cr - map_scale_fc \tab character \tab Scale of location map \cr - alt_va \tab numeric \tab Altitude of Gage/land surface \cr - alt_meth_cd \tab character \tab Method altitude determined \cr - alt_acy_va \tab numeric \tab Altitude accuracy \cr - alt_datum_cd \tab character \tab Altitude datum \cr - huc_cd \tab character \tab Hydrologic unit code \cr - basin_cd \tab character \tab Drainage basin code \cr - topo_cd \tab character \tab Topographic setting code \cr - instruments_cd \tab character \tab Flags for instruments at site \cr - construction_dt \tab character \tab Date of first construction \cr - inventory_dt \tab character \tab Date site established or inventoried \cr - drain_area_va \tab numeric \tab Drainage area \cr - contrib_drain_area_va \tab numeric \tab Contributing drainage area \cr - tz_cd \tab character \tab Time Zone abbreviation \cr - local_time_fg \tab character \tab Site honors Daylight Savings Time \cr - reliability_cd \tab character \tab Data reliability code \cr - gw_file_cd \tab character \tab Data-other GW files \cr - nat_aqfr_cd \tab character \tab National aquifer code \cr - aqfr_cd \tab character \tab Local aquifer code \cr - aqfr_type_cd \tab character \tab Local aquifer type code \cr - well_depth_va \tab numeric \tab Well depth \cr - hole_depth_va \tab numeric \tab Hole depth \cr - depth_src_cd \tab character \tab Source of depth data \cr - project_no \tab character \tab Project number \cr +station_nm \tab character \tab Site name \cr +site_tp_cd \tab character \tab Site type \cr +lat_va \tab numeric \tab DMS latitude \cr +long_va \tab numeric \tab DMS longitude \cr +dec_lat_va 
\tab numeric \tab Decimal latitude \cr +dec_long_va \tab numeric \tab Decimal longitude \cr +coord_meth_cd \tab character \tab Latitude-longitude method \cr +coord_acy_cd \tab character \tab Latitude-longitude accuracy \cr +coord_datum_cd \tab character \tab Latitude-longitude datum \cr +dec_coord_datum_cd \tab character \tab Decimal Latitude-longitude datum \cr +district_cd \tab character \tab District code \cr +state_cd \tab character \tab State code \cr +county_cd \tab character \tab County code \cr +country_cd \tab character \tab Country code \cr +land_net_ds \tab character \tab Land net location description \cr +map_nm \tab character \tab Name of location map \cr +map_scale_fc \tab character \tab Scale of location map \cr +alt_va \tab numeric \tab Altitude of Gage/land surface \cr +alt_meth_cd \tab character \tab Method altitude determined \cr +alt_acy_va \tab numeric \tab Altitude accuracy \cr +alt_datum_cd \tab character \tab Altitude datum \cr +huc_cd \tab character \tab Hydrologic unit code \cr +basin_cd \tab character \tab Drainage basin code \cr +topo_cd \tab character \tab Topographic setting code \cr +instruments_cd \tab character \tab Flags for instruments at site \cr +construction_dt \tab character \tab Date of first construction \cr +inventory_dt \tab character \tab Date site established or inventoried \cr +drain_area_va \tab numeric \tab Drainage area \cr +contrib_drain_area_va \tab numeric \tab Contributing drainage area \cr +tz_cd \tab character \tab Time Zone abbreviation \cr +local_time_fg \tab character \tab Site honors Daylight Savings Time \cr +reliability_cd \tab character \tab Data reliability code \cr +gw_file_cd \tab character \tab Data-other GW files \cr +nat_aqfr_cd \tab character \tab National aquifer code \cr +aqfr_cd \tab character \tab Local aquifer code \cr +aqfr_type_cd \tab character \tab Local aquifer type code \cr +well_depth_va \tab numeric \tab Well depth \cr +hole_depth_va \tab numeric \tab Hole depth \cr +depth_src_cd \tab character \tab Source of depth data \cr +project_no \tab character \tab Project number \cr } There are also several useful attributes attached to the data frame: @@ -69,13 +69,13 @@ comment \tab character \tab Header comments from the RDB file \cr Imports data from USGS site file site. This function gets data from here: \url{https://waterservices.usgs.gov/} } \examples{ -\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -\donttest{ -siteINFO <- readNWISsite("05114000") -siteINFOMulti <- readNWISsite(c("05114000", "09423350")) +# see ?read_waterdata_monitoring_location +# siteINFOMulti <- readNWISsite(c("05114000", "09423350")) + } -\dontshow{\}) # examplesIf} +\seealso{ +\code{\link[=read_waterdata_monitoring_location]{read_waterdata_monitoring_location()}} } \keyword{USGS} \keyword{data} diff --git a/man/readNWISstat.Rd b/man/readNWISstat.Rd index 860e8e76..b747a558 100644 --- a/man/readNWISstat.Rd +++ b/man/readNWISstat.Rd @@ -91,5 +91,5 @@ x <- readNWISstat( \dontshow{\}) # examplesIf} } \seealso{ -\code{\link{constructNWISURL}}, \code{\link{importRDB1}} +\code{\link[=constructNWISURL]{constructNWISURL()}}, \code{\link[=importRDB1]{importRDB1()}} } diff --git a/man/readNWISuv.Rd b/man/readNWISuv.Rd index 20e0b9fa..6a83daac 100644 --- a/man/readNWISuv.Rd +++ b/man/readNWISuv.Rd @@ -57,12 +57,12 @@ queryTime \tab POSIXct \tab The time the data was returned \cr Imports data from NWIS web service. 
This function gets the data from here:
\url{https://waterservices.usgs.gov/docs/instantaneous-values/instantaneous-values-details/}
Inputs to this function are just USGS site ids, USGS parameter codes,
-and start and end date. For a more complex query, use \code{\link{readNWISdata}},
+and start and end date. For a more complex query, use \code{\link[=readNWISdata]{readNWISdata()}},
including an argument service="uv".
Not all parameter codes are available for all data.
-Use the function \code{\link{whatNWISdata}} to discover what data
+Use the function \code{\link[=whatNWISdata]{whatNWISdata()}} to discover what data
is available for a USGS site. The column data_type_cd with the values "uv"
-returned from \code{\link{whatNWISdata}}) are available from this service.
+returned from \code{\link[=whatNWISdata]{whatNWISdata()}} are available from this service.
}
\details{
More information on the web service can be found here:
@@ -100,7 +100,7 @@ GMTdata <- readNWISuv(
\dontshow{\}) # examplesIf}
}
\seealso{
-\code{\link{renameNWISColumns}}, \code{\link{importWaterML1}}
+\code{\link[=renameNWISColumns]{renameNWISColumns()}}, \code{\link[=importWaterML1]{importWaterML1()}}
}
\keyword{USGS}
\keyword{data}
diff --git a/man/readWQPdata.Rd b/man/readWQPdata.Rd
index 16175970..e740e9cc 100644
--- a/man/readWQPdata.Rd
+++ b/man/readWQPdata.Rd
@@ -15,9 +15,9 @@ readWQPdata(
}
\arguments{
\item{\dots}{see \url{https://www.waterqualitydata.us/webservices_documentation} for a complete list of options.
-A list of arguments can also be supplied. For more information see the above
-description for this help file. One way to figure out how to construct a WQP query is to go to the "Advanced"
-form in the Water Quality Portal. Use the form to discover what parameters are available. Once the query is
+A list of arguments can also be supplied. For more information see the above
+description for this help file. One way to figure out how to construct a WQP query is to go to the "Advanced"
+form in the Water Quality Portal. Use the form to discover what parameters are available. Once the query is
set in the form, scroll down to the "Query URL". You will see the parameters
after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient"
in the Characteristic Group dropdown, you will see characteristicType=Nutrient
@@ -28,7 +28,7 @@ mimeType, and providers is optional (these arguments are picked automatically).}
\item{service}{character. See Details for more information.}
\item{querySummary}{logical to only return the number of records and unique sites that
-will be returned from this query. Choosing TRUE is deprecated, readWQPsummary
+will be returned from this query. Choosing TRUE is deprecated, readWQPsummary
is recommended instead.}
\item{tz}{character to set timezone attribute of dateTime. Default is "UTC", and converts the
@@ -66,15 +66,12 @@ Imports data from Water Quality Portal web service. This function gets the data
\url{https://www.waterqualitydata.us}.
}
\details{
-This function uses \dots as a query input, which can be very flexible, but also
+This function uses \dots as a query input, which can be very flexible, but also
has a steeper learning curve. For a quick overview, scroll down to the Examples
in this help file to see many query options.
-
-
There are currently 10 legacy options
for data provided by the Water Quality Portal:
-
Legacy:
\tabular{lll}{
@@ -93,9 +90,9 @@ Organization Data \tab Organization \tab /data/Organization/search \cr
There are 4 WQX3 options. These are still in-development,
and should be used with caution.
-
+
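Given that caution, a conservative sketch sticks with the legacy Result service while the WQX3 profiles tabulated below mature (site and characteristic mirror the examples further down):

pH_legacy <- readWQPdata(siteid = "USGS-04024315",
                         characteristicName = "pH",
                         service = "Result")
attr(pH_legacy, "url")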
These are still in-development, and should be used with caution. - + \tabular{llll}{ -WQP Radio Button \tab service argument \tab Base URL \tab dataProfile \cr +WQP Radio Button \tab service argument \tab Base URL \tab dataProfile \cr Monitoring Locations \tab StationWQX3 \tab /wqx3/Station/search \tab \cr Full Physical Chemical \tab ResultWQX3 \tab /wqx3/Result/search \tab fullPhysChem \cr Narrow \tab ResultWQX3 \tab /wqx3/Result/search \tab narrow \cr @@ -118,9 +115,9 @@ attr(pHData, "url") # WQX3: pHData_wqx3 <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse, - service = "ResultWQX3", - dataProfile = "basicPhysChem") + characteristicName = nameToUse, + service = "ResultWQX3", + dataProfile = "basicPhysChem") attr(pHData_wqx3, "url") # More examples: @@ -134,12 +131,12 @@ DeWitt <- readWQPdata( attr(DeWitt, "url") DeWitt_wqx3 <- readWQPdata( - statecode = "Illinois", - countycode = "DeWitt", - characteristicName = "Nitrogen", - service = "ResultWQX3", - dataProfile = "basicPhysChem", - ignore_attributes = TRUE) + statecode = "Illinois", + countycode = "DeWitt", + characteristicName = "Nitrogen", + service = "ResultWQX3", + dataProfile = "basicPhysChem", + ignore_attributes = TRUE) attr(DeWitt_wqx3, "url") @@ -150,11 +147,11 @@ activity <- readWQPdata( ) attr(activity, "url") -activity_wqx3 <- readWQPdata( - siteid = "USGS-04024315", - service = "ActivityWQX3" -) -attr(activity_wqx3, "url") +# activity_wqx3 <- readWQPdata( +# siteid = "USGS-04024315", +# service = "ActivityWQX3" +# ) +# attr(activity_wqx3, "url") Dane_activity <- readWQPdata( statecode = "Wisconsin", @@ -165,14 +162,14 @@ Dane_activity <- readWQPdata( ) attr(Dane_activity, "url") -Dane_activity_wqx3 <- readWQPdata( - statecode = "Wisconsin", - countycode = "Dane", - startDateLo = "2023-01-01", - startDateHi = "2023-12-31", - service = "ActivityWQX3" -) -attr(Dane_activity_wqx3, "url") +# Dane_activity_wqx3 <- readWQPdata( +# statecode = "Wisconsin", +# countycode = "Dane", +# startDateLo = "2023-01-01", +# startDateHi = "2023-12-31", +# service = "ActivityWQX3" +# ) +# attr(Dane_activity_wqx3, "url") ######################################################## # Additional examples: @@ -221,11 +218,11 @@ samp_narrow <- readWQPdata( dataProfile = "narrowResult" ) -samp_narrow_wqx3 <- readWQPdata( - siteid = "USGS-04024315", - service = "ResultWQX3", - dataProfile = "narrow" -) +# samp_narrow_wqx3 <- readWQPdata( +# siteid = "USGS-04024315", +# service = "ResultWQX3", +# dataProfile = "narrow" +# ) # Data profiles: "Sampling Activity" @@ -262,10 +259,10 @@ rawPHsites_legacy <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "W service = "Result", dataProfile = "narrowResult" ) -rawPHsites <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), - characteristicName = "pH", - service = "ResultWQX3", - dataProfile = "narrow" ) +# rawPHsites <- readWQPdata(siteid = c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +# characteristicName = "pH", +# service = "ResultWQX3", +# dataProfile = "narrow" ) } \dontshow{\}) # examplesIf} diff --git a/man/readWQPqw.Rd b/man/readWQPqw.Rd index 23fd4ed4..3148bc96 100644 --- a/man/readWQPqw.Rd +++ b/man/readWQPqw.Rd @@ -89,10 +89,10 @@ pHsites_legacy <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-13304 ncol(pHsites_legacy) attr(pHsites_legacy, "url") -pHsites_modern <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), - "pH", "", "", legacy = FALSE) -ncol(pHsites_modern) -attr(pHsites_modern, "url") +# 
pHsites_modern <- readWQPqw(c("USGS-05406450", "USGS-05427949", "WIDNR_WQX-133040"), +# "pH", "", "", legacy = FALSE) +# ncol(pHsites_modern) +# attr(pHsites_modern, "url") nwisEx <- readWQPqw("USGS-04024000", c("34247", "30234", "32104", "34220"), "", "2022-12-20") @@ -103,8 +103,8 @@ DO <- readWQPqw(siteNumbers = "USGS-05288705", \dontshow{\}) # examplesIf} } \seealso{ -\code{\link{readWQPdata}}, \code{\link{whatWQPsites}}, -and \code{\link{importWQP}} +\code{\link[=readWQPdata]{readWQPdata()}}, \code{\link[=whatWQPsites]{whatWQPsites()}}, +and \code{\link[=importWQP]{importWQP()}} } \keyword{USGS} \keyword{data} diff --git a/man/readWQPsummary.Rd b/man/readWQPsummary.Rd index 08f28856..8c4b9e1d 100644 --- a/man/readWQPsummary.Rd +++ b/man/readWQPsummary.Rd @@ -8,11 +8,11 @@ readWQPsummary(...) } \arguments{ \item{\dots}{see \url{https://www.waterqualitydata.us/webservices_documentation} - for a complete list of options. A list of arguments can also be supplied. -One way to figure out how to construct a WQP query is to go to the "Advanced" +for a complete list of options. A list of arguments can also be supplied. +One way to figure out how to construct a WQP query is to go to the "Advanced" form in the Water Quality Portal: \url{https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET} -Use the form to discover what parameters are available. Once the query is +Use the form to discover what parameters are available. Once the query is set in the form, scroll down to the "Query URL". You will see the parameters after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient" in the Characteristic Group dropdown, you will see characteristicType=Nutrient diff --git a/man/read_waterdata.Rd b/man/read_waterdata.Rd new file mode 100644 index 00000000..544cf63f --- /dev/null +++ b/man/read_waterdata.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata.R +\name{read_waterdata} +\alias{read_waterdata} +\title{Generalized USGS Water Data API retrieval function} +\usage{ +read_waterdata(service, CQL, ..., convertType = TRUE) +} +\arguments{ +\item{service}{character, can be any existing collection such +as "daily", "monitoring-locations", "time-series-metadata"} + +\item{CQL}{A string in a Common Query Language format.} + +\item{\dots}{Additional arguments to send to the request.} + +\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function +will convert the data to dates and qualifier to string vector.} +} +\description{ +Function that allows complex CQL queries. +See \url{https://api.waterdata.usgs.gov/docs/ogcapi/complex-queries/} +for more information. 
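The same CQL2-JSON operators work against any collection; a sketch for the monitoring-locations endpoint (the state_name property is documented in the read_waterdata_monitoring_location() file added later in this diff):

cql_sites <- '{
  "op": "=",
  "args": [ { "property": "state_name" }, "Wisconsin" ]
}'
wi_sites <- read_waterdata(service = "monitoring-locations", CQL = cql_sites)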
+} +\examples{ +\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +\donttest{ +cql <- '{ +"op": "and", +"args": [ + { + "op": "in", + "args": [ + { "property": "parameter_code" }, + [ "00060", "00065" ] + ] + }, + { + "op": "in", + "args": [ + { "property": "monitoring_location_id" }, + [ "USGS-07367300", "USGS-03277200" ] + ] + } +] +}' + +dv_data <- read_waterdata(service = "daily", + CQL = cql, + time = c("2023-01-01", "2024-01-01")) + +} +\dontshow{\}) # examplesIf} +} diff --git a/man/read_waterdata_daily.Rd b/man/read_waterdata_daily.Rd new file mode 100644 index 00000000..5a29c4ed --- /dev/null +++ b/man/read_waterdata_daily.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_daily.R +\name{read_waterdata_daily} +\alias{read_waterdata_daily} +\title{Get USGS Daily Data} +\usage{ +read_waterdata_daily( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + statistic_id = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + daily_id = NA_character_, + approval_status = NA_character_, + unit_of_measure = NA_character_, + qualifier = NA_character_, + value = NA, + last_modified = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + limit = NA, + max_results = NA, + convertType = TRUE +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500).} + +\item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at \url{https://help.waterdata.usgs.gov/codes-and-parameters/parameters}.} + +\item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}.} + +\item{properties}{A vector of requested columns to be returned from the query. +Available options are: +geometry, id, time_series_id, monitoring_location_id, parameter_code, statistic_id, time, value, unit_of_measure, approval_status, qualifier, last_modified} + +\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} + +\item{daily_id}{A universally unique identifier (UUID) representing a single version of a record. It is not stable over time. Every time the record is refreshed in our database (which may happen as part of normal operations and does not imply any change to the data itself) a new ID will be generated. To uniquely identify a single observation over time, compare the \code{time} and \code{time_series_id} fields; each time series will only have a single observation at a given \code{time}.} + +\item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. 
Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaning processing review has been completed and the data is approved for publication, or "Provisional", meaning the data is still subject to revision. For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.}
+
+\item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.}
+
+\item{qualifier}{This field indicates any qualifiers associated with an observation, for instance if a sensor may have been impacted by ice or if values were estimated.}
+
+\item{value}{The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision.}
+
+\item{last_modified}{The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate anything about the measurement has changed.
+You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end).
+Examples:
+\itemize{
+\item A date-time: "2018-02-12T23:20:50Z"
+\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z"
+\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z"
+\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours
+}
+
+Only features that have a \code{last_modified} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.}
+
+\item{skipGeometry}{This option can be used to skip response geometries for
+each feature. The returning object will be a data frame with no spatial
+information.}
+
+\item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end).
+Examples:
+\itemize{
+\item A date-time: "2018-02-12T23:20:50Z"
+\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z"
+\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z"
+\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours
+}
+
+Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.}
+
+\item{bbox}{Only features that have a geometry that intersects the bounding
+box are selected. The bounding box is provided as four or six numbers, depending
+on whether the coordinate reference system includes a vertical axis (height or
+depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric
+vector structured: c(xmin,ymin,xmax,ymax).
Another way to think of it is c(Western-most longitude,
+Southern-most latitude, Eastern-most longitude, Northern-most latitude).}
+
+\item{limit}{The optional limit parameter is used to control the subset of the
+selected features that should be returned in each page. The maximum allowable
+limit is 10000. It may be beneficial to set this number lower if your internet
+connection is spotty. The default (\code{NA}) will set the limit to the maximum
+allowable limit for the service.}
+
+\item{max_results}{The optional maximum number of rows to return. This value
+must be less than the requested limit.}
+
+\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function
+will convert the data to dates and qualifier to string vector.}
+}
+\description{
+Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”.
+}
+\examples{
+\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+
+\donttest{
+site <- "USGS-02238500"
+pcode <- "00060"
+dv_data_sf <- read_waterdata_daily(monitoring_location_id = site,
+                                   parameter_code = "00060",
+                                   time = c("2021-01-01", "2022-01-01"))
+
+dv_data_trim <- read_waterdata_daily(monitoring_location_id = site,
+                                     parameter_code = "00060",
+                                     properties = c("monitoring_location_id",
+                                                    "value",
+                                                    "time"),
+                                     time = c("2021-01-01", "2022-01-01"))
+
+dv_data <- read_waterdata_daily(monitoring_location_id = site,
+                                parameter_code = "00060",
+                                skipGeometry = TRUE)
+
+dv_data_period <- read_waterdata_daily(monitoring_location_id = site,
+                                       parameter_code = "00060",
+                                       time = "P7D")
+
+multi_site <- read_waterdata_daily(monitoring_location_id = c("USGS-01491000",
+                                                              "USGS-01645000"),
+                                   parameter_code = c("00060", "00010"),
+                                   limit = 500,
+                                   time = c("2023-01-01", "2024-01-01"))
+
+}
+\dontshow{\}) # examplesIf}
+}
diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd
new file mode 100644
index 00000000..40cd6e0b
--- /dev/null
+++ b/man/read_waterdata_monitoring_location.Rd
@@ -0,0 +1,192 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_waterdata_monitoring_location.R
+\name{read_waterdata_monitoring_location}
+\alias{read_waterdata_monitoring_location}
+\title{Get USGS Site Data}
+\usage{
+read_waterdata_monitoring_location(
+  monitoring_location_id = NA_character_,
+  agency_code = NA_character_,
+  agency_name = NA_character_,
+  monitoring_location_number = NA_character_,
+  monitoring_location_name = NA_character_,
+  district_code = NA_character_,
+  country_code = NA_character_,
+  country_name = NA_character_,
+  state_code = NA_character_,
+  state_name = NA_character_,
+  county_code = NA_character_,
+  county_name = NA_character_,
+  minor_civil_division_code = NA_character_,
+  site_type_code = NA_character_,
+  site_type = NA_character_,
+  hydrologic_unit_code = NA_character_,
+  basin_code = NA_character_,
+  altitude = NA_character_,
+  altitude_accuracy = NA_character_,
+  altitude_method_code = NA_character_,
+  altitude_method_name = NA_character_,
+  vertical_datum = NA_character_,
+  vertical_datum_name = NA_character_,
+  horizontal_positional_accuracy_code = NA_character_,
+  horizontal_positional_accuracy = NA_character_,
+  horizontal_position_method_code = NA_character_,
+  horizontal_position_method_name = NA_character_,
+  original_horizontal_datum = NA_character_,
+  original_horizontal_datum_name = NA_character_,
+  drainage_area = NA_character_,
+  contributing_drainage_area = NA_character_,
+  time_zone_abbreviation = NA_character_,
+  uses_daylight_savings = NA_character_,
+  construction_date = NA_character_,
+  aquifer_code = NA_character_,
+  national_aquifer_code = NA_character_,
+  aquifer_type_code = NA_character_,
+  well_constructed_depth = NA_character_,
+  hole_constructed_depth = NA_character_,
+  depth_source_code = NA_character_,
+  properties = NA_character_,
+  bbox = NA,
+  limit = NA,
+  max_results = NA,
+  skipGeometry = NA
+)
+}
+\arguments{
+\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500).}
+
+\item{agency_code}{The agency that is reporting the data. Agency codes are fixed values assigned by the National Water Information System (NWIS). A list of agency codes is available \href{https://help.waterdata.usgs.gov/code/agency_cd_query?fmt=html}{at this link}.}
+
+\item{agency_name}{The name of the agency that is reporting the data.}
+
+\item{monitoring_location_number}{Each monitoring location in the USGS data base has a unique 8- to 15-digit identification number. Monitoring location numbers are assigned \href{https://help.waterdata.usgs.gov/faq/sites/do-station-numbers-have-any-particular-meaning}{based on this logic}.}
+
+\item{monitoring_location_name}{This is the official name of the monitoring location in the database. For well information this can be a district-assigned local number.}
+
+\item{district_code}{The Water Science Centers (WSCs) across the United States use the FIPS state code as the district code. In some cases, monitoring locations and samples may be managed by a water science center that is adjacent to the state in which the monitoring location actually resides. For example, a monitoring location may have a district code of 30 which translates to Montana, but the state code could be 56 for Wyoming because that is where the monitoring location actually is located.}
+
+\item{country_code}{The code for the country in which the monitoring location is located.}
+
+\item{country_name}{The name of the country in which the monitoring location is located.}
+
+\item{state_code}{State code. A \href{https://www2.census.gov/geo/docs/reference/state.txt}{two-digit ANSI code} (formerly FIPS code) as defined by the American National Standards Institute, to define States and equivalents. A three-digit ANSI code is used to define counties and county equivalents. \href{https://www.census.gov/library/reference/code-lists/ansi.html#states}{A lookup table is available.} The only countries with political subdivisions other than the US are Mexico and Canada.
The Mexican states have US state codes ranging from 81-86 and Canadian provinces have state codes ranging from 90-98.}
+
+\item{state_name}{The name of the state or state equivalent in which the monitoring location is located.}
+
+\item{county_code}{The code for the county or county equivalent (parish, borough, etc.) in which the monitoring location is located. \href{https://help.waterdata.usgs.gov/code/county_query?fmt=html}{A list of codes is available.}}
+
+\item{county_name}{The name of the county or county equivalent (parish, borough, etc.) in which the monitoring location is located. \href{https://help.waterdata.usgs.gov/code/county_query?fmt=html}{A list of codes is available.}}
+
+\item{minor_civil_division_code}{Codes for primary governmental or administrative divisions of the county or county equivalent in which the monitoring location is located.}
+
+\item{site_type_code}{A code describing the hydrologic setting of the monitoring location. \href{https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html}{A list of codes is available.}}
+
+\item{site_type}{A description of the hydrologic setting of the monitoring location. \href{https://help.waterdata.usgs.gov/code/site_tp_query?fmt=html}{A list of codes is available.}}
+
+\item{hydrologic_unit_code}{The United States is divided and sub-divided into successively smaller hydrologic units which are classified into four levels: regions, sub-regions, accounting units, and cataloging units. The hydrologic units are arranged within each other, from the smallest (cataloging units) to the largest (regions). Each hydrologic unit is identified by a unique hydrologic unit code (HUC) consisting of two to eight digits based on the four levels of classification in the hydrologic unit system.}
+
+\item{basin_code}{The Basin Code or "drainage basin code" is a two-digit code that further subdivides the 8-digit hydrologic-unit code. The drainage basin code is defined by the USGS State Office where the monitoring location is located.}
+
+\item{altitude}{Altitude of the monitoring location referenced to the specified Vertical Datum.}
+
+\item{altitude_accuracy}{Accuracy of the altitude, in feet. An accuracy of +/- 0.1 foot would be entered as “.1”. Many altitudes are interpolated from the contours on topographic maps; accuracies determined in this way are generally entered as one-half of the contour interval.}
+
+\item{altitude_method_code}{Codes representing the method used to measure altitude. \href{https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{altitude_method_name}{The name of the method used to measure altitude. \href{https://help.waterdata.usgs.gov/code/alt_meth_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{vertical_datum}{The datum used to determine altitude and vertical position at the monitoring location. \href{https://help.waterdata.usgs.gov/code/alt_datum_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{vertical_datum_name}{The datum used to determine altitude and vertical position at the monitoring location. \href{https://help.waterdata.usgs.gov/code/alt_datum_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{horizontal_positional_accuracy_code}{Indicates the accuracy of the latitude longitude values. \href{https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{horizontal_positional_accuracy}{Indicates the accuracy of the latitude longitude values.
\href{https://help.waterdata.usgs.gov/code/coord_acy_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{horizontal_position_method_code}{Indicates the method used to determine latitude longitude values. \href{https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{horizontal_position_method_name}{Indicates the method used to determine latitude longitude values. \href{https://help.waterdata.usgs.gov/code/coord_meth_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{original_horizontal_datum}{Coordinates are published in EPSG:4326 / WGS84 / World Geodetic System 1984. This field indicates the original datum used to determine coordinates before they were converted. \href{https://help.waterdata.usgs.gov/code/coord_datum_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{original_horizontal_datum_name}{Coordinates are published in EPSG:4326 / WGS84 / World Geodetic System 1984. This field indicates the original datum used to determine coordinates before they were converted. \href{https://help.waterdata.usgs.gov/code/coord_datum_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{drainage_area}{The area enclosed by a topographic divide from which direct surface runoff from precipitation normally drains by gravity into the stream above that point.}
+
+\item{contributing_drainage_area}{The contributing drainage area of a lake, stream, wetland, or estuary monitoring location, in square miles. This item should be present only if the contributing area is different from the total drainage area. This situation can occur when part of the drainage area consists of very porous soil or depressions that either allow all runoff to enter the groundwater or trap the water in ponds so that rainfall does not contribute to runoff. A transbasin diversion can also affect the total drainage area.}
+
+\item{time_zone_abbreviation}{A short code describing the time zone used by a monitoring location.}
+
+\item{uses_daylight_savings}{A flag indicating whether or not a monitoring location uses daylight savings.}
+
+\item{construction_date}{Date the well was completed.}
+
+\item{aquifer_code}{Local aquifers in the USGS water resources data base are identified by a geohydrologic unit code (a three-digit number related to the age of the formation, followed by a 4 or 5 character abbreviation for the geologic unit or aquifer name). \href{https://help.waterdata.usgs.gov/faq/groundwater/local-aquifer-description}{Additional information is available at this link.}}
+
+\item{national_aquifer_code}{National aquifers are the principal aquifers or aquifer systems in the United States, defined as regionally extensive aquifers or aquifer systems that have the potential to be used as a source of potable water. Not all groundwater monitoring locations can be associated with a National Aquifer. Such monitoring locations will not be retrieved using this search criterion. \href{https://help.waterdata.usgs.gov/code/nat_aqfr_query?fmt=html}{A list of National aquifer codes and names is available.}}
+
+\item{aquifer_type_code}{Groundwater occurs in aquifers under two different conditions. Where water only partly fills an aquifer, the upper surface is free to rise and decline. These aquifers are referred to as unconfined (or water-table) aquifers. Where water completely fills an aquifer that is overlain by a confining bed, the aquifer is referred to as a confined (or artesian) aquifer.
When a confined aquifer is penetrated by a well, the water level in the well will rise above the top of the aquifer (but not necessarily above land surface). \href{https://help.waterdata.usgs.gov/faq/groundwater/local-aquifer-description}{Additional information is available at this link.}}
+
+\item{well_constructed_depth}{The depth of the finished well, in feet below land surface datum. Note: Not all groundwater monitoring locations have information on Well Depth. Such monitoring locations will not be retrieved using this search criterion.}
+
+\item{hole_constructed_depth}{The total depth to which the hole is drilled, in feet below land surface datum. Note: Not all groundwater monitoring locations have information on Hole Depth. Such monitoring locations will not be retrieved using this search criterion.}
+
+\item{depth_source_code}{A code indicating the source of water-level data. \href{https://help.waterdata.usgs.gov/code/water_level_src_cd_query?fmt=html}{A list of codes is available.}}
+
+\item{properties}{A vector of requested columns to be returned from the query.
+Available options are:
+geometry, id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code.}
+
+\item{bbox}{Only features that have a geometry that intersects the bounding
+box are selected. The bounding box is provided as four or six numbers, depending
+on whether the coordinate reference system includes a vertical axis (height or
+depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric
+vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude,
+Southern-most latitude, Eastern-most longitude, Northern-most latitude).}
+
+\item{limit}{The optional limit parameter is used to control the subset of the
+selected features that should be returned in each page. The maximum allowable
+limit is 10000. It may be beneficial to set this number lower if your internet
+connection is spotty. The default (\code{NA}) will set the limit to the maximum
+allowable limit for the service.}
+
+\item{max_results}{The optional maximum number of rows to return. This value
+must be less than the requested limit.}
+
+\item{skipGeometry}{This option can be used to skip response geometries for
+each feature. The returning object will be a data frame with no spatial
+information.}
+}
+\description{
+Location information is basic information about the monitoring location including the name, identifier, agency responsible for data collection, and the date the location was established. It also includes information about the type of location, such as stream, lake, or groundwater, and geographic information about the location, such as state, county, latitude and longitude, and hydrologic unit code (HUC).
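Because results come back as sf objects unless skipGeometry = TRUE (sf is now a hard dependency per the DESCRIPTION change), standard spatial operations should apply; a small sketch, assuming the default geometry column:

site_info <- read_waterdata_monitoring_location(monitoring_location_id = "USGS-02238500")
sf::st_coordinates(site_info)  # longitude/latitude of the point geometry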
+} +\examples{ +\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +\donttest{ +site <- "USGS-02238500" +site_info <- read_waterdata_monitoring_location(monitoring_location_id = site) + +site_slim <- read_waterdata_monitoring_location(monitoring_location_id = site, + properties = c("monitoring_location_id", + "state_name", + "country_name")) + +site_slim_no_sf_slim <- read_waterdata_monitoring_location(monitoring_location_id = site, + properties = c("monitoring_location_id", + "state_name", + "country_name"), + skipGeometry = TRUE) + +site_info_no_sf <- read_waterdata_monitoring_location(monitoring_location_id = site, + skipGeometry = TRUE) + +bbox_vals = c(-94.00, 35.0, -93.5, 35.5) +multi_site <- read_waterdata_monitoring_location(bbox = bbox_vals) +multi_site_n_100 <- read_waterdata_monitoring_location(bbox = bbox_vals, + max_results = 100) +multi_site_limit_100 <- read_waterdata_monitoring_location(bbox = bbox_vals, + limit = 100) +} +\dontshow{\}) # examplesIf} +} diff --git a/man/read_USGS_samples.Rd b/man/read_waterdata_samples.Rd similarity index 77% rename from man/read_USGS_samples.Rd rename to man/read_waterdata_samples.Rd index e854592b..6eb9081f 100644 --- a/man/read_USGS_samples.Rd +++ b/man/read_waterdata_samples.Rd @@ -1,9 +1,36 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_USGS_samples.R -\name{read_USGS_samples} +% Please edit documentation in R/read_waterdata_samples.R +\name{read_waterdata_samples} +\alias{read_waterdata_samples} \alias{read_USGS_samples} \title{USGS Samples Data} \usage{ +read_waterdata_samples( + monitoringLocationIdentifier = NA, + siteTypeCode = NA, + boundingBox = NA, + hydrologicUnit = NA, + activityMediaName = NA, + characteristicGroup = NA, + characteristic = NA, + characteristicUserSupplied = NA, + activityStartDateLower = NA, + activityStartDateUpper = NA, + countryFips = NA, + stateFips = NA, + countyFips = NA, + projectIdentifier = NA, + recordIdentifierUserSupplied = NA, + siteTypeName = NA, + usgsPCode = NA, + pointLocationLatitude = NA, + pointLocationLongitude = NA, + pointLocationWithinMiles = NA, + dataType = "results", + dataProfile = NA, + tz = "UTC" +) + read_USGS_samples( monitoringLocationIdentifier = NA, siteTypeCode = NA, @@ -27,8 +54,7 @@ read_USGS_samples( pointLocationWithinMiles = NA, dataType = "results", dataProfile = NA, - tz = "UTC", - convertType = TRUE + tz = "UTC" ) } \arguments{ @@ -38,14 +64,14 @@ for example: AZ014-320821110580701, CAX01-15304600, USGS-040851385. Location numbers without an agency prefix are assumed to have the prefix USGS.} \item{siteTypeCode}{Site type code query parameter. See available -options by running \code{check_param("sitetype")$typeCode}.} +options by running \code{check_waterdata_sample_params("sitetype")$typeCode}.} \item{boundingBox}{North and South are latitude values; East and West are longitude values. A vector of 4 (west, south, east, north) is expected. An example would be: c(-92.8, 44.2, -88.9, 46.0).} \item{hydrologicUnit}{Hydrologic Unit Codes (HUCs) identify physical areas -within the US that drain to a certain portion of the stream network. +within the US that drain to a certain portion of the stream network. 
This filter accepts values containing 2, 4, 6, 8, 10 or 12 digits.}
\item{activityMediaName}{Sample media refers to the environmental medium that
was sampled or analyzed.}
\item{characteristicGroup}{Characteristic group is a broad category describing the sample. See available options by running
-\code{check_param("characteristicgroup")$characteristicGroup}.}
+\code{check_waterdata_sample_params("characteristicgroup")$characteristicGroup}.}
\item{characteristic}{Characteristic is a specific category describing the sample.
-See available options by running
-\code{check_param("characteristics")$characteristicName}.}
+See available options by running
+\code{check_waterdata_sample_params("characteristics")$characteristicName}.}
\item{characteristicUserSupplied}{Observed property is the USGS term for the constituent sampled and the property name gives a detailed description of what
@@ -77,22 +103,22 @@ than the value entered for activityStartDateLower. Can be an R Date object, or a string with format YYYY-MM-DD. The logic is inclusive, i.e. it will also return records that match the date.}
-\item{countryFips}{Country query parameter. Do not set redundant parameters.
+\item{countryFips}{Country query parameter. Do not set redundant parameters.
If another query parameter contains the country information, leave this parameter
-set to the default NA. See available options by running \code{check_param("countries")},
+set to the default NA. See available options by running \code{check_waterdata_sample_params("countries")},
where the "id" field contains the value to use in the countryFips input.}
-\item{stateFips}{State query parameter. To get a list of available state fips,
-run \code{check_param("states")}. The "fips" can be created using the function
-\code{stateCdLookup} - for example: \code{stateCdLookup("WI", "fips")}.
-FIPs codes for states take the format:
+\item{stateFips}{State query parameter. To get a list of available state fips,
+run \code{check_waterdata_sample_params("states")}. The "fips" can be created using the function
+\code{stateCdLookup} - for example: \code{stateCdLookup("WI", "fips")}.
+FIPS codes for states take the format:
CountryAbbrev:StateNumber, like US:55 for Wisconsin.}
\item{countyFips}{County query parameter. To get a list of available counties,
-run \code{check_param("counties")}. The "Fips" can be created using the function
-\code{countyCdLookup} - for example: \code{countyCdLookup("WI", "Dane", "fips")}
+run \code{check_waterdata_sample_params("counties")}. The "Fips" can be created using the function
+\code{countyCdLookup} - for example: \code{countyCdLookup("WI", "Dane", "fips")}
for Dane County, WI.
-FIPs codes for counties take the format:
+FIPS codes for counties take the format:
CountryAbbrev:StateNumber:CountyNumber, like US:55:025 for Dane County, WI.}
\item{projectIdentifier}{Project identifier query parameter. This information
@@ -104,8 +130,8 @@ information
would be needed from the data supplier.}
\item{siteTypeName}{Site type name query parameter. See available options
-by running \code{check_param("sitetype")$typeName}.}
+by running \code{check_waterdata_sample_params("sitetype")$typeName}.}
-\item{usgsPCode}{USGS parameter code. See available options by running
-\code{check_param("characteristics")$parameterCode}.}
+\item{usgsPCode}{USGS parameter code. See available options by running
+\code{check_waterdata_sample_params("characteristics")$parameterCode}.}
\item{pointLocationLatitude}{Latitude for a point/radius query (decimal degrees).
Must be used with pointLocationLongitude and pointLocationWithinMiles.} @@ -119,10 +145,10 @@ with pointLocationLatitude and pointLocationLongitude} \item{dataType}{Options include: "Results", "Monitoring locations", "Activities", "Projects", and "Organizations".} -\item{dataProfile}{Profile depends on type. Options for "results" dataType are: +\item{dataProfile}{Profile depends on type. Options for "results" dataType are: "fullphyschem", "basicphyschem", "fullbio", "basicbio", "narrow", -"resultdetectionquantitationlimit", "labsampleprep", "count". Options for "locations" are: -"site" and "count". Options for "activities" are "sampact", "actmetric", "actgroup", +"resultdetectionquantitationlimit", "labsampleprep", "count". Options for "locations" are: +"site" and "count". Options for "activities" are "sampact", "actmetric", "actgroup", and "count". Options for "projects" are: "project" and "projectmonitoringlocationweight". Options for "organizations" are: "organization" and "count".} @@ -132,33 +158,29 @@ and "count". Options for "projects" are: Possible values include "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua", "America/Phoenix", and "America/Metlakatla"} - -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates, datetimes, -numerics based on a standard algorithm. If false, everything is returned as a character.} } \description{ This function creates the call and gets the data for discrete water quality samples data -service described at \url{https://waterdata.usgs.gov/download-samples}. +service described at \url{https://waterdata.usgs.gov/download-samples/}. } \examples{ \dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ -ph_data <- read_USGS_samples( +ph_data <- read_waterdata_samples( monitoringLocationIdentifier = "USGS-04074950", characteristicUserSupplied = "pH, water, unfiltered, field", activityStartDateUpper = "2000-01-01", dataProfile = "narrow") nameToUse <- "pH" -pHData <- read_USGS_samples(monitoringLocationIdentifier = "USGS-04024315", +pHData <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-04024315", characteristic = nameToUse) ncol(pHData) attr(pHData, "url") attr(pHData, "queryTime") -summary_data <- read_USGS_samples(monitoringLocationIdentifier = "USGS-04024315", +summary_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-04024315", dataType = "projects") } diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd new file mode 100644 index 00000000..4847f0a4 --- /dev/null +++ b/man/read_waterdata_ts_meta.Rd @@ -0,0 +1,120 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_ts_meta.R +\name{read_waterdata_ts_meta} +\alias{read_waterdata_ts_meta} +\title{Get USGS Time Series Metadata} +\usage{ +read_waterdata_ts_meta( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + parameter_name = NA_character_, + properties = NA_character_, + statistic_id = NA_character_, + last_modified = NA_character_, + begin = NA_character_, + end = NA_character_, + unit_of_measure = NA_character_, + computation_period_identifier = NA_character_, + computation_identifier = NA_character_, + thresholds = NA, + sublocation_identifier = NA_character_, + primary = NA_character_, + time_series_id = NA_character_, + web_description = 
NA_character_, + skipGeometry = NA, + limit = NA, + max_results = NA, + bbox = NA, + convertType = FALSE +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500).} + +\item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at \url{https://help.waterdata.usgs.gov/codes-and-parameters/parameters}.} + +\item{parameter_name}{A human-understandable name corresponding to \code{parameter_code}.} + +\item{properties}{A vector of requested columns to be returned from the query. +Available options are: +geometry, id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, computation_period_identifier, computation_identifier, thresholds, sublocation_identifier, primary, monitoring_location_id, web_description, parameter_description} + +\item{statistic_id}{A code corresponding to the statistic an observation represents. Example codes include 00001 (max), 00002 (min), and 00003 (mean). A complete list of codes and their descriptions can be found at \url{https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=\%25&fmt=html}.} + +\item{last_modified}{The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate anything about the measurement has changed. +You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{last_modified} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties.} + +\item{begin}{The datetime of the earliest observation in the time series. Together with \code{end}, this field represents the period of record of a time series. Note that some time series may have large gaps in their collection record. This field is currently in the local time of the monitoring location. We intend to update this \strong{in version v0} to use UTC with a time zone.} + +\item{end}{The datetime of the most recent observation in the time series. Data returned by this endpoint updates at most once per day, and potentially less frequently than that, and as such there may be more recent observations within a time series than the time series \code{end} value reflects. Together with \code{begin}, this field represents the period of record of a time series. It is additionally used to determine whether a time series is "active". 
We intend to update this \strong{in version v0} to use UTC with a time zone.}
+
+\item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.}
+
+\item{computation_period_identifier}{Indicates the period of data used for any statistical computations.}
+
+\item{computation_identifier}{Indicates whether the data from this time series represent a specific statistical computation.}
+
+\item{thresholds}{Thresholds represent known numeric limits for a time series, for example the historic maximum value for a parameter or a level below which a sensor is non-operative. These thresholds are sometimes used to automatically determine if an observation is erroneous due to sensor error, and therefore shouldn't be included in the time series.}
+
+\item{sublocation_identifier}{An optional human-readable identifier used to specify where measurements are recorded at a monitoring location.}
+
+\item{primary}{A flag identifying if the time series is a "primary" time series. "Primary" time series (which have this flag) are standard observations which undergo \href{https://www.usgs.gov/survey-manual/5028-fundamental-science-practices-review-and-approval-scientific-data-release}{Bureau review and approval processes}. Non-primary time series, which will have missing values for "primary", are provisional datasets made available to meet the need for timely best science and to assist with daily operations which need real-time information. Non-primary time series data are only retained by this system for 120 days. See the \href{https://waterdata.usgs.gov/provisional-data-statement/}{USGS Provisional Data Statement} for more information.}
+
+\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.}
+
+\item{web_description}{A description of what this time series represents, as used by WDFN and other USGS data dissemination products.}
+
+\item{skipGeometry}{This option can be used to skip response geometries for
+each feature. The returning object will be a data frame with no spatial
+information.}
+
+\item{limit}{The optional limit parameter is used to control the subset of the
+selected features that should be returned in each page. The maximum allowable
+limit is 10000. It may be beneficial to set this number lower if your internet
+connection is spotty. The default (\code{NA}) will set the limit to the maximum
+allowable limit for the service.}
+
+\item{max_results}{The optional maximum number of rows to return. This value
+must be less than the requested limit.}
+
+\item{bbox}{Only features that have a geometry that intersects the bounding
+box are selected. The bounding box is provided as four or six numbers, depending
+on whether the coordinate reference system includes a vertical axis (height or
+depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric
+vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude,
+Southern-most latitude, Eastern-most longitude, Northern-most latitude).}
+
+\item{convertType}{logical, defaults to \code{FALSE}. If \code{TRUE}, the function
+will convert the data to dates and the qualifier to a string vector.}
+}
+\description{
+Daily data and continuous measurements are grouped into time series, which represent a collection of observations of a single parameter, potentially aggregated using a standard statistic, at a single monitoring location.
This endpoint provides metadata about those time series, including their operational thresholds, units of measurement, and when the earliest and most recent observations in a time series occurred. +} +\examples{ +\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +\donttest{ +site <- "USGS-02238500" +meta_1 <- read_waterdata_ts_meta(monitoring_location_id = site) + +meta_multi <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-01491000", + "USGS-01645000"), + parameter_code = c("00060", "00010"), + properties = c("monitoring_location_id", + "parameter_code", + "begin", + "end", + "time_series_id"), + skipGeometry = TRUE) +} +\dontshow{\}) # examplesIf} +} diff --git a/man/renameNWISColumns.Rd b/man/renameNWISColumns.Rd index 9ee611f1..55244ecb 100644 --- a/man/renameNWISColumns.Rd +++ b/man/renameNWISColumns.Rd @@ -86,7 +86,7 @@ names(newNames) } } \seealso{ -\code{\link{readNWISdv}}, \code{\link{readNWISuv}} +\code{\link[=readNWISdv]{readNWISdv()}}, \code{\link[=readNWISuv]{readNWISuv()}} } \keyword{IO} \keyword{manip} diff --git a/man/stateCd.Rd b/man/stateCd.Rd index a61aad42..b8eaea3d 100644 --- a/man/stateCd.Rd +++ b/man/stateCd.Rd @@ -17,7 +17,7 @@ STATENS \tab character \tab Geographic Names Information System Identifier (GNI } \description{ Classic lookup table for states. Has been replaced in functions with -\code{check_param("states")}. +\code{check_waterdata_sample_params("states")}. } \examples{ head(stateCd) diff --git a/man/summarize_USGS_samples.Rd b/man/summarize_waterdata_samples.Rd similarity index 80% rename from man/summarize_USGS_samples.Rd rename to man/summarize_waterdata_samples.Rd index 92a0023d..c7c8972b 100644 --- a/man/summarize_USGS_samples.Rd +++ b/man/summarize_waterdata_samples.Rd @@ -1,9 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_USGS_samples.R -\name{summarize_USGS_samples} +% Please edit documentation in R/read_waterdata_samples.R +\name{summarize_waterdata_samples} +\alias{summarize_waterdata_samples} \alias{summarize_USGS_samples} \title{USGS Samples Summary Data} \usage{ +summarize_waterdata_samples(monitoringLocationIdentifier) + summarize_USGS_samples(monitoringLocationIdentifier) } \arguments{ @@ -25,7 +28,7 @@ service described at \url{https://api.waterdata.usgs.gov/samples-data/docs}. \donttest{ monitoringLocationIdentifier <- "USGS-04074950" -what_data <- summarize_USGS_samples(monitoringLocationIdentifier) +what_data <- summarize_waterdata_samples(monitoringLocationIdentifier) } \dontshow{\}) # examplesIf} diff --git a/man/whatNWISdata.Rd b/man/whatNWISdata.Rd index 120ee997..fcc94de5 100644 --- a/man/whatNWISdata.Rd +++ b/man/whatNWISdata.Rd @@ -65,7 +65,7 @@ for more information. \details{ This function requires users to create their own arguments based on the NWIS web services. It is a more complicated function to use -compared to other NWIS functions such as \code{\link{readNWISdv}}, \code{\link{readNWISuv}}, +compared to other NWIS functions such as \code{\link[=readNWISdv]{readNWISdv()}}, \code{\link[=readNWISuv]{readNWISuv()}}, etc. However, this function adds a lot of flexibility to the possible queries. If the "service" argument is included, the results will be filtered to the proper data_type_cd. This is a great @@ -73,30 +73,22 @@ function to use before a large data set, by filtering down the number of sites that have useful data. 
} \examples{ -\dontshow{if (is_dataRetrieval_user()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -\donttest{ -availableData <- whatNWISdata(siteNumber = "05114000") +# see ?read_waterdata_ts_meta + +#site1 <- whatWQPsamples(siteid = "USGS-01594440") + +#type <- "Stream" + +#sites <- whatWQPsamples(countycode = "US:55:025", siteType = type) + +#lakeSites_samples <- whatWQPsamples(siteType = "Lake, Reservoir, Impoundment", +# countycode = "US:55:025") + -# To find just unit value ('instantaneous') data: -uvData <- whatNWISdata(siteNumber = "05114000", - service = "uv") -uvDataMulti <- whatNWISdata(siteNumber = c("05114000", "09423350"), - service = c("uv", "dv")) -flowAndTemp <- whatNWISdata( - stateCd = "WI", service = "dv", - parameterCd = c("00060", "00010"), - statCd = "00003" -) -sites <- whatNWISdata(stateCd = "WI", - parameterCd = "00060", - siteType = "ST", - service = "site") - -sites <- whatNWISdata(stateCd = "WI", - service = "gwlevels") } -\dontshow{\}) # examplesIf} +\seealso{ +\code{\link[=read_waterdata_ts_meta]{read_waterdata_ts_meta()}} } \keyword{USGS} \keyword{data} diff --git a/man/whatNWISsites.Rd b/man/whatNWISsites.Rd index 7f6408f3..b4310020 100644 --- a/man/whatNWISsites.Rd +++ b/man/whatNWISsites.Rd @@ -36,9 +36,12 @@ Returns a list of sites from the NWIS web service. This function gets the data f Mapper format is used } \examples{ -\donttest{ -siteListPhos <- whatNWISsites(stateCd = "OH", parameterCd = "00665") -oneSite <- whatNWISsites(sites = "05114000") +# see ?read_waterdata_monitoring_location +#siteListPhos <- whatNWISsites(stateCd = "OH", parameterCd = "00665") +#oneSite <- whatNWISsites(sites = "05114000") + } +\seealso{ +\code{\link[=read_waterdata_monitoring_location]{read_waterdata_monitoring_location()}} } diff --git a/man/whatWQPdata.Rd b/man/whatWQPdata.Rd index b6fedafd..9e8d8f7f 100644 --- a/man/whatWQPdata.Rd +++ b/man/whatWQPdata.Rd @@ -9,10 +9,10 @@ whatWQPdata(..., convertType = TRUE) \arguments{ \item{\dots}{see \url{https://www.waterqualitydata.us/webservices_documentation} for a complete list of options. A list of arguments can also be supplied. -One way to figure out how to construct a WQP query is to go to the "Advanced" +One way to figure out how to construct a WQP query is to go to the "Advanced" form in the Water Quality Portal: \url{https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET} -Use the form to discover what parameters are available. Once the query is +Use the form to discover what parameters are available. Once the query is set in the form, scroll down to the "Query URL". You will see the parameters after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient" in the Characteristic Group dropdown, you will see characteristicType=Nutrient @@ -25,8 +25,8 @@ will convert the data to dates, datetimes, numerics based on a standard algorithm. If false, everything is returned as a character.} } \value{ -A data frame that returns basic data availability such as -sites, number of results, and number of sampling activities from the +A data frame that returns basic data availability such as +sites, number of results, and number of sampling activities from the query parameters for the Water Quality Portal. } \description{ @@ -36,11 +36,11 @@ Arguments to the function should be based on \url{https://www.waterqualitydata.us/webservices_documentation}. 
The information returned from whatWQPdata describes the available data at the WQP sites, and some metadata on the sites themselves. -For example, a row is returned for each individual site that fulfills this -query. In that we can learn how many sampling activities and results -are available for the query. It does not break those results down by any finer +For example, a row is returned for each individual site that fulfills this +query. In that we can learn how many sampling activities and results +are available for the query. It does not break those results down by any finer grain. For example, if you ask for "Nutrients" (characteristicGroup), you will -not learn what specific nutrients are available at that site. For that +not learn what specific nutrients are available at that site. For that kind of data discovery see \code{readWQPsummary}. } \examples{ diff --git a/man/wqpSpecials.Rd b/man/wqpSpecials.Rd index fda2a399..369ef387 100644 --- a/man/wqpSpecials.Rd +++ b/man/wqpSpecials.Rd @@ -15,10 +15,10 @@ whatWQPsites(..., legacy = TRUE, convertType = TRUE) \arguments{ \item{\dots}{see \url{https://www.waterqualitydata.us/webservices_documentation} for a complete list of options. A list of arguments can also be supplied. -One way to figure out how to construct a WQP query is to go to the "Advanced" +One way to figure out how to construct a WQP query is to go to the "Advanced" form in the Water Quality Portal: \url{https://www.waterqualitydata.us/#mimeType=csv&providers=NWIS&providers=STORET} -Use the form to discover what parameters are available. Once the query is +Use the form to discover what parameters are available. Once the query is set in the form, scroll down to the "Query URL". You will see the parameters after "https://www.waterqualitydata.us/#". For example, if you chose "Nutrient" in the Characteristic Group dropdown, you will see characteristicType=Nutrient @@ -45,7 +45,7 @@ gets the data from: \url{https://www.waterqualitydata.us}. Arguments to the function should be based on \url{https://www.waterqualitydata.us/webservices_documentation}. The return from this function returns the basic metadata on WQP sites. It is -generally faster than the \code{\link{whatWQPdata}} function, but does +generally faster than the \code{\link[=whatWQPdata]{whatWQPdata()}} function, but does not return information on what data was collected at the site. } \examples{ diff --git a/man/wqp_check_status.Rd b/man/wqp_check_status.Rd index f46c172e..ca3e4737 100644 --- a/man/wqp_check_status.Rd +++ b/man/wqp_check_status.Rd @@ -15,8 +15,8 @@ a list generated from the WQP describing what data was returned. } \description{ -The information from this request is only available for a -limited time after the original query from the WQP. In the +The information from this request is only available for a +limited time after the original query from the WQP. In the readWQPdata and readWQPqw functions, the results from this function will be attached as an attribute to the data. } diff --git a/pkgdown/extra.css b/pkgdown/extra.css index 52e2c5da..fb14c3a9 100644 --- a/pkgdown/extra.css +++ b/pkgdown/extra.css @@ -1,27 +1,27 @@ -/* ================INSTRUCTIONS=================*/ +/* ================INSTRUCTIONS=================*/ /* By changing the info below, you can reduce the size of the logo or hide the search box. You can also override the standard font characteristics if you would like to use your own custom styles. 
In order for your changes to work though, you MUST include a reference in your HTML pages to both the common CSS file and custom CSS file in that order. Instructions are provided below for customizing these classes. */ -/* =============Width===============*/ +/* =============Width===============*/ /* BY DEFAULT, THERE IS NO MAX WIDTH: If you want the want to restrict the width of the page, remove the comment out slashes and astricks surrounding the ".tmp-container {max-width: 1170px;}". you can change the 1170px to a smaller/larger max width if you'd like */ - + /* .tmp-container { max-width: 1170px; } */ -/* =============Search===============*/ +/* =============Search===============*/ /* BY DEFAULT, Search box is displayed: If you want the to hide the search, remove the comment out slashes and astricks surrounding the ".search-box {display:none;}" below. */ - + /* #search-box { display: none; } */ -/* =============LOGO===============*/ +/* =============LOGO===============*/ /* THE DEFAULT LOGO HEIGHT IS 65PX: If you want the logo to be smaller (50px), comment out the ".logo-header img {height 65px;}" below and remove the comment out slashes and astricks surrounding the ".logo-header img {height: 50px...margin-top: 18px;}" and the header search input (so the search box size is reduced too). 50px is the MINIMUM HEIGHT for the logo. */ .logo-header img { @@ -30,7 +30,7 @@ If you want the logo to be smaller (50px), comment out the ".logo-header img {he /* .logo-header img { height: 50px; -} +} .header-search input[type="search"] { height: 30px; margin-top: 16px; @@ -42,7 +42,7 @@ If you want the logo to be smaller (50px), comment out the ".logo-header img {he } */ -/* =============STANDARD CONTENT===============*/ +/* =============STANDARD CONTENT===============*/ /* TO CHANGE THE TEXT SIZE OF THE CONTENT, FONT, ETC: By default, USGS has set the font size, family, etc. in order to provide a consistent size for content across all pages. If you would prefer not to have any of these pre-defined formats, you can change them below. NOTE: header and footer will not be changed. */ #maincontent { @@ -55,19 +55,19 @@ By default, USGS has set the font size, family, etc. in order to provide a consi padding-right: 15px; } -/* =============SEARCH===============*/ +/* =============SEARCH===============*/ /* THIS HIDES THE SEARCH BOX ON VERY SMALL DEVICES: For simplification, search bar is visible on larger screens but is hidden on small screens. If you would prefer not to have the search box at all, you can remove the "@media (max-width:500px) {" and the second closing "}". 
below */ @media (max-width:500px) { -.header-search form { +.header-search form { display: none} } -/* =============SOCIAL MEDIA===============*/ +/* =============SOCIAL MEDIA===============*/ /* If you would prefer not to have the social media links, you can remove the comment out slashes and astricks surrounding the content below */ - /* .footer-social-links { + /* .footer-social-links { display: none} */ - + @charset "UTF-8"; /* CSS Document */ @@ -81,7 +81,7 @@ footer, #navbar { -webkit-box-sizing: border-box; -moz-box-sizing: border-box; box-sizing: border-box; -} +} footer, header, main, nav, div { display: block; @@ -109,7 +109,7 @@ footer, header, main, nav, div { } hr { - width: 100%; + width: 100%; margin-top: 42px; clear: both; } @@ -352,6 +352,10 @@ Content: "\f082"; color: white !important; } +.text-muted { + color: white !important; +} + .navbar-nav a:hover { text-decoration: underline; color: white !important; diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index 3e35a42b..97469582 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -1,8 +1,68 @@ context("General functions") -test_that("General NWIS retrievals working", { +test_that("General USGS retrievals working", { testthat::skip_on_cran() + + cql <- '{ + "op": "and", + "args": [ + { + "op": "in", + "args": [ + { "property": "parameter_code" }, + [ "00060", "00065" ] + ] + }, + { + "op": "in", + "args": [ + { "property": "monitoring_location_id" }, + [ "USGS-07367300", "USGS-03277200" ] + ] + } + ] + }' + + dv_data <- read_waterdata(service = "daily", + CQL = cql, + time = c("2023-01-01", "2024-01-01")) + expect_equal(as.Date(c("2023-01-01", "2024-01-01")), + range(dv_data$time)) + expect_true(all(unique(dv_data$monitoring_location_id) %in% + c("USGS-07367300", "USGS-03277200"))) + + + cql_not_active <- '{ + "op": "and", + "args": [ + { + "op": "in", + "args": [ + { "property": "parameter_code" }, + [ "00060", "00065" ] + ] + }, + { + "op": "in", + "args": [ + { "property": "monitoring_location_id" }, + [ "USGS-05212700"] + ] + } + ] + }' + + notActiveUSGS <- read_waterdata(CQL = cql_not_active, + service = "daily", + time = c("2014-01-01", "2014-01-07")) + expect_true(nrow(notActiveUSGS) == 0) + +}) + +test_that("General NWIS retrievals working", { + testthat::skip_on_cran() + skip_on_ci() multiSite <- readNWISdata( sites = c("04025500", "040263491"), service = "iv", parameterCd = "00060", @@ -37,27 +97,18 @@ test_that("General NWIS retrievals working", { expect_error(readNWISdata(), "No arguments supplied") expect_error(readNWISdata(siteNumber = NA), "NA's are not allowed in query") - bBoxEx <- readNWISdata(bBox = c(-83, 36.5, -81, 38.5), parameterCd = "00010") - expect_true(length(unique(bBoxEx$site_no)) > 1) - - startDate <- as.Date("2013-10-01") - endDate <- as.Date("2014-09-30") - waterYear <- readNWISdata( - bBox = c(-83, 36.5, -81, 38.5), - parameterCd = "00010", - service = "dv", - startDate = startDate, - endDate = endDate - ) - expect_is(waterYear$dateTime, "POSIXct") + bBox_inventory <- read_waterdata_ts_meta(bbox = c(-83, 38, -82.5, 38.5), + parameter_code = "00010") + + expect_true(length(unique(bBox_inventory$monitoring_location_id)) > 1) - siteInfo <- readNWISdata( - stateCd = "WI", - parameterCd = "00010", - hasDataTypeCd = "iv", - service = "site" - ) - expect_is(siteInfo$station_nm, "character") + siteInfo <- read_waterdata_monitoring_location(state_name = "Wisconsin") + + timeseriesInfo <- read_waterdata_ts_meta(bbox = sf::st_bbox(siteInfo), 
+                                          parameter_code = "00010",
+                                          computation_period_identifier = "Points")
+
+  expect_is(timeseriesInfo$begin, "POSIXct")
 
   gw_data <- readNWISdata(
     stateCd = "AL",
@@ -117,30 +168,45 @@
   instData <- readNWISdata(args)
 
-  args <- list(
-    sites = "05114000", service = "dv",
-    parameterCd = "00060",
-    startDate = "2014-05-01",
-    endDate = "2014-05-01"
+  args2 <- list(
+    monitoring_location_id = "USGS-05114000",
+    parameter_code = "00060",
+    time = c("2014-05-01", "2014-05-01")
   )
-
-  dailyData <- readNWISdata(args)
-  expect_lt(nrow(dailyData), nrow(instData))
 
-  args <- list(stateCd = "OH", parameterCd = "00665")
-  sites <- whatNWISsites(args)
-  expect_type(sites, "list")
+
+  daily_USGS <- do.call(read_waterdata_daily, args2)
+  expect_lt(nrow(daily_USGS), nrow(instData))
+
+  ohio <- read_waterdata_monitoring_location(state_name = "Ohio",
+                                             site_type_code = "ST")
+  bbox <- sf::st_bbox(ohio)
+  what_sites <- read_waterdata_ts_meta(parameter_code = "00665",
+                                       bbox = bbox)
+  expect_true(all(c("monitoring_location_id",
+                    "begin", "end", "parameter_name") %in% names(what_sites)))
+
+  huc <- read_waterdata_monitoring_location(hydrologic_unit_code = "02080202")
+  expect_true(nrow(huc) > 0)
 
   # Test counties:
-  dailyStaffordVA <- readNWISdata(
-    stateCd = "Virginia",
-    countyCd = "Stafford",
-    parameterCd = "00060",
-    startDate = "2015-01-01",
-    endDate = "2015-01-30"
+
+  county_code_stafford <- countyCdLookup(state = "Virginia",
+                                         county = "Stafford",
+                                         outputType = "id")
+  state_code_va <- stateCdLookup(input = "Virginia", outputType = "id")
+  stafford <- read_waterdata_monitoring_location(county_code = county_code_stafford,
+                                                 state_code = state_code_va)
+  stafford_bbox <- sf::st_bbox(stafford)
+
+  dailyStaffordVA <- read_waterdata_daily(
    bbox = stafford_bbox,
+    parameter_code = "00060",
+    time = c("2015-01-01", "2015-01-30")
   )
   expect_gt(nrow(dailyStaffordVA), 1)
 
-  AS <- readNWISdata(stateCd = "AS", service = "site")
+  # American Samoa?
+ AS <- read_waterdata_monitoring_location(state_name = "American Samoa") expect_gt(nrow(AS), 0) site_id <- "01594440" @@ -173,15 +239,14 @@ test_that("General NWIS retrievals working", { ))) multi_hucs <- c("07130007", "07130011") - multi_huc <- dataRetrieval::readNWISdata( - huc = multi_hucs, - parameterCd = "63680", - startDate = "2015-06-18", - endDate = "2015-06-18", - service = "dv" + multi_huc_sites <- read_waterdata_monitoring_location(hydrologic_unit_code = multi_hucs) + + multi_huc <- read_waterdata_daily(bbox = sf::st_bbox(multi_huc_sites), + parameter_code = "63680", + statistic_id = "00003", + time = c("2015-06-18", "2015-06-18") ) - expect_equal(2, nrow(multi_huc)) - + expect_equal(4, length(unique(multi_huc$monitoring_location_id))) peak_data <- readNWISdata( service = "peak", @@ -196,92 +261,89 @@ test_that("General NWIS retrievals working", { expect_lt(nrow(peak_data), 100000) }) -test_that("whatNWISdata", { +test_that("read_waterdata_ts_meta", { # no service specified: - availableData <- whatNWISdata(siteNumber = "05114000") - expect_equal(ncol(availableData), 24) + availableData <- read_waterdata_ts_meta(monitoring_location_id = "USGS-05114000") + expect_equal(ncol(availableData), 17) - uvData <- whatNWISdata(siteNumber = "05114000", service = "uv") - expect_equal(unique(uvData$data_type_cd), "uv") + uvData <- read_waterdata_ts_meta(monitoring_location_id = "USGS-05114000", + computation_period_identifier = c("Points")) + expect_equal(unique(uvData$computation_period_identifier), "Points") # multiple services - uvDataMulti <- whatNWISdata( - siteNumber = c("05114000", "09423350"), - service = c("uv", "dv") - ) - expect_true(all(unique(uvDataMulti$data_type_cd) %in% c("uv", "dv"))) + uvDataMulti <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-05114000", + "USGS-09423350"), + computation_period_identifier = c("Daily", + "Points")) + + expect_true(all(unique(uvDataMulti$computation_period_identifier) %in% c("Daily", + "Points"))) # state codes: - flowAndTemp <- whatNWISdata( - stateCd = "WI", service = c("uv", "dv"), - parameterCd = c("00060", "00010"), - statCd = "00003" - ) - expect_true(all(unique(flowAndTemp$data_type_cd) %in% c("uv", "dv"))) - expect_true(all(unique(flowAndTemp$parm_cd) %in% c("00060", "00010"))) - expect_true(all(unique(flowAndTemp$stat_cd) %in% c("00003", NA))) - - # site service - sites <- whatNWISdata(stateCd = "WI", service = "site") - expect_true(all(c("gw", "sv", "qw", "dv", "pk", "uv") - %in% unique(sites$data_type_cd))) + wi_sites <- read_waterdata_monitoring_location(state_name = "Wisconsin") + flow_and_temp <- read_waterdata_ts_meta(bbox = sf::st_bbox(wi_sites), + parameter_code = c("00060", "00010"), + statistic_id = "00003", + computation_period_identifier = c("Daily", + "Points")) + + expect_true(all(unique(flow_and_temp$computation_period_identifier) %in% c("Daily", + "Points"))) + expect_true(all(unique(flow_and_temp$parameter_code) %in% c("00060", "00010"))) + expect_true(all(unique(flow_and_temp$statistic_id) %in% c("00003"))) + }) test_that("General WQP retrievals working", { testthat::skip_on_cran() nameToUse <- "pH" - # pHData <- readWQPdata(siteid = "USGS-04024315", - # characteristicName = nameToUse, - # service = "ResultWQX3") - # expect_is(pHData$Activity_StartDateTime, "POSIXct") + pHData <- readWQPdata(siteid = "USGS-04024315", + characteristicName = nameToUse, + service = "ResultWQX3") + expect_is(pHData$Activity_StartDateTime, "POSIXct") # # # testing lists: - # startDate <- as.Date("2022-01-01") - # secchi.names 
<- c("Depth, Secchi disk depth", - # "Secchi depth", - # "Water transparency, Secchi disc", - # "Depth, Secchi disk depth (choice list)") - # # "Transparency, Secchi tube with disk", - # # "Secchi Reading Condition (choice list)", - # # "Depth, Secchi disk visible at bottom (Y/N) (choice list)") - # - # args_2 <- list( - # "startDateLo" = startDate, - # "startDateHi" = "2024-01-01", - # statecode = "WI", - # characteristicName = secchi.names - # ) - # + startDate <- as.Date("2022-01-01") + secchi.names <- c("Depth, Secchi disk depth", + "Secchi depth", + "Water transparency, Secchi disc", + "Depth, Secchi disk depth (choice list)") + # "Transparency, Secchi tube with disk", + # "Secchi Reading Condition (choice list)", + # "Depth, Secchi disk visible at bottom (Y/N) (choice list)") + + args_2 <- list( + "startDateLo" = startDate, + "startDateHi" = "2024-01-01", + statecode = "WI", + characteristicName = secchi.names + ) + # # Testing multiple lists: - # arg_3 <- list( - # "startDateLo" = startDate, - # "startDateHi" = "2023-12-31" - # ) - # arg_4 <- list( - # statecode = "WI", - # characteristicName = secchi.names - # ) - # - # lakeData <- readWQPdata(args_2, ignore_attributes = TRUE) - # expect_true(nrow(lakeData) > 0) - # lakeSites <- whatWQPsites(args_2) - # expect_type(lakeSites, "list") - # - # wqp.summary_no_atts <- readWQPdata( - # siteid = "USGS-04024315", - # characteristicName = nameToUse, - # ignore_attributes = TRUE, - # service = "ResultWQX3" - # ) - # expect_true(!all(c("siteInfo", "variableInfo") %in% names(attributes(wqp.summary_no_atts)))) - # - # rawPcode <- readWQPqw("USGS-01594440", "01075", "", "", legacy = FALSE) - # expect_true(all(c("url", "queryTime", "siteInfo", "headerInfo") %in% - # names(attributes(rawPcode)))) - # - # # This means wqp_check_status was called: - # expect_true("dataProviders" %in% names(attr(rawPcode, "headerInfo"))) + arg_3 <- list( + "startDateLo" = startDate, + "startDateHi" = "2023-12-31" + ) + arg_4 <- list( + statecode = "WI", + characteristicName = secchi.names + ) + + wqp.summary_no_atts <- readWQPdata( + siteid = "USGS-04024315", + characteristicName = nameToUse, + ignore_attributes = TRUE, + service = "ResultWQX3" + ) + expect_true(!all(c("siteInfo", "variableInfo") %in% names(attributes(wqp.summary_no_atts)))) + + rawPcode <- readWQPqw("USGS-01594440", "01075", "", "", legacy = FALSE) + expect_true(all(c("url", "queryTime", "siteInfo", "headerInfo") %in% + names(attributes(rawPcode)))) + + # This means wqp_check_status was called: + expect_true("dataProviders" %in% names(attr(rawPcode, "headerInfo"))) rawPcode2 <- readWQPqw("USGS-01594440", "01075", "", "", ignore_attributes = TRUE) expect_true(all(!c( "queryTime", "siteInfo") %in% @@ -290,32 +352,32 @@ test_that("General WQP retrievals working", { # This means wqp_check_status wasn't called: expect_false("dataProviders" %in% names(attr(rawPcode2, "headerInfo"))) - # pHData <- readWQPdata(siteid = "USGS-04024315", - # characteristicName = "pH", - # service = "ResultWQX3") - # expect_true(all(c("url", "queryTime", "siteInfo", "headerInfo") %in% - # names(attributes(pHData)))) - # + pHData <- readWQPdata(siteid = "USGS-04024315", + characteristicName = "pH", + service = "ResultWQX3") + expect_true(all(c("url", "queryTime", "siteInfo", "headerInfo") %in% + names(attributes(pHData)))) + # # This means wqp_check_status was called: - # expect_true("dataProviders" %in% names(attr(pHData, "headerInfo"))) - # - # pHData2 <- readWQPdata(siteid = "USGS-04024315", - # characteristicName = "pH", - 
# ignore_attributes = TRUE, - # service = "ResultWQX3") - # expect_true(all(!c("queryTime", "siteInfo") %in% - # names(attributes(pHData2)))) - # + expect_true("dataProviders" %in% names(attr(pHData, "headerInfo"))) + + pHData2 <- readWQPdata(siteid = "USGS-04024315", + characteristicName = "pH", + ignore_attributes = TRUE, + service = "ResultWQX3") + expect_true(all(!c("queryTime", "siteInfo") %in% + names(attributes(pHData2)))) + # # This means wqp_check_status was called: - # expect_false("dataProviders" %in% names(attr(pHData2, "headerInfo"))) - # - # rawPcode <- readWQPqw("USGS-01594440", "01075", - # ignore_attributes = TRUE, legacy = FALSE) - # headerInfo <- attr(rawPcode, "headerInfo") - # wqp_request_id <- headerInfo$`wqp-request-id` - # count_info <- wqp_check_status(wqp_request_id) - # - # expect_true("dataProviders" %in% names(count_info)) + expect_false("dataProviders" %in% names(attr(pHData2, "headerInfo"))) + + rawPcode <- readWQPqw("USGS-01594440", "01075", + ignore_attributes = TRUE, legacy = FALSE) + headerInfo <- attr(rawPcode, "headerInfo") + wqp_request_id <- headerInfo$`wqp-request-id` + count_info <- wqp_check_status(wqp_request_id) + + expect_true("dataProviders" %in% names(count_info)) }) @@ -364,21 +426,19 @@ test_that("whatWQPdata working", { expect_is(lakeSites$activityCount, "numeric") }) -context("whatNWISsites") -test_that("whatNWISsites working", { +context("read_waterdata_ts_meta") +test_that("read_waterdata_ts_meta working", { testthat::skip_on_cran() - siteListPhos <- whatNWISsites(stateCd = "OH", parameterCd = "00665") + siteListOhio <- read_waterdata_monitoring_location(state_name = "Ohio") + siteListPhos <- read_waterdata_ts_meta(bbox = sf::st_bbox(siteListOhio), + parameter_code = "00665") expect_true(nrow(siteListPhos) > 0) - expect_true(is.numeric(siteListPhos$dec_lat_va)) + expect_is(siteListPhos$begin, "POSIXct") - bboxSites <- whatNWISsites(bbox = c(-92.5, 45.4, -87, 47), parameterCd = "00060") + bboxSites <- read_waterdata_ts_meta(bbox = c(-92.5, 45.4, -87, 47), + parameter_code = "00060") expect_true(nrow(bboxSites) > 0) - expect_true(is.numeric(bboxSites$dec_lat_va)) - #gwlevels: - info <- whatNWISsites(stateCd = "NY", service="gwlevels") - expect_true(nrow(info) > 0) - expect_equal(attr(info, "url"), "https://waterservices.usgs.gov/nwis/site/?stateCd=NY&hasDataTypeCd=gw&format=mapper") }) context("readWQPdots") diff --git a/tests/testthat/tests_nldi.R b/tests/testthat/tests_nldi.R index 44de3544..7aae6d05 100644 --- a/tests/testthat/tests_nldi.R +++ b/tests/testthat/tests_nldi.R @@ -84,7 +84,7 @@ test_that("NLDI navigation sources...", { expect_error(findNLDI(nwis = "11120000", nav = c("DT"), warn = FALSE)) expect_error(findNLDI(nwis = "11120000", nav = c("DT", "UM"), warn = FALSE)) # WARNING: Data not found - expect_warning(findNLDI(comid = 101, nav = "UM", find = "nwis", warn = TRUE)) + # expect_warning(findNLDI(comid = 101, nav = "UM", find = "nwis", warn = TRUE)) }) test_that("NLDI find sources...", { diff --git a/tests/testthat/tests_samples.R b/tests/testthat/tests_samples.R index d23c1e0a..da145998 100644 --- a/tests/testthat/tests_samples.R +++ b/tests/testthat/tests_samples.R @@ -4,17 +4,17 @@ context("General functions") test_that("General samples-data retrievals work using WQP tests", { testthat::skip_on_cran() nameToUse <- "pH" - pHData <- read_USGS_samples(monitoringLocationIdentifier = "USGS-04024315", + pHData <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-04024315", characteristic = nameToUse) 
expect_is(pHData$Activity_StartDateTime, "POSIXct") # testing lists: startDate <- as.Date("2022-01-01") - secchi_ops <- check_param("observedproperty", + secchi_ops <- check_waterdata_sample_params("observedproperty", text = "secchi") state_fips <- paste0("US:", stateCdLookup("WI", "id")) - lakeData <- read_USGS_samples(activityStartDateLower = startDate, + lakeData <- read_waterdata_samples(activityStartDateLower = startDate, activityStartDateUpper = "2024-01-01", stateFips = "US:55", characteristicUserSupplied = secchi_ops$observedProperty, @@ -22,20 +22,20 @@ test_that("General samples-data retrievals work using WQP tests", { expect_true(nrow(lakeData) > 0) - lakeSites <- read_USGS_samples(monitoringLocationIdentifier = unique(lakeData$Location_Identifier), + lakeSites <- read_waterdata_samples(monitoringLocationIdentifier = unique(lakeData$Location_Identifier), dataType = "locations", dataProfile = "site") expect_type(lakeSites, "list") - rawPcode <- read_USGS_samples(monitoringLocationIdentifier = "USGS-01594440", + rawPcode <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-01594440", usgsPCode = "01075") expect_true(all(c("url", "queryTime", "headerInfo") %in% names(attributes(rawPcode)))) - pHData <- read_USGS_samples(monitoringLocationIdentifier = "USGS-04024315", + pHData <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-04024315", characteristic = "pH", dataProfile = "narrow") @@ -50,7 +50,7 @@ context("samples-data samples") test_that("samples-data activities working", { testthat::skip_on_cran() - activityInfo <- read_USGS_samples(monitoringLocationIdentifier = "USGS-01594440", + activityInfo <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-01594440", dataType = "activities") expect_true(nrow(activityInfo) > 0) }) @@ -60,17 +60,17 @@ test_that("samples-data project working", { testthat::skip_on_cran() type <- "Stream" - projectInfo <- read_USGS_samples(countyFips = countyCdLookup("WI", "Dane"), + projectInfo <- read_waterdata_samples(countyFips = countyCdLookup("WI", "Dane"), siteTypeName = type, dataType = "projects") expect_true(ncol(projectInfo) >= 0) }) -context("summary_USGS_samples") -test_that("summary_USGS_samples working", { +context("summary_waterdata_samples") +test_that("summary_waterdata_samples working", { testthat::skip_on_cran() - site1 <- summarize_USGS_samples(monitoringLocationIdentifier = "USGS-01594440") + site1 <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-01594440") expect_is(site1, "data.frame") }) @@ -79,13 +79,13 @@ test_that("profiles", { testthat::skip_on_cran() # Data profiles: "Organization Data" - org_data <- read_USGS_samples( + org_data <- read_waterdata_samples( countyFips = countyCdLookup("WI", "Dane"), dataType = "organizations" ) # Data profiles: "Site Data Only" - site_data <- read_USGS_samples( + site_data <- read_waterdata_samples( countyFips = countyCdLookup("WI", "Dane"), dataType = "locations" ) @@ -93,7 +93,7 @@ test_that("profiles", { expect_true(all(c("ProviderName", "Location_Identifier") %in% names(site_data))) # Data profiles: "Project Data" - project_data <- read_USGS_samples( + project_data <- read_waterdata_samples( countyFips = countyCdLookup("WI", "Dane"), dataType = "projects" ) @@ -104,7 +104,7 @@ test_that("profiles", { ) %in% names(project_data))) # Data profiles: "Project Monitoring Location Weighting Data" - proj_mlwd <- read_USGS_samples( + proj_mlwd <- read_waterdata_samples( countyFips = countyCdLookup("WI", "Dane"), dataType = "projects", dataProfile = 
"projectmonitoringlocationweight" @@ -116,7 +116,7 @@ test_that("profiles", { ) %in% names(proj_mlwd))) # Data profiles: "Sample Results (biological metadata)" - samp_bio <- read_USGS_samples( + samp_bio <- read_waterdata_samples( monitoringLocationIdentifier = "USGS-04024315", dataProfile = "basicbio", dataType = "results" @@ -128,7 +128,7 @@ test_that("profiles", { ) %in% names(samp_bio))) # Data profiles: "Sample Results (narrow)" - samp_narrow <- read_USGS_samples( + samp_narrow <- read_waterdata_samples( monitoringLocationIdentifier = "USGS-04024315", dataProfile = "narrow", dataType = "results" @@ -140,7 +140,7 @@ test_that("profiles", { ) %in% names(samp_narrow))) # Data profiles: "Sampling Activity" - samp_activity <- read_USGS_samples( + samp_activity <- read_waterdata_samples( monitoringLocationIdentifier = "USGS-04024315", dataProfile = "sampact", # Sampling Activities dataType = "activities" @@ -152,7 +152,7 @@ test_that("profiles", { ) %in% names(samp_activity))) # Data profile: "Result Detection Quantitation Limit Data" - dl_data <- read_USGS_samples( + dl_data <- read_waterdata_samples( monitoringLocationIdentifier = "USGS-04024315", dataType = "results", dataProfile = "resultdetectionquantitationlimit" diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index a996c1f6..ba58c224 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -2,7 +2,7 @@ context("Unit values") test_that("Unit value data returns correct types", { testthat::skip_on_cran() - + skip_on_ci() siteNumber <- "05114000" parameterCd <- "00060" startDate <- "2014-10-10" @@ -68,7 +68,7 @@ test_that("Unit value data returns correct types", { context("Peak, rating, meas, site") test_that("peak, rating curves, surface-water measurements", { testthat::skip_on_cran() - + skip_on_ci() siteNumbers <- c("01594440", "040851325") data <- readNWISpeak(siteNumbers) expect_is(data$agency_cd, "character") @@ -83,19 +83,25 @@ test_that("peak, rating curves, surface-water measurements", { data <- readNWISmeas(siteNumbers) expect_is(data$agency_cd, "character") - siteINFO <- readNWISsite("05114000") - expect_is(siteINFO$agency_cd, "character") - expect_equal(siteINFO$site_no, "05114000") + siteINFO_USGS <- read_waterdata_monitoring_location(monitoring_location_id = "USGS-05114000") + expect_is(siteINFO_USGS$agency_code, "character") + expect_equal(siteINFO_USGS$monitoring_location_id, "USGS-05114000") - siteINFOMulti <- readNWISsite(c("05114000", "09423350")) - expect_true(nrow(siteINFOMulti) == 2) + siteINFOMulti_USGS <- read_waterdata_monitoring_location(monitoring_location_id = c("USGS-05114000", + "USGS-09423350")) + expect_true(nrow(siteINFOMulti_USGS) == 2) Meas07227500.ex <- readNWISmeas("07227500", expanded = TRUE) expect_is(Meas07227500.ex$measurement_dt, "Date") expect_is(Meas07227500.ex$measurement_dateTime, "POSIXct") - expect_equal(nrow(whatNWISdata(siteNumber = "10312000", parameterCd = "50286")), 0) - expect_equal(ncol(whatNWISdata(siteNumber = "10312000", parameterCd = "50286")), 24) + expect_equal(nrow(read_waterdata_ts_meta(monitoring_location_id = "USGS-10312000", + parameter_code = "50286")), 0) + expect_equal(ncol(read_waterdata_ts_meta(monitoring_location_id = "USGS-10312000", + parameter_code = "50286", + properties = c("geometry", "id", + "unit_of_measure", + "parameter_name"))), 4) url <- httr2::request("https://waterservices.usgs.gov/nwis/site/?format=rdb&seriesCatalogOutput=true&sites=05114000") x <- 
importRDB1(url) @@ -112,52 +118,60 @@ test_that("peak, rating curves, surface-water measurements", { convertType = FALSE)) }) -test_that("NWIS dv tests", { +test_that("read_waterdata_daily", { testthat::skip_on_cran() - siteNumber <- "04085427" + siteNumber <- "USGS-04085427" startDate <- "2012-01-01" endDate <- "2012-06-30" pCode <- "00060" - rawDailyQ <- readNWISdv(siteNumber, pCode, startDate, endDate) - expect_is(rawDailyQ$Date, "Date") - - rawDailyQAndTempMeanMax <- readNWISdv(siteNumber, c("00010", "00060"), - startDate, endDate, - statCd = c("00001", "00003") - ) - expect_true(length(grep("00060", names(rawDailyQAndTempMeanMax))) >= 2 & - length(grep("00010", names(rawDailyQAndTempMeanMax))) >= 2) - - - rawDailyMultiSites <- readNWISdv(c("01491000", "01645000"), - c("00010", "00060"), - startDate, endDate, - statCd = c("00001", "00003") - ) - expect_true(length(unique(rawDailyMultiSites$site_no)) > 1) - + raw_waterdata_daily <- read_waterdata_daily(monitoring_location_id = siteNumber, + parameter_code = pCode, + time = c(startDate, endDate)) + expect_is(raw_waterdata_daily$time, "Date") + + raw_waterdata_TempMeanMax <- read_waterdata_daily(monitoring_location_id = siteNumber, + parameter_code = c("00010", "00060"), + time = c(startDate, endDate), + statistic_id = c("00001", "00003")) + + expect_true(length(unique(raw_waterdata_TempMeanMax$parameter_code)) == 2) + expect_true(length(unique(raw_waterdata_TempMeanMax$statistic_id)) == 2) + expect_true(length(unique(raw_waterdata_TempMeanMax$monitoring_location_id)) == 1) + + raw_waterdata_MultiSites <- read_waterdata_daily(monitoring_location_id = c("USGS-01491000", "USGS-01645000"), + parameter_code = c("00010", "00060"), + time = c(startDate, endDate), + statistic_id = c("00001", "00003")) + + expect_true(length(unique(raw_waterdata_MultiSites$monitoring_location_id)) == 2) + site <- "05212700" - notActive <- readNWISdv(site, "00060", "2014-01-01", "2014-01-07") - expect_true(nrow(notActive) == 0) + + notActiveUSGS <- read_waterdata_daily(monitoring_location_id = paste0("USGS-", site), + parameter_code = "00060", + time = c("2014-01-01", "2014-01-07")) + expect_true(nrow(notActiveUSGS) == 0) + }) test_that("WQP qw tests", { testthat::skip_on_cran() + skip_on_ci() nameToUse <- "Specific conductance" pcodeToUse <- "00095" - # INFO_WQP <- readWQPqw("USGS-04024315", pcodeToUse, - # startDate = "", endDate = "", legacy = FALSE) - # expect_is(INFO_WQP$Activity_StartDateTime, "POSIXct") - # - # INFO2 <- readWQPqw("WIDNR_WQX-10032762", nameToUse, - # startDate = "", endDate = "", legacy = FALSE) - # expect_is(INFO2$Activity_StartDateTime, "POSIXct") - # - # df <- readWQPqw("USGS-04193500", parameterCd = "00665", legacy = FALSE) - # expect_true(nrow(df) > 0) + INFO_WQP <- readWQPqw("USGS-04024315", pcodeToUse, + startDate = "", endDate = "", legacy = FALSE) + expect_is(INFO_WQP$Activity_StartDateTime, "POSIXct") + + INFO2 <- readWQPqw("WIDNR_WQX-10032762", nameToUse, + startDate = "", endDate = "", legacy = FALSE) + expect_is(INFO2$Activity_StartDateTime, "POSIXct") + + df <- readWQPqw("USGS-04193500", parameterCd = "00665", legacy = FALSE) + expect_true(nrow(df) > 0) df2 <- readWQPqw("USGS-05427718", parameterCd = "all") expect_true(nrow(df2) > 0) @@ -172,6 +186,7 @@ test_that("WQP qw tests", { context("readNWISstat tests") test_that("readNWISstat tests", { testthat::skip_on_cran() + skip_on_ci() data <- readNWISstat( siteNumbers = c("02171500"), parameterCd = c("00010", "00060"), @@ -205,6 +220,7 @@ test_that("readNWISstat tests", { 
context("readNWISuse tests") test_that("readNWISuse tests", { testthat::skip_on_cran() + skip_on_ci() dc <- readNWISuse( years = c(2000, 2005, 2010), stateCd = "DC", countyCd = NULL @@ -350,37 +366,48 @@ test_that("calcWaterYear can handle missing values", { }) -context("Construct NWIS urls") -test_that("Construct NWIS urls", { +context("construct_api_requests") +test_that("Construct USGS urls", { testthat::skip_on_cran() - siteNumber <- "01594440" - startDate <- "1985-01-01" + siteNumber <- "USGS-01594440" + startDate <- "2024-01-01" endDate <- "" pCode <- c("00060", "00010") - url_daily <- constructNWISURL(siteNumber, pCode, - startDate, endDate, "dv", - statCd = c("00003", "00001") - ) + url_daily <- construct_api_requests(service = "daily", + monitoring_location_id = siteNumber, + parameter_code = pCode, + time = c(startDate, endDate), + statistic_id = c("00003", "00001")) # nolint start: line_length_linter expect_equal(url_daily$url, - "https://waterservices.usgs.gov/nwis/dv/?site=01594440&format=waterml%2C1.1&ParameterCd=00060%2C00010&StatCd=00003%2C00001&startDT=1985-01-01") + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&time=2024-01-01T00%3A00%3A00Z%2F..&skipGeometry=FALSE&limit=10000") - url_unit <- constructNWISURL(siteNumber, pCode, "2012-06-28", "2012-06-30", "iv") + url_works <- dataRetrieval:::walk_pages(url_daily, max_results = 1) + expect_true(nrow(url_works) > 0) + + url_ts_meta <- construct_api_requests(monitoring_location_id = siteNumber, + parameter_code = pCode, + service = "time-series-metadata") expect_equal( - url_unit$url, - "https://nwis.waterservices.usgs.gov/nwis/iv/?site=01594440&format=waterml%2C1.1&ParameterCd=00060%2C00010&startDT=2012-06-28&endDT=2012-06-30" + url_ts_meta$url, + "https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE&limit=10000" ) + + url_works_ts <- dataRetrieval:::walk_pages(url_ts_meta, max_results = 1) + expect_true(nrow(url_works_ts) > 0) - url_daily_tsv <- constructNWISURL(siteNumber, pCode, startDate, endDate, "dv", - statCd = c("00003", "00001"), format = "tsv" - ) + url_ml <- construct_api_requests(id = siteNumber, + service = "monitoring-locations") - expect_equal(url_daily_tsv$url, "https://waterservices.usgs.gov/nwis/dv/?site=01594440&format=rdb%2C1.0&ParameterCd=00060%2C00010&StatCd=00003%2C00001&startDT=1985-01-01") + expect_equal(url_ml$url, "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&limit=10000&id=USGS-01594440") + url_works_ml <- dataRetrieval:::walk_pages(url_ml, max_results = 1) + expect_true(nrow(url_works_ml) > 0) + url_use <- constructUseURL( years = c(1990, 1995), stateCd = "Ohio", diff --git a/vignettes/Status.Rmd b/vignettes/Status.Rmd index 1afc2f8e..a258c888 100644 --- a/vignettes/Status.Rmd +++ b/vignettes/Status.Rmd @@ -15,52 +15,88 @@ editor_options: chunk_output_type: console --- -This page will be updated frequently with information about the status of dataRetrieval services. +# Overview -Contact CompTools@usgs.gov with additional questions. +Welcome to the `dataRetrieval` status page. This page will be updated frequently with information about the status of `dataRetrieval` services. -# Overview +Please contact the Computational Tools team at CompTools@usgs.gov with questions. -On March 11, 2024, NWIS **discrete water quality** services were "frozen": any public data retrieval using `readNWISqw()` did not include any new data. 
As of dataRetrieval v2.7.17, `readNWISqw()` has been retired and replaced by `readWQPqw()`. Learn more about the change and where to find the new samples data in our [blog](https://waterdata.usgs.gov/blog/changes-to-sample-data/).
+# New Water Data APIs
 
-If you have additional questions about the NWIS qw data service, email CompTools@usgs.gov.
+<details>
+<summary>What is an application programming interface (API)?</summary>
+When you run a `dataRetrieval` function like `read_waterdata_samples()` or `readNWISdv()`, you are actually generating a URL that contains within it specifications of the dataset in which you are interested (e.g. which monitoring locations, characteristics, pcodes, start and end dates, etc.). The format of that URL is special: it is parsed by a USGS-specific API web service, which translates the request into a database call. It then packages the response object from the database (with the data) into a format that can be sent to and then unpacked by the user. `dataRetrieval` takes a lot of the guesswork out of this process by generating the URL, sending it to the API, and wrangling the response object into a tabular dataset.
+</details>
+<br>
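+As a rough illustration of what one of these generated URLs looks like, here is a minimal `httr2` sketch. It hand-builds the same style of request that `read_waterdata_monitoring_location()` (via `construct_api_requests()`) assembles for you; the site number is just an example, and `dataRetrieval` users never need to do this themselves:
+
+```{r eval=FALSE}
+library(httr2)
+
+# Hand-built version of the monitoring-locations request that
+# read_waterdata_monitoring_location("USGS-02238500") generates:
+req <- request("https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items") |>
+  req_url_query(f = "json",
+                lang = "en-US",
+                skipGeometry = "FALSE",
+                limit = 10000,
+                id = "USGS-02238500")
+req$url
+# resp <- req_perform(req)     # send the request to the API
+# str(resp_body_json(resp))    # unpack the GeoJSON response
+```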
+The USGS is in the process of creating new, [publicly available APIs](https://api.waterdata.usgs.gov/) (a.k.a. "web services") to replace the existing [WaterServices](https://waterservices.usgs.gov/). `dataRetrieval` relies upon these web services to provide monitoring location information and water quantity/quality datasets. As new web services come online, `dataRetrieval` will be furnished with new functions to accommodate these changes.
 
-## Latest CRAN update
+The table below shows the **existing** NWIS functions and, if applicable, their slated replacements. Note that several functions do not have direct replacements because the new services to support them do not yet exist. We will update this table as new services and functions come online. Also note that some new functions may only be available on the "develop" branch of `dataRetrieval` (`remotes::install_github("DOI-USGS/dataRetrieval", ref = "develop")`). More information on each function's specifications (complete with examples) is available in the [Introduction to New USGS Services vignette](add link here), the [package index](https://doi-usgs.github.io/dataRetrieval/reference/index.html) and/or function documentation (e.g. `?read_waterdata_daily`).
+
+```{r echo=FALSE}
+df <- data.frame(
+  Legacy = c(
+    "readNWISqw (deprecated)",
+    "readNWISdv",
+    "readNWISsite",
+    "whatNWISsites",
+    "",
+    "readNWISuv",
+    "readNWISrating",
+    "readNWISstat",
+    "readNWISmeas",
+    "readNWISpeak",
+    "readNWISgwl",
+    "readNWISuse",
+    "readNWISdata",
+    "whatNWISdata",
+    "readNWISpCode"
+  ),
+  New = c(
+    "read_waterdata_samples",
+    "read_waterdata_daily",
+    "read_waterdata_monitoring_location",
+    "read_waterdata_ts_meta",
+    "read_waterdata",
+    rep("", 10)
+  ),
+  "Available on (branch)" = c("main (CRAN)", "develop", "develop", "develop", "develop", rep("", 10))
+)
 
-* dataRetrieval now uses `httr2` instead of `httr` under the hood for constructing web service calls. The `httr2` package is considered the modern replacement for `httr`, and has support going forward (unlike `httr`). Depending on how you perform the package updates, you may need to install `httr2`.
+knitr::kable(df, col.names = c("WaterServices (legacy) function", "Water Data (new) function", "Available on (branch name)"))
 
-* dataRetrieval will give a message with the requested URL each time a web service call is made. These can be hidden by wrapping dataRetrieval calls with `suppressMessages`. That being said, it is very informative to see exactly where the data is coming from.
+```
 
-* The output of the "constructURL..." functions are now `httr2` requests instead of a character string.
+If you want to learn more about the new water data APIs, check out the ["What's new with WDFN APIs?" blog post](https://waterdata.usgs.gov/blog/api-whats-new-wdfn-apis/), as well as the [documentation](https://api.waterdata.usgs.gov/docs/) available on api.waterdata.usgs.gov.
 
-* The update to `httr2` will give us more flexibility to set up the eventual replacements to the NWIS web services. Over the next year, expect to see some new and major updates to USGS data access. dataRetrieval WILL stay on the cutting edge for accessing new USGS APIs.
+## API Keys
 
-* WQP continues to default to the legacy system (that does not include recent USGS discrete sample data). To access the most recent USGS data from the new "WQX3" services use the function `readWQPqw` and set the argument `legacy=FALSE` or use the function `readWQPdata` and set the argument `service = "ResultWQX3"`.
Why aren't the new services set as default? This is because the WQP itself still considers those services "beta", and therefore performance is not guaranteed.
+Do you make *a lot* of `dataRetrieval` WaterServices calls (e.g. using functions like `readNWISdv`, `readNWISuv`)? On the order of more than 50 function calls per hour? As you switch your workflows over to the new Water Data API functions, consider grabbing yourself an API key, which will bump your limit up to 1,000 requests per hour. Head to the [sign up page](https://api.waterdata.usgs.gov/signup) to get a token.
-* Finally, saving the best for last! There's a new set of functions that access the new USGS "samples-data" services. This is a USGS-specific service for discrete sample data. If you are only interested in USGS discrete water quality data (as opposed to USGS AND non-USGS discrete water quality data available from Water Quality Portal), you can use the `read_USGS_samples` function.
+Once you have your API key, add it to your `.Renviron` file like this:
-Read more about it here: 
+```
+API_USGS_PAT = "[your api key]"
+```
-## Locating USGS data using the Water Quality Portal
+Replace `[your api key]` with the alphanumeric code provided by the sign-up page. That's it! `dataRetrieval` will look for an `.Renviron` file in your directories and use it for making web service calls.
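+After editing `.Renviron`, restart R so the variable is read at startup. A quick sanity check (base R only) that the key is visible to your session:
+
+```{r eval=FALSE}
+Sys.getenv("API_USGS_PAT")
+```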
-New USGS data (post March 11, 2024) **are temporarily not accessible** on the **main** Water Quality Portal (WQP) page (www.waterqualitydata.us). Data are still being collected, but are not available on this webpage. This limited availability is expected to last a few months.
+# Discrete Data
-However, new USGS data **are accessible** in a pre-release (*beta*) version of the [WQP web page](https://www.waterqualitydata.us/beta/) and new [wqx3 web services](https://waterqualitydata.us/wqx3/). Data are available in the "WQX version 3.0 format" (WQX = [Water Quality Exchange](https://exchangenetwork.net/data-exchange/wqx/)) for these new "Data Profiles" (how the data is formatted by the WQP):
+In March 2024, NWIS **discrete water quality** services were "frozen": any public data retrieval using `readNWISqw()` no longer included any new data. Concurrently, the main [Water Quality Portal (WQP) API](https://www.waterqualitydata.us/) stopped serving new and updated USGS data (we will refer to this set of web services as "legacy"). Now, new and updated data are available from the [USGS Samples API](https://waterdata.usgs.gov/download-samples/#dataProfile=site) (for USGS data only) or in the [beta version](https://www.waterqualitydata.us/beta/) of the WQP (both USGS and non-USGS data).
-* Monitoring Location
-* Results - Narrow
-* Results - Full Physical Chemical
-* Results - Basic Physical Chemical
-* Sampling Activity
+What does this mean for water quality data users of `dataRetrieval`? Check out the sections below for more specifics.
-Guidance on how to use the new web page and web services are available in the [User Guide](https://www.waterqualitydata.us/beta/portal_userguide/) and [Web Services Guide](https://www.waterqualitydata.us/beta/webservices_documentation/). Additional profiles will continue to be added over time.
+## Samples Data
-**Disclaimer:** During the beta period, users may encounter bugs or identify issues with the implementation of the WQX 3.0 format: we welcome (and encourage!) your feedback to help improve these offerings, just send an email to WQX@epa.gov.
+There's a new set of functions that access the USGS "samples-data" services! If you are **only** interested in USGS discrete water quality data, you can use the `read_waterdata_samples` function.
-The current WQP data profiles (available on the main Water Quality Portal web pages and from the current web services, https://www.waterqualitydata.us) deliver data in "WQX version 2.0" (what we're referring to as the "legacy") format. These will remain available for a period of time after the rollout of version 3.0. Eventually they will be retired, but there is not yet an estimated time line.
+Read more about it in the vignette, [Introducing read_waterdata_samples](https://doi-usgs.github.io/dataRetrieval/articles/samples_data.html).
-# What to expect: dataRetrieval specific
+## WQP
+`dataRetrieval` WQP functions continue to default to the legacy system (that does not include post-March 2024 USGS discrete sample data). The new replacement services aren't currently set as the default because the WQP team still considers these services "beta", and therefore performance is not guaranteed. Users may encounter bugs or identify issues with the implementation of the new services: we welcome (and encourage!) your feedback to help improve these offerings; just send an email to WQX@epa.gov.
+
+The table below provides a summary of the current state of WQP functions in `dataRetrieval`.
```{r echo=FALSE}
df <- data.frame(Function = c("readWQPdata",
@@ -69,14 +105,12 @@ df <- data.frame(Function = c("readWQPdata",
                              "whatWQPmetrics",
                              "whatWQPsamples",
                              "whatWQPdata",
-                              "readNWISqw",
                              "readWQPsummary",
                              "whatNWISdata"),
                 Status = c("Set to legacy options by default. WQX3 options available.",
                            "Set to legacy options by default. WQX3 options available.",
                            "Set to legacy options by default. WQX3 options available.",
                            rep("Currently only available via legacy services.", 3),
-                            "Retired.",
                            "Does not have accurate information for USGS data.",
                            "Does not have accurate information for qw data."))
@@ -84,174 +118,43 @@ knitr::kable(df)
```
-## readWQPqw
-
-The `readWQPqw()` function is generally advertised as a user-friendly function since it only works with a known list of sites, parameter codes or characteristic names, and start/end dates.
-
-As of `dataRetrieval` 2.7.17, this function will use the default WQX version 2 dataProfile, specified by the `legacy = TRUE` argument. Setting `legacy = FALSE` will return the WQX 3.0 "narrow" dataProfile. Keep in mind the 2.0 profiles will eventually be retired. For any more flexibility, users will need to use the `readWQPdata()` function.
-
-An example of a WQX 3.0 return:
-
-```{r eval=FALSE}
-library(dataRetrieval)
-rawPcode <- readWQPqw(siteNumbers = "USGS-01594440",
-                      parameterCd = "01075",
-                      legacy = FALSE)
-```
-
-Compared to using the WQX 2.0 legacy results:
-
-```{r eval=FALSE}
-rawPcode_legacy <- readWQPqw(siteNumbers = "USGS-01594440",
-                             parameterCd = "01075",
-                             legacy = TRUE)
-
-```
-
-## readWQPdata
-
-The `readWQPdata()` function is the most flexible function to get WQP data. Currently there are 11 legacy options and 5 options that use the new WQX 3.0 profiles. Note that `readWQPdata()` does not leverage a `legacy` argument to specify which profile version the user would like returned, but instead relies on the user's specification of `service` and `dataProfile` arguments.
-
-### WQX 3.0
+### Leveraging the beta WQP services
-There are currently three WQX 3.0 "services" available: ResultWQX, StationWQX and ActivityWQX.
The "ResultWQX" service has multiple available "dataProfiles". +The beta WQP offers data in the "WQX version 3.0 format" (WQX = [Water Quality Exchange](https://exchangenetwork.net/data-exchange/wqx/)) using new "data profiles" (how the data are formatted by the WQP). There are currently three WQX 3.0 "services" available on beta: ResultWQX3, StationWQX3 and ActivityWQX3. The "ResultWQX3" service has multiple available data profiles: | Service | dataProfile | | -------------- | ---------- | -| StationWQX | | -| ResultWQX | fullPhysChem | -| ResultWQX | basicPhysChem | -| ResultWQX | narrow | -| ActivityWQX | | +| StationWQX3 | | +| ResultWQX3 | fullPhysChem | +| ResultWQX3 | basicPhysChem | +| ResultWQX3 | narrow | +| ActivityWQX3 | | -Examples: +**Quickstart:** To access the most recent USGS data from the new services in `dataRetrieval`, use the function `readWQPqw` and set the argument `legacy=FALSE` or use the function `readWQPdata` and set the argument `service = "ResultWQX3"`. -```{r eval=FALSE} +If you wish to leverage a specific "ResultWQX3" data profile using the beta services, your code might look something like this, using the very flexible `readWQPdata` function: + +```{r, eval = FALSE} data_full <- readWQPdata(siteid = "USGS-04024315", characteristicName = "pH", dataProfile = "fullPhysChem", service = "ResultWQX3") - -data_basic <- readWQPdata(siteid = "USGS-04024315", - characteristicName = "pH", - dataProfile = "basicPhysChem", - service = "ResultWQX3") - -data_narrow <- readWQPdata(siteid = "USGS-04024315", - characteristicName = "pH", - dataProfile = "narrow", - service = "ResultWQX3") - -data_sites <- readWQPdata(siteid = "USGS-04024315", - characteristicName = "pH", - service = "StationWQX3") - ``` -### WQX 2.0 - Legacy - -There are 8 services available from the legacy WQP. The Station and Result legacy services can still be accessed, but users should move to StationWQX, ResultWQX, and ActivityWQX. As other former services become available in WQX 3.0, we will update these documents. 
- 
-| Service | dataProfile | WQX 3.0 service "analog" |
-| -------------- | ---------- | ---------- |
-| Station | | StationWQX |
-| Result | resultPhysChem | ResultWQX |
-| Result | biological | 
-| Result | narrowResult | ResultWQX |
-| Activity | activityAll | ActivityWQX |
-| ActivityMetric | | |
-| Project | | |
-| ProjectMonitoringLocationWeighting | | |
-| ResultDetectionQuantitationLimit | | |
-| BiologicalMetric | | |
-
-Examples:
-
-```{r eval=FALSE}
-# Data profiles: "Organization Data" (legacy)
-org_data <- readWQPdata(
-  statecode = "WI",
-  countycode = "Dane",
-  service = "Organization"
-)
-
-# Data profiles: "Project Data" (legacy)
-project_data <- readWQPdata(
-  statecode = "WI",
-  countycode = "Dane",
-  service = "Project"
-)
-
-# Data profiles: "Project Monitoring Location Weighting Data" (legacy)
-proj_mlwd <- readWQPdata(
-  statecode = "WI",
-  countycode = "Dane",
-  service = "ProjectMonitoringLocationWeighting"
-)
-
-# Data profiles: "Sample Results (physical/chemical metadata)" (legacy)
-samp_data <- readWQPdata(
-  siteid = "USGS-04024315",
-  dataProfile = "resultPhysChem",
-  service = "Result"
-)
-
-# Data profiles: "Sample Results (biological metadata)" (legacy)
-samp_bio <- readWQPdata(
-  siteid = "USGS-04024315",
-  dataProfile = "biological",
-  service = "Result"
-)
-
-# Data profiles: "Sample Results (narrow)" (legacy)
-samp_narrow <- readWQPdata(
-  siteid = "USGS-04024315",
-  dataProfile = "narrowResult",
-  service = "Result"
-)
-
-# Data profiles: "Sampling Activity" (legacy)
-samp_activity <- readWQPdata(
-  siteid = "USGS-04024315",
-  dataProfile = "activityAll",
-  service = "Activity"
-)
-
-
-# Data profile: "Sampling Activity Metrics" (legacy)
-act_metrics <- readWQPdata(
-  statecode = "WI",
-  countycode = "Dane",
-  service = "ActivityMetric"
-)
-
-# Data profile: "Result Detection Quantitation Limit Data" (legacy)
-dl_data <- readWQPdata(
-  siteid = "USGS-04024315",
-  service = "ResultDetectionQuantitationLimit"
-)
+On the other hand, the "StationWQX3" service requires no `dataProfile` argument:
+```{r, eval = FALSE}
+data_station <- readWQPdata(siteid = "USGS-04024315",
+                            characteristicName = "pH",
+                            service = "StationWQX3")
```
-## whatNWISdata
-
-
-NWIS discrete water quality services are "frozen": the returned data availability will also be frozen ONLY for "qw" data_type_cd results. All other data types should not be affected.
-
-When the NWIS services are decommissioned (likely in 2025): there will no longer be any "qw" information provided in the output of `whatNWISdata`. Discrete water-quality availability will be available via WQP services. More information will be provided as we learn more.
-
-Here's an example of what will change:
-
-```{r eval=FALSE}
-what_NWIS <- whatNWISdata(siteNumber = "05114000")
-nrow(what_NWIS)
-[1] 407
-nrow(what_NWIS[what_NWIS$data_type_cd == "qw",])
-[1] 381
-```
+Guidance on how to use the new web page and web services is available in the [User Guide](https://www.waterqualitydata.us/beta/portal_userguide/) and [Web Services Guide](https://www.waterqualitydata.us/beta/webservices_documentation/). Additional profiles will continue to be added over time.
-So for site "05114000", there are 381 NWIS qw parameters that have been measured. Since mid-March 2024, the data availability for those 381 parameters are frozen...even if new data are collected. Eventually those 381 rows of data will not be returned, only 26 rows of data will be returned (407-381).
+
+## NWIS "qw" Data
-New services/functions are being developed to replace the lost functionality so check back here for updated information.
+As of dataRetrieval v2.7.17, `readNWISqw()` has been retired and replaced by `readWQPqw()`. The `readWQPqw()` function is generally advertised as a user-friendly function since it only works with a known list of sites, parameter codes or characteristic names, and start/end dates. Learn more about the change and where to find the new samples data in the `dataRetrieval` [Changes to NWIS QW services vignette](https://doi-usgs.github.io/dataRetrieval/articles/qwdata_changes.html) and the WDFN blog on [Improved Public Delivery of Water Quality and Field Samples](https://waterdata.usgs.gov/blog/changes-to-sample-data/).
+### A note on `whatNWISdata` and "qw" data
+Currently, any "qw" data summaries provided by `whatNWISdata` are very likely out of date and incorrect: do not use this function to query by `data_type_cd = "qw"`. When the NWIS services are decommissioned (likely in 2025) there will no longer be any "qw" information provided in the output of `whatNWISdata`. Discrete water-quality data will be available via WQP services and USGS Samples only. More information will be provided as we learn more.
diff --git a/vignettes/dataRetrieval.Rmd b/vignettes/dataRetrieval.Rmd
index 1a03416c..ebaf2df5 100644
--- a/vignettes/dataRetrieval.Rmd
+++ b/vignettes/dataRetrieval.Rmd
@@ -39,15 +39,14 @@ A quick workflow for USGS `dataRetrieval` functions:
```{r workflow, echo=TRUE,eval=FALSE}
library(dataRetrieval)
# Choptank River near Greensboro, MD
-siteNumber <- "01491000"
-ChoptankInfo <- readNWISsite(siteNumber)
+siteNumber <- "USGS-01491000"
+ChoptankInfo <- read_waterdata_monitoring_location(siteNumber)
parameterCd <- "00060"
# Raw daily data:
-rawDailyData <- readNWISdv(
-  siteNumber, parameterCd,
-  "1980-01-01", "2010-01-01"
-)
+rawDailyData <- read_waterdata_daily(monitoring_location_id = siteNumber,
+                                     parameter_code = parameterCd,
+                                     time = c("1980-01-01", "2010-01-01"))
pCode <- readNWISpCode(parameterCd)
@@ -59,8 +58,8 @@ Table 1 describes the functions available in the `dataRetrieval` package.
```{r echo=FALSE}
Functions <- c(
-  "readNWISdata",
-  "readNWISdv",
+  "read_waterdata",
+  "read_waterdata_daily",
  "readNWISuv",
  "readNWISrating",
  "readNWISmeas",
@@ -69,9 +68,11 @@ Functions <- c(
  "readNWISuse",
  "readNWISstat",
  "readNWISpCode",
-  "readNWISsite",
+  "read_waterdata_monitoring_location",
+  "read_waterdata_samples",
+  "summarize_waterdata_samples",
  "whatNWISsites",
-  "whatNWISdata",
+  "read_waterdata_ts_meta",
  "readWQPdata",
  "readWQPqw",
  "whatWQPsites",
@@ -80,27 +81,9 @@ Functions <- c(
  "whatWQPmetrics",
  "whatWQPsamples"
)
-Arguments <- c(
-  "service, tz='UTC', ...", # readNWISdata
-  "statCd='00003'", # readNWISdv
-  "tz='UTC'", # readNWISuv
-  "type='base", # readNWISrating
-  "tz='UTC'", # readNWISmeas
-  "", # readNWISpeak
-  "tz='UTC'", # readNWISgwl
-  "stateCd, countyCd, years='ALL', categories='ALL'", # readNWISuse
-  "statReportType='daily', statType='mean'", # readNWISstat
-  "", # readNWISpCode
-  "", # readNWISsite
-  "...", # whatNWISsites
-  "service, ...", # whatNWISdata
-  "...",
-  "", # readWQPdata
-  "...",
-  "...", "...", "...", "..."
-) # whatWQPsites
+
Description <- c(
-  "Data using user-specified queries", # readNWISdata
+  "Time series data using user-specified queries", # readNWISdata
  "Daily values", # readNWISdv
  "Instantaneous values", # readNWISuv
  "Rating table for active streamgage", # readNWISrating
@@ -110,7 +93,9 @@ Description <- c(
  "Water use", # readNWISuse
  "Statistical service", # readNWISstat
  "Parameter code information", # readNWISpCode
-  "Site information", # readNWISsite
+  "Site information", # read_waterdata_monitoring_location
+  "Discrete USGS water quality data", # read_waterdata_samples
+  "Discrete USGS water quality summary",
  "Site search using user-specified queries",
  "Data availability",
  "User-specified queries",
@@ -121,28 +106,20 @@ Description <- c(
  "Metric availability",
  "Sample availability"
)
-Source <- c(rep("NWIS", 13), rep("WQP", 7))
-Site <- c(
-  "opt.", rep("req.", 6), "",
-  rep("req.", 4), "opt.", "opt.", "req.", rep("opt.", 5)
-)
-parameterCd <- c(
-  "opt.", rep("req.", 2),
-  rep("", 5), "req.", "req.",
-  rep("", 2), rep("opt.", 2), "req.", rep("", 5)
-)
-start <- c(
-  "opt.", rep("req.", 2), "",
-  rep("req.", 3), "", "req.", rep("", 5), "req.", rep("opt.", 5)
-)
+Source <- c("USGS Water Data API",
+            "USGS Water Data API",
+            rep("NWIS", 8),
+            "USGS Water Data API",
+            "USGS Samples Data",
+            "USGS Samples Data",
+            "NWIS",
+            "USGS Water Data API",
+            rep("WQP", 7))
+
data.df <- data.frame(
  Name = Functions,
  `Data Returned` = Description,
-  siteNumbers = Site,
-  parameterCd = parameterCd,
-  `startDate \n endDate` = start,
-  Arguments,
  Source,
  stringsAsFactors = FALSE
)
@@ -155,7 +132,7 @@ The arguments `startDate` and `endDate` have defaults to request the maximum dat
# USGS Web Retrievals
-In this section, examples of National Water Information System (NWIS) retrievals show how to get raw data into R. This data includes [site information](#site-information), measured [parameter information](#parameter-information), historical [daily values](#daily-data), [unit values](#unit-data) (which include real-time data but can also include other sensor data stored at regular time intervals), [groundwater level data](#groundwater-level-data), [peak flow data](#peak-flow-data), [rating curve data](#rating-curve-data), [surface-water measurement data](#surface-water-measurement-data), [water use data](#water-use-data), and [statistics data](#statistics-data). The section [Embedded Metadata](#embedded-metadata) shows instructions for getting metadata that is attached to each returned data frame.
+In this section we'll show how to get raw data into R. This data includes [site information](#site-information), measured [parameter information](#parameter-information), historical [daily values](#daily-data), [unit values](#unit-data) (which include real-time data but can also include other sensor data stored at regular time intervals), [groundwater level data](#groundwater-level-data), [peak flow data](#peak-flow-data), [rating curve data](#rating-curve-data), [surface-water measurement data](#surface-water-measurement-data), [water use data](#water-use-data), and [statistics data](#statistics-data).
The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the `dataRetrieval` package as `siteNumber`).
Often (but not always), these ID's are 8 digits for surface-water sites and 15 digits for groundwater sites. The first step to finding data is discovering this `siteNumber`. There are many ways to do this, one is the [National Water Information System: Mapper](https://maps.waterdata.usgs.gov/mapper/index.html).
@@ -206,92 +183,72 @@ There are occasions where NWIS values are not reported as numbers, instead there
## Site Information
-### readNWISsite
+### read_waterdata_monitoring_location
-Use the `readNWISsite` function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. `readNWISsite` can also access information about multiple sites with a vector input.
+Use the `read_waterdata_monitoring_location` function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. `read_waterdata_monitoring_location` can also access information about multiple sites with a vector input.
```{r getSite, echo=TRUE, eval=FALSE}
-siteNumbers <- c("01491000", "01645000")
-siteINFO <- readNWISsite(siteNumbers)
+siteNumbers <- c("USGS-01491000", "USGS-01645000")
+siteINFO <- read_waterdata_monitoring_location(siteNumbers)
```
Site information is obtained from:
-[https://waterservices.usgs.gov/docs/site-service/](https://waterservices.usgs.gov/docs/site-service/)
+<https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations>
-Information on the returned data can be found with the `comment` function as described in the [Metadata](#embedded-metadata) section.
-
-```{r siteNames3, echo=TRUE, eval=FALSE}
-comment(siteINFO)
-```
+### read_waterdata_ts_meta
+To discover what time series data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the `read_waterdata_ts_meta` function.
-### whatNWISdata
-
-To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the `whatNWISdata` function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: "dv" (daily values), "uv", or "iv" (unit values), "qw" (water-quality), "sv" (sites visits), "pk" (peak measurements), "gw" (groundwater levels), "ad" (sites included in USGS Annual Water Data Reports External Link), "aw" (sites monitored by the USGS Active Groundwater Level Network External Link), and "id" (historical instantaneous values).
-
-In the following example, we limit the retrieved data to only daily data. The default for "service" is `all`, which returns all of the available data for that site. Likewise, there are arguments for parameter code (`parameterCd`) and statistic code (`statCd`) to filter the results. The default for both is to return all possible values (`all`). The returned `count_nu` for "uv" data is the count of days with returned data, not the actual count of returned values.
+In the following example, we limit the retrieved data to only daily data.
```{r getSiteExtended, echo=TRUE, eval=FALSE}
# Continuing from the previous example:
# This pulls out just the daily, mean data:
-dailyDataAvailable <- whatNWISdata(
-  siteNumber = siteNumbers,
-  service = "dv",
-  statCd = "00003"
+dailyDataAvailable <- read_waterdata_ts_meta(
+  monitoring_location_id = siteNumbers,
+  computation_period_identifier = "Daily",
+  statistic_id = "00003"
)
```
-```{r echo=FALSE}
+```{r echo=FALSE, eval=FALSE}
+
+tableData <- dailyDataAvailable[c("monitoring_location_id",
+                                  "parameter_description",
+                                  "unit_of_measure",
+                                  "begin", "end")]
+
+tableData$begin <- as.Date(tableData$begin)
+tableData$end <- as.Date(tableData$end)
+tableData <- sf::st_drop_geometry(tableData)
+
+
+knitr::kable(tableData,
+             caption = "Table 4: Reformatted version of output from the read_waterdata_ts_meta function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]")
-tableData <- data.frame(
-  siteNumbers = c(
-    "01491000",
-    "01491000",
-    "01645000",
-    "01491000",
-    "01491000",
-    "01491000"
-  ),
-  srsname = c(
-    "Temperature, water",
-    "Stream flow, mean daily",
-    "Stream flow, mean daily",
-    "Specific conductance",
-    "Suspended sediment concentration (SSC)",
-    "Suspended sediment discharge"
-  ),
-  startDate = c(
-    "2010-10-01",
-    "1948-01-01",
-    "1930-09-26",
-    "2010-10-01",
-    "1980-10-01",
-    "1980-10-01"
-  ),
-  endDate = c(
-    "2012-05-09",
-    "2017-05-17",
-    "2017-05-17",
-    "2012-05-09",
-    "1991-09-30",
-    "1991-09-30"
-  ),
-  count = c("529", "25340", "31646", "527", "4017", "4017"),
-  units = c("deg C", "ft3/s", "ft3/s", "uS/cm @25C", "mg/l", "tons/day"),
-  stringsAsFactors = FALSE
-)
-# nolint start
-kable(tableData,
-  caption = "Table 4: Reformatted version of output from the whatNWISdata function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]"
-)
-# nolint end
```
-See [Creating Tables](#creating-tables-in-microsoft-software-from-r) for instructions on converting an R data frame to a table in Microsoft® software Excel or Word to display a data availability table similar to Table 4. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries.
+Table 4: Reformatted version of output from the read_waterdata_ts_meta function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]
+
+|monitoring_location_id |parameter_description |unit_of_measure |begin |end |
+|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|:----------|:----------|
+|USGS-01491000 |Specific conductance, water, unfiltered, microsiemens per centimeter at 25 degrees Celsius |uS/cm |2010-10-01 |2012-05-09 |
+|USGS-01491000 |Dissolved oxygen, water, unfiltered, milligrams per liter |mg/l |2023-04-21 |2025-06-15 |
+|USGS-01491000 |Discharge, cubic feet per second |ft^3/s |1948-01-01 |2025-06-15 |
+|USGS-01645000 |Discharge, cubic feet per second |ft^3/s |1930-09-26 |2025-06-15 |
+|USGS-01491000 |Suspended sediment concentration, milligrams per liter |mg/l |1980-10-01 |1991-09-29 |
+|USGS-01491000 |Suspended sediment discharge, short tons per day |tons/day |1980-10-01 |1991-09-29 |
+|USGS-01491000 |Nitrate plus nitrite, water, in situ, milligrams per liter as nitrogen |mg/l |2023-08-02 |2025-06-14 |
+|USGS-01491000 |Temperature, water, degrees Celsius |degC |2023-04-21 |2025-06-15 |
+|USGS-01491000 |Turbidity, water, unfiltered, monochrome near infra-red LED light, 780-900 nm, detection angle 90 +-2.5 degrees, formazin nephelometric units (FNU) |_FNU |2023-04-21 |2025-06-14 |
+|USGS-01491000 |Temperature, water, degrees Celsius |degC |2010-10-01 |2012-05-09 |
+|USGS-01491000 |Specific conductance, water, unfiltered, microsiemens per centimeter at 25 degrees Celsius |uS/cm |2023-04-21 |2025-06-14 |
+
## Parameter Information
@@ -313,29 +270,31 @@ The dates (start and end) must be in the format "YYYY-MM-DD" (note: the user mus
```{r label=getNWISDaily, echo=TRUE, eval=FALSE}
# Choptank River near Greensboro, MD:
-siteNumber <- "01491000"
+siteNumber <- "USGS-01491000"
parameterCd <- "00060" # Discharge
startDate <- "2009-10-01"
endDate <- "2012-09-30"
-discharge <- readNWISdv(siteNumber, parameterCd, startDate, endDate)
+discharge <- read_waterdata_daily(monitoring_location_id = siteNumber,
+                                  parameter_code = parameterCd,
+                                  time = c(startDate, endDate))
```
-The column "datetime" in the returned data frame is automatically imported as a variable of class "Date" in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often "A" (approved for publication) or "P" (provisional data subject to revision).
+The column "time" in the returned data frame is automatically imported as a variable of class "Date" in R.
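+A quick sanity check on the returned structure (a sketch only; it assumes the `discharge` pull above ran, and uses the same column names as the plotting example below):
+
+```{r eval=FALSE}
+class(discharge$time)
+
+head(discharge[, c("parameter_code", "statistic_id", "time", "value")])
+```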
Another example would be a request for mean and maximum daily temperature and discharge in early 2012:
```{r label=getNWIStemperature, echo=TRUE, eval=FALSE}
-siteNumber <- "01491000"
+siteNumber <- "USGS-01491000"
parameterCd <- c("00010", "00060") # Temperature and discharge
statCd <- c("00001", "00003") # Mean and maximum
startDate <- "2012-01-01"
endDate <- "2012-05-01"
-temperatureAndFlow <- readNWISdv(siteNumber, parameterCd,
-  startDate, endDate,
-  statCd = statCd
-)
+temperatureAndFlow <- read_waterdata_daily(monitoring_location_id = siteNumber,
+                                           parameter_code = parameterCd,
+                                           statistic_id = statCd,
+                                           time = c(startDate, endDate))
```
```{r label=getNWIStemperature2, echo=FALSE, eval=TRUE}
fullPath <- file.path(filePath, fileName)
load(fullPath)
```
-The column names can be shortened and simplified using the `renameNWISColumns` function. This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discussed further in the [metadata](#embedded-metadata) section.
-
-
-```{r label=renameColumns, echo=TRUE}
-names(temperatureAndFlow)
-
-temperatureAndFlow <- renameNWISColumns(temperatureAndFlow)
-names(temperatureAndFlow)
-```
-
-```{r label=attr1, echo=TRUE}
-# Information about the data frame attributes:
-names(attributes(temperatureAndFlow))
-
-statInfo <- attr(temperatureAndFlow, "statisticInfo")
-variableInfo <- attr(temperatureAndFlow, "variableInfo")
-siteInfo <- attr(temperatureAndFlow, "siteInfo")
-```
-
-
An example of plotting the above data:
```{r}
-variableInfo <- attr(temperatureAndFlow, "variableInfo")
-siteInfo <- attr(temperatureAndFlow, "siteInfo")
+
+temperature <- temperatureAndFlow[temperatureAndFlow$parameter_code == "00010",]
+temperature <- temperature[temperature$statistic_id == "00001",]
+
+flow <- temperatureAndFlow[temperatureAndFlow$parameter_code == "00060",]
par(mar = c(5, 5, 5, 5)) # sets the size of the plot window
-plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max,
-  ylab = variableInfo$parameter_desc[1],
+plot(temperature$time, temperature$value,
+  ylab = "Maximum Temperature [C]",
  xlab = ""
)
par(new = TRUE)
-plot(temperatureAndFlow$Date,
-  temperatureAndFlow$Flow,
+plot(flow$time,
+  flow$value,
  col = "red", type = "l", xaxt = "n", yaxt = "n",
  xlab = "", ylab = "", axes = FALSE
)
axis(4, col = "red", col.axis = "red")
-mtext(variableInfo$parameter_desc[2], side = 4, line = 3, col = "red")
-title(paste(siteInfo$station_nm, "2012"))
-legend("topleft", variableInfo$param_units,
+mtext("Discharge [ft3/s]", side = 4, line = 3, col = "red")
+title("CHOPTANK RIVER NEAR GREENSBORO, MD")
+legend("topleft", unique(temperatureAndFlow$unit_of_measure),
  col = c("black", "red"), lty = c(NA, 1), pch = c(1, NA)
)
@@ -642,44 +584,6 @@ sites <- whatWQPmetrics(countycode = "US:55:025", siteType = type)
```
-# Embedded Metadata
-
-All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a `url` (returning a character of the url used to obtain the data), `siteInfo` (returning a data frame with information on sites), and `queryTime` (returning a POSIXct datetime) attributes.
For example, the url and query time used to obtain the data can be found as follows:
-
-```{r meta1, eval=FALSE}
-
-attr(dischargeWI, "url")
-
-attr(dischargeWI, "queryTime")
-
-siteInfo <- attr(dischargeWI, "siteInfo")
-```
-
-Depending on the format that the data was obtained (RDB, WaterML1, etc), there will be additional information embedded in the data frame as attributes. To discover the available attributes:
-
-```{r meta2, eval=FALSE}
-names(attributes(dischargeWI))
-```
-
-For data obtained from `readNWISuv`, `readNWISdv`, `readNWISgwl` there are two attributes that are particularly useful: `siteInfo` and `variableInfo`.
-
-```{r meta3, eval=FALSE}
-
-siteInfo <- attr(dischargeWI, "siteInfo")
-
-variableInfo <- attr(dischargeWI, "variableInfo")
-```
-
-Data obtained from `readNWISpeak`, `readNWISmeas`, and `readNWISrating`, the `comment` attribute is useful.
-
-```{r meta5, eval=FALSE}
-comment(peakData)
-
-# Which is equivalent to:
-attr(peakData, "comment")
-```
-
-
# Getting Started in R
This section describes the options for downloading and installing the `dataRetrieval` package.
diff --git a/vignettes/long_to_wide.Rmd b/vignettes/long_to_wide.Rmd
index c1b3404c..d3ecceb9 100644
--- a/vignettes/long_to_wide.Rmd
+++ b/vignettes/long_to_wide.Rmd
@@ -68,7 +68,7 @@ sites <- c("USGS-04027000", "USGS-04063700")
characteristic_names <- c("Phosphorus as phosphorus, water, filtered",
                          "Orthophosphate as phosphorus, water, filtered"
)
-nutrient_data <- read_USGS_samples(monitoringLocationIdentifier = sites,
+nutrient_data <- read_waterdata_samples(monitoringLocationIdentifier = sites,
                                   characteristicUserSupplied = characteristic_names,
                                   dataProfile = "basicphyschem")
diff --git a/vignettes/nldi.Rmd b/vignettes/nldi.Rmd
index c2e873e1..1a18a8ec 100644
--- a/vignettes/nldi.Rmd
+++ b/vignettes/nldi.Rmd
@@ -31,7 +31,7 @@ opts_chunk$set(
)
```
-This post will demonstrate `dataRetrieval` functions to query the [Network Linked Data Index](https://labs.waterdata.usgs.gov/about-nldi/index.html) (NLDI).
+This post will demonstrate `dataRetrieval` functions to query the [Network Linked Data Index](https://waterdata.usgs.gov/blog/nldi-intro/) (NLDI).
The NLDI provides an information backbone to navigate the NHDPlusV2 network and discover features indexed to the network. This process of feature discovery mirrors web-based navigation tools like Google Maps.
diff --git a/vignettes/read_waterdata_functions.Rmd b/vignettes/read_waterdata_functions.Rmd
new file mode 100644
index 00000000..3713f452
--- /dev/null
+++ b/vignettes/read_waterdata_functions.Rmd
@@ -0,0 +1,424 @@
+---
+title: "Introduction to New USGS Water Data APIs"
+editor_options:
+  chunk_output_type: console
+output:
+  rmarkdown::html_vignette:
+    toc: true
+    number_sections: false
+vignette: >
+  %\VignetteIndexEntry{Introduction to New USGS Services}
+  \usepackage[utf8]{inputenc}
+  %\VignetteEngine{knitr::rmarkdown}
+---
+
+
+```{r setup, include=FALSE, message=FALSE}
+library(knitr)
+library(dataRetrieval)
+library(dplyr)
+library(ggplot2)
+
+options(continue = " ",
+        width = 50)
+
+knitr::opts_chunk$set(
+  echo = TRUE,
+  message = FALSE,
+  warning = FALSE,
+  fig.height = 4,
+  fig.width = 7
+)
+```
+
+As we bid adieu to the NWIS web services, we welcome a host of new web service offerings: the [USGS Water Data APIs](https://api.waterdata.usgs.gov/ogcapi/v0/). This is a modern access point for USGS water data. The USGS will be modernizing [all of the NWIS web services](https://waterdata.usgs.gov/blog/api-whats-new-wdfn-apis/) in the near future.
For each of these updates, `dataRetrieval` will create a new function to access the new services and deprecate functions for accessing the legacy services.
+
+This document will introduce each new function (note: as time goes on, we'll update this document to include additional functions). The timeline for the NWIS servers being shut down is currently very uncertain. We'd recommend incorporating these new functions as soon as possible to avoid future headaches.
+
+# New Features
+
+Each new "API endpoint" will deliver a new type of USGS water data. Currently the available endpoints are "monitoring-locations", "time-series-metadata", and "daily". All of these endpoints offer some new features that the legacy services did not have:
+
+## Flexible Queries
+
+When you look at the help file for the new functions, you’ll notice there are many more arguments. These are mostly set by default to `NA`. You **DO NOT** need to (and most likely should not!) specify all of these parameters. The requested filters are appended as Boolean "AND"s, meaning if you specify a vector of monitoring locations and parameter codes, you will only get back those specified monitoring locations with the specified parameter codes.
+
+A side bonus of this is that since all of the potential arguments are defined, your favorite integrated development environment (IDE) will almost certainly have an autocomplete feature to let you tab through the potential options.
+
+## Flexible Columns Returned
+
+Users can pick which columns are returned with the "properties" argument. Available properties can be discovered with the `check_OGC_requests` function:
+
+```{r}
+daily_schema <- check_OGC_requests(endpoint = "daily", type = "schema")
+names(daily_schema$properties)
+
+```
+
+## API Tokens
+
+You can register an API key for use with USGS water data APIs. There are now limits on how many queries can be requested per IP address per hour. If you find yourself running into limits, you can request an API token here: <https://api.waterdata.usgs.gov/signup>
+
+Then save your token in your .Renviron file like this:
+
+```
+API_USGS_PAT = "my_super_secret_token"
+```
+
+You can use `usethis::edit_r_environ()` to find and open your .Renviron file. You will need to restart R for that variable to be recognized. You should not add this file to git projects or generally share your API key. Anyone else using your API key will count against the number of requests available to you!
+
+## Contextual Query Language Support
+
+The new services support [Contextual Query Language](https://www.loc.gov/standards/sru/cql/) (CQL2) syntax for flexible queries. We'll show how to use the `read_waterdata` function to make specific CQL2 queries.
+
+## Simple Features
+
+The new services provide [Simple Features](https://en.wikipedia.org/wiki/Simple_Features) functionality. The data is returned with a "geometry" column, which is a simple feature object, allowing the data to be integrated with the [`sf`](https://r-spatial.github.io/sf/) package and associated geospatial workflows.
+
+# Lessons Learned
+
+This section is a running collection of lessons learned while developing these functions and playing with the services.
+
+## Query limits
+
+A semi-common way to find a lot of data in the past would have been to use a monitoring location query to get a huge list of sites, and then use that huge list of sites (maybe winnowing it down a little) to get the data. These new services return a 403 error if your request is too big ("web server understands your request but refuses to authorize it").
This is true whether the request is a GET or a POST (something that is taken care of under the hood), and the limit appears to be on the character length of the overall request. Roughly, requests for more than about 250 monitoring locations seem to immediately return a 403 error.
+
+There are at least 2 ways to deal with this. One is to manually split the data requests and bind the results together later. The other is to use the bounding box of the initial request as an input to the data request. Potentially some sites would need to be filtered out later using this method.
+
+Example:
+
+```{r}
+ohio <- read_waterdata_monitoring_location(state_name = "Ohio",
+                                           site_type_code = "ST")
+
+```
+
+There are `r nrow(ohio)` rows returned that are stream sites in Ohio. If we tried to ask for all the discharge data over the last 7 days from that list of sites:
+
+```
+ohio_discharge <- read_waterdata_daily(monitoring_location_id = ohio$monitoring_location_id,
+                                       parameter_code = "00060",
+                                       time = "P7D")
+Error in `req_perform()`:
+! HTTP 403 Forbidden.
+• Query request denied. Possible reasons include query exceeding server limits.
+```
+
+We could use the fact that the `ohio` data frame contains geospatial information, create a bounding box, and ask for that data like this:
+
+```{r}
+ohio_discharge <- read_waterdata_daily(bbox = sf::st_bbox(ohio),
+                                       parameter_code = "00060",
+                                       time = "P7D")
+
+```
+
+A reasonable `r nrow(ohio_discharge)` rows are returned with the bounding box query.
+
+Maybe you have a list of sites that are scattered around the country, so the bounding box method might not be ideal. There are several ways to loop through a set of sites; here is one simple example:
+
+```{r}
+big_vector_of_sites <- ohio$monitoring_location_id
+
+site_list <- split(big_vector_of_sites, ceiling(seq_along(big_vector_of_sites)/200))
+
+data_returned <- data.frame()
+for(sites in site_list){
+  df_sites <- read_waterdata_daily(monitoring_location_id = sites,
+                                   parameter_code = "00060",
+                                   time = "P7D")
+  if(nrow(df_sites) == 0){
+    next
+  }
+  data_returned <- rbind(data_returned, df_sites)
+}
+
+```
+
+Note that fewer rows are returned in `data_returned` because those sites were already filtered down to just "Stream" sites; the bounding box results in `ohio_discharge` contain other monitoring location types.
+
+## Result limits
+
+There's a hard cap of 50,000 rows returned per request. This means that for a single `dataRetrieval` request, only 50,000 rows will be returned even if there is more data! If you know you are making a big request, it will be up to you to split up your request into "reasonable" chunks. Note that sometimes you'll notice a big request gets chunked up into a bunch of smaller requests: this is done automatically (it's called paging), and the 50,000 cap still applies to the total number of rows returned from all the pages.
+
+## limit vs max_results
+
+A user can specify a `limit` or `max_results`.
+
+The `max_results` argument defines how many rows are returned (assuming the data has at least `max_results` rows to return). This can be used as a handy way to make sure you aren't requesting a ton of data, perhaps to do some initial coding or troubleshooting.
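+For example, here is a minimal sketch (reusing the `ohio` object from above) that caps a potentially large pull while testing code:
+
+```{r eval=FALSE}
+# Only the first 100 rows come back, no matter how much data exists
+ohio_test <- read_waterdata_daily(bbox = sf::st_bbox(ohio),
+                                  parameter_code = "00060",
+                                  time = "P7D",
+                                  max_results = 100)
+```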
+The `limit` argument defines how many rows are returned per page of data, but does NOT affect the overall number of rows returned. By default it is set to the highest value the services allow, so with a good internet connection you can probably get away with ignoring this argument. The main reason to change it is that a spotty internet connection may have an easier time paging through smaller sets of data.
+
+## id
+
+Each API endpoint natively returns a column named "id". The results of the "id" column can be used as inputs into other endpoints; **HOWEVER**, the corresponding input arguments in those functions have different names. For example, the "id" column of the monitoring location endpoint is considered the "monitoring_location_id" when used as an input to any of the other functions.
+
+Therefore, `dataRetrieval` functions will rename the "id" column to match what it is called when used as an input elsewhere. Here are the id translations:
+
+```{r echo=FALSE}
+df <- dplyr::tibble(Function = c("read_waterdata_monitoring_location",
+                                 "read_waterdata_ts_meta",
+                                 "read_waterdata_daily"),
+                    "ID returned" = c("monitoring_location_id",
+                                      "time_series_id",
+                                      "daily_id"))
+
+knitr::kable(df)
+```
+
+If a user would prefer the columns to come back as "id", they can specify that using the `properties` argument:
+
+```{r}
+site <- "USGS-02238500"
+
+site_1 <- read_waterdata_monitoring_location(monitoring_location_id = site,
+                                             properties = c("monitoring_location_id",
+                                                            "state_name",
+                                                            "country_name"))
+names(site_1)
+site_2 <- read_waterdata_monitoring_location(monitoring_location_id = site,
+                                             properties = c("id",
+                                                            "state_name",
+                                                            "country_name"))
+names(site_2)
+
+
+```
+
+# New Functions
+
+As new API endpoints come online, this section will be updated with any `dataRetrieval` function that is created.
+
+## Monitoring Location
+
+The `read_waterdata_monitoring_location` function replaces the `readNWISsite` function.
+
+`r dataRetrieval:::get_description("monitoring-locations")`
+
+To access these services on a web browser, go to <https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations>.
+
+Here is a simple example of requesting one known USGS site:
+
+```{r}
+sites_information <- read_waterdata_monitoring_location(monitoring_location_id = "USGS-01491000")
+```
+
+The output includes the following information:
+
+```{r echo=FALSE}
+knitr::kable(t(sites_information))
+```
+
+Maybe that is more information than you need. You can specify which columns get returned with the "properties" argument:
+
+```{r}
+site_info <- read_waterdata_monitoring_location(monitoring_location_id = "USGS-01491000",
+                                                properties = c("monitoring_location_id",
+                                                               "site_type",
+                                                               "drainage_area",
+                                                               "monitoring_location_name"))
+
+knitr::kable(site_info)
+
+```
+
+Any of those original outputs can also be inputs to the function! So let's say we want to find all the stream sites in Wisconsin:
+
+```{r}
+sites_wi <- read_waterdata_monitoring_location(state_name = "Wisconsin",
+                                               site_type = "Stream")
+```
+
+The returned data includes a column "geometry", which is a collection of simple feature (sf) points. This allows for seamless integration with the `sf` package. Here are 2 quick examples of using the `sf` object in ggplot2 and leaflet:
+
+```{r}
+library(ggplot2)
+
+ggplot(data = sites_wi) +
+  geom_sf() +
+  theme_minimal()
+```
+
+```{r}
+library(leaflet)
+
+leaflet_crs <- "+proj=longlat +datum=WGS84" #default leaflet crs
+
+leaflet(data = sites_wi |>
+          sf::st_transform(crs = leaflet_crs)) |>
+  addProviderTiles("CartoDB.Positron") |>
+  addCircleMarkers(popup = ~monitoring_location_name,
+                   radius = 0.1,
+                   opacity = 1)
+
+```
+
+## Time Series Metadata
+
+The `read_waterdata_ts_meta` function replaces the `whatNWISdata` function.
+
+`r dataRetrieval:::get_description("time-series-metadata")`
+
+To access these services on a web browser, go to <https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata>.
+
+
+```{r}
+ts_available <- read_waterdata_ts_meta(monitoring_location_id = "USGS-01491000",
+                                       parameter_code = c("00060", "00010"))
+
+```
+
+The returned table gives information on all available time series. For instance, we can pull a few columns out and see when each time series started, ended, or was last modified.
+
+```{r echo=FALSE}
+ts_available1 <- ts_available[,c("parameter_name", "statistic_id", "begin", "end", "last_modified")]
+ts_available1 <- sf::st_drop_geometry(ts_available1)
+ts_available1$begin <- as.Date(ts_available1$begin)
+ts_available1$end <- as.Date(ts_available1$end)
+ts_available1$last_modified <- as.Date(ts_available1$last_modified)
+knitr::kable(ts_available1)
+
+```
+
+## Daily Values
+
+The `read_waterdata_daily` function replaces the `readNWISdv` function.
+
+`r dataRetrieval:::get_description("daily")`
+
+To access these services on a web browser, go to <https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily>.
+
+
+
+```{r}
+library(dataRetrieval)
+
+daily_modern <- read_waterdata_daily(monitoring_location_id = "USGS-01491000",
+                                     parameter_code = c("00060", "00010"),
+                                     statistic_id = "00003",
+                                     time = c("2023-10-01", "2024-09-30"))
+```
+
+The first thing you might notice is that the new service serves data in a "long" format, which means there is just a single observation per row of the data frame (see: [Pivot Data](articles/long_to_wide.html)). Many functions are easier and more efficient to use with a "long" data frame. For instance, here we can see how `ggplot2` can use the parameter_code column to create a "facet" plot:
+
+```{r}
+library(ggplot2)
+
+ggplot(data = daily_modern) +
+  geom_point(aes(x = time, y = value,
+                 color = approval_status)) +
+  facet_grid(parameter_code ~ ., scale = "free") +
+  theme_bw()
+
+```
+
+## General Retrieval
+
+The `read_waterdata` function replaces the `readNWISdata` function. This is a lower-level, generalized function for querying any of the API endpoints.
+
+The new APIs can handle complex requests. For those queries, users will need to construct their own request using Contextual Query Language (CQL2). There's an excellent article introducing CQL2 syntax if you'd like to dig deeper.
+
+Let's try to find sites in Wisconsin and Minnesota that have a drainage area greater than 1000 mi^2.
+
+```{r}
+cql <- '{
+  "op": "and",
+  "args": [
+    {
+      "op": "in",
+      "args": [
+        { "property": "state_name" },
+        [ "Wisconsin", "Minnesota" ]
+      ]
+    },
+    {
+      "op": ">",
+      "args": [
+        { "property": "drainage_area" },
+        1000
+      ]
+    }
+  ]
+}'
+
+sites_mn_wi <- read_waterdata(service = "monitoring-locations",
+                              CQL = cql)
+
+```
+
+Let's see what that looks like:
+
+```{r}
+leaflet_crs <- "+proj=longlat +datum=WGS84" #default leaflet crs
+
+pal <- colorNumeric("viridis", sites_mn_wi$drainage_area)
+
+leaflet(data = sites_mn_wi |>
+          sf::st_transform(crs = leaflet_crs)) |>
+  addProviderTiles("CartoDB.Positron") |>
+  addCircleMarkers(popup = ~monitoring_location_name,
+                   color = ~ pal(drainage_area),
+                   radius = 0.1,
+                   opacity = 1) |>
+  addLegend(pal = pal,
+            position = "bottomleft",
+            title = "Drainage Area",
+            values = ~drainage_area)
+
+```
+
+## Discrete Samples
+
+Discrete USGS water quality data can be accessed via the `read_waterdata_samples` function. While this is a new, modern USGS endpoint, it is not served from the same infrastructure as the rest of the functions described here. See [Samples Data](articles/samples_data.html) for information on accessing USGS discrete water quality data.
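+As a quick sketch (the site, pcode "00665" for total phosphorus, and profile below are chosen purely for illustration):
+
+```{r eval=FALSE}
+phos_sketch <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-01491000",
+                                      usgsPCode = "00665",
+                                      dataProfile = "narrow")
+```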
+
+# Notes on dataRetrieval development
+
+## New Features
+
+### Style
+
+New functions will use a "snake case", such as `read_waterdata_samples`. Older functions use camel case, such as `readNWISdv`. The difference is the underscore between words. This should be a handy way to tell the difference between newer modern data access, and the older traditional functions.
+
+### Structure
+
+Historically, we allowed users to customize their queries via the `...` argument structure. With `...`, users needed to know the exact names of query parameters before using the function. Now, the new functions will include **ALL** possible arguments that the web service APIs support. This will allow users to use tab-autocompletes (available in RStudio and other IDEs). **Users will need to understand that they are not required to specify all of these parameters. In fact, it is not advisable: the systems can get bogged down with redundant query parameters.** We expect this will be easier for users, but it might take some time to smooth out the documentation and test usability. There may be additional consequences; for example, users won't be able to build up argument lists to pass into the function.
+
+### Dependencies
+
+Under the hood, `dataRetrieval` changed the dependency from `httr` to `httr2`. `httr2` is the modern R package for web requests that is actively developed/maintained. As we develop functions for the modern USGS web services, we'll continue to explore updating package dependencies. Since the new services offer geospatial output, we also now require the `sf` package. The `whisker` package was also included to help create POST CQL2 queries.
+
+## Developmental workflow
+
+CRAN-stable documentation will be available on the GitHub pages: <https://doi-usgs.github.io/dataRetrieval/>
+
+In-development documentation will be available on the USGS GitLab pages:
+
+Development of `dataRetrieval` will happen on a git branch called "develop". The "develop" branch will only move to the "main" branch when we submit to CRAN, unless there are bug fixes that pertain to the CRAN release. The "develop" branch WILL change frequently, and there are no promises of future behavior. Users must accept that they are using those functions at their own risk. If you are willing to accept this risk, the installation instructions are:
+
+```{r eval=FALSE}
+library(remotes)
+
+install_github("DOI-USGS/dataRetrieval",
+               ref = "develop")
+
+```
+
+# HELP!
+
+That's a lot of new information and changes. There are certainly going to be scripts that have been passed down through the years that will start breaking once the NWIS servers are decommissioned.
+
+Check back on the documentation often: <https://doi-usgs.github.io/dataRetrieval/>
+
+Peruse the "Additional Articles"; when we find common issues people have with converting their old workflows, we will try to add articles to clarify new workflows.
+
+Currently, you might be interested in:
+
+* [General Tutorial](tutorial.html)
+
+* [Pivot Help](long_to_wide.html)
+
+* [Joining by closest date](join_by_closest.html)
+
+If you have additional questions, email comptools@usgs.gov. General questions and bug reports can be reported here:
+<https://github.com/DOI-USGS/dataRetrieval/issues>
diff --git a/vignettes/samples_data.Rmd b/vignettes/samples_data.Rmd
index 14c67de5..8895130d 100644
--- a/vignettes/samples_data.Rmd
+++ b/vignettes/samples_data.Rmd
@@ -1,5 +1,5 @@
---
-title: "Introducing read_USGS_samples"
+title: "Introducing read_waterdata_samples"
author: Laura A.
DeCicco
editor_options:
  chunk_output_type: console
@@ -8,7 +8,7 @@ output:
    toc: true
    number_sections: true
vignette: >
-  %\VignetteIndexEntry{Introducing read_USGS_samples}
+  %\VignetteIndexEntry{Introducing read_waterdata_samples}
  \usepackage[utf8]{inputenc}
  %\VignetteEngine{knitr::rmarkdown}
---
@@ -91,7 +91,7 @@ This is a modern access point for USGS discrete water quality data. The USGS is
### Style
-New functions will use a "snake case", such as "read_USGS_samples". Older functions use camel case, such as "readNWISdv". The difference is the underscore between words. This should be a handy way to tell the difference between newer modern data access, and the older traditional functions.
+New functions will use a "snake case", such as "read_waterdata_samples". Older functions use camel case, such as "readNWISdv". The difference is the underscore between words. This should be a handy way to tell the difference between newer modern data access, and the older traditional functions.
### Structure
@@ -112,13 +112,13 @@ And here is a link to the web service documentation:
## Retrieving data from a known site
-Let's say we have a USGS site. We can check the data available at that site using `summarize_USGS_samples` like this:
+Let's say we have a USGS site. We can check the data available at that site using `summarize_waterdata_samples` like this:
```{r}
library(dataRetrieval)
site <- "USGS-04183500"
-data_at_site <- summarize_USGS_samples(monitoringLocationIdentifier = site)
+data_at_site <- summarize_waterdata_samples(monitoringLocationIdentifier = site)
```
@@ -137,11 +137,11 @@ DT::datatable(formatted_data_at_site, rownames = FALSE)
We see there are `r data_at_site$resultCount[data_at_site$characteristicUserSupplied == "Phosphorus as phosphorus, water, unfiltered"]` unfiltered phosphorus values available. Note that if we ask for a simple characteristic = "Phosphorus", we'd get back both filtered and unfiltered, which might not be appropriate to mix together in an analysis. "characteristicUserSupplied" allows us to query by a very specific set of data. It is similar to a long-form USGS parameter code.
-To get that data, use the `read_USGS_samples` function:
+To get that data, use the `read_waterdata_samples` function:
```{r}
user_char <- "Phosphorus as phosphorus, water, unfiltered"
-phos_data <- read_USGS_samples(monitoringLocationIdentifier = site,
+phos_data <- read_waterdata_samples(monitoringLocationIdentifier = site,
                               characteristicUserSupplied = user_char)
```
@@ -150,7 +150,7 @@ Inspecting phos_data, there are `r ncol(phos_data)` columns (!). That is because
Instead of using the "Full physical chemical" profile, we could ask for the "Narrow" profile, which contains fewer columns:
```{r}
-phos_narrow <- read_USGS_samples(monitoringLocationIdentifier = site,
+phos_narrow <- read_waterdata_samples(monitoringLocationIdentifier = site,
                                 characteristicUserSupplied = user_char,
                                 dataProfile = "narrow")
```
@@ -206,7 +206,7 @@ North and south are latitude values; east and west are longitude values.
A vecto bbox <- c(-90.8, 44.2, -89.9, 45.0) user_char <- "Phosphorus as phosphorus, water, unfiltered" -bbox_sites <- read_USGS_samples(boundingBox = bbox, +bbox_sites <- read_waterdata_samples(boundingBox = bbox, characteristicUserSupplied = user_char, dataType = "locations", dataProfile = "site") @@ -224,7 +224,7 @@ Hydrologic Unit Codes (HUCs) identify physical areas within the US that drain to ```{r} -huc_sites <- read_USGS_samples(hydrologicUnit = "070700", +huc_sites <- read_waterdata_samples(hydrologicUnit = "070700", characteristicUserSupplied = user_char, dataType = "locations", dataProfile = "site") @@ -242,7 +242,7 @@ map_it(huc_sites) Location latitude (pointLocationLatitude) and longitude (pointLocationLongitude), and the radius (pointLocationWithinMiles) are required for this geographic filter: ```{r} -point_sites <- read_USGS_samples(pointLocationLatitude = 43.074680, +point_sites <- read_waterdata_samples(pointLocationLatitude = 43.074680, pointLocationLongitude = -89.428054, pointLocationWithinMiles = 20, characteristicUserSupplied = user_char, @@ -259,13 +259,13 @@ map_it(point_sites) ### countyFips County query parameter. To get a list of available counties, -run `check_param("counties")`. The "Fips" values can be created using the function `countyCdLookup`. +run `check_waterdata_sample_params("counties")`. The "Fips" values can be created using the function `countyCdLookup`. ```{r} dane_county <- countyCdLookup("WI", "Dane", outputType = "fips") -county_sites <- read_USGS_samples(countyFips = dane_county, +county_sites <- read_waterdata_samples(countyFips = dane_county, characteristicUserSupplied = user_char, dataType = "locations", dataProfile = "site") @@ -280,13 +280,13 @@ map_it(county_sites) ### stateFips State query parameter. To get a list of available state fips values, -run `check_param("states")`. The "fips" values can be created using the function +run `check_waterdata_sample_params("states")`. The "fips" values can be created using the function `stateCdLookup`. ```{r} state_fip <- stateCdLookup("WI", outputType = "fips") -state_sites <- read_USGS_samples(stateFips = state_fip, +state_sites <- read_waterdata_samples(stateFips = state_fip, characteristicUserSupplied = user_char, dataType = "locations", dataProfile = "site") @@ -307,7 +307,7 @@ Additional parameters can be included to limit the results coming back from a re Site type code query parameter. ```{r} -site_type_info <- check_param("sitetype") +site_type_info <- check_waterdata_sample_params("sitetype") site_type_info$typeCode ``` @@ -324,7 +324,7 @@ site_type_info$typeLongName Sample media refers to the environmental medium that was sampled or analyzed. ```{r} -media_info <- check_param("samplemedia") +media_info <- check_waterdata_sample_params("samplemedia") media_info$activityMedia ``` @@ -333,34 +333,34 @@ media_info$activityMedia Characteristic group is a broad category describing the sample measurement. The options for this parameter generally follow the values described in the Water Quality Portal [User Guide](https://www.waterqualitydata.us/portal_userguide), but not always. ```{r} -group_info <- check_param("characteristicgroup") +group_info <- check_waterdata_sample_params("characteristicgroup") group_info$characteristicGroup ``` ### characteristic -Characteristic is a specific category describing the sample. See `check_param("characteristics")` for a full list, below is a small sample: +Characteristic is a specific category describing the sample. 
@@ -333,34 +333,34 @@ media_info$activityMedia
 
 Characteristic group is a broad category describing the sample measurement. The options for this parameter generally follow the values described in the Water Quality Portal [User Guide](https://www.waterqualitydata.us/portal_userguide), but not always.
 
 ```{r}
-group_info <- check_param("characteristicgroup")
+group_info <- check_waterdata_sample_params("characteristicgroup")
 group_info$characteristicGroup
 ```
 
 ### characteristic
 
-Characteristic is a specific category describing the sample. See `check_param("characteristics")` for a full list, below is a small sample:
+Characteristic is a specific category describing the sample. See `check_waterdata_sample_params("characteristics")` for a full list; below is a small sample:
 
 ```{r}
-characteristic_info <- check_param("characteristics")
+characteristic_info <- check_waterdata_sample_params("characteristics")
 head(unique(characteristic_info$characteristicName))
 ```
 
 ### characteristicUserSupplied
 
-Observed property is the USGS term for the constituent sampled and the property name gives a detailed description of what was sampled. Observed Property is mapped to characteristicUserSupplied, and replaces the parameter name and pcode USGS previously used to describe discrete sample data. See `check_param("observedproperty")` for a full list, below is a small sample:
+Observed property is the USGS term for the constituent sampled, and the property name gives a detailed description of what was sampled. Observed Property is mapped to characteristicUserSupplied, and replaces the parameter name and pcode USGS previously used to describe discrete sample data. See `check_waterdata_sample_params("observedproperty")` for a full list; below is a small sample:
 
 ```{r}
-char_us <- check_param("observedproperty")
+char_us <- check_waterdata_sample_params("observedproperty")
 head(char_us$observedProperty)
 ```
 
 ### usgsPCode
 
-USGS parameter code. See `check_param("characteristics")` for a full list, below is a small sample:
+USGS parameter code. See `check_waterdata_sample_params("characteristics")` for a full list; below is a small sample:
 
 ```{r}
-characteristic_info <- check_param("characteristics")
+characteristic_info <- check_waterdata_sample_params("characteristics")
 head(unique(characteristic_info$parameterCode))
 ```
@@ -379,7 +379,7 @@ Specify one or both of these fields to filter on the activity start date. The se
 
 For instance, let's grab Wisconsin sites that measured phosphorus in October or November of 2024:
 
 ```{r}
-state_sites_recent <- read_USGS_samples(stateFips = state_fip,
+state_sites_recent <- read_waterdata_samples(stateFips = state_fip,
                                         characteristicUserSupplied = user_char,
                                         dataType = "locations",
                                         activityStartDateLower = "2024-10-01",
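To pin the activity window on both ends, a minimal sketch; `activityStartDateUpper` is assumed here to be the companion upper-bound field to the `activityStartDateLower` parameter described above:

```r
# Sketch: bound the activity start date on both ends
# (activityStartDateUpper is an assumed companion to activityStartDateLower)
state_sites_fall <- read_waterdata_samples(stateFips = state_fip,
                                           characteristicUserSupplied = user_char,
                                           dataType = "locations",
                                           dataProfile = "site",
                                           activityStartDateLower = "2024-10-01",
                                           activityStartDateUpper = "2024-11-30")
```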
@@ -401,7 +401,7 @@ The above examples showed how to find sites within a geographic filter. We can u
 
 ```{r}
 dane_county <- countyCdLookup("WI", "Dane")
 
-county_lake_sites <- read_USGS_samples(countyFips = dane_county,
+county_lake_sites <- read_waterdata_samples(countyFips = dane_county,
                                        characteristicUserSupplied = user_char,
                                        siteTypeName = "Lake, Reservoir, Impoundment",
                                        dataType = "locations",
@@ -409,13 +409,13 @@ county_lake_sites <- read_USGS_samples(countyFips = dane_county,
 ```
 
-There are only `r nrow(county_lake_sites)` lake sites measuring phosphorus in Dane County, WI. We can get a summary of the data at each site using the `summarize_USGS_samples` function. This function only accepts 1 site at a time:
+There are only `r nrow(county_lake_sites)` lake sites measuring phosphorus in Dane County, WI. We can get a summary of the data at each site using the `summarize_waterdata_samples` function. This function only accepts 1 site at a time:
 
 ```{r message=FALSE}
 all_data <- data.frame()
 for(i in county_lake_sites$Location_Identifier){
-  avail_i <- summarize_USGS_samples(monitoringLocationIdentifier = i)
+  avail_i <- summarize_waterdata_samples(monitoringLocationIdentifier = i)
   all_data <- avail_i |>
     filter(characteristicUserSupplied == user_char) |>
     bind_rows(all_data)
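Because the function takes one site per call, the loop can equally be phrased as an apply-and-bind pattern; a minimal sketch under the same assumptions as the chunk above (`county_lake_sites`, `user_char`, and dplyr loaded):

```r
# Sketch: same one-site-at-a-time pattern as the loop above, via lapply()
summaries <- lapply(county_lake_sites$Location_Identifier,
                    function(i) summarize_waterdata_samples(monitoringLocationIdentifier = i))
all_data <- bind_rows(summaries) |>
  filter(characteristicUserSupplied == user_char)
```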
diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd
index 34c15676..68e2605f 100644
--- a/vignettes/tutorial.Rmd
+++ b/vignettes/tutorial.Rmd
@@ -5,10 +5,12 @@ date: "`r format(Sys.time(), '%B %d, %Y')`"
 output:
   rmarkdown::html_vignette
 vignette: >
-  %\VignetteEngine{knitr::rmarkdown}
   %\VignetteIndexEntry{dataRetrieval Tutorial}
   %\VignetteDepends{dplyr}
   \usepackage[utf8]{inputenc}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options:
+  chunk_output_type: console
 ---
 
 ```{r setup, include=FALSE}
@@ -47,7 +49,7 @@ vignette("dataRetrieval", package = "dataRetrieval")
 
 Additionally, each function has a help file. These can be accessed by typing a question mark, followed by the function name in the R console:
 
 ```{r echo=TRUE, eval=FALSE}
-?readNWISuv
+?read_waterdata_daily
 ```
 
 Each function's help file has working examples to demonstrate the usage. The examples may have comments "## Not run". These examples CAN be run, they just are not run by the CRAN maintainers due to the external service calls.
 
@@ -74,8 +76,8 @@ Functions in `dataRetrieval` look like `readNWISdv`, `readNWISuv`, `readWQPqw`,
   + "read" will access full data sets
   + "what" will access data availability
 * _Middle_: "NWIS", "USGS", "WQP":
-  + NWIS functions get data from NWIS web services.
-  + USGS functions are the functions that will eventually replace the NWIS functions. These pull from modern USGS API services.
+  + NWIS functions get data from legacy NWIS web services.
+  + USGS functions will eventually replace the legacy NWIS functions. These pull from modern USGS API services.
   + WQP functions are for discrete water-quality data from the Water Quality Portal.
 * _Suffix_: "data" or other:
   + Functions that end in "data": These are flexible, powerful functions that allow complex user queries.
@@ -89,32 +91,35 @@ There are many types of data served from NWIS. To understand how the services ar
 
 * NWIS has traditionally been the source for all USGS water data
-* NWIS will be retired (scheduled late 2026):
+* Legacy NWIS services will be retired (scheduled for 2026, though the timing is uncertain):
 * USGS functions will slowly replace NWIS functions
-  * `read_USGS_samples` is the first replacement
+  * `read_waterdata_samples` has replaced `readNWISqw`
+  * `read_waterdata_daily` can replace `readNWISdv`
+  * `read_waterdata_monitoring_location` can replace `readNWISsite`
+  * `read_waterdata_ts_meta` can replace `whatNWISdata`
 * Discrete water quality data:
   * WQP functions should be used when accessing non-USGS discrete water quality data
-  * `read_USGS_samples` should be used for USGS data
+  * `read_waterdata_samples` should be used for USGS data
 
 # NWIS Data: Current NWIS offerings
 
-| data_type_cd |Function| Data description |
+| data_type_cd |Function| Data description | Replacement Function |
 |--------|:-------|------:|-------:|
-|uv|[readNWISuv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuv.html)|Continuous data|
-|dv|[readNWISdv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdv.html)|Daily aggregated |
-|gwlevels|[readNWISgwl](https://doi-usgs.github.io/dataRetrieval/reference/readNWISgwl.html)|Groundwater levels |
-|site|[readNWISsite](https://doi-usgs.github.io/dataRetrieval/reference/readNWISsite.html)|Site metadata|
-|pcode|[readNWISpCode](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpCode.html)|Parameter code metadata |
-|stat|[readNWISstat](https://doi-usgs.github.io/dataRetrieval/reference/readNWISstat.html)| Site statistics |
-|rating|[readNWISrating](https://doi-usgs.github.io/dataRetrieval/reference/readNWISrating.html)| Rating curves|
-|peak|[readNWISpeak](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpeak.html)|Peak flow|
-|use|[readNWISuse](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuse.html)|Water Use|
-|meas|[readNWISmeas](https://doi-usgs.github.io/dataRetrieval/reference/readNWISmeas.html)|Discrete surface water|
-| | [readNWISdata](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdata.html) | General data import for NWIS|
+|uv|[readNWISuv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuv.html)|Continuous data| None yet |
+|dv|[readNWISdv](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdv.html)|Daily aggregated | [read_waterdata_daily](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_daily.html) |
+|gwlevels|[readNWISgwl](https://doi-usgs.github.io/dataRetrieval/reference/readNWISgwl.html)|Groundwater levels | None yet |
+|site|[readNWISsite](https://doi-usgs.github.io/dataRetrieval/reference/readNWISsite.html)|Site metadata| [read_waterdata_monitoring_location](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata_monitoring_location.html) |
+|pcode|[readNWISpCode](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpCode.html)|Parameter code metadata | None yet |
+|stat|[readNWISstat](https://doi-usgs.github.io/dataRetrieval/reference/readNWISstat.html)| Site statistics | None yet |
+|rating|[readNWISrating](https://doi-usgs.github.io/dataRetrieval/reference/readNWISrating.html)| Rating curves| None yet |
+|peak|[readNWISpeak](https://doi-usgs.github.io/dataRetrieval/reference/readNWISpeak.html)|Peak flow| None yet |
+|use|[readNWISuse](https://doi-usgs.github.io/dataRetrieval/reference/readNWISuse.html)|Water Use| None yet |
+|meas|[readNWISmeas](https://doi-usgs.github.io/dataRetrieval/reference/readNWISmeas.html)|Discrete surface water| None yet |
+| | [readNWISdata](https://doi-usgs.github.io/dataRetrieval/reference/readNWISdata.html) | General data import for NWIS| [read_waterdata](https://doi-usgs.github.io/dataRetrieval/reference/read_waterdata.html) |
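The table's "Replacement Function" column, expressed in code; a minimal sketch whose site number and argument names are taken from examples later in this tutorial:

```r
# Sketch: legacy calls (commented) and their modern replacements
# readNWISsite("05407000"); whatNWISdata(siteNumber = "05407000")
site_meta <- read_waterdata_monitoring_location("USGS-05407000")
ts_meta <- read_waterdata_ts_meta(monitoring_location_id = "USGS-05407000")
```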
 
 ## USGS Basic Retrievals
 
@@ -142,20 +147,14 @@ df <- data.frame(
 
 names(df) <- c("Parameter Codes", "Short Name")
 
-knitr::kable(df)
-```
-
-
-
-```{r echo=FALSE, eval=TRUE}
-df <- data.frame(
+df2 <- data.frame(
   pCode = c("00001", "00002", "00003", "00008"),
   shName = c("Maximum", "Minimum", "Mean", "Median")
 )
 
-names(df) <- c("Statistic Codes", "Short Name")
+names(df2) <- c("Statistic Codes", "Short Name")
 
-knitr::kable(df)
+knitr::kable(list(df, df2))
 ```
@@ -197,17 +196,14 @@ You can use the "user-friendly" functions. These functions take the same 4 input
 
 Let's start by asking for discharge (parameter code = 00060) at a site right next to the old USGS office in Wisconsin (Pheasant Branch Creek).
 
 ```{r echo=TRUE, eval=TRUE}
-siteNo <- "05427948"
+siteNo <- "USGS-05427948"
 pCode <- "00060"
 start.date <- "2023-10-01"
 end.date <- "2024-09-30"
 
-pheasant <- readNWISdv(
-  siteNumbers = siteNo,
-  parameterCd = pCode,
-  startDate = start.date,
-  endDate = end.date
-)
+pheasant <- read_waterdata_daily(monitoring_location_id = siteNo,
+                                 parameter_code = pCode,
+                                 time = c(start.date, end.date))
 ```
 
 From the Pheasant Creek example, let's look at the data. The column names are:
 
@@ -217,53 +213,28 @@ names(pheasant)
 ```
 
-The names of the columns are based on the parameter and statistic codes. In many cases, you can clean up the names with the convenience function `renameNWISColumns`:
-
-```{r echo=TRUE, eval=TRUE}
-pheasant <- renameNWISColumns(pheasant)
-names(pheasant)
-```
-
-The returned data also has several attributes attached to the data frame. To see what the attributes are:
-
-```{r echo=TRUE, eval=TRUE}
-names(attributes(pheasant))
-```
-
-Each `dataRetrieval` return should have the attributes: url, siteInfo, and variableInfo. Additional attributes are available depending on the data service.
-
-To access the attributes:
-
-```{r echo=TRUE, eval=TRUE}
-url <- attr(pheasant, "url")
-url
-```
-
-[Raw Data](`r url`)
-
-Make a simple plot to see the data:
+Let's make a simple plot to see the data:
 
 ```{r echo=TRUE, eval=TRUE, fig.height=3.5}
 library(ggplot2)
 ts <- ggplot(
   data = pheasant,
-  aes(Date, Flow)
-) +
+  aes(time, value)) +
   geom_line()
 ts
 ```
 
-Then use the attributes attached to the data frame to create better labels:
+Then we can use the `readNWISpCode` and `read_waterdata_monitoring_location` functions to create better labels:
 
 ```{r echo=TRUE, eval=TRUE, fig.height=3.5}
-parameterInfo <- attr(pheasant, "variableInfo")
-siteInfo <- attr(pheasant, "siteInfo")
+parameterInfo <- readNWISpCode(pCode)
+siteInfo <- read_waterdata_monitoring_location(siteNo)
 
 ts <- ts +
   xlab("") +
-  ylab(parameterInfo$variableDescription) +
-  ggtitle(siteInfo$station_nm)
+  ylab(parameterInfo$parameter_nm) +
+  ggtitle(siteInfo$monitoring_location_name)
 ts
 ```
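For a quick look at the tidy columns the daily service returns (the same `time` and `value` columns mapped in the plot above), a minimal sketch assuming the `pheasant` chunk has run:

```r
# Sketch: peek at the two columns plotted above
head(pheasant[, c("time", "value")])
```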
All rows that have "qw" in the column data_type_cd will come from the Water Quality Portal. +First verify that the data you think is available is actually associated with the location. For time series data, use the `read_NWIS_ts_meta` function to find out the available time series data. ```{r echo=TRUE} library(dplyr) -site <- "05407000" -data_available <- whatNWISdata(siteNumber = site) +site <- "USGS-05407000" +ts_data_available <- read_waterdata_ts_meta(monitoring_location_id = site) + -data_available_NWIS <- data_available |> - select(data_type_cd, parm_cd, stat_cd, - begin_date, end_date, count_nu) |> - filter(!data_type_cd %in% c("qw", "ad")) |> - arrange(data_type_cd) +data_available <- ts_data_available |> + sf::st_drop_geometry() |> + mutate(begin = as.Date(begin), + end = as.Date(end)) |> + select(parameter_name, parameter_code, statistic_id, computation_identifier, + begin, end) ``` -This is the only available data from NWIS for site `r site`. ```{r echo=FALSE} -datatable(data_available_NWIS, +datatable(data_available, rownames = FALSE, options = list(pageLength = 7, lengthChange = FALSE, @@ -300,42 +272,61 @@ datatable(data_available_NWIS, ``` -The data_type_cd can be used to figure out where to request data: - -```{r echo=FALSE} -df <- data.frame(data_type_cd = c("dv", "uv", "pk", "sv", "gwl"), - readNWIS = c("readNWISdv", "readNWISuv", - "readNWISpeak", "readNWISmeas", "readNWISgwl"), - readNWISdata = c('readNWISdata(..., service = "dv")', - 'readNWISdata(..., service = "iv")', - 'readNWISdata(..., service = "peak")', - 'Not available', - 'readNWISdata(..., service = "gwlevels")')) -knitr::kable(df) -``` -So to get all the NWIS data from the above site: +The time series that have "Instantaneous" in the computation_identifier column will be available in the instantaneous data service (currently `readNWISuv`), and the rest of the data will be available in the daily service (`read_waterdata_daily`). 
 
 ```{r eval=FALSE, echo=TRUE}
-dv_pcodes <- data_available_NWIS$parm_cd[data_available_NWIS$data_type_cd == "dv"]
-stat_cds <- data_available_NWIS$stat_cd[data_available_NWIS$data_type_cd == "dv"]
+dv_pcodes <- data_available$parameter_code[data_available$computation_identifier != "Instantaneous"]
+stat_cds <- data_available$statistic_id[data_available$computation_identifier != "Instantaneous"]
 
-dv_data <- readNWISdv(siteNumbers = site,
-                      parameterCd = unique(dv_pcodes),
-                      statCd = unique(stat_cds))
+dv_data <- read_waterdata_daily(monitoring_location_id = site,
+                                parameter_code = unique(dv_pcodes),
+                                statistic_id = unique(stat_cds))
 
-uv_pcodes <- data_available_NWIS$parm_cd[data_available_NWIS$data_type_cd == "uv"]
+uv_pcodes <- data_available$parameter_code[data_available$computation_identifier == "Instantaneous"]
 
-uv_data <- readNWISuv(siteNumbers = site,
+uv_data <- readNWISuv(siteNumbers = gsub("USGS-", "", site),
                       parameterCd = unique(uv_pcodes))
 
-peak_data <- readNWISpeak(site)
+peak_data <- readNWISpeak(gsub("USGS-", "", site))
+
 ```
+
+For discrete water quality data, use the `summarize_waterdata_samples` function:
+
+```{r echo=TRUE}
+discrete_data_available_all <- summarize_waterdata_samples(site)
+
+discrete_data_available <- discrete_data_available_all |>
+  select(parameter_name = characteristicUserSupplied,
+         begin = firstActivity, end = mostRecentActivity,
+         count = resultCount)
+
+```
+
+```{r echo=FALSE}
+datatable(discrete_data_available,
+          rownames = FALSE,
+          options = list(pageLength = 7,
+                         lengthChange = FALSE,
+                         searching = FALSE)
+)
+
+```
+
+The discrete water quality data can be accessed with the `read_waterdata_samples` function:
+
+```{r eval=FALSE, echo=TRUE}
+samples_data <- read_waterdata_samples(monitoringLocationIdentifier = site,
+                                       dataProfile = "basicphyschem")
+```
+
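As one way to use the availability summary before pulling full records, a minimal sketch, assuming the `discrete_data_available` chunk above and treating its `end` column (renamed from `mostRecentActivity`) as a date:

```r
# Sketch: keep only characteristics with results after 2020
recent_discrete <- subset(discrete_data_available,
                          as.Date(end) >= as.Date("2020-01-01"))
```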
 
 # Water Quality Portal (WQP)
 
 `dataRetrieval` also allows users to access data from the [Water Quality Portal](http://www.waterqualitydata.us/). The WQP houses data from multiple agencies; while USGS data comes from the NWIS database, EPA data comes from the STORET database (this includes many state, tribal, NGO, and academic groups). The WQP brings data from all these organizations together and provides it in a single format that has a more verbose output than NWIS.
 
+This tutorial will use the modern WQX3 format. This is still considered "beta", but it is the best way to get up-to-date multi-agency data.
+
 The single user-friendly function is `readWQPqw`. This function will take a site or vector of sites in the first argument "siteNumbers". USGS sites need to add "USGS-" before the site number.
 
@@ -343,22 +334,25 @@ The 2nd argument "parameterCd". Although it is called "parameterCd", it can take EITHER a USGS 5-digit parameter code OR a characteristic name (this is what non-USGS databases use). Leaving "parameterCd" as empty quotes will return all data for a site.
 
 So we could get all the water quality data for site `r site` like this:
 
 ```{r eval=FALSE, echo=TRUE}
-qw_data_all <- readWQPqw(siteNumbers = paste0("USGS-", site),
-                         parameterCd = "")
+qw_data_all <- readWQPqw(siteNumbers = site,
+                         parameterCd = "",
+                         legacy = FALSE)
 ```
 
 or 1 parameter code:
 
 ```{r eval=FALSE, echo=TRUE}
-qw_data_00095 <- readWQPqw(siteNumbers = paste0("USGS-", site),
-                           parameterCd = "00095")
+qw_data_00095 <- readWQPqw(siteNumbers = site,
+                           parameterCd = "00095",
+                           legacy = FALSE)
 ```
 
 or 1 characteristic name:
 
 ```{r eval=FALSE, echo=TRUE}
-qw_data_sp <- readWQPqw(siteNumbers = paste0("USGS-", site),
-                        parameterCd = "Specific conductance")
+qw_data_sp <- readWQPqw(siteNumbers = site,
+                        parameterCd = "Specific conductance",
+                        legacy = FALSE)
 ```
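Since `siteNumbers` is documented above as taking a vector of sites, the same pattern extends to several sites in one call; a minimal sketch reusing "USGS-05427948" from earlier in this tutorial:

```r
# Sketch: query two sites at once; siteNumbers accepts a vector per the text above
qw_two_sites <- readWQPqw(siteNumbers = c(site, "USGS-05427948"),
                          parameterCd = "00095",
                          legacy = FALSE)
```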
@@ -366,22 +360,16 @@ qw_data_sp <- readWQPqw(siteNumbers = site,
 
 This is all great when you know your site numbers. What do you do when you don't?
 
-There are 2 `dataRetrieval` functions that help with discover in NWIS:
+There are 2 `dataRetrieval` functions that help with USGS data discovery:
 
-* `whatNWISsites` finds sites within a specified filter (quicker)
-* `whatNWISdata` summarizes the data within the specified filter (more information)
+* `read_waterdata_monitoring_location` finds sites within a specified filter
+* `read_waterdata_ts_meta` summarizes the time series metadata
 
 And 2 functions that help with discover in WQP:
 
 * `readWQPsummary` summarizes the data available within the WQP by year.
 * `whatWQPdata` summarizes the data available within the WQP.
 
-There are several ways to specify the requests. The best way to discover how flexible the USGS web services are is to click on the links and see all of the filtering options:
-[http://waterservices.usgs.gov/](http://waterservices.usgs.gov/)
-
-```{r echo=FALSE, out.width = "500px"}
-knitr::include_graphics("waterservices.png")
-```
 
 Available geographic filters are individual site(s), a single state, a bounding box, or a HUC (hydrologic unit code). See examples for those services by looking at the help page for the `readNWISdata` and `readWQPdata` functions:
 
@@ -389,11 +377,9 @@ Here are a few examples:
 
 ```{r eval=FALSE}
 # Daily temperature in Ohio
-dataTemp <- readNWISdata(
-  stateCd = "OH",
-  parameterCd = "00010",
-  service = "dv"
-)
+ohio_sites <- read_waterdata_monitoring_location(state_name = "Ohio")
+ohio_ts_meta <- read_waterdata_ts_meta(bbox = sf::st_bbox(ohio_sites),
+                                       parameter_code = "00010")
 
 # Real-time discharge at a site
 instFlow <- readNWISdata(
@@ -405,12 +391,6 @@ instFlow <- readNWISdata(
   service = "iv",
   sites = "05114000",
   parameterCd = "00060",
   startDate = "2014-05-01T00:00Z",
   endDate = "2014-05-01T12:00Z",
   tz = "America/Chicago"
 )
 
-# Temperature within a bounding box:
-bBoxEx <- readNWISdata(
-  bBox = c(-83, 36.5, -81, 38.5),
-  parameterCd = "00010"
-)
-
 # Groundwater levels within a HUC:
 groundwaterHUC <- readNWISdata(
   huc = "02070010",
@@ -418,51 +398,6 @@ service = "gwlevels"
 )
 ```
 
-
-### Arizona Example
-
-For example, let's see which sites ever measured phosphorus at least 100 times over at least 20 years in Arizona. Water quality data is exclusively found in WQP functions.
-
-```{r az, echo=TRUE}
-AZ_sites <- readWQPsummary(
-  statecode = "AZ",
-  siteType = "Stream"
-)
-
-az_phos_summary <- AZ_sites |>
-  mutate(ResultCount = as.numeric(ResultCount),
-         Lat = as.numeric(MonitoringLocationLatitude),
-         Lon = as.numeric(MonitoringLocationLongitude)) |>
-  rename(Site = MonitoringLocationIdentifier) |>
-  group_by(Site, Lat, Lon) |>
-  summarise(min_year = min(YearSummarized),
-            max_year = max(YearSummarized),
-            count = sum(ResultCount)) |>
-  mutate(POR = max_year - min_year) |>
-  filter(count > 100,
-         POR >= 20) |>
-  arrange(desc(count)) |>
-  ungroup()
-
-```
-
-
-```{r echo=TRUE, eval=TRUE, fig.height=4}
-library(leaflet)
-
-leaflet(data = az_phos_summary) %>%
-  addProviderTiles("CartoDB.Positron") %>%
-  addCircleMarkers(~Lon, ~Lat,
-                   color = "red", radius = 3, stroke = FALSE,
-                   fillOpacity = 0.8, opacity = 0.8,
-                   popup = ~Site
-  )
-```
-
 
 # Time/Time zone discussion
 
 * The arguments for all `dataRetrieval` functions concerning dates (startDate, endDate) can be R Date objects, or character strings, as long as the string is in the form "YYYY-MM-DD".