From f9d7fad918c39aec06f6212f46c7bb7ba192a930 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Mon, 7 Apr 2025 11:04:57 -0500 Subject: [PATCH 1/6] add data to ignore --- .Rbuildignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index 91be94e2..ae92cf13 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -91,6 +91,8 @@ vignettes/long_to_wide.Rmd vignettes/join_by_closest.Rmd vignettes/wqx3_development_plan.Rmd vignettes/dataretrieval_discrete_changes_images/* +vignettes/Wide data example.xlsx +vignettes/messyData.png ^ci$ ^public$ ^docker$ From fc8b6547ab5ddf7465f28bb75b2f8096bb970597 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 17 Jun 2025 09:25:12 -0500 Subject: [PATCH 2/6] use github page --- R/dataRetrievals-package.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dataRetrievals-package.R b/R/dataRetrievals-package.R index b04a04d4..cdb8bdf1 100644 --- a/R/dataRetrievals-package.R +++ b/R/dataRetrievals-package.R @@ -9,7 +9,7 @@ See: https://api.waterdata.usgs.gov/signup" packageStartupMessage("dataRetrieval ", dataRetrieval_version," Extended Documentation: https://doi-usgs.github.io/dataRetrieval Learn about the new functions that are replacing NWIS functions here: -https://water.code-pages.usgs.gov/dataRetrieval/articles/read_waterdata_functions.html", +https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html", token_message) } From d29ad9472e3a7878d529c28317427ac5bfdc9628 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Tue, 17 Jun 2025 15:21:43 -0500 Subject: [PATCH 3/6] getting ready for a tag --- code.json | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/code.json b/code.json index 1709ad75..0bbb49a5 100644 --- a/code.json +++ b/code.json @@ -89,5 +89,51 @@ "date": { "metadataLastUpdated": "2022-12-06" } + }, + { + "name": "dataRetrieval", + "organization": "U.S. 
Geological Survey", + "description": "This R package is designed to obtain USGS or EPA water quality sample data, streamflow data, and metadata directly from web services", + "version": "2.7.19", + "status": "Development", + + "permissions": { + "usageType": "openSource", + "licenses": [ + { + "name": "Public Domain, CC0-1.0", + "URL": "https://code.usgs.gov/water/dataRetrieval/-/raw/2.7.19/LICENSE.md" + } + ] + }, + + "homepageURL": "https://code.usgs.gov/water/dataRetrieval/-/tags/2.7.19", + "downloadURL": "https://code.usgs.gov/water/dataRetrieval/-/archive/2.7.19/dataRetrieval-2.7.19.zip", + "disclaimerURL": "https://code.usgs.gov/water/dataRetrieval/-/raw/2.7.19/DISCLAIMER.md", + "repositoryURL": "https://code.usgs.gov/water/dataRetrieval.git", + + "vcs": "git", + + "laborHours": 500, + + "tags": [ + "R", + "rstats", + "USGS", + "water" + ], + + "languages": [ + "R" + ], + + "contact": { + "name": "Laura De Cicco", + "email": "ldecicco@usgs.gov" + }, + + "date": { + "metadataLastUpdated": "2025-06-17" + } } ] From 4f312efaee885db03277140c8a447110a74a7b13 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 18 Jun 2025 15:05:45 -0500 Subject: [PATCH 4/6] Add reference list option --- R/read_waterdata_samples.R | 21 ++++++++++++++------- man/check_waterdata_sample_params.Rd | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/R/read_waterdata_samples.R b/R/read_waterdata_samples.R index d46e919b..0202bc1f 100644 --- a/R/read_waterdata_samples.R +++ b/R/read_waterdata_samples.R @@ -339,21 +339,28 @@ explode_query <- function(baseURL, POST = FALSE, x){ #' group = "Biological") #' observedProperties <- check_waterdata_sample_params("observedproperty", #' text = "phosphorus") -#' +#' ref_list <- check_waterdata_sample_params("reference-list") #' } check_waterdata_sample_params <- function(service = "characteristicgroup", ...){ service_options <- c("characteristicgroup", "states", "counties", "countries", "sitetype", "samplemedia", - 
"characteristics", "observedproperty") + "characteristics", "observedproperty", + "reference-list") match.arg(service, choices = service_options, several.ok = FALSE) check_group_req <- httr2::request("https://api.waterdata.usgs.gov") |> - httr2::req_url_path_append("samples-data", - "codeservice", - service) |> + httr2::req_url_path_append("samples-data") + + if(service != "reference-list"){ + check_group_req <- check_group_req |> + httr2::req_url_path_append("codeservice") + } + + check_group_req <- check_group_req |> + httr2::req_url_path_append(service) |> httr2::req_user_agent(default_ua()) |> httr2::req_url_query(mimeType = "application/json") @@ -365,7 +372,7 @@ check_waterdata_sample_params <- function(service = "characteristicgroup", check_group_req <- httr2::req_url_query(check_group_req, !!!params) } - + message("GET: ", check_group_req$url) check_group <- httr2::req_perform(check_group_req) |> @@ -506,7 +513,7 @@ summarize_waterdata_samples <- function(monitoringLocationIdentifier){ df$firstActivity <- as.Date(df$firstActivity) df$mostRecentActivity <- as.Date(df$mostRecentActivity) } - + attr(df, "url") <- baseURL$url attr(df, "queryTime") <- Sys.time() diff --git a/man/check_waterdata_sample_params.Rd b/man/check_waterdata_sample_params.Rd index d8b4a9eb..ea4a5775 100644 --- a/man/check_waterdata_sample_params.Rd +++ b/man/check_waterdata_sample_params.Rd @@ -34,7 +34,7 @@ characteristics <- check_waterdata_sample_params("characteristics", group = "Biological") observedProperties <- check_waterdata_sample_params("observedproperty", text = "phosphorus") - +ref_list <- check_waterdata_sample_params("reference-list") } \dontshow{\}) # examplesIf} } From 1af7c796827b7dc405ff0eaa40372851cd07a9c1 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 18 Jun 2025 16:47:55 -0500 Subject: [PATCH 5/6] Rejigger how to call the data. 
--- R/read_waterdata_daily.R | 25 ++------------ R/read_waterdata_monitoring_location.R | 21 +++--------- R/read_waterdata_ts_meta.R | 22 ++----------- R/walk_pages.R | 45 +++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 59 deletions(-) diff --git a/R/read_waterdata_daily.R b/R/read_waterdata_daily.R index 2ac5f01c..9927ba01 100644 --- a/R/read_waterdata_daily.R +++ b/R/read_waterdata_daily.R @@ -88,29 +88,10 @@ read_waterdata_daily <- function(monitoring_location_id = NA_character_, output_id <- "daily_id" args <- mget(names(formals())) - args[["service"]] <- service + return_list <- get_ogc_data(args, + output_id, + service) - args <- switch_arg_id(args, - id_name = output_id, - service = service) - - args[["properties"]] <- switch_properties_id(properties, - id_name = output_id, - service = service) - - args[["convertType"]] <- NULL - - dv_req <- do.call(construct_api_requests, args) - - return_list <- walk_pages(dv_req, max_results) - - return_list <- deal_with_empty(return_list, properties, service) - - if(convertType) return_list <- cleanup_cols(return_list, - service = "daily") - - return_list <- rejigger_cols(return_list, properties, output_id) - return_list <- return_list[order(return_list$time, return_list$monitoring_location_id), ] return(return_list) diff --git a/R/read_waterdata_monitoring_location.R b/R/read_waterdata_monitoring_location.R index 9489a1bb..2b96f6e8 100644 --- a/R/read_waterdata_monitoring_location.R +++ b/R/read_waterdata_monitoring_location.R @@ -139,23 +139,10 @@ read_waterdata_monitoring_location <- function(monitoring_location_id = NA_chara output_id <- "monitoring_location_id" args <- mget(names(formals())) - args[["service"]] <- service - - args <- switch_arg_id(args, - id_name = output_id, - service = service) - - args[["properties"]] <- switch_properties_id(properties, - id_name = output_id, - service = service) - - site_req <- do.call(construct_api_requests, args) - - return_list <- walk_pages(site_req, 
max_results) - - return_list <- deal_with_empty(return_list, properties, service) - - return_list <- rejigger_cols(return_list, properties, output_id) + args[["convertType"]] <- FALSE + return_list <- get_ogc_data(args, + output_id, + service) return(return_list) } diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R index 1dc701f7..b0df77eb 100644 --- a/R/read_waterdata_ts_meta.R +++ b/R/read_waterdata_ts_meta.R @@ -81,25 +81,9 @@ read_waterdata_ts_meta <- function(monitoring_location_id = NA_character_, output_id <- "time_series_id" args <- mget(names(formals())) - args[["service"]] <- service - - args <- switch_arg_id(args, id_name = output_id, service = service) - - args[["convertType"]] <- NULL - - args[["properties"]] <- switch_properties_id(properties, - id_name = output_id, - service = service) - - req_ts_meta <- do.call(construct_api_requests, args) - - return_list <- walk_pages(req_ts_meta, max_results) - - return_list <- deal_with_empty(return_list, properties, service) - - if(convertType) return_list <- cleanup_cols(return_list) - - return_list <- rejigger_cols(return_list, properties, output_id) + return_list <- get_ogc_data(args, + output_id, + service) return(return_list) diff --git a/R/walk_pages.R b/R/walk_pages.R index 8a2d8ed9..34b6a81a 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -19,6 +19,7 @@ #' deal_with_empty <- function(return_list, properties, service){ if(nrow(return_list) == 0){ + if(all(is.na(properties))){ schema <- check_OGC_requests(endpoint = service, type = "schema") properties <- names(schema$properties) @@ -240,8 +241,50 @@ walk_pages <- function(req, max_results){ return_list <- get_resp_data(resps) } + return(return_list) +} + + +#' Coordinate the request and retrieval of OGC calls +#' +#' @param args arguments from individual functions +#' @param output_id Name of id column to return +#' @param service Endpoint name. 
+#' @details `max_results` is read from `args` and passed to `walk_pages()`. +#' +#' @noRd +#' @return data.frame with attributes +get_ogc_data <- function(args, + output_id, + service){ + + args[["service"]] <- service + max_results <- args[["max_results"]] + args[["max_results"]] <- NULL + args <- switch_arg_id(args, + id_name = output_id, + service = service) + + properties <- args[["properties"]] + args[["properties"]] <- switch_properties_id(properties, + id_name = output_id, + service = service) + convertType <- args[["convertType"]] + args[["convertType"]] <- NULL + + req <- do.call(construct_api_requests, args) + + return_list <- walk_pages(req, max_results) + + return_list <- deal_with_empty(return_list, properties, service) + + if(convertType) return_list <- cleanup_cols(return_list, service = service) + + return_list <- rejigger_cols(return_list, properties, output_id) + attr(return_list, "request") <- req attr(return_list, "queryTime") <- Sys.time() - return(return_list) } + + From 96e34915e9a20156931cd4b0cd874c94c2a2d686 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Wed, 18 Jun 2025 17:00:19 -0500 Subject: [PATCH 6/6] Bring back the messages --- R/walk_pages.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/walk_pages.R b/R/walk_pages.R index 34b6a81a..a08c477d 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -214,6 +214,8 @@ get_resp_data <- function(resp) { #' @return data.frame with attributes walk_pages <- function(req, max_results){ + message("Requesting:\n", req$url) + if(is.na(max_results)){ resps <- httr2::req_perform_iterative(req, next_req = next_req_url,