Skip to content

Commit 13575c8

Browse files
authored
Merge pull request #12 from e-kotov/5-add-safe-guards-and-warnings-when-requesting-more-then-10k-entries
5 add safeguards and warnings when requesting more than 10k entries
2 parents 91880b8 + 7cc949f commit 13575c8

File tree

5 files changed

+94
-19
lines changed

5 files changed

+94
-19
lines changed

R/data.R

Lines changed: 87 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
#' @param level A `character` string specifying the NUTS level ("0", "1", "2", or "3").
99
#' @param x_filters A `named list` where the names are the filter fields for the x variable and the values are the selected values for those fields. Default is an empty list. To find out which filters to use, use \code{\link{mi_source_filters}} with the desired `source_name`.
1010
#' @param y_filters (Optional) A `named list` where the names are the filter fields for the y variable and the values are the selected values for those fields. Default is `NULL`. To find out which filters to use, use \code{\link{mi_source_filters}} with the desired `source_name`.
11-
#' @param limit An `integer` specifying the maximum number of results to return. Default is 2500.
11+
#' @param limit An `integer` specifying the maximum number of results to return. Default is 2500. This default should be enough for most uses, as it is well above the number of NUTS 3 regions in the EU. The maximum allowed by the API is 10000.
1212
#'
1313
#' @return A `tibble` with the following columns:
1414
#'
@@ -21,6 +21,7 @@
2121
#' * `x`: the value of the univariate variable.
2222
#' * `y` (optional): the value of the y variable (only included when `y_source` is provided).
2323
#'
24+
#' @importFrom rlang .data
2425
#' @export
2526
#'
2627
#' @examples
@@ -30,7 +31,7 @@
3031
#' x_source = "TGS00010",
3132
#' year = 2020,
3233
#' level = "2",
33-
#' x_filters = list(isced11 = "TOTAL", unit = "PC", age = "Y_GE15", freq = "A")
34+
#' x_filters = list(isced11 = "TOTAL", sex = "F")
3435
#' )
3536
#'
3637
#' # Bivariate example
@@ -39,8 +40,8 @@
3940
#' y_source = "DEMO_R_MLIFEXP",
4041
#' year = 2020,
4142
#' level = "2",
42-
#' x_filters = list(isced11 = "TOTAL", unit = "PC", age = "Y_GE15", freq = "A"),
43-
#' y_filters = list(unit = "YR", age = "Y_LT1", freq = "A")
43+
#' x_filters = list(isced11 = "TOTAL", sex = "F"),
44+
#' y_filters = list(age = "Y2", sex = "F")
4445
#' )
4546
#' }
4647
mi_data <- function(
@@ -58,6 +59,7 @@ mi_data <- function(
5859
checkmate::assert_list(x_filters, types = c("character", "NULL"))
5960
checkmate::assert_integerish(year, null.ok = TRUE, max.len = 1)
6061
checkmate::assert_list(y_filters, types = c("character", "NULL"), null.ok = TRUE)
62+
checkmate::assert_number(limit, lower = 1, upper = 10000)
6163
if (!is.null(y_source)) checkmate::assert_string(y_source)
6264

6365
# Build filter JSONs for X and Y
@@ -69,7 +71,10 @@ mi_data <- function(
6971
source = x_source,
7072
conditions = x_conditions
7173
)
72-
x_json_string <- jsonlite::toJSON(x_json, auto_unbox = TRUE)
74+
# Minify JSON to remove extra whitespace/newlines
75+
x_json_string <- jsonlite::minify(
76+
jsonlite::toJSON(x_json, auto_unbox = TRUE)
77+
)
7378

7479
# Check if it's bivariate (Y filters are provided)
7580
if (!is.null(y_source) && !is.null(y_filters)) {
@@ -80,7 +85,9 @@ mi_data <- function(
8085
source = y_source,
8186
conditions = y_conditions
8287
)
83-
y_json_string <- jsonlite::toJSON(y_json, auto_unbox = TRUE)
88+
y_json_string <- jsonlite::minify(
89+
jsonlite::toJSON(y_json, auto_unbox = TRUE)
90+
)
8491
}
8592

8693
# Build API endpoint
@@ -104,26 +111,94 @@ mi_data <- function(
104111
query_params$`_outcome_year` <- as.character(year)
105112
}
106113

107-
# Add JSON parameters as proper strings without URL encoding issues
108-
query_params$`X_JSON` <- I(x_json_string)
114+
# Add JSON parameters as plain strings (no I() wrapper) so that httr2 can URL-encode them automatically
115+
query_params$`X_JSON` <- x_json_string
109116
if (!is.null(y_source) && !is.null(y_filters)) {
110-
query_params$`Y_JSON` <- I(y_json_string)
117+
query_params$`Y_JSON` <- y_json_string
111118
}
112119

113120
# Perform API request
114-
response <- httr2::request(url_endpoint) |>
121+
request <- httr2::request(url_endpoint) |>
115122
httr2::req_headers(
116123
"Content-Type" = "application/json",
117124
"User-Agent" = getOption("mapineqr.user_agent")
118125
) |>
119126
httr2::req_url_query(!!!query_params) |>
120-
httr2::req_method("GET") |>
121-
httr2::req_perform()
127+
httr2::req_method("GET")
128+
129+
response <- request |> httr2::req_perform()
122130

123131
# Parse response
124132
response_data <- httr2::resp_body_json(response, simplifyVector = TRUE) |>
125133
tibble::as_tibble()
126134

135+
# Check whether any geo has more than one distinct value for x and (if applicable) y.
136+
duplicate_issues <- response_data |>
137+
dplyr::group_by(.data$geo) |>
138+
dplyr::summarise(
139+
distinct_x = dplyr::n_distinct(.data$x),
140+
distinct_y = if ("y" %in% names(response_data)) dplyr::n_distinct(.data$y) else NA_integer_,
141+
.groups = "drop"
142+
)
143+
144+
x_issue <- any(duplicate_issues$distinct_x > 1)
145+
y_issue <- if ("y" %in% names(response_data)) any(duplicate_issues$distinct_y > 1) else FALSE
146+
147+
# Only perform additional filter checking if duplicate geos exist
148+
if (x_issue || y_issue) {
149+
# --- For the x variable ---
150+
missing_x_filters <- character(0)
151+
if (x_issue) {
152+
available_filters <- mi_source_filters(source_name = x_source, year = year, level = level)
153+
# Determine which filter fields have more than one option
154+
multi_option_fields <- available_filters |>
155+
dplyr::group_by(.data$field) |>
156+
dplyr::summarise(n_options = dplyr::n_distinct(.data$value), .groups = "drop") |>
157+
dplyr::filter(.data$n_options > 1) |>
158+
dplyr::pull(.data$field)
159+
# Only require filters for those fields with multiple options.
160+
missing_x_filters <- setdiff(multi_option_fields, names(x_filters))
161+
}
162+
163+
# --- For the y variable (if applicable) ---
164+
missing_y_filters <- character(0)
165+
if (y_issue) {
166+
available_y_filters <- mi_source_filters(source_name = y_source, year = year, level = level)
167+
multi_option_y_fields <- available_y_filters |>
168+
dplyr::group_by(.data$field) |>
169+
dplyr::summarise(n_options = dplyr::n_distinct(.data$value), .groups = "drop") |>
170+
dplyr::filter(.data$n_options > 1) |>
171+
dplyr::pull(.data$field)
172+
missing_y_filters <- setdiff(multi_option_y_fields, names(y_filters))
173+
}
174+
175+
# Only raise an error if any missing filter is found among fields with multiple options.
176+
if (length(missing_x_filters) > 0 || length(missing_y_filters) > 0) {
177+
msg <- "The API returned duplicate values for some geographic regions. This may indicate that not all necessary filters were specified."
178+
if (length(missing_x_filters) > 0) {
179+
msg <- paste0(
180+
msg,
181+
"\n\nFor the 'x' variable (source: '", x_source, "'):",
182+
"\n The following filter fields (with multiple available options) were not specified: ",
183+
paste(missing_x_filters, collapse = ", "),
184+
"\nYou can review available filters by running:\n mi_source_filters(source_name = '", x_source, "', year = ", year, ", level = '", level, "')"
185+
)
186+
}
187+
if (length(missing_y_filters) > 0) {
188+
msg <- paste0(
189+
msg,
190+
"\n\nFor the 'y' variable (source: '", y_source, "'):",
191+
"\n The following filter fields (with multiple available options) were not specified: ",
192+
paste(missing_y_filters, collapse = ", "),
193+
"\nYou can review available filters by running:\n mi_source_filters(source_name = '", y_source, "', year = ", year, ", level = '", level, "')"
194+
)
195+
}
196+
stop(msg)
197+
}
198+
}
199+
200+
201+
127202
# Define expected columns based on whether y_source is specified
128203
if (is.null(y_source)) {
129204
expected_columns <- c("geo", "geo_name", "geo_source", "geo_year", "data_year", "x")

man/mi_data.Rd

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mi_source_coverage.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mi_source_filters.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mi_sources.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)