-
Couldn't load subscription status.
- Fork 60
Open
Labels
Description
Prework
- Read and agree to the code of conduct and contributing guidelines.
- If there is already a relevant issue, whether open or closed, comment on the existing thread instead of posting a new issue.
Description
get_sundered_data() returns a SQL error on DuckDB based tables when there is more than one row-level test. The error message suggests that it has something to do with DuckDB not knowing how to add Booleans.
It works fine for local data frames and SQLite tables.
It also works fine in DuckDB if there is only one test as no two Booleans are added in the operations.
Reproducible example
library(pointblank)
library(duckdb)
#> Loading required package: DBI
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
con <- dbConnect(duckdb::duckdb(), "test.duckdb")
tbl_db <- copy_to(con, airquality |> tibble::rowid_to_column("idcol"),
"airquality_cp", temporary = TRUE, overwrite = TRUE)
ag <- create_agent(tbl_db) |>
col_vals_between(Wind, 1, 2) |>
col_vals_gt(Ozone, 20) |>
interrogate()
ag |> get_sundered_data(id_cols = "idcol")
#> Error in `collect()`:
#> ! Failed to collect lazy table.
#> Caused by error in `dbSendQuery()`:
#> ! rapi_prepare: Failed to prepare query SELECT idcol, Ozone, "Solar.R", Wind, "Temp", "Month", "Day"
#> FROM (
#> SELECT
#> idcol,
#> Ozone,
#> "Solar.R",
#> Wind,
#> "Temp",
#> "Month",
#> "Day",
#> CASE WHEN (pb_is_good_ = 2) THEN TRUE ELSE FALSE END AS pb_is_good_
#> FROM (
#> SELECT
#> idcol,
#> Ozone,
#> "Solar.R",
#> Wind,
#> "Temp",
#> "Month",
#> "Day",
#> pb_is_good_1 + pb_is_good_2 AS pb_is_good_
#> FROM (
#> SELECT
#> "...1".*,
#> pb_is_good_2,
#> Ozone,
#> "Solar.R",
#> Wind,
#> "Temp",
#> "Month",
#> "Day"
#> FROM (
#> SELECT
#> idcol,
#> CASE WHEN ((Wind IS NULL)) THEN FALSE ELSE pb_is_good_ END AS pb_is_good_1
#> FROM (
#> SELECT
#> airquality_cp.*,
#> CASE
#> WHEN (Wind >= 1.0 AND Wind <= 2.0) THEN TRUE
#> WHEN (Wind < 1.0 OR Wind > 2.0) THEN FALSE
#> END AS pb_is_good_
#> FROM airquality_cp
#> ) q01
#> ) "...1"
#> LEFT JOIN (
#> SELECT
#> idcol,
#> CASE WHEN ((pb_is_good_ IS NULL)) THEN FALSE ELSE pb_is_good_ END AS pb_is_good_2
#> FROM (
#> SELECT airquality_cp.*, Ozone > 20.0 AS pb_is_good_
#> FROM airquality_cp
#> ) q01
#> ) "...2"
#> ON ("...1".idcol = "...2".idcol)
#> LEFT JOIN (
#> SELECT
#> airquality_cp.*,
#> CASE
#> WHEN (Wind >= 1.0 AND Wind <= 2.0) THEN TRUE
#> WHEN (Wind < 1.0 OR Wind > 2.0) THEN FALSE
#> END AS pb_is_good_
#> FROM airquality_cp
#> ) "...3"
#> ON ("...1".idcol = "...3".idcol)
#> ) q01
#> ) q01
#> ) q01
#> WHERE (pb_is_good_ = 1.0)
#> LIMIT 21
#> Error: Binder Error: No function matches the given name and argument types '+(BOOLEAN, BOOLEAN)'. You might need to add explicit type casts.
#> Candidate functions:
#> +(TINYINT) -> TINYINT
#> +(TINYINT, TINYINT) -> TINYINT
#> +(SMALLINT) -> SMALLINT
#> +(SMALLINT, SMALLINT) -> SMALLINT
#> +(INTEGER) -> INTEGER
#> +(INTEGER, INTEGER) -> INTEGER
#> +(BIGINT) -> BIGINT
#> +(BIGINT, BIGINT) -> BIGINT
#> +(HUGEINT) -> HUGEINT
#> +(HUGEINT, HUGEINT) -> HUGEINT
#> +(FLOAT) -> FLOAT
#> +(FLOAT, FLOAT) -> FLOAT
#> +(DOUBLE) -> DOUBLE
#> +(DOUBLE, DOUBLE) -> DOUBLE
#> +(DECIMAL) -> DECIMAL
#> +(DECIMAL, DECIMAL) -> DECIMAL
#> +(UTINYINT) -> UTINYINT
#> +(UTINYINT, UTINYINT) -> UTINYINT
#> +(USMALLINT) -> USMALLINT
#> +(USMALLINT, USMALLINT) -> USMALLINT
#> +(UINTEGER) -> UINTEGER
#> +(UINTEGER, UINTEGER) -> UINTEGER
#> +(UBIGINT) -> UBIGINT
#> +(UBIGINT, UBIGINT) -> UBIGINT
#> +(UHUGEINT) -> UHUGEINT
#> +(UHUGEINT, UHUGEINT) -> UHUGEINT
#> +(DATE, INTEGER) -> DATE
#> +(INTEGER, DATE) -> DATE
#> +(INTERVAL, INTERVAL) -> INTERVAL
#> +(DATE, INTERVAL) -> TIMESTAMP
#> +(INTERVAL, DATE) -> TIMESTAMP
#> +(TIME, INTERVAL) -> TIME
#> +(INTERVAL, TIME) -> TIME
#> +(TIMESTAMP, INTERVAL) -> TIMESTAMP
#> +(INTERVAL, TIMESTAMP) -> TIMESTAMP
#> +(TIME WITH TIME ZONE, INTERVAL) -> TIME WITH TIME ZONE
#> +(INTERVAL, TIME WITH TIME ZONE) -> TIME WITH TIME ZONE
#> +(TIME, DATE) -> TIMESTAMP
#> +(DATE, TIME) -> TIMESTAMP
#> +(TIME WITH TIME ZONE, DATE) -> TIMESTAMP WITH TIME ZONE
#> +(DATE, TIME WITH TIME ZONE) -> TIMESTAMP WITH TIME ZONE
#> +([ANY[]...]) -> ANY[]
#>
#>
#> LINE 21: pb_is_good_1 + pb_is_good_2 AS pb_is_good_
#> ^
dbDisconnect(con)Created on 2025-07-12 with reprex v2.1.1
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.4.1 (2024-06-14)
#> os macOS 15.5
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype UTF-8
#> tz Europe/Prague
#> date 2025-07-12
#> pandoc 3.6.3 @ /Applications/Positron.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> blastula 0.3.6 2025-04-03 [1] CRAN (R 4.4.1)
#> blob 1.2.4 2023-03-17 [1] CRAN (R 4.4.0)
#> cli 3.6.5.9000 2025-07-12 [1] Github (r-lib/cli@cbb3424)
#> DBI * 1.2.3 2024-06-02 [1] CRAN (R 4.4.0)
#> dbplyr 2.5.0 2024-03-19 [1] CRAN (R 4.4.0)
#> digest 0.6.37 2024-08-19 [1] CRAN (R 4.4.1)
#> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.0)
#> duckdb * 1.3.0 2025-06-02 [1] CRAN (R 4.4.1)
#> evaluate 1.0.4 2025-06-18 [1] CRAN (R 4.4.1)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0)
#> fs 1.6.6 2025-04-12 [1] CRAN (R 4.4.1)
#> generics 0.1.4 2025-05-09 [1] CRAN (R 4.4.1)
#> glue 1.8.0 2024-09-30 [1] CRAN (R 4.4.1)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
#> knitr 1.50 2025-03-16 [1] CRAN (R 4.4.1)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
#> pillar 1.11.0 2025-07-04 [1] CRAN (R 4.4.1)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0)
#> pointblank * 0.12.2.9000 2025-07-12 [1] Github (rstudio/pointblank@8371ca0)
#> purrr 1.1.0 2025-07-10 [1] RSPM (R 4.4.0)
#> R6 2.6.1 2025-02-15 [1] CRAN (R 4.4.1)
#> reprex 2.1.1 2024-07-06 [1] CRAN (R 4.4.0)
#> rlang 1.1.6 2025-04-11 [1] CRAN (R 4.4.1)
#> rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.4.1)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
#> tibble 3.3.0 2025-06-08 [1] CRAN (R 4.4.1)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.0)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
#> withr 3.0.2 2024-10-28 [1] CRAN (R 4.4.1)
#> xfun 0.52 2025-04-02 [1] CRAN (R 4.4.1)
#> yaml 2.3.10 2024-07-26 [1] CRAN (R 4.4.1)
#>
#> [1] /Users/petr/Library/R/4.4/library
#> [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────Expected result
This should work the same as for local data frames.
Session info
see reprex