-
-
Notifications
You must be signed in to change notification settings - Fork 40
Open
Labels
Bug 🐛 (Something isn't working), Enhancement 💥 (Implemented features can be improved or revised)
Description
`BSDA::tsum.test` allows performing t-tests when only summary stats are available instead of the full data. However, the `htest` objects it returns are slightly different from those returned by `stats::t.test`, and `parameters` doesn't seem to like them. The main issue seems to be that the "method" is written slightly differently for some reason. Here is a reprex demonstrating some issues, and how they can be fixed by setting the right method.
library(BSDA)
#> Loading required package: lattice
#>
#> Attaching package: 'BSDA'
#> The following object is masked from 'package:datasets':
#>
#> Orange
library(parameters)
library(broom)
library(assertthat)
library(waldo)
## Examples taken from tsum.test help text
## One-sample test
htest1 <- tsum.test(mean.x=5.6, s.x=2.1, n.x=16, mu=4.9, alternative="greater")
#> Warning in tsum.test(mean.x = 5.6, s.x = 2.1, n.x = 16, mu = 4.9, alternative =
#> "greater"): argument 'var.equal' ignored for one-sample test.
## Do an equivalent test with t.test
tt1 <- t.test(scale(rnorm(16)) * 2.1 + 5.6, mu = 4.9, alternative = "greater")
## Examine differences
compare(tt1, htest1, x_arg = "t.test", y_arg = "tsum.test", tolerance = 1e-10)
#> `t.test` is length 10
#> `tsum.test` is length 9
#>
#> names(t.test) | names(tsum.test)
#> [1] "statistic" | "statistic" [1]
#> [2] "parameter" - "parameters" [2]
#> [3] "p.value" | "p.value" [3]
#> [4] "conf.int" | "conf.int" [4]
#> [5] "estimate" | "estimate" [5]
#> [6] "null.value" | "null.value" [6]
#> [7] "stderr" -
#> [8] "alternative" | "alternative" [7]
#> [9] "method" | "method" [8]
#> [10] "data.name" | "data.name" [9]
#>
#> `t.test$parameter` is a double vector (15)
#> `tsum.test$parameter` is absent
#>
#> `t.test$conf.int`: 4.7 Inf
#> `tsum.test$conf.int`: 4.7 NA
#>
#> `t.test$stderr` is a double vector (0.525)
#> `tsum.test$stderr` is absent
#>
#> `t.test$method`: "One Sample t-test"
#> `tsum.test$method`: "One-sample t-Test"
#>
#> `t.test$data.name`: "scale(rnorm(16)) * 2.1 + 5.6"
#> `tsum.test$data.name`: "Summarized x"
#>
#> `t.test$parameters` is absent
#> `tsum.test$parameters` is a double vector (15)
## Gives correct info
tidy(htest1)
#> # A tibble: 1 × 7
#> estimate statistic p.value conf.low conf.high method alternative
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 5.6 1.33 0.101 4.68 NA One-sample t-Test greater
## Thinks it's a different kind of test
parameters(htest1)
#> One-sample t-Test
#>
#> Parameter1 | tau | z | df | p
#> ---------------------------------------
#> Summarized x | 5.60 | 1.33 | 15 | 0.101
#>
#> Alternative hypothesis: true mean is greater than 4.9
## No t statistic
assert_that("t" %in% names(parameters(htest1)))
#> Error: `%in%`(x = "t", table = names(parameters(htest1))) is not TRUE
## Looks like the problem is a slightly different value for "method".
## We can "fix" it by copying the method from t.test
htest1_fixed <- htest1
htest1_fixed$method <- tt1$method
parameters(htest1_fixed)
#> One Sample t-test
#>
#> Parameter | Mean | mu | Difference | 95% CI | t(15) | p
#> -------------------------------------------------------------------
#> Summarized x | 5.60 | 4.90 | 0.70 | [4.68, ] | 1.33 | 0.101
#>
#> Alternative hypothesis: true mean is greater than 4.9
assert_that("t" %in% names(parameters(htest1_fixed)))
#> [1] TRUE
## Two-sample test
x <- c(7.8, 6.6, 6.5, 7.4, 7.3, 7.0, 6.4, 7.1, 6.7, 7.6, 6.8)
y <- c(4.5, 5.4, 6.1, 6.1, 5.4, 5.0, 4.1, 5.5)
htest2 <- tsum.test(
mean(x), s.x=sd(x), n.x=length(x),
mean(y), s.y=sd(y), n.y=length(y),
mu = 2
)
## Gives correct info
tidy(htest2)
#> # A tibble: 1 × 8
#> estimate1 estimate2 statistic p.value conf.low conf.high method alternative
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 7.02 5.26 -0.853 0.411 1.13 2.38 Welch Mo… two.sided
## Throws error
parameters(htest2)
#> Error in `$<-.data.frame`(`*tmp*`, "tau", value = c(`mean of x` = 7.01818181818182, : replacement has 2 rows, data has 1
## Regular t-test on same data works correctly
tt2 <- t.test(x,y, mu = 2)
parameters(tt2)
#> Welch Two Sample t-test
#>
#> Parameter1 | Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference
#> ------------------------------------------------------------------------
#> x | y | 7.02 | 5.26 | 1.76
#>
#> Parameter1 | 95% CI | t(11.30) | p
#> --------------------------------------------
#> x | [1.13, 2.38] | -0.85 | 0.411
#>
#> Alternative hypothesis: true difference in means is not equal to 2
## Structural differences:
compare(tt2, htest2, tolerance = 1e-10, x_arg = "t.test", y_arg = "tsum.test")
#> `t.test` is length 10
#> `tsum.test` is length 9
#>
#> names(t.test) | names(tsum.test)
#> [1] "statistic" | "statistic" [1]
#> [2] "parameter" - "parameters" [2]
#> [3] "p.value" | "p.value" [3]
#> [4] "conf.int" | "conf.int" [4]
#> [5] "estimate" | "estimate" [5]
#> [6] "null.value" | "null.value" [6]
#> [7] "stderr" -
#> [8] "alternative" | "alternative" [7]
#> [9] "method" | "method" [8]
#> [10] "data.name" | "data.name" [9]
#>
#> `t.test$parameter` is a double vector (11.3029200067925)
#> `tsum.test$parameter` is absent
#>
#> `t.test$stderr` is a double vector (0.286497791189657)
#> `tsum.test$stderr` is absent
#>
#> `t.test$method`: "Welch Two Sample t-test"
#> `tsum.test$method`: "Welch Modified Two-Sample t-Test"
#>
#> `t.test$data.name`: "x and y"
#> `tsum.test$data.name`: "Summarized x and y"
#>
#> `t.test$parameters` is absent
#> `tsum.test$parameters` is a double vector (11.3029200067925)
## Again, we can fix it by setting the correct method value from t.test
htest2_fixed <- htest2
htest2_fixed$method <- tt2$method
htest2_fixed$data.name <- tt2$data.name
compare(tt2, htest2_fixed, tolerance = 1e-10, x_arg = "t.test", y_arg = "tsum.test")
#> `t.test` is length 10
#> `tsum.test` is length 9
#>
#> names(t.test) | names(tsum.test)
#> [1] "statistic" | "statistic" [1]
#> [2] "parameter" - "parameters" [2]
#> [3] "p.value" | "p.value" [3]
#> [4] "conf.int" | "conf.int" [4]
#> [5] "estimate" | "estimate" [5]
#> [6] "null.value" | "null.value" [6]
#> [7] "stderr" -
#> [8] "alternative" | "alternative" [7]
#> [9] "method" | "method" [8]
#> [10] "data.name" | "data.name" [9]
#>
#> `t.test$parameter` is a double vector (11.3029200067925)
#> `tsum.test$parameter` is absent
#>
#> `t.test$stderr` is a double vector (0.286497791189657)
#> `tsum.test$stderr` is absent
#>
#> `t.test$parameters` is absent
#> `tsum.test$parameters` is a double vector (11.3029200067925)
parameters(htest2_fixed)
#> Welch Two Sample t-test
#>
#> Parameter1 | Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference
#> ------------------------------------------------------------------------
#> x | y | 7.02 | 5.26 | 1.76
#>
#> Parameter1 | 95% CI | t(11.30) | p
#> --------------------------------------------
#> x | [1.13, 2.38] | -0.85 | 0.411
#>
#> Alternative hypothesis: true difference in means is not equal to 2
Created on 2025-09-17 with reprex v2.1.1
Metadata
Metadata
Assignees
Labels
Bug 🐛 (Something isn't working), Enhancement 💥 (Implemented features can be improved or revised)