Skip to content

parameters fails or gives wrong info on htest objects from BSDA::tsum.test #1168

@DarwinAwardWinner

Description

@DarwinAwardWinner

BSDA::tsum.test allows performing t-tests when only summary statistics are available instead of the full data. However, the htest objects it returns differ slightly from those returned by stats::t.test, and parameters() either misidentifies the test or fails with an error on them. The main cause seems to be that the "method" string is written slightly differently (e.g. "One-sample t-Test" instead of "One Sample t-test"). Here is a reprex demonstrating the issues, and how they can be worked around by setting the method to the value used by t.test.

library(BSDA)
#> Loading required package: lattice
#> 
#> Attaching package: 'BSDA'
#> The following object is masked from 'package:datasets':
#> 
#>     Orange
library(parameters)
library(broom)
library(assertthat)
library(waldo)

## Examples taken from tsum.test help text

## One-sample test
htest1 <- tsum.test(mean.x=5.6, s.x=2.1, n.x=16, mu=4.9, alternative="greater")
#> Warning in tsum.test(mean.x = 5.6, s.x = 2.1, n.x = 16, mu = 4.9, alternative =
#> "greater"): argument 'var.equal' ignored for one-sample test.
## Do an equivalent test with t.test
tt1 <- t.test(scale(rnorm(16)) * 2.1 + 5.6, mu = 4.9, alternative = "greater")

## Examine differences
compare(tt1, htest1, x_arg = "t.test", y_arg = "tsum.test", tolerance = 1e-10)
#> `t.test` is length 10
#> `tsum.test` is length 9
#> 
#>      names(t.test) | names(tsum.test)    
#>  [1] "statistic"   | "statistic"      [1]
#>  [2] "parameter"   - "parameters"     [2]
#>  [3] "p.value"     | "p.value"        [3]
#>  [4] "conf.int"    | "conf.int"       [4]
#>  [5] "estimate"    | "estimate"       [5]
#>  [6] "null.value"  | "null.value"     [6]
#>  [7] "stderr"      -                     
#>  [8] "alternative" | "alternative"    [7]
#>  [9] "method"      | "method"         [8]
#> [10] "data.name"   | "data.name"      [9]
#> 
#> `t.test$parameter` is a double vector (15)
#> `tsum.test$parameter` is absent
#> 
#>    `t.test$conf.int`: 4.7 Inf
#> `tsum.test$conf.int`: 4.7  NA
#> 
#> `t.test$stderr` is a double vector (0.525)
#> `tsum.test$stderr` is absent
#> 
#> `t.test$method`:    "One Sample t-test"
#> `tsum.test$method`: "One-sample t-Test"
#> 
#> `t.test$data.name`:    "scale(rnorm(16)) * 2.1 + 5.6"
#> `tsum.test$data.name`: "Summarized x"                
#> 
#> `t.test$parameters` is absent
#> `tsum.test$parameters` is a double vector (15)

## Gives correct info
tidy(htest1)
#> # A tibble: 1 × 7
#>   estimate statistic p.value conf.low conf.high method            alternative
#>      <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>             <chr>      
#> 1      5.6      1.33   0.101     4.68        NA One-sample t-Test greater
## Thinks it's a different kind of test
parameters(htest1)
#> One-sample t-Test
#> 
#> Parameter1   |  tau |    z | df |     p
#> ---------------------------------------
#> Summarized x | 5.60 | 1.33 | 15 | 0.101
#> 
#> Alternative hypothesis: true mean is greater than 4.9
## No t statistic
assert_that("t" %in% names(parameters(htest1)))
#> Error: `%in%`(x = "t", table = names(parameters(htest1))) is not TRUE

## Looks like the problem is a slightly different value for "method".
## We can "fix" it by copying the method from t.test
htest1_fixed <- htest1
htest1_fixed$method <- tt1$method
parameters(htest1_fixed)
#> One Sample t-test
#> 
#> Parameter    | Mean |   mu | Difference |    95% CI | t(15) |     p
#> -------------------------------------------------------------------
#> Summarized x | 5.60 | 4.90 |       0.70 | [4.68,  ] |  1.33 | 0.101
#> 
#> Alternative hypothesis: true mean is greater than 4.9
assert_that("t" %in% names(parameters(htest1_fixed)))
#> [1] TRUE

## Two-sample test
x <- c(7.8, 6.6, 6.5, 7.4, 7.3, 7.0, 6.4, 7.1, 6.7, 7.6, 6.8)
y <- c(4.5, 5.4, 6.1, 6.1, 5.4, 5.0, 4.1, 5.5)
htest2 <- tsum.test(
  mean(x), s.x=sd(x), n.x=length(x),
  mean(y), s.y=sd(y), n.y=length(y),
  mu = 2
)
## Gives correct info
tidy(htest2)
#> # A tibble: 1 × 8
#>   estimate1 estimate2 statistic p.value conf.low conf.high method    alternative
#>       <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl> <chr>     <chr>      
#> 1      7.02      5.26    -0.853   0.411     1.13      2.38 Welch Mo… two.sided
## Throws error
parameters(htest2)
#> Error in `$<-.data.frame`(`*tmp*`, "tau", value = c(`mean of x` = 7.01818181818182, : replacement has 2 rows, data has 1
## Regular t-test on same data works correctly
tt2 <- t.test(x,y, mu = 2)
parameters(tt2)
#> Welch Two Sample t-test
#> 
#> Parameter1 | Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference
#> ------------------------------------------------------------------------
#> x          |          y |            7.02 |            5.26 |       1.76
#> 
#> Parameter1 |       95% CI | t(11.30) |     p
#> --------------------------------------------
#> x          | [1.13, 2.38] |    -0.85 | 0.411
#> 
#> Alternative hypothesis: true difference in means is not equal to 2

## Structural differences between the t.test and tsum.test results
compare(tt2, htest2, tolerance = 1e-10, x_arg = "t.test", y_arg = "tsum.test")
#> `t.test` is length 10
#> `tsum.test` is length 9
#> 
#>      names(t.test) | names(tsum.test)    
#>  [1] "statistic"   | "statistic"      [1]
#>  [2] "parameter"   - "parameters"     [2]
#>  [3] "p.value"     | "p.value"        [3]
#>  [4] "conf.int"    | "conf.int"       [4]
#>  [5] "estimate"    | "estimate"       [5]
#>  [6] "null.value"  | "null.value"     [6]
#>  [7] "stderr"      -                     
#>  [8] "alternative" | "alternative"    [7]
#>  [9] "method"      | "method"         [8]
#> [10] "data.name"   | "data.name"      [9]
#> 
#> `t.test$parameter` is a double vector (11.3029200067925)
#> `tsum.test$parameter` is absent
#> 
#> `t.test$stderr` is a double vector (0.286497791189657)
#> `tsum.test$stderr` is absent
#> 
#> `t.test$method`:    "Welch Two Sample t-test"         
#> `tsum.test$method`: "Welch Modified Two-Sample t-Test"
#> 
#> `t.test$data.name`:    "x and y"           
#> `tsum.test$data.name`: "Summarized x and y"
#> 
#> `t.test$parameters` is absent
#> `tsum.test$parameters` is a double vector (11.3029200067925)

## Again, we can fix it by setting the correct method (and data.name) values from t.test
htest2_fixed <- htest2
htest2_fixed$method <- tt2$method
htest2_fixed$data.name <- tt2$data.name
compare(tt2, htest2_fixed, tolerance = 1e-10, x_arg = "t.test", y_arg = "tsum.test")
#> `t.test` is length 10
#> `tsum.test` is length 9
#> 
#>      names(t.test) | names(tsum.test)    
#>  [1] "statistic"   | "statistic"      [1]
#>  [2] "parameter"   - "parameters"     [2]
#>  [3] "p.value"     | "p.value"        [3]
#>  [4] "conf.int"    | "conf.int"       [4]
#>  [5] "estimate"    | "estimate"       [5]
#>  [6] "null.value"  | "null.value"     [6]
#>  [7] "stderr"      -                     
#>  [8] "alternative" | "alternative"    [7]
#>  [9] "method"      | "method"         [8]
#> [10] "data.name"   | "data.name"      [9]
#> 
#> `t.test$parameter` is a double vector (11.3029200067925)
#> `tsum.test$parameter` is absent
#> 
#> `t.test$stderr` is a double vector (0.286497791189657)
#> `tsum.test$stderr` is absent
#> 
#> `t.test$parameters` is absent
#> `tsum.test$parameters` is a double vector (11.3029200067925)

parameters(htest2_fixed)
#> Welch Two Sample t-test
#> 
#> Parameter1 | Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference
#> ------------------------------------------------------------------------
#> x          |          y |            7.02 |            5.26 |       1.76
#> 
#> Parameter1 |       95% CI | t(11.30) |     p
#> --------------------------------------------
#> x          | [1.13, 2.38] |    -0.85 | 0.411
#> 
#> Alternative hypothesis: true difference in means is not equal to 2

Created on 2025-09-17 with reprex v2.1.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    Bug 🐛 (Something isn't working); Enhancement 💥 (Implemented features can be improved or revised)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions