@@ -53,7 +53,7 @@ C(levels = ::Vector{Any}, base = ::Any) # specify levels and base
53
53
mean of the lower levels
54
54
* [`SeqDiffCoding`](@ref) - Code for differences between sequential levels of
55
55
the variable.
56
- * [`HypothesisCoding`](@ref) - Manually specify contrasts via a hypothesis
56
+ * [`HypothesisCoding`](@ref) - Manually specify contrasts via a hypothesis
57
57
matrix, which gives the weighting for the average response for each level
58
58
* [`StatsModels.ContrastsCoding`](@ref) - Manually specify contrasts matrix,
59
59
which is directly copied into the model matrix.
@@ -79,15 +79,15 @@ The easiest way to specify custom contrasts is with `HypothesisCoding` or
79
79
contrast coding system, you can subtype `AbstractContrasts`. This requires a
80
80
constructor, a `contrasts_matrix` method for constructing the actual contrasts
81
81
matrix that maps from levels to `ModelMatrix` column values, and (optionally) a
82
- `termnames ` method:
82
+ `coefnames ` method:
83
83
84
84
```julia
85
85
mutable struct MyCoding <: AbstractContrasts
86
86
...
87
87
end
88
88
89
89
contrasts_matrix(C::MyCoding, baseind, n) = ...
90
- termnames (C::MyCoding, levels, baseind) = ...
90
+ coefnames (C::MyCoding, levels, baseind) = ...
91
91
```
92
92
93
93
# References
@@ -103,30 +103,32 @@ abstract type AbstractContrasts end
103
103
# Contrasts + Levels (usually from data) = ContrastsMatrix
104
104
# A contrasts coding object combined with a concrete vector of levels.
#
# Fields:
# - `matrix`: the contrasts matrix
# - `coefnames`: the coefficient names stored with the matrix
# - `levels`: the levels; the constructor rejects duplicates
# - `contrasts`: the contrasts coding object (`C <: AbstractContrasts`)
# - `invindex`: lookup from each level to its position in `levels`, built by the
#   constructor
struct ContrastsMatrix{C<:AbstractContrasts,M<:AbstractMatrix,T,U}
    matrix::M
    coefnames::Vector{U}
    levels::Vector{T}
    contrasts::C
    invindex::Dict{T,Int}

    function ContrastsMatrix(
        matrix::M, coefnames::Vector{U}, levels::Vector{T}, contrasts::C
    ) where {U,T,C<:AbstractContrasts,M<:AbstractMatrix}
        # Duplicate levels would make the level => position lookup ambiguous.
        if !allunique(levels)
            throw(ArgumentError(" levels must be all unique, got $(levels) "))
        end
        position_of = Dict{T,Int}(level => pos for (pos, level) in enumerate(levels))
        return new{C,M,T,U}(matrix, coefnames, levels, contrasts, position_of)
    end
end
119
119
120
- # only check equality of matrix, termnames, and levels, and that the type is the
120
# Accessor for the coefficient names stored in a `ContrastsMatrix`.
function StatsAPI.coefnames(cm::ContrastsMatrix)
    return cm.coefnames
end
121
+
122
+ # only check equality of matrix, coefnames, and levels, and that the type is the
121
123
# same for the contrasts (values are irrelevant). This ensures that the two
122
124
# will behave identically in creating modelmatrix columns
123
125
# Two `ContrastsMatrix` objects sharing the contrasts type `C` are equal when their
# matrices, coefficient names and levels all match. The `contrasts` field itself is
# not compared. Fields are checked in that order and the comparison stops at the
# first mismatch.
function Base.:(==)(
    a::ContrastsMatrix{C}, b::ContrastsMatrix{C}
) where {C<:AbstractContrasts}
    a.matrix == b.matrix || return false
    a.coefnames == b.coefnames || return false
    return a.levels == b.levels
end
127
129
128
130
# Hash a `ContrastsMatrix` from its levels, coefficient names and matrix, then mix in
# the contrasts type parameter `C`. The values held in the `contrasts` field do not
# contribute.
function Base.hash(a::ContrastsMatrix{C}, h::UInt) where {C}
    seed = hash(a.levels, h)
    seed = hash(a.coefnames, seed)
    seed = hash(a.matrix, seed)
    return hash(C, seed)
end
130
132
131
133
"""
132
134
An instantiation of a contrast coding system for particular levels
@@ -166,7 +168,7 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst
166
168
# 3. contrast levels missing from data: would have empty columns, generate a
167
169
# rank-deficient model matrix.
168
170
c_levels = something (DataAPI. levels (contrasts), levels)
169
-
171
+
170
172
mismatched_levels = symdiff (c_levels, levels)
171
173
if ! isempty (mismatched_levels)
172
174
throw (ArgumentError (" contrasts levels not found in data or vice-versa: " *
@@ -198,7 +200,7 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst
198
200
" $c_levels ." ))
199
201
end
200
202
201
- tnames = termnames (contrasts, c_levels, baseind)
203
+ tnames = coefnames (contrasts, c_levels, baseind)
202
204
203
205
mat = contrasts_matrix (contrasts, baseind, n)
204
206
@@ -224,7 +226,7 @@ function ContrastsMatrix(c::ContrastsMatrix, levels::AbstractVector)
224
226
return c
225
227
end
226
228
227
# Fallback coefficient names for any contrasts coding: every level except the one at
# position `baseind`, in their original order.
function StatsAPI.coefnames(C::AbstractContrasts, levels::AbstractVector, baseind::Integer)
    before_base = 1:(baseind - 1)
    after_base = (baseind + 1):length(levels)
    return levels[vcat(before_base, after_base)]
end
@@ -233,7 +235,7 @@ Base.getindex(contrasts::ContrastsMatrix, rowinds, colinds) =
233
235
getindex (contrasts. matrix, getindex .(Ref (contrasts. invindex), rowinds), colinds)
234
236
235
237
# Making a contrast type T only requires that there be a method for
236
- # contrasts_matrix(T, baseind, n) and optionally termnames (T, levels, baseind)
238
+ # contrasts_matrix(T, baseind, n) and optionally coefnames (T, levels, baseind)
237
239
# The rest is boilerplate.
238
240
for contrastType in [:DummyCoding , :EffectsCoding , :HelmertCoding ]
239
241
@eval begin
@@ -254,7 +256,7 @@ DataAPI.levels(c::AbstractContrasts) = nothing
254
256
FullDummyCoding()
255
257
256
258
Full-rank dummy coding generates one indicator (1 or 0) column for each level,
257
- **including** the base level. This is sometimes known as
259
+ **including** the base level. This is sometimes known as
258
260
[one-hot encoding](https://en.wikipedia.org/wiki/One-hot).
259
261
260
262
Not exported but included here for the sake of completeness.
@@ -331,7 +333,7 @@ column is generated with 1 where `variable .== x` and -1 where `variable .== bas
331
333
of 0.
332
334
333
335
If `levels` are omitted or `nothing`, they are determined from the data
334
- by calling the `levels` function when constructing `ContrastsMatrix`.
336
+ by calling the `levels` function when constructing `ContrastsMatrix`.
335
337
If `base` is omitted or `nothing`, the first level is used as the base.
336
338
337
339
When all levels are equally frequent, effects coding generates model matrix
@@ -373,7 +375,7 @@ Helmert coding codes each level as the difference from the average of the lower
373
375
levels.
374
376
375
377
If `levels` are omitted or `nothing`, they are determined from the data
376
- by calling the `levels` function when constructing `Contrastsmatrix`.
378
+ by calling the `levels` function when constructing `ContrastsMatrix`.
377
379
If `base` is omitted or `nothing`, the first level is used as the base.
378
380
For each non-base level, Helmert coding generates a column with -1 for each of
379
381
n levels below, n for that level, and 0 above.
@@ -462,7 +464,7 @@ function contrasts_matrix(C::SeqDiffCoding, _, n)
462
464
end
463
465
464
466
# TODO : consider customizing term names:
465
- # termnames (C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) =
467
+ # StatsAPI.coefnames (C::SeqDiffCoding, levels::AbstractVector, baseind::Integer) =
466
468
# ["$(levels[i])-$(levels[i-1])" for i in 2:length(levels)]
467
469
468
470
"""
@@ -591,7 +593,7 @@ function contrasts_matrix(C::HypothesisCoding, baseind, n)
591
593
C. contrasts
592
594
end
593
595
594
# Coefficient names for `HypothesisCoding`: `C.labels` when it is not `nothing`,
# otherwise every level except the one at position `baseind`.
#
# `baseind` is typed `Integer` rather than `Int` so the signature lines up with the
# generic `AbstractContrasts` method. With `Int`, a call passing another integer type
# would dispatch to the generic fallback and silently ignore `C.labels`.
function StatsAPI.coefnames(C::HypothesisCoding, levels::AbstractVector, baseind::Integer)
    # Boolean mask selecting every position other than the base level.
    not_base = (1:length(levels)) .!= baseind
    return something(C.labels, levels[not_base])
end
596
598
597
599
# The levels of a `HypothesisCoding` are whatever is stored in its `levels` field.
function DataAPI.levels(c::HypothesisCoding)
    return c.levels
end
@@ -602,8 +604,8 @@ DataAPI.levels(c::HypothesisCoding) = c.levels
602
604
603
605
Coding by manual specification of contrasts matrix. For k levels, the contrasts
604
606
must be a k by k-1 Matrix. The contrasts in this matrix will be copied directly
605
- into the model matrix; if you want to specify your contrasts as hypotheses (i.e.,
606
- weights assigned to each level's cell mean), you should use
607
+ into the model matrix; if you want to specify your contrasts as hypotheses (i.e.,
608
+ weights assigned to each level's cell mean), you should use
607
609
[`HypothesisCoding`](@ref) instead.
608
610
"""
609
611
mutable struct ContrastsCoding{T<: AbstractMatrix } <: AbstractContrasts
@@ -687,9 +689,9 @@ julia> StatsModels.hypothesis_matrix(cmat)
687
689
-1 0 0 1
688
690
```
689
691
690
- For non-centered contrasts like `DummyCoding`, without including the intercept
691
- the hypothesis matrix is incorrect. So while `intercept=true` is the default for
692
- non-centered contrasts, you can see the (wrong) hypothesis matrix when ignoring
692
+ For non-centered contrasts like `DummyCoding`, without including the intercept
693
+ the hypothesis matrix is incorrect. So while `intercept=true` is the default for
694
+ non-centered contrasts, you can see the (wrong) hypothesis matrix when ignoring
693
695
it by forcing `intercept=false`:
694
696
695
697
```jldoctest hypmat
@@ -710,7 +712,7 @@ julia> StatsModels.hypothesis_matrix(cmat, tolerance=0) # ugly
710
712
1.0 -2.23753e-16 6.91749e-18 -1.31485e-16
711
713
-1.0 1.0 -2.42066e-16 9.93754e-17
712
714
-1.0 4.94472e-17 1.0 9.93754e-17
713
- -1.0 1.04958e-16 -1.31044e-16 1.0
715
+ -1.0 1.04958e-16 -1.31044e-16 1.0
714
716
```
715
717
716
718
Finally, the hypothesis matrix for a constructed `ContrastsMatrix` (as stored by
0 commit comments