From dd6376e8d217bf356e8936d55f826af65c1b3b82 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Tue, 8 Apr 2025 13:29:38 -0700 Subject: [PATCH 01/12] create mvhypergeomtric.jl --- src/multivariate/mvhypergeometric.jl | 128 +++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 src/multivariate/mvhypergeometric.jl diff --git a/src/multivariate/mvhypergeometric.jl b/src/multivariate/mvhypergeometric.jl new file mode 100644 index 0000000000..149017cdca --- /dev/null +++ b/src/multivariate/mvhypergeometric.jl @@ -0,0 +1,128 @@ +""" +The [Multivariate hypergeometric distribution](https://en.wikipedia.org/wiki/Hypergeometric_distribution#Multivariate_hypergeometric_distribution) +generalizes the *hypergeometric distribution*. Consider ``n`` draws from a finite population containing `k` types of elements. Suppose that the population has size `M` and there are ``m_i`` elements of type ``i`` for ``i = 1, .., k`` with ``m_1+...m_k = M``. Let ``X = (X_1, ..., X_k)`` where ``X_i`` represents the number of elements of type ``i`` drawn, then the distribution of ``X`` is a multivariate hypergeometric distribution. Each sample of a multivariate hypergeometric distribution is a ``k``-dimensional integer vector that sums to ``n``. + + +The probability mass function is given by + +```math +f(x; m, n) = {{{m_1 \\choose x_1}{m_2 \\choose x_2}\\cdots {m_k \\choose x_k}}\\over {N \\choose n}}, +\\quad x_1 + \\cdots + x_k = n, \\quad x_i \\le m_i +``` + +```julia +MvHypergeometric(m, n) # Multivariate hypergeometric distribution for a population with + # m = (m_1, ..., m_k) elements of type 1 to k and n draws + +params(d) # Get the parameters, i.e. (m, n) +``` +""" +struct MvHypergeometric <: DiscreteMultivariateDistribution + m::AbstractVector{Int} # number of elements of each type + n::Int # number of draws + function MvHypergeometric(m::Real, n::Real; check_args::Bool=true) + @check_args( + MvHypergeometric, + (m, m >= zero.(n)), + zero(n) <= n <= sum(m), + ) + new(m, n) + end +end + + +# Parameters + +ncategories(d::MvHypergeometric) = length(d.m) +length(d::MvHypergeometric) = ncategories(d) +nelements(d::MvHypergeometric) = d.m +ntrials(d::MvHypergeometric) = d.n + +params(d::MvHypergeometric) = (d.m, d.n) + +# Statistics + +mean(d::MvHypergeometric) = d.n .* d.m ./ sum(d.m) + +function var(d::MvHypergeometric{T}) where T<:Real + m = nelements(d) + k = length(m) + n = ntrials(d) + M = sum(m) + p = m / M + f = n * (M - n) / (M-1) + + v = Vector{T}(undef, k) + for i = 1:k + @inbounds p_i = p[i] + v[i] = f * p_i * (1 - p_i) + end + v +end + +function cov(d::MvHypergeometric{T}) where T<:Real + m = nelements(d) + k = length(m) + n = ntrials(d) + M = sum(m) + p = m / M + f = n * (M - n) / (M-1) + + C = Matrix{T}(undef, k, k) + for j = 1:k + pj = p[j] + for i = 1:j-1 + @inbounds C[i,j] = - f * p[i] * pj + end + + @inbounds C[j,j] = f * pj * (1-pj) + end + + for j = 1:k-1 + for i = j+1:k + @inbounds C[i,j] = C[j,i] + end + end + C +end + + +# Evaluation +function insupport(d::MvHypergeometric, x::AbstractVector{T}) where T<:Real + k = length(d) + m = nelements(d) + length(x) == k || return false + s = 0.0 + for i = 1:k + @inbounds xi = x[i] + if !(isinteger(xi) && xi >= 0 && x <= m[i]) + return false + end + s += xi + end + return s == ntrials(d) # integer computation would not yield truncation errors +end + +function _logpdf(d::MvHypergeometric, x::AbstractVector{T}) where T<:Real + m = nelements(d) + M = sum(m) + n = ntrials(d) + insupport(d,x) || return -Inf + s = -logabsbinomial(M, n)[1] + for i = 1:length(m) + @inbounds xi = x[i] + @inbounds m_i = m[i] + s += logabsbinomial(m_i, xi)[1] + end + return s +end + +# Sampling + +# if only a single sample is requested, no alias table is created +_rand!(rng::AbstractRNG, d::MvHypergeometric, x::AbstractVector{<:Real}) = + multinom_rand!(rng, ntrials(d), probs(d), x) + +sampler(d::MvHypergeometric) = MvHypergeometricSampler(ntrials(d), probs(d)) + + From 8bcfaba22b1b34cfdeb118926ff613b754a094e1 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Tue, 8 Apr 2025 13:56:31 -0700 Subject: [PATCH 02/12] add MvHypergeometric to module --- src/Distributions.jl | 1 + src/multivariate/mvhypergeometric.jl | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Distributions.jl b/src/Distributions.jl index fcf6994347..11c80f6ec5 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -125,6 +125,7 @@ export Logistic, LogNormal, LogUniform, + MvHypergeometric, MvLogitNormal, LogitNormal, MatrixBeta, diff --git a/src/multivariate/mvhypergeometric.jl b/src/multivariate/mvhypergeometric.jl index 149017cdca..a7710a48a0 100644 --- a/src/multivariate/mvhypergeometric.jl +++ b/src/multivariate/mvhypergeometric.jl @@ -117,12 +117,12 @@ function _logpdf(d::MvHypergeometric, x::AbstractVector{T}) where T<:Real return s end -# Sampling +# Testing it out +n = 5 +m = [5, 3, 2] -# if only a single sample is requested, no alias table is created -_rand!(rng::AbstractRNG, d::MvHypergeometric, x::AbstractVector{<:Real}) = - multinom_rand!(rng, ntrials(d), probs(d), x) - -sampler(d::MvHypergeometric) = MvHypergeometricSampler(ntrials(d), probs(d)) +d = MvHypergeometric(m, n) +x = [2, 2, 1] +println("Probability of $x: ", exp(_logpdf(d, x))) From 7f494afb4ba747b9fae0b7f1b955db150d9b777e Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Tue, 8 Apr 2025 17:27:06 -0700 Subject: [PATCH 03/12] add multivariate hypergeometric sampler --- Project.toml | 2 ++ src/Distributions.jl | 4 ++- src/multivariate/mvhypergeometric.jl | 23 +++++++--------- src/multivariates.jl | 3 ++- src/samplers.jl | 1 + src/samplers/mvhypergeometric.jl | 38 +++++++++++++++++++++++++++ test/multivariate/mvhypergeometric.jl | 0 7 files changed, 56 insertions(+), 15 deletions(-) create mode 100644 src/samplers/mvhypergeometric.jl create mode 100644 test/multivariate/mvhypergeometric.jl diff --git a/Project.toml b/Project.toml index e46ac361d4..ad6829e0ad 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" @@ -48,6 +49,7 @@ PDMats = "0.10, 0.11" Printf = "<0.0.1, 1" QuadGK = "2" Random = "<0.0.1, 1" +Revise = "3.7.3" SparseArrays = "<0.0.1, 1" SpecialFunctions = "1.2, 2" StableRNGs = "1" diff --git a/src/Distributions.jl b/src/Distributions.jl index 11c80f6ec5..173097dd6b 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -125,7 +125,6 @@ export Logistic, LogNormal, LogUniform, - MvHypergeometric, MvLogitNormal, LogitNormal, MatrixBeta, @@ -134,6 +133,7 @@ export MatrixTDist, MixtureModel, Multinomial, + MvHypergeometric, MultivariateNormal, MvLogNormal, MvNormal, @@ -282,6 +282,8 @@ export # type system include("common.jl") + + # implementation helpers include("utils.jl") include("eachvariate.jl") diff --git a/src/multivariate/mvhypergeometric.jl b/src/multivariate/mvhypergeometric.jl index a7710a48a0..434829b9f8 100644 --- a/src/multivariate/mvhypergeometric.jl +++ b/src/multivariate/mvhypergeometric.jl @@ -20,10 +20,10 @@ params(d) # Get the parameters, i.e. (m, n) struct MvHypergeometric <: DiscreteMultivariateDistribution m::AbstractVector{Int} # number of elements of each type n::Int # number of draws - function MvHypergeometric(m::Real, n::Real; check_args::Bool=true) + function MvHypergeometric(m::AbstractVector{Int}, n::Int; check_args::Bool=true) @check_args( MvHypergeometric, - (m, m >= zero.(n)), + (m, all(m .>= zero.(n))), zero(n) <= n <= sum(m), ) new(m, n) @@ -44,7 +44,7 @@ params(d::MvHypergeometric) = (d.m, d.n) mean(d::MvHypergeometric) = d.n .* d.m ./ sum(d.m) -function var(d::MvHypergeometric{T}) where T<:Real +function var(d::MvHypergeometric) m = nelements(d) k = length(m) n = ntrials(d) @@ -52,7 +52,7 @@ function var(d::MvHypergeometric{T}) where T<:Real p = m / M f = n * (M - n) / (M-1) - v = Vector{T}(undef, k) + v = Vector{Real}(undef, k) for i = 1:k @inbounds p_i = p[i] v[i] = f * p_i * (1 - p_i) @@ -60,7 +60,7 @@ function var(d::MvHypergeometric{T}) where T<:Real v end -function cov(d::MvHypergeometric{T}) where T<:Real +function cov(d::MvHypergeometric) m = nelements(d) k = length(m) n = ntrials(d) @@ -68,7 +68,7 @@ function cov(d::MvHypergeometric{T}) where T<:Real p = m / M f = n * (M - n) / (M-1) - C = Matrix{T}(undef, k, k) + C = Matrix{Real}(undef, k, k) for j = 1:k pj = p[j] for i = 1:j-1 @@ -95,7 +95,7 @@ function insupport(d::MvHypergeometric, x::AbstractVector{T}) where T<:Real s = 0.0 for i = 1:k @inbounds xi = x[i] - if !(isinteger(xi) && xi >= 0 && x <= m[i]) + if !(isinteger(xi) && xi >= 0 && xi <= m[i]) return false end s += xi @@ -117,12 +117,9 @@ function _logpdf(d::MvHypergeometric, x::AbstractVector{T}) where T<:Real return s end -# Testing it out -n = 5 -m = [5, 3, 2] +_rand!(rng::AbstractRNG, d::MvHypergeometric, x::AbstractVector{Int}) = + mvhypergeom_rand!(rng, nelements(d), ntrials(d), x) + -d = MvHypergeometric(m, n) -x = [2, 2, 1] -println("Probability of $x: ", exp(_logpdf(d, x))) diff --git a/src/multivariates.jl b/src/multivariates.jl index 56d91233cf..8639e4caed 100644 --- a/src/multivariates.jl +++ b/src/multivariates.jl @@ -119,6 +119,7 @@ for fname in ["dirichlet.jl", "mvlognormal.jl", "mvtdist.jl", "product.jl", # deprecated - "vonmisesfisher.jl"] + "vonmisesfisher.jl", + "mvhypergeometric.jl"] include(joinpath("multivariate", fname)) end diff --git a/src/samplers.jl b/src/samplers.jl index fa667fff2b..bff267f189 100644 --- a/src/samplers.jl +++ b/src/samplers.jl @@ -21,6 +21,7 @@ for fname in ["aliastable.jl", "exponential.jl", "gamma.jl", "multinomial.jl", + "mvhypergeometric.jl", "vonmises.jl", "vonmisesfisher.jl", "discretenonparametric.jl", diff --git a/src/samplers/mvhypergeometric.jl b/src/samplers/mvhypergeometric.jl new file mode 100644 index 0000000000..c160a73c08 --- /dev/null +++ b/src/samplers/mvhypergeometric.jl @@ -0,0 +1,38 @@ +function mvhypergeom_rand!(rng::AbstractRNG, m::AbstractVector{Int}, n::Int, + x::AbstractVector{<:Real}) + k = length(m) + length(x) == k || throw(DimensionMismatch("Invalid argument dimension.")) + + z = zero(eltype(m)) + M = sum(m) # remaining total probability (widens type if needed) + i = 0 + km1 = k - 1 + + while i < km1 && n > 0 + i += 1 + @inbounds mi = m[i] + if mi < M + xi = rand(rng, Hypergeometric(mi, M-mi, n)) + @inbounds x[i] = xi + n -= xi + M -= mi + else + # In this case, we don't even have to sample + # from Hypergeometric. Just assign remaining counts + + @inbounds x[i] = n + n = 0 + end + end + + if i == km1 + @inbounds x[k] = n + else # n must have been zero + z = zero(eltype(x)) + for j = i+1 : k + @inbounds x[j] = z + end + end + + return x + end diff --git a/test/multivariate/mvhypergeometric.jl b/test/multivariate/mvhypergeometric.jl new file mode 100644 index 0000000000..e69de29bb2 From 47e416dda7c44e38cde557c032756727a411b737 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Tue, 8 Apr 2025 18:14:18 -0700 Subject: [PATCH 04/12] add tests --- src/Distributions.jl | 1 + test/multivariate/mvhypergeom.jl | 74 ++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 test/multivariate/mvhypergeom.jl diff --git a/src/Distributions.jl b/src/Distributions.jl index 173097dd6b..bc3a4d159a 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -245,6 +245,7 @@ export ncategories, # the number of categories in a Categorical distribution ncomponents, # the number of components in a mixture model ntrials, # the number of trials being performed in the experiment + nelements, # the number of elements of each type in a finite population params, # get the tuple of parameters params!, # provide storage space to calculate the tuple of parameters for a multivariate distribution like mvlognormal partype, # returns a type large enough to hold all of a distribution's parameters' element types diff --git a/test/multivariate/mvhypergeom.jl b/test/multivariate/mvhypergeom.jl new file mode 100644 index 0000000000..5cbb03daae --- /dev/null +++ b/test/multivariate/mvhypergeom.jl @@ -0,0 +1,74 @@ +# Tests for Multivariate Hypergeometric + +using Distributions, Random +using Test + +@testset "Multivariate Hypergeometric" begin + m = [5, 3, 2] + n = 4 + d = MvHypergeometric(m, n) + + @test length(d) == 3 + @test d.n == n + @test nelements(d) == m + @test ncategories(d) == length(m) + + @test mean(d) ≈ [2.0, 1.2, 0.8] + + @test var(d) ≈ [2/3, 56/100, 32/75] + + covmat = cov(d) + @test covmat ≈ (8/3) .* [1/4 -3/20 -1/10; -3/20 21/100 -3/50; -1/10 -3/50 4/25] + + @test insupport(d, [2, 1, 1]) + @test !insupport(d, [3, 2, 1]) + @test !insupport(d, [0, 0, 4]) + + # random sampling + rng = MersenneTwister(123) + x = rand(rng, d) + @test isa(x, Vector{Int}) + @test sum(x) == n + @test insupport(d, x) + + x = rand(rng, d, 100) + @test isa(x, Matrix{Int}) + @test all(sum(x, dims=1) .== n) + @test all(x .>= 0) + @test all(x .<= m) + @test all(insupport(d, x)) + + # log pdf + x = [2, 1, 1] + @test pdf(d, x) ≈ 2/7 + @test logpdf(d, x) ≈ log(2/7) + @test logpdf(d, x) ≈ log(pdf(d, x)) + + x = rand(rng, d, 100) + pv = pdf(d, x) + lp = logpdf(d, x) + for i in 1 : size(x, 2) + @test pv[i] ≈ pdf(d, x[:,i]) + @test lp[i] ≈ logpdf(d, x[:,i]) + end + + # test degenerate cases of logpdf + d1 = MvHypergeometric([1], 1) + @test logpdf(d1, [1]) ≈ 0 + @test logpdf(d1, [0]) == -Inf + d2 = MvHypergeometric([2, 0], 1) + @test logpdf(d2, [1, 0]) ≈ 0 + @test logpdf(d2, [0, 1]) == -Inf + + # behavior with n = 0 + d0 = MvHypergeometric([5, 3, 2], 0) + @test logpdf(d0, [0, 0, 0]) ≈ 0 + @test logpdf(d0, [1, 0, 0]) == -Inf + + @test rand(rng, d0) == [0, 0, 0] + @test mean(d0) == [0.0, 0.0, 0.0] + @test var(d0) == [0.0, 0.0, 0.0] + @test insupport(d0, [0, 0, 0]) + @test !insupport(d0, [1, 0, 0]) + @test length(d0) == 3 +end \ No newline at end of file From 585caff46ad29f33526709aff8937e84e30bc0f3 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 10:48:00 -0700 Subject: [PATCH 05/12] adding tests --- test/multivariate/mvhypergeom.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/multivariate/mvhypergeom.jl b/test/multivariate/mvhypergeom.jl index 5cbb03daae..0c2804bcd3 100644 --- a/test/multivariate/mvhypergeom.jl +++ b/test/multivariate/mvhypergeom.jl @@ -52,7 +52,7 @@ using Test @test lp[i] ≈ logpdf(d, x[:,i]) end - # test degenerate cases of logpdf + # test degenerate cases d1 = MvHypergeometric([1], 1) @test logpdf(d1, [1]) ≈ 0 @test logpdf(d1, [0]) == -Inf @@ -71,4 +71,8 @@ using Test @test insupport(d0, [0, 0, 0]) @test !insupport(d0, [1, 0, 0]) @test length(d0) == 3 + + # compare with hypergeometric + dh1 = MvHypergeometric([5, 5], 4) + dh2 end \ No newline at end of file From 43e824a06da67748e3f89ce3e5ff315c79d2cb9b Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 11:01:19 -0700 Subject: [PATCH 06/12] add tests comparing multivariate hypergeometric to univariate hypergeometric --- test/multivariate/mvhypergeom.jl | 41 +++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/test/multivariate/mvhypergeom.jl b/test/multivariate/mvhypergeom.jl index 0c2804bcd3..065ffa6121 100644 --- a/test/multivariate/mvhypergeom.jl +++ b/test/multivariate/mvhypergeom.jl @@ -4,10 +4,16 @@ using Distributions, Random using Test @testset "Multivariate Hypergeometric" begin + + + + + @test_throws DomainError MvHypergeometric([5, 3, -2], 4) + @test_throws ArgumentError MvHypergeometric([5, 3], 10) + m = [5, 3, 2] n = 4 d = MvHypergeometric(m, n) - @test length(d) == 3 @test d.n == n @test nelements(d) == m @@ -60,6 +66,11 @@ using Test @test logpdf(d2, [1, 0]) ≈ 0 @test logpdf(d2, [0, 1]) == -Inf + d3 = MvHypergeometric([5, 0, 0, 0], 3) + @test logpdf(d3, [3, 0, 0, 0]) ≈ 0 + @test logpdf(d3, [2, 1, 0, 0]) == -Inf + @test logpdf(d3, [2, 0, 0, 0]) == -Inf + # behavior with n = 0 d0 = MvHypergeometric([5, 3, 2], 0) @test logpdf(d0, [0, 0, 0]) ≈ 0 @@ -73,6 +84,30 @@ using Test @test length(d0) == 3 # compare with hypergeometric - dh1 = MvHypergeometric([5, 5], 4) - dh2 + ns = 3 + nf = 5 + n = 4 + dh1 = MvHypergeometric([ns, nf], n) + dh2 = Hypergeometric(ns, nf, n) + + x = 2 + @test pdf(dh1, [x, n-x]) ≈ pdf(dh2, x) + x = 3 + @test pdf(dh1, [x, n-x]) ≈ pdf(dh2, x) + + # comparing marginals to hypergeometric + m = [5, 3, 2] + n = 4 + d = MvHypergeometric(m, n) + dh = Hypergeometric(m[1], sum(m[2:end]), n) + x1 = 2 + @test pdf(dh, x1) ≈ sum([pdf(d, [x1, x2, n-x1-x2]) for x2 in 0:m[2]]) + + # comparing conditionals to hypergeometric + x1 = 2 + dh = Hypergeometric(m[2], m[3], n-x1) + q = sum([pdf(d, [x1, x2, n-x1-x2]) for x2 in 0:m[2]]) + for x2 = 0:m[2] + @test pdf(dh, x2) ≈ pdf(d, [x1, x2, n-x1-x2])/q + end end \ No newline at end of file From 53b005f17878d8837fdd363aff5e6f49aee2205f Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 11:08:24 -0700 Subject: [PATCH 07/12] add comments to mvhypergeometric sampler --- src/multivariate/mvhypergeometric.jl | 4 +++- src/samplers/mvhypergeometric.jl | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/multivariate/mvhypergeometric.jl b/src/multivariate/mvhypergeometric.jl index 434829b9f8..3ac4733172 100644 --- a/src/multivariate/mvhypergeometric.jl +++ b/src/multivariate/mvhypergeometric.jl @@ -1,6 +1,6 @@ """ The [Multivariate hypergeometric distribution](https://en.wikipedia.org/wiki/Hypergeometric_distribution#Multivariate_hypergeometric_distribution) -generalizes the *hypergeometric distribution*. Consider ``n`` draws from a finite population containing `k` types of elements. Suppose that the population has size `M` and there are ``m_i`` elements of type ``i`` for ``i = 1, .., k`` with ``m_1+...m_k = M``. Let ``X = (X_1, ..., X_k)`` where ``X_i`` represents the number of elements of type ``i`` drawn, then the distribution of ``X`` is a multivariate hypergeometric distribution. Each sample of a multivariate hypergeometric distribution is a ``k``-dimensional integer vector that sums to ``n``. +generalizes the *hypergeometric distribution*. Consider ``n`` draws from a finite population containing ``k`` types of elements. Suppose that the population has size `M` and there are ``m_i`` elements of type ``i`` for ``i = 1, .., k`` with ``m_1+...m_k = M``. Let ``X = (X_1, ..., X_k)`` where ``X_i`` represents the number of elements of type ``i`` drawn, then the distribution of ``X`` is a multivariate hypergeometric distribution. Each sample of a multivariate hypergeometric distribution is a ``k``-dimensional integer vector that sums to ``n`` and satisfies ``0 \\le X_i \\le m_i``. The probability mass function is given by @@ -117,6 +117,8 @@ function _logpdf(d::MvHypergeometric, x::AbstractVector{T}) where T<:Real return s end +# Sampling is performed by sequentially sampling each entry from the +# hypergeometric distribution _rand!(rng::AbstractRNG, d::MvHypergeometric, x::AbstractVector{Int}) = mvhypergeom_rand!(rng, nelements(d), ntrials(d), x) diff --git a/src/samplers/mvhypergeometric.jl b/src/samplers/mvhypergeometric.jl index c160a73c08..43e684ed97 100644 --- a/src/samplers/mvhypergeometric.jl +++ b/src/samplers/mvhypergeometric.jl @@ -4,7 +4,7 @@ function mvhypergeom_rand!(rng::AbstractRNG, m::AbstractVector{Int}, n::Int, length(x) == k || throw(DimensionMismatch("Invalid argument dimension.")) z = zero(eltype(m)) - M = sum(m) # remaining total probability (widens type if needed) + M = sum(m) # remaining population i = 0 km1 = k - 1 @@ -12,16 +12,19 @@ function mvhypergeom_rand!(rng::AbstractRNG, m::AbstractVector{Int}, n::Int, i += 1 @inbounds mi = m[i] if mi < M - xi = rand(rng, Hypergeometric(mi, M-mi, n)) + # Sample from hypergeometric distribution + # element of type i are considered successes + # all other elements are considered failures + xi = rand(rng, Hypergeometric(mi, M-mi, n)) @inbounds x[i] = xi - n -= xi - M -= mi + # Remove elements of type i from population + n -= xi + M -= mi else - # In this case, we don't even have to sample - # from Hypergeometric. Just assign remaining counts - - @inbounds x[i] = n - n = 0 + # In this case, we don't even have to sample + # from Hypergeometric. Just assign remaining counts + @inbounds x[i] = n + n = 0 end end From f2d6b4f27dba2dd320f6465ffb44000b51d248b1 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 11:09:15 -0700 Subject: [PATCH 08/12] rename test file for multivariate hypergeometric --- test/multivariate/mvhypergeom.jl | 113 -------------------------- test/multivariate/mvhypergeometric.jl | 113 ++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 113 deletions(-) delete mode 100644 test/multivariate/mvhypergeom.jl diff --git a/test/multivariate/mvhypergeom.jl b/test/multivariate/mvhypergeom.jl deleted file mode 100644 index 065ffa6121..0000000000 --- a/test/multivariate/mvhypergeom.jl +++ /dev/null @@ -1,113 +0,0 @@ -# Tests for Multivariate Hypergeometric - -using Distributions, Random -using Test - -@testset "Multivariate Hypergeometric" begin - - - - - @test_throws DomainError MvHypergeometric([5, 3, -2], 4) - @test_throws ArgumentError MvHypergeometric([5, 3], 10) - - m = [5, 3, 2] - n = 4 - d = MvHypergeometric(m, n) - @test length(d) == 3 - @test d.n == n - @test nelements(d) == m - @test ncategories(d) == length(m) - - @test mean(d) ≈ [2.0, 1.2, 0.8] - - @test var(d) ≈ [2/3, 56/100, 32/75] - - covmat = cov(d) - @test covmat ≈ (8/3) .* [1/4 -3/20 -1/10; -3/20 21/100 -3/50; -1/10 -3/50 4/25] - - @test insupport(d, [2, 1, 1]) - @test !insupport(d, [3, 2, 1]) - @test !insupport(d, [0, 0, 4]) - - # random sampling - rng = MersenneTwister(123) - x = rand(rng, d) - @test isa(x, Vector{Int}) - @test sum(x) == n - @test insupport(d, x) - - x = rand(rng, d, 100) - @test isa(x, Matrix{Int}) - @test all(sum(x, dims=1) .== n) - @test all(x .>= 0) - @test all(x .<= m) - @test all(insupport(d, x)) - - # log pdf - x = [2, 1, 1] - @test pdf(d, x) ≈ 2/7 - @test logpdf(d, x) ≈ log(2/7) - @test logpdf(d, x) ≈ log(pdf(d, x)) - - x = rand(rng, d, 100) - pv = pdf(d, x) - lp = logpdf(d, x) - for i in 1 : size(x, 2) - @test pv[i] ≈ pdf(d, x[:,i]) - @test lp[i] ≈ logpdf(d, x[:,i]) - end - - # test degenerate cases - d1 = MvHypergeometric([1], 1) - @test logpdf(d1, [1]) ≈ 0 - @test logpdf(d1, [0]) == -Inf - d2 = MvHypergeometric([2, 0], 1) - @test logpdf(d2, [1, 0]) ≈ 0 - @test logpdf(d2, [0, 1]) == -Inf - - d3 = MvHypergeometric([5, 0, 0, 0], 3) - @test logpdf(d3, [3, 0, 0, 0]) ≈ 0 - @test logpdf(d3, [2, 1, 0, 0]) == -Inf - @test logpdf(d3, [2, 0, 0, 0]) == -Inf - - # behavior with n = 0 - d0 = MvHypergeometric([5, 3, 2], 0) - @test logpdf(d0, [0, 0, 0]) ≈ 0 - @test logpdf(d0, [1, 0, 0]) == -Inf - - @test rand(rng, d0) == [0, 0, 0] - @test mean(d0) == [0.0, 0.0, 0.0] - @test var(d0) == [0.0, 0.0, 0.0] - @test insupport(d0, [0, 0, 0]) - @test !insupport(d0, [1, 0, 0]) - @test length(d0) == 3 - - # compare with hypergeometric - ns = 3 - nf = 5 - n = 4 - dh1 = MvHypergeometric([ns, nf], n) - dh2 = Hypergeometric(ns, nf, n) - - x = 2 - @test pdf(dh1, [x, n-x]) ≈ pdf(dh2, x) - x = 3 - @test pdf(dh1, [x, n-x]) ≈ pdf(dh2, x) - - # comparing marginals to hypergeometric - m = [5, 3, 2] - n = 4 - d = MvHypergeometric(m, n) - dh = Hypergeometric(m[1], sum(m[2:end]), n) - x1 = 2 - @test pdf(dh, x1) ≈ sum([pdf(d, [x1, x2, n-x1-x2]) for x2 in 0:m[2]]) - - # comparing conditionals to hypergeometric - x1 = 2 - dh = Hypergeometric(m[2], m[3], n-x1) - q = sum([pdf(d, [x1, x2, n-x1-x2]) for x2 in 0:m[2]]) - for x2 = 0:m[2] - @test pdf(dh, x2) ≈ pdf(d, [x1, x2, n-x1-x2])/q - end -end \ No newline at end of file diff --git a/test/multivariate/mvhypergeometric.jl b/test/multivariate/mvhypergeometric.jl index e69de29bb2..065ffa6121 100644 --- a/test/multivariate/mvhypergeometric.jl +++ b/test/multivariate/mvhypergeometric.jl @@ -0,0 +1,113 @@ +# Tests for Multivariate Hypergeometric + +using Distributions, Random +using Test + +@testset "Multivariate Hypergeometric" begin + + + + + @test_throws DomainError MvHypergeometric([5, 3, -2], 4) + @test_throws ArgumentError MvHypergeometric([5, 3], 10) + + m = [5, 3, 2] + n = 4 + d = MvHypergeometric(m, n) + @test length(d) == 3 + @test d.n == n + @test nelements(d) == m + @test ncategories(d) == length(m) + + @test mean(d) ≈ [2.0, 1.2, 0.8] + + @test var(d) ≈ [2/3, 56/100, 32/75] + + covmat = cov(d) + @test covmat ≈ (8/3) .* [1/4 -3/20 -1/10; -3/20 21/100 -3/50; -1/10 -3/50 4/25] + + @test insupport(d, [2, 1, 1]) + @test !insupport(d, [3, 2, 1]) + @test !insupport(d, [0, 0, 4]) + + # random sampling + rng = MersenneTwister(123) + x = rand(rng, d) + @test isa(x, Vector{Int}) + @test sum(x) == n + @test insupport(d, x) + + x = rand(rng, d, 100) + @test isa(x, Matrix{Int}) + @test all(sum(x, dims=1) .== n) + @test all(x .>= 0) + @test all(x .<= m) + @test all(insupport(d, x)) + + # log pdf + x = [2, 1, 1] + @test pdf(d, x) ≈ 2/7 + @test logpdf(d, x) ≈ log(2/7) + @test logpdf(d, x) ≈ log(pdf(d, x)) + + x = rand(rng, d, 100) + pv = pdf(d, x) + lp = logpdf(d, x) + for i in 1 : size(x, 2) + @test pv[i] ≈ pdf(d, x[:,i]) + @test lp[i] ≈ logpdf(d, x[:,i]) + end + + # test degenerate cases + d1 = MvHypergeometric([1], 1) + @test logpdf(d1, [1]) ≈ 0 + @test logpdf(d1, [0]) == -Inf + d2 = MvHypergeometric([2, 0], 1) + @test logpdf(d2, [1, 0]) ≈ 0 + @test logpdf(d2, [0, 1]) == -Inf + + d3 = MvHypergeometric([5, 0, 0, 0], 3) + @test logpdf(d3, [3, 0, 0, 0]) ≈ 0 + @test logpdf(d3, [2, 1, 0, 0]) == -Inf + @test logpdf(d3, [2, 0, 0, 0]) == -Inf + + # behavior with n = 0 + d0 = MvHypergeometric([5, 3, 2], 0) + @test logpdf(d0, [0, 0, 0]) ≈ 0 + @test logpdf(d0, [1, 0, 0]) == -Inf + + @test rand(rng, d0) == [0, 0, 0] + @test mean(d0) == [0.0, 0.0, 0.0] + @test var(d0) == [0.0, 0.0, 0.0] + @test insupport(d0, [0, 0, 0]) + @test !insupport(d0, [1, 0, 0]) + @test length(d0) == 3 + + # compare with hypergeometric + ns = 3 + nf = 5 + n = 4 + dh1 = MvHypergeometric([ns, nf], n) + dh2 = Hypergeometric(ns, nf, n) + + x = 2 + @test pdf(dh1, [x, n-x]) ≈ pdf(dh2, x) + x = 3 + @test pdf(dh1, [x, n-x]) ≈ pdf(dh2, x) + + # comparing marginals to hypergeometric + m = [5, 3, 2] + n = 4 + d = MvHypergeometric(m, n) + dh = Hypergeometric(m[1], sum(m[2:end]), n) + x1 = 2 + @test pdf(dh, x1) ≈ sum([pdf(d, [x1, x2, n-x1-x2]) for x2 in 0:m[2]]) + + # comparing conditionals to hypergeometric + x1 = 2 + dh = Hypergeometric(m[2], m[3], n-x1) + q = sum([pdf(d, [x1, x2, n-x1-x2]) for x2 in 0:m[2]]) + for x2 = 0:m[2] + @test pdf(dh, x2) ≈ pdf(d, [x1, x2, n-x1-x2])/q + end +end \ No newline at end of file From 4bdaca438883ba27f651c5d8f2163c581ba56da7 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 11:13:09 -0700 Subject: [PATCH 09/12] append mvhypergeometric tests to runtest.jl --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index f63a3d5e8e..e08eda167f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -100,6 +100,7 @@ const tests = [ "univariate/continuous/triangular", "statsapi", "univariate/continuous/inversegaussian", + "multivariate/mvhypergeometric", ### missing files compared to /src: # "common", From d17c249292af5cd42749e0f8b743d04c35c3073a Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 14:45:08 -0700 Subject: [PATCH 10/12] update tests --- test/multivariate/mvhypergeometric.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/multivariate/mvhypergeometric.jl b/test/multivariate/mvhypergeometric.jl index 065ffa6121..c74fb4c84b 100644 --- a/test/multivariate/mvhypergeometric.jl +++ b/test/multivariate/mvhypergeometric.jl @@ -4,10 +4,6 @@ using Distributions, Random using Test @testset "Multivariate Hypergeometric" begin - - - - @test_throws DomainError MvHypergeometric([5, 3, -2], 4) @test_throws ArgumentError MvHypergeometric([5, 3], 10) From 9e37f959ce47d5fd559ecc729a38e6499140a965 Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 14:51:25 -0700 Subject: [PATCH 11/12] update project.toml --- Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index ad6829e0ad..e46ac361d4 100644 --- a/Project.toml +++ b/Project.toml @@ -13,7 +13,6 @@ PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" @@ -49,7 +48,6 @@ PDMats = "0.10, 0.11" Printf = "<0.0.1, 1" QuadGK = "2" Random = "<0.0.1, 1" -Revise = "3.7.3" SparseArrays = "<0.0.1, 1" SpecialFunctions = "1.2, 2" StableRNGs = "1" From 91d9da56d6a37a2f700c35d0323a1410ae4512fd Mon Sep 17 00:00:00 2001 From: Michael-Howes Date: Wed, 9 Apr 2025 15:29:38 -0700 Subject: [PATCH 12/12] update tests to improve coverage --- test/multivariate/mvhypergeometric.jl | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/test/multivariate/mvhypergeometric.jl b/test/multivariate/mvhypergeometric.jl index c74fb4c84b..be49fcbfd4 100644 --- a/test/multivariate/mvhypergeometric.jl +++ b/test/multivariate/mvhypergeometric.jl @@ -1,6 +1,6 @@ # Tests for Multivariate Hypergeometric -using Distributions, Random +using Distributions using Test @testset "Multivariate Hypergeometric" begin @@ -14,9 +14,9 @@ using Test @test d.n == n @test nelements(d) == m @test ncategories(d) == length(m) + @test params(d) == (m, n) @test mean(d) ≈ [2.0, 1.2, 0.8] - @test var(d) ≈ [2/3, 56/100, 32/75] covmat = cov(d) @@ -25,28 +25,41 @@ using Test @test insupport(d, [2, 1, 1]) @test !insupport(d, [3, 2, 1]) @test !insupport(d, [0, 0, 4]) + # random sampling - rng = MersenneTwister(123) - x = rand(rng, d) + x = rand(d) @test isa(x, Vector{Int}) @test sum(x) == n + @test all(x .>= 0) + @test all(x .<= m) @test insupport(d, x) - x = rand(rng, d, 100) + x = rand(d, 100) @test isa(x, Matrix{Int}) @test all(sum(x, dims=1) .== n) @test all(x .>= 0) @test all(x .<= m) @test all(insupport(d, x)) + # random sampling with many catergories + m = [20, 2, 2, 2, 1, 1, 1] + n = 5 + d2 = MvHypergeometric(m, n) + x = rand(d2) + @test isa(x, Vector{Int}) + @test sum(x) == n + @test all(x .>= 0) + @test all(x .<= m) + @test insupport(d2, x) + # log pdf x = [2, 1, 1] @test pdf(d, x) ≈ 2/7 @test logpdf(d, x) ≈ log(2/7) @test logpdf(d, x) ≈ log(pdf(d, x)) - x = rand(rng, d, 100) + x = rand(d, 100) pv = pdf(d, x) lp = logpdf(d, x) for i in 1 : size(x, 2) @@ -72,7 +85,7 @@ using Test @test logpdf(d0, [0, 0, 0]) ≈ 0 @test logpdf(d0, [1, 0, 0]) == -Inf - @test rand(rng, d0) == [0, 0, 0] + @test rand(d0) == [0, 0, 0] @test mean(d0) == [0.0, 0.0, 0.0] @test var(d0) == [0.0, 0.0, 0.0] @test insupport(d0, [0, 0, 0])