Skip to content

Commit 5191554

Browse files
committed
update posterior distribution and predictive distributio
1 parent b8d3942 commit 5191554

File tree

1 file changed

+18
-19
lines changed

1 file changed

+18
-19
lines changed

bayesml/hiddenmarkovnormal/hiddenmarkovnormal.md

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ $$
4444
p(\boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi},\boldsymbol{A}) &= \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_0,(\kappa_0 \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_0, \nu_0) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\eta}_0) \prod_{j=1}^{K}\mathrm{Dir}(\boldsymbol{a}_{j}|\boldsymbol{\zeta}_{0,j}), \\
4545
&= \Biggl[ \prod_{k=1}^K \left( \frac{\kappa_0}{2\pi} \right)^{D/2} |\boldsymbol{\Lambda}_k|^{1/2} \exp \left\{ -\frac{\kappa_0}{2}(\boldsymbol{\mu}_k -\boldsymbol{m}_0)^\top \boldsymbol{\Lambda}_k (\boldsymbol{\mu}_k - \boldsymbol{m}_0) \right\} \\
4646
&\qquad \times B(\boldsymbol{W}_0, \nu_0) | \boldsymbol{\Lambda}_k |^{(\nu_0 - D - 1) / 2} \exp \left\{ -\frac{1}{2} \mathrm{Tr} \{ \boldsymbol{W}_0^{-1} \boldsymbol{\Lambda}_k \} \right\}\biggl] \\
47-
&\qquad \times \Biggl[ \prod_{k=1}^KC(\boldsymbol{\eta}_0)\pi_k^{\eta_{0,k}-1}\biggl]\times \biggl[\prod_{j=1}^KC(\boldsymbol{\zeta}_{0,j})\prod_{k=1}^K a_{jk}^{\zeta_{0,j,k}-1}\Biggr],\\
47+
&\qquad \times \Biggl[ \prod_{k=1}^KC(\boldsymbol{\eta}_0)\pi_k^{\eta_{0,k}-1}\biggl]\\
48+
&\qquad \times \biggl[\prod_{j=1}^KC(\boldsymbol{\zeta}_{0,j})\prod_{k=1}^K a_{jk}^{\zeta_{0,j,k}-1}\Biggr],\\
4849
\end{align}
4950
$$
5051

@@ -57,42 +58,40 @@ $$
5758
C(\boldsymbol{\zeta}_{0,j}) &= \frac{\Gamma(\sum_{k=1}^K \zeta_{0,j,k})}{\Gamma(\zeta_{0,j,1})\cdots\Gamma(\zeta_{0,j,K})}.
5859
\end{align}
5960
$$
60-
<!--
61+
6162
The apporoximate posterior distribution in the $t$-th iteration of a variational Bayesian method is as follows:
6263

6364
* $\boldsymbol{x}^n = (\boldsymbol{x}_1, \boldsymbol{x}_2, \dots , \boldsymbol{x}_n) \in \mathbb{R}^{D \times n}$: given data
6465
* $\boldsymbol{z}^n = (\boldsymbol{z}_1, \boldsymbol{z}_2, \dots , \boldsymbol{z}_n) \in \{ 0, 1 \}^{K \times n}$: latent classes of given data
65-
* $\boldsymbol{r}_i^{(t)} = (r_{i,1}^{(t)}, r_{i,2}^{(t)}, \dots , r_{i,K}^{(t)}) \in [0, 1]^K$: a parameter for the $i$-th latent class. ($\sum_{k=1}^K r_{i, k}^{(t)} = 1$)
6666
* $\boldsymbol{m}_{n,k}^{(t)} \in \mathbb{R}^{D}$: a hyperparameter
6767
* $\kappa_{n,k}^{(t)} \in \mathbb{R}_{>0}$: a hyperparameter
6868
* $\nu_{n,k}^{(t)} \in \mathbb{R}$: a hyperparameter $(\nu_n > D-1)$
6969
* $\boldsymbol{W}_{n,k}^{(t)} \in \mathbb{R}^{D\times D}$: a hyperparameter (a positive definite matrix)
7070
* $\boldsymbol{\eta}_n^{(t)} \in \mathbb{R}_{> 0}^K$: a hyperparameter
71-
71+
* $\boldsymbol{\zeta}_{n,j}^{(t)} \in \mathbb{R}_{> 0}^K$: a hyperparameter
7272
$$
7373
\begin{align}
74-
q(\boldsymbol{z}^n, \boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi}) &= \left\{ \prod_{i=1}^n \mathrm{Cat} (\boldsymbol{z}_i | \boldsymbol{r}_i^{(t)}) \right\} \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_{n,k}^{(t)},(\kappa_{n,k}^{(t)} \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_{n,k}^{(t)}, \nu_{n,k}^{(t)}) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\eta}_n^{(t)}) \\
75-
&= \Biggl[ \prod_{i=1}^n \prod_{k=1}^K (r_{i,k}^{(t)})^{z_{i,k}} \Biggr] \Biggl[ \prod_{k=1}^K \left( \frac{\kappa_{n,k}^{(t)}}{2\pi} \right)^{D/2} |\boldsymbol{\Lambda}_k|^{1/2} \exp \left\{ -\frac{\kappa_{n,k}^{(t)}}{2}(\boldsymbol{\mu}_k -\boldsymbol{m}_{n,k}^{(t)})^\top \boldsymbol{\Lambda}_k (\boldsymbol{\mu}_k - \boldsymbol{m}_{n,k}^{(t)}) \right\} \\
74+
q(\boldsymbol{z}^{n+1}, \boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi}) &= q(\boldsymbol{z}^{n+1}) \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_{n,k}^{(t)},(\kappa_{n,k}^{(t)} \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_{n,k}^{(t)}, \nu_{n,k}^{(t)}) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\eta}_n^{(t)})\left\{\prod_{j=1}^K\mathrm{Dir}(\boldsymbol{a}_j|\boldsymbol{\zeta}_{n,j}^{(t)})\right\}, \\
75+
&= q(\boldsymbol{z}^{n+1}) \Biggl[ \prod_{k=1}^K \left( \frac{\kappa_{n,k}^{(t)}}{2\pi} \right)^{D/2} |\boldsymbol{\Lambda}_k|^{1/2} \exp \left\{ -\frac{\kappa_{n,k}^{(t)}}{2}(\boldsymbol{\mu}_k -\boldsymbol{m}_{n,k}^{(t)})^\top \boldsymbol{\Lambda}_k (\boldsymbol{\mu}_k - \boldsymbol{m}_{n,k}^{(t)}) \right\} \\
7676
&\qquad \times B(\boldsymbol{W}_{n,k}^{(t)}, \nu_{n,k}^{(t)}) | \boldsymbol{\Lambda}_k |^{(\nu_{n,k}^{(t)} - D - 1) / 2} \exp \left\{ -\frac{1}{2} \mathrm{Tr} \{ ( \boldsymbol{W}_{n,k}^{(t)} )^{-1} \boldsymbol{\Lambda}_k \} \right\} \Biggr] \\
77-
&\qquad \times C(\boldsymbol{\eta}_n^{(t)})\prod_{k=1}^K \pi_k^{\eta_{n,k}^{(t)}-1},\\
77+
&\qquad \times C(\boldsymbol{\eta}_n^{(t)})\prod_{k=1}^K \pi_k^{\eta_{n,k}^{(t)}-1}\left[\prod_{j=1}^K C(\boldsymbol{\zeta}_{n,j}^{(t)})\prod_{k=1}^K a_{j,k}^{\zeta_{n,j,k}^{(t)}-1}\right],\\
7878
\end{align}
7979
$$
8080

8181
where the updating rule of the hyperparameters is as follows.
8282

8383
$$
8484
\begin{align}
85-
N_k^{(t)} &= \sum_{i=1}^n r_{i,k}^{(t)} \\
86-
\bar{\boldsymbol{x}}_k^{(t)} &= \frac{1}{N_k^{(t)}} \sum_{i=1}^n r_{i,k}^{(t)} \boldsymbol{x}_i \\
85+
N_k^{(t)} &= \sum_{i=1}^n q(z_{i,k})^{(t)}, \\
86+
M_{j,k}^{(t)} &= \sum_{i=2}^{n+1}q(z_{i-1,j}z_{i,k})^{(t)},\\
87+
\bar{\boldsymbol{x}}_k^{(t)} &= \frac{1}{N_k^{(t)}} \sum_{i=1}^n q(z_{i,k})^{(t)} \boldsymbol{x}_i, \\
88+
S_k^{(t)} &= \frac{1}{N_k}\sum_{i=1}^nq(z_{i,k})^{(t)}(x_i-\bar{\boldsymbol{x}}_k^{(t)})(x_i-\bar{\boldsymbol{x}}_k^{(t)})^{\top},\\
8789
\boldsymbol{m}_{n,k}^{(t+1)} &= \frac{\kappa_0\boldsymbol{\mu}_0 + N_k^{(t)} \bar{\boldsymbol{x}}_k^{(t)}}{\kappa_0 + N_k^{(t)}}, \\
8890
\kappa_{n,k}^{(t+1)} &= \kappa_0 + N_k^{(t)}, \\
89-
(\boldsymbol{W}_{n,k}^{(t+1)})^{-1} &= \boldsymbol{W}_0^{-1} + \sum_{i=1}^{n} r_{i,k}^{(t)} (\boldsymbol{x}_i-\bar{\boldsymbol{x}}_k^{(t)})(\boldsymbol{x}_i-\bar{\boldsymbol{x}}_k^{(t)})^\top + \frac{\kappa_0 N_k^{(t)}}{\kappa_0 + N_k^{(t)}}(\bar{\boldsymbol{x}}_k^{(t)}-\boldsymbol{\mu}_0)(\bar{\boldsymbol{x}}_k^{(t)}-\boldsymbol{\mu}_0)^\top, \\
91+
(\boldsymbol{W}_{n,k}^{(t+1)})^{-1} &= \boldsymbol{W}_0^{-1} + N_k^{(t)}S_k^{(t)} + \frac{\kappa_0 N_k^{(t)}}{\kappa_0 + N_k^{(t)}}(\bar{\boldsymbol{x}}_k^{(t)}-\boldsymbol{\mu}_0)(\bar{\boldsymbol{x}}_k^{(t)}-\boldsymbol{\mu}_0)^\top, \\
9092
\nu_{n,k}^{(t+1)} &= \nu_0 + N_k^{(t)},\\
91-
\eta_{n,k}^{(t+1)} &= \eta_{0,k} + N_k^{(t)} \\
92-
\ln \rho_{i,k}^{(t+1)} &= \psi (\eta_{n,k}^{(t+1)}) - \psi ( {\textstyle \sum_{k=1}^K \eta_{n,k}^{(t+1)}} ) \nonumber \\
93-
&\qquad + \frac{1}{2} \Biggl[ \sum_{d=1}^D \psi \left( \frac{\nu_{n,k}^{(t+1)} + 1 - d}{2} \right) + D \ln 2 + \ln | \boldsymbol{W}_{n,k}^{(t+1)} | \nonumber \\
94-
&\qquad - D \ln (2 \pi ) - \frac{D}{\kappa_{n,k}^{(t+1)}} - \nu_{n,k}^{(t+1)} (\boldsymbol{x}_i - \boldsymbol{m}_{n,k}^{(t+1)})^\top \boldsymbol{W}_{n,k}^{(t+1)} (\boldsymbol{x}_i - \boldsymbol{m}_{n,k}^{(t+1)}) \Biggr] \\
95-
r_{i,k}^{(t+1)} &= \frac{\rho_{i,k}^{(t+1)}}{\sum_{k=1}^K \rho_{i,k}^{(t+1)}}
93+
\eta_{n,k}^{(t+1)} &= \eta_{0,k} + q(z_{0,k})^{(t)}, \\
94+
\zeta_{n,j,k}^{(t+1)} &= \zeta_{0,j,k}+M_{j,k}^{(t)}.
9695
\end{align}
9796
$$
9897

@@ -106,19 +105,19 @@ The approximate predictive distribution is as follows:
106105
$$
107106
\begin{align}
108107
&p(x_{n+1}|x^n) \\
109-
&= \frac{1}{\sum_{k=1}^K \eta_{n,k}^{(t)}} \sum_{k=1}^K \eta_{n,k}^{(t)} \mathrm{St}(x_{n+1}|\boldsymbol{\mu}_{\mathrm{p},k},\boldsymbol{\Lambda}_{\mathrm{p},k}, \nu_{\mathrm{p},k}) \\
110-
&= \frac{1}{\sum_{k=1}^K \eta_{n,k}^{(t)}} \sum_{k=1}^K \eta_{n,k}^{(t)} \Biggl[ \frac{\Gamma (\nu_{\mathrm{p},k} / 2 + D / 2)}{\Gamma (\nu_{\mathrm{p},k} / 2)} \frac{|\boldsymbol{\Lambda}_{\mathrm{p},k}|^{1/2}}{(\nu_{\mathrm{p},k} \pi)^{D/2}} \nonumber \\
108+
&\simeq \frac{1}{\sum_{k=1}^K \alpha(z_{n+1,k})^{(t)}} \sum_{k=1}^K \alpha(z_{n+1,k})^{(t)} \mathrm{St}(x_{n+1}|\boldsymbol{\mu}_{\mathrm{p},k},\boldsymbol{\Lambda}_{\mathrm{p},k}, \nu_{\mathrm{p},k}) \\
109+
&= \frac{1}{\sum_{k=1}^K \alpha(z_{n+1,k})^{(t)}} \sum_{k=1}^K \alpha(z_{n+1,k})^{(t)}\Biggl[ \frac{\Gamma (\nu_{\mathrm{p},k} / 2 + D / 2)}{\Gamma (\nu_{\mathrm{p},k} / 2)} \frac{|\boldsymbol{\Lambda}_{\mathrm{p},k}|^{1/2}}{(\nu_{\mathrm{p},k} \pi)^{D/2}} \nonumber \\
111110
&\qquad \qquad \qquad \qquad \qquad \times \left( 1 + \frac{1}{\nu_{\mathrm{p},k}} (\boldsymbol{x}_{n+1} - \boldsymbol{\mu}_{\mathrm{p},k})^\top \boldsymbol{\Lambda}_{\mathrm{p},k} (\boldsymbol{x}_{n+1} - \boldsymbol{\mu}_{\mathrm{p},k}) \right)^{-\nu_{\mathrm{p},k}/2 - D/2} \Biggr],
112111
\end{align}
113112
$$
114113

115-
where the parameters are obtained from the hyperparameters of the posterior distribution as follows:
114+
where the parameters are obtained from the hyperparameters of the predictive distribution as follows:
116115

117116
$$
118117
\begin{align}
118+
\alpha(\boldsymbol{z}_{n+1})^{(t)}&=\sum_{\boldsymbol{z}^{n}}q(\boldsymbol{z}^{n+1})^{(t)}\\
119119
\boldsymbol{\mu}_{\mathrm{p},k} &= \boldsymbol{m}_{n,k}^{(t)} \\
120120
\boldsymbol{\Lambda}_{\mathrm{p},k} &= \frac{\kappa_{n,k}^{(t)} (\nu_{n,k}^{(t)} - D + 1)}{\kappa_{n,k}^{(t)} + 1} \boldsymbol{W}_{n,k}^{(t)}, \\
121121
\nu_{\mathrm{p},k} &= \nu_{n,k}^{(t)} - D + 1.
122122
\end{align}
123123
$$
124-
-->

0 commit comments

Comments
 (0)