Skip to content

Commit 19868d1

Browse files
committed
Update hiddenmarkovnormal.md
1 parent f523912 commit 19868d1

File tree

1 file changed

+19
-15
lines changed

1 file changed

+19
-15
lines changed

bayesml/hiddenmarkovnormal/hiddenmarkovnormal.md

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@ The Hidden Markov model with the Gauss-Wishart prior distribution and the Dirich
77
The stochastic data generative model is as follows:
88

99
* $K \in \mathbb{N}$: number of latent classes
10-
* $\boldsymbol{z}_{n} \in \{ 0, 1 \}^K$: a one-hot vector representing the $n$-th latent class (latent variable)
10+
* $\boldsymbol{z} \in \{ 0, 1 \}^K$: a one-hot vector representing the latent class (latent variable)
1111
* $\boldsymbol{\pi} \in [0, 1]^K$: a parameter for latent classes, ($\sum_{k=1}^K \pi_k=1$)
12+
* $\boldsymbol{A}=(a_{k'k})_{1\leq k',k\leq K} \in [0, 1]^{K\times K}$: a transition probability matrix of latent classes, ($\sum_{k=1}^K a_{k'k}=1$ for each $k'$)
1213
* $D \in \mathbb{N}$: a dimension of data
13-
* $\boldsymbol{x}_{n} \in \mathbb{R}^D$: the $n$-th data point
14+
* $\boldsymbol{x} \in \mathbb{R}^D$: a data point
1415
* $\boldsymbol{\mu}_k \in \mathbb{R}^D$: a parameter
1516
* $\boldsymbol{\mu} = \{ \boldsymbol{\mu}_k \}_{k=1}^K$
1617
* $\boldsymbol{\Lambda}_k \in \mathbb{R}^{D\times D}$ : a parameter (a positive definite matrix)
@@ -20,7 +21,7 @@ The stochastic data generative model is as follows:
2021
$$
2122
\begin{align}
2223
p(\boldsymbol{z}_{1} | \boldsymbol{\pi}) &= \mathrm{Cat}(\boldsymbol{z}_{1}|\boldsymbol{\pi}) = \prod_{k=1}^K \pi_k^{z_{1,k}},\\
23-
p(\boldsymbol{z}_{n} |\boldsymbol{z}_{n-1} ,\boldsymbol{A}) &= \prod_{k=1}^K \prod_{k'=1}^K a_{k'k}^{z_{n-1,k}z_{n,k}},\\
24+
p(\boldsymbol{z}_{n} |\boldsymbol{z}_{n-1} ,\boldsymbol{A}) &= \prod_{k=1}^K \prod_{k'=1}^K a_{k'k}^{z_{n-1,k'}z_{n,k}},\\
2425
p(\boldsymbol{x}_{n} | \boldsymbol{\mu}, \boldsymbol{\Lambda}, \boldsymbol{z}_{n}) &= \prod_{k=1}^K \mathcal{N}(\boldsymbol{x}_{n}|\boldsymbol{\mu}_k,\boldsymbol{\Lambda}_k^{-1})^{z_{n,k}} \\
2526
&= \prod_{k=1}^K \left( \frac{| \boldsymbol{\Lambda}_{k} |^{1/2}}{(2\pi)^{D/2}} \exp \left\{ -\frac{1}{2}(\boldsymbol{x}_{n}-\boldsymbol{\mu}_{k})^\top \boldsymbol{\Lambda}_{k} (\boldsymbol{x}_{n}-\boldsymbol{\mu}_{k}) \right\} \right)^{z_{n,k}},
2627
\end{align}
@@ -32,25 +33,28 @@ The prior distribution is as follows:
3233
* $\kappa_0 \in \mathbb{R}_{>0}$: a hyperparameter
3334
* $\nu_0 \in \mathbb{R}$: a hyperparameter ($\nu_0 > D-1$)
3435
* $\boldsymbol{W}_0 \in \mathbb{R}^{D\times D}$: a hyperparameter (a positive definite matrix)
35-
* $\boldsymbol{\alpha}_0 \in \mathbb{R}_{> 0}^K$: a hyperparameter
36+
* $\boldsymbol{\eta}_0 \in \mathbb{R}_{> 0}^K$: a hyperparameter
37+
* $\boldsymbol{\zeta}_{0,k'} \in \mathbb{R}_{> 0}^K$: a hyperparameter
3638
* $\mathrm{Tr} \{ \cdot \}$: a trace of a matrix
3739
* $\Gamma (\cdot)$: the gamma function
3840

3941
$$
4042
\begin{align}
41-
p(\boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi}) &= \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_0,(\kappa_0 \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_0, \nu_0) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\alpha}_0) \\
43+
p(\boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi},\boldsymbol{A}) &= \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_0,(\kappa_0 \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_0, \nu_0) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\eta}_0) \prod_{k'=1}^{K}\mathrm{Dir}(\boldsymbol{a}_{k'}|\boldsymbol{\zeta}_{0,k'}) \\
4244
&= \Biggl[ \prod_{k=1}^K \left( \frac{\kappa_0}{2\pi} \right)^{D/2} |\boldsymbol{\Lambda}_k|^{1/2} \exp \left\{ -\frac{\kappa_0}{2}(\boldsymbol{\mu}_k -\boldsymbol{m}_0)^\top \boldsymbol{\Lambda}_k (\boldsymbol{\mu}_k - \boldsymbol{m}_0) \right\} \\
4345
&\qquad \times B(\boldsymbol{W}_0, \nu_0) | \boldsymbol{\Lambda}_k |^{(\nu_0 - D - 1) / 2} \exp \left\{ -\frac{1}{2} \mathrm{Tr} \{ \boldsymbol{W}_0^{-1} \boldsymbol{\Lambda}_k \} \right\} \Biggr] \\
44-
&\qquad \times C(\boldsymbol{\alpha}_0)\prod_{k=1}^K \pi_k^{\alpha_{0,k}-1},\\
46+
&\qquad \times C(\boldsymbol{\eta}_0)\prod_{k=1}^K \pi_k^{\eta_{0,k}-1}\\
47+
&\qquad \times \prod_{k'=1}^K \left[ C(\boldsymbol{\zeta}_{0,k'})\prod_{k=1}^K a_{k'k}^{\zeta_{0,k',k}-1} \right],\\
4548
\end{align}
4649
$$
4750

48-
where $B(\boldsymbol{W}_0, \nu_0)$ and $C(\boldsymbol{\alpha}_0)$ are defined as follows:
51+
where $B(\boldsymbol{W}_0, \nu_0)$, $C(\boldsymbol{\eta}_0)$, and $C(\boldsymbol{\zeta}_{0,k'})$ are defined as follows:
4952

5053
$$
5154
\begin{align}
5255
B(\boldsymbol{W}_0, \nu_0) &= | \boldsymbol{W}_0 |^{-\nu_0 / 2} \left( 2^{\nu_0 D / 2} \pi^{D(D-1)/4} \prod_{i=1}^D \Gamma \left( \frac{\nu_0 + 1 - i}{2} \right) \right)^{-1}, \\
53-
C(\boldsymbol{\alpha}_0) &= \frac{\Gamma(\sum_{k=1}^K \alpha_{0,k})}{\Gamma(\alpha_{0,1})\cdots\Gamma(\alpha_{0,K})}.
56+
C(\boldsymbol{\eta}_0) &= \frac{\Gamma(\sum_{k=1}^K \eta_{0,k})}{\Gamma(\eta_{0,1})\cdots\Gamma(\eta_{0,K})},\\
57+
C(\boldsymbol{\zeta}_{0,k'}) &= \frac{\Gamma(\sum_{k=1}^K \zeta_{0,k',k})}{\Gamma(\zeta_{0,k',1})\cdots\Gamma(\zeta_{0,k',K})}.
5458
\end{align}
5559
$$
5660

@@ -63,14 +67,14 @@ The approximate posterior distribution in the $t$-th iteration of a variational
6367
* $\kappa_{n,k}^{(t)} \in \mathbb{R}_{>0}$: a hyperparameter
6468
* $\nu_{n,k}^{(t)} \in \mathbb{R}$: a hyperparameter $(\nu_{n,k}^{(t)} > D-1)$
6569
* $\boldsymbol{W}_{n,k}^{(t)} \in \mathbb{R}^{D\times D}$: a hyperparameter (a positive definite matrix)
66-
* $\boldsymbol{\alpha}_n^{(t)} \in \mathbb{R}_{> 0}^K$: a hyperparameter
70+
* $\boldsymbol{\eta}_n^{(t)} \in \mathbb{R}_{> 0}^K$: a hyperparameter
6771

6872
$$
6973
\begin{align}
70-
q(\boldsymbol{z}^n, \boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi}) &= \left\{ \prod_{i=1}^n \mathrm{Cat} (\boldsymbol{z}_i | \boldsymbol{r}_i^{(t)}) \right\} \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_{n,k}^{(t)},(\kappa_{n,k}^{(t)} \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_{n,k}^{(t)}, \nu_{n,k}^{(t)}) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\alpha}_n^{(t)}) \\
74+
q(\boldsymbol{z}^n, \boldsymbol{\mu},\boldsymbol{\Lambda},\boldsymbol{\pi}) &= \left\{ \prod_{i=1}^n \mathrm{Cat} (\boldsymbol{z}_i | \boldsymbol{r}_i^{(t)}) \right\} \left\{ \prod_{k=1}^K \mathcal{N}(\boldsymbol{\mu}_k|\boldsymbol{m}_{n,k}^{(t)},(\kappa_{n,k}^{(t)} \boldsymbol{\Lambda}_k)^{-1})\mathcal{W}(\boldsymbol{\Lambda}_k|\boldsymbol{W}_{n,k}^{(t)}, \nu_{n,k}^{(t)}) \right\} \mathrm{Dir}(\boldsymbol{\pi}|\boldsymbol{\eta}_n^{(t)}) \\
7175
&= \Biggl[ \prod_{i=1}^n \prod_{k=1}^K (r_{i,k}^{(t)})^{z_{i,k}} \Biggr] \Biggl[ \prod_{k=1}^K \left( \frac{\kappa_{n,k}^{(t)}}{2\pi} \right)^{D/2} |\boldsymbol{\Lambda}_k|^{1/2} \exp \left\{ -\frac{\kappa_{n,k}^{(t)}}{2}(\boldsymbol{\mu}_k -\boldsymbol{m}_{n,k}^{(t)})^\top \boldsymbol{\Lambda}_k (\boldsymbol{\mu}_k - \boldsymbol{m}_{n,k}^{(t)}) \right\} \\
7276
&\qquad \times B(\boldsymbol{W}_{n,k}^{(t)}, \nu_{n,k}^{(t)}) | \boldsymbol{\Lambda}_k |^{(\nu_{n,k}^{(t)} - D - 1) / 2} \exp \left\{ -\frac{1}{2} \mathrm{Tr} \{ ( \boldsymbol{W}_{n,k}^{(t)} )^{-1} \boldsymbol{\Lambda}_k \} \right\} \Biggr] \\
73-
&\qquad \times C(\boldsymbol{\alpha}_n^{(t)})\prod_{k=1}^K \pi_k^{\alpha_{n,k}^{(t)}-1},\\
77+
&\qquad \times C(\boldsymbol{\eta}_n^{(t)})\prod_{k=1}^K \pi_k^{\eta_{n,k}^{(t)}-1},\\
7478
\end{align}
7579
$$
7680

@@ -84,8 +88,8 @@ $$
8488
\kappa_{n,k}^{(t+1)} &= \kappa_0 + N_k^{(t)}, \\
8589
(\boldsymbol{W}_{n,k}^{(t+1)})^{-1} &= \boldsymbol{W}_0^{-1} + \sum_{i=1}^{n} r_{i,k}^{(t)} (\boldsymbol{x}_i-\bar{\boldsymbol{x}}_k^{(t)})(\boldsymbol{x}_i-\bar{\boldsymbol{x}}_k^{(t)})^\top + \frac{\kappa_0 N_k^{(t)}}{\kappa_0 + N_k^{(t)}}(\bar{\boldsymbol{x}}_k^{(t)}-\boldsymbol{m}_0)(\bar{\boldsymbol{x}}_k^{(t)}-\boldsymbol{m}_0)^\top, \\
8690
\nu_{n,k}^{(t+1)} &= \nu_0 + N_k^{(t)},\\
87-
\alpha_{n,k}^{(t+1)} &= \alpha_{0,k} + N_k^{(t)} \\
88-
\ln \rho_{i,k}^{(t+1)} &= \psi (\alpha_{n,k}^{(t+1)}) - \psi ( {\textstyle \sum_{k=1}^K \alpha_{n,k}^{(t+1)}} ) \nonumber \\
91+
\eta_{n,k}^{(t+1)} &= \eta_{0,k} + N_k^{(t)} \\
92+
\ln \rho_{i,k}^{(t+1)} &= \psi (\eta_{n,k}^{(t+1)}) - \psi ( {\textstyle \sum_{k=1}^K \eta_{n,k}^{(t+1)}} ) \nonumber \\
8993
&\qquad + \frac{1}{2} \Biggl[ \sum_{d=1}^D \psi \left( \frac{\nu_{n,k}^{(t+1)} + 1 - d}{2} \right) + D \ln 2 + \ln | \boldsymbol{W}_{n,k}^{(t+1)} | \nonumber \\
9094
&\qquad - D \ln (2 \pi ) - \frac{D}{\kappa_{n,k}^{(t+1)}} - \nu_{n,k}^{(t+1)} (\boldsymbol{x}_i - \boldsymbol{m}_{n,k}^{(t+1)})^\top \boldsymbol{W}_{n,k}^{(t+1)} (\boldsymbol{x}_i - \boldsymbol{m}_{n,k}^{(t+1)}) \Biggr] \\
9195
r_{i,k}^{(t+1)} &= \frac{\rho_{i,k}^{(t+1)}}{\sum_{j=1}^K \rho_{i,j}^{(t+1)}}
@@ -102,8 +106,8 @@ The approximate predictive distribution is as follows:
102106
$$
103107
\begin{align}
104108
&p(\boldsymbol{x}_{n+1}|\boldsymbol{x}^n) \\
105-
&= \frac{1}{\sum_{k=1}^K \alpha_{n,k}^{(t)}} \sum_{k=1}^K \alpha_{n,k}^{(t)} \mathrm{St}(x_{n+1}|\boldsymbol{\mu}_{\mathrm{p},k},\boldsymbol{\Lambda}_{\mathrm{p},k}, \nu_{\mathrm{p},k}) \\
106-
&= \frac{1}{\sum_{k=1}^K \alpha_{n,k}^{(t)}} \sum_{k=1}^K \alpha_{n,k}^{(t)} \Biggl[ \frac{\Gamma (\nu_{\mathrm{p},k} / 2 + D / 2)}{\Gamma (\nu_{\mathrm{p},k} / 2)} \frac{|\boldsymbol{\Lambda}_{\mathrm{p},k}|^{1/2}}{(\nu_{\mathrm{p},k} \pi)^{D/2}} \nonumber \\
109+
&= \frac{1}{\sum_{k=1}^K \eta_{n,k}^{(t)}} \sum_{k=1}^K \eta_{n,k}^{(t)} \mathrm{St}(\boldsymbol{x}_{n+1}|\boldsymbol{\mu}_{\mathrm{p},k},\boldsymbol{\Lambda}_{\mathrm{p},k}, \nu_{\mathrm{p},k}) \\
110+
&= \frac{1}{\sum_{k=1}^K \eta_{n,k}^{(t)}} \sum_{k=1}^K \eta_{n,k}^{(t)} \Biggl[ \frac{\Gamma (\nu_{\mathrm{p},k} / 2 + D / 2)}{\Gamma (\nu_{\mathrm{p},k} / 2)} \frac{|\boldsymbol{\Lambda}_{\mathrm{p},k}|^{1/2}}{(\nu_{\mathrm{p},k} \pi)^{D/2}} \nonumber \\
107111
&\qquad \qquad \qquad \qquad \qquad \times \left( 1 + \frac{1}{\nu_{\mathrm{p},k}} (\boldsymbol{x}_{n+1} - \boldsymbol{\mu}_{\mathrm{p},k})^\top \boldsymbol{\Lambda}_{\mathrm{p},k} (\boldsymbol{x}_{n+1} - \boldsymbol{\mu}_{\mathrm{p},k}) \right)^{-\nu_{\mathrm{p},k}/2 - D/2} \Biggr],
108112
\end{align}
109113
$$

0 commit comments

Comments
 (0)