
Commit 8786fc1

Merge pull request #788 from stan-dev/fix-terminology
Improve docs with respect to statistical terminology

2 parents 190a7c3 + 21bdbd9

31 files changed: 244 additions, 242 deletions

src/functions-reference/binary_distributions.qmd

Lines changed: 5 additions & 5 deletions

@@ -21,7 +21,7 @@ If $\theta \in [0,1]$, then for $y \in \{0,1\}$,
 \end{array} \right.
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`bernoulli`**`(theta)`
 
@@ -101,7 +101,7 @@ If $\alpha \in \mathbb{R}$, then for $y \in \{0,1\}$,
 \text{logit}^{-1}(\alpha) & \text{if } y = 0. \end{array} \right.
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`bernoulli_logit`**`(alpha)`
 
@@ -142,10 +142,10 @@ quantities blocks. For a description of argument and return types, see section
 ## Bernoulli-logit generalized linear model (Logistic Regression) {#bernoulli-logit-glm}
 
 Stan also supplies a single function for a generalized linear model
-with Bernoulli likelihood and logit link function, i.e. a function
+with Bernoulli distribution and logit link function, i.e. a function
 for a logistic regression. This provides a more efficient
 implementation of logistic regression than a manually written
-regression in terms of a Bernoulli likelihood and matrix
+regression in terms of a Bernoulli distribution and matrix
 multiplication.
 
 ### Probability mass function
@@ -160,7 +160,7 @@ n}\text{Bernoulli}(y_i~|~\text{logit}^{-1}(\alpha_i + x_i\cdot
 \sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) & \text{if } y_i = 0.
 \end{array} \right. \end{align*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`bernoulli_logit_glm`**`(x, alpha, beta)`
 
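As context for the renamed headings: a "distribution statement" is the `~` form that appears in a Stan program's `model` block. A minimal sketch of the `bernoulli_logit_glm` statement changed above (data names and dimensions are illustrative, not part of this commit):

```stan
data {
  int<lower=0> N;                    // number of observations
  int<lower=1> M;                    // number of predictors
  matrix[N, M] x;                    // design matrix
  array[N] int<lower=0, upper=1> y;  // binary outcomes
}
parameters {
  real alpha;                        // intercept
  vector[M] beta;                    // regression coefficients
}
model {
  // Distribution statement: increments the target log density,
  // dropping constant terms; equivalent up to a constant to
  // target += bernoulli_logit_glm_lpmf(y | x, alpha, beta);
  y ~ bernoulli_logit_glm(x, alpha, beta);
}
```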
src/functions-reference/bounded_continuous_distributions.qmd

Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@ If $\alpha \in \mathbb{R}$ and $\beta \in (\alpha,\infty)$, then for
 $y \in [\alpha,\beta]$, \begin{equation*} \text{Uniform}(y|\alpha,\beta) =
 \frac{1}{\beta - \alpha} . \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`uniform`**`(alpha, beta)`
 
src/functions-reference/bounded_discrete_distributions.qmd

Lines changed: 16 additions & 16 deletions

@@ -29,7 +29,7 @@ Suppose $N \in \mathbb{N}$ and $\theta \in [0,1]$, and $n \in
 \begin{equation*} \frac{\partial}{\partial \theta} \log \text{Binomial}(n~|~N,\theta)
 = \frac{n}{\theta} - \frac{N - n}{1 - \theta} \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `n ~ ` **`binomial`**`(N, theta)`
 
@@ -120,7 +120,7 @@ Suppose $N \in \mathbb{N}$, $\alpha \in \mathbb{R}$, and $n \in
 \frac{n}{\text{logit}^{-1}(-\alpha)} - \frac{N -
 n}{\text{logit}^{-1}(\alpha)} \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `n ~ ` **`binomial_logit`**`(N, alpha)`
 
@@ -150,10 +150,10 @@ logit-scaled chance of success alpha dropping constant additive terms
 ## Binomial-logit generalized linear model (Logistic Regression) {#binomial-logit-glm}
 
 Stan also supplies a single function for a generalized linear model
-with binomial likelihood and logit link function, i.e., a function
+with binomial distribution and logit link function, i.e., a function
 for logistic regression with aggregated outcomes. This provides a more efficient
 implementation of logistic regression than a manually written
-regression in terms of a Binomial likelihood and matrix
+regression in terms of a binomial distribution and matrix
 multiplication.
 
 ### Probability mass function
@@ -165,7 +165,7 @@ Suppose $N \in \mathbb{N}$, $x\in \mathbb{R}^{n\cdot m}, \alpha \in \mathbb{R}^n
 &= \binom{N}{n} \left( \text{logit}^{-1}(\alpha_i + \sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) \right)^{n} \left( 1 - \text{logit}^{-1}(\alpha_i + \sum_{1\leq j\leq m}x_{ij}\cdot \beta_j) \right)^{N - n}.
 \end{align*}
 
-### Sampling statement
+### Distribution statement
 
 `n ~ ` **`binomial_logit_glm`**`(N, x, alpha, beta)`
 
@@ -286,7 +286,7 @@ If $N \in \mathbb{N}$, $\alpha \in \mathbb{R}^+$, and $\beta \in
 \mathbb{R}^+$ and $v \in \mathbb{R}^+$ by \begin{equation*} \mathrm{B}(u,v) =
 \frac{\Gamma(u) \ \Gamma(v)}{\Gamma(u + v)}. \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `n ~ ` **`beta_binomial`**`(N, alpha, beta)`
 
@@ -362,7 +362,7 @@ If $a \in \mathbb{N}$, $b \in \mathbb{N}$, and $N \in
 \begin{equation*} \text{Hypergeometric}(n~|~N,a,b) = \frac{\normalsize{\binom{a}{n}
 \binom{b}{N - n}}} {\normalsize{\binom{a + b}{N}}}. \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `n ~ ` **`hypergeometric`**`(N, a, b)`
 
@@ -411,7 +411,7 @@ distribution, \begin{equation*} \text{CategoricalLogit}(y~|~\beta) =
 \text{Categorical}(y~|~\text{softmax}(\beta)). \end{equation*}
 See [the definition of softmax](matrix_operations.qmd#softmax) for the definition of the softmax function.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`categorical`**`(theta)`
 
@@ -421,7 +421,7 @@ dropping constant additive terms.
 <!-- real; categorical ~; -->
 \index{{\tt \bfseries categorical }!sampling statement|hyperpage}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`categorical_logit`**`(beta)`
 
@@ -492,10 +492,10 @@ quantities blocks
 ## Categorical logit generalized linear model (softmax regression) {#categorical-logit-glm}
 
 Stan also supplies a single function for a generalized linear model
-with categorical likelihood and logit link function, i.e. a function
+with categorical distribution and logit link function, i.e. a function
 for a softmax regression. This provides a more efficient
 implementation of softmax regression than a manually written
-regression in terms of a Categorical likelihood and matrix
+regression in terms of a categorical distribution and matrix
 multiplication.
 
 Note that the implementation does not put any restrictions on the coefficient
@@ -515,7 +515,7 @@ If $N,M,K \in \mathbb{N}$, $N,M,K > 0$, and if $x\in \mathbb{R}^{M\times K}, \al
 \end{equation*}
 See [the definition of softmax](matrix_operations.qmd#softmax) for the definition of the softmax function.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`categorical_logit_glm`**`(x, alpha, beta)`
 
@@ -604,7 +604,7 @@ If $l, u \in \mathbb{Z}$ are lower and upper bounds ($l \le u$), then for
 any integer $y \in \{l,\ldots,u\}$, \begin{equation*} \text{DiscreteRange}(y ~|~ l, u) =
 \frac{1}{u - l + 1}. \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`discrete_range`**`(l, u)`
 
@@ -688,7 +688,7 @@ subsumed into the general definition by setting $c_0 = -\infty$ and
 $c_K = +\infty$ with $\text{logit}^{-1}(-\infty) = 0$ and
 $\text{logit}^{-1}(\infty) = 1$.
 
-### Sampling statement
+### Distribution statement
 
 `k ~ ` **`ordered_logistic`**`(eta, c)`
 
@@ -747,7 +747,7 @@ subsumed into the general definition by setting $c_0 = -\infty$ and
 $c_K = +\infty$ with $\text{logit}^{-1}(-\infty) = 0$ and
 $\text{logit}^{-1}(\infty) = 1$.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`ordered_logistic_glm`**`(x, beta, c)`
 
@@ -844,7 +844,7 @@ cases can be subsumed into the general definition by setting $c_0 =
 -\infty$ and $c_K = +\infty$ with $\Phi(-\infty) = 0$ and
 $\Phi(\infty) = 1$.
 
-### Sampling statement
+### Distribution statement
 
 `k ~ ` **`ordered_probit`**`(eta, c)`
 
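The renamed "Distribution statement" headings in this file also cover the ordinal models. A minimal illustrative sketch of the `ordered_logistic` statement (variable names are assumptions for the example, not from the commit; the linear predictor is passed in as data purely for brevity):

```stan
data {
  int<lower=2> K;                      // number of ordinal categories
  int<lower=0> N;                      // number of observations
  array[N] int<lower=1, upper=K> k;    // ordinal outcomes
  vector[N] eta;                       // linear predictors
}
parameters {
  ordered[K - 1] c;                    // interior cutpoints
}
model {
  // Distribution statement form of the ordered logistic model,
  // vectorized over the N observations
  k ~ ordered_logistic(eta, c);
}
```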
src/functions-reference/circular_distributions.qmd

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ For $\kappa = 0$, the Von Mises distribution corresponds to the
 circular uniform distribution with density $1 / (2 \pi)$ (independently
 of the values of $y$ or $\mu$).
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`von_mises`**`(mu, kappa)`
 
src/functions-reference/continuous_distributions_on_0_1.qmd

Lines changed: 2 additions & 2 deletions

@@ -23,7 +23,7 @@ is 0 and the log probability is $-\infty$. Similarly, the
 distribution requires strictly positive parameters, $\alpha, \beta >
 0$.
 
-### Sampling statement
+### Distribution statement
 
 `theta ~ ` **`beta`**`(alpha, beta)`
 
@@ -104,7 +104,7 @@ is 0 and the log probability is $-\infty$. Similarly, the
 distribution requires $\mu \in (0, 1)$ and strictly positive
 parameter, $\kappa > 0$.
 
-### Sampling statement
+### Distribution statement
 
 `theta ~ ` **`beta_proportion`**`(mu, kappa)`
 
src/functions-reference/correlation_matrix_distributions.qmd

Lines changed: 2 additions & 2 deletions

@@ -54,7 +54,7 @@ much better computationally to work directly with the Cholesky factor
 of $\Sigma$, so this distribution should never be explicitly used in
 practice.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`lkj_corr`**`(eta)`
 
@@ -132,7 +132,7 @@ A lower triangular $L$ is a Cholesky factor for a correlation matrix
 if and only if $L_{k,k} > 0$ for $k \in 1{:}K$ and each row $L_k$ has
 unit Euclidean length.
 
-### Sampling statement
+### Distribution statement
 
 `L ~ ` **`lkj_corr_cholesky`**`(eta)`
 
src/functions-reference/covariance_matrix_distributions.qmd

Lines changed: 2 additions & 2 deletions

@@ -29,7 +29,7 @@ Gamma function,
 \Gamma_K(x) = \frac{1}{\pi^{K(K-1)/4}} \ \prod_{k=1}^K \Gamma \left( x + \frac{1 - k}{2} \right) \!.
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `W ~ ` **`wishart`**`(nu, Sigma)`
 
@@ -143,7 +143,7 @@ for symmetric and positive-definite $W \in \mathbb{R}^{K \times K}$,
 \ \exp \! \left( - \frac{1}{2} \ \text{tr}(SW^{-1}) \right) \! .
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `W ~ ` **`inv_wishart`**`(nu, Sigma)`
 
src/functions-reference/distributions_over_unbounded_vectors.qmd

Lines changed: 10 additions & 10 deletions

@@ -24,7 +24,7 @@ for $y \in \mathbb{R}^K$,
 \end{equation*}
 where $|\Sigma|$ is the absolute determinant of $\Sigma$.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_normal`**`(mu, Sigma)`
 
@@ -158,7 +158,7 @@ If $K \in \mathbb{N}$, $\mu \in \mathbb{R}^K$, and $\Omega \in
 for $y \in \mathbb{R}^K$, \begin{equation*} \text{MultiNormalPrecision}(y|\mu,\Omega)
 = \text{MultiNormal}(y|\mu,\Omega^{-1}) \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_normal_prec`**`(mu, Omega)`
 
@@ -251,7 +251,7 @@ must be strictly positive for $k \in 1{:}K$. If an $L$ is provided
 that is not the Cholesky factor of a positive-definite matrix, the
 probability functions will raise errors.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_normal_cholesky`**`(mu, L)`
 
@@ -384,7 +384,7 @@ with multi-variate outputs where only the output dimensions share a
 kernel function but vary based on their scale. Note that this
 function does not take into account the mean prediction.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_gp`**`(Sigma, w)`
 
@@ -429,7 +429,7 @@ factor of the kernel matrix, this distribution is also more efficient
 than $\text{MultiGP}()$. Note that this function does not take into
 account the mean prediction.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_gp_cholesky`**`(L, w)`
 
@@ -472,7 +472,7 @@ K)/2\right)} {\Gamma(\nu/2)} \ \frac{1}{\sqrt{\left| \Sigma
 \Sigma^{-1} \, \left(y - \mu\right) \right)^{-(\nu + K)/2} \! .
 \end{array} \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_student_t`**`(nu, mu, Sigma)`
 
@@ -608,7 +608,7 @@ L^{-T}L^{-1} \, \left(y - \mu\right) \right)^{-(\nu + K)/2} \! .
 \end{array}
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multi_student_t_cholesky`**`(nu, mu, L)`
 
@@ -682,14 +682,14 @@ A Gaussian Dynamic Linear model is defined as follows, For $t \in 1,
 \\ \theta_{t} &\sim N(G \theta_{t - 1}, W) \\ \theta_{0} &\sim
 N(m_{0}, C_{0}) \end{aligned} \end{equation*} where $y$ is $n \times T$ matrix
 where rows are variables and columns are observations. These functions
-calculate the log-likelihood of the observations marginalizing over
+calculate the log-density of the observations marginalizing over
 the latent states ($p(y | F, G, V, W, m_{0}, C_{0})$). This
-log-likelihood is a system that is calculated using the Kalman Filter.
+log-density is a system that is calculated using the Kalman Filter.
 If $V$ is diagonal, then a more efficient algorithm which sequentially
 processes observations and avoids a matrix inversions can be used
 [@DurbinKoopman:2001 section 6.4].
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`gaussian_dlm_obs`**`(F, G, V, W, m0, C0)`
 
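As a usage illustration for the Cholesky-parameterized statements renamed above, a minimal sketch of `multi_normal_cholesky` (the scale/correlation decomposition and all variable names are assumptions for the example, not part of this commit):

```stan
data {
  int<lower=1> K;                  // dimension of each observation
  int<lower=0> N;                  // number of observations
  array[N] vector[K] y;            // observed vectors
}
parameters {
  vector[K] mu;                    // location
  cholesky_factor_corr[K] L_Omega; // Cholesky factor of a correlation matrix
  vector<lower=0>[K] sigma;        // per-dimension scales
}
model {
  // Distribution statement with a Cholesky-factored covariance:
  // L = diag(sigma) * L_Omega is the Cholesky factor of Sigma
  y ~ multi_normal_cholesky(mu, diag_pre_multiply(sigma, L_Omega));
}
```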
src/functions-reference/multivariate_discrete_distributions.qmd

Lines changed: 3 additions & 3 deletions

@@ -24,7 +24,7 @@ where the multinomial coefficient is defined by
 \binom{N}{y_1,\ldots,y_k} = \frac{N!}{\prod_{k=1}^K y_k!}.
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multinomial`**`(theta)`
 
@@ -85,7 +85,7 @@ where the multinomial coefficient is defined by
 \binom{N}{y_1,\ldots,y_k} = \frac{N!}{\prod_{k=1}^K y_k!}.
 \end{equation*}
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`multinomial_logit`**`(gamma)`
 
@@ -141,7 +141,7 @@ distribution is defined as
 \end{equation*}
 where $\alpha_0$ is defined as $\alpha_0 = \sum_{k=1}^K \alpha_k$.
 
-### Sampling statement
+### Distribution statement
 
 `y ~ ` **`dirichlet_multinomial`**`(alpha)`
 