More polishing documentation

baddstats · baddstats · commit 7136d85c1ea5 · 2025-02-26T18:28:13.000+08:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: spatstat.univar
-Version: 3.1-1.002
-Date: 2025-02-25
+Version: 3.1-1.003
+Date: 2025-02-26
 Title: One-Dimensional Probability Distribution Support for the 'spatstat' Family 
 Authors@R: c(person("Adrian", "Baddeley", 
                     role = c("aut", "cre", "cph"),
diff --git a/NEWS b/NEWS
@@ -1,4 +1,4 @@
-        CHANGES IN spatstat.univar VERSION 3.1-1.002
+        CHANGES IN spatstat.univar VERSION 3.1-1.003
 
 OVERVIEW
 
diff --git a/inst/doc/packagesizes.txt b/inst/doc/packagesizes.txt
@@ -5,4 +5,4 @@ date version nhelpfiles nobjects ndatasets Rlines srclines
 "2024-09-05" "3.0-1" 30 60 0 1742 321
 "2024-11-05" "3.1-0" 36 68 0 2571 2233
 "2024-11-05" "3.1-1" 36 68 0 2571 2235
-"2025-02-25" "3.1-1.002" 36 68 0 2577 2235
+"2025-02-26" "3.1-1.003" 36 68 0 2577 2235
diff --git a/inst/info/packagesizes.txt b/inst/info/packagesizes.txt
@@ -5,4 +5,4 @@ date version nhelpfiles nobjects ndatasets Rlines srclines
 "2024-09-05" "3.0-1" 30 60 0 1742 321
 "2024-11-05" "3.1-0" 36 68 0 2571 2233
 "2024-11-05" "3.1-1" 36 68 0 2571 2235
-"2025-02-25" "3.1-1.002" 36 68 0 2577 2235
+"2025-02-26" "3.1-1.003" 36 68 0 2577 2235
diff --git a/man/densityBC.Rd b/man/densityBC.Rd
@@ -2,9 +2,9 @@
 \alias{densityBC}
 \title{Kernel Density Estimation with Optional Boundary Correction}
 \description{
-  A simple implementation of fixed-bandwidth kernel density
+  Fixed-bandwidth kernel density
   estimation on the real line, or the positive real half-line,
-  including optional corrections for a boundary at zero. 
+  including optional corrections for a boundary at zero.
 }
 \usage{
   densityBC(x, kernel = "epanechnikov", bw=NULL,
@@ -18,7 +18,7 @@
       internal=list())
 }
 \arguments{
-  \item{x}{Numeric vector.}
+  \item{x}{Numeric vector of observed values.}
   \item{kernel}{String specifying kernel.
     Options are
     \code{"gaussian"}, \code{"rectangular"},
@@ -27,12 +27,13 @@
     \code{"biweight"},
     \code{"cosine"} and \code{"optcosine"}.
     (Partial matching is used).
+    These kernels are described in the help for \code{\link[stats]{density.default}}.
   }
   \item{bw,h}{
     Alternative specifications of the scale factor for the kernel.
     The bandwidth \code{bw} is the standard deviation of the
     kernel (this agrees with the argument \code{bw} in
-    \code{\link[stats]{density.default}}.
+    \code{\link[stats]{density.default}}).
     The rescale factor \code{h} is the factor by which
     the `standard form' of the kernel is rescaled. For the Epanechnikov
     kernel, \code{h = bw * sqrt(5)} is the
@@ -66,14 +67,13 @@
     String (partially matched) specifying a correction for the boundary effect
     bias at \eqn{r=0} when estimating a density on the positive
     half line. Possible values are
-    \code{"none"}, \code{"weighted"}, \code{"convolution"}, \code{"reflection"}
-    and \code{"bdrykern"}. 
+    \code{"none"}, \code{"weighted"}, \code{"convolution"},
+    \code{"reflection"}, \code{"bdrykern"} and \code{"JonesFoster"}. 
   }
   \item{fast}{
     Logical value specifying whether to perform the calculation rapidly
     using the Fast Fourier Transform (\code{fast=TRUE})
     or to use slower, exact code (\code{fast=FALSE}, the default).
-    Option \code{zerocor="bdrykern"} is not available when \code{fast=TRUE}.
   }
   \item{internal}{
     Internal use only.
@@ -83,12 +83,38 @@
   }
 }
 \details{
+  This function computes a fixed-bandwidth kernel estimate of
+  a probability density on the real line, or the positive half-line,
+  including optional boundary corrections
+  for truncation of the density onto the positive half line.
+
+  Weighted estimates are supported, including negative weights.
+  Weights are not renormalised to sum to 1. The resulting
+  probability density estimate is not renormalised to integrate to 1.
+
+  Options for the smoothing kernel
+  are described in the help for \code{\link[stats]{density.default}}.
+  The default is the Epanechnikov (truncated quadratic) kernel.
+  
   If \code{zerocor} is missing, or given as \code{"none"},
-  this function computes the fixed bandwidth kernel estimator of the
-  probability density on the real line. 
+  this function computes the fixed-bandwidth kernel estimator of the
+  probability density on the real line,
+  using \code{\link[stats]{density.default}}.
+  The estimated probability density (unnormalised) is
+  \deqn{
+    \widehat f(x) = \sum_{i=1}^n w_i \; \kappa(x - x_i)
+  }{
+    f(x) = sum[i=1,...,n] w[i] * kappa(x - x[i])
+  }
+  where \eqn{x_1,\ldots,x_n}{x[1], ..., x[n]} are the data values,
+  \eqn{w_1,\ldots,w_n}{w[1], ..., w[n]} are the weights (defaulting
+  to \eqn{w_i = 1/n}{w[i] = 1/n}),
+  and \eqn{\kappa}{kappa} is the kernel, a probability density on the
+  real line. 
 
-  If \code{zerocor} is given, it is assumed that the density
-  is confined to the positive half-line, and a boundary correction is
+  If \code{zerocor} is given, the probability density is assumed to be
+  confined to the positive half-line; the numerical values in \code{x}
+  must all be non-negative; and a boundary correction is
   applied to compensate for bias arising due to truncation at the origin:
   \describe{
     \item{\code{zerocor="weighted"}:}{
@@ -97,53 +123,100 @@
       where \eqn{m(x) = 1 - F(-x)} is the total mass of the kernel centred on
       \eqn{x} that lies in the positive half-line, and \eqn{F(x)} is the
       cumulative distribution function of the kernel.
+      The corrected estimate is
+      \deqn{
+	\widehat f_W(x) = \sum_{i=1}^n w_i \; \frac{\kappa(x - x_i)}{1-F(-x_i)}
+      }{
+	fW(x) = sum[i=1,...,n] w[i] * kappa(x - x[i])/(1-F(-x[i]))
+      }
       This is the \dQuote{cut-and-normalization} method of
       Gasser and \ifelse{latex}{\out{M\"{u}ller}}{Mueller} (1979).
       Effectively the kernel is renormalized so that it integrates to 1,
       and the adjusted kernel conserves mass.
     }
     \item{\code{zerocor="convolution"}:}{
-      The estimate of the density \eqn{f(r)} is
-      weighted by the factor \eqn{1/m(r)} where \eqn{m(r) = 1 - F(-r)}
+      The estimate of the density \eqn{f(x)} is
+      weighted by the factor \eqn{1/m(x)} where \eqn{m(x) = 1 - F(-x)}
       is given above.
+      The corrected estimate is
+      \deqn{
+	\widehat f_C(x) = \sum_{i=1}^n w_i \; \frac{\kappa(x - x_i)}{1-F(-x)}
+      }{
+	fC(x) = sum[i=1,...,n] w[i] * kappa(x - x[i])/(1-F(-x))
+      }
       This is the \dQuote{convolution}, \dQuote{uniform}
       or \dQuote{zero-order} boundary correction method
       often attributed to Diggle (1985).
       This correction does not conserve mass.
+      It is faster to compute than the weighted correction.
     }
     \item{\code{zerocor="reflection"}:}{
       if the kernel centred at data point \eqn{x_i}{x[i]}
       has a tail that lies on the negative half-line, this tail is
       reflected onto the positive half-line.
-      This is the reflection method first proposed by
-      Boneva et al (1971).
-      This correction conserves mass.
+      The corrected estimate is
+      \deqn{
+	\widehat f_R(x) = \sum_{i=1}^n w_i \; 
+	              [ \kappa(x - x_i) + \kappa(-x - x_i) ]
+      }{
+	fR(x) = sum[i=1,...,n] w[i] * ( kappa(x - x[i]) + kappa(-x - x[i]) )
+      }
+      This is the \dQuote{reflection} method first proposed by
+      Boneva et al. (1971). This correction conserves mass.
+      The estimated density always has zero derivative at the origin,
+      \eqn{\widehat f_R^\prime(0) = 0}{fR'(0) = 0}, which may or
+      may not be desirable.
     }
     \item{\code{zerocor="bdrykern"}:}{
       The density estimate is computed using the
       Linear Boundary Kernel associated with the chosen kernel
       (Wand and Jones, 1995, page 47).
-      That is, when estimating the density \eqn{f(r)} for values of
-      \eqn{r} close to zero (defined as \eqn{r < h} for all kernels
+      The estimated (unnormalised) probability density is
+      \deqn{
+	\widehat f_B(x) =  \sum_{i=1}^n w_i \;
+	[ A(x) + (x-x_i) B(x)] \kappa(x - x_i)
+      }{
+	fB(x) = sum[i=1,...,n] w[i] * 
+	( A(x) + (x-x[i]) B(x)) * kappa(x - x[i])
+      }
+      where \eqn{A(x) = a_2(x)/D(x)}{A(x) = a[2](x)/D(x)} and
+      \eqn{B(x) = -a_1(x)/D(x)}{B(x) = -a[1](x)/D(x)}
+      with
+      \eqn{D(x) = a_0(x) a_2(x) - a_1(x)^2}{D(x) = a[0](x) a[2](x) - a[1](x)^2}
+      where
+      \eqn{
+	a_k(x) = \int_{-\infty}^x t^k \kappa(t) dt.
+      }{
+	a[k](x) = integral[-Inf,x] ( t^k * kappa(t) dt).
+      }
+      That is, when estimating the density \eqn{f(x)} for values of
+      \eqn{x} close to zero (defined as \eqn{x < h} for all kernels
       except the Gaussian), the kernel contribution
-      \eqn{k_h(r - x_i)}{k[h](r - x[i])} is multiplied by a
-      term that is a linear function of \eqn{r - x_i}{r - x[i]},
-      with coefficients depending on \eqn{r}.
+      \eqn{k_h(x - x_i)}{k[h](x - x[i])} is multiplied by a
+      term that is a linear function of \eqn{x - x_i}{x - x[i]},
+      with coefficients depending on \eqn{x}.
       This correction does not conserve mass and may result in
       negative values, but is asymptotically optimal.
+      Computation time for this estimate is greater than for
+      the options above.
     }
     \item{\code{zerocor="JonesFoster"}:}{
       The modification of the Boundary Kernel estimate
-      proposed by Jones and Foster (1996) is computed. This is equal to
-      \eqn{
-	\overline f(r) \exp( \hat f(r)/\overline f(r) - 1)
+      proposed by Jones and Foster (1996) is computed:
+      \deqn{
+	\widehat f_{JF}(x) =
+	\widehat f_C(x)
+	\exp\left( \frac{\widehat f_B(x)}{\widehat f_C(x)} - 1 \right)
       }{
-	f#(r) exp(f*(r)/f#(r) - 1)
+	fJF(x) = fC(x) exp(fB(x)/fC(x) - 1)
       }
-      where \eqn{\overline f(r)}{f#(r)} is the convolution estimator
-      and \eqn{\hat f(r)}{f*(r)} is the linear boundary kernel estimator.
+      where \eqn{\widehat f_C(x)}{fC(x)} is the convolution estimator
+      and \eqn{\widehat f_B(x)}{fB(x)} is the linear boundary kernel estimator.
       This ensures that the estimate is always nonnegative
-      and retains the asymptotic optimality of the linear boundary kernel.
+      and retains the asymptotic optimality of the linear boundary
+      kernel.
+      Computation time for this estimate
+      is greater than for all the options above.
     }
   }
   If \code{fast=TRUE}, the calculations are performed rapidly using 
@@ -167,7 +240,14 @@
   legend(2, 0.8,
      legend=c("fixed bandwidth", "boundary kernel", "true density"),
      col=1:3, lty=rep(1,3))
+}
+\seealso{
+  \code{\link[stats]{density.default}}.
+  
+  \code{\link{dkernel}} for the kernel itself.
 
+  \code{\link{densityAdaptiveKernel.default}} for adaptive
+  (variable-bandwidth) estimation.
 }
 \references{
   \ournewpaper
@@ -176,18 +256,18 @@
   Spline transformations: three new diagnostic aids for the
   statistical data-analyst (with discussion).
   \emph{Journal of the Royal Statistical Society, Series B},
-  \bold{33}, 1–-70.
+  \bold{33}, 1--70.
 
   Diggle, P.J. (1985)
   A kernel method for smoothing point process data.
   \emph{Journal of the Royal Statistical Society, Series C (Applied Statistics)}, 
-  \bold{34} 138–-147.
+  \bold{34} 138--147.
   
   Gasser, Th. and \ifelse{latex}{\out{M\"{u}ller}}{Mueller}, H.-G. (1979).
   Kernel estimation of regression functions.
   In Th. Gasser and M. Rosenblatt (editors)
   \emph{Smoothing Techniques for Curve Estimation}, pages
-  23-–68. Springer, Berlin.
+  23--68. Springer, Berlin.
 
   Jones, M.C. and Foster, P.J. (1996)
   A simple nonnegative boundary correction method for kernel density
diff --git a/man/macros/newdefns.Rd b/man/macros/newdefns.Rd
@@ -1,3 +1,3 @@
 %% Extra macro definitions for this package
-\newcommand{\ournewpaper}{Baddeley, A., Chang, Y-M., Davies, T.M. and Hazelton, M. (2024) In preparation.}
-\newcommand{\ournewpapercite}{Baddeley, Chang, Davies and Hazelton (2024)}
+\newcommand{\ournewpaper}{Baddeley, A., Davies, T.M. and Hazelton, M. (2025) Submitted for publication.}
+\newcommand{\ournewpapercite}{Baddeley, Davies and Hazelton (2025)}

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`- CHANGES IN spatstat.univar VERSION 3.1-1.002`
	`1`	`+ CHANGES IN spatstat.univar VERSION 3.1-1.003`
`2`	`2`
`3`	`3`	`OVERVIEW`
`4`	`4`