🚀 Add AR(p) model example (#4)

bstaber · web-flow · commit b0e3de20365a · 2025-08-30T09:01:29.000+02:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.15)
 project(cppxplorers LANGUAGES CXX)
 
 # Set C++ standard
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
diff --git a/Justfile b/Justfile
@@ -29,7 +29,7 @@ install: build
     cmake --install {{BUILD_DIR}}
 
 test: build
-    ctest --test-dir {{BUILD_DIR}} --output-on-failure
+    ctest --test-dir {{BUILD_DIR}} --output-on-failure -VV
 
 clean:
     rm -rf {{BUILD_DIR}}
@@ -45,7 +45,7 @@ run-kalman:
 # Linting & formatting
 # -------------------------
 lint: build
-    clang-tidy crates/*/src/*.cpp -p {{BUILD_DIR}} -- -std=c++17
+    clang-tidy crates/*/src/*.cpp -p {{BUILD_DIR}} -- -std=c++20
 
 fmt:
     find crates -type f \( -name '*.cpp' -o -name '*.hpp' \) -exec clang-format -i {} +
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
@@ -1,3 +1,4 @@
 - [Introduction](introduction.md)
 - [Kalman filter](kf_linear.md)
-- [Simple optimizers](simple_optimizers.md)
+- [Simple optimizers](simple_optimizers.md)
+- [Autoregressive models AR(p)]()
diff --git a/crates/CMakeLists.txt b/crates/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory("kf_linear")
-add_subdirectory("simple_optimizers")
+add_subdirectory("simple_optimizers")
+add_subdirectory("ar_models")
diff --git a/crates/ar_models/CMakeLists.txt b/crates/ar_models/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Header-only target
+add_library(ar_models INTERFACE)
+
+# Public includes (exports the include/ dir to consumers)
+target_include_directories(ar_models INTERFACE
+  ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+# Link Eigen and set language level
+target_link_libraries(ar_models INTERFACE Eigen3::Eigen)
+target_compile_features(ar_models INTERFACE cxx_std_20)
+
+# Demo executable
+add_executable(ar_demo src/main.cpp)
+target_link_libraries(ar_demo PRIVATE ar_models)
+
+# Tests
+enable_testing()
+add_subdirectory(tests)
diff --git a/crates/ar_models/include/ar.hpp b/crates/ar_models/include/ar.hpp
@@ -0,0 +1,152 @@
+#pragma once
+#include <Eigen/Dense>
+#include <iostream>
+#include <numeric>
+
+namespace cppx::ar_models {
+
+template <int order> class ARModel {
+  public:
+    using Vector = Eigen::Vector<double, order>;
+
+    ARModel() = default;
+    ARModel(double intercept, double noise_variance) : c_(intercept), sigma2_(noise_variance){};
+
+    [[nodiscard]] double intercept() const noexcept { return c_; }
+    [[nodiscard]] double noise() const noexcept { return sigma2_; }
+    [[nodiscard]] const Vector &coefficients() const noexcept { return phi_; }
+
+    void set_coefficients(const Vector &phi) { phi_ = phi; }
+    void set_intercept(double c) { c_ = c; }
+    void set_noise(double noise) { sigma2_ = noise; }
+
+    double forecast_one_step(const std::vector<double> &hist) const {
+        if (static_cast<int>(hist.size()) < order) {
+            throw std::invalid_argument("History shorter than model order");
+        }
+        double y = c_;
+        for (int i = 0; i < order; ++i) {
+            y += phi_(i) * hist[i];
+        }
+        return y;
+    }
+
+  private:
+    Vector phi_;
+    double c_ = 0.0;
+    double sigma2_ = 1.0;
+};
+
+template <int order> ARModel<order> fit_ar_ols(const std::vector<double> &x) {
+    if (static_cast<int>(x.size()) <= order) {
+        throw std::invalid_argument("Time series too short for AR(order)");
+    }
+
+    const int T = static_cast<int>(x.size());
+    const int n = T - order;
+
+    // Build the design system
+    Eigen::MatrixXd X(n, order + 1);
+    Eigen::VectorXd Y(n);
+
+    for (int t = 0; t < n; ++t) {
+        Y(t) = x[order + t];
+        X(t, 0) = 1.0;
+        for (int j = 0; j < order; ++j) {
+            X(t, j + 1) = x[order + t - 1 - j];
+        }
+    }
+
+    // Solve least squares
+    Eigen::VectorXd beta = X.colPivHouseholderQr().solve(Y);
+    // beta(0) = intercept, beta(1..order) = AR coefficients
+
+    // Compute residual variance
+    Eigen::VectorXd resid = Y - X * beta;
+    double sigma2 = resid.squaredNorm() / static_cast<double>(n - (order + 1));
+
+    // Create AR(p)
+    typename ARModel<order>::Vector phi;
+    for (int j = 0; j < order; ++j) {
+        phi(j) = beta(j + 1);
+    }
+
+    ARModel<order> model;
+    model.set_coefficients(phi);
+    model.set_intercept(beta(0));
+    model.set_noise(sigma2);
+
+    return model;
+}
+
+inline double _sample_mean(const std::vector<double> &x) {
+    double mu = std::accumulate(x.begin(), x.end(), 0.0) / x.size();
+    return mu;
+}
+inline double _sample_autocov(const std::vector<double> &x, int k) {
+    const int T = static_cast<int>(x.size());
+    if (k >= T) {
+        throw std::invalid_argument("lag too large");
+    }
+    const double mu = _sample_mean(x);
+    double acc = 0.0;
+    for (int t = k; t < T; ++t) {
+        acc += (x[t] - mu) * (x[t - k] - mu);
+    }
+    return acc / static_cast<double>(T);
+}
+
+template <int order> ARModel<order> fir_ar_yule_walkter(const std::vector<double> &x) {
+    static_assert(order >= 1, "Yule–Walker needs order >= 1");
+    if (static_cast<int>(x.size()) <= order) {
+        throw std::invalid_argument("Time series too short for AR(order)");
+    }
+
+    // r[0..order] sample autocovariances
+    std::array<double, order + 1> r{};
+    for (int k = 0; k <= order; ++k) {
+        r[k] = _sample_autocov(x, k);
+    }
+
+    // Levinson–Durbin recursion
+    typename ARModel<order>::Vector a;
+    a.setZero();
+    double E = r[0];
+    if (std::abs(E) < 1e-15) {
+        throw std::runtime_error("Zero variance");
+    }
+
+    for (int m = 1; m <= order; ++m) {
+        double acc = r[m];
+        for (int j = 1; j < m; ++j)
+            acc -= a(j - 1) * r[m - j];
+        const double kappa = acc / E;
+
+        // update a (reflection update)
+        typename ARModel<order>::Vector a_new = a;
+        a_new(m - 1) = kappa;
+        for (int j = 1; j < m; ++j) {
+            a_new(j - 1) = a(j - 1) - kappa * a(m - j - 1);
+        }
+        a = a_new;
+
+        E *= (1.0 - kappa * kappa);
+        if (E <= 0) {
+            throw std::runtime_error("Non-positive innovation variance in recursion");
+        }
+    }
+
+    // Compute intercept so that unconditional mean matches sample mean (stationarity assumption)
+    const double xbar = _sample_mean(x);
+    const double one_minus_sum = 1.0 - a.sum();
+    const double c = one_minus_sum * xbar;
+
+    ARModel<order> model;
+    model.set_coefficients(a);
+    model.set_intercept(c);
+    model.set_noise(E);
+
+    return model;
+}
+
+} // namespace cppx::ar_models
diff --git a/crates/ar_models/src/main.cpp b/crates/ar_models/src/main.cpp
@@ -0,0 +1,6 @@
+#include <iostream>
+
+int main() {
+    std::cout << "Hello, world!" << "\n";
+    return 0;
+}
diff --git a/crates/ar_models/tests/CMakeLists.txt b/crates/ar_models/tests/CMakeLists.txt
@@ -0,0 +1,7 @@
+# crates/simple_optimizers/tests/CMakeLists.txt
+
+add_executable(ar_models_test test_ar_models.cpp)
+
+target_link_libraries(ar_models_test PRIVATE ar_models)
+
+add_test(NAME ar_models_test COMMAND ar_models_test)
diff --git a/crates/ar_models/tests/test_ar_models.cpp b/crates/ar_models/tests/test_ar_models.cpp
@@ -0,0 +1,114 @@
+#include "ar.hpp"
+#include <cassert>
+#include <cmath>
+#include <iostream>
+#include <random>
+#include <vector>
+
+using namespace cppx::ar_models;
+
+// Tiny helper for floating-point checks
+static bool almost_equal(double a, double b, double tol = 1e-6) {
+    return std::fabs(a - b) <= tol;
+}
+
+int main() {
+    // Default construction
+    {
+        ARModel<1> model;
+        assert(almost_equal(model.intercept(), 0.0));
+        assert(almost_equal(model.noise(), 1.0));
+        assert(model.coefficients().size() == 1);
+    }
+
+    // OLS fit recovers AR(1)
+    {
+        // Simulate AR(1): X_t = c + phi X_{t-1} + eps_t
+        constexpr int P = 1;
+        const double c = 0.4;
+        const double phi = 0.65;
+        const double sigma = 0.1; // small noise -> tighter estimates
+        const int T = 1000;
+
+        std::mt19937 rng(12345);
+        std::normal_distribution<double> N(0.0, sigma);
+
+        std::vector<double> x(T);
+        x[0] = 0.0;
+        for (int t = 1; t < T; ++t) {
+            x[t] = c + phi * x[t - 1] + N(rng);
+        }
+
+        auto m = fit_ar_ols<P>(x);
+        // Coefficients
+        assert(m.coefficients().size() == P);
+        double phi_hat = m.coefficients()(0);
+        // Intercept (your API calls it "intercept()", but you’re storing the intercept there)
+        double c_hat = m.intercept();
+
+        // Loose but meaningful tolerances
+        assert(std::fabs(phi_hat - phi) < 0.05);
+        assert(std::fabs(c_hat - c) < 0.05);
+        assert(m.noise() > 0.0);
+    }
+
+    // Yule–Walker (Levinson–Durbin) recovers AR(1)
+    {
+        constexpr int P = 1;
+        const double c = 0.2;
+        const double phi = 0.5;
+        const double sigma = 0.15;
+        const int T = 1200;
+
+        std::mt19937 rng(54321);
+        std::normal_distribution<double> N(0.0, sigma);
+
+        std::vector<double> x(T);
+        x[0] = 0.0;
+        for (int t = 1; t < T; ++t) {
+            x[t] = c + phi * x[t - 1] + N(rng);
+        }
+
+        // NOTE: your function name currently has a typo "fir_ar_yule_walkter"
+        auto m = fir_ar_yule_walkter<P>(x);
+
+        double phi_hat = m.coefficients()(0);
+        double c_hat = m.intercept();
+
+        assert(std::fabs(phi_hat - phi) < 0.07);
+        assert(std::fabs(c_hat - c) < 0.07);
+        assert(m.noise() > 0.0);
+    }
+
+    // Forecast one-step sanity (AR(1))
+    {
+        ARModel<1> m;
+        // Set a known model: X_t = c + phi X_{t-1} + eps
+        ARModel<1>::Vector phi_vec;
+        phi_vec << 0.6;
+        m.set_coefficients(phi_vec);
+        m.set_intercept(0.3); // intercept (your getter name is intercept())
+        m.set_noise(0.2);
+
+        // hist = [X_T, X_{T-1}, ...] ; for AR(1) we need 1 value
+        std::vector<double> hist = {2.0};
+        double yhat = m.forecast_one_step(hist);
+        // Expected: c + phi * X_T
+        double expected = 0.3 + 0.6 * 2.0;
+        assert(almost_equal(yhat, expected, 1e-12));
+    }
+
+    // Error handling: series too short
+    {
+        std::vector<double> tiny = {1.0}; // length 1 cannot fit AR(2), nor AR(1) when n<=p
+        bool threw = false;
+        try {
+            (void) fit_ar_ols<2>(tiny);
+        } catch (const std::invalid_argument &) {
+            threw = true;
+        }
+        assert(threw);
+    }
+
+    return 0;
+}
diff --git a/crates/kf_linear/CMakeLists.txt b/crates/kf_linear/CMakeLists.txt
@@ -8,7 +8,7 @@ target_include_directories(kf_linear INTERFACE
 
 # Link Eigen and set language level
 target_link_libraries(kf_linear INTERFACE Eigen3::Eigen)
-target_compile_features(kf_linear INTERFACE cxx_std_17)
+target_compile_features(kf_linear INTERFACE cxx_std_20)
 
 # Demo executable
 add_executable(kf_demo src/main.cpp)
diff --git a/crates/kf_linear/include/kf_linear.hpp b/crates/kf_linear/include/kf_linear.hpp
@@ -6,6 +6,8 @@
 #include <stdexcept>
 #include <vector>
 
+namespace cppx::kf_linear {
+
 /**
  * @brief Generic linear Kalman filter (templated, no control term).
  *
@@ -121,3 +123,5 @@ template <int Nx, int Ny> class KFLinear {
     StateVec x_; ///< State mean
     StateMat P_; ///< State covariance
 };
+
+} // namespace cppx::kf_linear
diff --git a/crates/kf_linear/tests/test_kf_linear.cpp b/crates/kf_linear/tests/test_kf_linear.cpp
@@ -8,6 +8,7 @@
 #include "kf_linear.hpp"
 
 using Catch::Approx;
+using namespace cppx::kf_linear;
 
 TEST_CASE("Predict-only step leaves state at A*x and P -> A P A^T + Q") {
     using KF = KFLinear<2, 1>;
diff --git a/crates/simple_optimizers/CMakeLists.txt b/crates/simple_optimizers/CMakeLists.txt
@@ -8,7 +8,7 @@ target_include_directories(simple_optimizers INTERFACE
 
 # Link Eigen and set language level
 target_link_libraries(simple_optimizers INTERFACE Eigen3::Eigen)
-target_compile_features(simple_optimizers INTERFACE cxx_std_17)
+target_compile_features(simple_optimizers INTERFACE cxx_std_20)
 
 # Demo executable
 add_executable(opt_demo src/main.cpp)
diff --git a/crates/simple_optimizers/tests/test_simple_optimizers.cpp b/crates/simple_optimizers/tests/test_simple_optimizers.cpp
@@ -55,4 +55,4 @@ int main() {
     }
 
     return 0;
-}
+};