Find initial prediction using initial_prediction module

Blunde1 · Blunde1 · commit 0f1e82f582c3 · 2021-11-08T20:57:37.000+01:00
diff --git a/R-package/inst/include/agtboost.hpp b/R-package/inst/include/agtboost.hpp
@@ -11,6 +11,7 @@
 
 #include <iostream>
 #include <fstream>
+#include <functional>
 
 // Internal
 #include "cir.hpp"
@@ -21,6 +22,7 @@
 #include "optimization.hpp"
 #include "loss_functions.hpp"
 #include "gbt_count_auto.hpp"
+#include "initial_prediction.hpp"
 
 
 #endif // __GMGTB_HPP_INCLUDED__
diff --git a/R-package/inst/include/ensemble.hpp b/R-package/inst/include/ensemble.hpp
@@ -36,6 +36,8 @@ class ENSEMBLE
     double loss(Tvec<double> &y, Tvec<double> &pred, Tvec<double> &w);
     Tvec<double> dloss(Tvec<double> &y, Tvec<double> &pred);
     Tvec<double> ddloss(Tvec<double> &y, Tvec<double> &pred);
+    double link_function(double pred_observed);
+    double inverse_link_function(double pred);
     
     double initial_prediction(Tvec<double> &y, std::string loss_function, Tvec<double> &w);
     void train(Tvec<double> &y, Tmat<double> &X, int verbose, bool greedy_complexities,
diff --git a/R-package/inst/include/initial_prediction.hpp b/R-package/inst/include/initial_prediction.hpp
@@ -0,0 +1,56 @@
+// initial_prediction.hpp
+
+#ifndef __INITIAL_PREDICTION_HPP_INCLUDED__
+#define __INITIAL_PREDICTION_HPP_INCLUDED__
+
+#include <cmath>
+
+#include "external_rcpp.hpp"
+
+// Learn the constant starting prediction on the transformed (link) scale by damped Newton iterations.
+//   y, offset: observations and the offsets that are added to the constant prediction
+//   dloss, ddloss: callables (y, pred) returning vectors of first / second loss derivatives
+//   link_function: maps mean(y) to the starting value; inverse_link_function: used for printing only
+//   verbose: values > 0 print the estimate. Returns the constant itself (offset not included).
+double learn_initial_prediction(
+        Tvec<double>& y, 
+        Tvec<double>& offset,
+        std::function<Tvec<double> (Tvec<double>&,Tvec<double>&)> dloss,
+        std::function<Tvec<double> (Tvec<double>&,Tvec<double>&)> ddloss,
+        std::function<double (double)> link_function,
+        std::function<double (double)> inverse_link_function,
+        int verbose
+    ){
+    // Newton opt settings
+    const double tolerance = 1E-9;
+    const double step_length = 0.2; // Damping applied to every Newton step
+    const int niter = 50; // Max iterations
+    // Start from the link-transformed average of y
+    const int n = y.size();
+    double initial_prediction = link_function(y.sum() / n);
+    Tvec<double> pred = offset.array() + initial_prediction;
+    // Iterate until optimal starting point found
+    for(int i=0; i<niter; i++){
+        // Damped Newton step (not plain gradient descent): -step_length * sum(dloss) / sum(ddloss)
+        const double step = - step_length * dloss(y, pred).sum() / ddloss(y, pred).sum();
+        // A zero ddloss sum or non-finite derivatives make the step NaN/inf: keep the last estimate
+        if(!std::isfinite(step)){
+            break;
+        }
+        initial_prediction += step;
+        pred = pred.array() + step;
+        // Check precision
+        if(std::abs(step) <= tolerance){
+            break;
+        }
+    }
+    if(verbose>0){ // Report the estimate on the observed and transformed scales
+        Rcpp::Rcout << std::setprecision(4) << "Initial prediction and raw-prediction estimated to :" <<
+            inverse_link_function(initial_prediction) << " and " << initial_prediction << " respectively" << std::endl;
+    }
+    // Return optimal starting point
+    return initial_prediction;
+}
+
+
+#endif
diff --git a/R-package/inst/include/loss_functions.hpp b/R-package/inst/include/loss_functions.hpp
@@ -7,6 +7,48 @@
 
 // ----------- LOSS --------------
 namespace loss_functions {
+
+
+    // Link function g(.): maps a prediction on the observed scale to the transformed scale
+    // of the given loss_function. Unrecognised loss_function names yield 0.0.
+    double link_function(double pred_observed, std::string loss_function){
+        // The log link is shared by these three losses
+        if(loss_function=="poisson" || loss_function=="gamma::log" || loss_function=="negbinom"){
+            return log(pred_observed);
+        }
+        if(loss_function=="mse"){
+            return pred_observed; // identity
+        }
+        if(loss_function=="logloss"){
+            return log(pred_observed) - log(1 - pred_observed); // logit
+        }
+        if(loss_function=="gamma::neginv"){
+            return - 1.0 / pred_observed; // negative inverse
+        }
+        return 0.0;
+    }
+
+
+    // Inverse link g^{-1}(.): maps a transformed-scale prediction back to the observed scale
+    // of the given loss_function. Unrecognised loss_function names yield 0.0.
+    double inverse_link_function(double pred_transformed, std::string loss_function){
+        // exp() inverts the log link shared by these three losses
+        if(loss_function=="poisson" || loss_function=="gamma::log" || loss_function=="negbinom"){
+            return exp(pred_transformed);
+        }
+        if(loss_function=="mse"){
+            return pred_transformed; // identity
+        }
+        if(loss_function=="logloss"){
+            return 1.0 / (1.0+exp(-pred_transformed)); // logistic sigmoid
+        }
+        if(loss_function=="gamma::neginv"){
+            return -1.0 / pred_transformed; // negative inverse is its own inverse
+        }
+        return 0.0;
+    }
+
+
     double loss(
             Tvec<double> &y, 
             Tvec<double> &pred, 
diff --git a/R-package/src/agtboost.cpp b/R-package/src/agtboost.cpp
@@ -159,7 +159,17 @@ Tvec<double> ENSEMBLE::dloss(Tvec<double> &y, Tvec<double> &pred){
 Tvec<double> ENSEMBLE::ddloss(Tvec<double> &y, Tvec<double> &pred){
     return loss_functions::ddloss(y, pred, loss_function, extra_param);
 }
-                
+
+// Applies the link g(.) selected by this ensemble's loss_function to an observed-scale value.
+double ENSEMBLE::link_function(double pred_observed){
+    return loss_functions::link_function(pred_observed, this->loss_function);
+}
+
+// Applies the inverse link g^{-1}(.) selected by this ensemble's loss_function to pred.
+double ENSEMBLE::inverse_link_function(double pred){
+    return loss_functions::inverse_link_function(pred, this->loss_function);
+}
+
                 
 void ENSEMBLE::train(
         Tvec<double> &y, 
@@ -170,6 +180,8 @@ void ENSEMBLE::train(
         Tvec<double> &w, Tvec<double> &offset, // Defaults to a zero-vector
         bool has_offset // Should be removed
     ){
+    using namespace std::placeholders;
+    
     // Set initials and declare variables
     int MAXITER = nrounds;
     int n = y.size(); 
@@ -182,11 +194,15 @@ void ENSEMBLE::train(
     Tmat<double> cir_sim = cir_sim_mat(100, 100); // nsim=100, nobs=100
     
     // Initial constant prediction: arg min l(y,constant)
-    if(has_offset){
-        this->initialPred = 0.0;
-    }else{
-        this->initialPred = this->initial_prediction(y, loss_function, w); //y.sum()/n;
-    }
+    this->initialPred = learn_initial_prediction(
+        y, 
+        offset, 
+        std::bind(&ENSEMBLE::dloss, this, _1, _2),
+        std::bind(&ENSEMBLE::ddloss, this, _1, _2),
+        std::bind(&ENSEMBLE::link_function, this, _1),
+        std::bind(&ENSEMBLE::inverse_link_function, this, _1),
+        verbose
+        );
     pred.setConstant(this->initialPred);
     pred += offset;
     this->initial_score = loss_functions::loss(y, pred, loss_function, w, extra_param);