[jvm-packages] support distributed synchronization of customized evaluation metrics #4280
@@ -205,6 +205,18 @@ Training with Evaluation Sets
You can also monitor the performance of the model during training with multiple evaluation datasets. By specifying ``eval_sets`` or calling ``setEvalSets`` on an XGBoostClassifier or XGBoostRegressor, you can pass in multiple evaluation datasets typed as a Map from String to DataFrame.
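Below is a minimal sketch of passing evaluation sets from Scala; the DataFrame names (``trainDF``, ``evalDF1``, ``evalDF2``) and the parameter values are illustrative only:

.. code-block:: scala

  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier

  // trainDF, evalDF1 and evalDF2 are assumed to be pre-built DataFrames
  // with "features" and "label" columns.
  val classifier = new XGBoostClassifier(Map(
      "objective" -> "binary:logistic",
      "num_round" -> 100,
      "num_workers" -> 2))
    .setFeaturesCol("features")
    .setLabelCol("label")
    .setEvalSets(Map("eval1" -> evalDF1, "eval2" -> evalDF2))

  val model = classifier.fit(trainDF)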
Training with Custom Evaluation Metrics
----------------------------------------
With XGBoost4J (including XGBoost4J-Spark), users can implement their own custom evaluation metrics and have the values synchronized across workers in distributed training. To implement a custom evaluation metric, implement one of the following interfaces: ``ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed`` (for binary classification and regression), ``ml.dmlc.xgboost4j.java.IEvalMultiClassesDistributed`` (for multi-class classification), or ``ml.dmlc.xgboost4j.java.IEvalRankListDistributed`` (for ranking), as illustrated in the sketch after this list.
* ``ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed``: users implement ``float evalRow(float label, float pred);``, which calculates the metric for a single sample given the label and prediction, as well as ``float getFinal(float errorSum, float weightSum);``, which performs the final transformation over the sums of the per-sample errors and weights.
* ``ml.dmlc.xgboost4j.java.IEvalMultiClassesDistributed``: the methods to be implemented are the same as for ``ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed``, except that the per-row method is ``float evalRow(int label, float pred, int numClasses);``.
* ``ml.dmlc.xgboost4j.java.IEvalRankListDistributed``: users implement ``float evalMetric(float[] preds, int[] labels);``, which receives the predictions and labels for the instances in the same group.
By default, these interfaces do not support single-machine evaluation; users can enable it by overriding the ``float eval(float[][] predicts, DMatrix dmat)`` method.
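The following Scala sketch shows what an element-wise implementation could look like. It assumes the interface also carries the ``getMetric`` accessor from the existing ``IEvaluation`` interface; the class and metric names are illustrative:

.. code-block:: scala

  import ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed

  // A custom RMSE: evalRow returns the squared error of one sample; the
  // weighted sums of these values are aggregated across all workers, and
  // getFinal turns the aggregated sums into the final metric value.
  class CustomRMSE extends IEvalElementWiseDistributed {
    override def evalRow(label: Float, pred: Float): Float = {
      val diff = label - pred
      diff * diff
    }

    override def getFinal(errorSum: Float, weightSum: Float): Float =
      math.sqrt(errorSum / weightSum).toFloat

    // Assumed to be inherited from IEvaluation; names the metric in logs.
    override def getMetric: String = "custom_rmse"
  }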
Prediction
==========
@@ -0,0 +1,194 @@
/*
 * Copyright 2015-2019 by Contributors
 */

#ifndef XGBOOST_METRIC_ELEMENTWISE_METRIC_H_
#define XGBOOST_METRIC_ELEMENTWISE_METRIC_H_

#include <xgboost/metric/metric.h>
#include <xgboost/metric/metric_common.h>

#include <functional>
#include <utility>
#include <string>
#include <vector>

#include "../../../src/common/common.h"

#if defined(XGBOOST_USE_CUDA)
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>  // thrust::plus<>

#include "../../../src/common/device_helpers.cuh"
#endif  // XGBOOST_USE_CUDA

/*!
 * \brief base classes for element-wise evaluation
 * \tparam Policy the policy class that supplies EvalRow(), GetFinal() and Name()
 */
namespace xgboost {
namespace metric {

template <typename EvalRow>
class ElementWiseMetricsReduction {
 public:
  explicit ElementWiseMetricsReduction(EvalRow policy) :
      policy_(std::move(policy)) {}

  // Reduce on the CPU: accumulate the weighted per-row metric value and the
  // total weight with an OpenMP reduction.
  PackedReduceResult CpuReduceMetrics(
      const HostDeviceVector<bst_float>& weights,
      const HostDeviceVector<bst_float>& labels,
      const HostDeviceVector<bst_float>& preds) const {
    size_t ndata = labels.Size();

    const auto& h_labels = labels.HostVector();
    const auto& h_weights = weights.HostVector();
    const auto& h_preds = preds.HostVector();

    bst_float residue_sum = 0;
    bst_float weights_sum = 0;

#pragma omp parallel for reduction(+: residue_sum, weights_sum) schedule(static)
    for (omp_ulong i = 0; i < ndata; ++i) {
      const bst_float wt = h_weights.size() > 0 ? h_weights[i] : 1.0f;
      residue_sum += policy_.EvalRow(h_labels[i], h_preds[i]) * wt;
      weights_sum += wt;
    }
    PackedReduceResult res{residue_sum, weights_sum};
    return res;
  }

#if defined(XGBOOST_USE_CUDA)

  // Reduce the shard that lives on a single device with a thrust
  // transform-reduce over the per-row metric values.
  PackedReduceResult DeviceReduceMetrics(
      GPUSet::GpuIdType device_id,
      size_t device_index,
      const HostDeviceVector<bst_float>& weights,
      const HostDeviceVector<bst_float>& labels,
      const HostDeviceVector<bst_float>& preds) {
    size_t n_data = preds.DeviceSize(device_id);

    thrust::counting_iterator<size_t> begin(0);
    thrust::counting_iterator<size_t> end = begin + n_data;

    auto s_label = labels.DeviceSpan(device_id);
    auto s_preds = preds.DeviceSpan(device_id);
    auto s_weights = weights.DeviceSpan(device_id);

    bool const is_null_weight = weights.Size() == 0;

    auto d_policy = policy_;

    PackedReduceResult result = thrust::transform_reduce(
        thrust::cuda::par(allocators_.at(device_index)),
        begin, end,
        [=] XGBOOST_DEVICE(size_t idx) {
          bst_float weight = is_null_weight ? 1.0f : s_weights[idx];

          bst_float residue = d_policy.EvalRow(s_label[idx], s_preds[idx]);
          residue *= weight;
          return PackedReduceResult{ residue, weight };
        },
        PackedReduceResult(),
        thrust::plus<PackedReduceResult>());

    return result;
  }

#endif  // XGBOOST_USE_CUDA

  // Dispatch to the CPU reducer or the per-device reducers and combine the
  // partial (residue, weight) results.
  PackedReduceResult Reduce(
      GPUSet devices,
      const HostDeviceVector<bst_float>& weights,
      const HostDeviceVector<bst_float>& labels,
      const HostDeviceVector<bst_float>& preds) {
    PackedReduceResult result;

    if (devices.IsEmpty()) {
      result = CpuReduceMetrics(weights, labels, preds);
    }
#if defined(XGBOOST_USE_CUDA)
    else {  // NOLINT
      if (allocators_.size() != devices.Size()) {
        allocators_.clear();
        allocators_.resize(devices.Size());
      }
      preds.Reshard(devices);
      labels.Reshard(devices);
      weights.Reshard(devices);
      std::vector<PackedReduceResult> res_per_device(devices.Size());

#pragma omp parallel for schedule(static, 1) if (devices.Size() > 1)
      for (GPUSet::GpuIdType id = *devices.begin(); id < *devices.end(); ++id) {
        dh::safe_cuda(cudaSetDevice(id));
        size_t index = devices.Index(id);
        res_per_device.at(index) = DeviceReduceMetrics(id, index, weights, labels, preds);
      }

      for (auto const& res : res_per_device) {
        result += res;
      }
    }
#endif  // defined(XGBOOST_USE_CUDA)
    return result;
  }

 private:
  EvalRow policy_;
#if defined(XGBOOST_USE_CUDA)
  std::vector<dh::CubMemory> allocators_;
#endif  // defined(XGBOOST_USE_CUDA)
};

template <typename Policy>
struct EvalEWiseBase : public Metric {
  EvalEWiseBase() : policy_{}, reducer_{policy_} {}

  explicit EvalEWiseBase(Policy& policy) : policy_{policy}, reducer_{policy_} {}

  explicit EvalEWiseBase(char const* policy_param) :
      policy_{policy_param}, reducer_{policy_} {}

  void Configure(
      const std::vector<std::pair<std::string, std::string>>& args) override {
    param_.InitAllowUnknown(args);
  }

  bst_float Eval(const HostDeviceVector<bst_float>& preds,
                 const MetaInfo& info,
                 bool distributed) override {
    CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
    CHECK_EQ(preds.Size(), info.labels_.Size())
        << "label and prediction size not match, "
        << "hint: use merror or mlogloss for multi-class classification";
    const auto ndata = static_cast<omp_ulong>(info.labels_.Size());
    // Dealing with ndata < n_gpus.
    GPUSet devices = GPUSet::All(param_.gpu_id, param_.n_gpus, ndata);

    auto result =
        reducer_.Reduce(devices, info.weights_, info.labels_, preds);

    // Synchronize the (error sum, weight sum) pair across workers before the
    // final transformation, so every node reports the same metric value.
    double dat[2]{result.Residue(), result.Weights()};
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    return Policy::GetFinal(dat[0], dat[1]);
  }

  const char* Name() const override {
    return policy_.Name();
  }

 private:
  Policy policy_;

  MetricParam param_;

  ElementWiseMetricsReduction<Policy> reducer_;
};

}  // namespace metric
}  // namespace xgboost
#endif  // XGBOOST_METRIC_ELEMENTWISE_METRIC_H_
Review comment: For completeness, we need to have weights included in the element-wise metric. We may also want to define an interface that lets the user define a custom reduce op.
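For illustration only, here is one hypothetical shape such interfaces could take; none of these names exist in this PR:

```scala
// Hypothetical sketches -- neither trait is part of this PR.

// (1) A weight-aware element-wise metric: the instance weight is passed
// into the per-row hook instead of being applied implicitly.
trait IEvalElementWiseWeighted {
  def evalRow(label: Float, pred: Float, weight: Float): Float
  def getFinal(errorSum: Float, weightSum: Float): Float
}

// (2) A user-defined reduce op: replaces the default pairwise sum used to
// combine (errorSum, weightSum) accumulators across partitions and workers.
trait IReduceOp {
  def reduce(left: (Float, Float), right: (Float, Float)): (Float, Float)
}
```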