[jvm-packages] support distributed synchronization of customized evaluation metrics #4280
@@ -205,6 +205,18 @@ Training with Evaluation Sets
You can also monitor the performance of the model during training with multiple evaluation datasets. By specifying ``eval_sets`` or calling ``setEvalSets`` on an XGBoostClassifier or XGBoostRegressor, you can pass in multiple evaluation datasets typed as a Map from String to DataFrame.
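Below is a minimal sketch of passing evaluation sets from Scala; the DataFrame names (``trainDF``, ``evalDF1``, ``evalDF2``) and the parameter values are illustrative only:

.. code-block:: scala

  import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier

  // trainDF, evalDF1 and evalDF2 are assumed to be pre-built DataFrames
  // with "features" and "label" columns.
  val classifier = new XGBoostClassifier(Map(
      "objective" -> "binary:logistic",
      "num_round" -> 100,
      "num_workers" -> 2))
    .setFeaturesCol("features")
    .setLabelCol("label")
    .setEvalSets(Map("eval1" -> evalDF1, "eval2" -> evalDF2))

  val model = classifier.fit(trainDF)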
Training with Custom Evaluation Metrics
----------------------------------------
With XGBoost4J (including XGBoost4J-Spark), users can implement their own custom evaluation metrics and have the values synchronized across workers in distributed training. To implement a custom evaluation metric, implement one of the following interfaces: ``ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed`` (for binary classification and regression), ``ml.dmlc.xgboost4j.java.IEvalMultiClassesDistributed`` (for multi-class classification), or ``ml.dmlc.xgboost4j.java.IEvalRankListDistributed`` (for ranking), as illustrated in the sketch after this list.
* ``ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed``: users implement ``float evalRow(float label, float pred);``, which calculates the metric for a single sample given the label and prediction, as well as ``float getFinal(float errorSum, float weightSum);``, which performs the final transformation over the sums of the per-sample errors and weights.
* ``ml.dmlc.xgboost4j.java.IEvalMultiClassesDistributed``: the methods to be implemented are the same as for ``ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed``, except that the per-row method is ``float evalRow(int label, float pred, int numClasses);``.
* ``ml.dmlc.xgboost4j.java.IEvalRankListDistributed``: users implement ``float evalMetric(float[] preds, int[] labels);``, which receives the predictions and labels for the instances in the same group.
By default, these interfaces do not support single-machine evaluation; users can enable it by overriding the ``float eval(float[][] predicts, DMatrix dmat)`` method.
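The following Scala sketch shows what an element-wise implementation could look like. It assumes the interface also carries the ``getMetric`` accessor from the existing ``IEvaluation`` interface; the class and metric names are illustrative:

.. code-block:: scala

  import ml.dmlc.xgboost4j.java.IEvalElementWiseDistributed

  // A custom RMSE: evalRow returns the squared error of one sample; the
  // weighted sums of these values are aggregated across all workers, and
  // getFinal turns the aggregated sums into the final metric value.
  class CustomRMSE extends IEvalElementWiseDistributed {
    override def evalRow(label: Float, pred: Float): Float = {
      val diff = label - pred
      diff * diff
    }

    override def getFinal(errorSum: Float, weightSum: Float): Float =
      math.sqrt(errorSum / weightSum).toFloat

    // Assumed to be inherited from IEvaluation; names the metric in logs.
    override def getMetric: String = "custom_rmse"
  }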
Prediction
==========
@@ -0,0 +1,194 @@
/*
 * Copyright 2015-2019 by Contributors
 */

#ifndef XGBOOST_METRIC_ELEMENTWISE_METRIC_H_
#define XGBOOST_METRIC_ELEMENTWISE_METRIC_H_

#include <xgboost/metric/metric.h>
#include <xgboost/metric/metric_common.h>

#include <functional>
#include <utility>
#include <string>
#include <vector>

#include "../../../src/common/common.h"

#if defined(XGBOOST_USE_CUDA)
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>  // thrust::plus<>

#include "../../../src/common/device_helpers.cuh"
#endif  // XGBOOST_USE_CUDA

/*!
 * \brief base classes for element-wise evaluation
 * \tparam Policy the policy class that supplies EvalRow(), GetFinal() and Name()
 */
namespace xgboost {
namespace metric {

template <typename EvalRow>
class ElementWiseMetricsReduction {
 public:
  explicit ElementWiseMetricsReduction(EvalRow policy) :
      policy_(std::move(policy)) {}

  // Reduce on the CPU: accumulate the weighted per-row metric value and the
  // total weight with an OpenMP reduction.
  PackedReduceResult CpuReduceMetrics(
      const HostDeviceVector<bst_float>& weights,
      const HostDeviceVector<bst_float>& labels,
      const HostDeviceVector<bst_float>& preds) const {
    size_t ndata = labels.Size();

    const auto& h_labels = labels.HostVector();
    const auto& h_weights = weights.HostVector();
    const auto& h_preds = preds.HostVector();

    bst_float residue_sum = 0;
    bst_float weights_sum = 0;

#pragma omp parallel for reduction(+: residue_sum, weights_sum) schedule(static)
    for (omp_ulong i = 0; i < ndata; ++i) {
      const bst_float wt = h_weights.size() > 0 ? h_weights[i] : 1.0f;
      residue_sum += policy_.EvalRow(h_labels[i], h_preds[i]) * wt;
      weights_sum += wt;
    }
    PackedReduceResult res{residue_sum, weights_sum};
    return res;
  }

#if defined(XGBOOST_USE_CUDA)

  // Reduce the shard that lives on a single device with a thrust
  // transform-reduce over the per-row metric values.
  PackedReduceResult DeviceReduceMetrics(
      GPUSet::GpuIdType device_id,
      size_t device_index,
      const HostDeviceVector<bst_float>& weights,
      const HostDeviceVector<bst_float>& labels,
      const HostDeviceVector<bst_float>& preds) {
    size_t n_data = preds.DeviceSize(device_id);

    thrust::counting_iterator<size_t> begin(0);
    thrust::counting_iterator<size_t> end = begin + n_data;

    auto s_label = labels.DeviceSpan(device_id);
    auto s_preds = preds.DeviceSpan(device_id);
    auto s_weights = weights.DeviceSpan(device_id);

    bool const is_null_weight = weights.Size() == 0;

    auto d_policy = policy_;

    PackedReduceResult result = thrust::transform_reduce(
        thrust::cuda::par(allocators_.at(device_index)),
        begin, end,
        [=] XGBOOST_DEVICE(size_t idx) {
          bst_float weight = is_null_weight ? 1.0f : s_weights[idx];

          bst_float residue = d_policy.EvalRow(s_label[idx], s_preds[idx]);
          residue *= weight;
          return PackedReduceResult{ residue, weight };
        },
        PackedReduceResult(),
        thrust::plus<PackedReduceResult>());

    return result;
  }

#endif  // XGBOOST_USE_CUDA

  // Dispatch to the CPU reducer or the per-device reducers and combine the
  // partial (residue, weight) results.
  PackedReduceResult Reduce(
      GPUSet devices,
      const HostDeviceVector<bst_float>& weights,
      const HostDeviceVector<bst_float>& labels,
      const HostDeviceVector<bst_float>& preds) {
    PackedReduceResult result;

    if (devices.IsEmpty()) {
      result = CpuReduceMetrics(weights, labels, preds);
    }
#if defined(XGBOOST_USE_CUDA)
    else {  // NOLINT
      if (allocators_.size() != devices.Size()) {
        allocators_.clear();
        allocators_.resize(devices.Size());
      }
      preds.Reshard(devices);
      labels.Reshard(devices);
      weights.Reshard(devices);
      std::vector<PackedReduceResult> res_per_device(devices.Size());

#pragma omp parallel for schedule(static, 1) if (devices.Size() > 1)
      for (GPUSet::GpuIdType id = *devices.begin(); id < *devices.end(); ++id) {
        dh::safe_cuda(cudaSetDevice(id));
        size_t index = devices.Index(id);
        res_per_device.at(index) = DeviceReduceMetrics(id, index, weights, labels, preds);
      }

      for (auto const& res : res_per_device) {
        result += res;
      }
    }
#endif  // defined(XGBOOST_USE_CUDA)
    return result;
  }

 private:
  EvalRow policy_;
#if defined(XGBOOST_USE_CUDA)
  std::vector<dh::CubMemory> allocators_;
#endif  // defined(XGBOOST_USE_CUDA)
};

template <typename Policy>
struct EvalEWiseBase : public Metric {
  EvalEWiseBase() : policy_{}, reducer_{policy_} {}

  explicit EvalEWiseBase(Policy& policy) : policy_{policy}, reducer_{policy_} {}

  explicit EvalEWiseBase(char const* policy_param) :
      policy_{policy_param}, reducer_{policy_} {}

  void Configure(
      const std::vector<std::pair<std::string, std::string>>& args) override {
    param_.InitAllowUnknown(args);
  }

  bst_float Eval(const HostDeviceVector<bst_float>& preds,
                 const MetaInfo& info,
                 bool distributed) override {
    CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
    CHECK_EQ(preds.Size(), info.labels_.Size())
        << "label and prediction size not match, "
        << "hint: use merror or mlogloss for multi-class classification";
    const auto ndata = static_cast<omp_ulong>(info.labels_.Size());
    // Dealing with ndata < n_gpus.
    GPUSet devices = GPUSet::All(param_.gpu_id, param_.n_gpus, ndata);

    auto result =
        reducer_.Reduce(devices, info.weights_, info.labels_, preds);

    // Synchronize the (error sum, weight sum) pair across workers before the
    // final transformation, so every node reports the same metric value.
    double dat[2]{result.Residue(), result.Weights()};
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    return Policy::GetFinal(dat[0], dat[1]);
  }

  const char* Name() const override {
    return policy_.Name();
  }

 private:
  Policy policy_;

  MetricParam param_;

  ElementWiseMetricsReduction<Policy> reducer_;
};

}  // namespace metric
}  // namespace xgboost
#endif  // XGBOOST_METRIC_ELEMENTWISE_METRIC_H_
Review comment: For completeness, we need to have weights included in the element-wise metric. We may also want to define an interface that lets the user define a custom reduce op.
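For illustration only, here is one hypothetical shape such interfaces could take; none of these names exist in this PR:

```scala
// Hypothetical sketches -- neither trait is part of this PR.

// (1) A weight-aware element-wise metric: the instance weight is passed
// into the per-row hook instead of being applied implicitly.
trait IEvalElementWiseWeighted {
  def evalRow(label: Float, pred: Float, weight: Float): Float
  def getFinal(errorSum: Float, weightSum: Float): Float
}

// (2) A user-defined reduce op: replaces the default pairwise sum used to
// combine (errorSum, weightSum) accumulators across partitions and workers.
trait IReduceOp {
  def reduce(left: (Float, Float), right: (Float, Float)): (Float, Float)
}
```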