Skip to content

Commit

Permalink
Remove distcol updater. (#5507)
Browse files Browse the repository at this point in the history
Closes #5498.
  • Loading branch information
trivialfis committed Apr 10, 2020
1 parent 7d52c0b commit bd653fa
Show file tree
Hide file tree
Showing 5 changed files with 1 addition and 234 deletions.
1 change: 0 additions & 1 deletion doc/parameter.rst
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ Parameters for Tree Booster
- A comma-separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could also be set explicitly by a user. The following updaters exist:

- ``grow_colmaker``: non-distributed column-based construction of trees.
- ``distcol``: distributed tree construction with column-based data splitting mode.
- ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting.
- ``grow_local_histmaker``: based on local histogram counting.
- ``grow_skmaker``: uses the approximate sketching algorithm.
Expand Down
67 changes: 0 additions & 67 deletions src/common/bitmap.h

This file was deleted.

3 changes: 1 addition & 2 deletions src/gbm/gbtree.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ class GBTree : public GradientBooster {
void LoadModel(Json const& in) override;

// Reports whether lazy checkpointing may be used for this booster.
// As written, the first return yields true when the model has exactly one
// output group, or when the configured updater sequence string contains
// "distcol".
// NOTE(review): the second return statement below can never execute because
// the first one always returns. The two statements look like the "before" and
// "after" lines of a rendered diff -- confirm which single return is intended.
bool AllowLazyCheckPoint() const override {
return model_.learner_model_param->num_output_group == 1 ||
tparam_.updater_seq.find("distcol") != std::string::npos;
return model_.learner_model_param->num_output_group == 1;
}

void PredictBatch(DMatrix* p_fmat,
Expand Down
2 changes: 0 additions & 2 deletions src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -888,8 +888,6 @@ class LearnerImpl : public LearnerIO {
CHECK(tparam_.dsplit != DataSplitMode::kAuto)
<< "Precondition violated; dsplit cannot be 'auto' in distributed mode";
if (tparam_.dsplit == DataSplitMode::kCol) {
// 'distcol' updater hidden until it becomes functional again
// See discussion at https://github.com/dmlc/xgboost/issues/1832
LOG(FATAL) << "Column-wise data split is currently not supported.";
}
}
Expand Down
162 changes: 0 additions & 162 deletions src/tree/updater_colmaker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "param.h"
#include "constraints.h"
#include "../common/random.h"
#include "../common/bitmap.h"
#include "split_evaluator.h"

namespace xgboost {
Expand Down Expand Up @@ -618,171 +617,10 @@ class ColMaker: public TreeUpdater {
};
};

// distributed column maker
// Distributed column maker: a ColMaker variant that reports its name as
// "distcol". It builds one tree per Update() call, prunes it with a "prune"
// updater, and uses rabit collectives to exchange row-position bits and the
// best split per node between workers.
class DistColMaker : public ColMaker {
public:
// Reads the training parameters from `args` (keys unknown to TrainParam are
// tolerated), creates and configures the "prune" updater, and creates the
// split evaluator named by param_.split_evaluator, initialised with param_.
void Configure(const Args& args) override {
param_.UpdateAllowUnknown(args);
pruner_.reset(TreeUpdater::Create("prune", tparam_));
pruner_->Configure(args);
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
spliteval_->Init(&param_);
}

// Name of this updater.
char const* Name() const override {
return "distcol";
}

// Grows, prunes and re-positions a single tree.
//   gpair: gradient pairs; read on the host by the builder and then passed
//          on to the pruner.
//   dmat:  training data.
//   trees: must contain exactly one tree (enforced by CHECK_EQ below).
void Update(HostDeviceVector<GradientPair> *gpair,
DMatrix* dmat,
const std::vector<RegTree*> &trees) override {
CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time";
// LazyGetColumnDensity is inherited and not visible here; presumably it
// fills column_densities_ on first use -- TODO confirm.
this->LazyGetColumnDensity(dmat);
// A fresh Builder is created per call; it receives its own host clone of
// the split evaluator.
Builder builder(
param_,
colmaker_param_,
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
interaction_constraints_, column_densities_);
// build the tree
builder.Update(gpair->ConstHostVector(), dmat, trees[0]);
// prune the tree, note that pruner will sync the tree
pruner_->Update(gpair, dmat, trees);
// update position after the tree is pruned
builder.UpdatePosition(dmat, *trees[0]);
}

private:
// Builder specialisation that overrides the two hooks of ColMaker::Builder
// where workers must exchange information: SetNonDefaultPosition and
// SyncBestSolution.
class Builder : public ColMaker::Builder {
public:
// Forwards every argument unchanged to the ColMaker::Builder constructor.
explicit Builder(const TrainParam &param,
ColMakerTrainParam const &colmaker_train_param,
std::unique_ptr<SplitEvaluator> spliteval,
FeatureInteractionConstraintHost _interaction_constraints,
const std::vector<float> &column_densities)
: ColMaker::Builder(param, colmaker_train_param,
std::move(spliteval),
std::move(_interaction_constraints),
column_densities) {}
// Re-maps every row to a node that still exists in `tree`: starting from the
// row's decoded node id, walks up parent links while the node is marked
// deleted and stores the resulting id in position_. Rows are processed in
// an OpenMP parallel loop; each iteration writes only position_[ridx].
inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) {
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
int nid = this->DecodePosition(ridx);
while (tree[nid].IsDeleted()) {
nid = tree[nid].Parent();
// Guards against walking past the top of the tree (presumably the root's
// parent id is negative -- TODO confirm).
CHECK_GE(nid, 0);
}
this->position_[ridx] = nid;
}
}

protected:
// Moves rows whose feature value sends them to the NON-default child of
// their current node. Each worker marks such rows in boolmap_ using the split
// features it can read, the marks are OR-combined across workers, and every
// marked row is then re-positioned to the non-default child.
//   qexpand: node ids being expanded in this round.
//   p_fmat:  training data, read as sorted CSC (column) batches.
//   tree:    tree under construction; split info is read from it.
void SetNonDefaultPosition(const std::vector<int> &qexpand, DMatrix *p_fmat,
const RegTree &tree) override {
// step 2, classify the non-default data into right places
std::vector<unsigned> fsplits;
for (int nid : qexpand) {
if (!tree[nid].IsLeaf()) {
fsplits.push_back(tree[nid].SplitIndex());
}
}
// get the candidate split index
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
// fsplits is sorted, so popping from the back drops every feature index that
// is not below this matrix's column count (presumably features held by
// other workers -- TODO confirm).
while (fsplits.size() != 0 && fsplits.back() >= p_fmat->Info().num_col_) {
fsplits.pop_back();
}
// bitmap is only word concurrent, set to bool first
{
// Size boolmap_ to one int flag per entry of position_ and clear it.
auto ndata = static_cast<bst_omp_uint>(this->position_.size());
boolmap_.resize(ndata);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
boolmap_[j] = 0;
}
}
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
const bst_uint ridx = col[j].index;
const bst_float fvalue = col[j].fvalue;
const int nid = this->DecodePosition(ridx);
// Only rows whose current node actually splits on this feature matter.
if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) {
if (fvalue < tree[nid].SplitCond()) {
// Value is below the split condition: flag the row only when the
// node's default direction is not left.
if (!tree[nid].DefaultLeft()) boolmap_[ridx] = 1;
} else {
// Value is not below the split condition: flag the row only when
// the node's default direction is left.
if (tree[nid].DefaultLeft()) boolmap_[ridx] = 1;
}
}
}
}
}

bitmap_.InitFromBool(boolmap_);
// communicate bitmap
rabit::Allreduce<rabit::op::BitOR>(dmlc::BeginPtr(bitmap_.data), bitmap_.data.size());
// get the new position
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
const int nid = this->DecodePosition(ridx);
if (bitmap_.Get(ridx)) {
// A flagged row must sit on a split node; a leaf here means the reduced
// bitmap disagrees with the local tree.
CHECK(!tree[nid].IsLeaf()) << "inconsistent reduce information";
// Flagged rows go to the child opposite to the default direction.
if (tree[nid].DefaultLeft()) {
this->SetEncodePosition(ridx, tree[nid].RightChild());
} else {
this->SetEncodePosition(ridx, tree[nid].LeftChild());
}
}
}
}
// synchronize the best solution of each node
// For every node in qexpand: merges the per-thread candidates
// (stemp_[tid][nid].best) into snode_[nid].best, reduces the resulting
// entries across workers through reducer_, and stores the reduced entries
// back into snode_.
void SyncBestSolution(const std::vector<int> &qexpand) override {
std::vector<SplitEntry> vec;
for (int nid : qexpand) {
for (int tid = 0; tid < this->nthread_; ++tid) {
this->snode_[nid].best.Update(this->stemp_[tid][nid].best);
}
vec.push_back(this->snode_[nid].best);
}
// TODO(tqchen) lazy version
// communicate best solution
reducer_.Allreduce(dmlc::BeginPtr(vec), vec.size());
// assign solution back
// vec[i] corresponds to qexpand[i] because both were filled in the same order.
for (size_t i = 0; i < qexpand.size(); ++i) {
const int nid = qexpand[i];
this->snode_[nid].best = vec[i];
}
}

private:
// Packed form of boolmap_, used as the Allreduce buffer.
common::BitMap bitmap_;
// One int flag per row; written in parallel loops and then packed into bitmap_.
std::vector<int> boolmap_;
// Reducer that combines SplitEntry values across workers via SplitEntry::Reduce.
rabit::Reducer<SplitEntry, SplitEntry::Reduce> reducer_;
};
// we directly introduce pruner here
std::unique_ptr<TreeUpdater> pruner_;
// training parameter
TrainParam param_;
// Cloned for each builder instantiation
std::unique_ptr<SplitEvaluator> spliteval_;

// Passed by value into each Builder constructed in Update().
FeatureInteractionConstraintHost interaction_constraints_;
};

// Registers the ColMaker updater under the name "grow_colmaker". The body is
// a factory lambda returning a newly allocated ColMaker; the macro expansion
// is not visible here, so ownership of the returned pointer is presumably
// taken by whoever invokes the factory -- TODO confirm.
XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
.describe("Grow tree with parallelization over columns.")
.set_body([]() {
return new ColMaker();
});

// Registers the DistColMaker updater under the name "distcol". The body is a
// factory lambda returning a newly allocated DistColMaker; the macro expansion
// is not visible here, so ownership of the returned pointer is presumably
// taken by whoever invokes the factory -- TODO confirm.
XGBOOST_REGISTER_TREE_UPDATER(DistColMaker, "distcol")
.describe("Distributed column split version of tree maker.")
.set_body([]() {
return new DistColMaker();
});
} // namespace tree
} // namespace xgboost

0 comments on commit bd653fa

Please sign in to comment.