From bd653fad4c39f5fcff1f22090508678958f6deb5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 10 Apr 2020 12:52:56 +0800 Subject: [PATCH] Remove distcol updater. (#5507) Closes #5498. --- doc/parameter.rst | 1 - src/common/bitmap.h | 67 --------------- src/gbm/gbtree.h | 3 +- src/learner.cc | 2 - src/tree/updater_colmaker.cc | 162 ----------------------------------- 5 files changed, 1 insertion(+), 234 deletions(-) delete mode 100644 src/common/bitmap.h diff --git a/doc/parameter.rst b/doc/parameter.rst index d31091fec40f..755cb5346dd7 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -157,7 +157,6 @@ Parameters for Tree Booster - A comma separated string defining the sequence of tree updaters to run, providing a modular way to construct and to modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it could be also set explicitly by a user. The following updaters exist: - ``grow_colmaker``: non-distributed column-based construction of trees. - - ``distcol``: distributed tree construction with column-based data splitting mode. - ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting. - ``grow_local_histmaker``: based on local histogram counting. - ``grow_skmaker``: uses the approximate sketching algorithm. diff --git a/src/common/bitmap.h b/src/common/bitmap.h deleted file mode 100644 index ee102a6805d5..000000000000 --- a/src/common/bitmap.h +++ /dev/null @@ -1,67 +0,0 @@ -/*! - * Copyright 2014 by Contributors - * \file bitmap.h - * \brief a simple implement of bitmap - * NOTE: bitmap is only threadsafe per word access, remember this when using bitmap - * \author Tianqi Chen - */ -#ifndef XGBOOST_COMMON_BITMAP_H_ -#define XGBOOST_COMMON_BITMAP_H_ - -#include -#include - -namespace xgboost { -namespace common { -/*! \brief bit map that contains set of bit indicators */ -struct BitMap { - /*! 
\brief internal data structure */ - std::vector data; - /*! - * \brief resize the bitmap to be certain size - * \param size the size of bitmap - */ - inline void Resize(size_t size) { - data.resize((size + 31U) >> 5, 0); - } - /*! - * \brief query the i-th position of bitmap - * \param i the position in - */ - inline bool Get(size_t i) const { - return (data[i >> 5] >> (i & 31U)) & 1U; - } - /*! - * \brief set i-th position to true - * \param i position index - */ - inline void SetTrue(size_t i) { - data[i >> 5] |= (1 << (i & 31U)); - } - /*! \brief initialize the value of bit map from vector of bool*/ - inline void InitFromBool(const std::vector& vec) { - this->Resize(vec.size()); - // parallel over the full cases - auto nsize = static_cast(vec.size() / 32); - #pragma omp parallel for schedule(static) - for (bst_omp_uint i = 0; i < nsize; ++i) { - uint32_t res = 0; - for (int k = 0; k < 32; ++k) { - uint32_t bit = vec[(i << 5) | k]; - res |= (bit << k); - } - data[i] = res; - } - if (nsize != vec.size()) data.back() = 0; - for (size_t i = nsize; i < vec.size(); ++i) { - if (vec[i]) this->SetTrue(i); - } - } - /*! 
\brief clear the bitmap, set all places to false */ - inline void Clear() { - std::fill(data.begin(), data.end(), 0U); - } -}; -} // namespace common -} // namespace xgboost -#endif // XGBOOST_COMMON_BITMAP_H_ diff --git a/src/gbm/gbtree.h b/src/gbm/gbtree.h index 6ece5330f537..534c3ad5469a 100644 --- a/src/gbm/gbtree.h +++ b/src/gbm/gbtree.h @@ -195,8 +195,7 @@ class GBTree : public GradientBooster { void LoadModel(Json const& in) override; bool AllowLazyCheckPoint() const override { - return model_.learner_model_param->num_output_group == 1 || - tparam_.updater_seq.find("distcol") != std::string::npos; + return model_.learner_model_param->num_output_group == 1; } void PredictBatch(DMatrix* p_fmat, diff --git a/src/learner.cc b/src/learner.cc index 1d1acd2aec99..1c0e91deae88 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -888,8 +888,6 @@ class LearnerImpl : public LearnerIO { CHECK(tparam_.dsplit != DataSplitMode::kAuto) << "Precondition violated; dsplit cannot be 'auto' in distributed mode"; if (tparam_.dsplit == DataSplitMode::kCol) { - // 'distcol' updater hidden until it becomes functional again - // See discussion at https://github.com/dmlc/xgboost/issues/1832 LOG(FATAL) << "Column-wise data split is currently not supported."; } } diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 690a8bcceef3..951cfdb5ec27 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -17,7 +17,6 @@ #include "param.h" #include "constraints.h" #include "../common/random.h" -#include "../common/bitmap.h" #include "split_evaluator.h" namespace xgboost { @@ -618,171 +617,10 @@ class ColMaker: public TreeUpdater { }; }; -// distributed column maker -class DistColMaker : public ColMaker { - public: - void Configure(const Args& args) override { - param_.UpdateAllowUnknown(args); - pruner_.reset(TreeUpdater::Create("prune", tparam_)); - pruner_->Configure(args); - spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator)); - 
spliteval_->Init(&param_); - } - - char const* Name() const override { - return "distcol"; - } - - void Update(HostDeviceVector *gpair, - DMatrix* dmat, - const std::vector &trees) override { - CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time"; - this->LazyGetColumnDensity(dmat); - Builder builder( - param_, - colmaker_param_, - std::unique_ptr(spliteval_->GetHostClone()), - interaction_constraints_, column_densities_); - // build the tree - builder.Update(gpair->ConstHostVector(), dmat, trees[0]); - //// prune the tree, note that pruner will sync the tree - pruner_->Update(gpair, dmat, trees); - // update position after the tree is pruned - builder.UpdatePosition(dmat, *trees[0]); - } - - private: - class Builder : public ColMaker::Builder { - public: - explicit Builder(const TrainParam &param, - ColMakerTrainParam const &colmaker_train_param, - std::unique_ptr spliteval, - FeatureInteractionConstraintHost _interaction_constraints, - const std::vector &column_densities) - : ColMaker::Builder(param, colmaker_train_param, - std::move(spliteval), - std::move(_interaction_constraints), - column_densities) {} - inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) { - const auto ndata = static_cast(p_fmat->Info().num_row_); - #pragma omp parallel for schedule(static) - for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) { - int nid = this->DecodePosition(ridx); - while (tree[nid].IsDeleted()) { - nid = tree[nid].Parent(); - CHECK_GE(nid, 0); - } - this->position_[ridx] = nid; - } - } - - protected: - void SetNonDefaultPosition(const std::vector &qexpand, DMatrix *p_fmat, - const RegTree &tree) override { - // step 2, classify the non-default data into right places - std::vector fsplits; - for (int nid : qexpand) { - if (!tree[nid].IsLeaf()) { - fsplits.push_back(tree[nid].SplitIndex()); - } - } - // get the candidate split index - std::sort(fsplits.begin(), fsplits.end()); - fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - 
fsplits.begin()); - while (fsplits.size() != 0 && fsplits.back() >= p_fmat->Info().num_col_) { - fsplits.pop_back(); - } - // bitmap is only word concurrent, set to bool first - { - auto ndata = static_cast(this->position_.size()); - boolmap_.resize(ndata); - #pragma omp parallel for schedule(static) - for (bst_omp_uint j = 0; j < ndata; ++j) { - boolmap_[j] = 0; - } - } - for (const auto &batch : p_fmat->GetBatches()) { - for (auto fid : fsplits) { - auto col = batch[fid]; - const auto ndata = static_cast(col.size()); - #pragma omp parallel for schedule(static) - for (bst_omp_uint j = 0; j < ndata; ++j) { - const bst_uint ridx = col[j].index; - const bst_float fvalue = col[j].fvalue; - const int nid = this->DecodePosition(ridx); - if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) { - if (fvalue < tree[nid].SplitCond()) { - if (!tree[nid].DefaultLeft()) boolmap_[ridx] = 1; - } else { - if (tree[nid].DefaultLeft()) boolmap_[ridx] = 1; - } - } - } - } - } - - bitmap_.InitFromBool(boolmap_); - // communicate bitmap - rabit::Allreduce(dmlc::BeginPtr(bitmap_.data), bitmap_.data.size()); - // get the new position - const auto ndata = static_cast(p_fmat->Info().num_row_); - #pragma omp parallel for schedule(static) - for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) { - const int nid = this->DecodePosition(ridx); - if (bitmap_.Get(ridx)) { - CHECK(!tree[nid].IsLeaf()) << "inconsistent reduce information"; - if (tree[nid].DefaultLeft()) { - this->SetEncodePosition(ridx, tree[nid].RightChild()); - } else { - this->SetEncodePosition(ridx, tree[nid].LeftChild()); - } - } - } - } - // synchronize the best solution of each node - void SyncBestSolution(const std::vector &qexpand) override { - std::vector vec; - for (int nid : qexpand) { - for (int tid = 0; tid < this->nthread_; ++tid) { - this->snode_[nid].best.Update(this->stemp_[tid][nid].best); - } - vec.push_back(this->snode_[nid].best); - } - // TODO(tqchen) lazy version - // communicate best solution - 
reducer_.Allreduce(dmlc::BeginPtr(vec), vec.size()); - // assign solution back - for (size_t i = 0; i < qexpand.size(); ++i) { - const int nid = qexpand[i]; - this->snode_[nid].best = vec[i]; - } - } - - private: - common::BitMap bitmap_; - std::vector boolmap_; - rabit::Reducer reducer_; - }; - // we directly introduce pruner here - std::unique_ptr pruner_; - // training parameter - TrainParam param_; - // Cloned for each builder instantiation - std::unique_ptr spliteval_; - - FeatureInteractionConstraintHost interaction_constraints_; -}; - XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker") .describe("Grow tree with parallelization over columns.") .set_body([]() { return new ColMaker(); }); - -XGBOOST_REGISTER_TREE_UPDATER(DistColMaker, "distcol") -.describe("Distributed column split version of tree maker.") -.set_body([]() { - return new DistColMaker(); - }); } // namespace tree } // namespace xgboost