From 55a2ed1d339082da3f07b2d71d2336f093bc9eb8 Mon Sep 17 00:00:00 2001
From: egor
Date: Fri, 29 Mar 2019 14:35:46 +0300
Subject: [PATCH 1/5] optimizations for pre-processing

---
 src/common/column_matrix.h        |   6 +-
 src/common/hist_util.cc           | 170 +++++++++++++++++++++++-------
 src/common/hist_util.h            |  65 ++++++++++++
 src/common/row_set.h              |   1 -
 src/tree/updater_quantile_hist.cc |  56 +++++++++-
 src/tree/updater_quantile_hist.h  |  34 ++++++
 6 files changed, 288 insertions(+), 44 deletions(-)

diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h
index 18fcdb337bfa..9b3381044b85 100644
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -124,6 +124,8 @@ class ColumnMatrix {
     }

     // pre-fill index_ for dense columns
+
+    #pragma omp parallel for
     for (bst_uint fid = 0; fid < nfeature; ++fid) {
       if (type_[fid] == kDenseColumn) {
         const size_t ibegin = boundary_[fid].index_begin;
@@ -184,8 +186,8 @@ class ColumnMatrix {

   std::vector<size_t> feature_counts_;
   std::vector<ColumnType> type_;
-  std::vector<uint32_t> index_;  // index_: may store smaller integers; needs padding
-  std::vector<size_t> row_ind_;
+  SimpleArray<uint32_t> index_;  // index_: may store smaller integers; needs padding
+  SimpleArray<size_t> row_ind_;
   std::vector<ColumnBoundary> boundary_;

   // index_base_[fid]: least bin id for feature fid
diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index f1c3e412bae8..4777091bddc3 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -11,6 +11,7 @@
 #include "./column_matrix.h"
 #include "./hist_util.h"
 #include "./quantile.h"
+#include "./../tree/updater_quantile_hist.h"

 #if defined(XGBOOST_MM_PREFETCH_PRESENT)
 #include <xmmintrin.h>
@@ -49,7 +50,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
   constexpr int kFactor = 8;
   std::vector<WXQSketch> sketchs;

-  const int nthread = omp_get_max_threads();
+  const size_t nthread = omp_get_max_threads();

   unsigned const nstep =
       static_cast<unsigned>((info.num_col_ + nthread - 1) / nthread);
@@ -67,34 +68,86 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
   // Use group index for weights?
   bool const use_group_ind = num_groups != 0 && weights.size() != info.num_row_;
-  for (const auto &batch : p_fmat->GetRowBatches()) {
-    size_t group_ind = 0;
-    if (use_group_ind) {
-      group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
+  if (use_group_ind)
+  {
+    for (const auto &batch : p_fmat->GetRowBatches()) {
+      size_t group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
+      #pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
+      {
+        CHECK_EQ(nthread, omp_get_num_threads());
+        auto tid = static_cast<unsigned>(omp_get_thread_num());
+        unsigned begin = std::min(nstep * tid, ncol);
+        unsigned end = std::min(nstep * (tid + 1), ncol);
+
+        // do not iterate if no columns are assigned to the thread
+        if (begin < end && end <= ncol) {
+          for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
+            size_t const ridx = batch.base_rowid + i;
+            SparsePage::Inst const inst = batch[i];
+            if (group_ptr[group_ind] == ridx &&
+                // maximum equals to weights.size() - 1
+                group_ind < num_groups - 1) {
+              // move to next group
+              group_ind++;
+            }
+            for (auto const& entry : inst) {
+              if (entry.index >= begin && entry.index < end) {
+                size_t w_idx = group_ind;
+                sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
+              }
+            }
+          }
+        }
+      }
     }
-#pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
-    {
-      CHECK_EQ(nthread, omp_get_num_threads());
-      auto tid = static_cast<unsigned>(omp_get_thread_num());
-      unsigned begin = std::min(nstep * tid, ncol);
-      unsigned end = std::min(nstep * (tid + 1), ncol);
-
-      // do not iterate if no columns are assigned to the thread
-      if (begin < end && end <= ncol) {
-        for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
-          size_t const ridx = batch.base_rowid + i;
-          SparsePage::Inst const inst = batch[i];
-          if (use_group_ind &&
-              group_ptr[group_ind] == ridx &&
-              // maximum equals to weights.size() - 1
-              group_ind < num_groups - 1) {
-            // move to next group
-            group_ind++;
+  } else {
+    for (const auto &batch : p_fmat->GetRowBatches()) {
+      const size_t size = batch.Size();
+      const size_t block_size = 512;
+      const size_t block_size_iter = block_size * nthread;
+      const size_t n_blocks = size / block_size_iter + !!(size % block_size_iter);
+
+      std::vector<std::vector<std::pair<bst_float, bst_float>>> buff(nthread);
+      for(size_t tid = 0; tid < nthread; ++tid) {
+        buff[tid].resize(block_size * ncol);
+      }
+
+      std::vector<size_t> sizes(nthread * ncol, 0);
+
+      for (size_t iblock = 0; iblock < n_blocks; ++iblock) {
+        #pragma omp parallel num_threads(nthread)
+        {
+          int tid = omp_get_thread_num();
+
+          const size_t ibegin = iblock * block_size_iter + tid * block_size;
+          const size_t iend = std::min(ibegin + block_size, size);
+
+          auto* p_sizes = sizes.data() + ncol * tid;
+          auto* p_buff = buff[tid].data();
+
+          for(size_t i = ibegin; i < iend; ++i) {
+            size_t const ridx = batch.base_rowid + i;
+            bst_float w = info.GetWeight(ridx);
+            SparsePage::Inst const inst = batch[i];
+
+            for (auto const& entry : inst) {
+              const size_t idx = entry.index;
+              p_buff[idx * block_size + p_sizes[idx]] = { entry.fvalue, w };
+              p_sizes[idx]++;
+            }
           }
-          for (auto const& entry : inst) {
-            if (entry.index >= begin && entry.index < end) {
-              size_t w_idx = use_group_ind ? group_ind : ridx;
-              sketchs[entry.index].Push(entry.fvalue, info.GetWeight(w_idx));
+          #pragma omp barrier
+          #pragma omp for schedule(static)
+          for(size_t icol = 0; icol < ncol; ++icol) {
+            for(size_t tid = 0; tid < nthread; ++tid) {
+              auto* p_sizes = sizes.data() + ncol * tid;
+              auto* p_buff = buff[tid].data() + icol * block_size;
+
+              for(size_t i = 0; i < p_sizes[icol]; ++i) {
+                sketchs[icol].Push(p_buff[i].first, p_buff[i].second);
+              }
+
+              p_sizes[icol] = 0;
             }
           }
         }
@@ -177,22 +230,65 @@ uint32_t HistCutMatrix::GetBinIdx(const Entry& e) {

 void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
   cut.Init(p_fmat, max_num_bins);
-
-  const int nthread = omp_get_max_threads();
+  const size_t nthread = omp_get_max_threads();
+  // const int nthread = 1;
   const uint32_t nbins = cut.row_ptr.back();
   hit_count.resize(nbins, 0);
   hit_count_tloc_.resize(nthread * nbins, 0);

-  row_ptr.push_back(0);
+
+  size_t new_size = 1;
+  for (const auto &batch : p_fmat->GetRowBatches()) {
+    new_size += batch.Size();
+  }
+
+  row_ptr.resize(new_size);
+  row_ptr[0] = 0;
+
+  size_t rbegin = 0;
+  size_t prev_sum = 0;
+
   for (const auto &batch : p_fmat->GetRowBatches()) {
-    const size_t rbegin = row_ptr.size() - 1;
-    for (size_t i = 0; i < batch.Size(); ++i) {
-      row_ptr.push_back(batch[i].size() + row_ptr.back());
+    MemStackAllocator<size_t, 128> partial_sums(nthread);
+    size_t* p_part = partial_sums.Get();
+
+    size_t block_size = batch.Size() / nthread;
+
+    #pragma omp parallel num_threads(nthread)
+    {
+      #pragma omp for
+      for(size_t tid = 0; tid < nthread; ++tid)
+      {
+        size_t ibegin = block_size * tid;
+        size_t iend = (tid == (nthread-1) ? batch.Size() : (block_size * (tid+1)));
+
+        size_t sum = 0;
+        for (size_t i = ibegin; i < iend; ++i) {
+          sum += batch[i].size();
+          row_ptr[rbegin + 1 + i] = sum;
+        }
+      }
+
+      #pragma omp single
+      {
+        p_part[0] = prev_sum;
+        for (size_t i = 1; i < nthread; ++i)
+          p_part[i] = p_part[i - 1] + row_ptr[rbegin + i*block_size];
+      }
+
+      #pragma omp for
+      for(size_t tid = 0; tid < nthread; ++tid) {
+        size_t ibegin = block_size * tid;
+        size_t iend = (tid == (nthread-1) ? batch.Size() : (block_size * (tid+1)));
+
+        for (size_t i = ibegin; i < iend; ++i)
+          row_ptr[rbegin + 1 + i] += p_part[tid];
+      }
     }
+
     index.resize(row_ptr.back());

     CHECK_GT(cut.cut.size(), 0U);
-    CHECK_EQ(cut.row_ptr.back(), cut.cut.size());

     auto bsize = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for num_threads(nthread) schedule(static)
@@ -203,7 +299,6 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
       SparsePage::Inst inst = batch[i];

       CHECK_EQ(ibegin + inst.size(), iend);
-
       for (bst_uint j = 0; j < inst.size(); ++j) {
         uint32_t idx = cut.GetBinIdx(inst[j]);

@@ -215,10 +310,13 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
 #pragma omp parallel for num_threads(nthread) schedule(static)
     for (bst_omp_uint idx = 0; idx < bst_omp_uint(nbins); ++idx) {
-      for (int tid = 0; tid < nthread; ++tid) {
+      for (size_t tid = 0; tid < nthread; ++tid) {
         hit_count[idx] += hit_count_tloc_[tid * nbins + idx];
       }
     }
+
+    prev_sum = row_ptr[rbegin + batch.Size()];
+    rbegin += batch.Size();
   }
 }
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index 3aa2f232d8a5..83b72ac71176 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -19,6 +19,71 @@
 namespace xgboost {
 namespace common {

+template<typename T>
+struct SimpleArray {
+
+  ~SimpleArray() {
+    free(ptr_);
+    ptr_ = nullptr;
+  }
+
+  void resize(size_t n) {
+    T* ptr = static_cast<T*>(malloc(n*sizeof(T)));
+    memcpy(ptr, ptr_, n_ * sizeof(T));
+    free(ptr_);
+    ptr_ = ptr;
+    n_ = n;
+  }
+
+  T& operator[](size_t idx) {
+    return ptr_[idx];
+  }
+
+  T& operator[](size_t idx) const {
+    return ptr_[idx];
+  }
+
+  size_t size() const {
+    return n_;
+  }
+
+  T back() const {
+    return ptr_[n_-1];
+  }
+
+  T* data() {
+    return ptr_;
+  }
+
+  const T* data() const {
+    return ptr_;
+  }
+
+
+  T* begin() {
+    return ptr_;
+  }
+
+  const T* begin() const {
+    return ptr_;
+  }
+
+  T* end() {
+    return ptr_ + n_;
+  }
+
+  const T* end() const {
+    return ptr_ + n_;
+  }
+
+private:
+  T* ptr_ = nullptr;
+  size_t n_ = 0;
+};
+
+
+
+
 /*! \brief Cut configuration for all the features. */
 struct HistCutMatrix {
   /*! \brief Unit pointer to rows by element position */
diff --git a/src/common/row_set.h b/src/common/row_set.h
index 01e27a39c28c..285988b159c3 100644
--- a/src/common/row_set.h
+++ b/src/common/row_set.h
@@ -59,7 +59,6 @@ class RowSetCollection {
   }
   // clear up things
   inline void Clear() {
-    row_indices_.clear();
     elem_of_each_node_.clear();
   }
   // initialize node id 0->everything
diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc
index f06cc76c3f77..c94616117a78 100644
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -420,25 +420,71 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
       CHECK_EQ(info.root_index_.size(), 0U);
       std::vector<size_t>& row_indices = row_set_collection_.row_indices_;
+      row_indices.resize(info.num_row_);
+      auto* p_row_indices = row_indices.data();
       // mark subsample and build list of member rows
+
       if (param_.subsample < 1.0f) {
         std::bernoulli_distribution coin_flip(param_.subsample);
         auto& rnd = common::GlobalRandom();
+        size_t j = 0;
         for (size_t i = 0; i < info.num_row_; ++i) {
           if (gpair[i].GetHess() >= 0.0f && coin_flip(rnd)) {
-            row_indices.push_back(i);
+            p_row_indices[j++] = i;
           }
         }
+        row_indices.resize(j);
       } else {
-        for (size_t i = 0; i < info.num_row_; ++i) {
-          if (gpair[i].GetHess() >= 0.0f) {
-            row_indices.push_back(i);
+
+        MemStackAllocator<int, 128> buff(this->nthread_);
+        int* p_buff = buff.Get();
+        std::fill(p_buff, p_buff + this->nthread_, false);
+
+        const size_t block_size = info.num_row_ / this->nthread_ + !!(info.num_row_ % this->nthread_);
+
+        #pragma omp parallel num_threads(this->nthread_)
+        {
+          const size_t tid = omp_get_thread_num();
+          const size_t ibegin = tid * block_size;
+          const size_t iend = std::min(ibegin + block_size, info.num_row_);
+
+          for (size_t i = ibegin; i < iend; ++i) {
+            if(gpair[i].GetHess() < 0.0f) {
+              buff.Get()[tid] = true;
+              break;
+            }
+          }
+        }
+
+        bool has_heg_hess = false;
+        for (size_t tid = 0; tid < this->nthread_; ++tid)
+          if (p_buff[tid] == true)
+            has_heg_hess = true;
+
+        if(has_heg_hess) {
+          size_t j = 0;
+          for (size_t i = 0; i < info.num_row_; ++i) {
+            if (gpair[i].GetHess() >= 0.0f) {
+              p_row_indices[j++] = i;
+            }
+          }
+          row_indices.resize(j);
+        } else {
+          #pragma omp parallel num_threads(this->nthread_)
+          {
+            const size_t tid = omp_get_thread_num();
+            const size_t ibegin = tid * block_size;
+            const size_t iend = std::min(ibegin + block_size, info.num_row_);
+            for (size_t i = ibegin; i < iend; ++i) {
+              p_row_indices[i] = i;
+            }
           }
         }
       }
-      row_set_collection_.Init();
     }
+    row_set_collection_.Init();
+
     {
       /* determine layout of data */
       const size_t nrow = info.num_row_;
diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h
index 7f346fe580ab..7e47d1147c5c 100644
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@@ -28,6 +28,40 @@
 #include "../common/column_matrix.h"

 namespace xgboost {
+
+template<typename T, size_t MaxStackSize>
+class MemStackAllocator
+{
+public:
+  MemStackAllocator(size_t required_size): required_size_(required_size) {
+  }
+
+  T* Get() {
+    if(!ptr_)
+    {
+      if (MaxStackSize>=required_size_) {
+        ptr_ = stack_mem_;
+      }
+      else {
+        ptr_ = (T*)malloc(required_size_ * sizeof(T));
+        do_free_ = true;
+      }
+    }
+
+    return ptr_;
+  }
+
+  ~MemStackAllocator() {
+    if (do_free_) free(ptr_);
+  }
+
+private:
+  T* ptr_ = nullptr;
+  bool do_free_ = false;
+  size_t required_size_;
+  T stack_mem_[MaxStackSize];
+};
+
 namespace tree {

 using xgboost::common::HistCutMatrix;
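Note on PATCH 1: the new row_ptr construction in GHistIndexMatrix::Init() is a three-phase parallel prefix sum. Each thread builds a block-local inclusive scan of its rows' lengths, a single thread then scans the per-block totals into offsets (p_part), and finally every thread shifts its block by the offset of everything before it. Below is a minimal standalone sketch of the same pattern. It is illustrative only and not part of the patches; the function name and all identifiers are hypothetical, and the only dependency is OpenMP.

#include <omp.h>

#include <algorithm>
#include <cstddef>
#include <vector>

// out[i + 1] = counts[0] + ... + counts[i], with out[0] = 0, computed with
// the same three-phase scheme as the patched GHistIndexMatrix::Init().
std::vector<std::size_t> ParallelPrefixSum(const std::vector<std::size_t>& counts) {
  const std::size_t n = counts.size();
  std::vector<std::size_t> out(n + 1, 0);
  const int nthread = omp_get_max_threads();
  const std::size_t block = n / nthread + !!(n % nthread);
  std::vector<std::size_t> offsets(nthread, 0);

  #pragma omp parallel num_threads(nthread)
  {
    // Phase 1: block-local inclusive scans, one block per loop index.
    #pragma omp for
    for (int t = 0; t < nthread; ++t) {
      const std::size_t ibegin = std::min(block * t, n);
      const std::size_t iend = std::min(ibegin + block, n);
      std::size_t sum = 0;
      for (std::size_t i = ibegin; i < iend; ++i) {
        sum += counts[i];
        out[i + 1] = sum;
      }
    }  // implicit barrier at the end of 'omp for'

    // Phase 2: one thread scans the per-block totals into offsets.
    // After phase 1, out[block * t] holds the local total of block t - 1.
    #pragma omp single
    {
      for (int t = 1; t < nthread; ++t) {
        offsets[t] = offsets[t - 1] + out[std::min(block * t, n)];
      }
    }  // implicit barrier at the end of 'single'

    // Phase 3: shift each block by the total of all preceding blocks.
    #pragma omp for
    for (int t = 0; t < nthread; ++t) {
      const std::size_t ibegin = std::min(block * t, n);
      const std::size_t iend = std::min(ibegin + block, n);
      for (std::size_t i = ibegin; i < iend; ++i) {
        out[i + 1] += offsets[t];
      }
    }
  }
  return out;
}

The patch applies the same idea in place on row_ptr, carrying prev_sum between row batches so the scan continues across batch boundaries.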
From 72709f12f9b53d96aa91ab290f365653189816cd Mon Sep 17 00:00:00 2001
From: egor
Date: Fri, 29 Mar 2019 17:18:00 +0300
Subject: [PATCH 2/5] code cleaning

---
 src/common/column_matrix.h        |  8 ++++----
 src/common/hist_util.cc           | 26 +++++++++++++-------------
 src/common/hist_util.h            |  3 +--
 src/tree/updater_quantile_hist.cc | 17 ++++++++++-------
 src/tree/updater_quantile_hist.h  | 20 +++++++++-----------
 5 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/src/common/column_matrix.h b/src/common/column_matrix.h
index 9b3381044b85..510206f50cd7 100644
--- a/src/common/column_matrix.h
+++ b/src/common/column_matrix.h
@@ -71,7 +71,7 @@ class ColumnMatrix {

   // construct column matrix from GHistIndexMatrix
   inline void Init(const GHistIndexMatrix& gmat, double sparse_threshold) {
-    const auto nfeature = static_cast<bst_uint>(gmat.cut.row_ptr.size() - 1);
+    const int32_t nfeature = static_cast<int32_t>(gmat.cut.row_ptr.size() - 1);
     const size_t nrow = gmat.row_ptr.size() - 1;

     // identify type of each column
@@ -86,7 +86,7 @@ class ColumnMatrix {
     gmat.GetFeatureCounts(&feature_counts_[0]);

     // classify features
-    for (bst_uint fid = 0; fid < nfeature; ++fid) {
+    for (int32_t fid = 0; fid < nfeature; ++fid) {
       if (static_cast<double>(feature_counts_[fid]) < sparse_threshold * nrow) {
         type_[fid] = kSparseColumn;
@@ -100,7 +100,7 @@ class ColumnMatrix {
     boundary_.resize(nfeature);
     size_t accum_index_ = 0;
     size_t accum_row_ind_ = 0;
-    for (bst_uint fid = 0; fid < nfeature; ++fid) {
+    for (int32_t fid = 0; fid < nfeature; ++fid) {
       boundary_[fid].index_begin = accum_index_;
       boundary_[fid].row_ind_begin = accum_row_ind_;
       if (type_[fid] == kDenseColumn) {
@@ -126,7 +126,7 @@ class ColumnMatrix {

     // pre-fill index_ for dense columns
     #pragma omp parallel for
-    for (bst_uint fid = 0; fid < nfeature; ++fid) {
+    for (int32_t fid = 0; fid < nfeature; ++fid) {
       if (type_[fid] == kDenseColumn) {
         const size_t ibegin = boundary_[fid].index_begin;
         uint32_t* begin = &index_[ibegin];
diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index 4777091bddc3..0c46b43b609d 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -68,8 +68,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
   // Use group index for weights?
   bool const use_group_ind = num_groups != 0 && weights.size() != info.num_row_;
-  if (use_group_ind)
-  {
+  if (use_group_ind) {
     for (const auto &batch : p_fmat->GetRowBatches()) {
       size_t group_ind = this->SearchGroupIndFromBaseRow(group_ptr, batch.base_rowid);
       #pragma omp parallel num_threads(nthread) firstprivate(group_ind, use_group_ind)
@@ -108,7 +107,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
       const size_t n_blocks = size / block_size_iter + !!(size % block_size_iter);

       std::vector<std::vector<std::pair<bst_float, bst_float>>> buff(nthread);
-      for(size_t tid = 0; tid < nthread; ++tid) {
+      for (size_t tid = 0; tid < nthread; ++tid) {
         buff[tid].resize(block_size * ncol);
       }

@@ -125,7 +124,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
           auto* p_sizes = sizes.data() + ncol * tid;
           auto* p_buff = buff[tid].data();

-          for(size_t i = ibegin; i < iend; ++i) {
+          for (size_t i = ibegin; i < iend; ++i) {
             size_t const ridx = batch.base_rowid + i;
             bst_float w = info.GetWeight(ridx);
             SparsePage::Inst const inst = batch[i];
@@ -138,12 +137,12 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
           }
           #pragma omp barrier
           #pragma omp for schedule(static)
-          for(size_t icol = 0; icol < ncol; ++icol) {
-            for(size_t tid = 0; tid < nthread; ++tid) {
+          for (size_t icol = 0; icol < ncol; ++icol) {
+            for (size_t tid = 0; tid < nthread; ++tid) {
               auto* p_sizes = sizes.data() + ncol * tid;
               auto* p_buff = buff[tid].data() + icol * block_size;

-              for(size_t i = 0; i < p_sizes[icol]; ++i) {
+              for (size_t i = 0; i < p_sizes[icol]; ++i) {
                 sketchs[icol].Push(p_buff[i].first, p_buff[i].second);
               }

@@ -230,7 +229,7 @@ uint32_t HistCutMatrix::GetBinIdx(const Entry& e) {

 void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
   cut.Init(p_fmat, max_num_bins);
-  const size_t nthread = omp_get_max_threads();
+  const int32_t nthread = omp_get_max_threads();
   // const int nthread = 1;
   const uint32_t nbins = cut.row_ptr.back();
   hit_count.resize(nbins, 0);
@@ -257,8 +256,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
 #pragma omp parallel num_threads(nthread)
     {
       #pragma omp for
-      for(size_t tid = 0; tid < nthread; ++tid)
-      {
+      for (int32_t tid = 0; tid < nthread; ++tid) {
         size_t ibegin = block_size * tid;
         size_t iend = (tid == (nthread-1) ? batch.Size() : (block_size * (tid+1)));

@@ -272,17 +270,19 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
       #pragma omp single
       {
         p_part[0] = prev_sum;
-        for (size_t i = 1; i < nthread; ++i)
+        for (int32_t i = 1; i < nthread; ++i) {
           p_part[i] = p_part[i - 1] + row_ptr[rbegin + i*block_size];
+        }
       }

       #pragma omp for
-      for(size_t tid = 0; tid < nthread; ++tid) {
+      for (int32_t tid = 0; tid < nthread; ++tid) {
         size_t ibegin = block_size * tid;
         size_t iend = (tid == (nthread-1) ? batch.Size() : (block_size * (tid+1)));

-        for (size_t i = ibegin; i < iend; ++i)
+        for (size_t i = ibegin; i < iend; ++i) {
           row_ptr[rbegin + 1 + i] += p_part[tid];
+        }
       }
     }
diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index 83b72ac71176..6c6bfcb7eed3 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -21,7 +21,6 @@ namespace common {

 template<typename T>
 struct SimpleArray {
-
   ~SimpleArray() {
     free(ptr_);
     ptr_ = nullptr;
@@ -76,7 +75,7 @@ struct SimpleArray {
     return ptr_ + n_;
   }

-private:
+ private:
   T* ptr_ = nullptr;
   size_t n_ = 0;
 };
diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc
index c94616117a78..8b9ab2ce07e3 100644
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -435,7 +435,6 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
         }
         row_indices.resize(j);
       } else {
-
         MemStackAllocator<int, 128> buff(this->nthread_);
         int* p_buff = buff.Get();
         std::fill(p_buff, p_buff + this->nthread_, false);
@@ -446,10 +445,11 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
         {
           const size_t tid = omp_get_thread_num();
           const size_t ibegin = tid * block_size;
-          const size_t iend = std::min(ibegin + block_size, info.num_row_);
+          const size_t iend = std::min(static_cast<size_t>(ibegin + block_size),
+                                       static_cast<size_t>(info.num_row_));

           for (size_t i = ibegin; i < iend; ++i) {
-            if(gpair[i].GetHess() < 0.0f) {
+            if (gpair[i].GetHess() < 0.0f) {
               buff.Get()[tid] = true;
               break;
             }
@@ -457,11 +457,13 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
         }

         bool has_heg_hess = false;
-        for (size_t tid = 0; tid < this->nthread_; ++tid)
-          if (p_buff[tid] == true)
+        for (size_t tid = 0; tid < this->nthread_; ++tid) {
+          if (p_buff[tid] == true) {
             has_heg_hess = true;
+          }
+        }

-        if(has_heg_hess) {
+        if (has_heg_hess) {
           size_t j = 0;
           for (size_t i = 0; i < info.num_row_; ++i) {
             if (gpair[i].GetHess() >= 0.0f) {
@@ -474,7 +476,8 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
         {
           const size_t tid = omp_get_thread_num();
           const size_t ibegin = tid * block_size;
-          const size_t iend = std::min(ibegin + block_size, info.num_row_);
+          const size_t iend = std::min(static_cast<size_t>(ibegin + block_size),
+                                       static_cast<size_t>(info.num_row_));
           for (size_t i = ibegin; i < iend; ++i) {
             p_row_indices[i] = i;
           }
diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h
index 7e47d1147c5c..af6d5860dec8 100644
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@@ -30,20 +30,17 @@ namespace xgboost {

 template<typename T, size_t MaxStackSize>
-class MemStackAllocator
-{
-public:
-  MemStackAllocator(size_t required_size): required_size_(required_size) {
+class MemStackAllocator {
+ public:
+  explicit MemStackAllocator(size_t required_size): required_size_(required_size) {
   }

   T* Get() {
-    if(!ptr_)
-    {
-      if (MaxStackSize>=required_size_) {
+    if (!ptr_) {
+      if (MaxStackSize >= required_size_) {
         ptr_ = stack_mem_;
-      }
-      else {
-        ptr_ = (T*)malloc(required_size_ * sizeof(T));
+      } else {
+        ptr_ = reinterpret_cast<T*>(malloc(required_size_ * sizeof(T)));
         do_free_ = true;
       }
     }
@@ -55,7 +52,8 @@ class MemStackAllocator
     if (do_free_) free(ptr_);
   }

-private:
+
+ private:
   T* ptr_ = nullptr;
   bool do_free_ = false;
   size_t required_size_;

From 3c88125e306481bbaf8985a7b6a0226a4da3587f Mon Sep 17 00:00:00 2001
From: egor
Date: Fri, 29 Mar 2019 18:24:14 +0300
Subject: [PATCH 3/5] code cleaning

---
 src/common/hist_util.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc
index 0c46b43b609d..a473f9240f28 100644
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -137,7 +137,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
           }
           #pragma omp barrier
           #pragma omp for schedule(static)
-          for (size_t icol = 0; icol < ncol; ++icol) {
+          for (int32_t icol = 0; icol < static_cast<int32_t>(ncol); ++icol) {
             for (size_t tid = 0; tid < nthread; ++tid) {
               auto* p_sizes = sizes.data() + ncol * tid;
               auto* p_buff = buff[tid].data() + icol * block_size;

From 3bd01633ad32b073d68714c4e6dbd3153fea1769 Mon Sep 17 00:00:00 2001
From: egor
Date: Thu, 4 Apr 2019 11:05:05 +0300
Subject: [PATCH 4/5] code cleaning after review

---
 src/tree/updater_quantile_hist.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc
index 8b9ab2ce07e3..c6381c95fb6f 100644
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -435,8 +435,8 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
         }
         row_indices.resize(j);
       } else {
-        MemStackAllocator<int, 128> buff(this->nthread_);
-        int* p_buff = buff.Get();
+        MemStackAllocator<bool, 128> buff(this->nthread_);
+        bool* p_buff = buff.Get();
         std::fill(p_buff, p_buff + this->nthread_, false);

         const size_t block_size = info.num_row_ / this->nthread_ + !!(info.num_row_ % this->nthread_);
@@ -450,20 +450,20 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,

           for (size_t i = ibegin; i < iend; ++i) {
             if (gpair[i].GetHess() < 0.0f) {
-              buff.Get()[tid] = true;
+              p_buff[tid] = true;
               break;
             }
           }
         }

-        bool has_heg_hess = false;
+        bool has_neg_hess = false;
         for (size_t tid = 0; tid < this->nthread_; ++tid) {
-          if (p_buff[tid] == true) {
-            has_heg_hess = true;
+          if (p_buff[tid]) {
+            has_neg_hess = true;
           }
         }

-        if (has_heg_hess) {
+        if (has_neg_hess) {
           size_t j = 0;
           for (size_t i = 0; i < info.num_row_; ++i) {
             if (gpair[i].GetHess() >= 0.0f) {

From 866b6816d99c3c7f61bddcfe8d4cfacfccfc88ab Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Thu, 11 Apr 2019 15:01:53 +0300
Subject: [PATCH 5/5] Apply suggestions from code review

Co-Authored-By: SmirnovEgorRu
---
 src/common/hist_util.h           | 4 ++++
 src/tree/updater_quantile_hist.h | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/src/common/hist_util.h b/src/common/hist_util.h
index 6c6bfcb7eed3..09c7d76535d8 100644
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -19,6 +19,10 @@
 namespace xgboost {
 namespace common {

+/*
+ * \brief A thin wrapper around dynamically allocated C-style array.
+ * Make sure to call resize() before use.
+ */
 template<typename T>
 struct SimpleArray {
   ~SimpleArray() {
diff --git a/src/tree/updater_quantile_hist.h b/src/tree/updater_quantile_hist.h
index af6d5860dec8..17688f86afcb 100644
--- a/src/tree/updater_quantile_hist.h
+++ b/src/tree/updater_quantile_hist.h
@@ -29,6 +29,9 @@

 namespace xgboost {

+/*!
+ * \brief A C-style array with in-stack allocation. As long as the array is smaller than MaxStackSize, it will be allocated inside the stack. Otherwise, it will be heap-allocated.
+ */
 template<typename T, size_t MaxStackSize>
 class MemStackAllocator {
  public:
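Note on the helpers added by this series: SimpleArray (src/common/hist_util.h, namespace xgboost::common) is a malloc-backed array that skips the value-initialization std::vector performs on resize(), and MemStackAllocator (src/tree/updater_quantile_hist.h, namespace xgboost) hands out small scratch buffers from an in-object stack array, falling back to malloc only when the request exceeds MaxStackSize. A usage sketch follows; it is illustrative only, the function, sizes, and template arguments below are hypothetical, and the include paths are assumed to match the build's include roots.

#include <cstddef>
#include <cstdint>

#include "common/hist_util.h"            // SimpleArray (adjust path as needed)
#include "tree/updater_quantile_hist.h"  // MemStackAllocator (adjust path as needed)

void ExampleScratchUsage() {
  constexpr std::size_t kThreads = 16;  // hypothetical thread count

  // Small per-thread scratch: 16 <= 64, so Get() returns the in-object
  // stack buffer and the hot path pays no malloc/free.
  xgboost::MemStackAllocator<std::size_t, 64> partial_sums(kThreads);
  std::size_t* p_part = partial_sums.Get();
  for (std::size_t t = 0; t < kThreads; ++t) p_part[t] = 0;

  // Large index storage: resize() must be called before first use, and
  // elements are left uninitialized, which is the point of preferring it
  // over std::vector on allocation-heavy paths.
  xgboost::common::SimpleArray<std::uint32_t> index;
  index.resize(std::size_t(1) << 20);
  index[0] = 42;
}  // ~MemStackAllocator frees heap memory only if it fell back to malloc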