Less restrictive monotone constraints #2305

Closed
Changes from all commits
45 commits
1c47fac
Added 2 parameters to enable penalization of monotone splits and enab…
Jun 25, 2019
d56ff9b
Added getters and new parameters like parents of nodes in the trees.
Jun 25, 2019
b65ace1
Fill the feature_is_monotone variable in the trees.
Jun 26, 2019
cb9917f
Added utility function to know the number of bins of a feature.
Jun 26, 2019
8fea762
Added some debugging checks.
Jun 26, 2019
d412ca6
Leaf splits now keep track of the depth.
Jun 26, 2019
37010ce
Added the tree as a parameter in the function UpdateBestSplitsFromHis…
Jun 26, 2019
3ff354d
Added a new struct to keep track of the constraints, in a more precis…
Jun 26, 2019
d8563da
Added variables to keep track of the constraints efficiently.
Jun 26, 2019
10c5cbb
Added a penalty function for monotone splits.
Jun 26, 2019
d69e4ff
Added core functions to go through the tree, and update split when co…
Jun 26, 2019
e4ec6a0
Modified the way constraints are handled in feature histograms.
Jun 26, 2019
3ea72af
Removed old constraints that are not used anymore.
Jun 26, 2019
692af0d
Added a function to refit leaves at the end of the training.
Jun 26, 2019
7aaec4a
Updated tests.
Jun 26, 2019
21b8e32
Small bug fix.
Aug 6, 2019
c8cea6f
Made testing time for new tests more reasonable.
Aug 7, 2019
1b0713d
Removed code specific to the slow method.
Sep 9, 2019
742c8e0
Move the monotone penalty in the monotone_cosntraints file.
Sep 9, 2019
e50b141
Change name of current Constraints to LeafConstraints.
Sep 11, 2019
c49640e
Created a class for current constraints.
Sep 12, 2019
5e20b4b
Created the structure SplittingConstraints to encapsulate monotone co…
Sep 17, 2019
61ad8d6
Refactoring to make CurrentConstraints an array of SplittingConstraints.
Sep 17, 2019
f020bfb
Using the standard kEpsilon.
Sep 17, 2019
1e30c19
Making ComputeBestSplitForFeature static.
Sep 26, 2019
d2de571
Move the functions used for the constraints in the constraints files.…
Sep 27, 2019
e1125a9
Moved another monotone-constraints-related function in the monotone c…
Sep 27, 2019
dbe74f2
Added LearnerState structure to have functions with less arguments.
Sep 27, 2019
f1717b6
Remove commented unused code.
Oct 7, 2019
b0202e1
Removed unused variable splits_per_leaf_.
Oct 7, 2019
54b5d47
Remove duplicated function.
Oct 22, 2019
033de6f
Remove useless class members.
Oct 22, 2019
7272a35
Added a getter for splits_per_leaf_.
Oct 23, 2019
dbc3f07
Removed could_be_splittable_ useful only in the Slow method.
Oct 23, 2019
9593c3c
Remove old comment.
Oct 23, 2019
6040cbd
Pass constraint class directly to GetSolitGains.
Oct 24, 2019
3f10afe
Switched SplittingConstraints from a reference to a pointer.
Oct 24, 2019
ed61bab
Grouped the best_constraints from feature_histogram in a class.
Oct 24, 2019
f6eb56a
Changed constraints to a nullptr when there are no monotone constraints.
Oct 24, 2019
eb2c412
Changed data_partition from unique_ptr to regular pointer.
Oct 31, 2019
8d6ef50
Splitted Splitting constraints in left SplittingConstraint and RightS…
Nov 1, 2019
ceae2bf
Constraints classes are now passed to CalculateSplittedLeafOutput ins…
Nov 1, 2019
b632791
Fix bug.
Nov 1, 2019
d291b99
Use nullptr's when there are no constraints.
Dec 18, 2019
c0f1cc0
Clarified an if statement.
Dec 19, 2019
12 changes: 12 additions & 0 deletions docs/Parameters.rst
@@ -312,6 +312,18 @@ Learning Control Parameters

- dropout rate: a fraction of previous trees to drop during the dropout

- ``monotone_penalty`` :raw-html:`<a id="monotone_penalty" title="Permalink to this parameter" href="#monotone_penalty">&#x1F517;&#xFE0E;</a>`, default = ``0.``, type = double, aliases: ``monotone_splits_penalty``, constraints: ``0.0 <= monotone_penalty (< max_depth, if max_depth > 0)``

- used only if ``monotone_constraints`` is set

- monotone penalty: a penalization of ``0.0`` corresponds to no penalization. A penalization parameter ``X`` forbids any monotone splits on the first ``X`` (rounded down) level(s) of the tree. The penalty applied to monotone splits at a given depth is a continuous, increasing function of the penalization parameter (an illustrative sketch follows this diff)

- ``monotone_precise_method`` :raw-html:`<a id="monotone_precise_method" title="Permalink to this parameter" href="#monotone_precise_method">&#x1F517;&#xFE0E;</a>`, default = ``false``, type = bool, aliases: ``monotone_constraints_precise_mode``

- used only if ``monotone_constraints`` is set

- monotone precise method: if set to ``false``, the program will run as fast as without constraints, but the results may be over-constrained. If set to ``true``, the program will be slower, but the results will be better. Note that if there are categorical features in the dataset, they will be split using the fast method regardless of this parameter. Also, this parameter can only be set to ``true`` if missing-value handling is disabled

- ``max_drop`` :raw-html:`<a id="max_drop" title="Permalink to this parameter" href="#max_drop">&#x1F517;&#xFE0E;</a>`, default = ``50``, type = int

- used only in ``dart``
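
A minimal illustration of the behaviour described for ``monotone_penalty`` above. This is a hedged sketch, not necessarily the exact formula used by the PR, and the function name is hypothetical:

#include <cmath>

// Gain multiplier for a monotone split at `depth` under penalization X.
// Depths 0 .. floor(X)-1 get 0 (monotone splits forbidden there); beyond
// that point the multiplier increases continuously with depth toward 1,
// and decreases continuously as the penalization grows.
double MonotoneSplitGainMultiplier(int depth, double penalization) {
  if (penalization >= depth + 1.0) return 0.0;  // forbidden at this depth
  if (penalization <= 1.0) return 1.0 - penalization / std::pow(2.0, depth);
  return 1.0 - std::pow(2.0, penalization - 1.0 - depth);
}

For example, with ``monotone_penalty = 2.5`` the root (depth 0) and depth 1 cannot take monotone splits, a depth-2 monotone split keeps about 29% of its gain, and deeper splits are penalized less and less.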
12 changes: 12 additions & 0 deletions include/LightGBM/config.h
@@ -325,6 +325,18 @@ struct Config {
// desc = dropout rate: a fraction of previous trees to drop during the dropout
double drop_rate = 0.1;

// alias = monotone_splits_penalty
// check = >=0.0
// check = <max_depth; if max_depth > 0
// desc = used only if ``monotone_constraints`` is set
// desc = monotone penalty: a penalization of 0 corresponds to no penalization. A penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits at a given depth is a continuous, increasing function of the penalization parameter
double monotone_penalty = 0.;

// alias = monotone_constraints_precise_mode
// desc = used only if ``monotone_constraints`` is set
// desc = monotone precise mode: if set to false, the program will run as fast as without constraints, but the results may be over-constrained. If set to true, the program will be slower, but the results will be better. Note that if there are categorical features in the dataset, they will be split using the fast method regardless of this parameter. Also, this parameter can only be set to true if missing-value handling is disabled
bool monotone_precise_mode = false;

// desc = used only in ``dart``
// desc = max number of dropped trees during one boosting iteration
// desc = ``<=0`` means no limit
85 changes: 78 additions & 7 deletions include/LightGBM/tree.h
@@ -60,7 +60,7 @@ class Tree {
int Split(int leaf, int feature, int real_feature, uint32_t threshold_bin,
double threshold_double, double left_value, double right_value,
int left_cnt, int right_cnt, double left_weight, double right_weight,
float gain, MissingType missing_type, bool default_left);
float gain, MissingType missing_type, bool default_left, bool feature_is_monotone);

/*!
* \brief Performing a split on tree leaves, with categorical feature
@@ -80,9 +80,14 @@
* \param gain Split gain
* \return The index of new leaf.
*/
int SplitCategorical(int leaf, int feature, int real_feature, const uint32_t* threshold_bin, int num_threshold_bin,
const uint32_t* threshold, int num_threshold, double left_value, double right_value,
int left_cnt, int right_cnt, double left_weight, double right_weight, float gain, MissingType missing_type);

int SplitCategorical(int leaf, int feature, int real_feature,
const uint32_t *threshold_bin, int num_threshold_bin,
const uint32_t *threshold, int num_threshold,
double left_value, double right_value, int left_cnt,
int right_cnt, double left_weight, double right_weight,
float gain, MissingType missing_type,
bool feature_is_monotone);
Collaborator:

is feature_is_monotone needed?

Contributor Author:

Right now it is not needed, as we do not support monotone constraints for categorical features. But it makes the signature more consistent with the standard split, and we may support categorical variables for monotonic constraints in the future. Should I remove it?


/*! \brief Get the output of one leaf */
inline double LeafOutput(int leaf) const { return leaf_value_[leaf]; }
@@ -124,6 +129,24 @@
inline int PredictLeafIndex(const double* feature_values) const;
inline int PredictLeafIndexByMap(const std::unordered_map<int, double>& feature_values) const;

// Get node parent
inline int node_parent(int node_idx) const;
// Get leaf parent
inline int leaf_parent(int node_idx) const;

// Get children
inline int left_child(int node_idx) const;
inline int right_child(int node_idx) const;

// Get whether the leaf is in a monotone subtree
inline bool leaf_is_in_monotone_subtree(int leaf_idx) const;

inline double internal_value(int node_idx) const;

inline uint32_t threshold_in_bin(int node_idx) const;

// Get the feature corresponding to the split
inline int split_feature_inner(int node_idx) const;

inline void PredictContrib(const double* feature_values, int num_features, double* output);

@@ -302,8 +325,10 @@
}
}

inline void Split(int leaf, int feature, int real_feature, double left_value, double right_value, int left_cnt, int right_cnt,
double left_weight, double right_weight, float gain);
inline void Split(int leaf, int feature, int real_feature, double left_value,
double right_value, int left_cnt, int right_cnt, double left_weight,
double right_weight, float gain, bool feature_is_monotone);

/*!
* \brief Find leaf index of which record belongs by features
* \param feature_values Feature value of this record
@@ -402,12 +427,22 @@
std::vector<int> leaf_depth_;
double shrinkage_;
int max_depth_;
// add parent node information
std::vector<int> node_parent_;
// Keeps track of the monotone splits above the leaf
std::vector<bool> leaf_is_in_monotone_subtree_;
Collaborator:

Do we really need to store these states in the tree model, or could they go in other structures used only for MC? Since some users don't use MC, this will change the model format and cause compatibility problems.

Collaborator:

Another suggestion: it seems there are lots of changes in FeatureHistogram, which will hurt the readability of the current code. Could we decouple MC from the other parts, e.g. as an independent class that could be called from FeatureHistogram?

> This will hurt the readability of current codes. Could we decouple MC with other parts?

We were very careful to implement this in a way that limits the impact on readability, and I think we did a good job in this regard, given the complexity of what is being implemented. In fact, the current implementation of monotonic constraints isn't particularly readable; if anything, we've made it more readable, although obviously more complex.

If you have any specific examples where you feel readability has been impacted, let us know and we can discuss them on an example-by-example basis.

> Could we decouple MC with other parts?

We could, but I'm not convinced this would make things substantially more readable, and it would be some effort on our part to make such changes. It could even hurt readability due to increased indirection.

Collaborator:

Yeah, actually I mean maintainability: all the implementation of MC in one place, not distributed across the project. And I think good maintainability also makes the code more readable.

Contributor Author:

I will not be able to work on this during this week and next week, sorry about that. But I will make the necessary changes to improve the code after that.

Contributor Author:

@guolinke about the new states in the Tree class which would cause compatibility issues: first, the states are absolutely necessary to the functioning of monotone constraints as we implemented them; there is no way we can do without them. And I think these are sensible states that belong in the Tree class (I could even see them reused for other purposes).

An alternative I can think of would be to create a Tree-like class holding these states only, used wherever the Tree class is currently used. But that would create some redundancy, and would probably not be good for readability.

Since these fields won't be used when monotone constraints are not used, is it not possible to solve the compatibility problem instead? I am not sure when these issues would arise, but maybe there is a way to specify somewhere that if the formats don't match, these fields should remain void or be filled with dummy values. What do you think?

Collaborator:

@CharlesAuguste I see. Could we have these states in the tree class, but not save and load them in the model file? If this is possible, it is okay as well.

Collaborator:

Just double-checked the code; it seems the new states are not in the model file, so I think it is okay.
};

inline void Tree::Split(int leaf, int feature, int real_feature,
double left_value, double right_value, int left_cnt, int right_cnt,
double left_weight, double right_weight, float gain) {
double left_weight, double right_weight, float gain, bool feature_is_monotone) {
int new_node_idx = num_leaves_ - 1;

// Update if there is a monotone split above the leaf
if (feature_is_monotone || leaf_is_in_monotone_subtree_[leaf]) {
leaf_is_in_monotone_subtree_[leaf] = true;
leaf_is_in_monotone_subtree_[num_leaves_] = true;
}
// update parent info
int parent = leaf_parent_[leaf];
if (parent >= 0) {
@@ -421,6 +456,7 @@ inline void Tree::Split(int leaf, int feature, int real_feature,
// add new node
split_feature_inner_[new_node_idx] = feature;
split_feature_[new_node_idx] = real_feature;
node_parent_[new_node_idx] = parent;

split_gain_[new_node_idx] = gain;
// add two new leaves
@@ -529,6 +565,41 @@ inline int Tree::GetLeafByMap(const std::unordered_map<int, double>& feature_val
return ~node;
}

inline int Tree::node_parent(int node_idx) const {
return node_parent_[node_idx];
}

inline int Tree::left_child(int node_idx) const {
return left_child_[node_idx];
}

inline int Tree::right_child(int node_idx) const {
return right_child_[node_idx];
}

inline int Tree::split_feature_inner(int node_idx) const {
return split_feature_inner_[node_idx];
}

inline int Tree::leaf_parent(int node_idx) const {
return leaf_parent_[node_idx];
}

inline uint32_t Tree::threshold_in_bin(int node_idx) const {
#ifdef DEBUG
CHECK(node_idx >= 0);
#endif
return threshold_in_bin_[node_idx];
}

inline bool Tree::leaf_is_in_monotone_subtree(int leaf_idx) const {
return leaf_is_in_monotone_subtree_[leaf_idx];
}

inline double Tree::internal_value(int node_idx) const {
return internal_value_[node_idx];
}


} // namespace LightGBM

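
A hedged usage sketch (not code from the PR) of the parent getters added above, walking from a leaf to the root: the kind of traversal the constraining method needs in order to visit every monotone split above a leaf.

#include <cstdio>
#include <LightGBM/tree.h>

// Assumes `tree` is a trained LightGBM::Tree exposing this PR's getters.
void PrintAncestorSplits(const LightGBM::Tree &tree, int leaf_idx) {
  int node = tree.leaf_parent(leaf_idx);  // -1 if the leaf is the root
  while (node >= 0) {
    std::printf("node %d: inner feature %d, internal value %f\n",
                node, tree.split_feature_inner(node),
                tree.internal_value(node));
    node = tree.node_parent(node);  // -1 once the root has been visited
  }
}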
6 changes: 6 additions & 0 deletions src/boosting/gbdt_model_text.cpp
@@ -540,6 +540,9 @@ std::vector<double> GBDT::FeatureImportance(int num_iteration, int importance_ty
for (int iter = 0; iter < num_used_model; ++iter) {
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
if (models_[iter]->split_gain(split_idx) > 0) {
#ifdef DEBUG
CHECK(models_[iter]->split_feature(split_idx) >= 0);
#endif
feature_importances[models_[iter]->split_feature(split_idx)] += 1.0;
}
}
@@ -548,6 +551,9 @@
for (int iter = 0; iter < num_used_model; ++iter) {
for (int split_idx = 0; split_idx < models_[iter]->num_leaves() - 1; ++split_idx) {
if (models_[iter]->split_gain(split_idx) > 0) {
#ifdef DEBUG
CHECK(models_[iter]->split_feature(split_idx) >= 0);
#endif
feature_importances[models_[iter]->split_feature(split_idx)] += models_[iter]->split_gain(split_idx);
}
}
24 changes: 24 additions & 0 deletions src/io/config_auto.cpp
@@ -6,6 +6,7 @@
* This file is auto generated by LightGBM\helpers\parameter_generator.py from LightGBM\include\LightGBM\config.h file.
*/
#include<LightGBM/config.h>
#include <LightGBM/utils/log.h>
namespace LightGBM {
std::unordered_map<std::string, std::string> Config::alias_table({
{"config_file", "config"},
@@ -80,6 +81,8 @@ std::unordered_map<std::string, std::string> Config::alias_table({
{"lambda", "lambda_l2"},
{"min_split_gain", "min_gain_to_split"},
{"rate_drop", "drop_rate"},
{"monotone_splits_penalty", "monotone_penalty"},
{"monotone_constraints_precise_mode", "monotone_precise_mode"},
{"topk", "top_k"},
{"mc", "monotone_constraints"},
{"monotone_constraint", "monotone_constraints"},
@@ -199,6 +202,8 @@ std::unordered_set<std::string> Config::parameter_set({
"lambda_l2",
"min_gain_to_split",
"drop_rate",
"monotone_penalty",
"monotone_precise_mode",
"max_drop",
"skip_drop",
"xgboost_dart_mode",
@@ -399,8 +404,21 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str

if (GetString(params, "monotone_constraints", &tmp_str)) {
monotone_constraints = Common::StringToArray<int8_t>(tmp_str, ',');
Log::Warning("The constraining method was just changed, which could significantly affect results of the algorithm");
}

GetDouble(params, "monotone_penalty", &monotone_penalty);
bool constraints_exist = false;
for (auto it = monotone_constraints.begin(); it != monotone_constraints.end();
it++) {
if (*it != 0) {
constraints_exist = true;
}
}
CHECK(monotone_penalty == 0 || constraints_exist);
CHECK(max_depth <= 0 || monotone_penalty < max_depth);
CHECK(monotone_penalty >= 0.0);

if (GetString(params, "feature_contri", &tmp_str)) {
feature_contri = Common::StringToArray<double>(tmp_str, ',');
}
@@ -476,6 +494,10 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str

GetBool(params, "use_missing", &use_missing);

GetBool(params, "monotone_precise_mode", &monotone_precise_mode);
CHECK(!monotone_precise_mode || !use_missing);
CHECK(!monotone_precise_mode || constraints_exist);

Collaborator:

I think this file is auto-generated; you cannot edit it by hand. You could have some post-processing in config.cpp.

Contributor Author:

I am not sure I understand; I tested these checks locally and they worked. What do you mean by the file being auto-generated? I am not very familiar with how the library is built, sorry.

GetBool(params, "zero_as_missing", &zero_as_missing);

GetBool(params, "two_round", &two_round);
@@ -607,6 +629,8 @@ std::string Config::SaveMembersToString() const {
str_buf << "[lambda_l2: " << lambda_l2 << "]\n";
str_buf << "[min_gain_to_split: " << min_gain_to_split << "]\n";
str_buf << "[drop_rate: " << drop_rate << "]\n";
str_buf << "[monotone_penalty: " << monotone_penalty << "]\n";
str_buf << "[monotone_precise_mode: " << monotone_precise_mode << "]\n";
str_buf << "[max_drop: " << max_drop << "]\n";
str_buf << "[skip_drop: " << skip_drop << "]\n";
str_buf << "[xgboost_dart_mode: " << xgboost_dart_mode << "]\n";
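
Illustrative only, with hypothetical values: a parameter combination that satisfies the checks added above. monotone_precise_mode requires monotone constraints to be set and missing-value handling to be disabled; monotone_penalty must be non-negative and, when max_depth > 0, strictly below max_depth.

#include <string>
#include <unordered_map>

std::unordered_map<std::string, std::string> params = {
    {"monotone_constraints", "1,-1,0"},  // at least one non-zero entry
    {"monotone_precise_mode", "true"},   // requires use_missing = false
    {"use_missing", "false"},
    {"monotone_penalty", "1.5"},         // 0 <= 1.5 < max_depth
    {"max_depth", "6"},
};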
23 changes: 17 additions & 6 deletions src/io/tree.cpp
@@ -24,7 +24,9 @@ Tree::Tree(int max_leaves)
threshold_.resize(max_leaves_ - 1);
decision_type_.resize(max_leaves_ - 1, 0);
split_gain_.resize(max_leaves_ - 1);
node_parent_.resize(max_leaves_ - 1);
leaf_parent_.resize(max_leaves_);
leaf_is_in_monotone_subtree_.resize(max_leaves_);
leaf_value_.resize(max_leaves_);
leaf_weight_.resize(max_leaves_);
leaf_count_.resize(max_leaves_);
@@ -38,6 +40,7 @@
leaf_value_[0] = 0.0f;
leaf_weight_[0] = 0.0f;
leaf_parent_[0] = -1;
node_parent_[0] = -1;
shrinkage_ = 1.0f;
num_cat_ = 0;
cat_boundaries_.push_back(0);
@@ -50,8 +53,11 @@

int Tree::Split(int leaf, int feature, int real_feature, uint32_t threshold_bin,
double threshold_double, double left_value, double right_value,
int left_cnt, int right_cnt, double left_weight, double right_weight, float gain, MissingType missing_type, bool default_left) {
Split(leaf, feature, real_feature, left_value, right_value, left_cnt, right_cnt, left_weight, right_weight, gain);
int left_cnt, int right_cnt, double left_weight,
double right_weight, float gain,
MissingType missing_type, bool default_left,
bool feature_was_monotone) {
Split(leaf, feature, real_feature, left_value, right_value, left_cnt, right_cnt, left_weight, right_weight, gain, feature_was_monotone);
int new_node_idx = num_leaves_ - 1;
decision_type_[new_node_idx] = 0;
SetDecisionType(&decision_type_[new_node_idx], false, kCategoricalMask);
@@ -69,10 +75,15 @@ int Tree::Split(int leaf, int feature, int real_feature, uint32_t threshold_bin,
return num_leaves_ - 1;
}

int Tree::SplitCategorical(int leaf, int feature, int real_feature, const uint32_t* threshold_bin, int num_threshold_bin,
const uint32_t* threshold, int num_threshold, double left_value, double right_value,
data_size_t left_cnt, data_size_t right_cnt, double left_weight, double right_weight, float gain, MissingType missing_type) {
Split(leaf, feature, real_feature, left_value, right_value, left_cnt, right_cnt, left_weight, right_weight, gain);
int Tree::SplitCategorical(int leaf, int feature, int real_feature,
const uint32_t *threshold_bin, int num_threshold_bin,
const uint32_t *threshold, int num_threshold,
double left_value, double right_value,
data_size_t left_cnt, data_size_t right_cnt,
double left_weight, double right_weight,
float gain, MissingType missing_type,
bool feature_was_monotone) {
Split(leaf, feature, real_feature, left_value, right_value, left_cnt, right_cnt, left_weight, right_weight, gain, feature_was_monotone);
int new_node_idx = num_leaves_ - 1;
decision_type_[new_node_idx] = 0;
SetDecisionType(&decision_type_[new_node_idx], true, kCategoricalMask);
4 changes: 4 additions & 0 deletions src/treelearner/cost_effective_gradient_boosting.hpp
@@ -83,6 +83,10 @@ class CostEfficientGradientBoosting {
}
}

SplitInfo const & GetSplitInfo(int i) const {
return splits_per_leaf_[i];
}

private:
double CalculateOndemandCosts(int feature_index, int real_fidx, int leaf_index) const {
if (tree_learner_->config_->cegb_penalty_feature_lazy.empty()) {
31 changes: 16 additions & 15 deletions src/treelearner/data_parallel_tree_learner.cpp
@@ -146,7 +146,7 @@ void DataParallelTreeLearner<TREELEARNER_T>::BeforeTrain() {
}

template <typename TREELEARNER_T>
void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits(const Tree* tree) {
TREELEARNER_T::ConstructHistograms(this->is_feature_used_, true);
// construct local histograms
#pragma omp parallel for schedule(static)
@@ -160,11 +160,12 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplits() {
// Reduce scatter for histogram
Network::ReduceScatter(input_buffer_.data(), reduce_scatter_size_, sizeof(HistogramBinEntry), block_start_.data(),
block_len_.data(), output_buffer_.data(), static_cast<comm_size_t>(output_buffer_.size()), &HistogramBinEntry::SumReducer);
this->FindBestSplitsFromHistograms(this->is_feature_used_, true);
this->FindBestSplitsFromHistograms(this->is_feature_used_, true, tree);
}

template <typename TREELEARNER_T>
void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const std::vector<int8_t>&, bool) {
void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(
const std::vector<int8_t> &, bool, const Tree *tree) {
std::vector<SplitInfo> smaller_bests_per_thread(this->num_threads_, SplitInfo());
std::vector<SplitInfo> larger_bests_per_thread(this->num_threads_, SplitInfo());
std::vector<int8_t> smaller_node_used_features(this->num_features_, 1);
@@ -190,13 +191,14 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const
this->smaller_leaf_histogram_array_[feature_index].RawData());
SplitInfo smaller_split;
// find best threshold for smaller child
// FIXME Fill the vectors with the actual constraints and thresholds
SplittingConstraints *constraints;
std::vector<uint32_t> thresholds;
this->smaller_leaf_histogram_array_[feature_index].FindBestThreshold(
this->smaller_leaf_splits_->sum_gradients(),
this->smaller_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(this->smaller_leaf_splits_->LeafIndex()),
this->smaller_leaf_splits_->min_constraint(),
this->smaller_leaf_splits_->max_constraint(),
&smaller_split);
this->smaller_leaf_splits_->sum_gradients(),
this->smaller_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(this->smaller_leaf_splits_->LeafIndex()),
&smaller_split, constraints);
smaller_split.feature = real_feature_index;
if (smaller_split > smaller_bests_per_thread[tid] && smaller_node_used_features[feature_index]) {
smaller_bests_per_thread[tid] = smaller_split;
@@ -210,13 +212,12 @@ void DataParallelTreeLearner<TREELEARNER_T>::FindBestSplitsFromHistograms(const
this->smaller_leaf_histogram_array_[feature_index]);
SplitInfo larger_split;
// find best threshold for larger child
// FIXME Fill the vectors with the actual constraints and thresholds
this->larger_leaf_histogram_array_[feature_index].FindBestThreshold(
this->larger_leaf_splits_->sum_gradients(),
this->larger_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(this->larger_leaf_splits_->LeafIndex()),
this->larger_leaf_splits_->min_constraint(),
this->larger_leaf_splits_->max_constraint(),
&larger_split);
this->larger_leaf_splits_->sum_gradients(),
this->larger_leaf_splits_->sum_hessians(),
GetGlobalDataCountInLeaf(this->larger_leaf_splits_->LeafIndex()),
&larger_split, constraints);
larger_split.feature = real_feature_index;
if (larger_split > larger_bests_per_thread[tid] && larger_node_used_features[feature_index]) {
larger_bests_per_thread[tid] = larger_split;
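
The FIXME above reflects why the scalar min_constraint()/max_constraint() pair no longer suffices: in the precise mode, the bounds on a leaf's output can differ across threshold ranges of the splitting feature. A hypothetical sketch of the shape such a constraints bundle could take (names and layout assumed, not taken from the PR):

#include <cstdint>
#include <vector>

struct SplittingConstraintsSketch {
  std::vector<double> min_constraints;  // lower bound per threshold segment
  std::vector<double> max_constraints;  // upper bound per threshold segment
  std::vector<uint32_t> thresholds;     // bin indices where the bounds change
};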