diff --git a/docs/Parameters.rst b/docs/Parameters.rst
index 0d9ade659fef..30ea6cd51674 100644
--- a/docs/Parameters.rst
+++ b/docs/Parameters.rst
@@ -470,6 +470,14 @@ Learning Control Parameters
 
   - ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results
 
+- ``monotone_penalty`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``monotone_splits_penalty``, ``ms_penalty``, ``mc_penalty``, constraints: ``monotone_penalty >= 0.0``
+
+  - used only if ``monotone_constraints`` is set
+
+  - `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function of the penalization parameter
+
+  - if ``0.0`` (the default), no penalization is applied
+
 - ``feature_contri`` :raw-html:`🔗︎`, default = ``None``, type = multi-double, aliases: ``feature_contrib``, ``fc``, ``fp``, ``feature_penalty``
 
   - used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 057f56e99491..01a2061f8efb 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -447,6 +447,13 @@ struct Config {
   // descl2 = ``intermediate``, a `more advanced method `__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results
   std::string monotone_constraints_method = "basic";
 
+  // alias = monotone_splits_penalty, ms_penalty, mc_penalty
+  // check = >=0.0
+  // desc = used only if ``monotone_constraints`` is set
+  // desc = `monotone penalty `__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function of the penalization parameter
+  // desc = if ``0.0`` (the default), no penalization is applied
+  double monotone_penalty = 0.0;
+
   // type = multi-double
   // alias = feature_contrib, fc, fp, feature_penalty
   // default = None
diff --git a/src/io/config.cpp b/src/io/config.cpp
index 0cf1d3c8bf21..a6f475e89d83 100644
--- a/src/io/config.cpp
+++ b/src/io/config.cpp
@@ -328,6 +328,9 @@ void Config::CheckParamConflict() {
     Log::Warning("Cannot use \"intermediate\" monotone constraints with feature fraction different from 1, auto set monotone constraints to \"basic\" method.");
     monotone_constraints_method = "basic";
   }
+  if (max_depth > 0 && monotone_penalty >= max_depth) {
+    Log::Warning("Monotone penalty greater than tree depth. Monotone features won't be used.");
+  }
 }
 
 std::string Config::ToString() const {
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 59cc62a5d375..b2204affb4df 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -87,6 +87,9 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
   {"monotone_constraint", "monotone_constraints"},
   {"monotone_constraining_method", "monotone_constraints_method"},
   {"mc_method", "monotone_constraints_method"},
+  {"monotone_splits_penalty", "monotone_penalty"},
+  {"ms_penalty", "monotone_penalty"},
+  {"mc_penalty", "monotone_penalty"},
   {"feature_contrib", "feature_contri"},
   {"fc", "feature_contri"},
   {"fp", "feature_contri"},
@@ -218,6 +221,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "top_k",
   "monotone_constraints",
   "monotone_constraints_method",
+  "monotone_penalty",
   "feature_contri",
   "forcedsplits_filename",
   "refit_decay_rate",
@@ -419,6 +423,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::string>& params) {
 
   GetString(params, "monotone_constraints_method", &monotone_constraints_method);
 
+  GetDouble(params, "monotone_penalty", &monotone_penalty);
+
   if (GetString(params, "feature_contri", &tmp_str)) {
     feature_contri = Common::StringToArray<double>(tmp_str, ',');
   }
@@ -639,6 +646,7 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[top_k: " << top_k << "]\n";
   str_buf << "[monotone_constraints: " << Common::Join(Common::ArrayCast<int8_t, int>(monotone_constraints), ",") << "]\n";
   str_buf << "[monotone_constraints_method: " << monotone_constraints_method << "]\n";
+  str_buf << "[monotone_penalty: " << monotone_penalty << "]\n";
   str_buf << "[feature_contri: " << Common::Join(feature_contri, ",") << "]\n";
   str_buf << "[forcedsplits_filename: " << forcedsplits_filename << "]\n";
   str_buf << "[refit_decay_rate: " << refit_decay_rate << "]\n";
diff --git a/src/treelearner/monotone_constraints.hpp b/src/treelearner/monotone_constraints.hpp
index 4d804d7fbfa0..dcad0d6d3288 100644
--- a/src/treelearner/monotone_constraints.hpp
+++ b/src/treelearner/monotone_constraints.hpp
@@ -62,6 +62,24 @@ class LeafConstraintsBase {
       const std::vector<SplitInfo>& best_split_per_leaf) = 0;
 
   inline static LeafConstraintsBase* Create(const Config* config, int num_leaves);
+
+  double ComputeMonotoneSplitGainPenalty(int leaf_index, double penalization) {
+    int depth = tree_->leaf_depth(leaf_index);
+    if (penalization >= depth + 1.) {
+      return kEpsilon;
+    }
+    if (penalization <= 1.) {
+      return 1. - penalization / pow(2., depth) + kEpsilon;
+    }
+    return 1. - pow(2, penalization - 1. - depth) + kEpsilon;
+  }
+
+  void ShareTreePointer(const Tree* tree) {
+    tree_ = tree;
+  }
+
+ private:
+  const Tree* tree_;
 };
 
 class BasicLeafConstraints : public LeafConstraintsBase {
diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp
index b7569d22c8e2..6c4390553efd 100644
--- a/src/treelearner/serial_tree_learner.cpp
+++ b/src/treelearner/serial_tree_learner.cpp
@@ -165,6 +165,8 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
   auto tree = std::unique_ptr<Tree>(new Tree(config_->num_leaves));
   auto tree_prt = tree.get();
 
+  constraints_->ShareTreePointer(tree_prt);
+
   // root leaf
   int left_leaf = 0;
   int cur_depth = 1;
@@ -692,6 +694,11 @@ void SerialTreeLearner::ComputeBestSplitForFeature(
         cegb_->DetlaGain(feature_index, real_fidx, leaf_splits->leaf_index(), num_data, new_split);
   }
+  if (new_split.monotone_type != 0) {
+    double penalty = constraints_->ComputeMonotoneSplitGainPenalty(
+        leaf_splits->leaf_index(), config_->monotone_penalty);
+    new_split.gain *= penalty;
+  }
   if (new_split > *best_split) {
     *best_split = new_split;
   }
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 51be083a9f01..d4200649eb20 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1036,7 +1036,7 @@ def generate_trainset_for_monotone_constraints_tests(self, x3_to_category=True):
         categorical_features = []
         if x3_to_category:
             categorical_features = [2]
-        trainset = lgb.Dataset(x, label=y, categorical_feature=categorical_features)
+        trainset = lgb.Dataset(x, label=y, categorical_feature=categorical_features, free_raw_data=False)
         return trainset
 
     def test_monotone_constraints(self):
@@ -1071,8 +1071,8 @@ def is_correctly_constrained(learner, x3_to_category=True):
             return True
 
         for test_with_categorical_variable in [True, False]:
+            trainset = self.generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable)
             for monotone_constraints_method in ["basic", "intermediate"]:
-                trainset = self.generate_trainset_for_monotone_constraints_tests(test_with_categorical_variable)
                 params = {
                     'min_data': 20,
                     'num_leaves': 20,
@@ -1083,6 +1083,76 @@ def is_correctly_constrained(learner, x3_to_category=True):
                 constrained_model = lgb.train(params, trainset)
                 self.assertTrue(is_correctly_constrained(constrained_model, test_with_categorical_variable))
 
+    def test_monotone_penalty(self):
+        def are_first_splits_non_monotone(tree, n, monotone_constraints):
+            if n <= 0:
+                return True
+            if "leaf_value" in tree:
+                return True
+            if monotone_constraints[tree["split_feature"]] != 0:
+                return False
+            return (are_first_splits_non_monotone(tree["left_child"], n - 1, monotone_constraints)
+                    and are_first_splits_non_monotone(tree["right_child"], n - 1, monotone_constraints))
+
+        def are_there_monotone_splits(tree, monotone_constraints):
+            if "leaf_value" in tree:
+                return False
+            if monotone_constraints[tree["split_feature"]] != 0:
+                return True
+            return (are_there_monotone_splits(tree["left_child"], monotone_constraints)
+                    or are_there_monotone_splits(tree["right_child"], monotone_constraints))
+
+        max_depth = 5
+        monotone_constraints = [1, -1, 0]
+        penalization_parameter = 2.0
+        trainset = self.generate_trainset_for_monotone_constraints_tests(x3_to_category=False)
+        for monotone_constraints_method in ["basic", "intermediate"]:
+            params = {
+                'max_depth': max_depth,
+                'monotone_constraints': monotone_constraints,
+                'monotone_penalty': penalization_parameter,
+                "monotone_constraints_method": monotone_constraints_method,
+            }
+            constrained_model = lgb.train(params, trainset, 10)
+            dumped_model = constrained_model.dump_model()["tree_info"]
+            for tree in dumped_model:
+                self.assertTrue(are_first_splits_non_monotone(tree["tree_structure"], int(penalization_parameter),
+                                                              monotone_constraints))
+                self.assertTrue(are_there_monotone_splits(tree["tree_structure"], monotone_constraints))
+
+    # test if a penalty as high as the depth indeed prohibits all monotone splits
+    def test_monotone_penalty_max(self):
+        max_depth = 5
+        monotone_constraints = [1, -1, 0]
+        penalization_parameter = max_depth
+        trainset_constrained_model = self.generate_trainset_for_monotone_constraints_tests(x3_to_category=False)
+        x = trainset_constrained_model.data
+        y = trainset_constrained_model.label
+        x3_negatively_correlated_with_y = x[:, 2]
+        trainset_unconstrained_model = lgb.Dataset(x3_negatively_correlated_with_y.reshape(-1, 1), label=y)
+        params_constrained_model = {
+            'monotone_constraints': monotone_constraints,
+            'monotone_penalty': penalization_parameter,
+            "max_depth": max_depth,
+            "gpu_use_dp": True,
+        }
+        params_unconstrained_model = {
+            "max_depth": max_depth,
+            "gpu_use_dp": True,
+        }
+
+        unconstrained_model = lgb.train(params_unconstrained_model, trainset_unconstrained_model, 10)
+        unconstrained_model_predictions = unconstrained_model.\
+            predict(x3_negatively_correlated_with_y.reshape(-1, 1))
+
+        for monotone_constraints_method in ["basic", "intermediate"]:
+            params_constrained_model["monotone_constraints_method"] = monotone_constraints_method
+            # The penalization is so high that the first 2 features should not be used here
+            constrained_model = lgb.train(params_constrained_model, trainset_constrained_model, 10)
+
+            # Check that a very high penalization is the same as not using the features at all
+            np.testing.assert_array_equal(constrained_model.predict(x), unconstrained_model_predictions)
+
     def test_max_bin_by_feature(self):
         col1 = np.arange(0, 100)[:, np.newaxis]
         col2 = np.zeros((100, 1))