Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix r interaction #5543

Merged
merged 2 commits into from
Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions R-package/tests/testthat/test_interaction_constraints.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,42 @@ test_that("interaction constraints for regression", {
bst <- xgboost(data = train, label = y, max_depth = 3,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
interaction_constraints = list(c(0,1)))

# Set all observations to have the same x3 values then increment
# by the same amount
preds <- lapply(c(1,2,3), function(x){
tmat <- matrix(c(x1,x2,rep(x,1000)), ncol=3)
return(predict(bst, tmat))
})
preds <- lapply(c(1,2,3), function(x){
tmat <- matrix(c(x1,x2,rep(x,1000)), ncol=3)
return(predict(bst, tmat))
})

# Check incrementing x3 has the same effect on all observations
# since x3 is constrained to be independent of x1 and x2
# and all observations start off from the same x3 value
diff1 <- preds[[2]] - preds[[1]]
test1 <- all(abs(diff1 - diff1[1]) < 1e-4)
diff2 <- preds[[3]] - preds[[2]]
test2 <- all(abs(diff2 - diff2[1]) < 1e-4)
diff1 <- preds[[2]] - preds[[1]]
test1 <- all(abs(diff1 - diff1[1]) < 1e-4)

diff2 <- preds[[3]] - preds[[2]]
test2 <- all(abs(diff2 - diff2[1]) < 1e-4)

expect_true({
test1 & test2
}, "Interaction Contraint Satisfied")

})

test_that("interaction constraints scientific representation", {
  ## R's paste function switches to scientific representation once a number
  ## exceeds 1e5, so the feature count is pushed just past that threshold.
  ## See: https://github.com/dmlc/xgboost/issues/5179
  n_row <- 10
  n_col <- 1e5+10

  ## Random training data: n_row exponential draws fill the feature matrix,
  ## n_row normal draws are the labels.
  features <- matrix(rexp(n_row, rate=.1), nrow=n_row, ncol=n_col)
  labels <- rnorm(n_row)
  dtrain <- xgb.DMatrix(data=features, info = list(label=labels))

  ## A single constraint set listing every feature index.
  ## NOTE(review): the sequence runs from 0 to n_col inclusive, i.e. one more
  ## index than the matrix has columns -- confirm this is intended.
  all_features <- list(c(seq.int(from = 0, to = n_col, by = 1)))

  ## The constrained and unconstrained models are expected to serialize
  ## identically.
  booster_constrained <- xgb.train(data=dtrain, tree_method='hist',
                                   interaction_constraints=all_features,
                                   nrounds=10)
  booster_plain <- xgb.train(data=dtrain, tree_method='hist', nrounds=10)
  expect_equal(xgb.save.raw(booster_constrained), xgb.save.raw(booster_plain))
})
14 changes: 6 additions & 8 deletions src/tree/constraints.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <vector>

#include "xgboost/span.h"
#include "xgboost/json.h"
#include "constraints.h"
#include "param.h"

Expand All @@ -27,15 +28,12 @@ void FeatureInteractionConstraintHost::Reset() {
if (!enabled_) {
return;
}
// Parse interaction constraints
std::istringstream iss(this->interaction_constraint_str_);
dmlc::JSONReader reader(&iss);
// Read std::vector<std::vector<bst_uint>> first and then
// convert to std::vector<std::unordered_set<bst_uint>>
std::vector<std::vector<bst_uint>> tmp;
// Read std::vector<std::vector<bst_feature_t>> first and then
// convert to std::vector<std::unordered_set<bst_feature_t>>
std::vector<std::vector<bst_feature_t>> tmp;
try {
reader.Read(&tmp);
} catch (dmlc::Error const& e) {
ParseInteractionConstraint(this->interaction_constraint_str_, &tmp);
} catch (dmlc::Error const &e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< this->interaction_constraint_str_ << "\n"
<< "With error:\n" << e.what();
Expand Down
35 changes: 15 additions & 20 deletions src/tree/constraints.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <bitset>
#include <string>
#include <sstream>
#include <set>

#include "xgboost/logging.h"
Expand All @@ -18,28 +16,25 @@
#include "param.h"
#include "../common/device_helpers.cuh"


namespace xgboost {

size_t FeatureInteractionConstraint::Features() const {
// Returns the number of entries in d_sets_ptr_ minus one.
// NOTE(review): d_sets_ptr_ looks like a CSR row-pointer vector (one more
// entry than there are rows), which would make this the feature count --
// confirm. If d_sets_ptr_ can be empty when this is called, the subtraction
// would wrap around instead of yielding 0 -- verify against callers.
size_t FeatureInteractionConstraintDevice::Features() const {
return d_sets_ptr_.size() - 1;
}

void FeatureInteractionConstraint::Configure(
void FeatureInteractionConstraintDevice::Configure(
tree::TrainParam const& param, int32_t const n_features) {
has_constraint_ = true;
if (param.interaction_constraints.length() == 0) {
has_constraint_ = false;
return;
}
// --- Parse interaction constraints
std::istringstream iss(param.interaction_constraints);
dmlc::JSONReader reader(&iss);
// Interaction constraints parsed from string parameter. After
// parsing, this looks like {{0, 1, 2}, {2, 3 ,4}}.
std::vector<std::vector<int32_t>> h_feature_constraints;
std::vector<std::vector<bst_feature_t>> h_feature_constraints;
try {
reader.Read(&h_feature_constraints);
ParseInteractionConstraint(param.interaction_constraints, &h_feature_constraints);
} catch (dmlc::Error const& e) {
LOG(FATAL) << "Failed to parse feature interaction constraint:\n"
<< param.interaction_constraints << "\n"
Expand Down Expand Up @@ -68,13 +63,13 @@ void FeatureInteractionConstraint::Configure(

// Represent constraints as CSR format, flatten is the value vector,
// ptr is row_ptr vector in CSR.
std::vector<int32_t> h_feature_constraints_flatten;
std::vector<uint32_t> h_feature_constraints_flatten;
for (auto const& constraints : h_feature_constraints) {
for (int32_t c : constraints) {
for (uint32_t c : constraints) {
h_feature_constraints_flatten.emplace_back(c);
}
}
std::vector<int32_t> h_feature_constraints_ptr;
std::vector<size_t> h_feature_constraints_ptr;
size_t n_features_in_constraints = 0;
h_feature_constraints_ptr.emplace_back(n_features_in_constraints);
for (auto const& v : h_feature_constraints) {
Expand Down Expand Up @@ -130,13 +125,13 @@ void FeatureInteractionConstraint::Configure(
s_result_buffer_ = dh::ToSpan(result_buffer_);
}

FeatureInteractionConstraint::FeatureInteractionConstraint(
// Construct the constraint object and configure it right away.
//
// param      Training parameters, forwarded unchanged to Configure().
// n_features Feature count, forwarded unchanged to Configure().
//
// The members are initialised to "constraints enabled, zero sets" and then
// Configure() takes over; Configure() itself clears has_constraint_ again when
// param.interaction_constraints is an empty string.
FeatureInteractionConstraintDevice::FeatureInteractionConstraintDevice(
tree::TrainParam const& param, int32_t const n_features) :
has_constraint_{true}, n_sets_{0} {
this->Configure(param, n_features);
}

void FeatureInteractionConstraint::Reset() {
void FeatureInteractionConstraintDevice::Reset() {
for (auto& node : node_constraints_storage_) {
thrust::fill(node.begin(), node.end(), 0);
}
Expand All @@ -153,7 +148,7 @@ __global__ void ClearBuffersKernel(
}
}

void FeatureInteractionConstraint::ClearBuffers() {
void FeatureInteractionConstraintDevice::ClearBuffers() {
CHECK_EQ(output_buffer_bits_.Size(), input_buffer_bits_.Size());
CHECK_LE(feature_buffer_.Size(), output_buffer_bits_.Size());
uint32_t constexpr kBlockThreads = 256;
Expand All @@ -164,7 +159,7 @@ void FeatureInteractionConstraint::ClearBuffers() {
output_buffer_bits_, input_buffer_bits_);
}

common::Span<bst_feature_t> FeatureInteractionConstraint::QueryNode(int32_t node_id) {
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::QueryNode(int32_t node_id) {
if (!has_constraint_) { return {}; }
CHECK_LT(node_id, s_node_constraints_.size());

Expand Down Expand Up @@ -203,7 +198,7 @@ __global__ void QueryFeatureListKernel(LBitField64 node_constraints,
result_buffer_output &= result_buffer_input;
}

common::Span<bst_feature_t> FeatureInteractionConstraint::Query(
common::Span<bst_feature_t> FeatureInteractionConstraintDevice::Query(
common::Span<bst_feature_t> feature_list, int32_t nid) {
if (!has_constraint_ || nid == 0) {
return feature_list;
Expand Down Expand Up @@ -250,8 +245,8 @@ __global__ void RestoreFeatureListFromSetsKernel(
LBitField64 feature_buffer,

bst_feature_t fid,
common::Span<int32_t> feature_interactions,
common::Span<int32_t> feature_interactions_ptr, // of size n interaction set + 1
common::Span<bst_feature_t> feature_interactions,
common::Span<size_t> feature_interactions_ptr, // of size n interaction set + 1

common::Span<bst_feature_t> interactions_list,
common::Span<size_t> interactions_list_ptr) {
Expand Down Expand Up @@ -302,7 +297,7 @@ __global__ void InteractionConstraintSplitKernel(LBitField64 feature,
}
}

void FeatureInteractionConstraint::Split(
void FeatureInteractionConstraintDevice::Split(
bst_node_t node_id, bst_feature_t feature_id, bst_node_t left_id, bst_node_t right_id) {
if (!has_constraint_) { return; }
CHECK_NE(node_id, left_id)
Expand Down
20 changes: 10 additions & 10 deletions src/tree/constraints.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,18 @@ struct ValueConstraint {
};

// Feature interaction constraints built for GPU Hist updater.
struct FeatureInteractionConstraint {
struct FeatureInteractionConstraintDevice {
protected:
// Whether interaction constraint is used.
bool has_constraint_;
// n interaction sets.
int32_t n_sets_;
size_t n_sets_;

// The parsed feature interaction constraints as CSR.
dh::device_vector<int32_t> d_fconstraints_;
common::Span<int32_t> s_fconstraints_;
dh::device_vector<int32_t> d_fconstraints_ptr_;
common::Span<int32_t> s_fconstraints_ptr_;
dh::device_vector<bst_feature_t> d_fconstraints_;
common::Span<bst_feature_t> s_fconstraints_;
dh::device_vector<size_t> d_fconstraints_ptr_;
common::Span<size_t> s_fconstraints_ptr_;
/* Interaction sets for each feature as CSR. For an input like:
* [[0, 1], [1, 2]], this will have values:
*
Expand Down Expand Up @@ -141,11 +141,11 @@ struct FeatureInteractionConstraint {

public:
size_t Features() const;
FeatureInteractionConstraint() = default;
FeatureInteractionConstraintDevice() = default;
void Configure(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraint(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraint(FeatureInteractionConstraint const& that) = default;
FeatureInteractionConstraint(FeatureInteractionConstraint&& that) = default;
FeatureInteractionConstraintDevice(tree::TrainParam const& param, int32_t const n_features);
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice const& that) = default;
FeatureInteractionConstraintDevice(FeatureInteractionConstraintDevice&& that) = default;
/*! \brief Reset before constructing a new tree. */
void Reset();
/*! \brief Return a list of features given node id */
Expand Down
29 changes: 29 additions & 0 deletions src/tree/param.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <vector>
#include <utility>

#include "xgboost/json.h"
#include "param.h"

namespace std {
Expand Down Expand Up @@ -79,3 +80,31 @@ std::istream &operator>>(std::istream &is, std::vector<int> &t) {
return is;
}
} // namespace std

namespace xgboost {
void ParseInteractionConstraint(
std::string const &constraint_str,
std::vector<std::vector<bst_feature_t>> *p_out) {
auto &out = *p_out;
auto j_inc = Json::Load({constraint_str.c_str(), constraint_str.size()});
auto const &all = get<Array>(j_inc);
out.resize(all.size());
for (size_t i = 0; i < all.size(); ++i) {
auto const &set = get<Array const>(all[i]);
for (auto const &v : set) {
if (XGBOOST_EXPECT(IsA<Integer>(v), true)) {
uint32_t u = static_cast<uint32_t const>(get<Integer const>(v));
out[i].emplace_back(u);
} else if (IsA<Number>(v)) {
double d = get<Number const>(v);
CHECK_EQ(std::floor(d), d)
<< "Found floating point number in interaction constraints";
out[i].emplace_back(static_cast<uint32_t const>(d));
} else {
LOG(FATAL) << "Unknown value type for interaction constraint:"
<< v.GetValue().TypeStr();
}
}
}
}
} // namespace xgboost
15 changes: 14 additions & 1 deletion src/tree/param.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,21 @@ struct SplitEntryContainer {
};

using SplitEntry = SplitEntryContainer<GradStats>;

} // namespace tree

/*!
 * \brief Parse the interaction constraints from string.
 *
 * \param constraint_str String storing the interaction constraints as a
 *                       nested list, one inner list per constraint set.
 *
 *                       Example input string:
 *
 *                         "[[1, 2], [3, 4]]"
 *
 * \param p_out Pointer to output; receives one vector of feature indices
 *              per inner list of the input.
 */
void ParseInteractionConstraint(
std::string const &constraint_str,
std::vector<std::vector<xgboost::bst_feature_t>> *p_out);
} // namespace xgboost

// define string serializer for vector, to get the arguments
Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ struct GPUHistMakerDevice {
common::Monitor monitor;
std::vector<ValueConstraint> node_value_constraints;
common::ColumnSampler column_sampler;
FeatureInteractionConstraint interaction_constraints;
FeatureInteractionConstraintDevice interaction_constraints;

using ExpandQueue =
std::priority_queue<ExpandEntry, std::vector<ExpandEntry>,
Expand Down
6 changes: 3 additions & 3 deletions tests/cpp/tree/test_constraints.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
namespace xgboost {
namespace {

struct FConstraintWrapper : public FeatureInteractionConstraint {
struct FConstraintWrapper : public FeatureInteractionConstraintDevice {
common::Span<LBitField64> GetNodeConstraints() {
return FeatureInteractionConstraint::s_node_constraints_;
return FeatureInteractionConstraintDevice::s_node_constraints_;
}
FConstraintWrapper(tree::TrainParam param, bst_feature_t n_features) :
FeatureInteractionConstraint(param, n_features) {}
FeatureInteractionConstraintDevice(param, n_features) {}

dh::device_vector<bst_feature_t> const& GetDSets() const {
return d_sets_;
Expand Down