Skip to content

Commit

Permalink
Separate Depthwise and Lossguide growing policy in fast histogram (#…
Browse files Browse the repository at this point in the history
…4102)

* add back train method but mark as deprecated

* add back train method but mark as deprecated

* add back train method but mark as deprecated

* fix scalastyle error

* fix scalastyle error

* fix scalastyle error

* fix scalastyle error

* init

* more changes

* temp

* update

* update rabit

* change the histogram

* update kfactor

* sync per node stats

* temp

* update

* final

* code clean

* update rabit

* more cleanup

* fix errors

* fix failed tests

* enforce c++11

* broadcast subsampled feature correctly

* init col

* temp

* col sampling

* fix histmatrix init

* fix col sampling

* remove cout

* fix out of bound access

* fix core dump

remove core dump file

* disable test temporarily

* update

* add fid

* print perf data

* update

* revert some changes

* temp

* temp

* pass all tests

* bring back some tests

* recover some changes

* fix lint issue

* enable monotone and interaction constraints

* don't specify default for monotone and interactions

* recover column init part

* more recovery

* fix core dumps

* code clean

* revert some changes

* fix test compilation issue

* fix lint issue

* resolve compilation issue

* fix issues of lint caused by rebase

* fix stylistic changes and change variable names

* use regtree internal function

* modularize depth width

* address the comments

* fix failed tests

* wrap perf timers with class

* fix lint

* fix num_leaves count

* fix indentation

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.h

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.cc

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* Update src/tree/updater_quantile_hist.h

Co-Authored-By: CodingCat <CodingCat@users.noreply.github.com>

* merge

* fix compilation
  • Loading branch information
CodingCat committed Feb 13, 2019
1 parent 3be1b9a commit c18a366
Show file tree
Hide file tree
Showing 8 changed files with 450 additions and 182 deletions.
2 changes: 1 addition & 1 deletion jvm-packages/dev/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ rm /usr/bin/python
ln -s /opt/rh/python27/root/usr/bin/python /usr/bin/python

# build xgboost
cd /xgboost/jvm-packages;mvn package
cd /xgboost/jvm-packages;ulimit -c unlimited;mvn package

Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val trainingRDD = sc.parallelize(Classification.train)
val (booster, metrics) = XGBoost.trainDistributed(
trainingRDD,
List("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
List("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic", "num_round" -> 5, "num_workers" -> numWorkers,
"custom_eval" -> null, "custom_obj" -> null, "use_external_memory" -> false,
"missing" -> Float.NaN).toMap,
Expand All @@ -92,7 +92,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic", "num_round" -> 5, "num_workers" -> numWorkers,
"use_external_memory" -> true)
val model = new XGBoostClassifier(paramMap).fit(training)
Expand All @@ -104,54 +104,78 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic", "num_round" -> 5, "num_workers" -> numWorkers,
"tracker_conf" -> TrackerConf(60 * 60 * 1000, "scala"))
val model = new XGBoostClassifier(paramMap).fit(training)
assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1)
}

test("test with fast histo with monotone_constraints") {
test("test with quantile hist with monotone_constraints (lossguide)") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1",
"max_depth" -> "6", "silent" -> "1",
"max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide",
"num_round" -> 5, "num_workers" -> numWorkers, "monotone_constraints" -> "(1, 0)")
val model = new XGBoostClassifier(paramMap).fit(training)
assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1)
}

test("test with quantile hist with interaction_constraints (lossguide)") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1",
"max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide",
"num_round" -> 5, "num_workers" -> numWorkers, "interaction_constraints" -> "[[1,2],[2,3,4]]")
val model = new XGBoostClassifier(paramMap).fit(training)
assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1)
}

test("test with quantile hist with monotone_constraints (depthwise)") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1",
"max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise",
"num_round" -> 5, "num_workers" -> numWorkers, "monotone_constraints" -> "(1, 0)")
val model = new XGBoostClassifier(paramMap).fit(training)
assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1)
}

test("test with fast histo with interaction_constraints") {
test("test with quantile hist with interaction_constraints (depthwise)") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1",
"max_depth" -> "6", "silent" -> "1",
"max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise",
"num_round" -> 5, "num_workers" -> numWorkers, "interaction_constraints" -> "[[1,2],[2,3,4]]")
val model = new XGBoostClassifier(paramMap).fit(training)
assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1)
}

test("test with fast histo depthwise") {
test("test with quantile hist depthwise") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1",
"max_depth" -> "6", "silent" -> "1",
"max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "depthwise",
"num_round" -> 5, "num_workers" -> numWorkers)
val model = new XGBoostClassifier(paramMap).fit(training)
assert(eval.eval(model._booster.predict(testDM, outPutMargin = true), testDM) < 0.1)
}

test("test with fast histo lossguide") {
test("test with quantile hist lossguide") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "0", "silent" -> "1",
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "0",
"objective" -> "binary:logistic", "tree_method" -> "hist", "grow_policy" -> "lossguide",
"max_leaves" -> "8", "num_round" -> 5,
"num_workers" -> numWorkers)
Expand All @@ -160,11 +184,11 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
assert(x < 0.1)
}

test("test with fast histo lossguide with max bin") {
test("test with quantile hist lossguide with max bin") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "0", "silent" -> "0",
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "0",
"objective" -> "binary:logistic", "tree_method" -> "hist",
"grow_policy" -> "lossguide", "max_leaves" -> "8", "max_bin" -> "16",
"eval_metric" -> "error", "num_round" -> 5, "num_workers" -> numWorkers)
Expand All @@ -173,11 +197,11 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
assert(x < 0.1)
}

test("test with fast histo depthwidth with max depth") {
test("test with quantile hist depthwidth with max depth") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "6", "silent" -> "0",
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist",
"grow_policy" -> "depthwise", "max_depth" -> "2",
"eval_metric" -> "error", "num_round" -> 10, "num_workers" -> numWorkers)
Expand All @@ -186,11 +210,11 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
assert(x < 0.1)
}

test("test with fast histo depthwidth with max depth and max bin") {
test("test with quantile hist depthwidth with max depth and max bin") {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "6", "silent" -> "0",
val paramMap = Map("eta" -> "1", "gamma" -> "0.5", "max_depth" -> "6",
"objective" -> "binary:logistic", "tree_method" -> "hist",
"grow_policy" -> "depthwise", "max_depth" -> "2", "max_bin" -> "2",
"eval_metric" -> "error", "num_round" -> 10, "num_workers" -> numWorkers)
Expand All @@ -217,7 +241,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
}

val denseDF = buildDenseDataFrame().repartition(4)
val paramMap = List("eta" -> "1", "max_depth" -> "2", "silent" -> "1",
val paramMap = List("eta" -> "1", "max_depth" -> "2",
"objective" -> "binary:logistic", "missing" -> -0.1f, "num_workers" -> numWorkers).toMap
val model = new XGBoostClassifier(paramMap).fit(denseDF)
model.transform(denseDF).collect()
Expand All @@ -227,7 +251,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic", "timeout_request_workers" -> 0L,
"num_round" -> 5, "num_workers" -> numWorkers)
val model = new XGBoostClassifier(paramMap).fit(training)
Expand All @@ -241,7 +265,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val testDM = new DMatrix(Classification.test.iterator)

val tmpPath = Files.createTempDirectory("model1").toAbsolutePath.toString
val paramMap = Map("eta" -> "1", "max_depth" -> 2, "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> 2,
"objective" -> "binary:logistic", "checkpoint_path" -> tmpPath,
"checkpoint_interval" -> 2, "num_workers" -> numWorkers)

Expand Down Expand Up @@ -295,16 +319,17 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
val trainingRDD = sc.parallelize(Ranking.train, 5)
val (booster, _) = XGBoost.trainDistributed(
trainingRDD,
List("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
List("eta" -> "1", "max_depth" -> "6",
"objective" -> "rank:pairwise", "num_round" -> 5, "num_workers" -> numWorkers,
"missing" -> Float.NaN, "use_external_memory" -> false).toMap,
"custom_eval" -> null, "custom_obj" -> null, "use_external_memory" -> false,
"missing" -> Float.NaN).toMap,
hasGroup = true)

assert(booster != null)
}

test("training summary") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic", "num_round" -> 5, "nWorkers" -> numWorkers)

val trainingDF = buildDataFrame(Classification.train)
Expand All @@ -316,7 +341,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
}

test("train/test split") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic", "train_test_ratio" -> "0.5",
"num_round" -> 5, "num_workers" -> numWorkers)
val training = buildDataFrame(Classification.train)
Expand All @@ -332,7 +357,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
test("train with multiple validation datasets (non-ranking)") {
val training = buildDataFrame(Classification.train)
val Array(train, eval1, eval2) = training.randomSplit(Array(0.6, 0.2, 0.2))
val paramMap1 = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap1 = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic",
"num_round" -> 5, "num_workers" -> numWorkers)

Expand All @@ -345,7 +370,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
assert(model1.summary.trainObjectiveHistory !== model1.summary.validationObjectiveHistory(0))
assert(model1.summary.trainObjectiveHistory !== model1.summary.validationObjectiveHistory(1))

val paramMap2 = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap2 = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "binary:logistic",
"num_round" -> 5, "num_workers" -> numWorkers,
"eval_sets" -> Map("eval1" -> eval1, "eval2" -> eval2))
Expand All @@ -362,7 +387,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
test("train with multiple validation datasets (ranking)") {
val training = buildDataFrameWithGroup(Ranking.train, 5)
val Array(train, eval1, eval2) = training.randomSplit(Array(0.6, 0.2, 0.2))
val paramMap1 = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap1 = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "rank:pairwise",
"num_round" -> 5, "num_workers" -> numWorkers, "group_col" -> "group")
val xgb1 = new XGBoostRegressor(paramMap1).setEvalSets(Map("eval1" -> eval1, "eval2" -> eval2))
Expand All @@ -375,7 +400,7 @@ class XGBoostGeneralSuite extends FunSuite with PerTest {
assert(model1.summary.trainObjectiveHistory !== model1.summary.validationObjectiveHistory(0))
assert(model1.summary.trainObjectiveHistory !== model1.summary.validationObjectiveHistory(1))

val paramMap2 = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
val paramMap2 = Map("eta" -> "1", "max_depth" -> "6",
"objective" -> "rank:pairwise",
"num_round" -> 5, "num_workers" -> numWorkers, "group_col" -> "group",
"eval_sets" -> Map("eval1" -> eval1, "eval2" -> eval2))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,6 @@ public void testAscendMetrics() {
public void testBoosterEarlyStop() throws XGBoostError, IOException {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
// testBoosterWithFastHistogram(trainMat, testMat);
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 3);
Expand Down Expand Up @@ -375,7 +374,7 @@ public void testBoosterEarlyStop() throws XGBoostError, IOException {
}
}

private void testWithFastHisto(DMatrix trainingSet, Map<String, DMatrix> watches, int round,
private void testWithQuantileHisto(DMatrix trainingSet, Map<String, DMatrix> watches, int round,
Map<String, Object> paramMap, float threshold) throws XGBoostError {
float[][] metrics = new float[watches.size()][round];
Booster booster = XGBoost.train(trainingSet, paramMap, round, watches,
Expand All @@ -393,10 +392,9 @@ private void testWithFastHisto(DMatrix trainingSet, Map<String, DMatrix> watches
}

@Test
public void testFastHistoDepthWise() throws XGBoostError {
public void testQuantileHistoDepthWise() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
// testBoosterWithFastHistogram(trainMat, testMat);
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 3);
Expand All @@ -410,14 +408,13 @@ public void testFastHistoDepthWise() throws XGBoostError {
Map<String, DMatrix> watches = new HashMap<>();
watches.put("training", trainMat);
watches.put("test", testMat);
testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);
}

@Test
public void testFastHistoLossGuide() throws XGBoostError {
public void testQuantileHistoLossGuide() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
// testBoosterWithFastHistogram(trainMat, testMat);
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 0);
Expand All @@ -432,14 +429,13 @@ public void testFastHistoLossGuide() throws XGBoostError {
Map<String, DMatrix> watches = new HashMap<>();
watches.put("training", trainMat);
watches.put("test", testMat);
testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);
}

@Test
public void testFastHistoLossGuideMaxBin() throws XGBoostError {
public void testQuantileHistoLossGuideMaxBin() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
// testBoosterWithFastHistogram(trainMat, testMat);
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 0);
Expand All @@ -454,7 +450,7 @@ public void testFastHistoLossGuideMaxBin() throws XGBoostError {
};
Map<String, DMatrix> watches = new HashMap<>();
watches.put("training", trainMat);
testWithFastHisto(trainMat, watches, 10, paramMap, 0.0f);
testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);
}

@Test
Expand Down Expand Up @@ -534,46 +530,41 @@ public void testGetFeatureImportanceTotalCover() throws XGBoostError {
}

@Test
public void testFastHistoDepthwiseMaxDepth() throws XGBoostError {
public void testQuantileHistoDepthwiseMaxDepth() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
// testBoosterWithFastHistogram(trainMat, testMat);
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 3);
put("silent", 1);
put("objective", "binary:logistic");
put("tree_method", "hist");
put("max_depth", 2);
put("grow_policy", "depthwise");
put("eval_metric", "auc");
}
};
Map<String, DMatrix> watches = new HashMap<>();
watches.put("training", trainMat);
testWithFastHisto(trainMat, watches, 10, paramMap, 0.85f);
testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);
}

@Test
public void testFastHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {
public void testQuantileHistoDepthwiseMaxDepthMaxBin() throws XGBoostError {
DMatrix trainMat = new DMatrix("../../demo/data/agaricus.txt.train");
DMatrix testMat = new DMatrix("../../demo/data/agaricus.txt.test");
// testBoosterWithFastHistogram(trainMat, testMat);
Map<String, Object> paramMap = new HashMap<String, Object>() {
{
put("max_depth", 3);
put("silent", 1);
put("objective", "binary:logistic");
put("tree_method", "hist");
put("max_depth", 2);
put("max_bin", 2);
put("grow_policy", "depthwise");
put("eval_metric", "auc");
}
};
Map<String, DMatrix> watches = new HashMap<>();
watches.put("training", trainMat);
testWithFastHisto(trainMat, watches, 10, paramMap, 0.85f);
testWithQuantileHisto(trainMat, watches, 10, paramMap, 0.95f);
}

/**
Expand Down
Loading

0 comments on commit c18a366

Please sign in to comment.