Skip to content

Commit

Permalink
Add JSON IO for various components.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Aug 6, 2019
1 parent 2a4df8e commit 3f35ba7
Show file tree
Hide file tree
Showing 35 changed files with 996 additions and 81 deletions.
7 changes: 2 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
endif()

message(STATUS "xgboost VERSION: ${xgboost_VERSION}")
set(XGBOOST_DEFINITIONS
${XGBOOST_DEFINITIONS}
-DXGBOOST_VER_MAJOR=${xgboost_VERSION_MAJOR}
-DXGBOOST_VER_MINOR=${xgboost_VERSION_MINOR}
-DXGBOOST_VER_PATCH=${xgboost_VERSION_PATCH})
include (${xgboost_SOURCE_DIR}/cmake/Version.cmake)
write_version()
set_default_configuration_release()

#-- Options
Expand Down
2 changes: 2 additions & 0 deletions amalgamation/xgboost-all0.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
// gbms
#include "../src/gbm/gbm.cc"
#include "../src/gbm/gbtree.cc"
#include "../src/gbm/gbtree_model.cc"
#include "../src/gbm/gblinear.cc"
#include "../src/gbm/gblinear_model.cc"

// data
#include "../src/data/data.cc"
Expand Down
5 changes: 5 additions & 0 deletions cmake/Version.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Generate include/xgboost/build_config.h from the cmake/build_config.h.in
# template. @ONLY restricts substitution to @VAR@ references, so any ${...}
# text inside the template is copied through verbatim.
# NOTE(review): the output path is inside the source tree, not the build tree.
function(write_version)
  configure_file(
    ${xgboost_SOURCE_DIR}/cmake/build_config.h.in
    ${xgboost_SOURCE_DIR}/include/xgboost/build_config.h
    @ONLY)
endfunction()
28 changes: 28 additions & 0 deletions cmake/build_config.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*!
 * Copyright 2019 by Contributors
 * \file build_config.h
 *
 * Generated from `cmake/build_config.h.in` by cmake.
 *
 * Selects the software prefetch macros and carries the version macros, whose
 * `@...@` placeholders are substituted when cmake configures this template.
 */
#ifndef XGBOOST_BUILD_CONFIG_H_
#define XGBOOST_BUILD_CONFIG_H_

// These checks are for the Makefile build: they only apply when neither
// prefetch macro has been defined already.
#if !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)
/* default logic for software pre-fetching */
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))) || defined(__INTEL_COMPILER)
// Enable _mm_prefetch for Intel compiler and MSVC+x86
// NOTE(review): this branch defines XGBOOST_BUILTIN_PREFETCH_PRESENT as well;
// confirm that defining both macros here is intended.
#define XGBOOST_MM_PREFETCH_PRESENT
#define XGBOOST_BUILTIN_PREFETCH_PRESENT
#elif defined(__GNUC__)
// Enable __builtin_prefetch for GCC
#define XGBOOST_BUILTIN_PREFETCH_PRESENT
#endif // compiler detection

#endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined(XGBOOST_BUILTIN_PREFETCH_PRESENT)

// Version numbers; each @...@ placeholder is replaced by the cmake variable of the same name.
#define XGBOOST_VER_MAJOR @xgboost_VERSION_MAJOR@
#define XGBOOST_VER_MINOR @xgboost_VERSION_MINOR@
#define XGBOOST_VER_PATCH @xgboost_VERSION_PATCH@

#endif // XGBOOST_BUILD_CONFIG_H_
15 changes: 15 additions & 0 deletions include/xgboost/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,21 @@ inline XGBOOST_DEVICE void GradientPairInternal<int64_t>::SetHess(float h) {

} // namespace detail

class Json;

/*!
 * \brief interface of components that can be saved to and loaded from a json object
 */
struct Serializable {
  /*!
   * \brief virtual destructor, so that an implementation can be destroyed
   *  safely through a pointer to this interface
   */
  virtual ~Serializable() = default;
  /*!
   * \brief load the model from a json object
   * \param in json object where to load the model from
   */
  virtual void Load(Json const& in) = 0;
  /*!
   * \brief saves the model to a json object
   * \param out json container where to save the model to
   */
  virtual void Save(Json* out) const = 0;
};

/*! \brief gradient statistics pair usually needed in gradient boosting */
using GradientPair = detail::GradientPairInternal<float>;

Expand Down
6 changes: 6 additions & 0 deletions include/xgboost/build_config.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/*!
* Copyright 2019 by Contributors
* \file build_config.h
*
* Generated from `cmake/build_config.h.in` by cmake.
*/
#ifndef XGBOOST_BUILD_CONFIG_H_
#define XGBOOST_BUILD_CONFIG_H_
Expand All @@ -19,4 +21,8 @@

#endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()

#define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 0
#define XGBOOST_VER_PATCH 0

#endif // XGBOOST_BUILD_CONFIG_H_
13 changes: 12 additions & 1 deletion include/xgboost/gbm.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <dmlc/registry.h>
#include <xgboost/base.h>
#include <xgboost/json.h>
#include <xgboost/data.h>
#include <xgboost/objective.h>
#include <xgboost/feature_map.h>
Expand All @@ -27,7 +28,7 @@ namespace xgboost {
/*!
* \brief interface of gradient boosting model.
*/
class GradientBooster {
class GradientBooster : public Serializable {
protected:
GenericParameter const* learner_param_;

Expand All @@ -46,11 +47,21 @@ class GradientBooster {
* \param fi input stream.
*/
virtual void Load(dmlc::Stream* fi) = 0;
/*!
* \brief load model from json
* \param in input json model.
*/
void Load(Json const& in) override = 0;
/*!
* \brief save model to stream.
* \param fo output stream
*/
virtual void Save(dmlc::Stream* fo) const = 0;
/*!
* \brief Save model to Json
* \param out output json document.
*/
void Save(Json* out) const override = 0;
/*!
* \brief whether the model allow lazy checkpoint
* return true if model is only updated in DoBoost
Expand Down
16 changes: 14 additions & 2 deletions include/xgboost/learner.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
#define XGBOOST_LEARNER_H_

#include <rabit/rabit.h>

#include <xgboost/base.h>
#include <xgboost/gbm.h>
#include <xgboost/metric.h>
#include <xgboost/objective.h>
#include <xgboost/feature_map.h>
#include <xgboost/generic_parameters.h>
#include <xgboost/json.h>

#include <utility>
#include <map>
Expand All @@ -41,19 +41,31 @@ namespace xgboost {
*
* \endcode
*/
class Learner : public rabit::Serializable {
class Learner : public Serializable, public rabit::Serializable {
public:
/*! \brief virtual destructor */
~Learner() override = default;
/*!
* \brief Configure Learner based on set parameters.
*/
virtual void Configure() = 0;

/*!
* \brief load model from json object
* \param in input json object
*/
void Load(Json const& in) override = 0;
/*!
* \brief load model from stream
* \param fi input stream.
*/
void Load(dmlc::Stream* fi) override = 0;

/*!
* \brief save model to json object
* \param out output json object
*/
void Save(Json* out) const override = 0;
/*!
* \brief save model to stream.
* \param fo output stream
Expand Down
7 changes: 6 additions & 1 deletion include/xgboost/objective.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@

namespace xgboost {

class Json;

/*! \brief interface of objective function */
class ObjFunction {
class ObjFunction : public Serializable {
protected:
GenericParameter const* tparam_;

Expand Down Expand Up @@ -72,6 +74,9 @@ class ObjFunction {
virtual bst_float ProbToMargin(bst_float base_score) const {
// Default implementation: hand base_score back unchanged.
return base_score;
}

/*!
 * \brief save the objective function to a json object
 * \param out json container where to save the objective function to
 */
void Save(Json* out) const override = 0;
/*!
 * \brief load the objective function from a json object
 * \param in json object where to load the objective function from
 */
void Load(Json const& in) override = 0;
/*!
* \brief Create an objective function according to name.
* \param tparam Generic parameters.
Expand Down
22 changes: 18 additions & 4 deletions include/xgboost/tree_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <dmlc/io.h>
#include <dmlc/parameter.h>

#include <limits>
#include <vector>
#include <string>
Expand All @@ -24,6 +25,8 @@ namespace xgboost {

struct PathElement; // forward declaration

class Json;

/*! \brief meta parameters of the tree */
struct TreeParam : public dmlc::Parameter<TreeParam> {
/*! \brief number of start root */
Expand Down Expand Up @@ -57,6 +60,7 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
// other arguments are set by the algorithm.
DMLC_DECLARE_FIELD(num_roots).set_lower_bound(1).set_default(1)
.describe("Number of start root of trees.");
DMLC_DECLARE_FIELD(num_nodes).set_lower_bound(1).set_default(1);
DMLC_DECLARE_FIELD(num_feature)
.describe("Number of features used in tree construction.");
DMLC_DECLARE_FIELD(size_leaf_vector).set_lower_bound(0).set_default(0)
Expand All @@ -80,7 +84,7 @@ struct RTreeNodeStat {
/*! \brief weight of current node */
bst_float base_weight;
/*! \brief number of child that is leaf node known up to now */
int leaf_child_cnt;
int leaf_child_cnt {0};
bool operator==(const RTreeNodeStat& b) const {
return loss_chg == b.loss_chg && sum_hess == b.sum_hess &&
base_weight == b.base_weight && leaf_child_cnt == b.leaf_child_cnt;
Expand All @@ -91,7 +95,7 @@ struct RTreeNodeStat {
* \brief define regression tree to be the most common tree model.
* This is the data structure used in xgboost's major tree models.
*/
class RegTree {
class RegTree : public Serializable {
public:
/*! \brief auxiliary statistics of node to help tree building */
using SplitCondT = bst_float;
Expand All @@ -103,6 +107,12 @@ class RegTree {
static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
"Node: 64 bit align");
}
Node(int32_t cleft, int32_t cright, int32_t parent,
uint32_t split_ind, float split_cond, bool default_left) :
parent_{parent}, cleft_{cleft}, cright_{cright} {
this->SetSplit(split_ind, split_cond, default_left);
}

/*! \brief index of left child */
XGBOOST_DEVICE int LeftChild() const {
return this->cleft_;
Expand Down Expand Up @@ -216,9 +226,9 @@ class RegTree {
};
// pointer to parent, highest bit is used to
// indicate whether it's a left child or not
int parent_;
int parent_{-1};
// pointer to left, right
int cleft_, cright_;
int cleft_{-1}, cright_{-1};
// split feature index, left split or right split depends on the highest bit
unsigned sindex_{0};
// extra info
Expand Down Expand Up @@ -307,6 +317,8 @@ class RegTree {
}
CHECK_EQ(static_cast<int>(deleted_nodes_.size()), param.num_deleted);
}

/*!
 * \brief load model from json object
 * \param in json object where to load the model from
 */
void Load(Json const& in) override;
/*!
* \brief save model to stream
* \param fo output stream
Expand All @@ -320,6 +332,8 @@ class RegTree {
fo->Write(dmlc::BeginPtr(stats_), sizeof(RTreeNodeStat) * nodes_.size());
}

/*!
 * \brief save model to json object
 * \param out json container where to save the model to
 */
void Save(Json* out) const override;

bool operator==(const RegTree& b) const {
return nodes_ == b.nodes_ && stats_ == b.stats_ &&
deleted_nodes_ == b.deleted_nodes_ && param == b.param;
Expand Down
5 changes: 5 additions & 0 deletions include/xgboost/tree_updater.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#include "../../src/common/host_device_vector.h"

namespace xgboost {

class Json;

/*!
* \brief interface of tree update module, that performs update of a tree.
*/
Expand Down Expand Up @@ -67,6 +70,8 @@ class TreeUpdater {

virtual char const* Name() const = 0;

/*!
 * \brief save the updater to a json object; this default implementation
 *  has an empty body and writes nothing
 * \param out output json object, left untouched by the default implementation
 */
virtual void Save(Json* out) const {}

/*!
* \brief Create a tree updater given name
* \param name Name of the tree updater.
Expand Down
13 changes: 12 additions & 1 deletion plugin/example/custom_obj.cc
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
/*!
* Copyright 2015 by Contributors
* Copyright 2015-2019 by Contributors
* \file custom_obj.cc
* \brief This is an example to define plugin of xgboost.
* This plugin defines an additional objective function.
*/
#include <xgboost/base.h>
#include <dmlc/parameter.h>
#include <xgboost/objective.h>
#include <xgboost/json.h>

namespace xgboost {
namespace obj {
Expand Down Expand Up @@ -69,6 +70,16 @@ class MyLogistic : public ObjFunction {
return -std::log(1.0f / base_score - 1.0f);
}

/*!
 * \brief write this objective into a json object
 * \param p_out json object that receives the "name" and "MyLogisticParam" entries
 */
void Save(Json* p_out) const override {
  (*p_out)["name"] = String("MyLogistic");
  (*p_out)["MyLogisticParam"] = toJson(param_);
}

/*!
 * \brief restore the parameters of this objective from a json object
 * \param in json object holding a "MyLogisticParam" entry
 */
void Load(Json const& in) override {
  // Convert the stored json object back to key/value arguments first,
  // then hand them to the parameter struct.
  auto const kwargs = fromJson(get<Object>(in["MyLogisticParam"]));
  param_.InitAllowUnknown(kwargs);
}

private:
MyLogisticParam param_;
};
Expand Down
10 changes: 7 additions & 3 deletions python-package/xgboost/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,18 @@
# pylint: disable=invalid-name, redefined-builtin
STRING_TYPES = (str,)


def py_str(x):
"""convert c string back to python string"""
return x.decode('utf-8')
try:
s = x.decode('utf-8')
except UnicodeDecodeError as e:
print('Failed to decode error message, please file a bug report.')
print(e)
s = x
return s
else:
STRING_TYPES = (basestring,) # pylint: disable=undefined-variable


def py_str(x):
"""convert c string back to python string"""
return x
Expand Down
Loading

0 comments on commit 3f35ba7

Please sign in to comment.