From 317e12422e3db5c56fdeb0c6d69d038db5924d80 Mon Sep 17 00:00:00 2001 From: fis Date: Sat, 3 Aug 2019 10:49:26 -0400 Subject: [PATCH] Add integer, serializable, CMake version. --- include/xgboost/base.h | 6 ++ include/xgboost/json.h | 98 +++++++++++++++++++-- include/xgboost/json_io.h | 7 +- include/xgboost/learner.h | 8 +- src/common/json.cc | 157 +++++++++++++++++++++++++++++++--- src/learner.cc | 36 ++++---- tests/cpp/common/test_json.cc | 78 +++++++++++++---- 7 files changed, 332 insertions(+), 58 deletions(-) diff --git a/include/xgboost/base.h b/include/xgboost/base.h index f1940afba35d..0922cb22e73c 100644 --- a/include/xgboost/base.h +++ b/include/xgboost/base.h @@ -69,6 +69,12 @@ #define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) std::stable_sort((X), (Y), (Z)) #endif // GLIBC VERSION +#if defined(__GNUC__) +#define XGBOOST_EXPECT(cond, ret) __builtin_expect((cond), (ret)) +#else +#define XGBOOST_EXPECT(cond, ret) (cond) +#endif // defined(__GNUC__) + /*! * \brief Tag function as usable by device */ diff --git a/include/xgboost/json.h b/include/xgboost/json.h index 46836f1add48..9fa807aec58a 100644 --- a/include/xgboost/json.h +++ b/include/xgboost/json.h @@ -4,8 +4,8 @@ #ifndef XGBOOST_JSON_H_ #define XGBOOST_JSON_H_ +#include // deprecated #include - #include #include @@ -185,7 +185,7 @@ class JsonNumber : public Value { public: JsonNumber() : Value(ValueKind::Number) {} - JsonNumber(double value) : Value(ValueKind::Number) { // NOLINT + JsonNumber(Float value) : Value(ValueKind::Number) { // NOLINT number_ = value; } @@ -198,6 +198,7 @@ class JsonNumber : public Value { Float const& getNumber() const & { return number_; } Float& getNumber() & { return number_; } + bool operator==(Value const& rhs) const override; Value& operator=(Value const& rhs) override; @@ -206,6 +207,34 @@ class JsonNumber : public Value { } }; +class JsonInteger : public Value { + public: + using Int = int64_t; + + private: + Int integer_; + + public: + JsonInteger() : Value(ValueKind::Integer), integer_{0} {} + template ::value>::type* = nullptr> + JsonInteger(IntT value) : Value(ValueKind::Integer), integer_{value} {} + + Json& operator[](std::string const & key) override; + Json& operator[](int ind) override; + + bool operator==(Value const& rhs) const override; + Value& operator=(Value const& rhs) override; + + Int const& getInteger() && { return integer_; } + Int const& getInteger() const & { return integer_; } + Int& getInteger() & { return integer_; } + void Save(JsonWriter* writer) override; + + static bool isClassOf(Value const* value) { + return value->Type() == ValueKind::Integer; + } +}; + class JsonNull : public Value { public: JsonNull() : Value(ValueKind::Null) {} @@ -256,15 +285,16 @@ class JsonBoolean : public Value { }; struct StringView { - char const* str_; + using CharT = char; // unsigned char + CharT const* str_; size_t size_; public: StringView() = default; - StringView(char const* str, size_t size) : str_{str}, size_{size} {} + StringView(CharT const* str, size_t size) : str_{str}, size_{size} {} - char const& operator[](size_t p) const { return str_[p]; } - char const& at(size_t p) const { // NOLINT + CharT const& operator[](size_t p) const { return str_[p]; } + CharT const& at(size_t p) const { // NOLINT CHECK_LT(p, size_); return str_[p]; } @@ -319,6 +349,13 @@ class Json { return *this; } + // integer + explicit Json(JsonInteger integer) : ptr_{new JsonInteger(integer)} {} + Json& operator=(JsonInteger integer) { + ptr_.reset(new JsonInteger(std::move(integer))); + return *this; + } + // array explicit Json(JsonArray list) : ptr_ {new JsonArray(std::move(list))} {} @@ -410,10 +447,24 @@ JsonNumber::Float& GetImpl(T& val) { // NOLINT template ::value>::type* = nullptr> -double const& GetImpl(T& val) { // NOLINT +JsonNumber::Float const& GetImpl(T& val) { // NOLINT return val.getNumber(); } +// Integer +template ::value>::type* = nullptr> +JsonInteger::Int& GetImpl(T& val) { // NOLINT + return val.getInteger(); +} +template ::value>::type* = nullptr> +JsonInteger::Int const& GetImpl(T& val) { // NOLINT + return val.getInteger(); +} + // String template decltype(detail::GetImpl(*Cast(&json.GetValue())))& { // using Object = JsonObject; using Array = JsonArray; using Number = JsonNumber; +using Integer = JsonInteger; using Boolean = JsonBoolean; using String = JsonString; using Null = JsonNull; @@ -525,6 +577,36 @@ inline std::map fromJson(std::map c } return res; } - } // namespace xgboost + +#include + +namespace xgboost { + +struct Serializable : public rabit::Serializable { + virtual ~Serializable() = default; + /*! + * \brief load the model from a stream + * \param fi stream where to load the model from + */ + virtual void Load(dmlc::Stream *fi) override = 0; + /*! + * \brief saves the model to a stream + * \param fo stream where to save the model to + */ + virtual void Save(dmlc::Stream *fo) const override = 0; + + /*! + * \brief load the model from a json object + * \param in json object where to load the model from + */ + virtual void Load(Json const& in) = 0; + /*! + * \breif saves the model to a json object + * \param out json container where to save the model to + */ + virtual void Save(Json* out) const = 0; +}; +} // namespace xgboost + #endif // XGBOOST_JSON_H_ diff --git a/include/xgboost/json_io.h b/include/xgboost/json_io.h index d0323354d1a3..bd7633548218 100644 --- a/include/xgboost/json_io.h +++ b/include/xgboost/json_io.h @@ -80,10 +80,14 @@ class JsonReader { explicit SourceLocation(size_t pos) : pos_{pos} {} size_t Pos() const { return pos_; } - SourceLocation& Forward(char c = 0) { + SourceLocation& Forward() { pos_++; return *this; } + SourceLocation& Forward(uint32_t n) { + pos_ += n; + return *this; + } } cursor_; StringView raw_str_; @@ -207,6 +211,7 @@ class JsonWriter { virtual void Visit(JsonArray const* arr); virtual void Visit(JsonObject const* obj); virtual void Visit(JsonNumber const* num); + virtual void Visit(JsonInteger const* num); virtual void Visit(JsonRaw const* raw); virtual void Visit(JsonNull const* null); virtual void Visit(JsonString const* str); diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h index 80e544195f35..edf647695ed0 100644 --- a/include/xgboost/learner.h +++ b/include/xgboost/learner.h @@ -8,8 +8,6 @@ #ifndef XGBOOST_LEARNER_H_ #define XGBOOST_LEARNER_H_ -#include - #include #include #include @@ -42,7 +40,7 @@ namespace xgboost { * * \endcode */ -class Learner : public rabit::Serializable { +class Learner : public Serializable { public: /*! \brief virtual destructor */ ~Learner() override = default; @@ -51,14 +49,14 @@ class Learner : public rabit::Serializable { */ virtual void Configure() = 0; - virtual void Load(Json const& in) = 0; + virtual void Load(Json const& in) override = 0; /*! * \brief load model from stream * \param fi input stream. */ void Load(dmlc::Stream* fi) override = 0; - virtual void Save(Json* out) const = 0; + virtual void Save(Json* out) const override = 0; /*! * \brief save model to stream. * \param fo output stream diff --git a/src/common/json.cc b/src/common/json.cc index 917aa1748379..34402caa5d69 100644 --- a/src/common/json.cc +++ b/src/common/json.cc @@ -2,7 +2,10 @@ * Copyright (c) by Contributors 2019 */ #include +#include +#include +#include "xgboost/base.h" #include "xgboost/logging.h" #include "xgboost/json.h" #include "xgboost/json_io.h" @@ -56,6 +59,13 @@ void JsonWriter::Visit(JsonNumber const* num) { convertor_.str(""); } +void JsonWriter::Visit(JsonInteger const* num) { + convertor_ << num->getInteger(); + auto const& str = convertor_.str(); + this->Write(StringView{str.c_str(), str.size()}); + convertor_.str(""); +} + void JsonWriter::Visit(JsonRaw const* raw) { auto const& str = raw->getRaw(); this->Write(str); @@ -282,6 +292,34 @@ void JsonNumber::Save(JsonWriter* writer) { writer->Visit(this); } +// Json Integer +Json& JsonInteger::operator[](std::string const& key) { + LOG(FATAL) << "Object of type " + << Value::TypeStr() << " can not be indexed by string."; + return DummyJsonObject(); +} + +Json& JsonInteger::operator[](int ind) { + LOG(FATAL) << "Object of type " + << Value::TypeStr() << " can not be indexed by Integer."; + return DummyJsonObject(); +} + +bool JsonInteger::operator==(Value const& rhs) const { + if (!IsA(&rhs)) { return false; } + return integer_ == Cast(&rhs)->getInteger(); +} + +Value & JsonInteger::operator=(Value const &rhs) { + JsonInteger const* casted = Cast(&rhs); + integer_ = casted->getInteger(); + return *this; +} + +void JsonInteger::Save(JsonWriter* writer) { + writer->Visit(this); +} + // Json Null Json& JsonNull::operator[](std::string const & key) { LOG(FATAL) << "Object of type " @@ -401,7 +439,7 @@ void JsonReader::SkipSpaces() { while (cursor_.Pos() < raw_str_.size()) { char c = raw_str_[cursor_.Pos()]; if (std::isspace(c)) { - cursor_.Forward(c); + cursor_.Forward(); } else { break; } @@ -527,15 +565,114 @@ Json JsonReader::ParseObject() { } Json JsonReader::ParseNumber() { - std::string substr = raw_str_.substr(cursor_.Pos(), kMaxNumLength); - size_t pos = 0; + char const* p = raw_str_.c_str() + cursor_.Pos(); + char const* const beg = p; // keep track of current pointer + + // TODO(trivialfis): Add back all the checks for number + bool negative = false; + if ('-' == *p) { + ++p; + negative = true; + } + + bool is_float = false; - Number::Float number{0}; - number = std::stof(substr, &pos); - for (size_t i = 0; i < pos; ++i) { - GetNextChar(); + using ExpInt = std::remove_const< + decltype(std::numeric_limits::max_exponent)>::type; + constexpr auto kExpMax = std::numeric_limits::max(); + constexpr auto kExpMin = std::numeric_limits::min(); + + JsonInteger::Int i = 0; + double f = 0.0; // Use double to maintain accuracy + + if (*p == '0') { + ++p; + } else { + char c = *p; + do { + ++p; + char digit = c - '0'; + i = 10 * i + digit; + c = *p; + } while (std::isdigit(c)); + } + + ExpInt exponent = 0; + const char *const dot_position = p; + if ('.' == *p) { + is_float = true; + f = i; + ++p; + char c = *p; + + do { + ++p; + f = f * 10 + (c - '0'); + c = *p; + } while (std::isdigit(c)); + } + if (is_float) { + exponent = dot_position - p + 1; + } + + char e = *p; + if ('e' == e || 'E' == e) { + if (!is_float) { + is_float = true; + f = i; + } + ++p; + + bool negative_exponent = false; + if ('-' == *p) { + negative_exponent = true; + ++p; + } else if ('+' == *p) { + ++p; + } + + ExpInt exp = 0; + + char c = *p; + while (std::isdigit(c)) { + unsigned char digit = c - '0'; + if (XGBOOST_EXPECT(exp > (kExpMax - digit) / 10, false)) { + CHECK_GT(exp, (kExpMax - digit) / 10) << "Overflow"; + } + exp = 10 * exp + digit; + + ++p; + + c = *p; + } + static_assert(-kExpMax >= kExpMin, "exp can be negated without loss or UB"); + exponent += (negative_exponent ? -exp : exp); + } + + if (exponent) { + CHECK(is_float); + // If d is zero but the exponent is huge, don't + // multiply zero by inf which gives nan. + if (f != 0.0) { + f *= exp10(exponent); + } + } + + if (negative) { + f = -f; + i = -i; + } + + auto moved = std::distance(beg, p); + this->cursor_.Forward(moved); + + if (is_float) { + // LOG(WARNING) << "Number"; + return Json(JsonNumber(f)); + } else { + // LOG(WARNING) << "Integer"; + return Json(JsonInteger(i)); } - return Json(number); } Json JsonReader::ParseBoolean() { @@ -587,8 +724,6 @@ class GlobalCLocale { Json Json::Load(StringView str, bool ignore_specialization) { GlobalCLocale guard; - LOG(WARNING) << "Json serialization is still experimental." - " Output schema is subject to change in the future."; JsonReader reader(str, ignore_specialization); common::Timer t; t.Start(); @@ -610,8 +745,6 @@ Json Json::Load(JsonReader* reader) { void Json::Dump(Json json, std::ostream *stream, bool pretty) { GlobalCLocale guard; - LOG(WARNING) << "Json serialization is still experimental." - " Output schema is subject to change in the future."; JsonWriter writer(stream, true); common::Timer t; t.Start(); diff --git a/src/learner.cc b/src/learner.cc index 34e205c20771..578ab34458dd 100644 --- a/src/learner.cc +++ b/src/learner.cc @@ -5,12 +5,7 @@ * \author Tianqi Chen */ #include -#include -#include -#include -#include -#include -#include + #include #include #include @@ -20,12 +15,17 @@ #include #include +#include "xgboost/feature_map.h" +#include "xgboost/learner.h" +#include "xgboost/logging.h" +#include "xgboost/generic_parameters.h" #include "xgboost/json.h" -#include "./common/common.h" -#include "./common/host_device_vector.h" -#include "./common/io.h" -#include "./common/random.h" -#include "./common/timer.h" + +#include "common/common.h" +#include "common/host_device_vector.h" +#include "common/io.h" +#include "common/random.h" +#include "common/timer.h" namespace { @@ -305,8 +305,11 @@ class LearnerImpl : public Learner { void Save(Json* p_out) const override { // FIXME(trivialfis); num_boosting_round Json& out = *p_out; - out["version"] = Json(Array{std::vector({Json{String{"1"}}, - Json{String{"0"}}})}); + Integer::Int major{XGBOOST_VER_MAJOR}, minor{XGBOOST_VER_MINOR}, patch{XGBOOST_VER_PATCH}; + out["version"] = Json(Array{std::vector{ + Json(Integer(major)), + Json(Integer(minor)), + Json(Integer(patch))}}); out["Learner"] = Object(); auto& learner = out["Learner"]; @@ -330,9 +333,10 @@ class LearnerImpl : public Learner { } void Load(Json const& in) override { - std::string major, minor; - std::tie(major, minor) = std::make_pair(get(get(in["version"])[0]), - get(get(in["version"])[1])); + Integer::Int major, minor, patch; + std::tie(major, minor, patch) = std::make_tuple(get(get(in["version"])[0]), + get(get(in["version"])[1]), + get(get(in["version"])[2])); LOG(INFO) << "Loading XGBoost " << major << ", " << minor << " model"; auto const& learner = get(in["Learner"]); mparam_.InitAllowUnknown(fromJson(get(learner.at("learner_model_param")))); diff --git a/tests/cpp/common/test_json.cc b/tests/cpp/common/test_json.cc index a599c3f19fbb..48c4d5cb95c4 100644 --- a/tests/cpp/common/test_json.cc +++ b/tests/cpp/common/test_json.cc @@ -143,9 +143,26 @@ TEST(Json, TestParseObject) { } TEST(Json, ParseNumber) { - std::string str = "31.8892"; - auto json = Json::Load(StringView{str.c_str(), str.size()}); - ASSERT_NEAR(get(json), 31.8892f, kRtEps); + { + std::string str = "31.8892"; + auto json = Json::Load(StringView{str.c_str(), str.size()}); + ASSERT_NEAR(get(json), 31.8892f, kRtEps); + } + { + std::string str = "-31.8892"; + auto json = Json::Load(StringView{str.c_str(), str.size()}); + ASSERT_NEAR(get(json), -31.8892f, kRtEps); + } + { + std::string str = "2e4"; + auto json = Json::Load(StringView{str.c_str(), str.size()}); + ASSERT_NEAR(get(json), 2e4f, kRtEps); + } + { + std::string str = "2e-4"; + auto json = Json::Load(StringView{str.c_str(), str.size()}); + ASSERT_NEAR(get(json), 2e-4f, kRtEps); + } } TEST(Json, ParseArray) { @@ -181,7 +198,8 @@ TEST(Json, ParseArray) { std::vector arr = get(json); ASSERT_EQ(arr.size(), 3); Json v0 = arr[0]; - ASSERT_EQ(get(v0["depth"]), 3); + ASSERT_EQ(get(v0["depth"]), 3); + ASSERT_NEAR(get(v0["gain"]), 10.4866, kRtEps); } TEST(Json, Null) { @@ -242,7 +260,7 @@ TEST(Json, AssigningObjects) { { std::map objects; Json json_objects { JsonObject() }; - std::vector arr_0 (1, Json(3.3)); + std::vector arr_0 (1, Json(3.3f)); json_objects["tree_parameters"] = JsonArray(arr_0); std::vector json_arr = get(json_objects["tree_parameters"]); ASSERT_NEAR(get(json_arr[0]), 3.3f, kRtEps); @@ -263,9 +281,9 @@ TEST(Json, AssigningObjects) { TEST(Json, AssigningArray) { Json json; json = JsonArray(); - std::vector tmp_0 {Json(Number(1)), Json(Number(2))}; + std::vector tmp_0 {Json(Number(1.0f)), Json(Number(2.0f))}; json = tmp_0; - std::vector tmp_1 {Json(Number(3))}; + std::vector tmp_1 {Json(Number(3.0f))}; get(json) = tmp_1; std::vector res = get(json); ASSERT_EQ(get(res[0]), 3); @@ -274,14 +292,14 @@ TEST(Json, AssigningArray) { TEST(Json, AssigningNumber) { { // right value - Json json = Json{ Number(4) }; + Json json = Json{ Number(4.0f) }; get(json) = 15; ASSERT_EQ(get(json), 15); } { // left value ref - Json json = Json{ Number(4) }; + Json json = Json{ Number(4.0f) }; Number::Float& ref = get(json); ref = 15; ASSERT_EQ(get(json), 15); @@ -289,7 +307,7 @@ TEST(Json, AssigningNumber) { { // left value - Json json = Json{ Number(4) }; + Json json = Json{ Number(4.0f) }; double value = get(json); ASSERT_EQ(value, 4); value = 15; // NOLINT @@ -323,8 +341,8 @@ TEST(Json, AssigningString) { } TEST(Json, LoadDump) { - std::string buffer = GetModelStr(); - Json origin {Json::Load(StringView{buffer.c_str(), buffer.size()}, true)}; + std::string ori_buffer = GetModelStr(); + Json origin {Json::Load(StringView{ori_buffer.c_str(), ori_buffer.size()}, true)}; dmlc::TemporaryDirectory tempdir; auto const& path = tempdir.path + "test_model_dump"; @@ -333,10 +351,11 @@ TEST(Json, LoadDump) { Json::Dump(origin, &fout); fout.close(); - buffer = common::LoadSequentialFile(path); - Json load_back {Json::Load(StringView(buffer.c_str(), buffer.size()), true)}; + std::string new_buffer = common::LoadSequentialFile(path); + Json load_back {Json::Load(StringView(new_buffer.c_str(), new_buffer.size()), true)}; - ASSERT_EQ(load_back, origin); + ASSERT_EQ(load_back, origin) << ori_buffer << "\n\n---------------\n\n" + << new_buffer; } // For now Json is quite ignorance about unicode. @@ -359,7 +378,7 @@ TEST(Json, WrongCasts) { ASSERT_ANY_THROW(get(json)); } { - Json json = Json{ Array{ std::vector{ Json{ Number{1} } } } }; + Json json = Json{ Array{ std::vector{ Json{ Number{1.0f} } } } }; ASSERT_ANY_THROW(get(json)); } { @@ -368,4 +387,31 @@ TEST(Json, WrongCasts) { ASSERT_ANY_THROW(get(json)); } } + +TEST(Json, Int_vs_Float) { + // If integer is parsed as float, calling `get()' will throw. + { + std::string str = R"json( +{ + "number": 123.4, + "integer": 123 +})json"; + + Json obj = Json::Load({str.c_str(), str.size()}); + JsonNumber::Float number = get(obj["number"]); + ASSERT_NEAR(number, 123.4, kRtEps); + JsonInteger::Int integer = get(obj["integer"]); + ASSERT_EQ(integer, 123); + } + + { + std::string str = R"json( +{"data": [2503595760, false], "shape": [10]} +)json"; + Json obj = Json::Load({str.c_str(), str.size()}); + auto array = get(obj["data"]); + auto ptr = get(array[0]); + ASSERT_EQ(ptr, 2503595760); + } +} } // namespace xgboost