Skip to content

Commit

Permalink
C++ uint16_t to int for ATN.
Browse files Browse the repository at this point in the history
  • Loading branch information
parrt committed Mar 18, 2022
1 parent b4148b8 commit 4d2ebbf
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 45 deletions.
4 changes: 2 additions & 2 deletions runtime/Cpp/runtime/src/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct BypassAltsAtnCache final {
/// bypass alternatives.
///
/// <seealso cref="ATNDeserializationOptions#isGenerateRuleBypassTransitions()"/>
std::map<std::vector<uint16_t>, std::unique_ptr<const atn::ATN>> map;
std::map<std::vector<int>, std::unique_ptr<const atn::ATN>> map;
};

BypassAltsAtnCache* getBypassAltsAtnCache() {
Expand Down Expand Up @@ -229,7 +229,7 @@ TokenFactory<CommonToken>* Parser::getTokenFactory() {


const atn::ATN& Parser::getATNWithBypassAlts() {
const std::vector<uint16_t> &serializedAtn = getSerializedATN();
const std::vector<int> &serializedAtn = getSerializedATN();
if (serializedAtn.empty()) {
throw UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.");
}
Expand Down
2 changes: 1 addition & 1 deletion runtime/Cpp/runtime/src/Recognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ namespace antlr4 {
/// For interpreters, we don't know their serialized ATN despite having
/// created the interpreter from it.
/// </summary>
virtual const std::vector<uint16_t>& getSerializedATN() const {
virtual const std::vector<int>& getSerializedATN() const {
// Default implementation: never returns normally, it always throws.
// The thrown object is a string literal (i.e. a const char*), not a type
// derived from std::exception, so a `catch (const std::exception&)` handler
// will not intercept it.
throw "there is no serialized ATN";
}

Expand Down
34 changes: 9 additions & 25 deletions runtime/Cpp/runtime/src/atn/ATNDeserializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,28 +221,27 @@ namespace {
return s;
}

uint32_t deserializeInt32(const std::vector<uint16_t>& data, size_t offset) {
// Reassembles a 32-bit value from two consecutive entries of `data`:
// the entry at `offset` supplies the low bits and the entry at `offset + 1`
// is shifted left by 16 to supply the high bits.
//
// No bounds checking is performed; the caller must guarantee that both
// `offset` and `offset + 1` are valid indices into `data`.
//
// NOTE(review): the low entry is not masked to 16 bits. This presumably relies
// on every serialized element being in the range 0..0xFFFF -- confirm that
// this still holds now that the element type is `int`.
uint32_t deserializeInt32(const std::vector<int>& data, size_t offset) {
  const auto lowWord = static_cast<uint32_t>(data[offset]);
  const auto highWord = static_cast<uint32_t>(data[offset + 1]);
  return (highWord << 16) | lowWord;
}

ssize_t readUnicodeInt(const std::vector<uint16_t>& data, int& p) {
// Reads the single entry of `data` at cursor `p`, advances the cursor by one
// and returns the entry widened to ssize_t.
//
// No bounds checking is performed; `p` must be a valid index into `data`.
ssize_t readInt16(const std::vector<int>& data, int& p) {
  const int entry = data[p];
  ++p;
  return static_cast<ssize_t>(entry);
}

ssize_t readUnicodeInt32(const std::vector<uint16_t>& data, int& p) {
// Reads a 32-bit value that occupies two consecutive entries of `data`
// starting at cursor `p` (combined by deserializeInt32), advances the cursor
// past both entries and returns the value converted to ssize_t.
ssize_t readInt32(const std::vector<int>& data, int& p) {
  const uint32_t combined = deserializeInt32(data, p);
  p += 2;  // Two entries were consumed.
  return static_cast<ssize_t>(combined);
}

// We templatize this on the function type so the optimizer can inline
// the 16- or 32-bit readUnicodeInt/readUnicodeInt32 as needed.
// the 16- or 32-bit readInt16/readInt32 as needed.
template <typename F>
void deserializeSets(
const std::vector<uint16_t>& data,
const std::vector<int>& data,
int& p,
std::vector<misc::IntervalSet>& sets,
F readUnicode) {
std::vector<misc::IntervalSet>& sets, F readUnicode) {
size_t nsets = data[p++];
sets.reserve(sets.size() + nsets);
for (size_t i = 0; i < nsets; i++) {
Expand All @@ -269,7 +268,7 @@ ATNDeserializer::ATNDeserializer() : ATNDeserializer(ATNDeserializationOptions::

ATNDeserializer::ATNDeserializer(ATNDeserializationOptions deserializationOptions) : _deserializationOptions(std::move(deserializationOptions)) {}

std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& data) const {
std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<int>& data) const {
int p = 0;
int version = data[p++];
if (version != SERIALIZED_VERSION) {
Expand Down Expand Up @@ -301,9 +300,6 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
}

size_t ruleIndex = data[p++];
if (ruleIndex == 0xFFFF) {
ruleIndex = INVALID_INDEX;
}

ATNState *s = stateFactory(stype, ruleIndex);
if (stype == ATNStateType::LOOP_END) { // special case
Expand Down Expand Up @@ -352,10 +348,6 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
atn->ruleToStartState.push_back(startState);
if (atn->grammarType == ATNType::LEXER) {
size_t tokenType = data[p++];
if (tokenType == 0xFFFF) {
tokenType = Token::EOF;
}

atn->ruleToTokenType.push_back(tokenType);
}
}
Expand Down Expand Up @@ -388,10 +380,10 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
std::vector<misc::IntervalSet> sets;

// First, deserialize sets with 16-bit arguments <= U+FFFF.
deserializeSets(data, p, sets, readUnicodeInt);
deserializeSets(data, p, sets, readInt16);

// Next, deserialize sets with 32-bit arguments <= U+10FFFF.
deserializeSets(data, p, sets, readUnicodeInt32);
deserializeSets(data, p, sets, readInt32);

sets.shrink_to_fit();

Expand Down Expand Up @@ -492,15 +484,7 @@ std::unique_ptr<ATN> ATNDeserializer::deserialize(const std::vector<uint16_t>& d
for (size_t i = 0; i < atn->lexerActions.size(); i++) {
LexerActionType actionType = static_cast<LexerActionType>(data[p++]);
int data1 = data[p++];
if (data1 == 0xFFFF) {
data1 = -1;
}

int data2 = data[p++];
if (data2 == 0xFFFF) {
data2 = -1;
}

atn->lexerActions[i] = lexerActionFactory(actionType, data1, data2);
}
}
Expand Down
2 changes: 1 addition & 1 deletion runtime/Cpp/runtime/src/atn/ATNDeserializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace atn {

explicit ATNDeserializer(ATNDeserializationOptions deserializationOptions);

std::unique_ptr<ATN> deserialize(const std::vector<uint16_t> &input) const;
std::unique_ptr<ATN> deserialize(const std::vector<int> &input) const;
void verifyATN(const ATN &atn) const;

private:
Expand Down
4 changes: 2 additions & 2 deletions runtime/Cpp/runtime/src/atn/ATNSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,10 @@ std::string ATNSerializer::decode(const std::wstring &inpdata) {
if (inpdata.size() < 10)
throw IllegalArgumentException("Not enough data to decode");

std::vector<uint16_t> data(inpdata.size());
std::vector<int> data(inpdata.size());

for (size_t i = 0; i < inpdata.size(); ++i) {
data[i] = (uint16_t)inpdata[i];
data[i] = (int)inpdata[i];
}

std::string buf;
Expand Down
4 changes: 2 additions & 2 deletions runtime/Cpp/runtime/src/misc/InterpreterDataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) {
};
}

std::vector<uint16_t> serializedATN;
std::vector<int> serializedATN;

std::getline(input, line, '\n');
assert(line == "atn:");
Expand All @@ -115,7 +115,7 @@ InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) {
number = std::strtoul(&value[1], nullptr, 10);
else
number = std::strtoul(value.c_str(), nullptr, 10);
serializedATN.push_back(static_cast<uint16_t>(number));
serializedATN.push_back(static_cast<int>(number));
}

ATNDeserializer deserializer;
Expand Down
6 changes: 3 additions & 3 deletions runtime/Cpp/runtime/src/tree/xpath/XPathLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ struct XPathLexerStaticData final {
const std::vector<std::string> literalNames;
const std::vector<std::string> symbolicNames;
const antlr4::dfa::Vocabulary vocabulary;
std::vector<uint16_t> serializedATN;
std::vector<int> serializedATN;
std::unique_ptr<antlr4::atn::ATN> atn;
};

Expand Down Expand Up @@ -61,7 +61,7 @@ void xpathLexerInitialize() {
"STRING"
}
);
static const uint16_t serializedATNSegment0[] = {
static const int serializedATNSegment0[] = {
0x4, 0x0, 0x8, 0x32, 0x6, 0xffff, 0x2, 0x0, 0x7, 0x0, 0x2, 0x1, 0x7,
0x1, 0x2, 0x2, 0x7, 0x2, 0x2, 0x3, 0x7, 0x3, 0x2, 0x4, 0x7, 0x4,
0x2, 0x5, 0x7, 0x5, 0x2, 0x6, 0x7, 0x6, 0x2, 0x7, 0x7, 0x7, 0x1,
Expand Down Expand Up @@ -151,7 +151,7 @@ const dfa::Vocabulary& XPathLexer::getVocabulary() const {
return xpathLexerStaticData->vocabulary;
}

const std::vector<uint16_t>& XPathLexer::getSerializedATN() const {
// Returns a reference to the serialized ATN stored in xpathLexerStaticData.
// The pointer is dereferenced unconditionally, so the static data must have
// been set up before this is called.
const std::vector<int>& XPathLexer::getSerializedATN() const {
return xpathLexerStaticData->serializedATN;
}

Expand Down
2 changes: 1 addition & 1 deletion runtime/Cpp/runtime/src/tree/xpath/XPathLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class XPathLexer : public antlr4::Lexer {

virtual const antlr4::dfa::Vocabulary& getVocabulary() const override;

virtual const std::vector<uint16_t>& getSerializedATN() const override;
virtual const std::vector<int>& getSerializedATN() const override;

virtual const antlr4::atn::ATN& getATN() const override;

Expand Down
14 changes: 7 additions & 7 deletions tool/resources/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public:

const antlr4::dfa::Vocabulary& getVocabulary() const override;

virtual const std::vector\<uint16_t>& getSerializedATN() const override;
virtual const std::vector\<int>& getSerializedATN() const override;
virtual const antlr4::atn::ATN& getATN() const override;

<if (actionFuncs)>
Expand Down Expand Up @@ -138,7 +138,7 @@ struct <lexer.name; format = "cap">StaticData final {
const std::vector\<std::string> literalNames;
const std::vector\<std::string> symbolicNames;
const antlr4::dfa::Vocabulary vocabulary;
std::vector\<uint16_t> serializedATN;
std::vector\<int> serializedATN;
std::unique_ptr\<antlr4::atn::ATN> atn;
};

Expand Down Expand Up @@ -199,7 +199,7 @@ const dfa::Vocabulary& <lexer.name>::getVocabulary() const {
return <lexer.grammarName; format = "lower">LexerStaticData->vocabulary;
}

const std::vector\<uint16_t>& <lexer.name>::getSerializedATN() const {
const std::vector\<int>& <lexer.name>::getSerializedATN() const {
return <lexer.grammarName; format = "lower">LexerStaticData->serializedATN;
}

Expand Down Expand Up @@ -307,7 +307,7 @@ public:

const antlr4::dfa::Vocabulary& getVocabulary() const override;

const std::vector\<uint16_t>& getSerializedATN() const override;
const std::vector\<int>& getSerializedATN() const override;

<namedActions.members>

Expand Down Expand Up @@ -356,7 +356,7 @@ struct <parser.name; format = "cap">StaticData final {
const std::vector\<std::string> literalNames;
const std::vector\<std::string> symbolicNames;
const antlr4::dfa::Vocabulary vocabulary;
std::vector\<uint16_t> serializedATN;
std::vector\<int> serializedATN;
std::unique_ptr\<antlr4::atn::ATN> atn;
};

Expand Down Expand Up @@ -407,7 +407,7 @@ const dfa::Vocabulary& <parser.name>::getVocabulary() const {
return <parser.grammarName; format = "lower">ParserStaticData->vocabulary;
}

const std::vector\<uint16_t>& <parser.name>::getSerializedATN() const {
const std::vector\<int>& <parser.name>::getSerializedATN() const {
return <parser.grammarName; format = "lower">ParserStaticData->serializedATN;
}

Expand Down Expand Up @@ -438,7 +438,7 @@ SerializedATNHeader(model) ::= <<
>>

SerializedATN(model) ::= <<
static const uint16_t serializedATNSegment[] = {
static const int serializedATNSegment[] = {
<model.serialized: {s | <s>}; separator=",", wrap>
};
staticData->serializedATN.reserve(sizeof(serializedATNSegment) / sizeof(serializedATNSegment[0]));
Expand Down
2 changes: 1 addition & 1 deletion tool/src/org/antlr/v4/codegen/Target.java
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ public String getBaseVisitorFileName(boolean header) {
* in a single segment (a declaration in target language) of the serialized ATN.
* E.g., in C++, a small segment length results in multiple decls like:
*
* static const uint16_t serializedATNSegment1[] = {
* static const int serializedATNSegment1[] = {
* 0x7, 0x12, 0x2, 0x13, 0x7, 0x13, 0x2, 0x14, 0x7, 0x14, 0x2, 0x15, 0x7,
* 0x15, 0x2, 0x16, 0x7, 0x16, 0x2, 0x17, 0x7, 0x17, 0x2, 0x18, 0x7,
* 0x18, 0x2, 0x19, 0x7, 0x19, 0x2, 0x1a, 0x7, 0x1a, 0x2, 0x1b, 0x7,
Expand Down

0 comments on commit 4d2ebbf

Please sign in to comment.