diff --git a/.clang-tidy b/.clang-tidy index cc9120b..8f34b6c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -63,6 +63,7 @@ Checks: >- -bugprone-macro-parantheses, -bugprone-narrowing-conversions, -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-do-while, -cppcoreguidelines-avoid-magic-numbers, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-macro-usage, diff --git a/include/core/monomux/adt/FunctionExtras.hpp b/include/core/monomux/adt/FunctionExtras.hpp index 65eae28..39f6684 100644 --- a/include/core/monomux/adt/FunctionExtras.hpp +++ b/include/core/monomux/adt/FunctionExtras.hpp @@ -4,6 +4,21 @@ #include "monomux/adt/Metaprogramming.hpp" +#define MONOMUX_MAKE_NON_COPYABLE(CLASS_NAME) \ + CLASS_NAME(const CLASS_NAME&) = delete; \ + CLASS_NAME& operator=(const CLASS_NAME&) = delete; +#define MONOMUX_MAKE_NON_MOVABLE(CLASS_NAME) \ + CLASS_NAME(CLASS_NAME&&) = delete; \ + CLASS_NAME& operator=(CLASS_NAME&&) = delete; +#define MONOMUX_MAKE_NON_COPYABLE_MOVABLE(CLASS_NAME) \ + MONOMUX_MAKE_NON_COPYABLE(CLASS_NAME) \ + MONOMUX_MAKE_NON_MOVABLE(CLASS_NAME) + +#define MONOMUX_MAKE_STRICT_TYPE(CLASS_NAME, VIRTUAL_DTOR) \ + CLASS_NAME() = default; \ + MONOMUX_MAKE_NON_COPYABLE_MOVABLE(CLASS_NAME) \ + VIRTUAL_DTOR ~CLASS_NAME() = default; + #define MONOMUX_DETAIL_FUNCTION_HEAD( \ RET_TY, NAME, ARGUMENTS, ATTRIBUTES, QUALIFIERS) \ ATTRIBUTES RET_TY NAME(ARGUMENTS) QUALIFIERS @@ -17,11 +32,13 @@ #define MONOMUX_DETAIL_CONST_TYPE \ using Const = std::add_pointer_t< \ std::add_const_t>> -#define MONOMUX_DETAIL_CONST_OBJ const_cast(this) #define MONOMUX_DETAIL_CONST_VALUE(CALL) const auto& Value = CALL +/* NOLINTBEGIN(cppcoreguidelines-pro-type-const-cast) */ +#define MONOMUX_DETAIL_CONST_OBJ const_cast(this) /* NOLINTBEGIN(bugprone-macro-parantheses) */ #define MONOMUX_DETAIL_RETURN_CAST(RET_TY, OBJ) return const_cast(OBJ) /* NOLINTEND(bugprone-macro-parantheses) */ +/* NOLINTEND(cppcoreguidelines-pro-type-const-cast) */ #define 
MONOMUX_DETAIL_FUNCTION_BODY(RET_TY, CALL) \ { \ diff --git a/include/core/monomux/adt/ScopeGuard.hpp b/include/core/monomux/adt/ScopeGuard.hpp index df51ca7..d82eb05 100644 --- a/include/core/monomux/adt/ScopeGuard.hpp +++ b/include/core/monomux/adt/ScopeGuard.hpp @@ -13,7 +13,8 @@ namespace monomux /// \code{.cpp} /// scope_guard RAII{[] { enter(); }, [] { exit(); }}; /// \endcode -template struct ScopeGuard +template +struct [[deprecated("Replace by scope_guard")]] ScopeGuard { ScopeGuard(EnterFunction&& Enter, ExitFunction&& Exit) : Exit(Exit) { diff --git a/include/core/monomux/adt/scope_guard.hpp b/include/core/monomux/adt/scope_guard.hpp new file mode 100644 index 0000000..8fe3ccb --- /dev/null +++ b/include/core/monomux/adt/scope_guard.hpp @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#pragma once +#include +#include + +namespace monomux +{ +/// A simple scope guard that fires an optional callback function when it is +/// constructed, and another callback function (usually, a lambda passed to the +/// constructor) when destructed. 
+/// +/// Examples: +/// +/// \code{.cpp} +/// scope_guard Cleanup{[] { exit(); }}; +/// \endcode +/// +/// \code{.cpp} +/// scope_guard RAII{[] { enter(); }, [] { exit(); }}; +/// \endcode +template struct scope_guard +{ + // NOLINTNEXTLINE(google-explicit-constructor) + scope_guard(ExitFunction&& Exit) noexcept : Alive(true), Exit(Exit) {} + scope_guard(EnterFunction&& Enter, + ExitFunction&& Exit) noexcept(noexcept(Enter())) + : Alive(false), Exit(Exit) + { + Enter(); + Alive = true; // NOLINT(cppcoreguidelines-prefer-member-initializer) + } + + ~scope_guard() noexcept(noexcept(Exit())) + { + if (Alive) + Exit(); + Alive = false; + } + + scope_guard() = delete; + scope_guard(const scope_guard&) = delete; + scope_guard(scope_guard&&) = delete; + scope_guard& operator=(const scope_guard&) = delete; + scope_guard& operator=(scope_guard&&) = delete; + +private: + bool Alive; + ExitFunction Exit; +}; + +/// A simple scope guard that restores the value of a "captured" variable when +/// the scope is exited. 
+/// +/// Example: +/// +/// \code{.cpp} +/// int X = 4; +/// { +/// restore_guard Reset{X}; +/// X = 6; +/// } +/// assert(X == 4); +/// \endcode +template struct restore_guard +{ + // NOLINTNEXTLINE(google-explicit-constructor) + restore_guard(Ty& Var) noexcept(std::is_copy_constructible_v) + : Address(std::addressof(Var)), Value(Var) + {} + + ~restore_guard() noexcept(std::is_move_assignable_v) + { + *Address = std::move(Value); + Address = nullptr; + } + + restore_guard() = delete; + restore_guard(const restore_guard&) = delete; + restore_guard(restore_guard&&) = delete; + restore_guard& operator=(const restore_guard&) = delete; + restore_guard& operator=(restore_guard&&) = delete; + +private: + Ty* Address; + Ty Value; +}; + +} // namespace monomux diff --git a/src/implementation/Monomux.dto b/src/implementation/Monomux.dto index 0731c42..57ea6b7 100644 --- a/src/implementation/Monomux.dto +++ b/src/implementation/Monomux.dto @@ -6,6 +6,19 @@ namespace monomux::message { +namespace test1 +{ +namespace test2 +{ +namespace test3::test4 +{ +} +} +} +namespace test5 +{ +} + literal ui64 APIMajor = 1; literal ui64 APIMinor = 0; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0fa84a3..c27cbe6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -40,6 +40,7 @@ if (MONOMUX_BUILD_TESTS) adt/MetaprogrammingTest.cpp adt/RingBufferTest.cpp + adt/scope_guard_test.cpp adt/SmallIndexMapTest.cpp message/MessageSerialisationTest.cpp ) diff --git a/test/adt/scope_guard_test.cpp b/test/adt/scope_guard_test.cpp new file mode 100644 index 0000000..44b1237 --- /dev/null +++ b/test/adt/scope_guard_test.cpp @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-3.0-only */ +#include + +#include "monomux/adt/scope_guard.hpp" + +/* NOLINTBEGIN(cert-err58-cpp,cppcoreguidelines-avoid-goto,cppcoreguidelines-owning-memory) + */ + +using namespace monomux; + +TEST(ScopeGuard, EntryAndExitCalled) +{ + int Variable = 2; + { + ASSERT_EQ(Variable, 2); + + scope_guard 
SG{[&Variable] { Variable = 4; }, + [&Variable] { Variable = 0; }}; + ASSERT_EQ(Variable, 4); + } + ASSERT_EQ(Variable, 0); +} + +TEST(ScopeGuard, RestoreGuard) +{ + int Variable = 2; + { + ASSERT_EQ(Variable, 2); + restore_guard RG{Variable}; + Variable = 4; + ASSERT_EQ(Variable, 4); + } + ASSERT_EQ(Variable, 2); +} + +/* NOLINTEND(cert-err58-cpp,cppcoreguidelines-avoid-goto,cppcoreguidelines-owning-memory) + */ diff --git a/tools/dto_compiler/CMakeLists.txt b/tools/dto_compiler/CMakeLists.txt index e2ead25..2178589 100644 --- a/tools/dto_compiler/CMakeLists.txt +++ b/tools/dto_compiler/CMakeLists.txt @@ -1,7 +1,12 @@ # SPDX-License-Identifier: LGPL-3.0-only add_executable(dto_compiler DTOCompiler.cpp - lex.cpp + + dto_unit.cpp + lexer.cpp + parser.cpp + + ast/decl.cpp ) if (NOT MONOMUX_LIBRARY_TYPE STREQUAL "UNITY") target_link_libraries(dto_compiler PUBLIC diff --git a/tools/dto_compiler/DTOCompiler.cpp b/tools/dto_compiler/DTOCompiler.cpp index a11244a..202d2e8 100644 --- a/tools/dto_compiler/DTOCompiler.cpp +++ b/tools/dto_compiler/DTOCompiler.cpp @@ -6,7 +6,9 @@ #include #include -#include "lex.hpp" +#include "dto_unit.hpp" +#include "lexer.hpp" +#include "parser.hpp" namespace { @@ -57,17 +59,32 @@ int main(int ArgC, char* ArgV[]) std::cout << "Input string:\n" << InputBuffer << std::endl; using namespace monomux::tools::dto_compiler; + lexer L{InputBuffer}; + parser P{L}; + bool Success = P.parse(); + std::cout << P.get_unit().dump().str() << std::endl; - token T{}; - while ((T = L.lex()) != token::EndOfFile) + if (!Success) { - std::cout << to_string(L.get_token_info_raw()) << std::endl; + std::cerr << "ERROR! 
" << P.get_error().Location.Line << ':' + << P.get_error().Location.Column << ": " << P.get_error().Reason + << std::endl; + std::string_view ErrorLine = [&]() -> std::string_view { + auto RemainingRowCnt = P.get_error().Location.Line - 1; + std::size_t LinePos = 0; + while (RemainingRowCnt) + { + LinePos = InputBuffer.find('\n', LinePos) + 1; + --RemainingRowCnt; + } - if (T == token::SyntaxError) - { - std::cerr << "ERROR!" << std::endl; - return EXIT_FAILURE; - } + std::string_view Line = InputBuffer; + Line.remove_prefix(LinePos); + return Line.substr(0, Line.find('\n')); + }(); + std::cerr << " " << ErrorLine << std::endl; + std::cerr << " " << std::string(P.get_error().Location.Column - 1, ' ') + << '^' << std::endl; } } diff --git a/tools/dto_compiler/ast/decl.cpp b/tools/dto_compiler/ast/decl.cpp new file mode 100644 index 0000000..c2cdbcd --- /dev/null +++ b/tools/dto_compiler/ast/decl.cpp @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#include + +#include "decl.hpp" + +namespace monomux::tools::dto_compiler::ast +{ + +namespace +{ + +void print_ident(std::ostringstream& OS, std::size_t Indent) +{ + if (Indent == 0) + OS << '.'; + + while (Indent > 1) + { + OS << "| "; + --Indent; + } + if (Indent == 1) + { + OS << "|- "; + } +} + +} // namespace + +void decl_context::dump_children(std::ostringstream& OS, + std::size_t Depth) const +{ + for (const auto& Child : Children) + { + print_ident(OS, Depth); + Child->dump(OS, Depth + 1); + } +} + +#define MONOMUX_DECL_DUMP(TYPE) \ + void TYPE::dump(std::ostringstream& OS, std::size_t Depth) const + +MONOMUX_DECL_DUMP(decl) {} + +MONOMUX_DECL_DUMP(comment_decl) +{ + static constexpr std::size_t CommentPrintLength = 64; + OS << "CommentDecl " << this << ' '; + OS << (Comment.is_block_comment() ? 
"block " : "line "); + OS << Comment.get_comment().substr(0, CommentPrintLength); + OS << '\n'; +} + +MONOMUX_DECL_DUMP(named_decl) {} + +MONOMUX_DECL_DUMP(namespace_decl) +{ + OS << "NamespaceDecl " << this << ' ' << get_identifier() << '\n'; + dump_children(OS, Depth); +} + +MONOMUX_DECL_DUMP(value_decl) {} + +MONOMUX_DECL_DUMP(literal_decl) {} + +#undef MONOMUX_DECL_DUMP + +} // namespace monomux::tools::dto_compiler::ast diff --git a/tools/dto_compiler/ast/decl.hpp b/tools/dto_compiler/ast/decl.hpp new file mode 100644 index 0000000..1e4a107 --- /dev/null +++ b/tools/dto_compiler/ast/decl.hpp @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include "monomux/adt/FunctionExtras.hpp" + +#include "expr.hpp" +#include "type.hpp" + +namespace monomux::tools::dto_compiler::ast +{ + +class comment +{ + bool IsBlockComment; + std::string Comment; + +public: + comment(bool BlockComment, std::string Comment) + : IsBlockComment(BlockComment), Comment(std::move(Comment)) + {} + + [[nodiscard]] bool is_block_comment() const noexcept + { + return IsBlockComment; + } + [[nodiscard]] const std::string& get_comment() const noexcept + { + return Comment; + } +}; + +class decl +{ + +public: + MONOMUX_MAKE_STRICT_TYPE(decl, virtual); + virtual void dump(std::ostringstream& OS, std::size_t Depth) const; +}; + +#define MONOMUX_DECL_DUMP \ + void dump(std::ostringstream& OS, std::size_t Depth) const override; + +/// A faux node of supertype \p decl that only holds a comment. 
+class comment_decl : public decl +{ + comment Comment; + +public: + explicit comment_decl(comment C) : Comment(std::move(C)) {} + MONOMUX_DECL_DUMP; +}; + +class named_decl : public decl +{ + std::string Identifier; + +public: + explicit named_decl(std::string Identifier) + : Identifier(std::move(Identifier)) + {} + + [[nodiscard]] const std::string& get_identifier() const noexcept + { + return Identifier; + } + + MONOMUX_DECL_DUMP; +}; + +/// Represents a kind of \p decl that may store inner child \p decl nodes. +class decl_context +{ + std::vector> Children; + std::unordered_map NameableChildren; + +public: + MONOMUX_MAKE_STRICT_TYPE(decl_context, ); + void dump_children(std::ostringstream& OS, std::size_t Depth = 0) const; + + template + DeclType* get_or_create_child_decl(Args&&... Argv) + { + std::unique_ptr Node = + std::make_unique(std::forward(Argv)...); + + if constexpr (std::is_base_of_v) + { + if (auto It = NameableChildren.find( + dynamic_cast(*Node).get_identifier()); + It != NameableChildren.end()) + return dynamic_cast(It->second); + } + + Children.push_back(std::move(Node)); + auto* OwnedNode = static_cast(Children.back().get()); + + if constexpr (std::is_base_of_v) + { + const auto* ND = dynamic_cast(OwnedNode); + NameableChildren[ND->get_identifier()] = OwnedNode; + } + + return OwnedNode; + } + + const decl* lookup(const std::string& Identifier) const noexcept + { + auto It = NameableChildren.find(Identifier); + if (It == NameableChildren.end()) + return nullptr; + + return It->second; + } +}; + +class namespace_decl + : public named_decl + , public decl_context +{ +public: + explicit namespace_decl(std::string Identifier) + : named_decl(std::move(Identifier)) + {} + MONOMUX_DECL_DUMP; +}; + +/// Declarations that have associated types. 
+class value_decl : public named_decl +{ + type* Type; + +public: + explicit value_decl(std::string Identifier, type* Type) + : named_decl(std::move(Identifier)), Type(Type) + {} + + [[nodiscard]] const type* get_type() const noexcept { return Type; } + + MONOMUX_DECL_DUMP; +}; + +class literal_decl : public value_decl +{ + expr* Value; + +public: + explicit literal_decl(std::string Identifier, type* Type, expr* Value) + : value_decl(std::move(Identifier), Type), Value(Value) + {} + + [[nodiscard]] const expr* get_value() const noexcept { return Value; } + + MONOMUX_DECL_DUMP; +}; + +#undef MONOMUX_DECL_DUMP + +} // namespace monomux::tools::dto_compiler::ast diff --git a/tools/dto_compiler/ast/expr.hpp b/tools/dto_compiler/ast/expr.hpp new file mode 100644 index 0000000..1ffdb95 --- /dev/null +++ b/tools/dto_compiler/ast/expr.hpp @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#pragma once + +#include "monomux/adt/FunctionExtras.hpp" + +namespace monomux::tools::dto_compiler::ast +{ + +class expr +{ +public: + MONOMUX_MAKE_STRICT_TYPE(expr, virtual); +}; + +} // namespace monomux::tools::dto_compiler::ast diff --git a/tools/dto_compiler/ast/type.hpp b/tools/dto_compiler/ast/type.hpp new file mode 100644 index 0000000..cd9889f --- /dev/null +++ b/tools/dto_compiler/ast/type.hpp @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#pragma once + +#include "monomux/adt/FunctionExtras.hpp" + +namespace monomux::tools::dto_compiler::ast +{ + +class type +{ +public: + MONOMUX_MAKE_STRICT_TYPE(type, virtual); +}; + +} // namespace monomux::tools::dto_compiler::ast diff --git a/tools/dto_compiler/dto_unit.cpp b/tools/dto_compiler/dto_unit.cpp new file mode 100644 index 0000000..b714276 --- /dev/null +++ b/tools/dto_compiler/dto_unit.cpp @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#include + +#include "dto_unit.hpp" + +namespace monomux::tools::dto_compiler +{ + +std::ostringstream dto_unit::dump() const +{ + 
std::ostringstream Ret; + + Ret << "DTOContext " << this << '\n'; + Root.dump_children(Ret, 1); + + return Ret; +} + +} // namespace monomux::tools::dto_compiler diff --git a/tools/dto_compiler/dto_unit.hpp b/tools/dto_compiler/dto_unit.hpp new file mode 100644 index 0000000..b926ad0 --- /dev/null +++ b/tools/dto_compiler/dto_unit.hpp @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-3.0-only */ +#pragma once +#include + +#include "monomux/adt/FunctionExtras.hpp" + +#include "ast/decl.hpp" +#include "ast/type.hpp" + +namespace monomux::tools::dto_compiler +{ + +class dto_unit +{ + ast::decl_context Root; + +public: + MONOMUX_MAKE_STRICT_TYPE(dto_unit, ); + + [[nodiscard]] const ast::decl_context& get_root() const noexcept + { + return Root; + } + MONOMUX_MEMBER_0(ast::decl_context&, get_root, [[nodiscard]], noexcept); + + std::ostringstream dump() const; +}; + +} // namespace monomux::tools::dto_compiler diff --git a/tools/dto_compiler/lex.cpp b/tools/dto_compiler/lexer.cpp similarity index 79% rename from tools/dto_compiler/lex.cpp rename to tools/dto_compiler/lexer.cpp index 41602fc..5676949 100644 --- a/tools/dto_compiler/lex.cpp +++ b/tools/dto_compiler/lexer.cpp @@ -1,23 +1,32 @@ /* SPDX-License-Identifier: LGPL-3.0-only */ -#include +#include #include +#include #include -#include -#include +#include #include +#include +#include #include #include #include #include +#ifndef NDEBUG +#include +#endif /* !NDEBUG */ + +#include "monomux/Debug.h" #include "monomux/adt/FunctionExtras.hpp" #include "monomux/adt/SmallIndexMap.hpp" +#include "monomux/adt/scope_guard.hpp" #include "monomux/unreachable.hpp" -#include "lex.hpp" +#include "lexer.hpp" namespace monomux::tools::dto_compiler { + std::string_view to_string(token TK) { switch (TK) @@ -195,9 +204,9 @@ class char_sequence_lexer return std::nullopt; else { - std::cerr << "ERROR: Unhandled state type at #" << StateIndex - << " when reading" << Buffer << " at the end of " << Chars - << std::endl; + 
MONOMUX_DEBUG(std::cerr << "ERROR: Unhandled state type at #" + << StateIndex << " when reading" << Buffer + << " at the end of " << Chars << std::endl); return std::nullopt; } } @@ -211,8 +220,9 @@ class char_sequence_lexer void add_new_char_sequence(token Tok, std::string_view Str) { - std::cerr << "CharSequence for Token::" << to_string(Tok) << " = " << '"' - << Str << '"' << " (size: " << Str.size() << ')' << std::endl; + MONOMUX_DEBUG(std::cerr << "CharSequence for Token::" << to_string(Tok) + << " = " << '"' << Str << '"' + << " (size: " << Str.size() << ')' << std::endl); (void)create_start_state_if_none(); States.reserve(States.size() + 1 /* Acceptor state */ + Str.size() /* Letter transitions...*/ + @@ -227,8 +237,8 @@ class char_sequence_lexer Acceptors.set(RawTok, I); return I; }(); - std::cerr << '#' << AcceptStateIndex << " = AcceptState(" << to_string(Tok) - << ')' << std::endl; + MONOMUX_DEBUG(std::cerr << '#' << AcceptStateIndex << " = AcceptState(" + << to_string(Tok) << ')' << std::endl); continue_building_char_lex_sequence( Str, @@ -250,7 +260,8 @@ class char_sequence_lexer { if (StartStateIndex != static_cast(-1)) return StartStateIndex; - std::cerr << '#' << States.size() << " = StartState" << std::endl; + MONOMUX_DEBUG(std::cerr << '#' << States.size() << " = StartState" + << std::endl); return StartStateIndex = make_state(States.size()); } @@ -271,27 +282,23 @@ class char_sequence_lexer return *MaybeNextIndex; std::size_t NextStateIndex = make_state(States.size()); -#ifndef NDEBUG - std::get(States.at(NextStateIndex)).DebugConsumedPrefix = - ParentState.DebugConsumedPrefix; - std::get(States.at(NextStateIndex)) - .DebugConsumedPrefix.push_back(C); -#endif /* !NDEBUG */ - + MONOMUX_DEBUG( + std::get(States.at(NextStateIndex)).DebugConsumedPrefix = + ParentState.DebugConsumedPrefix; + std::get(States.at(NextStateIndex)) + .DebugConsumedPrefix.push_back(C);); return NextStateIndex; }(); ParentState.Next.set(C, NextStateIndex); - std::cerr << 
"step(#" << ParentState.Index << ", '" << C << "') := #" - << NextStateIndex; -#ifndef NDEBUG - std::cerr + MONOMUX_DEBUG( + std::cerr << "step(#" << ParentState.Index << ", '" << C << "') := #" + << NextStateIndex; + std::cerr << " (" << '"' << std::get(States.at(NextStateIndex)).DebugConsumedPrefix << '"' << ')'; -#endif /* !NDEBUG */ - std::cerr << std::endl; - + std::cerr << std::endl;); if (Str.size() > 1) continue_building_char_lex_sequence( @@ -311,63 +318,93 @@ class char_sequence_lexer void finish_char_lex_sequence(const accept_state& Acceptor, forward_state& ParentState) { +#ifndef NDEBUG auto PrintFinish = [&]() { std::cerr << "finish(#" << ParentState.Index; -#ifndef NDEBUG - std::cerr << " /* " << '"' << ParentState.DebugConsumedPrefix << '"' - << " */"; -#endif /* !NDEBUG */ + MONOMUX_DEBUG(std::cerr << " /* " << '"' + << ParentState.DebugConsumedPrefix << '"' + << " */"); std::cerr << ')'; }; +#endif /* !NDEBUG */ if (const std::size_t* NextIndex = ParentState.Next.tryGet('\0')) { - std::cerr << "ERROR: Attempting to build non-deterministic automaton.\n"; - PrintFinish(); - std::cerr + MONOMUX_DEBUG( + std::cerr + << "ERROR: Attempting to build non-deterministic automaton.\n"; + PrintFinish(); + std::cerr << " = #" << *NextIndex << " == Accept(" << to_string( std::get(States.at(*NextIndex)).AcceptedToken) << "), already.\nAttempted accepting " - << to_string(Acceptor.AcceptedToken) << " here instead." << std::endl; + << to_string(Acceptor.AcceptedToken) << " here instead." 
<< std::endl;); throw char{0}; } ParentState.Next.set('\0', Acceptor.Index); - PrintFinish(); - std::cerr << " := #" << Acceptor.Index << " == Accept(" - << to_string(Acceptor.AcceptedToken) << ')' << std::endl; + MONOMUX_DEBUG(PrintFinish(); std::cerr + << " := #" << Acceptor.Index << " == Accept(" + << to_string(Acceptor.AcceptedToken) << ')' + << std::endl;); } }; } // namespace detail +lexer::location lexer::location::make_location(std::string_view FullBuffer, + std::string_view Buffer) noexcept +{ + assert(FullBuffer.size() >= Buffer.size() && "Buffer overflow!"); + const std::size_t LocationFromStart = FullBuffer.size() - Buffer.size(); + return make_location(FullBuffer, LocationFromStart); +} + +lexer::location lexer::location::make_location(std::string_view Buffer, + std::size_t AbsoluteLoc) noexcept +{ + std::string_view BufferBeforeLoc = Buffer; + BufferBeforeLoc.remove_suffix(BufferBeforeLoc.size() - AbsoluteLoc); + + const std::size_t Rows = std::count_if(BufferBeforeLoc.begin(), + BufferBeforeLoc.end(), + [](char Ch) { return Ch == '\n'; }); + const decltype(std::string_view::npos) LastNewlineIndex = + BufferBeforeLoc.find_last_of('\n'); + return location{.Absolute = AbsoluteLoc, + .Line = Rows + 1, + .Column = BufferBeforeLoc.size() - LastNewlineIndex}; +} + lexer::~lexer() = default; lexer::lexer(std::string_view Buffer) : SequenceLexer(std::make_unique()), - OriginalFullBuffer(Buffer), CurrentState({Buffer, token::NullToken, {}}) + OriginalFullBuffer(Buffer), CurrentState({Buffer, token::NullToken, {}, {}}) { set_current_token(); - std::cerr << "DEBUG: Building sequenced lexical analysis table..." - << std::endl; + MONOMUX_DEBUG( + std::cerr << "DEBUG: Building sequenced lexical analysis table..." + << std::endl); #define STR_SPELLING_TOKEN(NAME, SPELLING) \ SequenceLexer->add_new_char_sequence(token::NAME, SPELLING); #include "Tokens.inc.h" - std::cerr << "DEBUG: Lexical analysis table created." 
<< std::endl; + MONOMUX_DEBUG(std::cerr << "DEBUG: Lexical analysis table created." + << std::endl); } template -token lexer::set_current_token(Args&&... Argv) +token lexer::set_current_token(Args&&... Argv) noexcept { CurrentState.Info = token_info{std::forward(Argv)...}; return CurrentState.Tok = TK; } -token lexer::set_current_token_raw(token TK, all_token_infos_type Info) +token lexer::set_current_token_raw(token TK, all_token_infos_type Info) noexcept { switch (TK) { @@ -389,7 +426,7 @@ token lexer::set_current_token_raw(token TK, all_token_infos_type Info) } void lexer::token_buffer_set_end_at_consumed_buffer( - std::string_view& TokenBuffer, std::size_t KeepCharsAtEnd) + std::string_view& TokenBuffer, std::size_t KeepCharsAtEnd) noexcept { const std::size_t TokenLength = TokenBuffer.size() - CurrentState.Buffer.size() - KeepCharsAtEnd; @@ -404,9 +441,10 @@ void lexer::token_buffer_set_end_at_consumed_buffer( TokenBuffer.remove_suffix(TokenBuffer.size() - TokenLength); } -token lexer::lex_token() +token lexer::lex_token() noexcept { std::string_view TokenBuffer = CurrentState.Buffer; + CurrentState.Loc = OriginalFullBuffer.size() - CurrentState.Buffer.size(); auto TokenBufferEndAtReadBuffer = [&](std::size_t KeepCharsAtEnd = 0) { token_buffer_set_end_at_consumed_buffer(TokenBuffer, KeepCharsAtEnd); }; @@ -432,10 +470,18 @@ token lexer::lex_token() { // Consume various comment kinds. Ch = get_char(); + token T{}; if (Ch == '/') // // - return lex_comment(TokenBuffer, /*MultiLine=*/false); - if (Ch == '*') // /* - return lex_comment(TokenBuffer, /*MultiLine=*/true); + T = lex_comment(TokenBuffer, /*MultiLine=*/false); + else if (Ch == '*') // /* + T = lex_comment(TokenBuffer, /*MultiLine=*/true); + + if (T == token::Comment) + return T; + if (T == token::NullToken) + // The comment did not need to be lexed, as it will be stripped from the + // output. 
+ return lex_token(); return set_current_token( std::string("Unexpected ") + static_cast(Ch) + " when reading " + @@ -476,7 +522,6 @@ token lexer::lex_token() case '9': return lex_integer_literal(TokenBuffer); - default: { if (std::isalpha(Ch) || Ch == '_') @@ -501,11 +546,11 @@ token lexer::lex_token() } } - std::cerr << TokenBuffer << std::endl; + MONOMUX_DEBUG(std::cerr << TokenBuffer << std::endl); unreachable("switch() statement should've returned appropriate Token"); } -token lexer::lex_comment(std::string_view& TokenBuffer, bool MultiLine) +token lexer::lex_comment(std::string_view& TokenBuffer, bool MultiLine) noexcept { // "//" comments are line comments that should be stripped, unless // they are "//!" comments, which need to stay in the generated code. @@ -578,7 +623,7 @@ token lexer::lex_comment(std::string_view& TokenBuffer, bool MultiLine) return token::NullToken; } -token lexer::lex_integer_literal(std::string_view& TokenBuffer) +token lexer::lex_integer_literal(std::string_view& TokenBuffer) noexcept { bool IsNegative = TokenBuffer.front() == '-'; Char Ch = get_char(); @@ -594,14 +639,17 @@ token lexer::lex_integer_literal(std::string_view& TokenBuffer) } -token lexer::lex() { return lex_token(); } +token lexer::lex() noexcept { return lex_token(); } -token lexer::peek() +token lexer::peek() noexcept { - state SavedState = CurrentState; - token Peeked = lex_token(); - CurrentState = SavedState; - return Peeked; + restore_guard G{CurrentState}; + return lex_token(); +} + +lexer::location lexer::get_location() const noexcept +{ + return location::make_location(OriginalFullBuffer, CurrentState.Loc); } lexer::Char lexer::get_char() noexcept @@ -622,10 +670,11 @@ lexer::Char lexer::get_char() noexcept case '\0': // Observing a 0x00 (NUL) byte in the middle of the input stream means // something has gone wrong. 
- std::cerr << "WARNING: Encountered NUL ('\\0') character at position " - << (OriginalFullBuffer.size() - CurrentState.Buffer.size()) - << " before true EOF.\nReplacing with SPACE (' ')..." - << std::endl; + MONOMUX_DEBUG(std::cerr + << "WARNING: Encountered NUL ('\\0') character at position " + << (OriginalFullBuffer.size() - CurrentState.Buffer.size()) + << " before true EOF.\nReplacing with SPACE (' ')..." + << std::endl); return ' '; case '\n': @@ -643,10 +692,8 @@ lexer::Char lexer::get_char() noexcept lexer::Char lexer::peek_char() noexcept { - auto BufferSave = CurrentState.Buffer; - Char Ch = get_char(); - CurrentState.Buffer = BufferSave; - return Ch; + restore_guard G{CurrentState.Buffer}; + return get_char(); } } // namespace monomux::tools::dto_compiler diff --git a/tools/dto_compiler/lex.hpp b/tools/dto_compiler/lexer.hpp similarity index 71% rename from tools/dto_compiler/lex.hpp rename to tools/dto_compiler/lexer.hpp index 8a0e8e4..5970d35 100644 --- a/tools/dto_compiler/lex.hpp +++ b/tools/dto_compiler/lexer.hpp @@ -7,6 +7,8 @@ #include #include +#include "monomux/adt/FunctionExtras.hpp" + namespace monomux::tools::dto_compiler { @@ -63,8 +65,27 @@ class lexer std::unique_ptr SequenceLexer; /// The original buffer the Lexer was constructed with. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const std::string_view OriginalFullBuffer; +public: + struct location + { + std::size_t Absolute; + std::size_t Line, Column; + + /// Creates a \p location based on the position where \p Buffer begins + /// within the \p FullBuffer. + static location make_location(std::string_view FullBuffer, + std::string_view Buffer) noexcept; + + /// Creates a \p location for the position marked by \p AbsoluteLoc in the + /// \p Buffer. + static location make_location(std::string_view Buffer, + std::size_t AbsoluteLoc) noexcept; + }; + +private: struct state { /// The currently lexed tail end of the buffer. 
Same as \p @@ -74,9 +95,14 @@ class lexer /// The last lexed token. token Tok; /// Information about the last lexed token, if any. + /// /// This is \b guaranteed to contain the appropriate \p TokenKind /// specialisation. all_token_infos_type Info; + + /// The absolute location (first char in the buffer at index 0) where the + /// last lexed token \p Tok begun. + std::size_t Loc; }; state CurrentState; @@ -85,19 +111,25 @@ class lexer using Char = std::uint8_t; explicit lexer(std::string_view Buffer); - lexer(const lexer&) = delete; - lexer(lexer&&) = delete; - lexer& operator=(const lexer&) = delete; - lexer& operator=(lexer&&) = delete; + MONOMUX_MAKE_NON_COPYABLE_MOVABLE(lexer); ~lexer(); /// Lexes the next token and returns its identifying kind. The state of the /// lexer is updated by this operation. - [[nodiscard]] token lex(); + [[nodiscard]] token lex() noexcept; + + /// Returns the type of the last lexed \p token without lexing a new one. + [[nodiscard]] token current_token() const noexcept + { + return CurrentState.Tok; + } + + /// \returns the \p location of the last \p token returned by \p lex(). + [[nodiscard]] location get_location() const noexcept; /// Lexes the next token and returns its identifying kind, but discards the /// result without affecting the state of the lexer. - [[nodiscard]] token peek(); + [[nodiscard]] token peek() noexcept; /// \returns the currently stored token info, without any semantic checks to /// its value. @@ -111,7 +143,7 @@ class lexer template [[nodiscard]] std::optional> get_token_info() const noexcept { - if (auto* TKInfo = std::get_if>(CurrentState.Info)) + if (auto* TKInfo = std::get_if>(&CurrentState.Info)) return *TKInfo; return std::nullopt; } @@ -128,24 +160,26 @@ class lexer /// Cut \p TokenBuffer to end at the already consumed portion of the \p Lexer /// read-buffer, optionally restoring \p KeepCharsAtEnd number of characters /// back to the end. 
- void token_buffer_set_end_at_consumed_buffer(std::string_view& TokenBuffer, - std::size_t KeepCharsAtEnd = 0); + void token_buffer_set_end_at_consumed_buffer( + std::string_view& TokenBuffer, std::size_t KeepCharsAtEnd = 0) noexcept; /// Lexes the next token and return its identifying kind, mutating the state /// of the lexer in the process. - [[nodiscard]] token lex_token(); + [[nodiscard]] token lex_token() noexcept; /// Lexes a comment, either by ignoring it to a \p NullToken, or by creating /// a \p Comment token that contains the lexed value for the comment. [[nodiscard]] token lex_comment(std::string_view& TokenBuffer, - bool MultiLine); + bool MultiLine) noexcept; /// Lexes an integer literal from the pending \p TokenBuffer that starts with /// the \p Ch character. - [[nodiscard]] token lex_integer_literal(std::string_view& TokenBuffer); + [[nodiscard]] token + lex_integer_literal(std::string_view& TokenBuffer) noexcept; - template token set_current_token(Args&&... Argv); - token set_current_token_raw(token TK, all_token_infos_type Info); + template + token set_current_token(Args&&... 
Argv) noexcept;
+  token set_current_token_raw(token TK, all_token_infos_type Info) noexcept;
 };
 
 } // namespace monomux::tools::dto_compiler
diff --git a/tools/dto_compiler/parser.cpp b/tools/dto_compiler/parser.cpp
new file mode 100644
index 0000000..9f38297
--- /dev/null
+++ b/tools/dto_compiler/parser.cpp
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: LGPL-3.0-only */
+// #include
+#include
+// #include
+// #include
+// #include
+// #include
+// #include
+// #include
+// #include
+
+#ifndef NDEBUG
+#include
+#endif /* !NDEBUG */
+
+#include "monomux/Debug.h"
+#include "monomux/adt/scope_guard.hpp"
+// #include "monomux/adt/FunctionExtras.hpp"
+// #include "monomux/adt/SmallIndexMap.hpp"
+// #include "monomux/unreachable.hpp"
+
+#include "ast/decl.hpp"
+#include "dto_unit.hpp"
+#include "lexer.hpp"
+
+#include "parser.hpp"
+
+namespace monomux::tools::dto_compiler
+{
+
+namespace
+{
+
+/// Returned by parser::get_error() when no error is stored.
+const parser::error_info EmptyError{};
+
+} // namespace
+
+/// Declares a local 'Info' bound to the result of Lexer.get_token_info().
+#define INFO(TOKEN_KIND)                                                       \
+  const auto& Info = Lexer.get_token_info();
+
+/// Collects consecutive 'Scope' and 'Identifier' tokens, starting at the
+/// lexer's current token, into a single "a::b"-style string.
+///
+/// \returns the collected name. The first token that is neither a scope
+/// operator nor an identifier is left as the lexer's current token.
+std::string parser::parse_potentially_scoped_identifier()
+{
+  std::string Identifier;
+  while (true)
+  {
+    token T = Lexer.current_token();
+    if (T == token::Scope)
+      Identifier.append("::");
+    else if (T == token::Identifier)
+    {
+      INFO(Identifier);
+      Identifier.append(Info->Identifier);
+    }
+    else
+      return Identifier;
+
+    (void)Lexer.lex();
+  }
+}
+
+/// Parses a 'namespace NAME { ... }' construct, with 'namespace' being the
+/// lexer's current token on entry. The body is parsed by parse(), with
+/// \p DeclContext pointing at the namespace's declaration meanwhile.
+///
+/// \returns whether the namespace was parsed without an error. On success,
+/// the closing '}' is left as the current token for the caller to consume.
+bool parser::parse_namespace()
+{
+  // First, need to consume the identifier of the namespace.
+  assert(Lexer.current_token() == token::Namespace && "Expected 'namespace'");
+  (void)Lexer.lex();
+  std::string Identifier = parse_potentially_scoped_identifier();
+
+  // The identifier parser stops with the first token following the name as
+  // the current token, so that one has to be checked here. Lexing once more
+  // before the check would compare the token *after* the '{' instead.
+  if (Lexer.current_token() != token::LBrace)
+  {
+    set_error_to_current_token("Expected '{' after namespace declaration");
+    return false;
+  }
+  (void)Lexer.lex(); // Step over the '{' to the first token of the body.
+
+  auto* NSD = DeclContext->get_or_create_child_decl(
+    std::move(Identifier));
+  restore_guard G{DeclContext};
+  DeclContext = NSD;
+  if (!parse())
+    return false;
+
+  // A successful inner parse() stops either at a '}' or at the end of file.
+  if (Lexer.current_token() != token::RBrace)
+  {
+    set_error_to_current_token("Parsing of a 'namespace' ended without a '}'");
+    return false;
+  }
+
+  return true;
+}
+
+/// Parses declarations into the \p DeclContext that is current at the time of
+/// the call, until the end of the file or a '}' is reached.
+///
+/// \returns whether the parsing was successful.
+bool parser::parse()
+{
+  ast::decl_context* CurrentContext = DeclContext;
+
+  while (true)
+  {
+    switch (Lexer.current_token())
+    {
+      case token::BeginningOfFile:
+      {
+        (void)Lexer.lex();
+        continue;
+      }
+
+      case token::EndOfFile:
+        return true;
+
+      case token::SyntaxError:
+        // NOTE(review): Tokens are fetched via Lexer.lex() here, not via
+        // get_next_token(), so no error_info is recorded on this path and
+        // has_error() may still be false. Confirm whether this is intended.
+        return false;
+
+      default:
+        set_error_to_current_token(
+          std::string{"Unexpected "} +
+          std::string{to_string(Lexer.current_token())} +
+          std::string{" encountered while parsing."});
+        return false;
+
+      case token::RBrace:
+        if (CurrentContext == &ParseUnit->get_root())
+        {
+          set_error_to_current_token("'}' does not close anything here");
+          return false;
+        }
+        return true;
+
+      case token::Comment:
+      {
+        INFO(Comment);
+        CurrentContext->get_or_create_child_decl(
+          ast::comment{Info->IsBlockComment, Info->Comment});
+        break;
+      }
+
+      case token::Namespace:
+        // Stop at the first failing namespace, so a failure inside a nested
+        // declaration is not reported as a success by the outer levels.
+        if (!parse_namespace())
+          return false;
+        break;
+    }
+
+    (void)Lexer.lex();
+  }
+
+  return true;
+}
+
+/// Creates a parser reading from \p Lexer, with a newly created unit whose
+/// root is the initial declaration context.
+parser::parser(lexer& Lexer)
+  : Lexer(Lexer), ParseUnit(std::make_unique()),
+    DeclContext(&ParseUnit->get_root())
+{}
+
+parser::~parser() { DeclContext = nullptr; }
+
+/// \returns the stored error. Without a stored error this trips the assert
+/// if asserts are enabled, and returns an empty error_info otherwise.
+const parser::error_info& parser::get_error() const noexcept
+{
+  assert(has_error() && "Invalid call to 'get_error' if no error exists!");
+  return has_error() ? *Error : EmptyError;
+}
+
+/// Stores \p Reason as the parser's error, together with the lexer's current
+/// location, token kind, and raw token info. Replaces any earlier error.
+void parser::set_error_to_current_token(std::string Reason) noexcept
+{
+  Error.emplace(error_info{.Location = Lexer.get_location(),
+                           .TokenKind = Lexer.current_token(),
+                           .TokenInfo = Lexer.get_token_info_raw(),
+                           .Reason = std::move(Reason)});
+}
+
+/// Lexes one token and records an error if it is a 'SyntaxError'. The
+/// location and the raw token info are also handed to MONOMUX_DEBUG.
+///
+/// \returns the kind of the token that was lexed.
+token parser::get_next_token() noexcept
+{
+  token T = Lexer.lex();
+  if (T == token::SyntaxError)
+  {
+    Error.emplace(error_info{
+      .Location = Lexer.get_location(),
+      .TokenKind = T,
+      .TokenInfo = Lexer.get_token_info_raw(),
+      .Reason = Lexer.get_token_info()->to_string()});
+  }
+
+  MONOMUX_DEBUG(std::cout << Lexer.get_location().Line << ':'
+                          << Lexer.get_location().Column << ' ';
+                std::cout << to_string(Lexer.get_token_info_raw())
+                          << std::endl;);
+
+  return T;
+}
+
+} // namespace monomux::tools::dto_compiler
diff --git a/tools/dto_compiler/parser.hpp b/tools/dto_compiler/parser.hpp
new file mode 100644
index 0000000..0ff9c57
--- /dev/null
+++ b/tools/dto_compiler/parser.hpp
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: LGPL-3.0-only */
+#pragma once
+// #include
+#include
+#include
+#include
+// #include
+// #include
+
+#include "monomux/adt/FunctionExtras.hpp"
+
+#include "lexer.hpp"
+
+namespace monomux::tools::dto_compiler
+{
+
+class dto_unit;
+
+namespace ast
+{
+
+class decl_context;
+class namespace_decl;
+
+} // namespace ast
+
+/// Consumes the \p token stream emitted by the \p lexer to build a
+/// \p dto_unit.
+class parser
+{
+public:
+  /// Describes a failure: where the lexer was, which token it was at, and a
+  /// human-readable reason.
+  struct error_info
+  {
+    lexer::location Location;
+    token TokenKind;
+    all_token_infos_type TokenInfo;
+
+    std::string Reason;
+  };
+
+private:
+  lexer& Lexer; // NOLINT(cppcoreguidelines-avoid-const-or-ref-data-members).
+  std::unique_ptr ParseUnit;
+
+  std::optional Error;
+  /// Records \p Reason as the error, tagged with the lexer's current state.
+  void set_error_to_current_token(std::string Reason) noexcept;
+
+  /// Lexes a token, recording an error if it is a \p SyntaxError.
+  [[nodiscard]] token get_next_token() noexcept;
+
+  /// The context the declarations being parsed are added to.
+  ast::decl_context* DeclContext;
+
+  /// Concatenates a run of identifier and scope tokens into one name.
+  std::string parse_potentially_scoped_identifier();
+  /// Parses one namespace. \returns whether it was successful.
+  bool parse_namespace();
+
+public:
+  explicit parser(lexer& Lexer);
+  MONOMUX_MAKE_NON_COPYABLE_MOVABLE(parser);
+  ~parser();
+
+  /// Transfers the ownership of the built unit to the caller. Only callable
+  /// on an rvalue parser, after which \p get_unit() must not be used.
+  [[nodiscard]] std::unique_ptr take_unit() && noexcept
+  {
+    return std::move(ParseUnit);
+  }
+
+  [[nodiscard]] const dto_unit& get_unit() const noexcept { return *ParseUnit; }
+  MONOMUX_MEMBER_0(dto_unit&, get_unit, [[nodiscard]], noexcept);
+
+  /// Parses a definition set from the \p Lexer according to the language rules
+  /// baked into the instance.
+  ///
+  /// \returns whether the parsing was successful.
+  bool parse();
+
+  [[nodiscard]] bool has_error() const noexcept { return Error.has_value(); }
+  /// \returns the stored error. Only call this if \p has_error() is true.
+  [[nodiscard]] const error_info& get_error() const noexcept;
+};
+
+} // namespace monomux::tools::dto_compiler