From d270bd4b71c1133e24705188e0acea24eac642aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Wed, 23 Apr 2025 10:42:37 +0200 Subject: [PATCH 1/9] Pass every CannotConsumeTokenException to Error --- src/GrammarProcessing/SyntacticGrammar.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/GrammarProcessing/SyntacticGrammar.php b/src/GrammarProcessing/SyntacticGrammar.php index 22a8222..a9c7c72 100644 --- a/src/GrammarProcessing/SyntacticGrammar.php +++ b/src/GrammarProcessing/SyntacticGrammar.php @@ -36,7 +36,8 @@ public function parseLexicalTokens(TokenStream $tokenStream, string $rootSymbol) ); $tokenStream->consumeEndOfStream(); - } catch (CannotConsumeTokenException) { + } catch (CannotConsumeTokenException $e) { + $error->setError($e); $error->throw(); } From 81906643c26b79ae96cf22467d8415ea3fbd3cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 10:38:36 +0200 Subject: [PATCH 2/9] Install tracy/tracy for tests --- composer.json | 3 +- composer.lock | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 2d4ef5c..ebc2bb4 100644 --- a/composer.json +++ b/composer.json @@ -35,7 +35,8 @@ "require-dev": { "nette/tester": "^2.5.4", "phpstan/phpstan": "^2.1.12", - "spaze/phpstan-disallowed-calls": "^4.5.0" + "spaze/phpstan-disallowed-calls": "^4.5.0", + "tracy/tracy": "^2.10.9" }, "scripts": { "phpstan": "phpstan analyse", diff --git a/composer.lock b/composer.lock index 3113204..379cc00 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "d3157f1168a25baac379a8b1e604f5a0", + "content-hash": "aea19a6b932b3ab92ec26d21f5e10b70", "packages": [], "packages-dev": [ { @@ -206,6 +206,81 @@ } ], "time": 
"2025-04-10T19:01:43+00:00" + }, + { + "name": "tracy/tracy", + "version": "v2.10.9", + "source": { + "type": "git", + "url": "https://github.com/nette/tracy.git", + "reference": "e7af75205b184ca8895bc57fafd331f8d5022d26" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/tracy/zipball/e7af75205b184ca8895bc57fafd331f8d5022d26", + "reference": "e7af75205b184ca8895bc57fafd331f8d5022d26", + "shasum": "" + }, + "require": { + "ext-json": "*", + "ext-session": "*", + "php": "8.0 - 8.4" + }, + "conflict": { + "nette/di": "<3.0" + }, + "require-dev": { + "latte/latte": "^2.5 || ^3.0", + "nette/di": "^3.0", + "nette/http": "^3.0", + "nette/mail": "^3.0 || ^4.0", + "nette/tester": "^2.2", + "nette/utils": "^3.0 || ^4.0", + "phpstan/phpstan": "^1.0", + "psr/log": "^1.0 || ^2.0 || ^3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.10-dev" + } + }, + "autoload": { + "files": [ + "src/Tracy/functions.php" + ], + "classmap": [ + "src" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "😎 Tracy: the addictive tool to ease debugging PHP code for cool developers. Friendly design, logging, profiler, advanced features like debugging AJAX calls or CLI support. 
You will love it.", + "homepage": "https://tracy.nette.org", + "keywords": [ + "Xdebug", + "debug", + "debugger", + "nette", + "profiler" + ], + "support": { + "issues": "https://github.com/nette/tracy/issues", + "source": "https://github.com/nette/tracy/tree/v2.10.9" + }, + "time": "2024-11-07T14:48:00+00:00" } ], "aliases": [], From 78fccc2d2065766cc20eba1ea47c5ed66ecc5b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 11:12:20 +0200 Subject: [PATCH 3/9] Add visitor system to Vocabulary\Symbol --- src/GrammarProcessing/Vocabulary/Literal.php | 7 +++++++ .../Vocabulary/NegativeLookahead.php | 7 +++++++ .../Vocabulary/Nonterminal.php | 7 +++++++ src/GrammarProcessing/Vocabulary/OneOf.php | 17 +++++++++++++++++ src/GrammarProcessing/Vocabulary/Regexp.php | 7 +++++++ src/GrammarProcessing/Vocabulary/Repeat.php | 15 +++++++++++++++ src/GrammarProcessing/Vocabulary/Sequence.php | 17 +++++++++++++++++ src/GrammarProcessing/Vocabulary/Subtract.php | 17 +++++++++++++++++ src/GrammarProcessing/Vocabulary/Symbol.php | 7 +++++++ 9 files changed, 101 insertions(+) diff --git a/src/GrammarProcessing/Vocabulary/Literal.php b/src/GrammarProcessing/Vocabulary/Literal.php index 935009e..1d4a100 100644 --- a/src/GrammarProcessing/Vocabulary/Literal.php +++ b/src/GrammarProcessing/Vocabulary/Literal.php @@ -37,4 +37,11 @@ public function acceptNode( return new GrammarProcessing\TokenNode($token); } + + + public function visit(callable $visitor): Symbol + { + return $visitor($this); + } + } diff --git a/src/GrammarProcessing/Vocabulary/NegativeLookahead.php b/src/GrammarProcessing/Vocabulary/NegativeLookahead.php index b8503a8..8ea175e 100644 --- a/src/GrammarProcessing/Vocabulary/NegativeLookahead.php +++ b/src/GrammarProcessing/Vocabulary/NegativeLookahead.php @@ -33,4 +33,11 @@ public function acceptNode( return new GrammarProcessing\EmptyNode('NegativeLookahead'); } + + + public function visit(callable $visitor): Symbol + { + return 
$visitor($this); + } + } diff --git a/src/GrammarProcessing/Vocabulary/Nonterminal.php b/src/GrammarProcessing/Vocabulary/Nonterminal.php index 9dc6881..4dd74e6 100644 --- a/src/GrammarProcessing/Vocabulary/Nonterminal.php +++ b/src/GrammarProcessing/Vocabulary/Nonterminal.php @@ -32,4 +32,11 @@ public function acceptNode( : new GrammarProcessing\TokenNode($tokenStream->consumeTokenWithType($this->nonterminal)); } + + + public function visit(callable $visitor): Symbol + { + return $visitor($this); + } + } diff --git a/src/GrammarProcessing/Vocabulary/OneOf.php b/src/GrammarProcessing/Vocabulary/OneOf.php index 6bfab74..0f084b3 100644 --- a/src/GrammarProcessing/Vocabulary/OneOf.php +++ b/src/GrammarProcessing/Vocabulary/OneOf.php @@ -67,4 +67,21 @@ public function acceptNode( return $result['node']; } + + + public function visit(callable $visitor): Symbol + { + $result = []; + + foreach ($this->symbols as $symbol) { + $result[] = $symbol->visit($visitor); + } + + return $visitor( + $result === $this->symbols + ? $this + : new self($result), + ); + } + } diff --git a/src/GrammarProcessing/Vocabulary/Regexp.php b/src/GrammarProcessing/Vocabulary/Regexp.php index 8479ca0..bfbd550 100644 --- a/src/GrammarProcessing/Vocabulary/Regexp.php +++ b/src/GrammarProcessing/Vocabulary/Regexp.php @@ -37,4 +37,11 @@ public function acceptNode( ); } + + + public function visit(callable $visitor): Symbol + { + return $visitor($this); + } + } diff --git a/src/GrammarProcessing/Vocabulary/Repeat.php b/src/GrammarProcessing/Vocabulary/Repeat.php index 5fd7878..ff77974 100644 --- a/src/GrammarProcessing/Vocabulary/Repeat.php +++ b/src/GrammarProcessing/Vocabulary/Repeat.php @@ -69,4 +69,19 @@ public function acceptNode( return new GrammarProcessing\ListNode($result); } + + + public function visit(callable $visitor): Symbol + { + $visitedSymbol = $this->symbol->visit($visitor); + + return $visitor( + $visitedSymbol === $this->symbol ? 
$this : new self( + $visitedSymbol, + $this->min, + $this->max, + ), + ); + } + } diff --git a/src/GrammarProcessing/Vocabulary/Sequence.php b/src/GrammarProcessing/Vocabulary/Sequence.php index 32cbb49..7b28705 100644 --- a/src/GrammarProcessing/Vocabulary/Sequence.php +++ b/src/GrammarProcessing/Vocabulary/Sequence.php @@ -42,4 +42,21 @@ public function acceptNode( return new GrammarProcessing\ListNode($result); } + + + public function visit(callable $visitor): Symbol + { + $visitedSymbols = []; + + foreach ($this->symbols as $symbol) { + $visitedSymbols[] = $symbol->visit($visitor); + } + + return $visitor( + $visitedSymbols === $this->symbols + ? $this + : new self($visitedSymbols), + ); + } + } diff --git a/src/GrammarProcessing/Vocabulary/Subtract.php b/src/GrammarProcessing/Vocabulary/Subtract.php index 5612b98..ddd2b93 100644 --- a/src/GrammarProcessing/Vocabulary/Subtract.php +++ b/src/GrammarProcessing/Vocabulary/Subtract.php @@ -55,4 +55,21 @@ public function acceptNode( return $node; } + + + public function visit(callable $visitor): Symbol + { + $visitedBaseSymbol = $this->baseSymbol->visit($visitor); + $visitedSubtractedSymbol = $this->subtractedSymbol->visit($visitor); + + $isDifferent = $visitedBaseSymbol !== $this->baseSymbol || $visitedSubtractedSymbol !== $this->subtractedSymbol; + + return $visitor( + $isDifferent === false ? 
$this : new self( + $visitedBaseSymbol, + $visitedSubtractedSymbol, + ), + ); + } + } diff --git a/src/GrammarProcessing/Vocabulary/Symbol.php b/src/GrammarProcessing/Vocabulary/Symbol.php index c72d2a7..7696e0a 100644 --- a/src/GrammarProcessing/Vocabulary/Symbol.php +++ b/src/GrammarProcessing/Vocabulary/Symbol.php @@ -24,4 +24,11 @@ function acceptNode( array $nonterminals, ): GrammarProcessing\Node; + + + /** + * @param callable(Symbol): Symbol $visitor + */ + function visit(callable $visitor): Symbol; + } From 962e5158f8281dcbc86dd78d8bce2b216b305891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 11:15:01 +0200 Subject: [PATCH 4/9] Fix typo in "Unexpected token" error --- src/GrammarProcessing/LexicalGrammar.php | 2 +- tests/cases/LexicalGrammar.cases.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GrammarProcessing/LexicalGrammar.php b/src/GrammarProcessing/LexicalGrammar.php index 632048a..eca2d07 100644 --- a/src/GrammarProcessing/LexicalGrammar.php +++ b/src/GrammarProcessing/LexicalGrammar.php @@ -154,7 +154,7 @@ private function throwUnexpectedTokenException( { throw new UnexpectedTokenException( sprintf( - "Unxpected token '%s'", + "Unexpected token '%s'", strlen($value) > 12 ? substr($value, 0, 10) . '... 
(truncated)' : $value, diff --git a/tests/cases/LexicalGrammar.cases.php b/tests/cases/LexicalGrammar.cases.php index 9e90995..87b0db7 100644 --- a/tests/cases/LexicalGrammar.cases.php +++ b/tests/cases/LexicalGrammar.cases.php @@ -90,6 +90,6 @@ 'Base' => new Vojtechdobes\GrammarProcessing\Vocabulary\Regexp('a'), ], 'ab', - "Unxpected token 'b' (line 1, col 2)", + "Unexpected token 'b' (line 1, col 2)", ], ]; From 7830d87293dad10ccb1bb508f8701d6b139e5025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 11:16:17 +0200 Subject: [PATCH 5/9] Don't hide unexpected token error in case of no matches --- src/GrammarProcessing/LexicalGrammar.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/GrammarProcessing/LexicalGrammar.php b/src/GrammarProcessing/LexicalGrammar.php index eca2d07..026050a 100644 --- a/src/GrammarProcessing/LexicalGrammar.php +++ b/src/GrammarProcessing/LexicalGrammar.php @@ -42,10 +42,6 @@ public function parseSource(string $source): TokenStream throw new UnexpectedTokenException("Can't parse source: " . 
preg_last_error_msg(), null); } - if ($matches === []) { - return new TokenStream([]); - } - $getLocation = function (int $offset) use ($source): Location { $precedingText = substr($source, 0, $offset); @@ -76,6 +72,10 @@ public function parseSource(string $source): TokenStream ); } + if ($matches === []) { + return new TokenStream([]); + } + if ($this->ignoredTokenSymbols !== []) { if (count($this->ignoredTokenSymbols) === 1) { $matches = array_values( From 20d3efe7b6cd3d3d0051425a97d5c3fa5872f988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 11:14:45 +0200 Subject: [PATCH 6/9] Remove location from Token --- phpstan-baseline.neon | 2 +- src/GrammarProcessing/LexicalGrammar.php | 21 +++++++------------- src/GrammarProcessing/LocationGetter.php | 25 ++++++++++++++++++++++++ src/GrammarProcessing/Token.php | 24 ++--------------------- src/GrammarProcessing/TokenStream.php | 9 +++++---- tests/cases/LexicalGrammar.Test.php | 9 ++++++++- 6 files changed, 48 insertions(+), 42 deletions(-) create mode 100644 src/GrammarProcessing/LocationGetter.php diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 7986378..9760c1d 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -28,4 +28,4 @@ parameters: message: '#^Parameter \$column of class Vojtechdobes\\GrammarProcessing\\Location constructor expects int\<0, max\>, int given\.$#' identifier: argument.type count: 1 - path: src/GrammarProcessing/LexicalGrammar.php + path: src/GrammarProcessing/LocationGetter.php diff --git a/src/GrammarProcessing/LexicalGrammar.php b/src/GrammarProcessing/LexicalGrammar.php index 026050a..1159ef2 100644 --- a/src/GrammarProcessing/LexicalGrammar.php +++ b/src/GrammarProcessing/LexicalGrammar.php @@ -42,14 +42,7 @@ public function parseSource(string $source): TokenStream throw new UnexpectedTokenException("Can't parse source: " . 
preg_last_error_msg(), null); } - $getLocation = function (int $offset) use ($source): Location { - $precedingText = substr($source, 0, $offset); - - return new Location( - line: substr_count($precedingText, "\n") + 1, - column: $offset - strrpos("\n" . $precedingText, "\n") + 1, - ); - }; + $locationGetter = new LocationGetter($source); $expectedOffset = 0; $endOffset = strlen($source); @@ -58,7 +51,7 @@ public function parseSource(string $source): TokenStream if ($expectedOffset !== $match[0][1]) { $this->throwUnexpectedTokenException( $match[0][0], - $getLocation($match[0][1]), + $locationGetter->getLocation($match[0][1]), ); } @@ -68,12 +61,12 @@ public function parseSource(string $source): TokenStream if ($expectedOffset !== strlen($source)) { $this->throwUnexpectedTokenException( substr($source, $expectedOffset), - $getLocation($expectedOffset), + $locationGetter->getLocation($expectedOffset), ); } if ($matches === []) { - return new TokenStream([]); + return new TokenStream([], $locationGetter); } if ($this->ignoredTokenSymbols !== []) { @@ -100,7 +93,7 @@ public function parseSource(string $source): TokenStream } if ($matches === []) { - return new TokenStream([]); + return new TokenStream([], $locationGetter); } $iMin = 1 + count($this->ignoredTokenSymbols); @@ -108,7 +101,7 @@ public function parseSource(string $source): TokenStream return new TokenStream( array_map( - function (array $match) use ($getLocation, $iMax, $iMin): Token { + function (array $match) use ($iMax, $iMin): Token { $type = NULL; for ($i = $iMin; $i < $iMax; $i++) { @@ -121,11 +114,11 @@ function (array $match) use ($getLocation, $iMax, $iMin): Token { $type, $match[0][0], $match[0][1], - $getLocation, ); }, $matches, ), + $locationGetter, ); } diff --git a/src/GrammarProcessing/LocationGetter.php b/src/GrammarProcessing/LocationGetter.php new file mode 100644 index 0000000..83451b6 --- /dev/null +++ b/src/GrammarProcessing/LocationGetter.php @@ -0,0 +1,25 @@ +source, 0, $offset); + + 
return new Location( + line: substr_count($precedingText, "\n") + 1, + column: $offset - strrpos("\n" . $precedingText, "\n") + 1, + ); + } + +} diff --git a/src/GrammarProcessing/Token.php b/src/GrammarProcessing/Token.php index 1762f30..18e800a 100644 --- a/src/GrammarProcessing/Token.php +++ b/src/GrammarProcessing/Token.php @@ -6,30 +6,10 @@ final class Token { - /** @var callable(int): Location $getLocation */ - private $getLocation; - - public Location $location { - - get { - return ($this->getLocation)($this->tokenOffset); - } - - } - - - - /** - * @param callable(int): Location $getLocation - */ public function __construct( public readonly string $type, public readonly string $value, - private readonly int $tokenOffset, - callable $getLocation, - ) - { - $this->getLocation = $getLocation; - } + public readonly int $offset, + ) {} } diff --git a/src/GrammarProcessing/TokenStream.php b/src/GrammarProcessing/TokenStream.php index 634ac3c..76f34b0 100644 --- a/src/GrammarProcessing/TokenStream.php +++ b/src/GrammarProcessing/TokenStream.php @@ -16,6 +16,7 @@ final class TokenStream */ public function __construct( public readonly array $tokens, + private readonly LocationGetter $locationGetter, ) {} @@ -65,7 +66,7 @@ public function consumeEndOfStream(): void throw new CannotConsumeTokenException( "Unexpected token '{$token->value}'", $this->currentToken, - $token->location, + $this->locationGetter->getLocation($token->offset), ); } @@ -88,9 +89,9 @@ public function consumeTokenWithType(string $type): Token if ($token->type !== $type) { throw new CannotConsumeTokenException( - "Expected token with type '{$type}', got '{$token->type}' instead", + "Expected token with type '{$type}', got '{$token->type}' ({$token->value}) instead", $this->currentToken, - $token->location, + $this->locationGetter->getLocation($token->offset), ); } @@ -117,7 +118,7 @@ public function consumeTokenWithValue(string $value): Token throw new CannotConsumeTokenException( "Expected token 
'{$value}', got '{$token->value}' instead", $this->currentToken, - $token->location, + $this->locationGetter->getLocation($token->offset), ); } diff --git a/tests/cases/LexicalGrammar.Test.php b/tests/cases/LexicalGrammar.Test.php index a39d83c..ce78ea2 100644 --- a/tests/cases/LexicalGrammar.Test.php +++ b/tests/cases/LexicalGrammar.Test.php @@ -16,6 +16,13 @@ $expectedTokens, ] = $case; +$locationGetter = new Vojtechdobes\GrammarProcessing\LocationGetter($source); + +function formatLocation(Vojtechdobes\GrammarProcessing\Location $location): string +{ + return "{$location->line},{$location->column}"; +} + $lexicalGrammar = new Vojtechdobes\GrammarProcessing\LexicalGrammar( ignoredTokenSymbols: $ignoredTokenSymbols, syntaxTokenSymbols: $syntaxTokenSymbols, @@ -31,7 +38,7 @@ static fn ($token) => [ $token->type, $token->value, - "{$token->location->line},{$token->location->column}", + formatLocation($locationGetter->getLocation($token->offset)), ], $tokenStream->tokens, ), From 18b8f5f3d94df04a8838fff47c19f2f0ce5b6eb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 11:19:51 +0200 Subject: [PATCH 7/9] Output SelectedNode from OneOf --- src/GrammarProcessing/SelectedNode.php | 28 ++++++++++++++++++++++ src/GrammarProcessing/Vocabulary/OneOf.php | 9 +++++-- 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 src/GrammarProcessing/SelectedNode.php diff --git a/src/GrammarProcessing/SelectedNode.php b/src/GrammarProcessing/SelectedNode.php new file mode 100644 index 0000000..ecef5e1 --- /dev/null +++ b/src/GrammarProcessing/SelectedNode.php @@ -0,0 +1,28 @@ +symbols as $symbol) { + foreach ($this->symbols as $i => $symbol) { $tokenStreamCopy = clone $tokenStream; try { $node = $symbol->acceptNode($error, $tokenStreamCopy, $nonterminals); $attempts[] = [ + 'index' => $i, 'node' => $node, 'tokenStream' => $tokenStreamCopy, ]; @@ -64,7 +65,11 @@ public function acceptNode( $result = array_pop($attempts); 
$tokenStream->advanceTo($result['tokenStream']); - return $result['node']; + + return new GrammarProcessing\SelectedNode( + $result['index'], + $result['node'], + ); } From bf8970a557129a84a1600b177455eec643cc96fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 11:20:15 +0200 Subject: [PATCH 8/9] Add EBNF grammar parser --- phpstan-baseline.neon | 12 + src/GrammarProcessing/Ebnf/GrammarFactory.php | 225 ++++++++++++++++++ .../Ebnf/InterpretationFactory.php | 43 ++++ .../Ebnf/Nodes/Alternation.php | 29 +++ .../Ebnf/Nodes/Concatenation.php | 29 +++ src/GrammarProcessing/Ebnf/Nodes/Factor.php | 42 ++++ src/GrammarProcessing/Ebnf/Nodes/Grammar.php | 152 ++++++++++++ .../Ebnf/Nodes/Identifier.php | 17 ++ src/GrammarProcessing/Ebnf/Nodes/Lhs.php | 17 ++ src/GrammarProcessing/Ebnf/Nodes/Rhs.php | 17 ++ src/GrammarProcessing/Ebnf/Nodes/Rule.php | 20 ++ src/GrammarProcessing/Ebnf/Nodes/Term.php | 35 +++ src/GrammarProcessing/Ebnf/Nodes/Terminal.php | 35 +++ src/GrammarProcessing/Ebnf/Parser.php | 59 +++++ src/GrammarProcessing/Vocabulary/Literal.php | 2 +- src/GrammarProcessing/Vocabulary/Repeat.php | 6 +- src/GrammarProcessing/Vocabulary/Sequence.php | 2 +- tests/cases/Grammars/ebnf-grammar.ebnf | 46 ++++ tests/cases/Grammars/ebnfGrammar.Test.php | 41 ++++ 19 files changed, 824 insertions(+), 5 deletions(-) create mode 100644 src/GrammarProcessing/Ebnf/GrammarFactory.php create mode 100644 src/GrammarProcessing/Ebnf/InterpretationFactory.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Alternation.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Concatenation.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Factor.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Grammar.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Identifier.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Lhs.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Rhs.php create mode 100644 
src/GrammarProcessing/Ebnf/Nodes/Rule.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Term.php create mode 100644 src/GrammarProcessing/Ebnf/Nodes/Terminal.php create mode 100644 src/GrammarProcessing/Ebnf/Parser.php create mode 100644 tests/cases/Grammars/ebnf-grammar.ebnf create mode 100644 tests/cases/Grammars/ebnfGrammar.Test.php diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index 9760c1d..b1a5ba7 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -29,3 +29,15 @@ parameters: identifier: argument.type count: 1 path: src/GrammarProcessing/LocationGetter.php + + - + message: '#^Parameter \#1 \$source of method Vojtechdobes\\GrammarProcessing\\Ebnf\\Parser\:\:parseGrammarFromSource\(\) expects string, string\|false given\.$#' + identifier: argument.type + count: 1 + path: tests/cases/Grammars/ebnfGrammar.Test.php + + - + message: '#^Parameter \#1 \$source of method Vojtechdobes\\GrammarProcessing\\Grammar\:\:parseSource\(\) expects string, string\|false given\.$#' + identifier: argument.type + count: 2 + path: tests/cases/Grammars/ebnfGrammar.Test.php diff --git a/src/GrammarProcessing/Ebnf/GrammarFactory.php b/src/GrammarProcessing/Ebnf/GrammarFactory.php new file mode 100644 index 0000000..b65e370 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/GrammarFactory.php @@ -0,0 +1,225 @@ + new GrammarProcessing\Vocabulary\Regexp('[a-zA-Z]'), + 'digit' => new GrammarProcessing\Vocabulary\Regexp('[0-9]'), + 'symbol' => new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Literal('['), + new GrammarProcessing\Vocabulary\Literal(']'), + new GrammarProcessing\Vocabulary\Literal('{'), + new GrammarProcessing\Vocabulary\Literal('}'), + new GrammarProcessing\Vocabulary\Literal('('), + new GrammarProcessing\Vocabulary\Literal(')'), + new GrammarProcessing\Vocabulary\Literal('<'), + new GrammarProcessing\Vocabulary\Literal('>'), + new GrammarProcessing\Vocabulary\Literal("'"), + new 
GrammarProcessing\Vocabulary\Literal('"'), + new GrammarProcessing\Vocabulary\Literal('='), + new GrammarProcessing\Vocabulary\Literal('|'), + new GrammarProcessing\Vocabulary\Literal('.'), + new GrammarProcessing\Vocabulary\Literal(','), + new GrammarProcessing\Vocabulary\Literal(';'), + new GrammarProcessing\Vocabulary\Literal('-'), + new GrammarProcessing\Vocabulary\Literal('+'), + new GrammarProcessing\Vocabulary\Literal('*'), + new GrammarProcessing\Vocabulary\Literal('?'), + new GrammarProcessing\Vocabulary\Literal('\\'), + ]), + 'character' => new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Nonterminal('letter'), + new GrammarProcessing\Vocabulary\Nonterminal('digit'), + new GrammarProcessing\Vocabulary\Nonterminal('symbol'), + new GrammarProcessing\Vocabulary\Literal('_'), + new GrammarProcessing\Vocabulary\Literal(' '), + ]), + 'identifier' => new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('letter'), + new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Nonterminal('letter'), + new GrammarProcessing\Vocabulary\Nonterminal('digit'), + new GrammarProcessing\Vocabulary\Literal('_'), + ]), + 0, + null, + ), + ]), + 'terminal' => new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal("'"), + new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\Subtract( + new GrammarProcessing\Vocabulary\Nonterminal('character'), + new GrammarProcessing\Vocabulary\Literal("'"), + ), + 1, + null, + ), + new GrammarProcessing\Vocabulary\Literal("'"), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal('"'), + new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\Subtract( + new GrammarProcessing\Vocabulary\Nonterminal('character'), + new GrammarProcessing\Vocabulary\Literal('"'), + 
), + 1, + null, + ), + new GrammarProcessing\Vocabulary\Literal('"'), + ]), + ]), + 'S' => new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Literal(' '), + new GrammarProcessing\Vocabulary\Regexp('\n'), + new GrammarProcessing\Vocabulary\Regexp('\t'), + new GrammarProcessing\Vocabulary\Regexp('\r'), + new GrammarProcessing\Vocabulary\Regexp('\f'), + new GrammarProcessing\Vocabulary\Regexp('\b'), + ]), + 1, + null, + ), + ]; + + $syntacticSymbols = [ + 'opt_S' => new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\Nonterminal('S'), + 0, + null, + ), + 'terminator' => new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Literal(';'), + new GrammarProcessing\Vocabulary\Literal('.'), + ]), + 'term' => new GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal('('), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('rhs'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal(')'), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal('['), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('rhs'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal(']'), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal('{'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('rhs'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal('}'), + ]), + new GrammarProcessing\Vocabulary\Nonterminal('terminal'), + new GrammarProcessing\Vocabulary\Nonterminal('identifier'), + ]), + 'factor' => new 
GrammarProcessing\Vocabulary\OneOf([ + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('term'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal('?'), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('term'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal('*'), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('term'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal('+'), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('term'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal('-'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('term'), + ]), + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('term'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + ]), + ]), + 'concatenation' => new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('factor'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal(','), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('factor'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + ]), + 0, + null, + ), + ]), + 'alternation' => new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('concatenation'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + 
new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Literal('|'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('concatenation'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + ]), + 0, + null, + ), + ]), + 'rhs' => new GrammarProcessing\Vocabulary\Nonterminal('alternation'), + 'lhs' => new GrammarProcessing\Vocabulary\Nonterminal('identifier'), + 'rule' => new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('lhs'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Literal('='), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('rhs'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('terminator'), + ]), + 'grammar' => new GrammarProcessing\Vocabulary\Repeat( + new GrammarProcessing\Vocabulary\Sequence([ + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + new GrammarProcessing\Vocabulary\Nonterminal('rule'), + new GrammarProcessing\Vocabulary\Nonterminal('opt_S'), + ]), + 0, + null, + ), + ]; + + return new GrammarProcessing\Grammar( + lexicalSymbols: $lexicalSymbols, + syntaxTokenSymbols: [ + 'identifier', + 'terminal', + 'symbol', + 'S', + ], + ignoredTokenSymbols: [], + syntacticSymbols: $syntacticSymbols, + ); + } + +} diff --git a/src/GrammarProcessing/Ebnf/InterpretationFactory.php b/src/GrammarProcessing/Ebnf/InterpretationFactory.php new file mode 100644 index 0000000..3003892 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/InterpretationFactory.php @@ -0,0 +1,43 @@ + $lexicalSymbols + * @param non-empty-list $syntaxTokenSymbols + * @param list $ignoredTokenSymbols + */ + public function createInterpretation( + array $lexicalSymbols, + array $syntaxTokenSymbols, + array $ignoredTokenSymbols, + ): 
GrammarProcessing\Interpretation + { + return new GrammarProcessing\Interpretation([ + 'alternation' => new Nodes\Alternation(), + 'concatenation' => new Nodes\Concatenation(), + 'factor' => new Nodes\Factor(), + 'grammar' => new Nodes\Grammar( + lexicalSymbols: $lexicalSymbols, + syntaxTokenSymbols: $syntaxTokenSymbols, + ignoredTokenSymbols: $ignoredTokenSymbols, + ), + 'identifier' => new Nodes\Identifier(), + 'lhs' => new Nodes\Lhs(), + 'rhs' => new Nodes\Rhs(), + 'rule' => new Nodes\Rule(), + 'term' => new Nodes\Term(), + 'terminal' => new Nodes\Terminal(), + ]); + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Alternation.php b/src/GrammarProcessing/Ebnf/Nodes/Alternation.php new file mode 100644 index 0000000..10e1bfe --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Alternation.php @@ -0,0 +1,29 @@ +value[1], + ]; + + foreach ($node->value[3]->value as $item) { + $result[] = yield $item->value[2]; + } + + if (count($result) === 1) { + return $result[0]; + } + + return new GrammarProcessing\Vocabulary\OneOf($result); + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Concatenation.php b/src/GrammarProcessing/Ebnf/Nodes/Concatenation.php new file mode 100644 index 0000000..e224185 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Concatenation.php @@ -0,0 +1,29 @@ +value[1], + ]; + + foreach ($node->value[3]->value as $item) { + $result[] = yield $item->value[2]; + } + + if (count($result) === 1) { + return $result[0]; + } + + return new GrammarProcessing\Vocabulary\Sequence($result); + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Factor.php b/src/GrammarProcessing/Ebnf/Nodes/Factor.php new file mode 100644 index 0000000..6a4dda1 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Factor.php @@ -0,0 +1,42 @@ +index) { + 0 => new GrammarProcessing\Vocabulary\Repeat( + yield $node->value->value[0], + 0, + 1, + ), + 1 => new GrammarProcessing\Vocabulary\Repeat( + yield $node->value->value[0], + 0, + null, + ), + 2 => new 
GrammarProcessing\Vocabulary\Repeat( + yield $node->value->value[0], + 1, + null, + ), + 3 => new GrammarProcessing\Vocabulary\Subtract( + yield $node->value->value[0], + yield $node->value->value[4], + ), + 4 => yield $node->value->value[0], + default => throw new LogicException("This can't happen"), + }; + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Grammar.php b/src/GrammarProcessing/Ebnf/Nodes/Grammar.php new file mode 100644 index 0000000..7cbd9b5 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Grammar.php @@ -0,0 +1,152 @@ + $lexicalSymbols + * @param non-empty-list $syntaxTokenSymbols + * @param list $ignoredTokenSymbols + */ + public function __construct( + private readonly array $lexicalSymbols, + private readonly array $syntaxTokenSymbols, + private readonly array $ignoredTokenSymbols, + ) {} + + + + public function interpret(GrammarProcessing\Node $node): Generator + { + $result = []; + + foreach ($node->value as $item) { + [$identifier, $production] = yield $item->value[1]; + + $result[$identifier->nonterminal] = $production; + } + + $result = $this->ensureOnlyNonoptionalSyntaxTokenSymbols($result); + + return $this->createGrammar($result); + } + + + + /** + * @param array $symbols + * @return array + */ + private function ensureOnlyNonoptionalSyntaxTokenSymbols(array $symbols): array + { + $optionalSyntaxTokenSymbols = []; + + foreach ($this->syntaxTokenSymbols as $syntaxTokenSymbol) { + if (array_key_exists($syntaxTokenSymbol, $symbols) === false) { + throw new LogicException( + "Syntax token symbol '{$syntaxTokenSymbol}' wasn't found in parsed source", + ); + } + + $symbol = $symbols[$syntaxTokenSymbol]; + + if ( + $symbol instanceof GrammarProcessing\Vocabulary\Repeat + && $symbol->min === 0 + ) { + $optionalSyntaxTokenSymbols[] = $syntaxTokenSymbol; + } + } + + if ($optionalSyntaxTokenSymbols === []) { + return $symbols; + } + + $replace = function ( + GrammarProcessing\Vocabulary\Symbol $symbol, + ) use ( + 
$optionalSyntaxTokenSymbols, + ): GrammarProcessing\Vocabulary\Symbol { + if ( + $symbol instanceof GrammarProcessing\Vocabulary\Nonterminal + && in_array($symbol->nonterminal, $optionalSyntaxTokenSymbols, true) + ) { + return new GrammarProcessing\Vocabulary\Repeat( + $symbol, + 0, + 1, + ); + } + + return $symbol; + }; + + foreach ($symbols as $name => $symbol) { + if (in_array($name, $optionalSyntaxTokenSymbols, true)) { + /** @var GrammarProcessing\Vocabulary\Repeat $symbol */ + + $symbols[$name] = new GrammarProcessing\Vocabulary\Repeat( + $symbol->symbol->visit($replace), + 1, + $symbol->max, + ); + } else { + $symbols[$name] = $symbol->visit($replace); + } + } + + return $symbols; + } + + + + /** + * @param array $symbols + */ + private function createGrammar(array $symbols): GrammarProcessing\Grammar + { + $lexicalSymbols = []; + + foreach ($this->lexicalSymbols as $lexicalSymbol) { + if (array_key_exists($lexicalSymbol, $symbols) === false) { + throw new LogicException( + "Lexical symbol '{$lexicalSymbol}' wasn't found in parsed source", + ); + } + + $lexicalSymbols[$lexicalSymbol] = $symbols[$lexicalSymbol]; + } + + $syntacticSymbols = []; + + foreach ($symbols as $name => $symbol) { + if (in_array($symbol, $lexicalSymbols, true) === false) { + $syntacticSymbols[$name] = $symbol; + } + } + + if ($syntacticSymbols === []) { + throw new LogicException( + "Grammar doesn't contain any non-lexical symbols", + ); + } + + return new GrammarProcessing\Grammar( + lexicalSymbols: $lexicalSymbols, + syntaxTokenSymbols: $this->syntaxTokenSymbols, + ignoredTokenSymbols: $this->ignoredTokenSymbols, + syntacticSymbols: $syntacticSymbols, + ); + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Identifier.php b/src/GrammarProcessing/Ebnf/Nodes/Identifier.php new file mode 100644 index 0000000..88d0879 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Identifier.php @@ -0,0 +1,17 @@ +value); + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Lhs.php 
b/src/GrammarProcessing/Ebnf/Nodes/Lhs.php new file mode 100644 index 0000000..5d7a091 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Lhs.php @@ -0,0 +1,17 @@ +value[0]; + $production = yield $node->value[4]; + + return [$identifier, $production]; + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Term.php b/src/GrammarProcessing/Ebnf/Nodes/Term.php new file mode 100644 index 0000000..dda5c23 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Term.php @@ -0,0 +1,35 @@ +index) { + 0 => yield $node->value->value[2], + 1 => new GrammarProcessing\Vocabulary\Repeat( + yield $node->value->value[2], + 0, + 1, + ), + 2 => new GrammarProcessing\Vocabulary\Repeat( + yield $node->value->value[2], + 0, + null, + ), + 3 => yield $node->value, + 4 => yield $node->value, + default => throw new LogicException("This can't happen"), + }; + } + +} diff --git a/src/GrammarProcessing/Ebnf/Nodes/Terminal.php b/src/GrammarProcessing/Ebnf/Nodes/Terminal.php new file mode 100644 index 0000000..15a5413 --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Nodes/Terminal.php @@ -0,0 +1,35 @@ +value; + + if (str_starts_with($value, '"')) { + $value = trim($value, '"'); + } elseif (str_starts_with($value, "'")) { + $value = trim($value, "'"); + } + + if (in_array($value, [ + '\n', + '\t', + '\r', + '\f', + '\b', + ], true)) { + return new GrammarProcessing\Vocabulary\Regexp($value); + } + + return new GrammarProcessing\Vocabulary\Literal($value); + } + +} diff --git a/src/GrammarProcessing/Ebnf/Parser.php b/src/GrammarProcessing/Ebnf/Parser.php new file mode 100644 index 0000000..634880c --- /dev/null +++ b/src/GrammarProcessing/Ebnf/Parser.php @@ -0,0 +1,59 @@ + $lexicalSymbols + * @param non-empty-list $syntaxTokenSymbols + * @param list $ignoredTokenSymbols + */ + public function __construct( + array $lexicalSymbols, + array $syntaxTokenSymbols, + array $ignoredTokenSymbols, + ) + { + $this->ebnfGrammar = new GrammarFactory()->createGrammar(); + $this->ebnfInterpretation = new 
InterpretationFactory()->createInterpretation( + lexicalSymbols: $lexicalSymbols, + syntaxTokenSymbols: $syntaxTokenSymbols, + ignoredTokenSymbols: $ignoredTokenSymbols, + ); + } + + + + public function parseGrammarFromSource(string $source): GrammarProcessing\Grammar + { + return $this->parseGrammarFromAbstractSyntaxTree( + $this->ebnfGrammar->parseSource($source, 'grammar'), + ); + } + + + + /** + * @param GrammarProcessing\AbstractSyntaxTree<'grammar'> $abstractSyntaxTree + */ + public function parseGrammarFromAbstractSyntaxTree( + GrammarProcessing\AbstractSyntaxTree $abstractSyntaxTree, + ): GrammarProcessing\Grammar + { + return $abstractSyntaxTree->interpret($this->ebnfInterpretation); + } + +} diff --git a/src/GrammarProcessing/Vocabulary/Literal.php b/src/GrammarProcessing/Vocabulary/Literal.php index 1d4a100..5a2fa62 100644 --- a/src/GrammarProcessing/Vocabulary/Literal.php +++ b/src/GrammarProcessing/Vocabulary/Literal.php @@ -9,7 +9,7 @@ final class Literal implements Symbol { public function __construct( - private readonly string $literal, + public readonly string $literal, ) {} diff --git a/src/GrammarProcessing/Vocabulary/Repeat.php b/src/GrammarProcessing/Vocabulary/Repeat.php index ff77974..db9d32c 100644 --- a/src/GrammarProcessing/Vocabulary/Repeat.php +++ b/src/GrammarProcessing/Vocabulary/Repeat.php @@ -9,9 +9,9 @@ final class Repeat implements Symbol { public function __construct( - private readonly Symbol $symbol, - private readonly int $min, - private readonly ?int $max, + public readonly Symbol $symbol, + public readonly int $min, + public readonly ?int $max, ) {} diff --git a/src/GrammarProcessing/Vocabulary/Sequence.php b/src/GrammarProcessing/Vocabulary/Sequence.php index 7b28705..9c71c76 100644 --- a/src/GrammarProcessing/Vocabulary/Sequence.php +++ b/src/GrammarProcessing/Vocabulary/Sequence.php @@ -12,7 +12,7 @@ final class Sequence implements Symbol * @param list $symbols */ public function __construct( - private readonly array 
$symbols, + public readonly array $symbols, ) {} diff --git a/tests/cases/Grammars/ebnf-grammar.ebnf b/tests/cases/Grammars/ebnf-grammar.ebnf new file mode 100644 index 0000000..95987e7 --- /dev/null +++ b/tests/cases/Grammars/ebnf-grammar.ebnf @@ -0,0 +1,46 @@ +letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" + | "c" | "d" | "e" | "f" | "g" | "h" | "i" + | "j" | "k" | "l" | "m" | "n" | "o" | "p" + | "q" | "r" | "s" | "t" | "u" | "v" | "w" + | "x" | "y" | "z" ; + +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; + +symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" + | "'" | '"' | "=" | "|" | "." | "," | ";" | "-" + | "+" | "*" | "?" | "\" ; + +character = letter | digit | symbol | "_" | " " ; +identifier = letter , { letter | digit | "_" } ; + +S = { " " | "\n" | "\t" | "\r" | "\f" | "\b" } ; + +terminal = "'" , ( character - "'" ) + , "'" + | '"' , ( character - '"' ) + , '"' ; + +terminator = ";" | "." ; + +term = "(" , S , rhs , S , ")" + | "[" , S , rhs , S , "]" + | "{" , S , rhs , S , "}" + | terminal + | identifier ; + +factor = term , S , "?" 
+ | term , S , "*" + | term , S , "+" + | term , S , "-" , S , term + | term , S ; + +concatenation = S , factor , S , { "," , S , factor , S } ; +alternation = S , concatenation , S , { "|" , S , concatenation , S } ; + +rhs = alternation ; +lhs = identifier ; + +rule = lhs , S , "=" , S , rhs , S , terminator ; + +grammar = ( S , rule , S ) * ; diff --git a/tests/cases/Grammars/ebnfGrammar.Test.php b/tests/cases/Grammars/ebnfGrammar.Test.php new file mode 100644 index 0000000..4e8fb31 --- /dev/null +++ b/tests/cases/Grammars/ebnfGrammar.Test.php @@ -0,0 +1,41 @@ +parseGrammarFromSource($ebnfGrammarSource); + +$ebnfGrammarB = $ebnfParser->parseGrammarFromAbstractSyntaxTree( + $ebnfGrammarA->parseSource($ebnfGrammarSource, 'grammar'), +); + +$ebnfGrammarC = $ebnfParser->parseGrammarFromAbstractSyntaxTree( + $ebnfGrammarB->parseSource($ebnfGrammarSource, 'grammar'), +); + +Tester\Assert::equal( + $ebnfGrammarA, + $ebnfGrammarB, +); From 008f611a8060a2ac34176171a4e4a955c1a5cd9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20Dobe=C5=A1?= Date: Fri, 25 Apr 2025 14:59:13 +0200 Subject: [PATCH 9/9] Keep tab indentation in phpstan-baseline.neon --- .editorconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.editorconfig b/.editorconfig index e0652b0..bc541e8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -9,3 +9,6 @@ trim_trailing_whitespace = true [*.{md,neon,yml}] indent_size = 2 indent_style = space + +[phpstan-baseline.neon] +indent_style = tab