Skip to content

Commit

Permalink
Fix and simplify lookahead logic, improve lexer performance by ~6%
Browse files Browse the repository at this point in the history
  • Loading branch information
JanJakes committed Oct 3, 2024
1 parent b4f0e08 commit f658751
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 78 deletions.
68 changes: 26 additions & 42 deletions custom-parser/parser/MySQLLexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,8 @@ public function __construct(string $input, int $serverVersion = 80000, int $sqlM
$this->input = $input;
$this->serverVersion = $serverVersion;
$this->sqlModes = $sqlModes;
$this->c = $this->input[$this->position] ?? null;
$this->n = $this->input[$this->position + 1] ?? null;
}

public function isSqlModeActive(int $mode): bool
Expand Down Expand Up @@ -2008,7 +2010,8 @@ private function nextToken()
$this->tokenInstance = null;
$this->channel = self::CHANNEL_DEFAULT;

$la = $this->LA(1);
$la = $this->c;
$la2 = $this->n;

if ($la === "'") {
$this->SINGLE_QUOTED_TEXT();
Expand All @@ -2019,7 +2022,7 @@ private function nextToken()
} elseif ($this->isDigit($la)) {
$this->NUMBER();
} elseif ($la === '.') {
if ($this->isDigit($this->LA(2))) {
if ($this->isDigit($la2)) {
$this->NUMBER();
} else {
$this->DOT_IDENTIFIER();
Expand All @@ -2028,7 +2031,7 @@ private function nextToken()
$this->consume();
$this->type = self::EQUAL_OPERATOR;
} elseif ($la === ':') {
if ($this->LA(2) === '=') {
if ($la2 === '=') {
$this->consume(); // Consume the ':'.
$this->consume(); // Consume the '='.
$this->type = self::ASSIGN_OPERATOR;
Expand All @@ -2037,7 +2040,7 @@ private function nextToken()
$this->type = self::COLON_SYMBOL;
}
} elseif ($la === '<') {
if ($this->LA(2) === '=') {
if ($la2 === '=') {
if ($this->LA(3) === '>') {
$this->consume(); // Consume the '<'.
$this->consume(); // Consume the '='.
Expand All @@ -2048,11 +2051,11 @@ private function nextToken()
$this->consume(); // Consume the '='.
$this->type = self::LESS_OR_EQUAL_OPERATOR;
}
} elseif ($this->LA(2) === '>') {
} elseif ($la2 === '>') {
$this->consume(); // Consume the '<'.
$this->consume(); // Consume the '>'.
$this->type = self::NOT_EQUAL_OPERATOR;
} elseif ($this->LA(2) === '<') {
} elseif ($la2 === '<') {
$this->consume(); // Consume the '<'.
$this->consume(); // Consume the '<'.
$this->type = self::SHIFT_LEFT_OPERATOR;
Expand All @@ -2061,11 +2064,11 @@ private function nextToken()
$this->type = self::LESS_THAN_OPERATOR;
}
} elseif ($la === '>') {
if ($this->LA(2) === '=') {
if ($la2 === '=') {
$this->consume(); // Consume the '>'.
$this->consume(); // Consume the '='.
$this->type = self::GREATER_OR_EQUAL_OPERATOR;
} elseif ($this->LA(2) === '>') {
} elseif ($la2 === '>') {
$this->consume(); // Consume the '>'.
$this->consume(); // Consume the '>'.
$this->type = self::SHIFT_RIGHT_OPERATOR;
Expand All @@ -2074,7 +2077,7 @@ private function nextToken()
$this->type = self::GREATER_THAN_OPERATOR;
}
} elseif ($la === '!') {
if ($this->LA(2) === '=') {
if ($la2 === '=') {
$this->consume(); // Consume the '!'.
$this->consume(); // Consume the '='.
$this->type = self::NOT_EQUAL_OPERATOR;
Expand All @@ -2086,7 +2089,7 @@ private function nextToken()
$this->consume();
$this->type = self::PLUS_OPERATOR;
} elseif ($la === '-') {
if ($this->LA(2) === '>') {
if ($la2 === '>') {
if ($this->LA(3) === '>') {
if ($this->serverVersion >= 50713) {
$this->consume(); // Consume the '-'.
Expand Down Expand Up @@ -2115,7 +2118,7 @@ private function nextToken()
$this->consume();
$this->type = self::MULT_OPERATOR;
} elseif ($la === '/') {
if ($this->LA(2) === '*') {
if ($la2 === '*') {
$this->blockComment();
} else {
$this->consume();
Expand All @@ -2125,7 +2128,7 @@ private function nextToken()
$this->consume();
$this->type = self::MOD_OPERATOR;
} elseif ($la === '&') {
if ($this->LA(2) === '&') {
if ($la2 === '&') {
$this->consume(); // Consume the '&'.
$this->consume(); // Consume the '&'.
$this->type = self::LOGICAL_AND_OPERATOR;
Expand All @@ -2137,7 +2140,7 @@ private function nextToken()
$this->consume();
$this->type = self::BITWISE_XOR_OPERATOR;
} elseif ($la === '|') {
if ($this->LA(2) === '|') {
if ($la2 === '|') {
$this->consume(); // Consume the '|'.
$this->consume(); // Consume the '|'.
$this->type = $this->isSqlModeActive(self::SQL_MODE_PIPES_AS_CONCAT)
Expand Down Expand Up @@ -2169,7 +2172,7 @@ private function nextToken()
$this->consume();
$this->type = self::CLOSE_CURLY_SYMBOL;
} elseif ($la === '@') {
if ($this->LA(2) === '@') {
if ($la2 === '@') {
$this->consume(); // Consume the '@'.
$this->consume(); // Consume the '@'.
$this->type = self::AT_AT_SIGN_SYMBOL;
Expand All @@ -2181,7 +2184,7 @@ private function nextToken()
$this->consume();
$this->type = self::PARAM_MARKER;
} elseif ($la === '\\') {
if ($this->LA(2) === 'N') {
if ($la2 === 'N') {
$this->consume(); // Consume the '\'.
$this->consume(); // Consume the 'N'.
$this->type = self::NULL2_SYMBOL;
Expand All @@ -2191,16 +2194,16 @@ private function nextToken()
}
} elseif ($la === '#') {
$this->POUND_COMMENT();
} elseif ($la === '-' && $this->LA(2) === '-') {
} elseif ($la === '-' && $la2 === '-') {
$this->DASHDASH_COMMENT();
} elseif ($this->isWhitespace($la)) {
while ($this->isWhitespace($this->c)) {
$this->consume();
}
$this->channel = self::CHANNEL_HIDDEN;
} elseif ($la === '0' && ($this->LA(2) === 'x' || $this->LA(2) === 'b')) {
} elseif ($la === '0' && ($la2 === 'x' || $la2 === 'b')) {
$this->NUMBER();
} elseif (($la === 'x' || $la === 'X' || $la === 'b' || $la === 'B') && $this->LA(2) === "'") {
} elseif (($la === 'x' || $la === 'X' || $la === 'b' || $la === 'B') && $la2 === "'") {
$this->NUMBER();
} elseif (preg_match('/\G' . self::PATTERN_UNQUOTED_IDENTIFIER . '/u', $this->input, $matches, 0, $this->position)) {
$this->text = $matches[0];
Expand All @@ -2227,34 +2230,15 @@ private function nextToken()

/**
 * Look ahead in the input without consuming any characters.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were stripped,
 * so the removed body and the added body appear back to back. Going by the hunk
 * header (34 lines before, 15 after), the if/elseif/else chain is presumably the
 * removed code and the single return at the end its replacement — confirm
 * against the raw commit. Read literally as one body, the final return is
 * unreachable because every branch above it already returns.
 *
 * @param int $i 1-based lookahead distance; 1 addresses the character at $this->position.
 * @return string|null The character at that offset, or null when the offset is past the end of the input.
 */
protected function LA(int $i): ?string
{
// Lazily fill the cached current character the first time it is needed.
if(null === $this->c) {
$this->c = $this->input[$this->position] ?? null;
}
// Distances 1 and 2 are answered from the cached current/next characters.
if ($i === 1) {
return $this->c;
} elseif ($i === 2) {
return $this->n;
} else {
// Larger distances index the input directly, behind an explicit bounds check.
if ($this->position + $i - 1 >= strlen($this->input)) {
return null;
} else {
return $this->input[$this->position + $i - 1];
}
}
// One expression for every distance: `??` yields null when the offset does not exist.
return $this->input[$this->position + $i - 1] ?? null;
}

/**
 * Append the current character to the token text and move on to the next one,
 * refreshing the cached current ($this->c) and next ($this->n) characters.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were stripped.
 * The guarded if/else block is presumably the removed code and the three
 * statements after it the replacement — confirm against the raw commit. Read
 * literally as one body, the position would be advanced twice whenever the
 * guard is true.
 */
protected function consume(): void
{
// Appending a null current character leaves the text unchanged.
$this->text .= $this->c;

// Guarded variant: advance only while inside the input; otherwise clear both cached characters.
if ($this->position < strlen($this->input)) {
++$this->position;
$this->c = $this->input[$this->position] ?? null;
$this->n = $this->input[$this->position + 1] ?? null;
} else {
$this->c = null;
$this->n = null;
}
// Unconditional variant: always advance; `??` turns reads past the end into null.
$this->position += 1;
$this->c = $this->input[$this->position] ?? null;
$this->n = $this->input[$this->position + 1] ?? null;
}

protected function matchEOF(): void
Expand Down Expand Up @@ -2422,7 +2406,7 @@ protected function NUMBER()
$this->HEX_NUMBER();
} elseif (($this->c === '0' && $this->n === 'b') || (strtolower($this->c) === 'b' && $this->n === "'")) {
$this->BIN_NUMBER();
} elseif ($this->c === '.' && $this->isDigit($this->LA(2))) {
} elseif ($this->c === '.' && $this->isDigit($this->n)) {
$this->DECIMAL_NUMBER();
} else {
$this->INT_NUMBER();
Expand Down
35 changes: 0 additions & 35 deletions tests/parser/data/failures.csv
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,6 @@ END;"
CHECK (f1 < f2))"
"my $mode = (stat($ENV{SESSION_FILE}))[2]"
"my $perm = sprintf ""%04o"", ($mode & 07777)"
"/* line with only comment */"
"SELECT 1 /*!99999 /* */ */"
"SELECT 2 /*!12345 /* */ */"
"SELECT 3 /*! /* */ */"
Expand Down Expand Up @@ -697,8 +696,6 @@ JOIN t1 ON 1 WHERE (CAST(""1"" AS JSON) MEMBER OF( t1.col_json->'$[*]'))"
pk INT PRIMARY KEY,
vc VARCHAR(1) NOT NULL,
gc INT GENERATED ALWAYS AS (1))"
"/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */"
"/*!40101 SET SQL_MODE=@OLD_SQL_MODE */"
echo
"REVOKE ALL PRIVILEGES ON db35380295.* FROM u1b35380295"
"FLUSH LOCAL TABLES test.t1, test.t2 for ExPoRt"
Expand Down Expand Up @@ -731,8 +728,6 @@ UNIQUE KEY(pk))"
"CREATE TABLE self (base INT, pk INT GENERATED ALWAYS AS (base+1) STORED, fk INT,
UNIQUE KEY(pk), FOREIGN KEY (fk) REFERENCES self(pk))"
"CREATE TABLE parent0 (base INT, pk INT GENERATED ALWAYS AS (base+1) VIRTUAL, UNIQUE KEY(pk))"
"/*!50101 CREATE TABLE child (fk INT, FOREIGN KEY (fk) REFERENCES parent(pk)) */"
"/*! ALTER TABLE child ADD FOREIGN KEY (fk) REFERENCES parent(pk) */"
"CREATE TABLE t1 (f1 INT PRIMARY KEY, f2 INT, F3 INT AS (f1+1) VIRTUAL,
FOREIGN KEY(f2) REFERENCES t2(f3))"
"CREATE TABLE t1 (f1 VARCHAR(10), f2 VARCHAR(10),
Expand Down Expand Up @@ -1764,16 +1759,6 @@ SELECT * FROM information_schema.schemata"


connect(root_con,localhost,root,,mysql)"
"/* 8 */
INSERT INTO t1 (id) SELECT id FROM t1"
"/* 12 */
INSERT INTO t1 (id) SELECT id FROM t1"
"/* 16 */
INSERT INTO t1 (id) SELECT id FROM t1"
"/* 20 */
INSERT INTO t1 (id) SELECT id FROM t1"
"/* 24 */
INSERT INTO t1 SELECT id+1 FROM t1"
"CREATE TABLE t1(a INT,
b INT GENERATED ALWAYS AS (-a) VIRTUAL,
c INT GENERATED ALWAYS AS (-a) STORED)"
Expand Down Expand Up @@ -2269,10 +2254,6 @@ connection default"
"foreach $line (@lines) {
if ($line =~ /^select/) {
print $line"
"/*!50003 CREATE FUNCTION `f`() RETURNS bigint(20)
return 42 */"
"/*!50003 CREATE PROCEDURE `p`()
select 42 */"
"revoke all privileges on mysqldump_myDB.* from myDB_User@localhost"
"CREATE TABLE t1 (pk INTEGER, a INTEGER, b INTEGER, c VARCHAR(16),
sum INTEGER GENERATED ALWAYS AS (a+b),
Expand Down Expand Up @@ -2326,7 +2307,6 @@ EOF

echo # cleanup
DROP TABLE b34999015_db.`KEY`"
"/*!99999 SET @@SESSION.non_supported_session_variable = 1*/"
"REVOKE ALL PRIVILEGES ON *.* FROM user1"
"CREATE TEMPORARY TABLE Temp3 LIKE Temp2"
"CREATE TEMPORARY TABLE tt1 LIKE performance_schema.setup_consumers"
Expand Down Expand Up @@ -2963,12 +2943,6 @@ ELSE f END"
"SHOW PARSE_TREE SELECT * FROM t JOIN t2 NATURAL JOIN (t3 LEFT JOIN t4 USING (col1, col2))"
"SHOW PARSE_TREE SELECT * FROM t JOIN t2 JOIN t3 USING (col1, col2) JOIN (SELECT * FROM tab) AS t4"
"SHOW PARSE_TREE SELECT db.func(), char(col1), char(col1 USING utf8mb4), concat(a,b), concat(a,b) COLLATE utf8mb4_turkish_ci"
"/* John Doe wants 1 table and 4 chairs */
call po_create_order(""P"", 1, @my_po)"
"/* Marry Smith wants a coffee table */
call po_create_order(""P"", 2, @my_po)"
"/* The local school wants 10 class tables and 20 chairs */
call po_create_order(""M"", 4, @my_po)"
"my $filename = $ENV{""MYSQLD_PIDFILE""} or die(""pidfile not set"")"
"my $pid"
"my $wait_cnt=60"
Expand Down Expand Up @@ -4041,15 +4015,6 @@ end"
"call bug1656(@1, @2)"
"select @1, @2"
"show binary log status"
"/*!50003 create function bug14723()
returns bigint(20)
main_loop: begin
return 42;
end */"
"/*!50003 create procedure bug14723()
main_loop: begin
select 42;
end */"
"create procedure mysqltest1.p1()
begin
alter database character set koi8r;
Expand Down
2 changes: 1 addition & 1 deletion tests/parser/data/stats.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Total: 66241 | Failures: 2147 / 3% | Exceptions: 0 / 0%
Total: 66241 | Failures: 2129 / 3% | Exceptions: 0 / 0%

0 comments on commit f658751

Please sign in to comment.