Skip to content

Commit

Permalink
perf(parser): use memchr for lexing comments (#8193)
Browse files Browse the repository at this point in the history
  • Loading branch information
sno2 authored Oct 27, 2023
1 parent c36efe2 commit e2b5c6a
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 1 deletion.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/ruff_python_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ bitflags = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
lalrpop-util = { version = "0.20.0", default-features = false }
memchr = { workspace = true }
unicode-ident = { workspace = true }
unicode_names2 = { workspace = true }
rustc-hash = { workspace = true }
Expand Down
4 changes: 3 additions & 1 deletion crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,9 @@ impl<'source> Lexer<'source> {
#[cfg(debug_assertions)]
debug_assert_eq!(self.cursor.previous(), '#');

self.cursor.eat_while(|c| !matches!(c, '\n' | '\r'));
let bytes = self.cursor.rest().as_bytes();
let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len());
self.cursor.skip_bytes(offset);

Tok::Comment(self.token_text().to_string())
}
Expand Down
17 changes: 17 additions & 0 deletions crates/ruff_python_parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,21 @@ impl<'a> Cursor<'a> {
self.bump();
}
}

/// Skips the next `count` bytes.
///
/// Skipping zero bytes is a no-op: the cursor does not move and, in debug builds,
/// the recorded previous character is left as it was.
///
/// ## Panics
///  - If `count` is larger than the remaining bytes in the input stream.
///  - If `count` indexes into a multi-byte character.
pub(super) fn skip_bytes(&mut self, count: usize) {
    // Read the remaining input once; both slices below are taken from it.
    let remaining = self.chars.as_str();

    #[cfg(debug_assertions)]
    {
        // Only overwrite the previous character when something was actually
        // skipped. With `count == 0` the skipped slice is empty, and resetting
        // `prev_char` to `'\0'` would discard the character consumed before
        // this call (e.g. the `#` of an empty comment).
        if let Some(last_skipped) = remaining[..count].chars().next_back() {
            self.prev_char = last_skipped;
        }
    }

    // Slicing panics if `count` is out of bounds or not on a char boundary,
    // which is the documented contract of this method.
    self.chars = remaining[count..].chars();
}
}

0 comments on commit e2b5c6a

Please sign in to comment.