From 8b49c0cbd18191169d298f3141e9165a4a414b46 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Thu, 4 Nov 2021 21:35:57 -0700 Subject: [PATCH] Ignore leading byte order mark in source files (#1021) --- src/lexer.rs | 2 ++ src/parser.rs | 9 ++++++- src/token_kind.rs | 4 +++- tests/byte_order_mark.rs | 52 ++++++++++++++++++++++++++++++++++++++++ tests/lib.rs | 1 + 5 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 tests/byte_order_mark.rs diff --git a/src/lexer.rs b/src/lexer.rs index 232ab949e6..42267cf9cc 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -490,6 +490,7 @@ impl<'src> Lexer<'src> { '@' => self.lex_single(At), '[' => self.lex_delimiter(BracketL), '\n' | '\r' => self.lex_eol(), + '\u{feff}' => self.lex_single(ByteOrderMark), ']' => self.lex_delimiter(BracketR), '`' | '"' | '\'' => self.lex_string(), '{' => self.lex_delimiter(BraceL), @@ -926,6 +927,7 @@ mod tests { BraceR => "}", BracketL => "[", BracketR => "]", + ByteOrderMark => "\u{feff}", Colon => ":", ColonEquals => ":=", Comma => ",", diff --git a/src/parser.rs b/src/parser.rs index 32bf5c3172..1dc064a5fc 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -57,7 +57,12 @@ impl<'tokens, 'src> Parser<'tokens, 'src> { /// `Parser::next` fn unexpected_token(&self) -> CompileResult<'src, CompileError<'src>> { self.error(CompileErrorKind::UnexpectedToken { - expected: self.expected.iter().cloned().collect::<Vec<TokenKind>>(), + expected: self + .expected + .iter() + .cloned() + .filter(|kind| *kind != ByteOrderMark) + .collect::<Vec<TokenKind>>(), found: self.next()?.kind, }) } @@ -302,6 +307,8 @@ impl<'tokens, 'src> Parser<'tokens, 'src> { let mut eol_since_last_comment = false; + self.accept(ByteOrderMark)?; + loop { let next = self.next()?; diff --git a/src/token_kind.rs b/src/token_kind.rs index bd75273601..79139bb669 100644 --- a/src/token_kind.rs +++ b/src/token_kind.rs @@ -11,6 +11,7 @@ pub(crate) enum TokenKind { BraceR, BracketL, BracketR, + ByteOrderMark, Colon, ColonEquals, Comma, @@ -51,6 +52,7 @@ impl 
Display for TokenKind { BraceR => "'}'", BracketL => "'['", BracketR => "']'", + ByteOrderMark => "byte order mark", Colon => "':'", ColonEquals => "':='", Comma => "','", @@ -61,6 +63,7 @@ impl Display for TokenKind { Eol => "end of line", Equals => "'='", EqualsEquals => "'=='", + EqualsTilde => "'=~'", Identifier => "identifier", Indent => "indent", InterpolationEnd => "'}}'", @@ -70,7 +73,6 @@ impl Display for TokenKind { Plus => "'+'", StringToken => "string", Text => "command text", - EqualsTilde => "'=~'", Unspecified => "unspecified", Whitespace => "whitespace", } diff --git a/tests/byte_order_mark.rs b/tests/byte_order_mark.rs new file mode 100644 index 0000000000..676aefa1ea --- /dev/null +++ b/tests/byte_order_mark.rs @@ -0,0 +1,52 @@ +use crate::common::*; + +#[test] +fn ignore_leading_byte_order_mark() { + Test::new() + .justfile( + " + \u{feff}foo: + echo bar + ", + ) + .stderr("echo bar\n") + .stdout("bar\n") + .run(); +} + +#[test] +fn non_leading_byte_order_mark_produces_error() { + Test::new() + .justfile( + " + foo: + echo bar + \u{feff} + ", + ) + .stderr( + " + error: Expected \'@\', comment, end of file, end of line, or identifier, but found byte order mark + | + 3 | \u{feff} + | ^ + ") + .status(EXIT_FAILURE) + .run(); +} + +#[test] +fn dont_mention_byte_order_mark_in_errors() { + Test::new() + .justfile("{") + .stderr( + " + error: Expected '@', comment, end of file, end of line, or identifier, but found '{' + | + 1 | { + | ^ + ", + ) + .status(EXIT_FAILURE) + .run(); +} diff --git a/tests/lib.rs b/tests/lib.rs index 70bfd1ee50..fe9819afce 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -3,6 +3,7 @@ mod test; mod assert_stdout; mod assert_success; +mod byte_order_mark; mod changelog; mod choose; mod command;