Skip to content

Commit

Permalink
Add basic support for nested subpatterns. (#237)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jezza committed May 13, 2022
1 parent 925c49e commit 18230cb
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 33 deletions.
49 changes: 16 additions & 33 deletions logos-derive/src/parser/subpattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::parser::definition::{bytes_to_regex_string, Literal};

#[derive(Default)]
pub struct Subpatterns {
map: Vec<(Ident, Literal)>,
map: Vec<(Ident, String)>,
}

impl Subpatterns {
Expand All @@ -27,28 +27,19 @@ impl Subpatterns {
return;
}

// Validate the literal as proper regex. If it's not, error and manufacture a substitute.
let lit = match &lit {
Literal::Utf8(s) => match Mir::utf8(&s.value()) {
Ok(_) => lit,
Err(err) => {
errors.err(err, lit.span());
Literal::Utf8(LitStr::new(&param.to_string(), lit.span()))
}
},
Literal::Bytes(b) => {
let source = bytes_to_regex_string(b.value());
match Mir::binary(&source) {
Ok(_) => lit,
Err(err) => {
errors.err(err, lit.span());
Literal::Bytes(LitByteStr::new(param.to_string().as_bytes(), lit.span()))
}
}
}
let fixed = self.fix(&lit, errors);

// Validate the literal as proper regex. If it's not, emit an error.
let mir = match &lit {
Literal::Utf8(_) => Mir::utf8(&fixed),
Literal::Bytes(_) => Mir::binary(&fixed),
};

self.map.push((param, lit));
if let Err(err) = mir {
errors.err(err, lit.span());
};

self.map.push((param, fixed));
}

pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String {
Expand Down Expand Up @@ -86,18 +77,10 @@ impl Subpatterns {
};

match self.map.iter().find(|(def, _)| *def == name) {
Some((_, val)) => match val {
Literal::Utf8(val) => {
let subpattern = val.value();
pattern.replace_range(i..subref_end, &subpattern);
i += subpattern.len() + 1;
}
Literal::Bytes(val) => {
let subpattern = bytes_to_regex_string(val.value());
pattern.replace_range(i..subref_end, &subpattern);
i += subpattern.len() + 1;
}
},
Some((_, subpattern)) => {
pattern.replace_range(i..subref_end, &subpattern);
i += subpattern.len() + 1;
}
None => {
errors.err(
format!("subpattern reference `{}` has not been defined", name),
Expand Down
18 changes: 18 additions & 0 deletions tests/tests/advanced.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use logos_derive::Logos;

#[derive(Logos, Debug, Clone, Copy, PartialEq)]
#[logos(subpattern xdigit = r"[0-9a-fA-F]")]
#[logos(subpattern a = r"A")]
#[logos(subpattern b = r"(?&a)BB(?&a)")]
enum Token {
#[regex(r"[ \t\n\f]+", logos::skip)]
#[error]
Expand All @@ -13,6 +15,9 @@ enum Token {
#[regex("0[xX](?&xdigit)+")]
LiteralHex,

#[regex("~?(?&b)~?")]
Abba,

#[regex("-?[0-9]+")]
LiteralInteger,

Expand Down Expand Up @@ -228,4 +233,17 @@ mod advanced {
],
);
}

// Exercises a subpattern that itself references another subpattern:
// `b` is declared as `(?&a)BB(?&a)` with `a = A`, and `Token::Abba`
// matches `~?(?&b)~?`, so each of the four inputs below should lex as
// a single `Abba` token with an optional `~` on either side.
#[test]
fn subpatterns() {
    let source = "ABBA~ ~ABBA ~ABBA~ ABBA";

    // (token, matched slice, byte span within `source`)
    let expected = [
        (Token::Abba, "ABBA~", 0..5),
        (Token::Abba, "~ABBA", 6..11),
        (Token::Abba, "~ABBA~", 12..18),
        (Token::Abba, "ABBA", 19..23),
    ];

    assert_lex(source, &expected);
}
}

0 comments on commit 18230cb

Please sign in to comment.