diff --git a/.gitignore b/.gitignore index 2e0ed6d..1b2a7b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ /.vscode /target + +# rendered code fragments +/*.pdf diff --git a/Cargo.lock b/Cargo.lock index d3f609d..4a3b158 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,15 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] [[package]] name = "anstream" @@ -195,6 +204,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustix" version = "0.38.34" @@ -283,6 +321,7 @@ name = "technique" version = "0.3.0" dependencies = [ "clap", + "regex", "serde", "tinytemplate", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 75da6fc..73e587e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,10 @@ license = "MIT" [dependencies] clap = { version = "4.5.16", features = [ "wrap_help" ] } +regex = "1.11.1" serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" 
tracing-subscriber = "0.3.18" + +[build-dependencies] diff --git a/src/language/mod.rs b/src/language/mod.rs new file mode 100644 index 0000000..5f445f7 --- /dev/null +++ b/src/language/mod.rs @@ -0,0 +1,6 @@ +// Types representing the Technique procedures language + +mod types; + +// Re-export all public symbols +pub use types::*; diff --git a/src/language/types.rs b/src/language/types.rs new file mode 100644 index 0000000..528ec60 --- /dev/null +++ b/src/language/types.rs @@ -0,0 +1,350 @@ +// Abstract Syntax Trees for the Technique language + +use regex::Regex; + +#[derive(Eq, Debug, PartialEq)] +pub struct Technique<'i> { + pub header: Option>, + pub body: Option>>, +} + +#[derive(Eq, Debug, PartialEq)] +pub struct Metadata<'i> { + pub version: u8, + pub license: Option<&'i str>, + pub copyright: Option<&'i str>, + pub template: Option<&'i str>, +} + +impl Default for Metadata<'_> { + fn default() -> Self { + Metadata { + version: 1, + license: None, + copyright: None, + template: None, + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ValidationError { + ZeroLengthToken, + InvalidLicense, + InvalidCopyright, + InvalidTemplate, + InvalidIdentifier, + InvalidForma, + InvalidGenus, +} + +#[derive(Eq, Debug, PartialEq)] +pub struct Procedure<'i> { + pub name: Identifier<'i>, + pub signature: Option>, +} + +#[derive(Eq, Debug, PartialEq)] +pub struct Identifier<'i>(pub &'i str); + +#[derive(Eq, Debug, PartialEq)] +pub struct Forma<'i>(pub &'i str); + +#[derive(Eq, Debug, PartialEq)] +pub enum Genus<'i> { + Unit, + Single(Forma<'i>), + Tuple(Vec>), + List(Forma<'i>), +} + +#[derive(Eq, Debug, PartialEq)] +pub struct Signature<'i> { + pub domain: Genus<'i>, + pub range: Genus<'i>, +} + +#[derive(Eq, Debug, PartialEq)] +pub struct Attribute<'i>(pub &'i str); + +pub fn validate_identifier(input: &str) -> Result { + if input.len() == 0 { + return Err(ValidationError::ZeroLengthToken); + } + + let re = Regex::new(r"^[a-z][a-z0-9_]*$").unwrap(); + if 
re.is_match(input) {
+        Ok(Identifier(input))
+    } else {
+        Err(ValidationError::InvalidIdentifier)
+    }
+}
+
+/// Check that `input` is a forma name: an ASCII uppercase letter followed by
+/// any number of ASCII letters or digits.
+///
+/// Returns `ValidationError::ZeroLengthToken` for an empty string and
+/// `ValidationError::InvalidForma` for any other violation.
+pub fn validate_forma(input: &str) -> Result<Forma, ValidationError> {
+    if input.len() == 0 {
+        return Err(ValidationError::ZeroLengthToken);
+    }
+
+    let mut cs = input.chars();
+
+    // the unwrap cannot fail: the empty string was rejected above
+    if !cs
+        .next()
+        .unwrap()
+        .is_ascii_uppercase()
+    {
+        return Err(ValidationError::InvalidForma);
+    }
+
+    for c in cs {
+        if !(c.is_ascii_uppercase() || c.is_ascii_lowercase() || c.is_ascii_digit()) {
+            return Err(ValidationError::InvalidForma);
+        }
+    }
+
+    Ok(Forma(input))
+}
+
+/// Turn the textual form of a genus into a [`Genus`].
+///
+/// The first character selects the shape: `[` produces `Genus::List`, `(`
+/// produces `Genus::Unit` when nothing sits between the parentheses and
+/// `Genus::Tuple` of the comma-separated formas otherwise, and anything else
+/// is treated as a single forma.
+///
+/// This one copes with (and discards) any internal whitespace encountered.
+///
+/// An empty `input` yields `ValidationError::ZeroLengthToken` rather than
+/// panicking. Every forma is checked with `validate_forma`, whose errors are
+/// passed through unchanged.
+pub fn validate_genus(input: &str) -> Result<Genus, ValidationError> {
+    let first = input
+        .chars()
+        .next()
+        .ok_or(ValidationError::ZeroLengthToken)?;
+
+    match first {
+        '[' => {
+            // consume up to closing bracket
+            let re = Regex::new(r"\[\s*(.+)\s*\]").unwrap();
+
+            // NOTE(review): a missing closing bracket is reported as
+            // ZeroLengthToken; kept as-is so callers see the same error.
+            let cap = match re.captures(input) {
+                Some(c) => c,
+                None => return Err(ValidationError::ZeroLengthToken),
+            };
+
+            // The capture group is greedy, so it swallows any whitespace
+            // before the closing bracket; strip it here.
+            let one = cap
+                .get(1)
+                .map(|v| v.as_str().trim_end())
+                .ok_or(ValidationError::InvalidGenus)?;
+
+            let forma = validate_forma(one)?;
+
+            Ok(Genus::List(forma))
+        }
+        '(' => {
+            // first trim off the parenthesis and whitespace
+            let re = Regex::new(r"\(\s*(.*)\s*\)").unwrap();
+
+            let cap = match re.captures(input) {
+                Some(c) => c,
+                None => return Err(ValidationError::ZeroLengthToken),
+            };
+
+            let one = cap
+                .get(1)
+                .map(|v| v.as_str())
+                .ok_or(ValidationError::InvalidGenus)?;
+
+            if one.len() == 0 {
+                return Ok(Genus::Unit);
+            }
+
+            // now split on , characters, and gather
+
+            let mut formas: Vec<Forma> = Vec::new();
+
+            for text in one.split(",") {
+                let text = text.trim();
+                let forma = validate_forma(text)?;
+                formas.push(forma);
+            }
+
+            Ok(Genus::Tuple(formas))
+        }
+        _ => {
+            let re = Regex::new(r"(.+)\s*").unwrap();
+
+            let cap = match re.captures(input) {
+                Some(c) => c,
+                None => return Err(ValidationError::ZeroLengthToken),
+            };
+
+            // Greedy capture again: trailing whitespace lands inside the
+            // group instead of the `\s*` that follows it.
+            let one = cap
+                .get(1)
+                .map(|v| v.as_str().trim_end())
+                .ok_or(ValidationError::InvalidGenus)?;
+
+            let forma = validate_forma(one)?;
+
+            Ok(Genus::Single(forma))
+        }
+    }
+}
+
+// the validate functions all need to have start and end anchors, which seems
+// like it should be abstracted away.
+
+/// Accept a non-empty license string made only of ASCII letters, digits,
+/// `.`, `,`, `-`, `_`, spaces, parentheses and square brackets.
+///
+/// Returns the input unchanged, or `ValidationError::InvalidLicense`.
+pub fn validate_license(input: &str) -> Result<&str, ValidationError> {
+    let re = Regex::new(r"^[A-Za-z0-9.,\-_ \(\)\[\]]+$").unwrap();
+
+    if re.is_match(input) {
+        Ok(input)
+    } else {
+        Err(ValidationError::InvalidLicense)
+    }
+}
+
+/// Accept a non-empty copyright holder string using the same character set
+/// as `validate_license`.
+///
+/// Returns the input unchanged, or `ValidationError::InvalidCopyright`.
+pub fn validate_copyright(input: &str) -> Result<&str, ValidationError> {
+    let re = Regex::new(r"^[A-Za-z0-9.,\-_ \(\)\[\]]+$").unwrap();
+
+    if re.is_match(input) {
+        Ok(input)
+    } else {
+        Err(ValidationError::InvalidCopyright)
+    }
+}
+
+/// Accept a non-empty template name made only of ASCII letters, digits,
+/// `.`, `,` and `-`.
+///
+/// Returns the input unchanged, or `ValidationError::InvalidTemplate`.
+pub fn validate_template(input: &str) -> Result<&str, ValidationError> {
+    let re = Regex::new(r"^[A-Za-z0-9.,\-]+$").unwrap();
+
+    if re.is_match(input) {
+        Ok(input)
+    } else {
+        Err(ValidationError::InvalidTemplate)
+    }
+}
+
+#[cfg(test)]
+mod check {
+    use super::*;
+
+    #[test]
+    fn identifier_rules() {
+        assert_eq!(validate_identifier("a"), Ok(Identifier("a")));
+        assert_eq!(validate_identifier("ab"), Ok(Identifier("ab")));
+        assert_eq!(validate_identifier("johnny5"), Ok(Identifier("johnny5")));
+        assert_eq!(
+            validate_identifier("Pizza"),
+            Err(ValidationError::InvalidIdentifier)
+        );
+        assert_eq!(
+            validate_identifier("pizZa"),
+            Err(ValidationError::InvalidIdentifier)
+        );
+        assert!(validate_identifier("0trust").is_err());
+        assert_eq!(
+            validate_identifier("make_dinner"),
+            Ok(Identifier("make_dinner"))
+        );
+        assert!(validate_identifier("MakeDinner").is_err());
+        assert!(validate_identifier("make-dinner").is_err());
+    }
+
+    #[test]
+    fn forma_rules() {
+        assert_eq!(validate_forma("A"), Ok(Forma("A")));
+        assert_eq!(validate_forma("Beans"), Ok(Forma("Beans")));
+        assert_eq!(validate_forma("lower"), Err(ValidationError::InvalidForma));
+        assert_eq!(
+            validate_forma("0Degrees"),
+            Err(ValidationError::InvalidForma)
+        );
+    }
+
+    #[test]
+    fn genus_rules_edge_cases() {
+        // empty input is an error, not a panic
+        assert_eq!(validate_genus(""), Err(ValidationError::ZeroLengthToken));
+
+        // whitespace around the forma is discarded
+        assert_eq!(validate_genus("[ A ]"), Ok(Genus::List(Forma("A"))));
+        assert_eq!(validate_genus("A "), Ok(Genus::Single(Forma("A"))));
+    }
+
+    #[test]
+    fn
genus_rules_single() { + assert_eq!(validate_genus("A"), Ok(Genus::Single(Forma("A")))); + } + + #[test] + fn genus_rules_list() { + assert_eq!(validate_genus("[A]"), Ok(Genus::List(Forma("A")))); + } + + #[test] + fn genus_rules_tuple() { + assert_eq!( + validate_genus("(A, B)"), + Ok(Genus::Tuple(vec![Forma("A"), Forma("B")])) + ); + + assert_eq!( + validate_genus("(Coffee, Tea)"), + Ok(Genus::Tuple(vec![Forma("Coffee"), Forma("Tea")])) + ); + + // not actually sure whether we should be normalizing this? Probably + // not, because formatting and linting is a separate concern. + + assert_eq!(validate_genus("(A)"), Ok(Genus::Tuple(vec![Forma("A")]))); + } + + #[test] + fn genus_rules_unit() { + assert_eq!(validate_genus("()"), Ok(Genus::Unit)); + } + + #[test] + fn license_rules() { + assert_eq!(validate_license("MIT"), Ok("MIT")); + assert_eq!(validate_license("Public Domain"), Ok("Public Domain")); + assert_eq!(validate_license("CC BY-SA 3.0 IGO"), Ok("CC BY-SA 3.0 IGO")); + } + + #[test] + fn copyright_rules() { + assert_eq!(validate_copyright("ACME"), Ok("ACME")); + assert_eq!(validate_copyright("lower"), Ok("lower")); + assert_eq!(validate_copyright("ACME, Inc"), Ok("ACME, Inc")); + assert_eq!(validate_copyright("2024 ACME, Inc."), Ok("2024 ACME, Inc.")); + } + + #[test] + fn template_rules() { + assert_eq!(validate_template("checklist"), Ok("checklist")); + assert_eq!(validate_template("checklist,v1"), Ok("checklist,v1")); + assert_eq!(validate_template("checklist-v1.0"), Ok("checklist-v1.0")); + } + + fn maker<'i>() -> Metadata<'i> { + let t1 = Metadata { + version: 1, + license: None, + copyright: None, + template: None, + }; + + t1 + } + + #[test] + fn ast_construction() { + let t1 = Metadata { + version: 1, + license: None, + copyright: None, + template: None, + }; + + assert_eq!(Metadata::default(), t1); + + let t2 = Metadata { + version: 1, + license: Some("MIT"), + copyright: Some("ACME, Inc"), + template: Some("checklist"), + }; + + let t3 = maker(); 
+ + assert_eq!(t3, t1); + + let t4 = Metadata { + license: Some("MIT"), + copyright: Some("ACME, Inc"), + template: Some("checklist"), + ..t3 + }; + + assert_eq!(t4, t2); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..39dba47 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod language; diff --git a/src/main.rs b/src/main.rs index 8a81de0..661e0fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use std::path::Path; use tracing::debug; use tracing_subscriber; +mod parsing; mod rendering; fn main() { @@ -89,10 +90,11 @@ fn main() { let filename = submatches .get_one::("filename") - .unwrap(); // argument are required by definitin so always present + .unwrap(); // argument are required by definition so always present debug!(filename); + parsing::load(&Path::new(filename)); todo!(); } Some(("format", submatches)) => { diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs new file mode 100644 index 0000000..2ef0e68 --- /dev/null +++ b/src/parsing/mod.rs @@ -0,0 +1,12 @@ +// parser for the Technique language +use std::path::Path; + +mod parser; +mod scope; + +pub fn load(source: &Path) { + // read source to a str + let content = std::fs::read_to_string(source).expect("Failed to read the source file"); + + parser::parse_via_scopes(content.as_str()); +} diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs new file mode 100644 index 0000000..16eafb9 --- /dev/null +++ b/src/parsing/parser.rs @@ -0,0 +1,903 @@ +#![allow(dead_code)] + +use std::any::type_name; + +use regex::Regex; +use technique::language::*; + +use super::scope::*; + +pub fn parse_via_scopes(content: &str) { + let mut input = Parser::new(); + input.initialize(content); + + let result = input.parse_technique_header(); + println!("{:?}", result); + println!("{:?}", input); + + std::process::exit(0); +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ParsingError { + IllegalParserState, + Unimplemented, + ZeroLengthToken, + Unrecognized, // improve 
this + Expected(&'static str), + InvalidHeader, + ValidationFailure(ValidationError), + InvalidCharacter(char), + UnexpectedEndOfInput, + InvalidIdentifier, + InvalidForma, + InvalidGenus, + InvalidSignature, + InvalidDeclaration, +} + +impl From for ParsingError { + fn from(error: ValidationError) -> Self { + ParsingError::ValidationFailure(error) + } +} + +#[derive(Debug)] +struct Parser<'i> { + scope: Scope, + source: &'i str, + offset: usize, + count: usize, +} + +impl<'i> Parser<'i> { + fn new() -> Parser<'i> { + Parser { + scope: Scope::new(), + source: "", + offset: 0, + count: 0, + } + } + + fn initialize(&mut self, content: &'i str) { + self.scope = Scope::new(); + self.source = content; + self.count = 0; + self.offset = 0; + } + + fn using_string(&mut self, f: F) -> Result + where + F: Fn(&'i str) -> Result, + { + let l = self + .source + .len(); + + let result = f(self.source)?; + + // advance the parser position + self.source = ""; + self.offset += l; + + // and return + Ok(result) + } + + fn using_regex(&mut self, re: regex::Regex, mut f: F) -> Result + where + F: FnMut(&mut Parser<'i>, regex::Captures<'i>) -> Result, + { + let cap = match re.captures(self.source) { + Some(c) => c, + None => return Err(ParsingError::Expected(type_name::())), + }; + + let zero = cap + .get(0) + .unwrap(); + + let l = zero.end(); + + let mut parser = Parser { + scope: self + .scope + .clone(), + source: zero.as_str(), + count: self.count, + offset: self.offset + zero.start(), + }; + + // this is effectively self.f(cap) + let result = f(&mut parser, cap)?; + + // advance the parser position + self.source = &self.source[l..]; + self.offset += l; + + // and return + Ok(result) + } + + fn try_using_regex( + &mut self, + re: regex::Regex, + mut f: F, + ) -> Result, ParsingError> + where + F: FnMut(&mut Parser<'i>, regex::Captures<'i>) -> Result, + { + let cap = match re.captures(self.source) { + Some(c) => c, + None => return Ok(None), + }; + + let zero = cap + .get(0) + 
.unwrap();
+
+        let l = zero.end();
+
+        let mut parser = Parser {
+            scope: self
+                .scope
+                .clone(),
+            source: zero.as_str(),
+            count: self.count,
+            offset: self.offset + zero.start(),
+        };
+
+        // this is effectively self.f(cap)
+        let result = f(&mut parser, cap)?;
+
+        // advance the parser position
+        self.source = &self.source[l..];
+        self.offset += l;
+
+        // and return
+        Ok(Some(result))
+    }
+
+    /// Given a regex Match, fork a copy of the parser state and run a nested
+    /// parser on that derivative. Does NOT advance the parent's parser state;
+    /// the caller needs to do that via one of the using_*() methods.
+
+    fn subparser_match<A, F>(
+        &mut self,
+        needle: regex::Match<'i>,
+        mut f: F,
+    ) -> Result<A, ParsingError>
+    where
+        F: FnMut(&mut Parser<'i>) -> Result<A, ParsingError>,
+    {
+        let mut parser = Parser {
+            scope: self
+                .scope
+                .clone(),
+            source: needle.as_str(),
+            count: self.count,
+            offset: self.offset + needle.start(),
+        };
+
+        // this is effectively self.f()
+        let result = f(&mut parser)?;
+
+        // and return
+        Ok(result)
+    }
+
+    /// Entry point for parsing a whole document: checks that the scope stack
+    /// is at its base `Layer::Technique` state, then parses the header.
+    ///
+    /// Returns `ParsingError::IllegalParserState` if called from any other
+    /// layer. The parsed header is currently discarded.
+    fn parse_from_start(&mut self) -> Result<(), ParsingError> {
+        let layer = self
+            .scope
+            .current();
+
+        match layer {
+            Layer::Technique => (), // this is where we should be
+            _ => return Err(ParsingError::IllegalParserState),
+        }
+
+        let _header = self.parse_technique_header()?;
+        Ok(()) // FIXME
+    }
+
+    /// Consume the rest of the current line: optional ASCII whitespace
+    /// followed by a `'\n'`.
+    ///
+    /// On a newline the parser moves just past it and the line `count` is
+    /// incremented. Any non-whitespace character before the newline yields
+    /// `ParsingError::InvalidCharacter`. Running out of input is accepted:
+    /// the remaining whitespace (if any) is consumed and `Ok(())` returned.
+    fn parse_newline(&mut self) -> Result<(), ParsingError> {
+        for (i, c) in self
+            .source
+            .char_indices()
+        {
+            // '\n' is a single byte, so the slice below starts right after it
+            let l = i + 1;
+
+            if c == '\n' {
+                self.source = &self.source[l..];
+                self.count += 1;
+                self.offset += l;
+                return Ok(());
+            } else if c.is_ascii_whitespace() {
+                continue;
+            } else {
+                return Err(ParsingError::InvalidCharacter(c));
+            }
+        }
+
+        // We don't actually require a newline to end the file.
+
+        // Account for the consumed bytes BEFORE clearing the source;
+        // measuring the length afterwards would always add zero.
+        self.offset += self
+            .source
+            .len();
+        self.source = "";
+        Ok(())
+        // Err(ParsingError::UnexpectedEndOfInput)
+    }
+
+    // hard wire the version for now.
If we ever grow to supporting multiple + // major versions then this will become a lot more complicated. + fn parse_magic_line(&mut self) -> Result { + let re = Regex::new(r"%\s*technique\s+v1").unwrap(); + + let m = re + .find(self.source) + .ok_or(ParsingError::Unrecognized)?; + + let l = m.end(); + + self.source = &self.source[l..]; + self.offset += l; + + Ok(1) + } + + // This one is awkward because if a SPDX line is present, then it really needs + // to have a license, whereas the copyright part is optional. + fn parse_spdx_line(&mut self) -> Result<(Option<&'i str>, Option<&'i str>), ParsingError> { + // First establish we have a valid line. + + if self + .source + .len() + == 0 + { + return Ok((None, None)); + } + + let x = self + .source + .chars() + .next() + .unwrap(); + + if x != '!' { + return Err(ParsingError::InvalidHeader); + } + + let mut lines = self + .source + .lines(); + let line = lines + .next() + .unwrap(); + + let re = Regex::new(r"!\s*([^;]+)(?:;\s*(?:\(c\)|\(C\)|©)\s*(.+))?").unwrap(); + + let cap = re + .captures(line) + .ok_or(ParsingError::Unrecognized)?; + + // Get the length of the match as a whole so we can advance the parser + // state later. + + let l = cap + .get(0) + .ok_or(ParsingError::Unrecognized)? + .end(); + + // Now to extracting the values we need. We get the license code from + // the first capture. It must be present otherwise we don't have a + // valid SPDX line (and we declared that we're on an SPDX line by the + // presence of the '!' character at the beginning of the line). + + let one = cap + .get(1) + .map(|v| v.as_str()) + .ok_or(ParsingError::InvalidHeader)?; + + let one = validate_license(one)?; + let one = Some(one); + + // Now dig out the copyright, if present: + + let two = cap + .get(2) + .map(|v| v.as_str()); + + let two = match two { + Some(text) => Some(validate_copyright(text)?), + None => None, + }; + + // Advance the parser state, and return. 
+ + self.source = &self.source[l..]; + self.offset += l; + + Ok((one, two)) + } + + fn parse_template_line(&mut self) -> Result, ParsingError> { + let re = Regex::new(r"&\s*(.+)").unwrap(); + + self.try_using_regex(re, |outer, cap| { + let one = cap + .get(1) + .ok_or(ParsingError::Expected("a template"))?; + + outer.subparser_match(one, |inner| { + inner.using_string(|text| { + let result = validate_template(text)?; + Ok(result) + }) + }) + }) + } + + + fn parse_technique_header(&mut self) -> Result, ParsingError> { + let version = self.parse_magic_line()?; + self.parse_newline()?; + + let (license, copyright) = self.parse_spdx_line()?; + self.parse_newline()?; + + let template = self.parse_template_line()?; + self.parse_newline()?; + + Ok(Metadata { + version, + license, + copyright, + template, + }) + } + + fn parse_identifier(&mut self) -> Result, ParsingError> { + self.using_string(|text| { + let result = validate_identifier(text)?; + Ok(result) + }) + } + + fn parse_forma(&mut self) -> Result, ParsingError> { + self.using_string(|text| { + let result = validate_forma(text)?; + Ok(result) + }) + } + + fn ensure_nonempty(&mut self) -> Result<(), ParsingError> { + if self + .source + .len() + == 0 + { + return Err(ParsingError::UnexpectedEndOfInput); + } + Ok(()) + } + + fn trim_whitespace(&mut self) -> Result<(), ParsingError> { + let mut l = 0; + + for (i, c) in self + .source + .char_indices() + { + if c == '\n' { + break; + } else if c.is_ascii_whitespace() { + l = i + 1; + continue; + } else { + break; + } + } + + self.source = &self.source[l..]; + self.offset += l; + + Ok(()) + } + + fn parse_genus(&mut self) -> Result, ParsingError> { + self.trim_whitespace()?; + self.ensure_nonempty()?; + + let first = self + .source + .chars() + .next() + .unwrap(); + + let re = match first { + '[' => { + // consume up to closing bracket + Regex::new(r"\[.+?\]").unwrap() + } + '(' => { + // consume up to closing parenthesis + Regex::new(r"\(.*?\)").unwrap() + } + _ => 
Regex::new(r".+").unwrap(), + }; + + self.using_regex(re, |outer, _| { + println!("{:?}", outer.source); + outer.using_string(|text| { + let result = validate_genus(text)?; + Ok(result) + }) + }) + } + + // idea: put the current Capture in the parser state? + + fn parse_signature(&mut self) -> Result, ParsingError> { + let re = Regex::new(r"\s*(.+?)\s*->\s*(.+?)\s*$").unwrap(); + + let (domain, range) = self.using_regex(re, |outer, cap| { + let one = cap + .get(1) + .ok_or(ParsingError::Expected("a Genus for the domain"))?; + + let two = cap + .get(2) + .ok_or(ParsingError::Expected("a Genus for the range"))?; + + let domain = outer.subparser_match(one, |inner| inner.parse_genus())?; + let range = outer.subparser_match(two, |inner| inner.parse_genus())?; + + Ok((domain, range)) + })?; + + Ok(Signature { domain, range }) + } + + /// declarations are of the form + /// + /// identifier : signature + /// + /// where the optional signature is + /// + /// genus -> genus + /// + fn parse_procedure_declaration( + &mut self, + ) -> Result<(Identifier<'i>, Option>), ParsingError> { + // These capture groups use .+? to make "match more than one, but + // lazily" so that the subsequent grabs of whitespace and the all + // important ':' character are not absorbed. 
+ let re = Regex::new(r"^\s*(.+?)\s*:\s*(.+?)?\s*$").unwrap(); + + self.using_regex(re, |outer, cap| { + let name = match cap.get(1) { + Some(one) => outer.subparser_match(one, |inner| { + let result = inner.parse_identifier()?; + Ok(result) + }), + None => Err(ParsingError::Expected("an Identifier")), + }?; + + let signature = match cap.get(2) { + Some(two) => outer.subparser_match(two, |inner| { + // println!("{:?}", two); + let result = inner.parse_signature()?; + Ok(Some(result)) + }), + None => Ok(None), + }?; + + Ok((name, signature)) + }) + } + + fn parse_procedure(&mut self) -> Result, ParsingError> { + let (name, signature) = self.parse_procedure_declaration()?; + + // let body = self.parse_body()?; + self.parse_newline()?; + + Ok(Procedure { name, signature }) + } +} + +#[cfg(test)] +mod check { + use super::*; + + #[test] + fn magic_line() { + let mut input = Parser::new(); + + input.initialize("% technique v1"); + assert_eq!(input.parse_magic_line(), Ok(1)); + + input.initialize("%technique v1"); + assert_eq!(input.parse_magic_line(), Ok(1)); + + // this is rejected because the technique keyword isn't present. + input.initialize("%techniquev1"); + assert_eq!(input.parse_magic_line(), Err(ParsingError::Unrecognized)); + } + + #[test] + fn header_spdx() { + let mut input = Parser::new(); + + input.initialize("! PD"); + assert_eq!(input.parse_spdx_line(), Ok((Some("PD"), None))); + + input.initialize("! MIT; (c) ACME, Inc."); + assert_eq!( + input.parse_spdx_line(), + Ok((Some("MIT"), Some("ACME, Inc."))) + ); + + input.initialize("! MIT; (C) 2024 ACME, Inc."); + assert_eq!( + input.parse_spdx_line(), + Ok((Some("MIT"), Some("2024 ACME, Inc."))) + ); + + input.initialize("! 
CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."); + assert_eq!( + input.parse_spdx_line(), + Ok((Some("CC BY-SA 3.0 [IGO]"), Some("2024 ACME, Inc."))) + ); + } + + #[test] + fn header_template() { + let mut input = Parser::new(); + input.initialize("& checklist"); + assert_eq!(input.parse_template_line(), Ok(Some("checklist"))); + + input.initialize("& nasa-flight-plan,v4.0"); + assert_eq!( + input.parse_template_line(), + Ok(Some("nasa-flight-plan,v4.0")) + ); + } + + // now we test incremental parsing + + #[test] + fn check_not_eof() { + let mut input = Parser::new(); + input.initialize("Hello World"); + assert_eq!(input.ensure_nonempty(), Ok(())); + + input.initialize(""); + assert_eq!( + input.ensure_nonempty(), + Err(ParsingError::UnexpectedEndOfInput) + ); + } + + #[test] + fn consume_whitespace() { + let mut input = Parser::new(); + input.initialize(" hello"); + assert_eq!(input.trim_whitespace(), Ok(())); + assert_eq!(input.source, "hello"); + } + + #[test] + fn forma_rules() { + let mut input = Parser::new(); + input.initialize("A"); + assert_eq!(input.parse_forma(), Ok(Forma("A"))); + + input.initialize("Apple"); + assert_eq!(input.parse_forma(), Ok(Forma("Apple"))); + } + + #[test] + fn single_genus_definitions() { + let mut input = Parser::new(); + input.initialize("A"); + assert_eq!(input.parse_genus(), Ok(Genus::Single(Forma("A")))); + assert_eq!(input.source, ""); + + input.initialize("Apple"); + assert_eq!(input.parse_genus(), Ok(Genus::Single(Forma("Apple")))); + assert_eq!(input.source, ""); + } + + #[test] + fn list_genus_definitions() { + let mut input = Parser::new(); + input.initialize("[A]"); + assert_eq!(input.parse_genus(), Ok(Genus::List(Forma("A")))); + assert_eq!(input.source, ""); + } + + #[test] + fn tuple_genus_definitions() { + let mut input = Parser::new(); + + input.initialize("(A, B)"); + assert_eq!( + input.parse_genus(), + Ok(Genus::Tuple(vec![Forma("A"), Forma("B")])) + ); + assert_eq!(input.source, ""); + + // not actually sure 
whether we should be normalizing this? Probably + // not, because formatting and linting is a separate concern. + + input.initialize("(A)"); + assert_eq!(input.parse_genus(), Ok(Genus::Tuple(vec![Forma("A")]))); + assert_eq!(input.source, ""); + } + + #[test] + fn unit_genus_definitions() { + let mut input = Parser::new(); + + // and now the special case of the unit type + + input.initialize("()"); + assert_eq!(input.parse_genus(), Ok(Genus::Unit)); + assert_eq!(input.source, "") + } + + #[test] + fn signatures() { + let mut input = Parser::new(); + + input.initialize("A -> B"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::Single(Forma("A")), + range: Genus::Single(Forma("B")) + }) + ); + + input.initialize("Beans -> Coffee"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::Single(Forma("Beans")), + range: Genus::Single(Forma("Coffee")) + }) + ); + + input.initialize("[Bits] -> Bob"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::List(Forma("Bits")), + range: Genus::Single(Forma("Bob")) + }) + ); + + input.initialize("Complex -> (Real, Imaginary)"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::Single(Forma("Complex")), + range: Genus::Tuple(vec![Forma("Real"), Forma("Imaginary")]) + }) + ); + } + + #[test] + fn declarations_simple() { + let mut input = Parser::new(); + + input.initialize("making_coffee :"); + assert_eq!( + input.parse_procedure_declaration(), + Ok((Identifier("making_coffee"), None)) + ); + } + + #[test] + fn declarations_full() { + let mut input = Parser::new(); + + input.initialize("f : A -> B"); + assert_eq!( + input.parse_procedure_declaration(), + Ok(( + Identifier("f"), + Some(Signature { + domain: Genus::Single(Forma("A")), + range: Genus::Single(Forma("B")) + }) + )) + ); + + input.initialize("making_coffee : Beans -> Coffee"); + assert_eq!( + input.parse_procedure_declaration(), + Ok(( + Identifier("making_coffee"), + 
Some(Signature { + domain: Genus::Single(Forma("Beans")), + range: Genus::Single(Forma("Coffee")) + }) + )) + ); + + input.initialize("making_coffee : (Beans, Milk) -> Coffee"); + assert_eq!( + input.parse_procedure_declaration(), + Ok(( + Identifier("making_coffee"), + Some(Signature { + domain: Genus::Tuple(vec![Forma("Beans"), Forma("Milk")]), + range: Genus::Single(Forma("Coffee")) + }) + )) + ); + } +} + +#[cfg(test)] +mod verify { + use super::*; + + #[test] + fn technique_header() { + let mut input = Parser::new(); + input.initialize("% technique v1"); + + assert_eq!( + input.parse_technique_header(), + Ok(Metadata { + version: 1, + license: None, + copyright: None, + template: None + }) + ); + + input.initialize( + r#" +% technique v1 +! MIT; (c) ACME, Inc +& checklist + "#, + ); + assert_eq!( + input.parse_technique_header(), + Ok(Metadata { + version: 1, + license: Some("MIT"), + copyright: Some("ACME, Inc"), + template: Some("checklist") + }) + ); + } +} + +/* + #[test] + fn check_procedure_signature() { + let p = grammar::signatureParser::new(); + + assert_eq!( + p.parse(""), + Ok(Signature { + domain: Genus::Single(Forma { + name: "A".to_owned() + }), + range: Genus::Single(Forma { + name: "B".to_owned() + }) + }) + ); + assert!(p + .parse("A ->") + .is_err()); + assert!(p + .parse("A") + .is_err()); + } + + #[test] + fn check_procedure_declaration() { + let d = grammar::declarationParser::new(); + + assert_eq!(d.parse("making_coffee :"), Ok("making_coffee".to_owned())); + + let p = grammar::declaration_lineParser::new(); + + assert_eq!( + p.parse("f :"), + Ok(Procedure { + name: "f".to_owned(), + signature: None + }) + ); + + assert!(p + .parse("cook-pizza :B") + .is_err()); + + assert_eq!( + p.parse("f : A -> B"), + Ok(Procedure { + name: "f".to_owned(), + signature: Some(Signature { + domain: Genus::Single(Forma { + name: "A".to_owned() + }), + range: Genus::Single(Forma { + name: "B".to_owned() + }) + }) + }) + ); + } + + #[test] + fn 
check_attribute_role() { + let a = grammar::attributeParser::new(); + + assert_eq!( + a.parse("@chef"), + Ok(Attribute { + name: "chef".to_owned() + }) + ); + + let p = grammar::attribute_lineParser::new(); + + assert_eq!( + p.parse("@chef"), + Ok(vec![Attribute { + name: "chef".to_owned() + }]) + ); + assert_eq!( + p.parse("@chef + @sous"), + Ok(vec![ + Attribute { + name: "chef".to_owned() + }, + Attribute { + name: "sous".to_owned() + } + ]) + ); + } + + // the verify_*() functions are where we do verificaton of larger composite + // structures built up from the smaller pieces check_*()'d above. + + /* + #[test] + fn check_procedure_declaration_explicit() { + let input = "making_coffee : Beans, Milk -> Coffee"; + + // let declaration = TechniqueParser::parse(Rule::declaration, &input) + // .expect("Unsuccessful Parse") + // .next() + // .unwrap(); + + assert_eq!( + input, // FIXME + "making_coffee : Beans, Milk -> Coffee" + ); + + // assert_eq!(identifier.as_str(), "making_coffee"); + // assert_eq!(identifier.as_rule(), Rule::identifier); + + // assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); + // assert_eq!(signature.as_rule(), Rule::signature); + + } + */ +*/ diff --git a/src/parsing/scope.rs b/src/parsing/scope.rs new file mode 100644 index 0000000..94f706c --- /dev/null +++ b/src/parsing/scope.rs @@ -0,0 +1,151 @@ +#![allow(unused_variables)] +#![allow(dead_code)] + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Layer { + Technique, // within a technique file, by definition the base state + Metadata, // header lines + Procedure, // procedure function block + Declaration, // procedure function signature + Description, // procedure description, as free form text + StepItem, // (sub)step within a procedure body + CodeBlock, // escape to a code mode + Embedded, // multi-line string of another language. 
+}
+
+/// A stack of [`Layer`]s recording where in a Technique document the parser
+/// currently is. An empty stack means the base `Layer::Technique` state.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct Scope {
+    stack: Vec<Layer>,
+}
+
+impl Scope {
+    /// Create an empty scope, i.e. one positioned at `Layer::Technique`.
+    pub(crate) fn new() -> Scope {
+        Scope { stack: vec![] }
+    }
+
+    /// The innermost layer, or `Layer::Technique` when nothing has been
+    /// pushed.
+    pub(crate) fn current(&self) -> Layer {
+        match self
+            .stack
+            .last()
+        {
+            Some(layer) => *layer,
+            None => Layer::Technique,
+        }
+    }
+
+    /// Enter a nested layer.
+    pub(crate) fn push(&mut self, layer: Layer) {
+        self.stack
+            .push(layer);
+    }
+
+    /// Leave the innermost layer and return it. Popping an empty stack is
+    /// not an error; it returns `Layer::Technique`.
+    pub(crate) fn pop(&mut self) -> Layer {
+        match self
+            .stack
+            .pop()
+        {
+            Some(layer) => layer,
+            None => Layer::Technique,
+        }
+    }
+
+    /// Drop every layer, returning to the base state.
+    pub(crate) fn reset(&mut self) {
+        self.stack
+            .clear();
+    }
+
+    /// Iterate over the members of the stack and put them into a sorted list
+    /// suitable to be used as the tags for syntax highlighting.
+    ///
+    /// `Layer::Technique` is always included and duplicates are removed. The
+    /// order is the declaration order of the `Layer` variants.
+    pub(crate) fn tags(self) -> Vec<Layer> {
+        // `self` is consumed, so the stack can be reused without cloning it
+        let mut tags = self.stack;
+
+        tags.push(Layer::Technique);
+        tags.sort();
+        tags.dedup();
+
+        tags
+    }
+}
+
+#[cfg(test)]
+mod check {
+    use super::*;
+
+    #[test]
+    fn stack_operations() {
+        let mut stack = Scope::new();
+
+        let current = stack.current();
+        assert_eq!(current, Layer::Technique);
+
+        stack.push(Layer::Metadata);
+
+        let current = stack.current();
+        assert_eq!(current, Layer::Metadata);
+
+        let popped = stack.pop();
+        assert_eq!(popped, Layer::Metadata);
+
+        stack.push(Layer::Procedure);
+        stack.push(Layer::Declaration);
+        let current = stack.current();
+        assert_eq!(current, Layer::Declaration);
+
+        let popped = stack.pop();
+        assert_eq!(popped, Layer::Declaration);
+
+        let current = stack.current();
+        assert_eq!(current, Layer::Procedure);
+
+        let popped = stack.pop();
+        assert_eq!(popped, Layer::Procedure);
+
+        // and if we pop again, we're still in Technique
+        let popped = stack.pop();
+        assert_eq!(popped, Layer::Technique);
+
+        stack.push(Layer::Description);
+        stack.push(Layer::CodeBlock);
+
+        // TODO get layers as tags in sorted order
+
+        // now we try reset()
+
+        stack.reset();
+        let current = stack.current();
+        assert_eq!(current, Layer::Technique);
+
+        let popped = stack.pop();
+        assert_eq!(popped, Layer::Technique);
+    }
+
+    #[test]
+    fn extract_tags_sorted() {
+        let mut stack = Scope::new();
+
+        stack.push(Layer::Procedure);
+        stack.push(Layer::StepItem);
+        stack.push(Layer::Description);
+        stack.push(Layer::StepItem);
+        stack.push(Layer::CodeBlock);
+        stack.push(Layer::Embedded);
+        stack.push(Layer::CodeBlock);
+
+        let result = stack.tags();
+        assert_eq!(
+            result,
+            vec![
+                Layer::Technique,
+                Layer::Procedure,
+                Layer::Description,
+                Layer::StepItem,
+                Layer::CodeBlock,
+                Layer::Embedded
+            ]
+        )
+
+        // which raises an interesting question about what happens when we
+        // nest. This will probably need changing.
+    }
+}