From 8df238c20dd73e1cfefc6f71bca62fb2cc5fadd9 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 12:33:49 +1000 Subject: [PATCH 001/112] Stub module for parsing --- src/main.rs | 4 +++- src/parsing/mod.rs | 8 ++++++++ src/parsing/parser.rs | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 src/parsing/mod.rs create mode 100644 src/parsing/parser.rs diff --git a/src/main.rs b/src/main.rs index 8a81de0..52c5cde 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ use tracing::debug; use tracing_subscriber; mod rendering; +mod parsing; fn main() { const VERSION: &str = concat!("v", env!("CARGO_PKG_VERSION")); @@ -89,10 +90,11 @@ fn main() { let filename = submatches .get_one::("filename") - .unwrap(); // argument are required by definitin so always present + .unwrap(); // argument are required by definition so always present debug!(filename); + parsing::load(&Path::new(filename)); todo!(); } Some(("format", submatches)) => { diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs new file mode 100644 index 0000000..30cadc0 --- /dev/null +++ b/src/parsing/mod.rs @@ -0,0 +1,8 @@ +// parser for the Technique language +use std::path::Path; + +pub mod parser; + +pub fn load(source: &Path) { + +} diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs new file mode 100644 index 0000000..5675879 --- /dev/null +++ b/src/parsing/parser.rs @@ -0,0 +1,4 @@ +// parsing machinery + +pub fn load() { +} From 48a0ae9a11fe2b22c920aa20d699c0bffd919431 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 13:20:46 +1000 Subject: [PATCH 002/112] Add pest dependency --- Cargo.lock | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + 2 files changed, 146 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index d3f609d..e5500bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,6 +57,15 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -97,6 +106,35 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "errno" version = "0.3.9" @@ -107,6 +145,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -171,6 +219,51 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "pest" +version = "2.7.12" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c73c26c01b8c87956cea613c907c9d6ecffd8d18a2a5908e5de0adfaa185cea" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "664d22978e2815783adbdd2c588b455b1bd625299ce36b2a99881ac9627e6d8d" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d5487022d5d33f4c30d91c22afa240ce2a644e87fe08caad974d4eab6badbe" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0091754bbd0ea592c4deb3a122ce8ecbb0753b738aa82bc055fcc2eccc8d8174" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "pin-project-lite" version = "0.2.14" @@ -246,6 +339,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -283,6 +387,8 @@ name = "technique" version = "0.3.0" dependencies = [ "clap", + "pest", + "pest_derive", "serde", "tinytemplate", "tracing", @@ -299,6 +405,26 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -376,6 +502,18 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -394,6 +532,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 75da6fc..86ded5a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,8 @@ license = "MIT" [dependencies] clap = { version = "4.5.16", features = [ "wrap_help" ] } +pest = "2.7.11" +pest_derive = "2.7.11" serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" From 1e440ea01c771661642976a1e9530145e82b658f Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 13:21:43 +1000 Subject: [PATCH 003/112] Grammar for procedure declaration --- src/parsing/parser.rs | 34 +++++++++++++++++++++++++++++++++- technique.pest | 15 +++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 technique.pest diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 5675879..2fb9ca2 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,4 +1,36 @@ // 
parsing machinery -pub fn load() { +use pest::Parser; +use pest_derive::Parser; + +#[derive(Parser)] +#[grammar = "../technique.pest"] +struct TechniqueParser; + +pub fn load() {} + +#[cfg(test)] +mod tests { + use super::*; // Import all parent module items + + #[test] + fn check_procedure_declaration() { + let input = "making_coffee : Beans -> Coffee"; + + let declaration = TechniqueParser::parse(Rule::declaration, &input) + .expect("Unsuccessful Parse") + .next() + .unwrap(); + + assert_eq!(declaration.as_str(), "making_coffee : Beans -> Coffee"); + assert_eq!(declaration.as_rule(), Rule::declaration); + + let identifier = declaration + .into_inner() + .next() + .unwrap(); + + assert_eq!(identifier.as_str(), "making_coffee"); + assert_eq!(identifier.as_rule(), Rule::identifier); + } } diff --git a/technique.pest b/technique.pest new file mode 100644 index 0000000..acfc4a4 --- /dev/null +++ b/technique.pest @@ -0,0 +1,15 @@ +// Parsing Expression Grammar for v1 of the Technique Procedure Language + +WHITESPACE = _{ " " | "\t" } + +declaration = { identifier ~ ":" ~ signature? 
} + +identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } + +signature = { type ~ "->" ~ type } + +type = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } + + + + From 5fb162a0630ddd6d4eec89cf6089ee1811534ecf Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 14:45:17 +1000 Subject: [PATCH 004/112] Type is a reserved word so call types typas --- src/parsing/parser.rs | 24 ++++++++++++++++++++++-- technique.pest | 4 ++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 2fb9ca2..31e1ecf 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -25,12 +25,32 @@ mod tests { assert_eq!(declaration.as_str(), "making_coffee : Beans -> Coffee"); assert_eq!(declaration.as_rule(), Rule::declaration); - let identifier = declaration - .into_inner() + let mut pairs = declaration.into_inner(); + + let identifier = pairs .next() .unwrap(); assert_eq!(identifier.as_str(), "making_coffee"); assert_eq!(identifier.as_rule(), Rule::identifier); + + let signature = pairs + .next() + .unwrap(); + + assert_eq!(signature.as_str(), "Beans -> Coffee"); + assert_eq!(signature.as_rule(), Rule::signature); + + let mut pairs = signature.into_inner(); + + let domain = pairs.next().unwrap(); + + assert_eq!(domain.as_str(), "Beans"); + assert_eq!(domain.as_rule(), Rule::typa); + + let range = pairs.next().unwrap(); + + assert_eq!(range.as_str(), "Coffee"); + assert_eq!(range.as_rule(), Rule::typa); } } diff --git a/technique.pest b/technique.pest index acfc4a4..6468854 100644 --- a/technique.pest +++ b/technique.pest @@ -6,9 +6,9 @@ declaration = { identifier ~ ":" ~ signature? 
} identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } -signature = { type ~ "->" ~ type } +signature = { typa ~ "->" ~ typa } -type = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } +typa = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } From 163c33bcc83d2e2c878d6c254a490224706f48ef Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 16:31:12 +1000 Subject: [PATCH 005/112] Handle multiple types in domain --- src/parsing/parser.rs | 28 +++++++++++++++++++++------- technique.pest | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 31e1ecf..dc78b5c 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -15,14 +15,17 @@ mod tests { #[test] fn check_procedure_declaration() { - let input = "making_coffee : Beans -> Coffee"; + let input = "making_coffee : Beans, Milk -> Coffee"; let declaration = TechniqueParser::parse(Rule::declaration, &input) .expect("Unsuccessful Parse") .next() .unwrap(); - assert_eq!(declaration.as_str(), "making_coffee : Beans -> Coffee"); + assert_eq!( + declaration.as_str(), + "making_coffee : Beans, Milk -> Coffee" + ); assert_eq!(declaration.as_rule(), Rule::declaration); let mut pairs = declaration.into_inner(); @@ -38,17 +41,28 @@ mod tests { .next() .unwrap(); - assert_eq!(signature.as_str(), "Beans -> Coffee"); + assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); assert_eq!(signature.as_rule(), Rule::signature); let mut pairs = signature.into_inner(); - let domain = pairs.next().unwrap(); + let domain1 = pairs + .next() + .unwrap(); + + assert_eq!(domain1.as_str(), "Beans"); + assert_eq!(domain1.as_rule(), Rule::typa); + + let domain2 = pairs + .next() + .unwrap(); - assert_eq!(domain.as_str(), "Beans"); - assert_eq!(domain.as_rule(), Rule::typa); + assert_eq!(domain2.as_str(), "Milk"); + assert_eq!(domain2.as_rule(), Rule::typa); - let range = pairs.next().unwrap(); + let range = pairs + .next() + .unwrap(); 
assert_eq!(range.as_str(), "Coffee"); assert_eq!(range.as_rule(), Rule::typa); diff --git a/technique.pest b/technique.pest index 6468854..ee2df97 100644 --- a/technique.pest +++ b/technique.pest @@ -6,7 +6,7 @@ declaration = { identifier ~ ":" ~ signature? } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } -signature = { typa ~ "->" ~ typa } +signature = { typa ~ ("," ~ typa)* ~ "->" ~ typa } typa = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } From 024a6d0584b722ec0caf3c188dd53cbe4e84a2bb Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 16 Sep 2024 16:44:04 +1000 Subject: [PATCH 006/112] Add parser test using macro --- src/parsing/parser.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index dc78b5c..2e47192 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,6 +1,6 @@ // parsing machinery -use pest::Parser; +use pest::{consumes_to, parses_to, Parser}; use pest_derive::Parser; #[derive(Parser)] @@ -14,7 +14,7 @@ mod tests { use super::*; // Import all parent module items #[test] - fn check_procedure_declaration() { + fn check_procedure_declaration_explicit() { let input = "making_coffee : Beans, Milk -> Coffee"; let declaration = TechniqueParser::parse(Rule::declaration, &input) @@ -67,4 +67,23 @@ mod tests { assert_eq!(range.as_str(), "Coffee"); assert_eq!(range.as_rule(), Rule::typa); } + + #[test] + fn check_procedure_declaration_macro() { + parses_to! 
{ + parser: TechniqueParser, + input: "making_coffee : Beans, Milk -> Coffee", + rule: Rule::declaration, + tokens: [ + declaration(0, 37, [ + identifier(0, 13), + signature(16, 37, [ + typa(16, 21), + typa(23, 27), + typa(31, 37) + ]) + ]) + ] + }; + } } From 31b2873cc2f5c7ad88ab2e787a349ee41b75e63e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:35:57 +1000 Subject: [PATCH 007/112] Rename type again, to forma --- src/parsing/parser.rs | 12 ++++++------ technique.pest | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 2e47192..3e44b85 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -51,21 +51,21 @@ mod tests { .unwrap(); assert_eq!(domain1.as_str(), "Beans"); - assert_eq!(domain1.as_rule(), Rule::typa); + assert_eq!(domain1.as_rule(), Rule::forma); let domain2 = pairs .next() .unwrap(); assert_eq!(domain2.as_str(), "Milk"); - assert_eq!(domain2.as_rule(), Rule::typa); + assert_eq!(domain2.as_rule(), Rule::forma); let range = pairs .next() .unwrap(); assert_eq!(range.as_str(), "Coffee"); - assert_eq!(range.as_rule(), Rule::typa); + assert_eq!(range.as_rule(), Rule::forma); } #[test] @@ -78,9 +78,9 @@ mod tests { declaration(0, 37, [ identifier(0, 13), signature(16, 37, [ - typa(16, 21), - typa(23, 27), - typa(31, 37) + forma(16, 21), + forma(23, 27), + forma(31, 37) ]) ]) ] diff --git a/technique.pest b/technique.pest index ee2df97..804c355 100644 --- a/technique.pest +++ b/technique.pest @@ -6,9 +6,9 @@ declaration = { identifier ~ ":" ~ signature? 
} identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } -signature = { typa ~ ("," ~ typa)* ~ "->" ~ typa } +signature = { forma ~ ("," ~ forma )* ~ "->" ~ forma } -typa = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } +forma = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } From c2ae2e8ab2d072e2a7b323704f466b6e15c12f1f Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:36:49 +1000 Subject: [PATCH 008/112] Pass input file to parser when checking --- src/parsing/mod.rs | 3 +++ src/parsing/parser.rs | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 30cadc0..fbfb83f 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -4,5 +4,8 @@ use std::path::Path; pub mod parser; pub fn load(source: &Path) { + // read source to a str + let content = std::fs::read_to_string(source).expect("Failed to read the source file"); + parser::parse_via_pest(content.as_str()); } diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 3e44b85..c205038 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -7,7 +7,10 @@ use pest_derive::Parser; #[grammar = "../technique.pest"] struct TechniqueParser; -pub fn load() {} +pub fn parse_via_pest(content: &str) { + let technique = TechniqueParser::parse(Rule::technique, &content); + println!("{:?}", technique); +} #[cfg(test)] mod tests { From 35ca52073a228895edfbfc128ae3aeed689cdf38 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:37:07 +1000 Subject: [PATCH 009/112] Grammar for header lines in a Technique file --- technique.pest | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/technique.pest b/technique.pest index 804c355..98f80c7 100644 --- a/technique.pest +++ b/technique.pest @@ -2,7 +2,27 @@ WHITESPACE = _{ " " | "\t" } -declaration = { identifier ~ ":" ~ signature? } +technique = { SOI ~ magic_line ~ spdx_line? 
~ NEWLINE* ~ declaration ~ EOI } + +// File Format Header + +magic_line = { "%" ~ "technique" ~ "v1" ~ NEWLINE } + +// License and Copyright Header + +spdx_line = { "!" ~ license ~ ";" ~ copyright ~ NEWLINE } + +license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " )* } + +copyright = { ("©" | "(c)" | "(C)") ~ year? ~ owner } + +year = @{ ASCII_DIGIT{4} } + +owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | ",")* } + +// Procedure Declaration + +declaration = { identifier ~ ":" ~ signature? ~ NEWLINE } identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } From 9a7abaa0b8022debae06663a3ee2cce7e2292c31 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 19 Sep 2024 17:40:02 +1000 Subject: [PATCH 010/112] Reconsider place of newlines in grammar --- technique.pest | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/technique.pest b/technique.pest index 98f80c7..8bf62fc 100644 --- a/technique.pest +++ b/technique.pest @@ -2,15 +2,24 @@ WHITESPACE = _{ " " | "\t" } -technique = { SOI ~ magic_line ~ spdx_line? ~ NEWLINE* ~ declaration ~ EOI } +technique = { + SOI + ~ magic_line + ~ NEWLINE + ~ spdx_line? + ~ NEWLINE+ + ~ declaration + ~ NEWLINE+ + ~ EOI +} // File Format Header -magic_line = { "%" ~ "technique" ~ "v1" ~ NEWLINE } +magic_line = { "%" ~ "technique" ~ "v1" } // License and Copyright Header -spdx_line = { "!" ~ license ~ ";" ~ copyright ~ NEWLINE } +spdx_line = { "!" ~ license ~ ";" ~ copyright } license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " )* } @@ -22,7 +31,7 @@ owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" // Procedure Declaration -declaration = { identifier ~ ":" ~ signature? ~ NEWLINE } +declaration = { identifier ~ ":" ~ signature? 
} identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } From 2132ef4b9510f91112b5383e5e45c47bd3c86bc8 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 10:27:25 +1000 Subject: [PATCH 011/112] Test copyright string on spdx line --- src/parsing/parser.rs | 40 ++++++++++++++++++++++++++++++++++++++++ technique.pest | 24 ++++++++++++------------ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index c205038..4ceaf9b 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -89,4 +89,44 @@ mod tests { ] }; } + + #[test] + fn check_header_spdx() { + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 21, [ + license(2, 5), + copyright(7, 21, [ + owner(11, 21) + ]) + ]) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) 2024 ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 26, [ + license(2, 5), + copyright(7, 26, [ + year(11,15), + owner(16, 26) + ]) + ]) + ] + }; + + parses_to! { + parser: TechniqueParser, + input: "2024", + rule: Rule::year, + tokens: [ + year(0,4), + ] + }; + } } diff --git a/technique.pest b/technique.pest index 8bf62fc..31cddb0 100644 --- a/technique.pest +++ b/technique.pest @@ -3,14 +3,14 @@ WHITESPACE = _{ " " | "\t" } technique = { - SOI - ~ magic_line - ~ NEWLINE - ~ spdx_line? - ~ NEWLINE+ - ~ declaration - ~ NEWLINE+ - ~ EOI + SOI ~ + magic_line ~ + NEWLINE ~ + spdx_line? ~ + NEWLINE+ ~ + declaration ~ + NEWLINE+ ~ + EOI } // File Format Header @@ -19,15 +19,15 @@ magic_line = { "%" ~ "technique" ~ "v1" } // License and Copyright Header -spdx_line = { "!" ~ license ~ ";" ~ copyright } +spdx_line = { "!" ~ license ~ (";" ~ copyright)? } -license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " )* } +license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "." 
)* } copyright = { ("©" | "(c)" | "(C)") ~ year? ~ owner } -year = @{ ASCII_DIGIT{4} } +year = @{ ASCII_DIGIT{4} ~ "-" ~ (ASCII_DIGIT{4})? | ASCII_DIGIT{4} } -owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | ",")* } +owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "," | "." )* } // Procedure Declaration From 08d1e040b142a84b22e494ee15ebbf78c670336a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 10:43:08 +1000 Subject: [PATCH 012/112] Grammar for template line in header --- src/parsing/parser.rs | 38 +++++++++++++++++++++++++++++++++++++- technique.pest | 8 ++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 4ceaf9b..3106b0d 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,6 +1,6 @@ // parsing machinery -use pest::{consumes_to, parses_to, Parser}; +use pest::Parser; use pest_derive::Parser; #[derive(Parser)] @@ -14,6 +14,8 @@ pub fn parse_via_pest(content: &str) { #[cfg(test)] mod tests { + use pest::{consumes_to, fails_with, parses_to}; + use super::*; // Import all parent module items #[test] @@ -129,4 +131,38 @@ mod tests { ] }; } + + #[test] + fn check_header_template() { + parses_to! { + parser: TechniqueParser, + input: "& checklist", + rule: Rule::template_line, + tokens: [ + template_line(0, 11, [ + template(2, 11) + ]) + ] + }; + + parses_to! { + parser: TechniqueParser, + input: "& nasa-flight-plan-v4.0", + rule: Rule::template_line, + tokens: [ + template_line(0, 23, [ + template(2, 23) + ]) + ] + }; + + fails_with! { + parser: TechniqueParser, + input: "&", + rule: Rule::template_line, + positives: [Rule::template], + negatives: [], + pos: 1 + }; + } } diff --git a/technique.pest b/technique.pest index 31cddb0..13504d4 100644 --- a/technique.pest +++ b/technique.pest @@ -7,6 +7,8 @@ technique = { magic_line ~ NEWLINE ~ spdx_line? ~ + NEWLINE ~ + template_line? 
~ NEWLINE+ ~ declaration ~ NEWLINE+ ~ @@ -29,6 +31,12 @@ year = @{ ASCII_DIGIT{4} ~ "-" ~ (ASCII_DIGIT{4})? | ASCII_DIGIT{4} } owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "," | "." )* } +// Template Header + +template_line = { "&" ~ template } + +template = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | "." | "," )* } + // Procedure Declaration declaration = { identifier ~ ":" ~ signature? } From 73a2427fe95e5432de3dce18ad0ca3da7cc8ca1e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 14:21:54 +1000 Subject: [PATCH 013/112] Additional year tests --- src/parsing/parser.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 3106b0d..035fea2 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -130,6 +130,38 @@ mod tests { year(0,4), ] }; + parses_to! { + parser: TechniqueParser, + input: "2024-", + rule: Rule::year, + tokens: [ + year(0,5), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "2002-2024", + rule: Rule::year, + tokens: [ + year(0,9), + ] + }; + fails_with! { + parser: TechniqueParser, + input: "02", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; + fails_with! { + parser: TechniqueParser, + input: "02-24", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; } #[test] From 900077efea9d22e4552f8a2d68db4dc26a7dc92c Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 14:28:54 +1000 Subject: [PATCH 014/112] Additional license tests --- src/parsing/parser.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 035fea2..f3362b2 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -121,6 +121,41 @@ mod tests { ]) ] }; + parses_to! { + parser: TechniqueParser, + input: "! 
PD", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 4, [ + license(2, 4) + ]) + ] + }; + + parses_to! { + parser: TechniqueParser, + input: "MIT", + rule: Rule::license, + tokens: [ + license(0,3), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "Public Domain", + rule: Rule::license, + tokens: [ + license(0,13), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "CC BY-SA 3.0 IGO", + rule: Rule::license, + tokens: [ + license(0,16), + ] + }; parses_to! { parser: TechniqueParser, From e7da2c1557f5c4f767e9d437e2a799be45c0d693 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 20 Sep 2024 14:58:09 +1000 Subject: [PATCH 015/112] Test identifiers and declarations --- src/parsing/parser.rs | 88 ++++++++++++++++++++++++++++++++++++++----- technique.pest | 2 +- 2 files changed, 80 insertions(+), 10 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index f3362b2..692ac78 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -115,7 +115,7 @@ mod tests { spdx_line(0, 26, [ license(2, 5), copyright(7, 26, [ - year(11,15), + year(11, 15), owner(16, 26) ]) ]) @@ -137,7 +137,7 @@ mod tests { input: "MIT", rule: Rule::license, tokens: [ - license(0,3), + license(0, 3), ] }; parses_to! { @@ -145,7 +145,7 @@ mod tests { input: "Public Domain", rule: Rule::license, tokens: [ - license(0,13), + license(0, 13), ] }; parses_to! { @@ -153,7 +153,7 @@ mod tests { input: "CC BY-SA 3.0 IGO", rule: Rule::license, tokens: [ - license(0,16), + license(0, 16), ] }; @@ -162,7 +162,7 @@ mod tests { input: "2024", rule: Rule::year, tokens: [ - year(0,4), + year(0, 4), ] }; parses_to! { @@ -170,7 +170,7 @@ mod tests { input: "2024-", rule: Rule::year, tokens: [ - year(0,5), + year(0, 5), ] }; parses_to! { @@ -178,7 +178,7 @@ mod tests { input: "2002-2024", rule: Rule::year, tokens: [ - year(0,9), + year(0, 9), ] }; fails_with! { @@ -211,7 +211,6 @@ mod tests { ]) ] }; - parses_to! 
{ parser: TechniqueParser, input: "& nasa-flight-plan-v4.0", @@ -222,7 +221,6 @@ mod tests { ]) ] }; - fails_with! { parser: TechniqueParser, input: "&", @@ -232,4 +230,76 @@ mod tests { pos: 1 }; } + + #[test] + fn check_identifier_rules() { + parses_to! { + parser: TechniqueParser, + input: "p", + rule: Rule::identifier, + tokens: [ + identifier(0, 1) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "pizza", + rule: Rule::identifier, + tokens: [ + identifier(0, 5) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "cook_pizza", + rule: Rule::identifier, + tokens: [ + identifier(0, 10) + ] + }; + fails_with! { + parser: TechniqueParser, + input: "0trust", + rule: Rule::identifier, + positives: [Rule::identifier], + negatives: [], + pos: 0 + }; + } + + #[test] + fn check_declaration_syntax() { + parses_to! { + parser: TechniqueParser, + input: "p :", + rule: Rule::declaration, + tokens: [ + declaration(0, 3, [ + identifier(0, 1) + ]) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "p : A -> B", + rule: Rule::declaration, + tokens: [ + declaration(0, 10, [ + identifier(0, 1), + signature(4, 10, [ + forma(4, 5), + forma(9, 10) + ]) + ]) + ] + }; + fails_with! { + parser: TechniqueParser, + input: "cook-pizza :", + rule: Rule::declaration, + positives: [Rule::declaration], + negatives: [], + pos: 0 + }; + } } diff --git a/technique.pest b/technique.pest index 13504d4..5063adf 100644 --- a/technique.pest +++ b/technique.pest @@ -39,7 +39,7 @@ template = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | " // Procedure Declaration -declaration = { identifier ~ ":" ~ signature? } +declaration = { identifier ~ ":" ~ signature? 
} identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } From 8f595953b3efbcb4d284c4ec2e1c47958864bfc8 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 23 Sep 2024 12:15:59 +1000 Subject: [PATCH 016/112] Add winnow parser dependency --- Cargo.lock | 10 ++++++++++ Cargo.toml | 1 + 2 files changed, 11 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e5500bf..db4d7f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -393,6 +393,7 @@ dependencies = [ "tinytemplate", "tracing", "tracing-subscriber", + "winnow", ] [[package]] @@ -698,3 +699,12 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml index 86ded5a..86e4478 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" tracing-subscriber = "0.3.18" +winnow = "0.6.18" From df2cbea96fd95738b295143bd817bb5da5e19e43 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Wed, 25 Sep 2024 17:57:00 +1000 Subject: [PATCH 017/112] Reimplement identifier parser --- src/parsing/mod.rs | 2 +- src/parsing/parser.rs | 450 ++++++++++++++++++++---------------------- 2 files changed, 214 insertions(+), 238 deletions(-) diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index fbfb83f..a607228 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -7,5 +7,5 @@ pub fn load(source: &Path) { // read source to a str let content = std::fs::read_to_string(source).expect("Failed to read the source file"); - parser::parse_via_pest(content.as_str()); + parser::parse_via_winnow(content.as_str()); } diff --git 
a/src/parsing/parser.rs b/src/parsing/parser.rs index 692ac78..007b6cd 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,271 +1,246 @@ // parsing machinery -use pest::Parser; -use pest_derive::Parser; +// struct TechniqueParser; -#[derive(Parser)] -#[grammar = "../technique.pest"] -struct TechniqueParser; +use winnow::token::take_while; +use winnow::{PResult, Parser}; -pub fn parse_via_pest(content: &str) { - let technique = TechniqueParser::parse(Rule::technique, &content); - println!("{:?}", technique); +pub fn parse_via_winnow(_content: &str) { + // let technique = TechniqueParser::parse(Rule::technique, &content); + // println!("{:?}", technique); +} + +fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { + take_while(1.., (('0'..='9'), ('A'..='Z'), ('a'..='z'), ('_'))).parse_next(input) } #[cfg(test)] mod tests { - use pest::{consumes_to, fails_with, parses_to}; - - use super::*; // Import all parent module items - + use super::*; + #[test] - fn check_procedure_declaration_explicit() { - let input = "making_coffee : Beans, Milk -> Coffee"; + fn check_identifier_rules() { + let mut input = "p"; - let declaration = TechniqueParser::parse(Rule::declaration, &input) - .expect("Unsuccessful Parse") - .next() + let result = parse_identifier + .parse_next(&mut input) .unwrap(); - assert_eq!( - declaration.as_str(), - "making_coffee : Beans, Milk -> Coffee" - ); - assert_eq!(declaration.as_rule(), Rule::declaration); + assert_eq!(result, "p"); - let mut pairs = declaration.into_inner(); - - let identifier = pairs - .next() + let mut input = "pizza"; + let result = parse_identifier + .parse_next(&mut input) .unwrap(); + assert_eq!(result, "pizza"); - assert_eq!(identifier.as_str(), "making_coffee"); - assert_eq!(identifier.as_rule(), Rule::identifier); - - let signature = pairs - .next() + let mut input = "cook_pizza"; + let result = parse_identifier + .parse_next(&mut input) .unwrap(); + assert_eq!(result, "cook_pizza"); + /* + 
fails_with! { + parser: TechniqueParser, + input: "0trust", + rule: Rule::identifier, + positives: [Rule::identifier], + negatives: [], + pos: 0 + }; + */ + } - assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); - assert_eq!(signature.as_rule(), Rule::signature); - - let mut pairs = signature.into_inner(); + // Import all parent module items + /* + #[test] + fn check_procedure_declaration_explicit() { + let input = "making_coffee : Beans, Milk -> Coffee"; - let domain1 = pairs - .next() - .unwrap(); + // let declaration = TechniqueParser::parse(Rule::declaration, &input) + // .expect("Unsuccessful Parse") + // .next() + // .unwrap(); - assert_eq!(domain1.as_str(), "Beans"); - assert_eq!(domain1.as_rule(), Rule::forma); + assert_eq!( + input, // FIXME + "making_coffee : Beans, Milk -> Coffee" + ); - let domain2 = pairs - .next() - .unwrap(); + // assert_eq!(identifier.as_str(), "making_coffee"); + // assert_eq!(identifier.as_rule(), Rule::identifier); - assert_eq!(domain2.as_str(), "Milk"); - assert_eq!(domain2.as_rule(), Rule::forma); + // assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); + // assert_eq!(signature.as_rule(), Rule::signature); - let range = pairs - .next() - .unwrap(); + // assert_eq!(domain1.as_str(), "Beans"); + // assert_eq!(domain1.as_rule(), Rule::forma); - assert_eq!(range.as_str(), "Coffee"); - assert_eq!(range.as_rule(), Rule::forma); - } + // assert_eq!(domain2.as_str(), "Milk"); + // assert_eq!(domain2.as_rule(), Rule::forma); - #[test] - fn check_procedure_declaration_macro() { - parses_to! { - parser: TechniqueParser, - input: "making_coffee : Beans, Milk -> Coffee", - rule: Rule::declaration, - tokens: [ - declaration(0, 37, [ - identifier(0, 13), - signature(16, 37, [ - forma(16, 21), - forma(23, 27), - forma(31, 37) + // assert_eq!(range.as_str(), "Coffee"); + // assert_eq!(range.as_rule(), Rule::forma); + } + */ + /* + #[test] + fn check_procedure_declaration_macro() { + parses_to! 
{ + parser: TechniqueParser, + input: "making_coffee : Beans, Milk -> Coffee", + rule: Rule::declaration, + tokens: [ + declaration(0, 37, [ + identifier(0, 13), + signature(16, 37, [ + forma(16, 21), + forma(23, 27), + forma(31, 37) + ]) ]) - ]) - ] - }; - } + ] + }; + } - #[test] - fn check_header_spdx() { - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 21, [ - license(2, 5), - copyright(7, 21, [ - owner(11, 21) + #[test] + fn check_header_spdx() { + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 21, [ + license(2, 5), + copyright(7, 21, [ + owner(11, 21) + ]) ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) 2024 ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 26, [ - license(2, 5), - copyright(7, 26, [ - year(11, 15), - owner(16, 26) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "! MIT; (c) 2024 ACME, Inc.", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 26, [ + license(2, 5), + copyright(7, 26, [ + year(11, 15), + owner(16, 26) + ]) ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! PD", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 4, [ - license(2, 4) - ]) - ] - }; + ] + }; + parses_to! { + parser: TechniqueParser, + input: "! PD", + rule: Rule::spdx_line, + tokens: [ + spdx_line(0, 4, [ + license(2, 4) + ]) + ] + }; - parses_to! { - parser: TechniqueParser, - input: "MIT", - rule: Rule::license, - tokens: [ - license(0, 3), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "Public Domain", - rule: Rule::license, - tokens: [ - license(0, 13), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "CC BY-SA 3.0 IGO", - rule: Rule::license, - tokens: [ - license(0, 16), - ] - }; + parses_to! 
{ + parser: TechniqueParser, + input: "MIT", + rule: Rule::license, + tokens: [ + license(0, 3), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "Public Domain", + rule: Rule::license, + tokens: [ + license(0, 13), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "CC BY-SA 3.0 IGO", + rule: Rule::license, + tokens: [ + license(0, 16), + ] + }; - parses_to! { - parser: TechniqueParser, - input: "2024", - rule: Rule::year, - tokens: [ - year(0, 4), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2024-", - rule: Rule::year, - tokens: [ - year(0, 5), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2002-2024", - rule: Rule::year, - tokens: [ - year(0, 9), - ] - }; - fails_with! { - parser: TechniqueParser, - input: "02", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - fails_with! { - parser: TechniqueParser, - input: "02-24", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - } - - #[test] - fn check_header_template() { - parses_to! { - parser: TechniqueParser, - input: "& checklist", - rule: Rule::template_line, - tokens: [ - template_line(0, 11, [ - template(2, 11) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "& nasa-flight-plan-v4.0", - rule: Rule::template_line, - tokens: [ - template_line(0, 23, [ - template(2, 23) - ]) - ] - }; - fails_with! { - parser: TechniqueParser, - input: "&", - rule: Rule::template_line, - positives: [Rule::template], - negatives: [], - pos: 1 - }; - } + parses_to! { + parser: TechniqueParser, + input: "2024", + rule: Rule::year, + tokens: [ + year(0, 4), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "2024-", + rule: Rule::year, + tokens: [ + year(0, 5), + ] + }; + parses_to! { + parser: TechniqueParser, + input: "2002-2024", + rule: Rule::year, + tokens: [ + year(0, 9), + ] + }; + fails_with! 
{ + parser: TechniqueParser, + input: "02", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; + fails_with! { + parser: TechniqueParser, + input: "02-24", + rule: Rule::year, + positives: [Rule::year], + negatives: [], + pos: 0 + }; + } - #[test] - fn check_identifier_rules() { - parses_to! { - parser: TechniqueParser, - input: "p", - rule: Rule::identifier, - tokens: [ - identifier(0, 1) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "pizza", - rule: Rule::identifier, - tokens: [ - identifier(0, 5) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "cook_pizza", - rule: Rule::identifier, - tokens: [ - identifier(0, 10) - ] - }; - fails_with! { - parser: TechniqueParser, - input: "0trust", - rule: Rule::identifier, - positives: [Rule::identifier], - negatives: [], - pos: 0 - }; - } + #[test] + fn check_header_template() { + parses_to! { + parser: TechniqueParser, + input: "& checklist", + rule: Rule::template_line, + tokens: [ + template_line(0, 11, [ + template(2, 11) + ]) + ] + }; + parses_to! { + parser: TechniqueParser, + input: "& nasa-flight-plan-v4.0", + rule: Rule::template_line, + tokens: [ + template_line(0, 23, [ + template(2, 23) + ]) + ] + }; + fails_with! 
{ + parser: TechniqueParser, + input: "&", + rule: Rule::template_line, + positives: [Rule::template], + negatives: [], + pos: 1 + }; + } #[test] fn check_declaration_syntax() { @@ -302,4 +277,5 @@ mod tests { pos: 0 }; } + */ } From 47ce87050dad85e4e540c08d003506adaa29ce9e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 26 Sep 2024 13:04:25 +1000 Subject: [PATCH 018/112] Improve parse_identifier to enforce first character --- src/parsing/parser.rs | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 007b6cd..9f63f97 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,7 +2,9 @@ // struct TechniqueParser; -use winnow::token::take_while; +use winnow::combinator::empty; +use winnow::stream::AsChar; +use winnow::token::{one_of, take_while}; use winnow::{PResult, Parser}; pub fn parse_via_winnow(_content: &str) { @@ -10,14 +12,20 @@ pub fn parse_via_winnow(_content: &str) { // println!("{:?}", technique); } +// a winnow parser that takes an alpha and then any character fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { - take_while(1.., (('0'..='9'), ('A'..='Z'), ('a'..='z'), ('_'))).parse_next(input) + ( + one_of('a'..='z'), + take_while(0.., (('0'..='9'), ('a'..='z'), ('_'))), + ) + .take() + .parse_next(input) } #[cfg(test)] mod tests { use super::*; - + #[test] fn check_identifier_rules() { let mut input = "p"; @@ -39,16 +47,9 @@ mod tests { .parse_next(&mut input) .unwrap(); assert_eq!(result, "cook_pizza"); - /* - fails_with! 
{ - parser: TechniqueParser, - input: "0trust", - rule: Rule::identifier, - positives: [Rule::identifier], - negatives: [], - pos: 0 - }; - */ + + assert!(parse_identifier(&mut "0trust").is_err()); + assert!(parse_identifier(&mut "Pizza").is_err()); } // Import all parent module items From 61502e28f5fe7bcdb2d6f9b45d49c7fd0065c850 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sat, 28 Sep 2024 17:06:40 +1000 Subject: [PATCH 019/112] Use verify to ensure identifier parse is valid --- src/parsing/parser.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 9f63f97..cce1c73 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,14 +2,15 @@ // struct TechniqueParser; -use winnow::combinator::empty; -use winnow::stream::AsChar; +use winnow::error::StrContext; use winnow::token::{one_of, take_while}; use winnow::{PResult, Parser}; -pub fn parse_via_winnow(_content: &str) { +pub fn parse_via_winnow(content: &str) { // let technique = TechniqueParser::parse(Rule::technique, &content); // println!("{:?}", technique); + let result = parse_identifier.parse(content).unwrap(); + println!("{}", result); } // a winnow parser that takes an alpha and then any character @@ -19,6 +20,8 @@ fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { take_while(0.., (('0'..='9'), ('a'..='z'), ('_'))), ) .take() + .verify(|s: &str| s.len() == input.len()) + .context(StrContext::Label("identifier")) .parse_next(input) } @@ -50,6 +53,10 @@ mod tests { assert!(parse_identifier(&mut "0trust").is_err()); assert!(parse_identifier(&mut "Pizza").is_err()); + assert!(parse_identifier(&mut "pizZa").is_err()); + + assert_eq!(parse_identifier(&mut "cook_pizza"), Ok("cook_pizza")); + assert!(parse_identifier(&mut "cook-pizza").is_err()); } // Import all parent module items From 244a9db1776cd3c85987924ec74f4ba936d4923a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 12:51:59 
+1000 Subject: [PATCH 020/112] Add chumsky parser dependency --- Cargo.lock | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + 2 files changed, 97 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index db4d7f9..e2fbd9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,24 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "anstream" version = "0.6.15" @@ -66,12 +84,31 @@ dependencies = [ "generic-array", ] +[[package]] +name = "cc" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9540e661f81799159abee814118cc139a2004b3a3aa3ea37724a1b66530b90e0" +dependencies = [ + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown", + "stacker", +] + [[package]] name = "clap" version = "4.5.16" @@ -155,6 +192,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = 
"is_terminal_polyfill" version = "1.70.1" @@ -279,6 +326,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.37" @@ -359,12 +415,31 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.52.0", +] + [[package]] name = "strsim" version = "0.11.1" @@ -386,6 +461,7 @@ dependencies = [ name = "technique" version = "0.3.0" dependencies = [ + "chumsky", "clap", "pest", "pest_derive", @@ -708,3 +784,23 @@ checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" dependencies = [ "memchr", ] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 86e4478..ec5e3b3 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -8,6 +8,7 @@ repository = "https://github.com/technique-lang/technique" license = "MIT" [dependencies] +chumsky = "0.9.3" clap = { version = "4.5.16", features = [ "wrap_help" ] } pest = "2.7.11" pest_derive = "2.7.11" From bb5d9ffe68680afa690158250b23cd6ec0a92295 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 12:54:40 +1000 Subject: [PATCH 021/112] Reimplement identifier parser again --- src/parsing/mod.rs | 2 +- src/parsing/parser.rs | 66 ++++++++++++++++++------------------------- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index a607228..1b4b2ff 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -7,5 +7,5 @@ pub fn load(source: &Path) { // read source to a str let content = std::fs::read_to_string(source).expect("Failed to read the source file"); - parser::parse_via_winnow(content.as_str()); + parser::parse_via_chumsky(content.as_str()); } diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index cce1c73..5cef353 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,27 +2,26 @@ // struct TechniqueParser; -use winnow::error::StrContext; -use winnow::token::{one_of, take_while}; -use winnow::{PResult, Parser}; +use chumsky::{prelude::*, Span}; -pub fn parse_via_winnow(content: &str) { - // let technique = TechniqueParser::parse(Rule::technique, &content); - // println!("{:?}", technique); - let result = parse_identifier.parse(content).unwrap(); - println!("{}", result); +pub fn parse_via_chumsky(content: &str) { + let result = parse_identifier().parse(content); + println!("{:?}", result); + std::process::exit(0); } -// a winnow parser that takes an alpha and then any character -fn parse_identifier<'s>(input: &mut &'s str) -> PResult<&'s str> { - ( - one_of('a'..='z'), - take_while(0.., (('0'..='9'), ('a'..='z'), ('_'))), - ) - .take() - .verify(|s: &str| s.len() == input.len()) - .context(StrContext::Label("identifier")) - 
.parse_next(input) +type Identifier = String; + +// takes a single lower case character then any lower case character, digit, +// or unerscore. Based on the parser code in chumsky::text::ident(). + +fn parse_identifier() -> impl Parser> { + filter(|c: &char| c.is_ascii_lowercase()) + .map(Some) + .chain::, _>( + filter(|c: &char| c.is_ascii_lowercase() || c.is_ascii_digit() || *c == '_').repeated(), + ) + .collect() } #[cfg(test)] @@ -31,32 +30,23 @@ mod tests { #[test] fn check_identifier_rules() { - let mut input = "p"; + let input = "make_dinner"; + + let result = parse_identifier().parse(input); + + assert_eq!(result, Ok("make_dinner".to_string())); - let result = parse_identifier - .parse_next(&mut input) - .unwrap(); + let input = ""; - assert_eq!(result, "p"); + let result = parse_identifier().parse(input); - let mut input = "pizza"; - let result = parse_identifier - .parse_next(&mut input) - .unwrap(); - assert_eq!(result, "pizza"); + assert!(result.is_err()); - let mut input = "cook_pizza"; - let result = parse_identifier - .parse_next(&mut input) - .unwrap(); - assert_eq!(result, "cook_pizza"); + let input = "MakeDinner"; - assert!(parse_identifier(&mut "0trust").is_err()); - assert!(parse_identifier(&mut "Pizza").is_err()); - assert!(parse_identifier(&mut "pizZa").is_err()); + let result = parse_identifier().parse(input); - assert_eq!(parse_identifier(&mut "cook_pizza"), Ok("cook_pizza")); - assert!(parse_identifier(&mut "cook-pizza").is_err()); + assert!(result.is_err()); } // Import all parent module items From 33aa001adf41cf0a64be1934a616176fabfb9b3a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 17:58:40 +1000 Subject: [PATCH 022/112] Parse magic line --- src/parsing/parser.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 5cef353..b36dee1 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -2,7 +2,7 @@ // struct TechniqueParser; 
-use chumsky::{prelude::*, Span}; +use chumsky::prelude::*; pub fn parse_via_chumsky(content: &str) { let result = parse_identifier().parse(content); @@ -24,6 +24,12 @@ fn parse_identifier() -> impl Parser> { .collect() } +fn parse_magic_line() -> impl Parser> { + just('%') + .ignore_then(just("technique").padded()) + .ignore_then(just("v1").to(1u8)) +} + #[cfg(test)] mod tests { use super::*; @@ -49,6 +55,15 @@ mod tests { assert!(result.is_err()); } + #[test] + fn check_magic_line() { + assert_eq!(parse_magic_line().parse("% technique v1"), Ok(1)); + assert_eq!(parse_magic_line().parse("%technique v1"), Ok(1)); + // this isn't really ideal, but there's no absolutely vital reason it + // has to be rejected. + assert_eq!(parse_magic_line().parse("%techniquev1"), Ok(1)); + } + // Import all parent module items /* #[test] From b32204d28908349502d557311adf753306da32fa Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Mon, 30 Sep 2024 23:22:57 +1000 Subject: [PATCH 023/112] Parse license and copyright header parts --- src/parsing/parser.rs | 254 +++++++++++++++++++++--------------------- 1 file changed, 129 insertions(+), 125 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index b36dee1..e255e33 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -30,6 +30,74 @@ fn parse_magic_line() -> impl Parser> { .ignore_then(just("v1").to(1u8)) } +fn parse_spdx_line() -> impl Parser> +{ + just('!') + .ignore_then(parse_license()) + .then_ignore(just(';')) + .then(parse_copyright()) +} + +fn parse_license() -> impl Parser> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || *c != ';' // symbol which separates license and copyright probably shouldn't ever encounter it + || c.is_ascii_punctuation() + || *c == ' ' + }) + .repeated() + .at_least(1) + .collect() +} + +// change to a semantic Copyright type +fn parse_copyright() -> impl Parser> { + let p = parse_copyright_year() + .padded() + 
.then(parse_copyright_owner()); + + p.map(|((y1, y2), o)| { + let mut r = String::new(); + r.push_str(&y1); + r.push_str(&y2); + r.push_str(&o); + r + }) +} + +fn year() -> impl Parser> { + filter(|c: &char| c.is_ascii_digit()) + .repeated() + .at_least(4) + .at_most(4) + .collect() +} + +fn parse_copyright_year() -> impl Parser> { + year() + .then_ignore(just('-')) + .then(year()) + .or(year() + .then_ignore(just('-')) + .map(|yyyy| (yyyy, "".to_string()))) + .or(year().map(|yyyy| (yyyy, "".to_string()))) +} + +fn parse_copyright_owner() -> impl Parser> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || c.is_ascii_punctuation() + || *c == ' ' + }) + .repeated() + .at_least(1) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -64,7 +132,48 @@ mod tests { assert_eq!(parse_magic_line().parse("%techniquev1"), Ok(1)); } - // Import all parent module items + #[test] + fn check_header_spdx() { + assert_eq!(parse_license().parse("MIT"), Ok("MIT".to_string())); + assert_eq!( + parse_license().parse("Public Domain"), + Ok("Public Domain".to_string()) + ); + assert_eq!( + parse_license().parse("CC BY-SA 3.0 IGO"), + Ok("CC BY-SA 3.0 IGO".to_string()) + ); + + assert_eq!( + parse_copyright_year().parse("2024"), + Ok(("2024".to_string(), "".to_string())) + ); + assert_eq!( + parse_copyright_year().parse("2024-"), + Ok(("2024".to_string(), "".to_string())) + ); + assert_eq!( + parse_copyright_year().parse("2002-2024"), + Ok(("2002".to_string(), "2024".to_string())) + ); + + assert!(parse_copyright_year() + .parse("24") + .is_err()); + assert!(parse_copyright_year() + .parse("02-24") + .is_err()); + + assert_eq!( + parse_copyright_owner().parse("ACME"), + Ok("ACME".to_string()) + ); + assert_eq!( + parse_copyright_owner().parse("ACME, Inc."), + Ok("ACME, Inc.".to_string()) + ); + } + /* #[test] fn check_procedure_declaration_explicit() { @@ -97,132 +206,27 @@ mod tests { } */ /* - #[test] - fn 
check_procedure_declaration_macro() { - parses_to! { - parser: TechniqueParser, - input: "making_coffee : Beans, Milk -> Coffee", - rule: Rule::declaration, - tokens: [ - declaration(0, 37, [ - identifier(0, 13), - signature(16, 37, [ - forma(16, 21), - forma(23, 27), - forma(31, 37) + #[test] + fn check_procedure_declaration_macro() { + parses_to! { + parser: TechniqueParser, + input: "making_coffee : Beans, Milk -> Coffee", + rule: Rule::declaration, + tokens: [ + declaration(0, 37, [ + identifier(0, 13), + signature(16, 37, [ + forma(16, 21), + forma(23, 27), + forma(31, 37) + ]) ]) - ]) - ] - }; - } - - #[test] - fn check_header_spdx() { - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 21, [ - license(2, 5), - copyright(7, 21, [ - owner(11, 21) - ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! MIT; (c) 2024 ACME, Inc.", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 26, [ - license(2, 5), - copyright(7, 26, [ - year(11, 15), - owner(16, 26) - ]) - ]) - ] - }; - parses_to! { - parser: TechniqueParser, - input: "! PD", - rule: Rule::spdx_line, - tokens: [ - spdx_line(0, 4, [ - license(2, 4) - ]) - ] - }; - - parses_to! { - parser: TechniqueParser, - input: "MIT", - rule: Rule::license, - tokens: [ - license(0, 3), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "Public Domain", - rule: Rule::license, - tokens: [ - license(0, 13), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "CC BY-SA 3.0 IGO", - rule: Rule::license, - tokens: [ - license(0, 16), - ] - }; - - parses_to! { - parser: TechniqueParser, - input: "2024", - rule: Rule::year, - tokens: [ - year(0, 4), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2024-", - rule: Rule::year, - tokens: [ - year(0, 5), - ] - }; - parses_to! { - parser: TechniqueParser, - input: "2002-2024", - rule: Rule::year, - tokens: [ - year(0, 9), - ] - }; - fails_with! 
{ - parser: TechniqueParser, - input: "02", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - fails_with! { - parser: TechniqueParser, - input: "02-24", - rule: Rule::year, - positives: [Rule::year], - negatives: [], - pos: 0 - }; - } + ] + }; + } + */ + /* #[test] fn check_header_template() { parses_to! { From f15e5926059aa7dbf6ffc66399848cf92a667ef3 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Tue, 1 Oct 2024 12:42:12 +1000 Subject: [PATCH 024/112] Complete SPDX header line parser --- src/parsing/parser.rs | 114 +++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 62 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index e255e33..c74ec79 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -22,6 +22,7 @@ fn parse_identifier() -> impl Parser> { filter(|c: &char| c.is_ascii_lowercase() || c.is_ascii_digit() || *c == '_').repeated(), ) .collect() + // .validate(|s : String, span : Range, emit| if s.len() != span.end() - span.start() { emit(Simple::custom(span, "Wrong length")) }) } fn parse_magic_line() -> impl Parser> { @@ -30,62 +31,41 @@ fn parse_magic_line() -> impl Parser> { .ignore_then(just("v1").to(1u8)) } -fn parse_spdx_line() -> impl Parser> -{ +fn parse_spdx_line() -> impl Parser, Option), Error = Simple> { just('!') - .ignore_then(parse_license()) - .then_ignore(just(';')) - .then(parse_copyright()) + .ignore_then( + parse_license() + .padded() + .or_not(), + ) + .then( + just(';') + .ignore_then( + just("(c)") + .or(just("(C)")) + .or(just("©")) + .padded(), + ) + .ignore_then(parse_copyright().padded()) + .or_not(), + ) } fn parse_license() -> impl Parser> { filter(|c: &char| { - c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit() - || *c != ';' // symbol which separates license and copyright probably shouldn't ever encounter it - || c.is_ascii_punctuation() - || *c == ' ' + *c != ';' + && (c.is_ascii_uppercase() + || 
c.is_ascii_lowercase() + || c.is_ascii_digit() + || c.is_ascii_punctuation() + || *c == ' ') }) .repeated() .at_least(1) .collect() } -// change to a semantic Copyright type fn parse_copyright() -> impl Parser> { - let p = parse_copyright_year() - .padded() - .then(parse_copyright_owner()); - - p.map(|((y1, y2), o)| { - let mut r = String::new(); - r.push_str(&y1); - r.push_str(&y2); - r.push_str(&o); - r - }) -} - -fn year() -> impl Parser> { - filter(|c: &char| c.is_ascii_digit()) - .repeated() - .at_least(4) - .at_most(4) - .collect() -} - -fn parse_copyright_year() -> impl Parser> { - year() - .then_ignore(just('-')) - .then(year()) - .or(year() - .then_ignore(just('-')) - .map(|yyyy| (yyyy, "".to_string()))) - .or(year().map(|yyyy| (yyyy, "".to_string()))) -} - -fn parse_copyright_owner() -> impl Parser> { filter(|c: &char| { c.is_ascii_uppercase() || c.is_ascii_lowercase() @@ -144,33 +124,43 @@ mod tests { Ok("CC BY-SA 3.0 IGO".to_string()) ); + assert_eq!(parse_copyright().parse("ACME"), Ok("ACME".to_string())); assert_eq!( - parse_copyright_year().parse("2024"), - Ok(("2024".to_string(), "".to_string())) + parse_copyright().parse("ACME, Inc."), + Ok("ACME, Inc.".to_string()) ); + assert_eq!( - parse_copyright_year().parse("2024-"), - Ok(("2024".to_string(), "".to_string())) + parse_copyright().parse("2024 ACME, Inc."), + Ok("2024 ACME, Inc.".to_string()) ); + assert_eq!( - parse_copyright_year().parse("2002-2024"), - Ok(("2002".to_string(), "2024".to_string())) + parse_spdx_line().parse("! PD"), + Ok((Some("PD".to_string()), None)) ); - - assert!(parse_copyright_year() - .parse("24") - .is_err()); - assert!(parse_copyright_year() - .parse("02-24") - .is_err()); - assert_eq!( - parse_copyright_owner().parse("ACME"), - Ok("ACME".to_string()) + parse_spdx_line().parse("! 
MIT; (c) ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) ); assert_eq!( - parse_copyright_owner().parse("ACME, Inc."), - Ok("ACME, Inc.".to_string()) + parse_spdx_line().parse("! MIT; (C) ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; © ACME, Inc.".to_string()), + Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! MIT; (c) 2024 ACME, Inc."), + Ok((Some("MIT".to_string()), Some("2024 ACME, Inc.".to_string()))) + ); + assert_eq!( + parse_spdx_line().parse("! CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."), + Ok(( + Some("CC BY-SA 3.0 [IGO]".to_string()), + Some("2024 ACME, Inc.".to_string()) + )) ); } From 410cdccc391cdbaff0f540e278da9d61ce48ea34 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 3 Oct 2024 19:11:05 +1000 Subject: [PATCH 025/112] Parse template line --- src/parsing/parser.rs | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index c74ec79..d3d6900 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -78,6 +78,28 @@ fn parse_copyright() -> impl Parser> { .collect() } +fn parse_template_line() -> impl Parser, Error = Simple> { + just('&').ignore_then( + parse_template() + .padded() + .or_not(), + ) +} + +fn parse_template() -> impl Parser> { + filter(|c: &char| { + c.is_ascii_uppercase() + || c.is_ascii_lowercase() + || c.is_ascii_digit() + || *c == '.' 
+ || *c == ',' + || *c == '-' + }) + .repeated() + .at_least(1) + .collect() +} + #[cfg(test)] mod tests { use super::*; @@ -164,6 +186,25 @@ mod tests { ); } + #[test] + fn check_header_template() { + assert_eq!( + parse_template().parse("checklist"), + Ok("checklist".to_string()) + ); + assert_eq!( + parse_template().parse("checklist,v1"), + Ok("checklist,v1".to_string()) + ); + assert_eq!( + parse_template().parse("checklist-v1.0"), + Ok("checklist-v1.0".to_string()) + ); + assert_eq!( + parse_template_line().parse("& checklist-v1"), + Ok(Some("checklist-v1".to_string())) + ); + } /* #[test] fn check_procedure_declaration_explicit() { From a7669de186d776aa34c7ece263e542c422e0095e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 3 Oct 2024 23:24:52 +1000 Subject: [PATCH 026/112] Add lalrpop parser dependency --- Cargo.lock | 432 +++++++++++++++++++++++++++++++++++------------------ Cargo.toml | 8 +- 2 files changed, 293 insertions(+), 147 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2fbd9e..05b4524 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,23 +3,14 @@ version = 3 [[package]] -name = "ahash" -version = "0.8.11" +name = "aho-corasick" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", + "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - [[package]] name = "anstream" version = "0.6.15" @@ -69,6 +60,36 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "ascii-canvas" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ef1e3e699d84ab1b0911a1010c5c106aa34ae89aeac103be5ce0c3859db1e891" +dependencies = [ + "term", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.6.0" @@ -84,31 +105,12 @@ dependencies = [ "generic-array", ] -[[package]] -name = "cc" -version = "1.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9540e661f81799159abee814118cc139a2004b3a3aa3ea37724a1b66530b90e0" -dependencies = [ - "shlex", -] - [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chumsky" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" -dependencies = [ - "hashbrown", - "stacker", -] - [[package]] name = "clap" version = "4.5.16" @@ -172,6 +174,27 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "ena" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" +dependencies = [ 
+ "log", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.9" @@ -182,6 +205,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "generic-array" version = "0.14.7" @@ -194,12 +223,27 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + +[[package]] +name = "home" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "ahash", - "allocator-api2", + "windows-sys 0.52.0", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown", ] [[package]] @@ -208,12 +252,62 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "keccak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" +dependencies = [ + "cpufeatures", +] + +[[package]] +name = "lalrpop" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06093b57658c723a21da679530e061a8c25340fa5a6f98e313b542268c7e2a1f" +dependencies = [ + "ascii-canvas", + "bit-set", + "ena", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "sha3", + "string_cache", + "term", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feee752d43abd0f4807a921958ab4131f692a44d4d599733d4419c5d586176ce" +dependencies = [ + "regex-automata", + "rustversion", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -232,6 +326,16 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.22" @@ -244,6 +348,12 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -267,56 +377,65 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] -name = "pest" -version = "2.7.12" +name = "parking_lot" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c73c26c01b8c87956cea613c907c9d6ecffd8d18a2a5908e5de0adfaa185cea" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ - "memchr", - "thiserror", - "ucd-trie", + "lock_api", + "parking_lot_core", ] [[package]] -name = "pest_derive" -version = "2.7.12" +name = "parking_lot_core" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "664d22978e2815783adbdd2c588b455b1bd625299ce36b2a99881ac9627e6d8d" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ - "pest", - "pest_generator", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", ] [[package]] -name = "pest_generator" -version = "2.7.12" +name = "petgraph" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2d5487022d5d33f4c30d91c22afa240ce2a644e87fe08caad974d4eab6badbe" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", + "fixedbitset", + "indexmap", ] [[package]] -name = "pest_meta" -version = "2.7.12" +name = "phf_shared" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0091754bbd0ea592c4deb3a122ce8ecbb0753b738aa82bc055fcc2eccc8d8174" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" dependencies = [ - "once_cell", - "pest", - "sha2", + "siphasher", ] +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + [[package]] name = "pin-project-lite" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -327,23 +446,52 @@ dependencies = [ ] [[package]] -name = "psm" -version = "0.1.23" +name = "quote" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ - "cc", + "proc-macro2", ] [[package]] -name = "quote" -version = "1.0.37" +name = "redox_syscall" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ - "proc-macro2", + "bitflags", +] + +[[package]] +name = "regex" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustix" version = "0.38.34" @@ -357,12 +505,33 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustversion" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + [[package]] name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" version = "1.0.209" @@ -396,14 +565,13 @@ dependencies = [ ] [[package]] -name = "sha2" +name = "sha3" version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" dependencies = [ - "cfg-if", - "cpufeatures", "digest", + "keccak", ] [[package]] @@ -416,10 +584,10 @@ dependencies = [ ] [[package]] -name = "shlex" -version = "1.3.0" +name = "siphasher" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "smallvec" @@ -428,16 +596,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] -name = "stacker" -version = "0.1.17" +name = "string_cache" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.52.0", + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", ] [[package]] @@ -461,45 +629,33 @@ dependencies = [ name = "technique" version = "0.3.0" dependencies = [ - "chumsky", "clap", - "pest", - "pest_derive", + "lalrpop", + "lalrpop-util", "serde", "tinytemplate", "tracing", "tracing-subscriber", - "winnow", ] [[package]] -name = "terminal_size" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" -dependencies = [ - "rustix", - "windows-sys 0.48.0", -] - -[[package]] -name = "thiserror" -version = "1.0.63" +name = "term" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "4df4175de05129f31b80458c6df371a15e7fc3fd367272e6bf938e5c351c7ea0" dependencies = [ - "thiserror-impl", + "home", + "windows-sys 0.52.0", ] [[package]] -name = "thiserror-impl" -version = "1.0.63" +name = "terminal_size" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" dependencies = [ - "proc-macro2", - "quote", - "syn", + "rustix", + "windows-sys 0.48.0", ] [[package]] @@ -585,18 +741,18 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "ucd-trie" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "utf8parse" version = "0.2.2" @@ -615,6 +771,16 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "winapi" version = "0.3.9" @@ -631,6 +797,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -775,32 +950,3 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "winnow" -version = "0.6.18" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" -dependencies = [ - "memchr", -] - -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/Cargo.toml b/Cargo.toml index ec5e3b3..21c2dc3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,12 +8,12 @@ repository = "https://github.com/technique-lang/technique" license = "MIT" [dependencies] -chumsky = "0.9.3" clap = { version = "4.5.16", features = [ "wrap_help" ] } -pest = "2.7.11" -pest_derive = "2.7.11" +lalrpop-util = { version = "0.22.0", features = [ "lexer", "unicode" ] } serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" tracing-subscriber = "0.3.18" -winnow = "0.6.18" + +[build-dependencies] +lalrpop = { version = "0.22.0", features = [ "lexer", "unicode" ] } From 1fd1ae705bd680b67e7d3384e1cc57d54ac4d20a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 3 Oct 2024 23:37:09 +1000 Subject: [PATCH 027/112] Setup --- build.rs | 3 +++ src/main.rs | 2 +- src/parsing/mod.rs | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 build.rs diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..7e68f91 --- /dev/null +++ b/build.rs @@ -0,0 +1,3 @@ +fn main() { + lalrpop::process_src().unwrap(); +} diff --git a/src/main.rs b/src/main.rs index 52c5cde..661e0fa 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,8 +3,8 @@ use std::path::Path; use tracing::debug; use tracing_subscriber; -mod rendering; mod 
parsing; +mod rendering; fn main() { const VERSION: &str = concat!("v", env!("CARGO_PKG_VERSION")); diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 1b4b2ff..94f0889 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -7,5 +7,5 @@ pub fn load(source: &Path) { // read source to a str let content = std::fs::read_to_string(source).expect("Failed to read the source file"); - parser::parse_via_chumsky(content.as_str()); + parser::parse_via_lalrpop(content.as_str()); } From 3e6bc2740ff998cc18ad3d347584a4f47f68955e Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 3 Oct 2024 23:37:15 +1000 Subject: [PATCH 028/112] Parse identifiers --- src/parsing/parser.rs | 60 ++++++++++++++++++++++++------------------- src/technique.lalrpop | 7 +++++ 2 files changed, 41 insertions(+), 26 deletions(-) create mode 100644 src/technique.lalrpop diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index d3d6900..d7ece72 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,17 +1,12 @@ -// parsing machinery +use lalrpop_util::lalrpop_mod; -// struct TechniqueParser; +lalrpop_mod!(pub technique); -use chumsky::prelude::*; - -pub fn parse_via_chumsky(content: &str) { - let result = parse_identifier().parse(content); - println!("{:?}", result); +pub fn parse_via_lalrpop(_content: &str) { std::process::exit(0); } -type Identifier = String; - +/* // takes a single lower case character then any lower case character, digit, // or unerscore. Based on the parser code in chumsky::text::ident(). 
@@ -99,32 +94,44 @@ fn parse_template() -> impl Parser> { .at_least(1) .collect() } +*/ #[cfg(test)] mod tests { use super::*; + use lalrpop_util::ParseError; #[test] fn check_identifier_rules() { - let input = "make_dinner"; - - let result = parse_identifier().parse(input); - - assert_eq!(result, Ok("make_dinner".to_string())); - - let input = ""; - - let result = parse_identifier().parse(input); + let p = technique::identifierParser::new(); - assert!(result.is_err()); - - let input = "MakeDinner"; - - let result = parse_identifier().parse(input); - - assert!(result.is_err()); + assert_eq!(p.parse("a"), Ok("a".to_string())); + assert_eq!(p.parse("ab"), Ok("ab".to_string())); + assert_eq!(p.parse("johnny5"), Ok("johnny5".to_string())); + assert_eq!( + p.parse("Pizza"), + Err(ParseError::InvalidToken { location: 0 }) + ); + assert_eq!( + p.parse("pizZa"), + Err(ParseError::InvalidToken { location: 3 }) + ); + assert_eq!( + p.parse("0trust"), + Err(ParseError::InvalidToken { location: 0 }) + ); + assert_eq!(p.parse("make_dinner"), Ok("make_dinner".to_string())); + assert_eq!( + p.parse("MakeDinner"), + Err(ParseError::InvalidToken { location: 0 }) + ); + assert_eq!( + p.parse("make-dinner"), + Err(ParseError::InvalidToken { location: 4 }) + ); } - +} +/* #[test] fn check_magic_line() { assert_eq!(parse_magic_line().parse("% technique v1"), Ok(1)); @@ -327,3 +334,4 @@ mod tests { } */ } +*/ diff --git a/src/technique.lalrpop b/src/technique.lalrpop new file mode 100644 index 0000000..71f9256 --- /dev/null +++ b/src/technique.lalrpop @@ -0,0 +1,7 @@ +use std::str::FromStr; + +grammar; + +pub identifier: String = { + => <>.to_string() +}; From 476055be28bf8b14cf6d2e52044376c3f08bd42a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 3 Oct 2024 23:53:16 +1000 Subject: [PATCH 029/112] Parse magic line --- src/parsing/parser.rs | 27 ++++++++++++++++++--------- src/technique.lalrpop | 4 ++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git 
a/src/parsing/parser.rs b/src/parsing/parser.rs index d7ece72..c46b2ab 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -99,7 +99,7 @@ fn parse_template() -> impl Parser> { #[cfg(test)] mod tests { use super::*; - use lalrpop_util::ParseError; + use lalrpop_util::{lexer::Token, ParseError}; #[test] fn check_identifier_rules() { @@ -130,17 +130,26 @@ mod tests { Err(ParseError::InvalidToken { location: 4 }) ); } -} -/* + #[test] fn check_magic_line() { - assert_eq!(parse_magic_line().parse("% technique v1"), Ok(1)); - assert_eq!(parse_magic_line().parse("%technique v1"), Ok(1)); - // this isn't really ideal, but there's no absolutely vital reason it - // has to be rejected. - assert_eq!(parse_magic_line().parse("%techniquev1"), Ok(1)); + let p = technique::magic_lineParser::new(); + assert_eq!(p.parse("% technique v1"), Ok(1)); + assert_eq!(p.parse("%technique v1"), Ok(1)); + // this is rejected because the technique keyword isn't present. I'm + // not convinced there is great value to having an error of this degree + // of detail hard-coded in the test case; change to .is_err() if it + // ever becomes a problem. 
+ assert_eq!( + p.parse("%techniquev1"), + Err(ParseError::UnrecognizedToken { + token: (1, Token(0, "techniquev1"), 12), + expected: vec!["\"technique\"".to_string()] + }) + ); } - +} +/* #[test] fn check_header_spdx() { assert_eq!(parse_license().parse("MIT"), Ok("MIT".to_string())); diff --git a/src/technique.lalrpop b/src/technique.lalrpop index 71f9256..b3add4d 100644 --- a/src/technique.lalrpop +++ b/src/technique.lalrpop @@ -5,3 +5,7 @@ grammar; pub identifier: String = { => <>.to_string() }; + +pub magic_line: u8 = { + "%" "technique" "v1" => 1 +}; From 5a1c45debd579c39dc593ef8987581d4258b484d Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 4 Oct 2024 11:38:12 +1000 Subject: [PATCH 030/112] Redo lexer declaration with match --- src/parsing/parser.rs | 45 +++++++++++++++++-------------------------- src/technique.lalrpop | 9 ++++++++- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index c46b2ab..6ddae56 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -108,27 +108,22 @@ mod tests { assert_eq!(p.parse("a"), Ok("a".to_string())); assert_eq!(p.parse("ab"), Ok("ab".to_string())); assert_eq!(p.parse("johnny5"), Ok("johnny5".to_string())); - assert_eq!( - p.parse("Pizza"), - Err(ParseError::InvalidToken { location: 0 }) - ); - assert_eq!( - p.parse("pizZa"), - Err(ParseError::InvalidToken { location: 3 }) - ); - assert_eq!( - p.parse("0trust"), - Err(ParseError::InvalidToken { location: 0 }) - ); + assert!(p + .parse("Pizza") + .is_err(),); + assert!(p + .parse("pizZa") + .is_err()); + assert!(p + .parse("0trust") + .is_err()); assert_eq!(p.parse("make_dinner"), Ok("make_dinner".to_string())); - assert_eq!( - p.parse("MakeDinner"), - Err(ParseError::InvalidToken { location: 0 }) - ); - assert_eq!( - p.parse("make-dinner"), - Err(ParseError::InvalidToken { location: 4 }) - ); + assert!(p + .parse("MakeDinner") + .is_err()); + assert!(p + .parse("make-dinner") + .is_err()); } 
#[test] @@ -140,13 +135,9 @@ mod tests { // not convinced there is great value to having an error of this degree // of detail hard-coded in the test case; change to .is_err() if it // ever becomes a problem. - assert_eq!( - p.parse("%techniquev1"), - Err(ParseError::UnrecognizedToken { - token: (1, Token(0, "techniquev1"), 12), - expected: vec!["\"technique\"".to_string()] - }) - ); + assert!(p + .parse("%techniquev1") + .is_err()); } } /* diff --git a/src/technique.lalrpop b/src/technique.lalrpop index b3add4d..6ced15b 100644 --- a/src/technique.lalrpop +++ b/src/technique.lalrpop @@ -2,8 +2,15 @@ use std::str::FromStr; grammar; +match { + r"[a-z][a-z0-9_]*" => IDENTIFIER, + _ +} else { + r"[A-Za-z0-9][A-Za-z0-9_\-\.]*" => WORD +} + pub identifier: String = { - => <>.to_string() + => <>.to_string() }; pub magic_line: u8 = { From f4b7a24525d1d81cbff76f9419b2a70e84db89da Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 4 Oct 2024 13:22:53 +1000 Subject: [PATCH 031/112] Parse SPDX header line --- src/parsing/parser.rs | 55 ++++++++++++++++++++----------------------- src/technique.lalrpop | 17 ++++++++++--- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 6ddae56..cfd5168 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -99,7 +99,6 @@ fn parse_template() -> impl Parser> { #[cfg(test)] mod tests { use super::*; - use lalrpop_util::{lexer::Token, ParseError}; #[test] fn check_identifier_rules() { @@ -139,60 +138,56 @@ mod tests { .parse("%techniquev1") .is_err()); } -} -/* + #[test] fn check_header_spdx() { - assert_eq!(parse_license().parse("MIT"), Ok("MIT".to_string())); - assert_eq!( - parse_license().parse("Public Domain"), - Ok("Public Domain".to_string()) - ); + let l = technique::licenseParser::new(); + let c = technique::copyrightParser::new(); + let p = technique::spdx_lineParser::new(); + + assert_eq!(l.parse("MIT"), Ok("MIT".to_string())); + assert_eq!(l.parse("Public 
Domain"), Ok("Public Domain".to_string())); assert_eq!( - parse_license().parse("CC BY-SA 3.0 IGO"), + l.parse("CC BY-SA 3.0 IGO"), Ok("CC BY-SA 3.0 IGO".to_string()) ); - assert_eq!(parse_copyright().parse("ACME"), Ok("ACME".to_string())); - assert_eq!( - parse_copyright().parse("ACME, Inc."), - Ok("ACME, Inc.".to_string()) - ); + assert_eq!(c.parse("ACME"), Ok("ACME".to_string())); + assert_eq!(c.parse("ACME, Inc."), Ok("ACME, Inc.".to_string())); assert_eq!( - parse_copyright().parse("2024 ACME, Inc."), + c.parse("2024 ACME, Inc."), Ok("2024 ACME, Inc.".to_string()) ); + assert_eq!(p.parse("! PD"), Ok(("PD".to_string(), "".to_string()))); assert_eq!( - parse_spdx_line().parse("! PD"), - Ok((Some("PD".to_string()), None)) - ); - assert_eq!( - parse_spdx_line().parse("! MIT; (c) ACME, Inc.".to_string()), - Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + p.parse("! MIT; (c) ACME, Inc."), + Ok(("MIT".to_string(), "ACME, Inc.".to_string())) ); assert_eq!( - parse_spdx_line().parse("! MIT; (C) ACME, Inc.".to_string()), - Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + p.parse("! MIT; (C) ACME, Inc."), + Ok(("MIT".to_string(), "ACME, Inc.".to_string())) ); assert_eq!( - parse_spdx_line().parse("! MIT; © ACME, Inc.".to_string()), - Ok((Some("MIT".to_string()), Some("ACME, Inc.".to_string()))) + p.parse("! MIT; © ACME, Inc."), + Ok(("MIT".to_string(), "ACME, Inc.".to_string())) ); assert_eq!( - parse_spdx_line().parse("! MIT; (c) 2024 ACME, Inc."), - Ok((Some("MIT".to_string()), Some("2024 ACME, Inc.".to_string()))) + p.parse("! MIT; (c) 2024 ACME, Inc."), + Ok(("MIT".to_string(), "2024 ACME, Inc.".to_string())) ); assert_eq!( - parse_spdx_line().parse("! CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."), + p.parse("! 
CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."), Ok(( - Some("CC BY-SA 3.0 [IGO]".to_string()), - Some("2024 ACME, Inc.".to_string()) + "CC BY-SA 3.0 [IGO]".to_string(), + "2024 ACME, Inc.".to_string() )) ); } +} +/* #[test] fn check_header_template() { assert_eq!( diff --git a/src/technique.lalrpop b/src/technique.lalrpop index 6ced15b..648eeb6 100644 --- a/src/technique.lalrpop +++ b/src/technique.lalrpop @@ -1,12 +1,10 @@ -use std::str::FromStr; - grammar; match { r"[a-z][a-z0-9_]*" => IDENTIFIER, _ } else { - r"[A-Za-z0-9][A-Za-z0-9_\-\.]*" => WORD + r"[A-Za-z0-9_\-\.,\[\]]*" => WORD } pub identifier: String = { @@ -16,3 +14,16 @@ pub identifier: String = { pub magic_line: u8 = { "%" "technique" "v1" => 1 }; + +pub spdx_line: (String,String) = { + "!" ";" r"\(c\)|\(C\)|©" => (one.to_string(),two.to_string()), + "!" => (one.to_string(),"".to_string()) +} + +pub license: String = { + => <>.join(" ").to_string() +}; + +pub copyright: String = { + => <>.join(" ").to_string() +} From 2c48be28b843ff7b637d92c1e335ac0b4acc15fb Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 4 Oct 2024 13:24:04 +1000 Subject: [PATCH 032/112] Remove previous parser code --- src/parsing/parser.rs | 95 +------------------------------------------ 1 file changed, 1 insertion(+), 94 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index cfd5168..a7cecb7 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -6,96 +6,6 @@ pub fn parse_via_lalrpop(_content: &str) { std::process::exit(0); } -/* -// takes a single lower case character then any lower case character, digit, -// or unerscore. Based on the parser code in chumsky::text::ident(). 
- -fn parse_identifier() -> impl Parser> { - filter(|c: &char| c.is_ascii_lowercase()) - .map(Some) - .chain::, _>( - filter(|c: &char| c.is_ascii_lowercase() || c.is_ascii_digit() || *c == '_').repeated(), - ) - .collect() - // .validate(|s : String, span : Range, emit| if s.len() != span.end() - span.start() { emit(Simple::custom(span, "Wrong length")) }) -} - -fn parse_magic_line() -> impl Parser> { - just('%') - .ignore_then(just("technique").padded()) - .ignore_then(just("v1").to(1u8)) -} - -fn parse_spdx_line() -> impl Parser, Option), Error = Simple> { - just('!') - .ignore_then( - parse_license() - .padded() - .or_not(), - ) - .then( - just(';') - .ignore_then( - just("(c)") - .or(just("(C)")) - .or(just("©")) - .padded(), - ) - .ignore_then(parse_copyright().padded()) - .or_not(), - ) -} - -fn parse_license() -> impl Parser> { - filter(|c: &char| { - *c != ';' - && (c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit() - || c.is_ascii_punctuation() - || *c == ' ') - }) - .repeated() - .at_least(1) - .collect() -} - -fn parse_copyright() -> impl Parser> { - filter(|c: &char| { - c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit() - || c.is_ascii_punctuation() - || *c == ' ' - }) - .repeated() - .at_least(1) - .collect() -} - -fn parse_template_line() -> impl Parser, Error = Simple> { - just('&').ignore_then( - parse_template() - .padded() - .or_not(), - ) -} - -fn parse_template() -> impl Parser> { - filter(|c: &char| { - c.is_ascii_uppercase() - || c.is_ascii_lowercase() - || c.is_ascii_digit() - || *c == '.' - || *c == ',' - || *c == '-' - }) - .repeated() - .at_least(1) - .collect() -} -*/ - #[cfg(test)] mod tests { use super::*; @@ -130,10 +40,7 @@ mod tests { let p = technique::magic_lineParser::new(); assert_eq!(p.parse("% technique v1"), Ok(1)); assert_eq!(p.parse("%technique v1"), Ok(1)); - // this is rejected because the technique keyword isn't present. 
I'm - // not convinced there is great value to having an error of this degree - // of detail hard-coded in the test case; change to .is_err() if it - // ever becomes a problem. + // this is rejected because the technique keyword isn't present. assert!(p .parse("%techniquev1") .is_err()); From bbd7c9ea8ec84aeefb3711d5efa1b7dabf3789fa Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 4 Oct 2024 14:22:29 +1000 Subject: [PATCH 033/112] Validate identifiers in parser helper rather than tokenizer --- src/parsing/parser.rs | 30 ++++++++++++++++++++++++++++++ src/technique.lalrpop | 20 ++++++++++++++++---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index a7cecb7..5a087fa 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -6,6 +6,35 @@ pub fn parse_via_lalrpop(_content: &str) { std::process::exit(0); } +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ValidationError { + InvalidIdentifier, +} + +// only accept [a-z][a-zA-Z0-9_]* as an identifier +fn validate_identifier(input: &str) -> Result { + if input.len() == 0 { + return Err(ValidationError::InvalidIdentifier); + } + + if !input + .chars() + .next() + .unwrap() + .is_ascii_lowercase() + { + return Err(ValidationError::InvalidIdentifier); + } + + for c in input.chars() { + if !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') { + return Err(ValidationError::InvalidIdentifier); + } + } + + Ok(input.to_string()) +} + #[cfg(test)] mod tests { use super::*; @@ -60,6 +89,7 @@ mod tests { ); assert_eq!(c.parse("ACME"), Ok("ACME".to_string())); + assert_eq!(c.parse("lower"), Ok("lower".to_string())); assert_eq!(c.parse("ACME, Inc."), Ok("ACME, Inc.".to_string())); assert_eq!( diff --git a/src/technique.lalrpop b/src/technique.lalrpop index 648eeb6..ff2696f 100644 --- a/src/technique.lalrpop +++ b/src/technique.lalrpop @@ -1,14 +1,22 @@ +use super::{ValidationError, validate_identifier}; +use lalrpop_util::ParseError; + grammar; 
+extern { + type Error = ValidationError; +} + match { - r"[a-z][a-z0-9_]*" => IDENTIFIER, + r"[A-Za-z0-9_\-\.,\[\]]*" => WORD, _ -} else { - r"[A-Za-z0-9_\-\.,\[\]]*" => WORD + } pub identifier: String = { - => <>.to_string() + =>? validate_identifier(<>).map_err(|e| ParseError::User { + error: e + }) }; pub magic_line: u8 = { @@ -20,6 +28,10 @@ pub spdx_line: (String,String) = { "!" => (one.to_string(),"".to_string()) } +pub template_line: String = { + "&" => <>.to_string() +} + pub license: String = { => <>.join(" ").to_string() }; From 35cf366463a0aa18dd3e837a2dc58da2dade8089 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 4 Oct 2024 22:01:29 +1000 Subject: [PATCH 034/112] Create an adapter trait to simplify error handling --- src/parsing/parser.rs | 15 ++++++++++++++- src/technique.lalrpop | 7 ++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index 5a087fa..faeccbe 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,4 +1,5 @@ use lalrpop_util::lalrpop_mod; +use lalrpop_util::ParseError; lalrpop_mod!(pub technique); @@ -11,7 +12,19 @@ pub enum ValidationError { InvalidIdentifier, } -// only accept [a-z][a-zA-Z0-9_]* as an identifier +/// An adapter trait to wrap our custom errors into a `ParseError`. +pub trait WrapError { + fn wrap(self) -> Result>; +} + +impl WrapError for Result { + fn wrap(self) -> Result> { + self.map_err(|e| ParseError::User { error: e }) + } +} + +/// Validates if the input string is a valid identifier corresponding +/// to `[a-z][a-zA-Z0-9_]*` as an identifier. 
fn validate_identifier(input: &str) -> Result { if input.len() == 0 { return Err(ValidationError::InvalidIdentifier); diff --git a/src/technique.lalrpop b/src/technique.lalrpop index ff2696f..a7819ef 100644 --- a/src/technique.lalrpop +++ b/src/technique.lalrpop @@ -1,5 +1,4 @@ -use super::{ValidationError, validate_identifier}; -use lalrpop_util::ParseError; +use super::{ValidationError, validate_identifier, WrapError}; grammar; @@ -14,9 +13,7 @@ match { } pub identifier: String = { - =>? validate_identifier(<>).map_err(|e| ParseError::User { - error: e - }) + =>? validate_identifier(<>).wrap() }; pub magic_line: u8 = { From c854cd0c5efd897299c87fad8d883321ac7d1963 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 4 Oct 2024 22:20:03 +1000 Subject: [PATCH 035/112] Different error for validation failure due to empty input --- src/parsing/parser.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index faeccbe..d81f729 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -9,6 +9,7 @@ pub fn parse_via_lalrpop(_content: &str) { #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum ValidationError { + ZeroLengthToken, InvalidIdentifier, } @@ -27,11 +28,12 @@ impl WrapError for Result { /// to `[a-z][a-zA-Z0-9_]*` as an identifier. 
fn validate_identifier(input: &str) -> Result { if input.len() == 0 { - return Err(ValidationError::InvalidIdentifier); + return Err(ValidationError::ZeroLengthToken); } - if !input - .chars() + let mut cs = input.chars(); + + if !cs .next() .unwrap() .is_ascii_lowercase() @@ -39,7 +41,7 @@ fn validate_identifier(input: &str) -> Result { return Err(ValidationError::InvalidIdentifier); } - for c in input.chars() { + for c in cs { if !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') { return Err(ValidationError::InvalidIdentifier); } From 070ca07c1e0bf7d2ef8dc9b826bb8d5c6d154ad9 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sun, 6 Oct 2024 15:30:56 +1100 Subject: [PATCH 036/112] Begin moving Technique language definition to library --- src/{technique.lalrpop => grammar.lalrpop} | 1 + src/lib.rs | 1 + src/parsing/parser.rs | 13 +++++++------ 3 files changed, 9 insertions(+), 6 deletions(-) rename src/{technique.lalrpop => grammar.lalrpop} (96%) create mode 100644 src/lib.rs diff --git a/src/technique.lalrpop b/src/grammar.lalrpop similarity index 96% rename from src/technique.lalrpop rename to src/grammar.lalrpop index a7819ef..c2c8fda 100644 --- a/src/technique.lalrpop +++ b/src/grammar.lalrpop @@ -1,4 +1,5 @@ use super::{ValidationError, validate_identifier, WrapError}; +use technique::language::*; grammar; diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..39dba47 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod language; diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs index d81f729..61727c1 100644 --- a/src/parsing/parser.rs +++ b/src/parsing/parser.rs @@ -1,7 +1,8 @@ use lalrpop_util::lalrpop_mod; use lalrpop_util::ParseError; +use technique::language::*; -lalrpop_mod!(pub technique); +lalrpop_mod!(pub grammar); pub fn parse_via_lalrpop(_content: &str) { std::process::exit(0); @@ -56,7 +57,7 @@ mod tests { #[test] fn check_identifier_rules() { - let p = technique::identifierParser::new(); + let p = 
grammar::identifierParser::new(); assert_eq!(p.parse("a"), Ok("a".to_string())); assert_eq!(p.parse("ab"), Ok("ab".to_string())); @@ -81,7 +82,7 @@ mod tests { #[test] fn check_magic_line() { - let p = technique::magic_lineParser::new(); + let p = grammar::magic_lineParser::new(); assert_eq!(p.parse("% technique v1"), Ok(1)); assert_eq!(p.parse("%technique v1"), Ok(1)); // this is rejected because the technique keyword isn't present. @@ -92,9 +93,9 @@ mod tests { #[test] fn check_header_spdx() { - let l = technique::licenseParser::new(); - let c = technique::copyrightParser::new(); - let p = technique::spdx_lineParser::new(); + let l = grammar::licenseParser::new(); + let c = grammar::copyrightParser::new(); + let p = grammar::spdx_lineParser::new(); assert_eq!(l.parse("MIT"), Ok("MIT".to_string())); assert_eq!(l.parse("Public Domain"), Ok("Public Domain".to_string())); From c4de6618ca4fc6968c374d75ce2c5caa0f3e7619 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 10 Oct 2024 22:01:00 +1100 Subject: [PATCH 037/112] Remove obsolete files --- technique.pest | 52 -------------------------------------------------- 1 file changed, 52 deletions(-) delete mode 100644 technique.pest diff --git a/technique.pest b/technique.pest deleted file mode 100644 index 5063adf..0000000 --- a/technique.pest +++ /dev/null @@ -1,52 +0,0 @@ -// Parsing Expression Grammar for v1 of the Technique Procedure Language - -WHITESPACE = _{ " " | "\t" } - -technique = { - SOI ~ - magic_line ~ - NEWLINE ~ - spdx_line? ~ - NEWLINE ~ - template_line? ~ - NEWLINE+ ~ - declaration ~ - NEWLINE+ ~ - EOI -} - -// File Format Header - -magic_line = { "%" ~ "technique" ~ "v1" } - -// License and Copyright Header - -spdx_line = { "!" ~ license ~ (";" ~ copyright)? } - -license = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "." )* } - -copyright = { ("©" | "(c)" | "(C)") ~ year? ~ owner } - -year = @{ ASCII_DIGIT{4} ~ "-" ~ (ASCII_DIGIT{4})? 
| ASCII_DIGIT{4} } - -owner = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | " " | "," | "." )* } - -// Template Header - -template_line = { "&" ~ template } - -template = @{ (ASCII_ALPHA | ASCII_DIGIT) ~ (ASCII_ALPHA | ASCII_DIGIT | "-" | "_" | "." | "," )* } - -// Procedure Declaration - -declaration = { identifier ~ ":" ~ signature? } - -identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHA | ASCII_DIGIT | "_")* } - -signature = { forma ~ ("," ~ forma )* ~ "->" ~ forma } - -forma = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHA | ASCII_DIGIT)* } - - - - From 58b21e699824c1b622f0328fd88dbcb0e2312656 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Thu, 10 Oct 2024 22:12:42 +1100 Subject: [PATCH 038/112] Parse template header line --- src/grammar.lalrpop | 6 +++++- src/parsing/parser.rs | 29 +++++++++++------------------ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/grammar.lalrpop b/src/grammar.lalrpop index c2c8fda..269546e 100644 --- a/src/grammar.lalrpop +++ b/src/grammar.lalrpop @@ -27,7 +27,11 @@ pub spdx_line: (String,String) = { } pub template_line: String = { - "&" => <>.to_string() + "&"