diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..65330c4 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,46 @@ +root = true + +[*] +charset = utf-8 + +[*.{json,toml,yml,gyp}] +indent_style = space +indent_size = 2 + +[*.js] +indent_style = space +indent_size = 2 + +[*.scm] +indent_style = space +indent_size = 2 + +[*.{c,cc,h}] +indent_style = space +indent_size = 4 + +[*.rs] +indent_style = space +indent_size = 4 + +[*.{py,pyi}] +indent_style = space +indent_size = 4 + +[*.swift] +indent_style = space +indent_size = 4 + +[*.go] +indent_style = tab +indent_size = 8 + +[Makefile] +indent_style = tab +indent_size = 8 + +[parser.c] +indent_size = 2 + +[{alloc,array,parser}.h] +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..05b91fa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,11 @@ +* text=auto eol=lf + +# Generated source files +src/*.json linguist-generated +src/parser.c linguist-generated +src/tree_sitter/* linguist-generated + +# Rust bindings +bindings/rust/* linguist-generated +Cargo.toml linguist-generated +Cargo.lock linguist-generated diff --git a/.gitignore b/.gitignore index 2e0ed6d..16f95e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,9 @@ /.vscode /target + +# rendered code fragments +/*.pdf + +# generated Tree Sitter parser artifacts +/src/grammar.json +/src/node-types.json diff --git a/Cargo.lock b/Cargo.lock index d3f609d..6661e73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,15 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] [[package]] name = "anstream" @@ -57,6 +66,15 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "cc" +version = "1.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +dependencies = [ + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -97,6 +115,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.9" @@ -107,6 +131,22 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -195,6 +235,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 
+dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustix" version = "0.38.34" @@ -236,10 +305,11 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", @@ -255,12 +325,24 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ -282,11 +364,14 @@ dependencies = [ name = "technique" version = "0.3.0" dependencies = [ + "cc", "clap", + "regex", "serde", "tinytemplate", "tracing", "tracing-subscriber", + "tree-sitter", ] [[package]] @@ -376,6 +461,26 @@ dependencies = [ "tracing-log", ] +[[package]] +name = 
"tree-sitter" +version = "0.25.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7cf18d43cbf0bfca51f657132cc616a5097edc4424d538bae6fa60142eaf9f0" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/Cargo.toml b/Cargo.toml index 75da6fc..cdbe5ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,14 +2,19 @@ name = "technique" version = "0.3.0" edition = "2021" -description = "A domain specific lanaguage for procedures." +description = "A domain specific language for procedures." authors = [ "Andrew Cowie" ] repository = "https://github.com/technique-lang/technique" license = "MIT" [dependencies] clap = { version = "4.5.16", features = [ "wrap_help" ] } +regex = "1.11.1" serde = { version = "1.0.209", features = [ "derive" ] } tinytemplate = "1.2.1" tracing = "0.1.40" tracing-subscriber = "0.3.18" +tree-sitter = "0.25" + +[build-dependencies] +cc = "1.0" \ No newline at end of file diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..6ae2a66 --- /dev/null +++ b/build.rs @@ -0,0 +1,14 @@ +fn main() { + let mut config = cc::Build::new(); + + // Compile the generated C parser + config.file("src/parser.c"); + + // Also include the directory with the C headers + config.include("src"); + + config.compile("tree-sitter-technique"); + + // Rerun the build script if the C source changes + println!("cargo:rerun-if-changed=src/parser.c"); +} diff --git a/grammar.js b/grammar.js new file mode 100644 index 0000000..7142a84 --- /dev/null +++ b/grammar.js @@ -0,0 +1,80 @@ +module.exports = grammar({ + name: "technique", + + // Only spaces, tabs, and carriage returns are extras; newlines are 
significant. + extras: ($) => [/[ \t\r]/], + + rules: { + // A source file: optional header_block (if present, must be first), then declarations. + source_file: ($) => + seq( + optional($.header_block), + repeat($.newline), + repeat(seq($.procedure_declaration, $.newline, repeat($.newline))), + optional($.procedure_declaration), + ), + + // Header block: magic line (required), then optional SPDX and template lines. + header_block: ($) => + seq($.magic_line, optional($.spdx_line), optional($.template_line)), + + magic_line: ($) => + seq( + "%", + "technique", + /[ \t]+/, + "v", + field("version", $.version), + $.newline, + ), + version: ($) => /[0-9]+/, + + spdx_line: ($) => + seq( + "!", + field("license", $.license), + optional( + seq( + ";", + choice("(c)", "(C)", "©"), + field("copyright", $.copyright), + ), + ), + $.newline, + ), + license: ($) => /[^;\n]+/, + copyright: ($) => /[^\n]+/, + template_line: ($) => seq("&", field("template", $.template), $.newline), + template: ($) => /[^\n]+/, + + procedure_declaration: ($) => + seq( + field("name", $.identifier), + ":", + optional( + seq( + field("parameters", $.genus), + "->", + field("return_type", $.genus), + ), + ), + ), + + // Genus: Unit, Simple, List, or Tuple + genus: ($) => + choice($.unit_genus, $.simple_genus, $.list_genus, $.tuple_genus), + unit_genus: ($) => seq("(", ")"), + simple_genus: ($) => $.forma, + list_genus: ($) => seq("[", $.forma, "]"), + tuple_genus: ($) => seq("(", $.forma, repeat(seq(",", $.forma)), ")"), + + // Forma: must start with uppercase letter, then letters or digits + forma: ($) => /[A-Z][a-zA-Z0-9]*/, + + // Identifiers for procedure names + identifier: ($) => /[a-zA-Z_][a-zA-Z0-9_]*/, + + // Newline as a literal + newline: ($) => "\n", + }, +}); diff --git a/src/language/mod.rs b/src/language/mod.rs new file mode 100644 index 0000000..5f445f7 --- /dev/null +++ b/src/language/mod.rs @@ -0,0 +1,6 @@ +// Types representing the Technique procedures language + +mod types; + +// 
Re-export all public symbols
+pub use types::*;
diff --git a/src/language/types.rs b/src/language/types.rs
new file mode 100644
index 0000000..528ec60
--- /dev/null
+++ b/src/language/types.rs
@@ -0,0 +1,371 @@
+// Abstract Syntax Trees for the Technique language
+
+use regex::Regex;
+
+/// A parsed Technique document: an optional metadata header and an optional
+/// list of procedures.
+#[derive(Eq, Debug, PartialEq)]
+pub struct Technique<'i> {
+    pub header: Option<Metadata<'i>>,
+    pub body: Option<Vec<Procedure<'i>>>,
+}
+
+/// Header metadata; the string fields are borrowed (`&'i str`), not owned.
+#[derive(Eq, Debug, PartialEq)]
+pub struct Metadata<'i> {
+    pub version: u8,
+    pub license: Option<&'i str>,
+    pub copyright: Option<&'i str>,
+    pub template: Option<&'i str>,
+}
+
+impl Default for Metadata<'_> {
+    /// Version 1 with no license, copyright, or template.
+    fn default() -> Self {
+        Metadata {
+            version: 1,
+            license: None,
+            copyright: None,
+            template: None,
+        }
+    }
+}
+
+/// Reasons a `validate_*` function can reject its input.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum ValidationError {
+    ZeroLengthToken,
+    InvalidLicense,
+    InvalidCopyright,
+    InvalidTemplate,
+    InvalidIdentifier,
+    InvalidForma,
+    InvalidGenus,
+}
+
+/// A procedure declaration: a name plus an optional signature.
+#[derive(Eq, Debug, PartialEq)]
+pub struct Procedure<'i> {
+    pub name: Identifier<'i>,
+    pub signature: Option<Signature<'i>>,
+}
+
+#[derive(Eq, Debug, PartialEq)]
+pub struct Identifier<'i>(pub &'i str);
+
+#[derive(Eq, Debug, PartialEq)]
+pub struct Forma<'i>(pub &'i str);
+
+#[derive(Eq, Debug, PartialEq)]
+pub enum Genus<'i> {
+    Unit,
+    Single(Forma<'i>),
+    Tuple(Vec<Forma<'i>>),
+    List(Forma<'i>),
+}
+
+/// A procedure signature, pairing a domain genus with a range genus.
+#[derive(Eq, Debug, PartialEq)]
+pub struct Signature<'i> {
+    pub domain: Genus<'i>,
+    pub range: Genus<'i>,
+}
+
+#[derive(Eq, Debug, PartialEq)]
+pub struct Attribute<'i>(pub &'i str);
+
+/// Accept an identifier: a lowercase ASCII letter followed by lowercase ASCII
+/// letters, digits, or underscores. Empty input yields `ZeroLengthToken`.
+pub fn validate_identifier(input: &str) -> Result<Identifier, ValidationError> {
+    if input.is_empty() {
+        return Err(ValidationError::ZeroLengthToken);
+    }
+
+    let re = Regex::new(r"^[a-z][a-z0-9_]*$").unwrap();
+    if re.is_match(input) {
+        Ok(Identifier(input))
+    } else {
+        Err(ValidationError::InvalidIdentifier)
+    }
+}
+
+/// Accept a forma: an uppercase ASCII letter followed by ASCII letters or
+/// digits. Empty input yields `ZeroLengthToken`.
+pub fn validate_forma(input: &str) -> Result<Forma, ValidationError> {
+    let mut cs = input.chars();
+
+    // An exhausted iterator at this point means the input was empty.
+    let first = cs
+        .next()
+        .ok_or(ValidationError::ZeroLengthToken)?;
+
+    if !first.is_ascii_uppercase() {
+        return Err(ValidationError::InvalidForma);
+    }
+
+    for c in cs {
+        if !c.is_ascii_alphanumeric() {
+            return Err(ValidationError::InvalidForma);
+        }
+    }
+
+    Ok(Forma(input))
+}
+
+/// Parse a genus: `()` is Unit, `(A, B)` is a Tuple, `[A]` is a List, and any
+/// other input is validated as a Single forma. Whitespace directly after an
+/// opening delimiter and around tuple elements is discarded.
+///
+/// Empty input yields `ZeroLengthToken`.
+pub fn validate_genus(input: &str) -> Result<Genus, ValidationError> {
+    // Report empty input as an error rather than panicking on `unwrap()`.
+    let first = input
+        .chars()
+        .next()
+        .ok_or(ValidationError::ZeroLengthToken)?;
+
+    match first {
+        '[' => {
+            // consume up to closing bracket
+            let re = Regex::new(r"\[\s*(.+)\s*\]").unwrap();
+
+            let cap = match re.captures(input) {
+                Some(c) => c,
+                None => return Err(ValidationError::ZeroLengthToken),
+            };
+
+            let one = cap
+                .get(1)
+                .map(|v| v.as_str())
+                .ok_or(ValidationError::InvalidGenus)?;
+
+            let forma = validate_forma(one)?;
+
+            Ok(Genus::List(forma))
+        }
+        '(' => {
+            // first trim off the parenthesis and whitespace
+            let re = Regex::new(r"\(\s*(.*)\s*\)").unwrap();
+
+            let cap = match re.captures(input) {
+                Some(c) => c,
+                None => return Err(ValidationError::ZeroLengthToken),
+            };
+
+            let one = cap
+                .get(1)
+                .map(|v| v.as_str())
+                .ok_or(ValidationError::InvalidGenus)?;
+
+            if one.is_empty() {
+                return Ok(Genus::Unit);
+            }
+
+            // now split on , characters, and gather
+
+            let mut formas: Vec<Forma> = Vec::new();
+
+            for text in one.split(",") {
+                let text = text.trim();
+                let forma = validate_forma(text)?;
+                formas.push(forma);
+            }
+
+            Ok(Genus::Tuple(formas))
+        }
+        _ => {
+            let re = Regex::new(r"(.+)\s*").unwrap();
+
+            let cap = match re.captures(input) {
+                Some(c) => c,
+                None => return Err(ValidationError::ZeroLengthToken),
+            };
+
+            let one = cap
+                .get(1)
+                .map(|v| v.as_str())
+                .ok_or(ValidationError::InvalidGenus)?;
+
+            let forma = validate_forma(one)?;
+
+            Ok(Genus::Single(forma))
+        }
+    }
+}
+
+// the validate functions all need to have start and end anchors, which seems
+// like it should be abstracted away.
+
+/// Accept a license made of ASCII letters, digits, spaces, and `.,-_()[]`.
+pub fn validate_license(input: &str) -> Result<&str, ValidationError> {
+    let re = Regex::new(r"^[A-Za-z0-9.,\-_ \(\)\[\]]+$").unwrap();
+
+    if re.is_match(input) {
+        Ok(input)
+    } else {
+        Err(ValidationError::InvalidLicense)
+    }
+}
+
+/// Accept a copyright made of ASCII letters, digits, spaces, and `.,-_()[]`.
+pub fn validate_copyright(input: &str) -> Result<&str, ValidationError> {
+    let re = Regex::new(r"^[A-Za-z0-9.,\-_ \(\)\[\]]+$").unwrap();
+
+    if re.is_match(input) {
+        Ok(input)
+    } else {
+        Err(ValidationError::InvalidCopyright)
+    }
+}
+
+/// Accept a template name made of ASCII letters, digits, and `.,-`.
+pub fn validate_template(input: &str) -> Result<&str, ValidationError> {
+    let re = Regex::new(r"^[A-Za-z0-9.,\-]+$").unwrap();
+
+    if re.is_match(input) {
+        Ok(input)
+    } else {
+        Err(ValidationError::InvalidTemplate)
+    }
+}
+
+#[cfg(test)]
+mod check {
+    use super::*;
+
+    #[test]
+    fn identifier_rules() {
+        assert_eq!(validate_identifier("a"), Ok(Identifier("a")));
+        assert_eq!(validate_identifier("ab"), Ok(Identifier("ab")));
+        assert_eq!(validate_identifier("johnny5"), Ok(Identifier("johnny5")));
+        assert_eq!(
+            validate_identifier("Pizza"),
+            Err(ValidationError::InvalidIdentifier)
+        );
+        assert_eq!(
+            validate_identifier("pizZa"),
+            Err(ValidationError::InvalidIdentifier)
+        );
+        assert!(validate_identifier("0trust").is_err());
+        assert_eq!(
+            validate_identifier("make_dinner"),
+            Ok(Identifier("make_dinner"))
+        );
+        assert!(validate_identifier("MakeDinner").is_err());
+        assert!(validate_identifier("make-dinner").is_err());
+    }
+
+    #[test]
+    fn forma_rules() {
+        assert_eq!(validate_forma("A"), Ok(Forma("A")));
+        assert_eq!(validate_forma("Beans"), Ok(Forma("Beans")));
+        assert_eq!(validate_forma("lower"), Err(ValidationError::InvalidForma));
+        assert_eq!(
+            validate_forma("0Degrees"),
+            Err(ValidationError::InvalidForma)
+        );
+    }
+
+    #[test]
+    fn genus_rules_single() {
+        assert_eq!(validate_genus("A"), Ok(Genus::Single(Forma("A"))));
+    }
+
+    #[test]
+    fn genus_rules_list() {
+        assert_eq!(validate_genus("[A]"), Ok(Genus::List(Forma("A"))));
+    }
+
+    #[test]
+    fn genus_rules_tuple() {
+        assert_eq!(
+            validate_genus("(A, B)"),
+            Ok(Genus::Tuple(vec![Forma("A"), Forma("B")]))
+        );
+
+        assert_eq!(
+            validate_genus("(Coffee, Tea)"),
+            Ok(Genus::Tuple(vec![Forma("Coffee"), Forma("Tea")]))
+        );
+
+        // not actually sure whether we should be normalizing this? Probably
+        // not, because formatting and linting is a separate concern.
+
+        assert_eq!(validate_genus("(A)"), Ok(Genus::Tuple(vec![Forma("A")])));
+    }
+
+    #[test]
+    fn genus_rules_unit() {
+        assert_eq!(validate_genus("()"), Ok(Genus::Unit));
+    }
+
+    #[test]
+    fn genus_rules_empty() {
+        assert_eq!(validate_genus(""), Err(ValidationError::ZeroLengthToken));
+    }
+
+    #[test]
+    fn license_rules() {
+        assert_eq!(validate_license("MIT"), Ok("MIT"));
+        assert_eq!(validate_license("Public Domain"), Ok("Public Domain"));
+        assert_eq!(validate_license("CC BY-SA 3.0 IGO"), Ok("CC BY-SA 3.0 IGO"));
+    }
+
+    #[test]
+    fn copyright_rules() {
+        assert_eq!(validate_copyright("ACME"), Ok("ACME"));
+        assert_eq!(validate_copyright("lower"), Ok("lower"));
+        assert_eq!(validate_copyright("ACME, Inc"), Ok("ACME, Inc"));
+        assert_eq!(validate_copyright("2024 ACME, Inc."), Ok("2024 ACME, Inc."));
+    }
+
+    #[test]
+    fn template_rules() {
+        assert_eq!(validate_template("checklist"), Ok("checklist"));
+        assert_eq!(validate_template("checklist,v1"), Ok("checklist,v1"));
+        assert_eq!(validate_template("checklist-v1.0"), Ok("checklist-v1.0"));
+    }
+
+    fn maker<'i>() -> Metadata<'i> {
+        let t1 = Metadata {
+            version: 1,
+            license: None,
+            copyright: None,
+            template: None,
+        };
+
+        t1
+    }
+
+    #[test]
+    fn ast_construction() {
+        let t1 = Metadata {
+            version: 1,
+            license: None,
+            copyright: None,
+            template: None,
+        };
+
+        assert_eq!(Metadata::default(), t1);
+
+        let t2 = Metadata {
+            version: 1,
+            license: Some("MIT"),
+            copyright: Some("ACME, Inc"),
+            template: Some("checklist"),
+        };
+
+        let t3 = maker();
+
+        assert_eq!(t3, t1);
+
+        let t4 = Metadata {
+            license: Some("MIT"),
+            copyright: Some("ACME, Inc"),
+            template: Some("checklist"),
+            ..t3
+        };
+
+        assert_eq!(t4, t2);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index
0000000..39dba47 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod language; diff --git a/src/main.rs b/src/main.rs index 8a81de0..4ba31d3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use std::path::Path; use tracing::debug; use tracing_subscriber; +mod parsing; mod rendering; fn main() { @@ -89,11 +90,11 @@ fn main() { let filename = submatches .get_one::("filename") - .unwrap(); // argument are required by definitin so always present + .unwrap(); // argument are required by definition so always present debug!(filename); - todo!(); + parsing::load(&Path::new(filename)); } Some(("format", submatches)) => { if submatches.contains_id("raw-control-chars") { diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..22c6ddc --- /dev/null +++ b/src/parser.c @@ -0,0 +1,1282 @@ +/* Automatically @generated by tree-sitter v0.25.6 */ + +#include "tree_sitter/parser.h" + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 15 +#define STATE_COUNT 59 +#define LARGE_STATE_COUNT 2 +#define SYMBOL_COUNT 40 +#define ALIAS_COUNT 0 +#define TOKEN_COUNT 24 +#define EXTERNAL_TOKEN_COUNT 0 +#define FIELD_COUNT 7 +#define MAX_ALIAS_SEQUENCE_LENGTH 6 +#define MAX_RESERVED_WORD_SET_SIZE 0 +#define PRODUCTION_ID_COUNT 7 +#define SUPERTYPE_COUNT 0 + +enum ts_symbol_identifiers { + anon_sym_PERCENT = 1, + anon_sym_technique = 2, + aux_sym_magic_line_token1 = 3, + anon_sym_v = 4, + sym_version = 5, + anon_sym_BANG = 6, + anon_sym_SEMI = 7, + anon_sym_LPARENc_RPAREN = 8, + anon_sym_LPARENC_RPAREN = 9, + anon_sym_u00a9 = 10, + sym_license = 11, + aux_sym_copyright_token1 = 12, + anon_sym_AMP = 13, + anon_sym_COLON = 14, + anon_sym_DASH_GT = 15, + anon_sym_LPAREN = 16, + anon_sym_RPAREN = 17, + anon_sym_LBRACK = 18, + anon_sym_RBRACK = 19, + anon_sym_COMMA = 20, + sym_forma = 21, + sym_identifier = 22, + sym_newline = 23, + sym_source_file = 24, + sym_header_block = 25, 
+ sym_magic_line = 26, + sym_spdx_line = 27, + sym_copyright = 28, + sym_template_line = 29, + sym_template = 30, + sym_procedure_declaration = 31, + sym_genus = 32, + sym_unit_genus = 33, + sym_simple_genus = 34, + sym_list_genus = 35, + sym_tuple_genus = 36, + aux_sym_source_file_repeat1 = 37, + aux_sym_source_file_repeat2 = 38, + aux_sym_tuple_genus_repeat1 = 39, +}; + +static const char * const ts_symbol_names[] = { + [ts_builtin_sym_end] = "end", + [anon_sym_PERCENT] = "%", + [anon_sym_technique] = "technique", + [aux_sym_magic_line_token1] = "magic_line_token1", + [anon_sym_v] = "v", + [sym_version] = "version", + [anon_sym_BANG] = "!", + [anon_sym_SEMI] = ";", + [anon_sym_LPARENc_RPAREN] = "(c)", + [anon_sym_LPARENC_RPAREN] = "(C)", + [anon_sym_u00a9] = "\u00a9", + [sym_license] = "license", + [aux_sym_copyright_token1] = "copyright_token1", + [anon_sym_AMP] = "&", + [anon_sym_COLON] = ":", + [anon_sym_DASH_GT] = "->", + [anon_sym_LPAREN] = "(", + [anon_sym_RPAREN] = ")", + [anon_sym_LBRACK] = "[", + [anon_sym_RBRACK] = "]", + [anon_sym_COMMA] = ",", + [sym_forma] = "forma", + [sym_identifier] = "identifier", + [sym_newline] = "newline", + [sym_source_file] = "source_file", + [sym_header_block] = "header_block", + [sym_magic_line] = "magic_line", + [sym_spdx_line] = "spdx_line", + [sym_copyright] = "copyright", + [sym_template_line] = "template_line", + [sym_template] = "template", + [sym_procedure_declaration] = "procedure_declaration", + [sym_genus] = "genus", + [sym_unit_genus] = "unit_genus", + [sym_simple_genus] = "simple_genus", + [sym_list_genus] = "list_genus", + [sym_tuple_genus] = "tuple_genus", + [aux_sym_source_file_repeat1] = "source_file_repeat1", + [aux_sym_source_file_repeat2] = "source_file_repeat2", + [aux_sym_tuple_genus_repeat1] = "tuple_genus_repeat1", +}; + +static const TSSymbol ts_symbol_map[] = { + [ts_builtin_sym_end] = ts_builtin_sym_end, + [anon_sym_PERCENT] = anon_sym_PERCENT, + [anon_sym_technique] = anon_sym_technique, + 
[aux_sym_magic_line_token1] = aux_sym_magic_line_token1, + [anon_sym_v] = anon_sym_v, + [sym_version] = sym_version, + [anon_sym_BANG] = anon_sym_BANG, + [anon_sym_SEMI] = anon_sym_SEMI, + [anon_sym_LPARENc_RPAREN] = anon_sym_LPARENc_RPAREN, + [anon_sym_LPARENC_RPAREN] = anon_sym_LPARENC_RPAREN, + [anon_sym_u00a9] = anon_sym_u00a9, + [sym_license] = sym_license, + [aux_sym_copyright_token1] = aux_sym_copyright_token1, + [anon_sym_AMP] = anon_sym_AMP, + [anon_sym_COLON] = anon_sym_COLON, + [anon_sym_DASH_GT] = anon_sym_DASH_GT, + [anon_sym_LPAREN] = anon_sym_LPAREN, + [anon_sym_RPAREN] = anon_sym_RPAREN, + [anon_sym_LBRACK] = anon_sym_LBRACK, + [anon_sym_RBRACK] = anon_sym_RBRACK, + [anon_sym_COMMA] = anon_sym_COMMA, + [sym_forma] = sym_forma, + [sym_identifier] = sym_identifier, + [sym_newline] = sym_newline, + [sym_source_file] = sym_source_file, + [sym_header_block] = sym_header_block, + [sym_magic_line] = sym_magic_line, + [sym_spdx_line] = sym_spdx_line, + [sym_copyright] = sym_copyright, + [sym_template_line] = sym_template_line, + [sym_template] = sym_template, + [sym_procedure_declaration] = sym_procedure_declaration, + [sym_genus] = sym_genus, + [sym_unit_genus] = sym_unit_genus, + [sym_simple_genus] = sym_simple_genus, + [sym_list_genus] = sym_list_genus, + [sym_tuple_genus] = sym_tuple_genus, + [aux_sym_source_file_repeat1] = aux_sym_source_file_repeat1, + [aux_sym_source_file_repeat2] = aux_sym_source_file_repeat2, + [aux_sym_tuple_genus_repeat1] = aux_sym_tuple_genus_repeat1, +}; + +static const TSSymbolMetadata ts_symbol_metadata[] = { + [ts_builtin_sym_end] = { + .visible = false, + .named = true, + }, + [anon_sym_PERCENT] = { + .visible = true, + .named = false, + }, + [anon_sym_technique] = { + .visible = true, + .named = false, + }, + [aux_sym_magic_line_token1] = { + .visible = false, + .named = false, + }, + [anon_sym_v] = { + .visible = true, + .named = false, + }, + [sym_version] = { + .visible = true, + .named = true, + }, + [anon_sym_BANG] = 
{ + .visible = true, + .named = false, + }, + [anon_sym_SEMI] = { + .visible = true, + .named = false, + }, + [anon_sym_LPARENc_RPAREN] = { + .visible = true, + .named = false, + }, + [anon_sym_LPARENC_RPAREN] = { + .visible = true, + .named = false, + }, + [anon_sym_u00a9] = { + .visible = true, + .named = false, + }, + [sym_license] = { + .visible = true, + .named = true, + }, + [aux_sym_copyright_token1] = { + .visible = false, + .named = false, + }, + [anon_sym_AMP] = { + .visible = true, + .named = false, + }, + [anon_sym_COLON] = { + .visible = true, + .named = false, + }, + [anon_sym_DASH_GT] = { + .visible = true, + .named = false, + }, + [anon_sym_LPAREN] = { + .visible = true, + .named = false, + }, + [anon_sym_RPAREN] = { + .visible = true, + .named = false, + }, + [anon_sym_LBRACK] = { + .visible = true, + .named = false, + }, + [anon_sym_RBRACK] = { + .visible = true, + .named = false, + }, + [anon_sym_COMMA] = { + .visible = true, + .named = false, + }, + [sym_forma] = { + .visible = true, + .named = true, + }, + [sym_identifier] = { + .visible = true, + .named = true, + }, + [sym_newline] = { + .visible = true, + .named = true, + }, + [sym_source_file] = { + .visible = true, + .named = true, + }, + [sym_header_block] = { + .visible = true, + .named = true, + }, + [sym_magic_line] = { + .visible = true, + .named = true, + }, + [sym_spdx_line] = { + .visible = true, + .named = true, + }, + [sym_copyright] = { + .visible = true, + .named = true, + }, + [sym_template_line] = { + .visible = true, + .named = true, + }, + [sym_template] = { + .visible = true, + .named = true, + }, + [sym_procedure_declaration] = { + .visible = true, + .named = true, + }, + [sym_genus] = { + .visible = true, + .named = true, + }, + [sym_unit_genus] = { + .visible = true, + .named = true, + }, + [sym_simple_genus] = { + .visible = true, + .named = true, + }, + [sym_list_genus] = { + .visible = true, + .named = true, + }, + [sym_tuple_genus] = { + .visible = true, + .named = 
true, + }, + [aux_sym_source_file_repeat1] = { + .visible = false, + .named = false, + }, + [aux_sym_source_file_repeat2] = { + .visible = false, + .named = false, + }, + [aux_sym_tuple_genus_repeat1] = { + .visible = false, + .named = false, + }, +}; + +enum ts_field_identifiers { + field_copyright = 1, + field_license = 2, + field_name = 3, + field_parameters = 4, + field_return_type = 5, + field_template = 6, + field_version = 7, +}; + +static const char * const ts_field_names[] = { + [0] = NULL, + [field_copyright] = "copyright", + [field_license] = "license", + [field_name] = "name", + [field_parameters] = "parameters", + [field_return_type] = "return_type", + [field_template] = "template", + [field_version] = "version", +}; + +static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = { + [1] = {.index = 0, .length = 1}, + [2] = {.index = 1, .length = 1}, + [3] = {.index = 2, .length = 1}, + [4] = {.index = 3, .length = 3}, + [5] = {.index = 6, .length = 1}, + [6] = {.index = 7, .length = 2}, +}; + +static const TSFieldMapEntry ts_field_map_entries[] = { + [0] = + {field_name, 0}, + [1] = + {field_license, 1}, + [2] = + {field_template, 1}, + [3] = + {field_name, 0}, + {field_parameters, 2}, + {field_return_type, 4}, + [6] = + {field_version, 4}, + [7] = + {field_copyright, 4}, + {field_license, 1}, +}; + +static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = { + [0] = {0}, +}; + +static const uint16_t ts_non_terminal_alias_map[] = { + 0, +}; + +static const TSStateId ts_primary_state_ids[STATE_COUNT] = { + [0] = 0, + [1] = 1, + [2] = 2, + [3] = 3, + [4] = 4, + [5] = 5, + [6] = 6, + [7] = 7, + [8] = 8, + [9] = 9, + [10] = 10, + [11] = 11, + [12] = 12, + [13] = 13, + [14] = 14, + [15] = 15, + [16] = 16, + [17] = 17, + [18] = 18, + [19] = 19, + [20] = 20, + [21] = 21, + [22] = 22, + [23] = 23, + [24] = 24, + [25] = 25, + [26] = 26, + [27] = 27, + [28] = 28, + [29] = 29, + [30] = 30, + [31] = 31, + [32] = 32, + [33] = 
33, + [34] = 34, + [35] = 35, + [36] = 36, + [37] = 37, + [38] = 38, + [39] = 39, + [40] = 40, + [41] = 41, + [42] = 42, + [43] = 43, + [44] = 44, + [45] = 45, + [46] = 46, + [47] = 47, + [48] = 48, + [49] = 49, + [50] = 50, + [51] = 51, + [52] = 52, + [53] = 53, + [54] = 54, + [55] = 55, + [56] = 56, + [57] = 57, + [58] = 58, +}; + +static bool ts_lex(TSLexer *lexer, TSStateId state) { + START_LEXER(); + eof = lexer->eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(18); + ADVANCE_MAP( + '\n', 44, + '!', 24, + '%', 19, + '&', 33, + '(', 37, + ')', 38, + ',', 41, + '-', 4, + ':', 34, + ';', 25, + '[', 39, + ']', 40, + 't', 7, + 'v', 22, + 0xa9, 28, + ); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') SKIP(0); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(23); + if (('A' <= lookahead && lookahead <= 'Z')) ADVANCE(42); + END_STATE(); + case 1: + if (lookahead == '\r') SKIP(1); + if (lookahead == '\t' || + lookahead == ' ') ADVANCE(21); + END_STATE(); + case 2: + if (lookahead == ')') ADVANCE(26); + END_STATE(); + case 3: + if (lookahead == ')') ADVANCE(27); + END_STATE(); + case 4: + if (lookahead == '>') ADVANCE(35); + END_STATE(); + case 5: + if (lookahead == 'C') ADVANCE(3); + if (lookahead == 'c') ADVANCE(2); + END_STATE(); + case 6: + if (lookahead == 'c') ADVANCE(9); + END_STATE(); + case 7: + if (lookahead == 'e') ADVANCE(6); + END_STATE(); + case 8: + if (lookahead == 'e') ADVANCE(20); + END_STATE(); + case 9: + if (lookahead == 'h') ADVANCE(11); + END_STATE(); + case 10: + if (lookahead == 'i') ADVANCE(12); + END_STATE(); + case 11: + if (lookahead == 'n') ADVANCE(10); + END_STATE(); + case 12: + if (lookahead == 'q') ADVANCE(13); + END_STATE(); + case 13: + if (lookahead == 'u') ADVANCE(8); + END_STATE(); + case 14: + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(31); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n') ADVANCE(32); + END_STATE(); + case 15: + if (lookahead 
== '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(29); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != ';') ADVANCE(30); + END_STATE(); + case 16: + if (eof) ADVANCE(18); + if (lookahead == '\n') ADVANCE(44); + if (lookahead == '!') ADVANCE(24); + if (lookahead == '%') ADVANCE(19); + if (lookahead == '&') ADVANCE(33); + if (lookahead == '(') ADVANCE(5); + if (lookahead == 0xa9) ADVANCE(28); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') SKIP(16); + if (('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(43); + END_STATE(); + case 17: + if (eof) ADVANCE(18); + if (lookahead == '\n') ADVANCE(44); + if (lookahead == '(') ADVANCE(36); + if (lookahead == '[') ADVANCE(39); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') SKIP(17); + if (('A' <= lookahead && lookahead <= 'Z')) ADVANCE(42); + END_STATE(); + case 18: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 19: + ACCEPT_TOKEN(anon_sym_PERCENT); + END_STATE(); + case 20: + ACCEPT_TOKEN(anon_sym_technique); + END_STATE(); + case 21: + ACCEPT_TOKEN(aux_sym_magic_line_token1); + if (lookahead == '\t' || + lookahead == ' ') ADVANCE(21); + END_STATE(); + case 22: + ACCEPT_TOKEN(anon_sym_v); + END_STATE(); + case 23: + ACCEPT_TOKEN(sym_version); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(23); + END_STATE(); + case 24: + ACCEPT_TOKEN(anon_sym_BANG); + END_STATE(); + case 25: + ACCEPT_TOKEN(anon_sym_SEMI); + END_STATE(); + case 26: + ACCEPT_TOKEN(anon_sym_LPARENc_RPAREN); + END_STATE(); + case 27: + ACCEPT_TOKEN(anon_sym_LPARENC_RPAREN); + END_STATE(); + case 28: + ACCEPT_TOKEN(anon_sym_u00a9); + END_STATE(); + case 29: + ACCEPT_TOKEN(sym_license); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(29); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n' && + lookahead != ';') ADVANCE(30); + END_STATE(); + case 
30: + ACCEPT_TOKEN(sym_license); + if (lookahead != 0 && + lookahead != '\n' && + lookahead != ';') ADVANCE(30); + END_STATE(); + case 31: + ACCEPT_TOKEN(aux_sym_copyright_token1); + if (lookahead == '\t' || + lookahead == '\r' || + lookahead == ' ') ADVANCE(31); + if (lookahead != 0 && + lookahead != '\t' && + lookahead != '\n') ADVANCE(32); + END_STATE(); + case 32: + ACCEPT_TOKEN(aux_sym_copyright_token1); + if (lookahead != 0 && + lookahead != '\n') ADVANCE(32); + END_STATE(); + case 33: + ACCEPT_TOKEN(anon_sym_AMP); + END_STATE(); + case 34: + ACCEPT_TOKEN(anon_sym_COLON); + END_STATE(); + case 35: + ACCEPT_TOKEN(anon_sym_DASH_GT); + END_STATE(); + case 36: + ACCEPT_TOKEN(anon_sym_LPAREN); + END_STATE(); + case 37: + ACCEPT_TOKEN(anon_sym_LPAREN); + if (lookahead == 'c') ADVANCE(2); + END_STATE(); + case 38: + ACCEPT_TOKEN(anon_sym_RPAREN); + END_STATE(); + case 39: + ACCEPT_TOKEN(anon_sym_LBRACK); + END_STATE(); + case 40: + ACCEPT_TOKEN(anon_sym_RBRACK); + END_STATE(); + case 41: + ACCEPT_TOKEN(anon_sym_COMMA); + END_STATE(); + case 42: + ACCEPT_TOKEN(sym_forma); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(42); + END_STATE(); + case 43: + ACCEPT_TOKEN(sym_identifier); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'Z') || + lookahead == '_' || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(43); + END_STATE(); + case 44: + ACCEPT_TOKEN(sym_newline); + END_STATE(); + default: + return false; + } +} + +static const TSLexerMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0}, + [1] = {.lex_state = 16}, + [2] = {.lex_state = 17}, + [3] = {.lex_state = 17}, + [4] = {.lex_state = 16}, + [5] = {.lex_state = 16}, + [6] = {.lex_state = 16}, + [7] = {.lex_state = 16}, + [8] = {.lex_state = 16}, + [9] = {.lex_state = 16}, + [10] = {.lex_state = 16}, + [11] = {.lex_state = 16}, + [12] = {.lex_state = 16}, + [13] = {.lex_state = 
16}, + [14] = {.lex_state = 16}, + [15] = {.lex_state = 16}, + [16] = {.lex_state = 16}, + [17] = {.lex_state = 16}, + [18] = {.lex_state = 16}, + [19] = {.lex_state = 0}, + [20] = {.lex_state = 0}, + [21] = {.lex_state = 16}, + [22] = {.lex_state = 0}, + [23] = {.lex_state = 0}, + [24] = {.lex_state = 0}, + [25] = {.lex_state = 16}, + [26] = {.lex_state = 16}, + [27] = {.lex_state = 16}, + [28] = {.lex_state = 0}, + [29] = {.lex_state = 0}, + [30] = {.lex_state = 0}, + [31] = {.lex_state = 0}, + [32] = {.lex_state = 0}, + [33] = {.lex_state = 0}, + [34] = {.lex_state = 0}, + [35] = {.lex_state = 0}, + [36] = {.lex_state = 0}, + [37] = {.lex_state = 0}, + [38] = {.lex_state = 0}, + [39] = {.lex_state = 14}, + [40] = {.lex_state = 14}, + [41] = {.lex_state = 0}, + [42] = {.lex_state = 0}, + [43] = {.lex_state = 0}, + [44] = {.lex_state = 0}, + [45] = {.lex_state = 0}, + [46] = {.lex_state = 0}, + [47] = {.lex_state = 0}, + [48] = {.lex_state = 0}, + [49] = {.lex_state = 0}, + [50] = {.lex_state = 0}, + [51] = {.lex_state = 0}, + [52] = {.lex_state = 0}, + [53] = {.lex_state = 15}, + [54] = {.lex_state = 0}, + [55] = {.lex_state = 0}, + [56] = {.lex_state = 0}, + [57] = {.lex_state = 0}, + [58] = {.lex_state = 1}, +}; + +static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [STATE(0)] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_PERCENT] = ACTIONS(1), + [anon_sym_technique] = ACTIONS(1), + [anon_sym_v] = ACTIONS(1), + [sym_version] = ACTIONS(1), + [anon_sym_BANG] = ACTIONS(1), + [anon_sym_SEMI] = ACTIONS(1), + [anon_sym_LPARENc_RPAREN] = ACTIONS(1), + [anon_sym_u00a9] = ACTIONS(1), + [anon_sym_AMP] = ACTIONS(1), + [anon_sym_COLON] = ACTIONS(1), + [anon_sym_DASH_GT] = ACTIONS(1), + [anon_sym_LPAREN] = ACTIONS(1), + [anon_sym_RPAREN] = ACTIONS(1), + [anon_sym_LBRACK] = ACTIONS(1), + [anon_sym_RBRACK] = ACTIONS(1), + [anon_sym_COMMA] = ACTIONS(1), + [sym_forma] = ACTIONS(1), + [sym_newline] = ACTIONS(1), + }, + [STATE(1)] = { + 
[sym_source_file] = STATE(44), + [sym_header_block] = STATE(6), + [sym_magic_line] = STATE(4), + [sym_procedure_declaration] = STATE(33), + [aux_sym_source_file_repeat1] = STATE(5), + [aux_sym_source_file_repeat2] = STATE(11), + [ts_builtin_sym_end] = ACTIONS(3), + [anon_sym_PERCENT] = ACTIONS(5), + [sym_identifier] = ACTIONS(7), + [sym_newline] = ACTIONS(9), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 6, + ACTIONS(13), 1, + anon_sym_LPAREN, + ACTIONS(15), 1, + anon_sym_LBRACK, + ACTIONS(17), 1, + sym_forma, + STATE(50), 1, + sym_genus, + ACTIONS(11), 2, + ts_builtin_sym_end, + sym_newline, + STATE(20), 4, + sym_unit_genus, + sym_simple_genus, + sym_list_genus, + sym_tuple_genus, + [23] = 5, + ACTIONS(13), 1, + anon_sym_LPAREN, + ACTIONS(15), 1, + anon_sym_LBRACK, + ACTIONS(17), 1, + sym_forma, + STATE(38), 1, + sym_genus, + STATE(20), 4, + sym_unit_genus, + sym_simple_genus, + sym_list_genus, + sym_tuple_genus, + [42] = 5, + ACTIONS(21), 1, + anon_sym_BANG, + ACTIONS(23), 1, + anon_sym_AMP, + STATE(8), 1, + sym_spdx_line, + STATE(26), 1, + sym_template_line, + ACTIONS(19), 3, + ts_builtin_sym_end, + sym_identifier, + sym_newline, + [60] = 6, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(25), 1, + ts_builtin_sym_end, + ACTIONS(27), 1, + sym_newline, + STATE(10), 1, + aux_sym_source_file_repeat2, + STATE(16), 1, + aux_sym_source_file_repeat1, + STATE(32), 1, + sym_procedure_declaration, + [79] = 6, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(25), 1, + ts_builtin_sym_end, + ACTIONS(29), 1, + sym_newline, + STATE(7), 1, + aux_sym_source_file_repeat1, + STATE(10), 1, + aux_sym_source_file_repeat2, + STATE(32), 1, + sym_procedure_declaration, + [98] = 6, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(27), 1, + sym_newline, + ACTIONS(31), 1, + ts_builtin_sym_end, + STATE(12), 1, + aux_sym_source_file_repeat2, + STATE(16), 1, + aux_sym_source_file_repeat1, + STATE(36), 1, + sym_procedure_declaration, + [117] = 3, + ACTIONS(23), 1, + anon_sym_AMP, + 
STATE(21), 1, + sym_template_line, + ACTIONS(33), 3, + ts_builtin_sym_end, + sym_identifier, + sym_newline, + [129] = 1, + ACTIONS(35), 5, + ts_builtin_sym_end, + anon_sym_BANG, + anon_sym_AMP, + sym_identifier, + sym_newline, + [137] = 4, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(31), 1, + ts_builtin_sym_end, + STATE(14), 1, + aux_sym_source_file_repeat2, + STATE(36), 1, + sym_procedure_declaration, + [150] = 4, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(25), 1, + ts_builtin_sym_end, + STATE(14), 1, + aux_sym_source_file_repeat2, + STATE(32), 1, + sym_procedure_declaration, + [163] = 4, + ACTIONS(7), 1, + sym_identifier, + ACTIONS(37), 1, + ts_builtin_sym_end, + STATE(14), 1, + aux_sym_source_file_repeat2, + STATE(37), 1, + sym_procedure_declaration, + [176] = 3, + ACTIONS(41), 1, + sym_newline, + STATE(17), 1, + aux_sym_source_file_repeat1, + ACTIONS(39), 2, + ts_builtin_sym_end, + sym_identifier, + [187] = 4, + ACTIONS(39), 1, + ts_builtin_sym_end, + ACTIONS(43), 1, + sym_identifier, + STATE(14), 1, + aux_sym_source_file_repeat2, + STATE(46), 1, + sym_procedure_declaration, + [200] = 1, + ACTIONS(46), 4, + ts_builtin_sym_end, + anon_sym_AMP, + sym_identifier, + sym_newline, + [207] = 3, + ACTIONS(50), 1, + sym_newline, + STATE(16), 1, + aux_sym_source_file_repeat1, + ACTIONS(48), 2, + ts_builtin_sym_end, + sym_identifier, + [218] = 3, + ACTIONS(27), 1, + sym_newline, + STATE(16), 1, + aux_sym_source_file_repeat1, + ACTIONS(53), 2, + ts_builtin_sym_end, + sym_identifier, + [229] = 1, + ACTIONS(55), 4, + ts_builtin_sym_end, + anon_sym_AMP, + sym_identifier, + sym_newline, + [236] = 1, + ACTIONS(57), 3, + ts_builtin_sym_end, + anon_sym_DASH_GT, + sym_newline, + [242] = 1, + ACTIONS(59), 3, + ts_builtin_sym_end, + anon_sym_DASH_GT, + sym_newline, + [248] = 1, + ACTIONS(61), 3, + ts_builtin_sym_end, + sym_identifier, + sym_newline, + [254] = 1, + ACTIONS(63), 3, + ts_builtin_sym_end, + anon_sym_DASH_GT, + sym_newline, + [260] = 1, + ACTIONS(65), 3, + 
ts_builtin_sym_end, + anon_sym_DASH_GT, + sym_newline, + [266] = 3, + ACTIONS(67), 1, + anon_sym_RPAREN, + ACTIONS(69), 1, + anon_sym_COMMA, + STATE(29), 1, + aux_sym_tuple_genus_repeat1, + [276] = 1, + ACTIONS(71), 3, + anon_sym_LPARENc_RPAREN, + anon_sym_LPARENC_RPAREN, + anon_sym_u00a9, + [282] = 1, + ACTIONS(33), 3, + ts_builtin_sym_end, + sym_identifier, + sym_newline, + [288] = 1, + ACTIONS(73), 3, + ts_builtin_sym_end, + sym_identifier, + sym_newline, + [294] = 1, + ACTIONS(75), 3, + ts_builtin_sym_end, + anon_sym_DASH_GT, + sym_newline, + [300] = 3, + ACTIONS(69), 1, + anon_sym_COMMA, + ACTIONS(77), 1, + anon_sym_RPAREN, + STATE(31), 1, + aux_sym_tuple_genus_repeat1, + [310] = 1, + ACTIONS(79), 3, + ts_builtin_sym_end, + anon_sym_DASH_GT, + sym_newline, + [316] = 3, + ACTIONS(81), 1, + anon_sym_RPAREN, + ACTIONS(83), 1, + anon_sym_COMMA, + STATE(31), 1, + aux_sym_tuple_genus_repeat1, + [326] = 2, + ACTIONS(31), 1, + ts_builtin_sym_end, + ACTIONS(86), 1, + sym_newline, + [333] = 2, + ACTIONS(25), 1, + ts_builtin_sym_end, + ACTIONS(86), 1, + sym_newline, + [340] = 2, + ACTIONS(88), 1, + anon_sym_SEMI, + ACTIONS(90), 1, + sym_newline, + [347] = 2, + ACTIONS(92), 1, + anon_sym_RPAREN, + ACTIONS(94), 1, + sym_forma, + [354] = 2, + ACTIONS(37), 1, + ts_builtin_sym_end, + ACTIONS(86), 1, + sym_newline, + [361] = 2, + ACTIONS(86), 1, + sym_newline, + ACTIONS(96), 1, + ts_builtin_sym_end, + [368] = 1, + ACTIONS(98), 2, + ts_builtin_sym_end, + sym_newline, + [373] = 2, + ACTIONS(100), 1, + aux_sym_copyright_token1, + STATE(57), 1, + sym_copyright, + [380] = 2, + ACTIONS(102), 1, + aux_sym_copyright_token1, + STATE(55), 1, + sym_template, + [387] = 1, + ACTIONS(81), 2, + anon_sym_RPAREN, + anon_sym_COMMA, + [392] = 1, + ACTIONS(104), 1, + sym_version, + [396] = 1, + ACTIONS(106), 1, + anon_sym_v, + [400] = 1, + ACTIONS(108), 1, + ts_builtin_sym_end, + [404] = 1, + ACTIONS(110), 1, + sym_newline, + [408] = 1, + ACTIONS(86), 1, + sym_newline, + [412] = 1, + 
ACTIONS(112), 1, + sym_forma, + [416] = 1, + ACTIONS(114), 1, + anon_sym_COLON, + [420] = 1, + ACTIONS(116), 1, + anon_sym_technique, + [424] = 1, + ACTIONS(118), 1, + anon_sym_DASH_GT, + [428] = 1, + ACTIONS(120), 1, + sym_forma, + [432] = 1, + ACTIONS(122), 1, + anon_sym_RBRACK, + [436] = 1, + ACTIONS(124), 1, + sym_license, + [440] = 1, + ACTIONS(126), 1, + sym_newline, + [444] = 1, + ACTIONS(128), 1, + sym_newline, + [448] = 1, + ACTIONS(130), 1, + sym_newline, + [452] = 1, + ACTIONS(132), 1, + sym_newline, + [456] = 1, + ACTIONS(134), 1, + aux_sym_magic_line_token1, +}; + +static const uint32_t ts_small_parse_table_map[] = { + [SMALL_STATE(2)] = 0, + [SMALL_STATE(3)] = 23, + [SMALL_STATE(4)] = 42, + [SMALL_STATE(5)] = 60, + [SMALL_STATE(6)] = 79, + [SMALL_STATE(7)] = 98, + [SMALL_STATE(8)] = 117, + [SMALL_STATE(9)] = 129, + [SMALL_STATE(10)] = 137, + [SMALL_STATE(11)] = 150, + [SMALL_STATE(12)] = 163, + [SMALL_STATE(13)] = 176, + [SMALL_STATE(14)] = 187, + [SMALL_STATE(15)] = 200, + [SMALL_STATE(16)] = 207, + [SMALL_STATE(17)] = 218, + [SMALL_STATE(18)] = 229, + [SMALL_STATE(19)] = 236, + [SMALL_STATE(20)] = 242, + [SMALL_STATE(21)] = 248, + [SMALL_STATE(22)] = 254, + [SMALL_STATE(23)] = 260, + [SMALL_STATE(24)] = 266, + [SMALL_STATE(25)] = 276, + [SMALL_STATE(26)] = 282, + [SMALL_STATE(27)] = 288, + [SMALL_STATE(28)] = 294, + [SMALL_STATE(29)] = 300, + [SMALL_STATE(30)] = 310, + [SMALL_STATE(31)] = 316, + [SMALL_STATE(32)] = 326, + [SMALL_STATE(33)] = 333, + [SMALL_STATE(34)] = 340, + [SMALL_STATE(35)] = 347, + [SMALL_STATE(36)] = 354, + [SMALL_STATE(37)] = 361, + [SMALL_STATE(38)] = 368, + [SMALL_STATE(39)] = 373, + [SMALL_STATE(40)] = 380, + [SMALL_STATE(41)] = 387, + [SMALL_STATE(42)] = 392, + [SMALL_STATE(43)] = 396, + [SMALL_STATE(44)] = 400, + [SMALL_STATE(45)] = 404, + [SMALL_STATE(46)] = 408, + [SMALL_STATE(47)] = 412, + [SMALL_STATE(48)] = 416, + [SMALL_STATE(49)] = 420, + [SMALL_STATE(50)] = 424, + [SMALL_STATE(51)] = 428, + [SMALL_STATE(52)] = 432, 
+ [SMALL_STATE(53)] = 436, + [SMALL_STATE(54)] = 440, + [SMALL_STATE(55)] = 444, + [SMALL_STATE(56)] = 448, + [SMALL_STATE(57)] = 452, + [SMALL_STATE(58)] = 456, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 0, 0, 0), + [5] = {.entry = {.count = 1, .reusable = true}}, SHIFT(49), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(48), + [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(5), + [11] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_procedure_declaration, 2, 0, 1), + [13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(35), + [15] = {.entry = {.count = 1, .reusable = true}}, SHIFT(51), + [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(22), + [19] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_header_block, 1, 0, 0), + [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(53), + [23] = {.entry = {.count = 1, .reusable = true}}, SHIFT(40), + [25] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 1, 0, 0), + [27] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), + [29] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), + [31] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 2, 0, 0), + [33] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_header_block, 2, 0, 0), + [35] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_magic_line, 6, 0, 5), + [37] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 3, 0, 0), + [39] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat2, 2, 0, 0), + [41] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), + [43] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat2, 2, 0, 0), SHIFT_REPEAT(48), + [46] = {.entry = {.count = 1, .reusable = true}}, 
REDUCE(sym_spdx_line, 3, 0, 2), + [48] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), + [50] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_file_repeat1, 2, 0, 0), SHIFT_REPEAT(16), + [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_file_repeat2, 3, 0, 0), + [55] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_spdx_line, 6, 0, 6), + [57] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_list_genus, 3, 0, 0), + [59] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_genus, 1, 0, 0), + [61] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_header_block, 3, 0, 0), + [63] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_simple_genus, 1, 0, 0), + [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unit_genus, 2, 0, 0), + [67] = {.entry = {.count = 1, .reusable = true}}, SHIFT(28), + [69] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47), + [71] = {.entry = {.count = 1, .reusable = true}}, SHIFT(39), + [73] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_template_line, 3, 0, 3), + [75] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tuple_genus, 3, 0, 0), + [77] = {.entry = {.count = 1, .reusable = true}}, SHIFT(30), + [79] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tuple_genus, 4, 0, 0), + [81] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_tuple_genus_repeat1, 2, 0, 0), + [83] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_tuple_genus_repeat1, 2, 0, 0), SHIFT_REPEAT(47), + [86] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [88] = {.entry = {.count = 1, .reusable = true}}, SHIFT(25), + [90] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15), + [92] = {.entry = {.count = 1, .reusable = true}}, SHIFT(23), + [94] = {.entry = {.count = 1, .reusable = true}}, SHIFT(24), + [96] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source_file, 4, 0, 0), + [98] = {.entry = 
{.count = 1, .reusable = true}}, REDUCE(sym_procedure_declaration, 5, 0, 4), + [100] = {.entry = {.count = 1, .reusable = true}}, SHIFT(56), + [102] = {.entry = {.count = 1, .reusable = true}}, SHIFT(54), + [104] = {.entry = {.count = 1, .reusable = true}}, SHIFT(45), + [106] = {.entry = {.count = 1, .reusable = true}}, SHIFT(42), + [108] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [110] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [112] = {.entry = {.count = 1, .reusable = true}}, SHIFT(41), + [114] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [116] = {.entry = {.count = 1, .reusable = true}}, SHIFT(58), + [118] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [120] = {.entry = {.count = 1, .reusable = true}}, SHIFT(52), + [122] = {.entry = {.count = 1, .reusable = true}}, SHIFT(19), + [124] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), + [126] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_template, 1, 0, 0), + [128] = {.entry = {.count = 1, .reusable = true}}, SHIFT(27), + [130] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_copyright, 1, 0, 0), + [132] = {.entry = {.count = 1, .reusable = true}}, SHIFT(18), + [134] = {.entry = {.count = 1, .reusable = true}}, SHIFT(43), +}; + +#ifdef __cplusplus +extern "C" { +#endif +#ifdef TREE_SITTER_HIDE_SYMBOLS +#define TS_PUBLIC +#elif defined(_WIN32) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif + +TS_PUBLIC const TSLanguage *tree_sitter_technique(void) { + static const TSLanguage language = { + .abi_version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .supertype_count = SUPERTYPE_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length 
= MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .field_names = ts_field_names, + .field_map_slices = ts_field_map_slices, + .field_map_entries = ts_field_map_entries, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = (const void*)ts_lex_modes, + .lex_fn = ts_lex, + .primary_state_ids = ts_primary_state_ids, + .name = "technique", + .max_reserved_word_set_size = 0, + .metadata = { + .major_version = 0, + .minor_version = 1, + .patch_version = 0, + }, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs new file mode 100644 index 0000000..1d9dd14 --- /dev/null +++ b/src/parsing/mod.rs @@ -0,0 +1,18 @@ +// parser for the Technique language +use std::path::Path; + +mod parser; +mod scope; +mod tree_sitter; + +pub fn load(source: &Path) { + // read source to a str + let content = std::fs::read_to_string(source).expect("Failed to read the source file"); + + let tree = tree_sitter::parse_tree(content.as_str()); + println!( + "{}", + tree.root_node() + .to_sexp() + ); +} diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs new file mode 100644 index 0000000..16eafb9 --- /dev/null +++ b/src/parsing/parser.rs @@ -0,0 +1,903 @@ +#![allow(dead_code)] + +use std::any::type_name; + +use regex::Regex; +use technique::language::*; + +use super::scope::*; + +pub fn parse_via_scopes(content: &str) { + let mut input = Parser::new(); + input.initialize(content); + + let result = input.parse_technique_header(); + println!("{:?}", result); + println!("{:?}", input); + + std::process::exit(0); +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ParsingError { + IllegalParserState, + 
Unimplemented, + ZeroLengthToken, + Unrecognized, // improve this + Expected(&'static str), + InvalidHeader, + ValidationFailure(ValidationError), + InvalidCharacter(char), + UnexpectedEndOfInput, + InvalidIdentifier, + InvalidForma, + InvalidGenus, + InvalidSignature, + InvalidDeclaration, +} + +impl From for ParsingError { + fn from(error: ValidationError) -> Self { + ParsingError::ValidationFailure(error) + } +} + +#[derive(Debug)] +struct Parser<'i> { + scope: Scope, + source: &'i str, + offset: usize, + count: usize, +} + +impl<'i> Parser<'i> { + fn new() -> Parser<'i> { + Parser { + scope: Scope::new(), + source: "", + offset: 0, + count: 0, + } + } + + fn initialize(&mut self, content: &'i str) { + self.scope = Scope::new(); + self.source = content; + self.count = 0; + self.offset = 0; + } + + fn using_string(&mut self, f: F) -> Result + where + F: Fn(&'i str) -> Result, + { + let l = self + .source + .len(); + + let result = f(self.source)?; + + // advance the parser position + self.source = ""; + self.offset += l; + + // and return + Ok(result) + } + + fn using_regex(&mut self, re: regex::Regex, mut f: F) -> Result + where + F: FnMut(&mut Parser<'i>, regex::Captures<'i>) -> Result, + { + let cap = match re.captures(self.source) { + Some(c) => c, + None => return Err(ParsingError::Expected(type_name::())), + }; + + let zero = cap + .get(0) + .unwrap(); + + let l = zero.end(); + + let mut parser = Parser { + scope: self + .scope + .clone(), + source: zero.as_str(), + count: self.count, + offset: self.offset + zero.start(), + }; + + // this is effectively self.f(cap) + let result = f(&mut parser, cap)?; + + // advance the parser position + self.source = &self.source[l..]; + self.offset += l; + + // and return + Ok(result) + } + + fn try_using_regex( + &mut self, + re: regex::Regex, + mut f: F, + ) -> Result, ParsingError> + where + F: FnMut(&mut Parser<'i>, regex::Captures<'i>) -> Result, + { + let cap = match re.captures(self.source) { + Some(c) => c, + None 
=> return Ok(None), + }; + + let zero = cap + .get(0) + .unwrap(); + + let l = zero.end(); + + let mut parser = Parser { + scope: self + .scope + .clone(), + source: zero.as_str(), + count: self.count, + offset: self.offset + zero.start(), + }; + + // this is effectively self.f(cap) + let result = f(&mut parser, cap)?; + + // advance the parser position + self.source = &self.source[l..]; + self.offset += l; + + // and return + Ok(Some(result)) + } + + /// Given a regex Match, fork a copy of the parser state and run a nested + /// parser on that derivative. Does NOT advance the parent's parser state; + /// the caller needs to do that via one of the using_*() methods. + + fn subparser_match( + &mut self, + needle: regex::Match<'i>, + mut f: F, + ) -> Result + where + F: FnMut(&mut Parser<'i>) -> Result, + { + let mut parser = Parser { + scope: self + .scope + .clone(), + source: needle.as_str(), + count: self.count, + offset: self.offset + needle.start(), + }; + + // this is effectively self.f() + let result = f(&mut parser)?; + + // and return + Ok(result) + } + + fn parse_from_start(&mut self) -> Result<(), ParsingError> { + let layer = self + .scope + .current(); + + match layer { + Layer::Technique => (), // this is where we should be + _ => return Err(ParsingError::IllegalParserState), + } + + let _header = self.parse_technique_header()?; + Ok(()) // FIXME + } + + fn parse_newline(&mut self) -> Result<(), ParsingError> { + for (i, c) in self + .source + .char_indices() + { + let l = i + 1; + + if c == '\n' { + self.source = &self.source[l..]; + self.count += 1; + self.offset += l; + return Ok(()); + } else if c.is_ascii_whitespace() { + continue; + } else { + return Err(ParsingError::InvalidCharacter(c)); + } + } + + // We don't actually require a newline to end the file. + + self.source = ""; + self.offset += self + .source + .len(); + Ok(()) + // Err(ParsingError::UnexpectedEndOfInput) + } + + // hard wire the version for now. 
If we ever grow to supporting multiple + // major versions then this will become a lot more complicated. + fn parse_magic_line(&mut self) -> Result { + let re = Regex::new(r"%\s*technique\s+v1").unwrap(); + + let m = re + .find(self.source) + .ok_or(ParsingError::Unrecognized)?; + + let l = m.end(); + + self.source = &self.source[l..]; + self.offset += l; + + Ok(1) + } + + // This one is awkward because if a SPDX line is present, then it really needs + // to have a license, whereas the copyright part is optional. + fn parse_spdx_line(&mut self) -> Result<(Option<&'i str>, Option<&'i str>), ParsingError> { + // First establish we have a valid line. + + if self + .source + .len() + == 0 + { + return Ok((None, None)); + } + + let x = self + .source + .chars() + .next() + .unwrap(); + + if x != '!' { + return Err(ParsingError::InvalidHeader); + } + + let mut lines = self + .source + .lines(); + let line = lines + .next() + .unwrap(); + + let re = Regex::new(r"!\s*([^;]+)(?:;\s*(?:\(c\)|\(C\)|©)\s*(.+))?").unwrap(); + + let cap = re + .captures(line) + .ok_or(ParsingError::Unrecognized)?; + + // Get the length of the match as a whole so we can advance the parser + // state later. + + let l = cap + .get(0) + .ok_or(ParsingError::Unrecognized)? + .end(); + + // Now to extracting the values we need. We get the license code from + // the first capture. It must be present otherwise we don't have a + // valid SPDX line (and we declared that we're on an SPDX line by the + // presence of the '!' character at the beginning of the line). + + let one = cap + .get(1) + .map(|v| v.as_str()) + .ok_or(ParsingError::InvalidHeader)?; + + let one = validate_license(one)?; + let one = Some(one); + + // Now dig out the copyright, if present: + + let two = cap + .get(2) + .map(|v| v.as_str()); + + let two = match two { + Some(text) => Some(validate_copyright(text)?), + None => None, + }; + + // Advance the parser state, and return. 
+ + self.source = &self.source[l..]; + self.offset += l; + + Ok((one, two)) + } + + fn parse_template_line(&mut self) -> Result, ParsingError> { + let re = Regex::new(r"&\s*(.+)").unwrap(); + + self.try_using_regex(re, |outer, cap| { + let one = cap + .get(1) + .ok_or(ParsingError::Expected("a template"))?; + + outer.subparser_match(one, |inner| { + inner.using_string(|text| { + let result = validate_template(text)?; + Ok(result) + }) + }) + }) + } + + + fn parse_technique_header(&mut self) -> Result, ParsingError> { + let version = self.parse_magic_line()?; + self.parse_newline()?; + + let (license, copyright) = self.parse_spdx_line()?; + self.parse_newline()?; + + let template = self.parse_template_line()?; + self.parse_newline()?; + + Ok(Metadata { + version, + license, + copyright, + template, + }) + } + + fn parse_identifier(&mut self) -> Result, ParsingError> { + self.using_string(|text| { + let result = validate_identifier(text)?; + Ok(result) + }) + } + + fn parse_forma(&mut self) -> Result, ParsingError> { + self.using_string(|text| { + let result = validate_forma(text)?; + Ok(result) + }) + } + + fn ensure_nonempty(&mut self) -> Result<(), ParsingError> { + if self + .source + .len() + == 0 + { + return Err(ParsingError::UnexpectedEndOfInput); + } + Ok(()) + } + + fn trim_whitespace(&mut self) -> Result<(), ParsingError> { + let mut l = 0; + + for (i, c) in self + .source + .char_indices() + { + if c == '\n' { + break; + } else if c.is_ascii_whitespace() { + l = i + 1; + continue; + } else { + break; + } + } + + self.source = &self.source[l..]; + self.offset += l; + + Ok(()) + } + + fn parse_genus(&mut self) -> Result, ParsingError> { + self.trim_whitespace()?; + self.ensure_nonempty()?; + + let first = self + .source + .chars() + .next() + .unwrap(); + + let re = match first { + '[' => { + // consume up to closing bracket + Regex::new(r"\[.+?\]").unwrap() + } + '(' => { + // consume up to closing parenthesis + Regex::new(r"\(.*?\)").unwrap() + } + _ => 
Regex::new(r".+").unwrap(), + }; + + self.using_regex(re, |outer, _| { + println!("{:?}", outer.source); + outer.using_string(|text| { + let result = validate_genus(text)?; + Ok(result) + }) + }) + } + + // idea: put the current Capture in the parser state? + + fn parse_signature(&mut self) -> Result, ParsingError> { + let re = Regex::new(r"\s*(.+?)\s*->\s*(.+?)\s*$").unwrap(); + + let (domain, range) = self.using_regex(re, |outer, cap| { + let one = cap + .get(1) + .ok_or(ParsingError::Expected("a Genus for the domain"))?; + + let two = cap + .get(2) + .ok_or(ParsingError::Expected("a Genus for the range"))?; + + let domain = outer.subparser_match(one, |inner| inner.parse_genus())?; + let range = outer.subparser_match(two, |inner| inner.parse_genus())?; + + Ok((domain, range)) + })?; + + Ok(Signature { domain, range }) + } + + /// declarations are of the form + /// + /// identifier : signature + /// + /// where the optional signature is + /// + /// genus -> genus + /// + fn parse_procedure_declaration( + &mut self, + ) -> Result<(Identifier<'i>, Option>), ParsingError> { + // These capture groups use .+? to make "match more than one, but + // lazily" so that the subsequent grabs of whitespace and the all + // important ':' character are not absorbed. 
+ let re = Regex::new(r"^\s*(.+?)\s*:\s*(.+?)?\s*$").unwrap(); + + self.using_regex(re, |outer, cap| { + let name = match cap.get(1) { + Some(one) => outer.subparser_match(one, |inner| { + let result = inner.parse_identifier()?; + Ok(result) + }), + None => Err(ParsingError::Expected("an Identifier")), + }?; + + let signature = match cap.get(2) { + Some(two) => outer.subparser_match(two, |inner| { + // println!("{:?}", two); + let result = inner.parse_signature()?; + Ok(Some(result)) + }), + None => Ok(None), + }?; + + Ok((name, signature)) + }) + } + + fn parse_procedure(&mut self) -> Result, ParsingError> { + let (name, signature) = self.parse_procedure_declaration()?; + + // let body = self.parse_body()?; + self.parse_newline()?; + + Ok(Procedure { name, signature }) + } +} + +#[cfg(test)] +mod check { + use super::*; + + #[test] + fn magic_line() { + let mut input = Parser::new(); + + input.initialize("% technique v1"); + assert_eq!(input.parse_magic_line(), Ok(1)); + + input.initialize("%technique v1"); + assert_eq!(input.parse_magic_line(), Ok(1)); + + // this is rejected because the technique keyword isn't present. + input.initialize("%techniquev1"); + assert_eq!(input.parse_magic_line(), Err(ParsingError::Unrecognized)); + } + + #[test] + fn header_spdx() { + let mut input = Parser::new(); + + input.initialize("! PD"); + assert_eq!(input.parse_spdx_line(), Ok((Some("PD"), None))); + + input.initialize("! MIT; (c) ACME, Inc."); + assert_eq!( + input.parse_spdx_line(), + Ok((Some("MIT"), Some("ACME, Inc."))) + ); + + input.initialize("! MIT; (C) 2024 ACME, Inc."); + assert_eq!( + input.parse_spdx_line(), + Ok((Some("MIT"), Some("2024 ACME, Inc."))) + ); + + input.initialize("! 
CC BY-SA 3.0 [IGO]; (c) 2024 ACME, Inc."); + assert_eq!( + input.parse_spdx_line(), + Ok((Some("CC BY-SA 3.0 [IGO]"), Some("2024 ACME, Inc."))) + ); + } + + #[test] + fn header_template() { + let mut input = Parser::new(); + input.initialize("& checklist"); + assert_eq!(input.parse_template_line(), Ok(Some("checklist"))); + + input.initialize("& nasa-flight-plan,v4.0"); + assert_eq!( + input.parse_template_line(), + Ok(Some("nasa-flight-plan,v4.0")) + ); + } + + // now we test incremental parsing + + #[test] + fn check_not_eof() { + let mut input = Parser::new(); + input.initialize("Hello World"); + assert_eq!(input.ensure_nonempty(), Ok(())); + + input.initialize(""); + assert_eq!( + input.ensure_nonempty(), + Err(ParsingError::UnexpectedEndOfInput) + ); + } + + #[test] + fn consume_whitespace() { + let mut input = Parser::new(); + input.initialize(" hello"); + assert_eq!(input.trim_whitespace(), Ok(())); + assert_eq!(input.source, "hello"); + } + + #[test] + fn forma_rules() { + let mut input = Parser::new(); + input.initialize("A"); + assert_eq!(input.parse_forma(), Ok(Forma("A"))); + + input.initialize("Apple"); + assert_eq!(input.parse_forma(), Ok(Forma("Apple"))); + } + + #[test] + fn single_genus_definitions() { + let mut input = Parser::new(); + input.initialize("A"); + assert_eq!(input.parse_genus(), Ok(Genus::Single(Forma("A")))); + assert_eq!(input.source, ""); + + input.initialize("Apple"); + assert_eq!(input.parse_genus(), Ok(Genus::Single(Forma("Apple")))); + assert_eq!(input.source, ""); + } + + #[test] + fn list_genus_definitions() { + let mut input = Parser::new(); + input.initialize("[A]"); + assert_eq!(input.parse_genus(), Ok(Genus::List(Forma("A")))); + assert_eq!(input.source, ""); + } + + #[test] + fn tuple_genus_definitions() { + let mut input = Parser::new(); + + input.initialize("(A, B)"); + assert_eq!( + input.parse_genus(), + Ok(Genus::Tuple(vec![Forma("A"), Forma("B")])) + ); + assert_eq!(input.source, ""); + + // not actually sure 
whether we should be normalizing this? Probably + // not, because formatting and linting is a separate concern. + + input.initialize("(A)"); + assert_eq!(input.parse_genus(), Ok(Genus::Tuple(vec![Forma("A")]))); + assert_eq!(input.source, ""); + } + + #[test] + fn unit_genus_definitions() { + let mut input = Parser::new(); + + // and now the special case of the unit type + + input.initialize("()"); + assert_eq!(input.parse_genus(), Ok(Genus::Unit)); + assert_eq!(input.source, "") + } + + #[test] + fn signatures() { + let mut input = Parser::new(); + + input.initialize("A -> B"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::Single(Forma("A")), + range: Genus::Single(Forma("B")) + }) + ); + + input.initialize("Beans -> Coffee"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::Single(Forma("Beans")), + range: Genus::Single(Forma("Coffee")) + }) + ); + + input.initialize("[Bits] -> Bob"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::List(Forma("Bits")), + range: Genus::Single(Forma("Bob")) + }) + ); + + input.initialize("Complex -> (Real, Imaginary)"); + assert_eq!( + input.parse_signature(), + Ok(Signature { + domain: Genus::Single(Forma("Complex")), + range: Genus::Tuple(vec![Forma("Real"), Forma("Imaginary")]) + }) + ); + } + + #[test] + fn declarations_simple() { + let mut input = Parser::new(); + + input.initialize("making_coffee :"); + assert_eq!( + input.parse_procedure_declaration(), + Ok((Identifier("making_coffee"), None)) + ); + } + + #[test] + fn declarations_full() { + let mut input = Parser::new(); + + input.initialize("f : A -> B"); + assert_eq!( + input.parse_procedure_declaration(), + Ok(( + Identifier("f"), + Some(Signature { + domain: Genus::Single(Forma("A")), + range: Genus::Single(Forma("B")) + }) + )) + ); + + input.initialize("making_coffee : Beans -> Coffee"); + assert_eq!( + input.parse_procedure_declaration(), + Ok(( + Identifier("making_coffee"), + 
Some(Signature { + domain: Genus::Single(Forma("Beans")), + range: Genus::Single(Forma("Coffee")) + }) + )) + ); + + input.initialize("making_coffee : (Beans, Milk) -> Coffee"); + assert_eq!( + input.parse_procedure_declaration(), + Ok(( + Identifier("making_coffee"), + Some(Signature { + domain: Genus::Tuple(vec![Forma("Beans"), Forma("Milk")]), + range: Genus::Single(Forma("Coffee")) + }) + )) + ); + } +} + +#[cfg(test)] +mod verify { + use super::*; + + #[test] + fn technique_header() { + let mut input = Parser::new(); + input.initialize("% technique v1"); + + assert_eq!( + input.parse_technique_header(), + Ok(Metadata { + version: 1, + license: None, + copyright: None, + template: None + }) + ); + + input.initialize( + r#" +% technique v1 +! MIT; (c) ACME, Inc +& checklist + "#, + ); + assert_eq!( + input.parse_technique_header(), + Ok(Metadata { + version: 1, + license: Some("MIT"), + copyright: Some("ACME, Inc"), + template: Some("checklist") + }) + ); + } +} + +/* + #[test] + fn check_procedure_signature() { + let p = grammar::signatureParser::new(); + + assert_eq!( + p.parse(""), + Ok(Signature { + domain: Genus::Single(Forma { + name: "A".to_owned() + }), + range: Genus::Single(Forma { + name: "B".to_owned() + }) + }) + ); + assert!(p + .parse("A ->") + .is_err()); + assert!(p + .parse("A") + .is_err()); + } + + #[test] + fn check_procedure_declaration() { + let d = grammar::declarationParser::new(); + + assert_eq!(d.parse("making_coffee :"), Ok("making_coffee".to_owned())); + + let p = grammar::declaration_lineParser::new(); + + assert_eq!( + p.parse("f :"), + Ok(Procedure { + name: "f".to_owned(), + signature: None + }) + ); + + assert!(p + .parse("cook-pizza :B") + .is_err()); + + assert_eq!( + p.parse("f : A -> B"), + Ok(Procedure { + name: "f".to_owned(), + signature: Some(Signature { + domain: Genus::Single(Forma { + name: "A".to_owned() + }), + range: Genus::Single(Forma { + name: "B".to_owned() + }) + }) + }) + ); + } + + #[test] + fn 
check_attribute_role() { + let a = grammar::attributeParser::new(); + + assert_eq!( + a.parse("@chef"), + Ok(Attribute { + name: "chef".to_owned() + }) + ); + + let p = grammar::attribute_lineParser::new(); + + assert_eq!( + p.parse("@chef"), + Ok(vec![Attribute { + name: "chef".to_owned() + }]) + ); + assert_eq!( + p.parse("@chef + @sous"), + Ok(vec![ + Attribute { + name: "chef".to_owned() + }, + Attribute { + name: "sous".to_owned() + } + ]) + ); + } + + // the verify_*() functions are where we do verificaton of larger composite + // structures built up from the smaller pieces check_*()'d above. + + /* + #[test] + fn check_procedure_declaration_explicit() { + let input = "making_coffee : Beans, Milk -> Coffee"; + + // let declaration = TechniqueParser::parse(Rule::declaration, &input) + // .expect("Unsuccessful Parse") + // .next() + // .unwrap(); + + assert_eq!( + input, // FIXME + "making_coffee : Beans, Milk -> Coffee" + ); + + // assert_eq!(identifier.as_str(), "making_coffee"); + // assert_eq!(identifier.as_rule(), Rule::identifier); + + // assert_eq!(signature.as_str(), "Beans, Milk -> Coffee"); + // assert_eq!(signature.as_rule(), Rule::signature); + + } + */ +*/ diff --git a/src/parsing/scope.rs b/src/parsing/scope.rs new file mode 100644 index 0000000..94f706c --- /dev/null +++ b/src/parsing/scope.rs @@ -0,0 +1,151 @@ +#![allow(unused_variables)] +#![allow(dead_code)] + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Layer { + Technique, // within a technique file, by definition the base state + Metadata, // header lines + Procedure, // procedure function block + Declaration, // procedure function signature + Description, // procedure description, as free form text + StepItem, // (sub)step within a procedure body + CodeBlock, // escape to a code mode + Embedded, // multi-line string of another language. 
+} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct Scope { + stack: Vec, +} + +impl Scope { + pub(crate) fn new() -> Scope { + Scope { stack: vec![] } + } + + pub(crate) fn current(&self) -> Layer { + match self + .stack + .last() + { + Some(layer) => *layer, + None => Layer::Technique, + } + } + + pub(crate) fn push(&mut self, layer: Layer) { + self.stack + .push(layer); + } + + pub(crate) fn pop(&mut self) -> Layer { + match self + .stack + .pop() + { + Some(layer) => layer, + None => Layer::Technique, + } + } + + pub(crate) fn reset(&mut self) { + self.stack + .clear(); + } + + /// Iterate over the members of the stack and put them into a sorted list + /// suitable to be used as the tags for syntax highlighting. + pub(crate) fn tags(self) -> Vec { + let mut copy = self + .stack + .clone(); + + copy.push(Layer::Technique); + copy.sort(); + copy.dedup(); + + copy + } +} + +#[cfg(test)] +mod check { + use super::*; + + #[test] + fn stack_operations() { + let mut stack = Scope::new(); + + let current = stack.current(); + assert_eq!(current, Layer::Technique); + + stack.push(Layer::Metadata); + + let current = stack.current(); + assert_eq!(current, Layer::Metadata); + + let popped = stack.pop(); + assert_eq!(popped, Layer::Metadata); + + stack.push(Layer::Procedure); + stack.push(Layer::Declaration); + let current = stack.current(); + assert_eq!(current, Layer::Declaration); + + let popped = stack.pop(); + assert_eq!(popped, Layer::Declaration); + + let current = stack.current(); + assert_eq!(current, Layer::Procedure); + + let popped = stack.pop(); + assert_eq!(popped, Layer::Procedure); + + // and if we pop again, we're still in Technique + let popped = stack.pop(); + assert_eq!(popped, Layer::Technique); + + stack.push(Layer::Description); + stack.push(Layer::CodeBlock); + + // TODO get layers as tags in sorted order + + // now we try reset() + + stack.reset(); + let current = stack.current(); + assert_eq!(current, Layer::Technique); + + let popped = 
stack.pop(); + assert_eq!(popped, Layer::Technique); + } + + #[test] + fn extract_tags_sorted() { + let mut stack = Scope::new(); + + stack.push(Layer::Procedure); + stack.push(Layer::StepItem); + stack.push(Layer::Description); + stack.push(Layer::StepItem); + stack.push(Layer::CodeBlock); + stack.push(Layer::Embedded); + stack.push(Layer::CodeBlock); + + let result = stack.tags(); + assert_eq!( + result, + vec![ + Layer::Technique, + Layer::Procedure, + Layer::Description, + Layer::StepItem, + Layer::CodeBlock, + Layer::Embedded + ] + ) + + // which raises an interesting question about what happens when we + // nest. This will probably need changing. + } +} diff --git a/src/parsing/tree_sitter.rs b/src/parsing/tree_sitter.rs new file mode 100644 index 0000000..fa33225 --- /dev/null +++ b/src/parsing/tree_sitter.rs @@ -0,0 +1,277 @@ +use tree_sitter::{Parser, Tree}; + +extern "C" { + fn tree_sitter_technique() -> tree_sitter::Language; +} + +pub fn parse_tree(source_code: &str) -> Tree { + let mut parser = Parser::new(); + let language = unsafe { tree_sitter_technique() }; + parser + .set_language(&language) + .unwrap(); + parser + .parse(source_code, None) + .unwrap() +} + +#[cfg(test)] +mod check { + use super::*; + + fn trim(s: &str) -> &str { + s.strip_prefix('\n') + .unwrap_or(s) + } + + fn assert_no_error(tree: &tree_sitter::Tree, input: &str) { + let root = tree.root_node(); + assert!( + !root.has_error(), + "Parse error in:\n{}\nTree: {:#?}", + input, + root.to_sexp() + ); + } + #[test] + fn simple_declaration() { + let input = trim( + r#" +make_coffee : Beans -> Coffee + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn full_declaration() { + let input = trim( + r#" +make_coffee : Beans -> Coffee + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn multiple_declarations() { + let input = trim( + r#" +make_coffee : Beans -> Coffee +make_tea : Leaves -> Tea 
+brew_chocolate : Powder -> Chocolate + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn declaration_at_left_margin() { + let input = trim( + r#" + make_coffee : Beans -> Coffee + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn declaration_with_leading_whitespace() { + let input = trim( + r#" + make_coffee : Beans -> Coffee + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn genus_forms() { + let input = trim( + r#" +f : +g : A -> B +h : (A,B) -> B +i : [A] -> A +j : (A,B,C) -> (A,B) +k : [A] -> [B] +l : [A] -> (A,B) +n : [A] -> () +o : () -> A + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn magic_only() { + let input = trim( + r#" +% technique v1 + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn header_block() { + let input = trim( + r#" +% technique v1 +! CC BY-SA 3.0 [IGO]; © 2024 ACME, Inc. +& checklist-template + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn header_and_delcarations() { + let input = trim( + r#" +% technique v1 +! CC BY-SA 3.0 [IGO]; © 2024 ACME, Inc. +& checklist-template + +make_coffee : +brew_tea : Leaves -> Tea + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn procedure_with_title_and_description() { + let input = trim( + r#" + my_proc: + + # My Procedure Title + + This is the first line of the description. + This is the second line. + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn procedure_with_dependent_steps() { + let input = trim( + r#" + my_proc: + 1. First step. + 2. Second step. 
+ "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn description_lines_with_periods_are_not_steps() { + let input = trim( + r#" + This is a line ending with one. + And another line but not a step. + + one. is not a step + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn numeric_and_alphabetic_steps_are_recognized() { + let input = trim( + r#" + 1. This is a numeric step + a. This is an alphabetic step + i. This is also an alphabetic step + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn mixed_description_and_steps() { + let input = trim( + r#" + This is a description. + + a. First step + b. Second step + + This is another paragraph. + + 1. Numeric step + 2. Another numeric step + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn step_like_text_in_description_is_not_step() { + let input = trim( + r#" + This is not a step a. is just a word here. + But this is a step + b. Now this is a step + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn procedure_with_parallel_steps() { + let input = trim( + r#" + my_proc: + - A parallel step. + * Another parallel step. + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } + + #[test] + fn network_probe_procedure() { + let input = trim( + r#" + connectivity_check(e,s) : (LocalEnvironment, TargetService) -> NetworkHealth + + # Network Connectivity Check + + We check the health of the network path between a user's machine running + in a branch office and a service running in a datacenter by establishing + functionality at each layer between our device and the remote server. + + 1. Local network connectivity + 2. Reachability of site border + 3. Check internet connectivity + 4. Reachability of away border + 5. Traversal of away-side load balancer + 6. 
Traversal of away-side local network + 7. Check response from remote service + "#, + ); + let tree = parse_tree(input); + assert_no_error(&tree, input); + } +} diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1abdd12 --- /dev/null +++ b/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t size); +extern void *(*ts_current_calloc)(size_t count, size_t size); +extern void *(*ts_current_realloc)(void *ptr, size_t size); +extern void (*ts_current_free)(void *ptr); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h new file mode 100644 index 0000000..a17a574 --- /dev/null +++ b/src/tree_sitter/array.h @@ -0,0 +1,291 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize 
an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. 
+#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. 
Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. 
+static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. 
+static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(pop) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h new file mode 100644 index 0000000..858107d --- /dev/null +++ b/src/tree_sitter/parser.h @@ -0,0 +1,286 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +typedef struct TSLanguageMetadata { + uint8_t major_version; + uint8_t minor_version; + uint8_t patch_version; +} TSLanguageMetadata; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +// Used to index the field and supertype maps. 
+typedef struct { + uint16_t index; + uint16_t length; +} TSMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); + void (*log)(const TSLexer *, const char *, ...); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; + uint16_t reserved_word_set_id; +} TSLexerMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t abi_version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const 
TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexerMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; + const char *name; + const TSSymbol *reserved_words; + uint16_t max_reserved_word_set_size; + uint32_t supertype_count; + const TSSymbol *supertype_symbols; + const TSMapSlice *supertype_map_slices; + const TSSymbol *supertype_map_entries; + TSLanguageMetadata metadata; +}; + +static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + const TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + const TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) 
\ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_ diff --git a/tree-sitter.json b/tree-sitter.json new file mode 100644 index 0000000..d03614a --- /dev/null +++ b/tree-sitter.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/config.schema.json", + "grammars": [ + { + "name": "technique", + "camelcase": "Technique", + "title": "Technique", + "scope": "source.technique", + "file-types": [ + "t" + ], + "injection-regex": "^technique$", + "class-name": "TreeSitterTechnique" + } + ], + "metadata": { + 
"version": "0.1.0", + "license": "MIT", + "description": "The Technique Procedures Language.", + "authors": [ + { + "name": "Andrew Cowie", + "email": "istathar@gmail.com" + } + ], + "links": { + "repository": "https://github.com/technique-lang/technique" + } + }, + "bindings": { + "c": false, + "go": false, + "node": false, + "python": false, + "rust": true, + "swift": false, + "zig": false + } +} \ No newline at end of file