From ba373e8697e24b52d47fae892ebb8aa9d3235a58 Mon Sep 17 00:00:00 2001 From: James Liu Date: Mon, 4 Nov 2024 15:08:30 -0500 Subject: [PATCH 1/4] feat: adds footnote support --- src/ast/generate.js | 11 +++++ src/ast/generated.rs | 67 ++++++++++++++++++++++++++++ src/export/event.rs | 2 + src/export/html.rs | 57 ++++++++++++++++++++++++ src/export/traverse.rs | 12 +++-- src/syntax/element.rs | 7 +-- src/syntax/fn_def.rs | 99 +++++++++++++++++++++++++----------------- src/syntax/fn_ref.rs | 45 ++++++++++--------- src/syntax/mod.rs | 2 + src/syntax/object.rs | 31 ++++++------- tests/html.rs | 15 +++++++ 11 files changed, 267 insertions(+), 81 deletions(-) diff --git a/src/ast/generate.js b/src/ast/generate.js index 81bbc4f..076555c 100644 --- a/src/ast/generate.js +++ b/src/ast/generate.js @@ -103,6 +103,16 @@ const nodes = [ { struct: "FnDef", kind: ["FN_DEF"], + token: [ + ["label", "FN_LABEL"], + ["description", "FN_CONTENT"], + ], + post_blank: true, + affiliated_keywords: true, + }, + { + struct: "FnContent", + kind: ["FN_CONTENT"], post_blank: true, affiliated_keywords: true, }, @@ -188,6 +198,7 @@ const nodes = [ { struct: "FnRef", kind: ["FN_REF"], + token: [["label", "FN_LABEL"]], }, { struct: "Macros", diff --git a/src/ast/generated.rs b/src/ast/generated.rs index 1bfd489..0f005c3 100644 --- a/src/ast/generated.rs +++ b/src/ast/generated.rs @@ -831,6 +831,70 @@ impl AstNode for FnDef { } } impl FnDef { + /// Beginning position of this element + pub fn start(&self) -> TextSize { + self.syntax.text_range().start() + } + /// Ending position of this element + pub fn end(&self) -> TextSize { + self.syntax.text_range().end() + } + /// Range of this element + pub fn text_range(&self) -> TextRange { + self.syntax.text_range() + } + /// Raw text of this element + pub fn raw(&self) -> String { + self.syntax.to_string() + } + pub fn label(&self) -> Option { + super::token(&self.syntax, FN_LABEL) + } + pub fn description(&self) -> Option { + super::token(&self.syntax, FN_CONTENT) + } + pub fn post_blank(&self) -> usize { + super::blank_lines(&self.syntax) + } + pub fn caption(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "CAPTION") + } + pub fn header(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "HEADER") + } + pub fn name(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "NAME") + } + pub fn plot(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "PLOT") + } + pub fn results(&self) -> Option { + affiliated_keyword(&self.syntax, |k| k == "RESULTS") + } + pub fn attr(&self, backend: &str) -> Option { + affiliated_keyword(&self.syntax, |k| { + k.starts_with("ATTR_") && &k[5..] == backend + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FnContent { + pub(crate) syntax: SyntaxNode, +} +impl AstNode for FnContent { + type Language = OrgLanguage; + fn can_cast(kind: SyntaxKind) -> bool { + kind == FN_CONTENT + } + fn cast(node: SyntaxNode) -> Option { + Self::can_cast(node.kind()).then(|| FnContent { syntax: node }) + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} +impl FnContent { /// Beginning position of this element pub fn start(&self) -> TextSize { self.syntax.text_range().start() @@ -1680,6 +1744,9 @@ impl FnRef { pub fn raw(&self) -> String { self.syntax.to_string() } + pub fn label(&self) -> Option { + super::token(&self.syntax, FN_LABEL) + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/src/export/event.rs b/src/export/event.rs index 46f65c6..46934d3 100644 --- a/src/export/event.rs +++ b/src/export/event.rs @@ -19,6 +19,7 @@ pub enum Container { DynBlock(DynBlock), FnDef(FnDef), + FnContent(FnContent), Comment(Comment), FixedWidth(FixedWidth), SpecialBlock(SpecialBlock), @@ -57,6 +58,7 @@ pub enum Event { Text(Token), Macros(Macros), Cookie(Cookie), + FnLabel(Token), InlineCall(InlineCall), InlineSrc(InlineSrc), Clock(Clock), diff --git a/src/export/html.rs b/src/export/html.rs index fa31d42..a074500 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -1,3 +1,4 @@ +use rowan::ast::AstNode; use rowan::NodeOrToken; use std::cmp::min; use std::fmt; @@ -6,6 +7,7 @@ use std::fmt::Write as _; use super::event::{Container, Event}; use super::TraversalContext; use super::Traverser; +use crate::ast::token; use crate::{SyntaxElement, SyntaxKind, SyntaxNode}; /// A wrapper for escaping sensitive characters in html. @@ -51,6 +53,9 @@ impl> fmt::Display for HtmlEscape { pub struct HtmlExport { output: String, + ///TODO: track footnotes and citations within the export struct and + /// construct them after the document is fully parsed? + //footnotes: HashMap, in_descriptive_list: Vec, table_row: TableRow, @@ -107,6 +112,56 @@ impl Traverser for HtmlExport { } Event::Leave(Container::Headline(_)) => {} + Event::Enter(Container::FnRef(t)) => { + if let Some(label) = t.label() { + let _ = write!( + &mut self.output, + "[{}]", + label.syntax().text(), + label.syntax().text() + ); + } + self.output += ""; + } + Event::Leave(Container::FnRef(_)) => {} + + Event::Enter(Container::FnDef(t)) => { + self.output += ""; + } + + Event::Enter(Container::FnContent(c)) => { + self.output += " { + self.output += ""; + } + Event::Enter(Container::Paragraph(_)) => self.output += "

", Event::Leave(Container::Paragraph(_)) => self.output += "

", @@ -297,6 +352,8 @@ impl Traverser for HtmlExport { let _ = write!(&mut self.output, "{}", HtmlEscape(text)); } + Event::FnLabel(_) => {} + Event::LineBreak(_) => self.output += "
", Event::Snippet(snippet) => { diff --git a/src/export/traverse.rs b/src/export/traverse.rs index d53b7b8..ef46a36 100644 --- a/src/export/traverse.rs +++ b/src/export/traverse.rs @@ -181,6 +181,7 @@ pub trait Traverser { DYN_BLOCK => walk!(DynBlock), FN_DEF => walk!(FnDef), FN_REF => walk!(FnRef), + FN_CONTENT => walk!(FnContent), MACROS => walk!(@Macros), SNIPPET => walk!(@Snippet), TIMESTAMP_ACTIVE | TIMESTAMP_INACTIVE | TIMESTAMP_DIARY => walk!(@Timestamp), @@ -210,12 +211,17 @@ pub trait Traverser { _ => {} } } - SyntaxElement::Token(token) => { - if token.kind() == TEXT { + SyntaxElement::Token(token) => match token.kind() { + TEXT => { self.event(Event::Text(Token(token)), ctx); take_control!(); } - } + FN_LABEL => { + self.event(Event::FnLabel(Token(token)), ctx); + take_control!(); + } + _ => {} + }, }; } } diff --git a/src/syntax/element.rs b/src/syntax/element.rs index aa4b88a..ae5c523 100644 --- a/src/syntax/element.rs +++ b/src/syntax/element.rs @@ -286,11 +286,12 @@ fn affiliated_keywords() { TEXT@10..25 " a footnote def" NEW_LINE@25..26 "\n" L_BRACKET@26..27 "[" - TEXT@27..29 "fn" + KEYWORD@27..29 "fn" COLON@29..30 ":" - TEXT@30..34 "WORD" + FN_LABEL@30..34 "WORD" R_BRACKET@34..35 "]" - TEXT@35..55 " https://orgmode.org" + FN_CONTENT@35..55 + TEXT@35..55 " https://orgmode.org" "### ); diff --git a/src/syntax/fn_def.rs b/src/syntax/fn_def.rs index 10346dc..22fee5a 100644 --- a/src/syntax/fn_def.rs +++ b/src/syntax/fn_def.rs @@ -7,12 +7,13 @@ use nom::{ use super::{ combinator::{ - blank_lines, colon_token, l_bracket_token, r_bracket_token, trim_line_end, GreenElement, - NodeBuilder, + blank_lines, colon_token, l_bracket_token, node, r_bracket_token, trim_line_end, + GreenElement, NodeBuilder, }, input::Input, keyword::affiliated_keyword_nodes, - SyntaxKind, + object::standard_object_nodes, + SyntaxKind::*, }; #[cfg_attr( @@ -20,6 +21,10 @@ use super::{ tracing::instrument(level = "debug", skip(input), fields(input = input.s)) )] pub fn fn_def_node(input: Input) -> IResult { + crate::lossless_parser!(fn_def_node_base, input) +} + +fn fn_def_node_base(input: Input) -> IResult { let mut parser = map( tuple(( affiliated_keyword_nodes, @@ -42,20 +47,25 @@ pub fn fn_def_node(input: Input) -> IResult { post_blank, )| { let mut b = NodeBuilder::new(); + b.children.extend(affiliated_keywords); b.push(l_bracket); - b.text(fn_); + b.push(fn_.token(KEYWORD)); b.push(colon); - b.text(label); + b.push(label.token(FN_LABEL)); b.push(r_bracket); - b.text(content); + + let content_node = node(FN_CONTENT, standard_object_nodes(content)); + b.push(content_node); + b.ws(ws_); b.nl(nl); b.children.extend(post_blank); - b.finish(SyntaxKind::FN_DEF) + b.finish(FN_DEF) }, ); - crate::lossless_parser!(parser, input) + let (i, fn_def) = parser(input)?; + Ok((i, fn_def)) } #[test] @@ -66,68 +76,78 @@ fn parse() { let to_fn_def = to_ast::(fn_def_node); insta::assert_debug_snapshot!( - to_fn_def("[fn:1] https://orgmode.org").syntax, - @r###" - FN_DEF@0..26 + to_fn_def("[fn:1] *bold* - https://orgmode.org").syntax, + @r#" + FN_DEF@0..36 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..5 "1" + FN_LABEL@4..5 "1" R_BRACKET@5..6 "]" - TEXT@6..26 " https://orgmode.org" - "### + FN_CONTENT@6..36 + TEXT@6..7 " " + BOLD@7..13 + STAR@7..8 "*" + TEXT@8..12 "bold" + STAR@12..13 "*" + TEXT@13..36 " - https://orgmode.org" + "# ); insta::assert_debug_snapshot!( to_fn_def("[fn:word_1] https://orgmode.org").syntax, - @r###" + @r#" FN_DEF@0..31 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..10 "word_1" + FN_LABEL@4..10 "word_1" R_BRACKET@10..11 "]" - TEXT@11..31 " https://orgmode.org" - "### + FN_CONTENT@11..31 + TEXT@11..31 " https://orgmode.org" + "# ); insta::assert_debug_snapshot!( to_fn_def("[fn:WORD-1] https://orgmode.org").syntax, - @r###" + @r#" FN_DEF@0..31 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..10 "WORD-1" + FN_LABEL@4..10 "WORD-1" R_BRACKET@10..11 "]" - TEXT@11..31 " https://orgmode.org" - "### + FN_CONTENT@11..31 + TEXT@11..31 " https://orgmode.org" + "# ); insta::assert_debug_snapshot!( to_fn_def("[fn:WORD]").syntax, - @r###" + @r#" FN_DEF@0..9 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..8 "WORD" + FN_LABEL@4..8 "WORD" R_BRACKET@8..9 "]" - "### + FN_CONTENT@9..9 + "# ); insta::assert_debug_snapshot!( to_fn_def("[fn:1] In particular, the parser requires stars at column 0 to be\n").syntax, - @r###" + @r#" FN_DEF@0..66 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..5 "1" + FN_LABEL@4..5 "1" R_BRACKET@5..6 "]" - TEXT@6..65 " In particular, the p ..." + FN_CONTENT@6..65 + TEXT@6..65 " In particular, the p ..." NEW_LINE@65..66 "\n" - "### + "# ); let config = &ParseConfig::default(); @@ -138,7 +158,7 @@ fn parse() { insta::assert_debug_snapshot!( to_fn_def("#+ATTR_poi: 1\n[fn:WORD-1] https://orgmode.org").syntax, - @r###" + @r##" FN_DEF@0..45 AFFILIATED_KEYWORD@0..14 HASH_PLUS@0..2 "#+" @@ -147,11 +167,12 @@ fn parse() { TEXT@11..13 " 1" NEW_LINE@13..14 "\n" L_BRACKET@14..15 "[" - TEXT@15..17 "fn" + KEYWORD@15..17 "fn" COLON@17..18 ":" - TEXT@18..24 "WORD-1" + FN_LABEL@18..24 "WORD-1" R_BRACKET@24..25 "]" - TEXT@25..45 " https://orgmode.org" - "### + FN_CONTENT@25..45 + TEXT@25..45 " https://orgmode.org" + "## ); } diff --git a/src/syntax/fn_ref.rs b/src/syntax/fn_ref.rs index c190825..40ec92c 100644 --- a/src/syntax/fn_ref.rs +++ b/src/syntax/fn_ref.rs @@ -31,10 +31,10 @@ fn fn_ref_node_base(input: Input) -> IResult { r_bracket_token, ))(input)?; - let mut children = vec![l_bracket, fn_.text_token(), colon, label.text_token()]; + let mut children = vec![l_bracket, fn_.token(KEYWORD), colon, label.token(FN_LABEL)]; if let Some((colon, definition)) = definition { children.push(colon); - children.extend(standard_object_nodes(definition)); + children.push(node(FN_CONTENT, standard_object_nodes(definition))); } children.push(r_bracket); @@ -64,56 +64,59 @@ fn parse() { insta::assert_debug_snapshot!( to_fn_ref("[fn:1]").syntax, - @r###" + @r#" FN_REF@0..6 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..5 "1" + FN_LABEL@4..5 "1" R_BRACKET@5..6 "]" - "### + "# ); insta::assert_debug_snapshot!( to_fn_ref("[fn:1:2]").syntax, - @r###" + @r#" FN_REF@0..8 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..5 "1" + FN_LABEL@4..5 "1" COLON@5..6 ":" - TEXT@6..7 "2" + FN_CONTENT@6..7 + TEXT@6..7 "2" R_BRACKET@7..8 "]" - "### + "# ); insta::assert_debug_snapshot!( to_fn_ref("[fn::2]").syntax, - @r###" + @r#" FN_REF@0..7 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..4 "" + FN_LABEL@4..4 "" COLON@4..5 ":" - TEXT@5..6 "2" + FN_CONTENT@5..6 + TEXT@5..6 "2" R_BRACKET@6..7 "]" - "### + "# ); insta::assert_debug_snapshot!( to_fn_ref("[fn::[]]").syntax, - @r###" + @r#" FN_REF@0..8 L_BRACKET@0..1 "[" - TEXT@1..3 "fn" + KEYWORD@1..3 "fn" COLON@3..4 ":" - TEXT@4..4 "" + FN_LABEL@4..4 "" COLON@4..5 ":" - TEXT@5..7 "[]" + FN_CONTENT@5..7 + TEXT@5..7 "[]" R_BRACKET@7..8 "]" - "### + "# ); let config = &ParseConfig::default(); diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 4b0a620..1472c82 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -199,6 +199,8 @@ pub enum SyntaxKind { COOKIE, RADIO_TARGET, FN_REF, + FN_LABEL, + FN_CONTENT, LATEX_FRAGMENT, MACROS, SNIPPET, diff --git a/src/syntax/object.rs b/src/syntax/object.rs index 4bacba1..7f70f09 100644 --- a/src/syntax/object.rs +++ b/src/syntax/object.rs @@ -305,7 +305,7 @@ fn parse() { insta::assert_debug_snapshot!( t("~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]"), - @r###" + @r#" PARAGRAPH@0..93 CODE@0..26 TILDE@0..1 "~" @@ -313,23 +313,24 @@ fn parse() { TILDE@25..26 "~" FN_REF@26..93 L_BRACKET@26..27 "[" - TEXT@27..29 "fn" + KEYWORD@27..29 "fn" COLON@29..30 ":" - TEXT@30..34 "oiml" + FN_LABEL@30..34 "oiml" COLON@34..35 ":" - TEXT@35..57 "The default value of \n" - CODE@57..83 - TILDE@57..58 "~" - TEXT@58..82 "org-inlinetask-min-level" - TILDE@82..83 "~" - TEXT@83..87 " is " - VERBATIM@87..91 - EQUAL@87..88 "=" - TEXT@88..90 "15" - EQUAL@90..91 "=" - TEXT@91..92 "." + FN_CONTENT@35..92 + TEXT@35..57 "The default value of \n" + CODE@57..83 + TILDE@57..58 "~" + TEXT@58..82 "org-inlinetask-min-level" + TILDE@82..83 "~" + TEXT@83..87 " is " + VERBATIM@87..91 + EQUAL@87..88 "=" + TEXT@88..90 "15" + EQUAL@90..91 "=" + TEXT@91..92 "." R_BRACKET@92..93 "]" - "### + "# ); insta::assert_debug_snapshot!( diff --git a/tests/html.rs b/tests/html.rs index ab1a60e..53ee088 100644 --- a/tests/html.rs +++ b/tests/html.rs @@ -174,3 +174,18 @@ fn line_break() { @r###""

aa
bb

""### ); } + +#[test] +fn footnote() { + insta::assert_debug_snapshot!( + Org::parse("[fn:1] In particular, the parser requires stars at column 0 to be\n").to_html(), + @r##""
""## + ); + // "~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]" + insta::assert_debug_snapshot!( + Org::parse( + "~org-inlinetask-min-level~[fn:oiml:The default value of \n~org-inlinetask-min-level~ is =15=.]" + ).to_html(), + @r##""

org-inlinetask-min-level[oiml]The default value of \norg-inlinetask-min-level is 15.

""## + ); +} From c1b761a704df5f28eba2fa98dcfbeeab29240b79 Mon Sep 17 00:00:00 2001 From: James Liu Date: Mon, 4 Nov 2024 15:08:54 -0500 Subject: [PATCH 2/4] chore: cargo clippy --- src/replace.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replace.rs b/src/replace.rs index aa63c95..c805276 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -133,7 +133,7 @@ impl Org { ) if level <= new_level // non-last headline must ends with a newline && (headline.end() == self.document().end() - || replace_with.ends_with(&['\n', '\r'])) => + || replace_with.ends_with(['\n', '\r'])) => { self.replace_headline(headline, range, replace_with) } From 0f694dddd8367122a3b1acef71c164df2015429b Mon Sep 17 00:00:00 2001 From: James Liu Date: Mon, 4 Nov 2024 15:09:25 -0500 Subject: [PATCH 3/4] docs: updates development.md with more convenient commands --- development.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/development.md b/development.md index bcd343c..484af8d 100644 --- a/development.md +++ b/development.md @@ -3,7 +3,7 @@ ```shell cargo fmt -- --check cargo test --all-features -cargo clippy --allow-dirty --allow-staged +cargo clippy --fix --lib -p orgize --allow-dirty --allow-staged ``` ## Update snapshot testing @@ -18,8 +18,8 @@ cargo insta review ```shell cargo install cargo-fuzz -rustup default nightly -cargo fuzz run fuzz_target_1 +rustup toolchain install nightly +cargo +nightly fuzz run fuzz_target_1 ``` ## Benchmark From 340ce2b5f3e79429a45331225ed250d3890a5288 Mon Sep 17 00:00:00 2001 From: James Liu Date: Thu, 7 Nov 2024 14:19:40 -0500 Subject: [PATCH 4/4] fixed: removed debug output --- src/export/html.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/export/html.rs b/src/export/html.rs index a074500..265ff4d 100644 --- a/src/export/html.rs +++ b/src/export/html.rs @@ -152,7 +152,6 @@ impl Traverser for HtmlExport { if let Some(parent) = c.syntax().parent() { if parent.kind() == SyntaxKind::FN_REF || parent.kind() == SyntaxKind::FN_DEF { let label = token(&parent, SyntaxKind::FN_LABEL).unwrap(); - dbg!(&label); let _ = write!(&mut self.output, "id=\"footnote_{}\" ", label); } }