Skip to content

Commit a7a7516

Browse files
committed
md2sexpr 0.1
0 parents  commit a7a7516

File tree

2 files changed

+261
-0
lines changed

2 files changed

+261
-0
lines changed

md2sexpr.d

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import parser;
2+
import std.string;
3+
/** Quote `text` as an S-expression string literal.
 *
 * Backslashes are doubled first, then every double quote is prefixed with a
 * single backslash, and finally the result is wrapped in double quotes.
 * The order matters: escaping quotes first would cause the backslashes that
 * were just inserted to be doubled again.
 *
 * Params:
 *   text = raw node text
 * Returns: the text surrounded by `"` with `\` and `"` escaped
 */
private string escape(string text) {
    // Backtick strings are WYSIWYG in D, so `\"` is exactly backslash + quote.
    // (Using `\\"` here would emit two backslashes followed by a bare quote,
    // which terminates the literal early for any reader.)
    return `"` ~ text.replace(`\`, `\\`).replace(`"`, `\"`) ~ `"`;
}
6+
/// Parse-event handler that renders each node as a nested S-expression.
struct ConvertToSExprEventHandler {
    /// Opens a node: a space, an opening paren, then the quoted node text.
    string start(string text) {
        string quoted = escape(text);
        return " (" ~ quoted;
    }
    /// Closes the most recently opened node.
    string end() {
        enum closer = ")";
        return closer;
    }
}
14+
/// Parse-event handler used for the "-l" (list) output mode.
struct ConvertToSExprListEventHandler {
    /// True once a start() has been seen since the last "))" was emitted.
    bool multiple = false;

    /// Opens a node. The opener depends on whether `multiple` is already
    /// set; the flag is always set afterwards.
    string start(string text) {
        string opener = multiple ? " ((" : "(";
        multiple = true;
        return opener ~ escape(text);
    }

    /// Closes a node. When `multiple` is set the flag is cleared and a
    /// double paren is emitted; otherwise a single paren plus a space.
    string end() {
        if (!multiple)
            return ") ";
        multiple = false;
        return "))";
    }
}
31+
/** Command-line entry point.
 *
 * Usage: md2sexpr [-l] [file...]
 * With no file arguments the whole of stdin is converted under the name
 * "stdin". Each input is converted independently and written on its own line.
 */
void main(string[] args) {
    import std.stdio, std.file;
    string[] inputnames;
    string[] inputs;

    // Drop the first argument, which is the name of the binary.
    args = args[1..$];

    // A leading "-l" selects the list-style handler and is then consumed.
    const bool list = args.length > 0 && args[0] == "-l";
    if (list)
        args = args[1..$];

    if (args.length == 0) {
        // No files given: slurp stdin line by line until readln yields null.
        string input;
        for (string line = readln(); line !is null; line = readln())
            input ~= line;
        inputnames ~= "stdin";
        inputs ~= input;
    }

    // Load every named file up front (no-op when reading from stdin).
    foreach (filename; args) {
        inputnames ~= filename;
        inputs ~= filename.readText;
    }

    foreach (idx, content; inputs) {
        string output;
        if (list) {
            parse!(ConvertToSExprListEventHandler, s => output ~= s, s => output ~= s)(content, 4, inputnames[idx]);
        } else {
            parse!(ConvertToSExprEventHandler, s => output ~= s, s => output ~= s)(content, 4, inputnames[idx]);
        }
        // One converted document per output line.
        writeln(output);
    }
}

parser.d

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/** Measure the indent level that starts at index `i`.
 *
 * Spaces count as one column each and a tab counts as `spaces_per_indent`
 * columns (i.e. one full indent). A newline inside the whitespace run resets
 * the column count, so whitespace-only lines are skipped over.
 * Params:
 *      i = index to start scanning from. Passed by reference: on return it
 *          holds the index of the first character that is not a space, tab
 *          or newline (or `input.length` if the input ran out). It is left
 *          untouched when `input[i]` is not a space or tab.
 *      input = the text being scanned
 *      spaces_per_indent = number of columns that make up one indent
 * Returns: the column count divided by `spaces_per_indent` (integer division)
 */
int detect_indent_level(ref int i, string input, int spaces_per_indent=4) {
    // Nothing to measure at end of input or when not sitting on indentation.
    if (i == input.length)
        return 0;
    if (input[i] != ' ' && input[i] != '\t')
        return 0;

    int columns = 0;
    while (i < input.length) {
        const ch = input[i];
        if (ch == ' ')
            columns += 1;
        else if (ch == '\t')
            columns += spaces_per_indent;
        else if (ch == '\n')
            columns = 0;    // blank line: start counting again
        else
            break;          // first real content character; leave i on it
        ++i;
    }
    return columns / spaces_per_indent;
}
27+
/// detect_indent_level: 4 spaces (or one tab) make one indent by default.
unittest {
    int i;

    // Four leading spaces -> one indent.
    i = 0;
    assert(detect_indent_level(i, "    - hi") == 1);

    // No leading whitespace -> level 0.
    i = 0;
    assert(detect_indent_level(i, "Foo") == 0);

    // Eight leading spaces -> two indents.
    i = 0;
    assert(detect_indent_level(i, "        - hi") == 2);

    // Tab (4 columns) plus one space is 5 columns, which rounds down to 1.
    i = 0;
    assert(detect_indent_level(i, "\t - hey") == 1);
}
34+
/** No-op sink used as the default event delegate of `parse`.
 *
 * Accepts any number of arguments of any type, including none. It is
 * variadic so that a zero-argument call compiles: a template type parameter
 * cannot be deduced from a default argument alone.
 */
void doNothing(T...)(T _) {}
35+
/** A streaming parser for Markdown trees. Emits a stream of parsing events.
 *
 * The input is read line by line. Each line produces one "start" event whose
 * text is the line's content; "end" events are emitted according to how the
 * indent level of the following line compares to the current one. The whole
 * document is wrapped in one extra start/end pair carrying `title`.
 *
 * Template params:
 *      ParseEventHandler = struct that handles a stream of parsing events.
 *                          It must provide `start(string)` and `end()`.
 *                          See [ExampleParseEventHandler] for an example.
 *      deleStart = function that is called with the return value of
 *                  the ParseEventHandler's start() (or with no arguments
 *                  when start() returns void). Defaults to doNothing
 *      deleEnd = function that is called with the return value of
 *                the ParseEventHandler's end() (or with no arguments
 *                when end() returns void). Defaults to doNothing
 *      relaxed = when true, a line indented more than one level deeper than
 *                the previous one gets empty ("") blocks started for the
 *                skipped levels
 * Params:
 *      input = the text to parse
 *      spaces_per_indent = number of spaces that count as one indent
 *      title = text of the outermost block
 *      args = forwarded to the ParseEventHandler constructor
 *
 * The handler and delegates are compile-time (template) parameters, which
 * gives the compiler the opportunity to inline them into the generated code.
 */
void parse(ParseEventHandler,
           alias deleStart=doNothing,
           alias deleEnd=doNothing,
           bool relaxed=false,
           Args...)
          (string input, int spaces_per_indent, string title, Args args) {
    import std.traits;
    ParseEventHandler parser = ParseEventHandler(args);
    int current_indent_level;
    // Index where the content of the line currently being scanned begins.
    int line_content_start_index;

    // Workaround for void not being a parameter type
    void emitBlockStart(string nodeContent) {
        static if (is(ReturnType!(parser.start) == void)) {
            parser.start(nodeContent);
            deleStart();
        } else deleStart(parser.start(nodeContent));
    }
    void emitBlockEnd() {
        static if (is(ReturnType!(parser.end) == void)) {
            parser.end();
            deleEnd();
        } else deleEnd(parser.end());
    }

    // The title opens the outermost block; it is closed at the very end.
    emitBlockStart(title);
    for (int i = 0; i < input.length; ++i) {
        // emit end when on same or lower indent level, number based on difference
        // start on new lines
        switch (input[i]) {
        case '\n':
            // FIXME: Start and end based on "-"
            const nodeContent = input[line_content_start_index..i];
            emitBlockStart(nodeContent);
            i++;
            // Advances i past the next line's leading whitespace.
            int new_indent_level =
                detect_indent_level(i, input, spaces_per_indent);
            line_content_start_index = i;
            // The line just emitted is closed here unless it was the last
            // one; in that case the closing loop after the scan handles it.
            const extraEndings = i == input.length ? 0 : 1;
            foreach (_;
                     0..current_indent_level - new_indent_level + extraEndings) {
                emitBlockEnd();
            }
            if (relaxed && new_indent_level > current_indent_level + 1) {
                // Fill the skipped indent levels with empty blocks.
                foreach (_; 0..new_indent_level - current_indent_level - 1)
                {
                    emitBlockStart("");
                }
            }
            current_indent_level = new_indent_level;
            // NOTE(review): the loop's ++i now steps over the character i
            // points at without running it through this switch - confirm
            // that this is intended.
            break;
        case '[':
            break;
        // Skip across multiline constructs to prevent
        // detect_indent
        case '`':
            // Bounds check first: a backtick within the last two characters
            // of the input cannot open a fence, and indexing past the end
            // would raise a RangeError.
            if (i + 2 < input.length &&
                input[i+1] == '`' && input[i+2] == '`') {
                // TODO: SIMD this and/or PGO it
                // Advance until i sits on the third of three consecutive
                // backticks (or the input runs out).
                for (i += 3; i < input.length; ++i) {
                    if (input[i-2] == '`' &&
                        input[i-1] == '`' &&
                        input[i-0] == '`') {
                        break;
                    }
                }
            }
            break;
        default: break;
        }
    }
    // Input did not end with a newline: emit the final, unterminated line.
    if (line_content_start_index != input.length) {
        emitBlockStart(input[line_content_start_index .. input.length]);
    }
    // Close the last line's block and every enclosing indent level.
    for (int i = 0; i < current_indent_level + 1; ++i) {
        emitBlockEnd();
    }
    // End again because we started for the title
    emitBlockEnd();
}
130+
/// Test the stream of parser events
unittest {
    import std.range, std.array;

    // Minimal handler that tags every event so the stream can be compared
    // as one string.
    struct ExampleParseEventHandler {
        string start(string text) {
            return "START" ~ text;
        }
        string end() {
            return "END";
        }
    }

    enum expected =
        `STARTTitleSTART- abSTART- bENDENDSTARThelloSTARTworldENDSTARTfooENDENDEND`;
    string sample = "- ab\n\t- b\nhello\n\tworld\n\tfoo";

    // A trailing newline should not affect the result
    foreach (suffix; ["", "\n"]) {
        string result;
        string document = sample ~ suffix;
        parse!(ExampleParseEventHandler, s => result ~= s, s => result ~= s)
            (document, 4, "Title");
        assert(result == expected);
    }
}
152+
// Smoke test for relaxed mode: runs parse with relaxed=true and one space per
// indent, then prints the resulting <div> nesting. Nothing is asserted.
unittest {
    import std.stdio;
    // Handler that renders each block as a <div> element.
    struct ExampleParseEventHandler {
        string start(string text) {
            return "<div>" ~ text;
        }
        string end() {
            return "</div>";
        }
    }
    // NOTE(review): the lines of this sample presumably carried differing
    // leading indentation originally (relaxed mode only matters when a line
    // jumps more than one level) - confirm the intended whitespace.
    string sample =
`a
b
c
d`;
    string result;
    parse!(ExampleParseEventHandler, s => result ~= s, s => result ~= s, true)
        (sample, 1, "Title");
    // Output is only printed for manual inspection.
    result.writeln;
}
172+
/// Parse-event handler that wraps every node in a `<block>` element.
/// The node text is emitted as-is, directly after the opening tag.
struct ConvertToXMLEventHandler {
    /// Opening tag followed by the node text.
    string start(string text) {
        enum openTag = "<block>";
        return openTag ~ text;
    }
    /// Closing tag for the most recently opened node.
    string end() {
        enum closeTag = "</block>";
        return closeTag;
    }
}
180+
/*struct ConvertToOPMLEventHandler {
181+
string start(string text) {
182+
// todo: escape
183+
return `<outline text=` ~ text;
184+
}
185+
string end() {
186+
return `</outline>`;
187+
}
188+
}*/

0 commit comments

Comments
 (0)