From 3feb25ed06573f2a40a0b09c2b169c05cb5f5376 Mon Sep 17 00:00:00 2001 From: Afirium Date: Sat, 12 Jul 2025 19:22:45 +0300 Subject: [PATCH 1/6] feat(docs): Update description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ce1433b..ac969aa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Zig Docs MCP -MCP server providing up-to-date Zig documentation and builtin functions. +Model Context Protocol (MCP) server that provides up-to-date documentation for the Zig programming language standard library and builtin functions. ## Installation From b24740a227fdab4714d8335c8b0444e8dbca0949 Mon Sep 17 00:00:00 2001 From: Afirium Date: Sat, 12 Jul 2025 19:23:24 +0300 Subject: [PATCH 2/6] feat(license): Update contributors --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index c27f5e0..324f3d0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Andrey Ryapov, Other contributors +Copyright (c) 2024 Andrey Ryapov, "Zig and WebAssembly" contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From dfbc117842c2594210e4175a0a1a406fb5c930cc Mon Sep 17 00:00:00 2001 From: Afirium Date: Thu, 17 Jul 2025 21:27:15 +0300 Subject: [PATCH 3/6] feat(std): Add wasm module to read STD source code --- build.zig | 42 + docs/wasm/Decl.zig | 267 +++++ docs/wasm/Walk.zig | 1104 ++++++++++++++++++++ docs/wasm/html_render.zig | 400 ++++++++ docs/wasm/main.zig | 931 +++++++++++++++++ docs/wasm/markdown.zig | 1127 +++++++++++++++++++++ docs/wasm/markdown/Document.zig | 194 ++++ docs/wasm/markdown/Parser.zig | 1660 +++++++++++++++++++++++++++++++ docs/wasm/markdown/renderer.zig | 247 +++++ 9 files changed, 5972 insertions(+) create mode 100644 build.zig create mode 100644 docs/wasm/Decl.zig create mode 100644 docs/wasm/Walk.zig create mode 100644 docs/wasm/html_render.zig create mode 100644 docs/wasm/main.zig create mode 100644 docs/wasm/markdown.zig create mode 100644 docs/wasm/markdown/Document.zig create mode 100644 docs/wasm/markdown/Parser.zig create mode 100644 docs/wasm/markdown/renderer.zig diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..84ac954 --- /dev/null +++ b/build.zig @@ -0,0 +1,42 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) !void { + const optimize = b.standardOptimizeOption(.{ .preferred_optimize_mode = .ReleaseSmall }); + + const wasm_target = b.resolveTargetQuery(.{ + .cpu_arch = .wasm32, + .os_tag = .freestanding, + .cpu_features_add = std.Target.wasm.featureSet(&.{ + .atomics, + .bulk_memory, + .multivalue, + .mutable_globals, + .nontrapping_fptoint, + .reference_types, + .sign_ext, + }), + }); + + const wasm_exe = b.addExecutable(.{ + .name = "main", + .root_module = b.createModule(.{ + .root_source_file = b.path("docs/wasm/main.zig"), + .target = wasm_target, + .optimize = optimize, + }), + }); + + const walk_module = b.createModule(.{ + .root_source_file = b.path("docs/wasm/Walk.zig"), + }); + wasm_exe.root_module.addImport("Walk", walk_module); + + wasm_exe.entry = .disabled; + wasm_exe.rdynamic = true; + + const install_wasm = b.addInstallArtifact(wasm_exe, .{ + .dest_dir = .{ .override = .prefix }, + }); + + b.getInstallStep().dependOn(&install_wasm.step); +} diff --git a/docs/wasm/Decl.zig b/docs/wasm/Decl.zig new file mode 100644 index 0000000..ba1a7b4 --- /dev/null +++ b/docs/wasm/Decl.zig @@ -0,0 +1,267 
@@ +const Decl = @This(); +const std = @import("std"); +const Ast = std.zig.Ast; +const Walk = @import("Walk.zig"); +const gpa = std.heap.wasm_allocator; +const assert = std.debug.assert; +const log = std.log; +const Oom = error{OutOfMemory}; + +ast_node: Ast.Node.Index, +file: Walk.File.Index, +/// The decl whose namespace this is in. +parent: Index, + +pub const ExtraInfo = struct { + is_pub: bool, + name: []const u8, + first_doc_comment: Ast.OptionalTokenIndex, +}; + +pub const Index = enum(u32) { + none = std.math.maxInt(u32), + _, + + pub fn get(i: Index) *Decl { + return &Walk.decls.items[@intFromEnum(i)]; + } +}; + +pub fn is_pub(d: *const Decl) bool { + return d.extra_info().is_pub; +} + +pub fn extra_info(d: *const Decl) ExtraInfo { + const ast = d.file.get_ast(); + switch (ast.nodeTag(d.ast_node)) { + .root => return .{ + .name = "", + .is_pub = true, + .first_doc_comment = if (ast.tokenTag(0) == .container_doc_comment) + .fromToken(0) + else + .none, + }, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + const var_decl = ast.fullVarDecl(d.ast_node).?; + const name_token = var_decl.ast.mut_token + 1; + assert(ast.tokenTag(name_token) == .identifier); + const ident_name = ast.tokenSlice(name_token); + return .{ + .name = ident_name, + .is_pub = var_decl.visib_token != null, + .first_doc_comment = findFirstDocComment(ast, var_decl.firstToken()), + }; + }, + + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + .fn_decl, + => { + var buf: [1]Ast.Node.Index = undefined; + const fn_proto = ast.fullFnProto(&buf, d.ast_node).?; + const name_token = fn_proto.name_token.?; + assert(ast.tokenTag(name_token) == .identifier); + const ident_name = ast.tokenSlice(name_token); + return .{ + .name = ident_name, + .is_pub = fn_proto.visib_token != null, + .first_doc_comment = findFirstDocComment(ast, fn_proto.firstToken()), + }; + }, + + else => |t| { + log.debug("hit '{s}'", .{@tagName(t)}); + unreachable; + }, + } +} + +pub fn value_node(d: *const Decl) ?Ast.Node.Index { + const ast = d.file.get_ast(); + return switch (ast.nodeTag(d.ast_node)) { + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + .fn_decl, + .root, + => d.ast_node, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + const var_decl = ast.fullVarDecl(d.ast_node).?; + if (ast.tokenTag(var_decl.ast.mut_token) == .keyword_const) + return var_decl.ast.init_node.unwrap(); + + return null; + }, + + else => null, + }; +} + +pub fn categorize(decl: *const Decl) Walk.Category { + return decl.file.categorize_decl(decl.ast_node); +} + +/// Looks up a direct child of `decl` by name. +pub fn get_child(decl: *const Decl, name: []const u8) ?Decl.Index { + switch (decl.categorize()) { + .alias => |aliasee| return aliasee.get().get_child(name), + .namespace, .container => |node| { + const file = decl.file.get(); + const scope = file.scopes.get(node) orelse return null; + const child_node = scope.get_child(name) orelse return null; + return file.node_decls.get(child_node); + }, + .type_function => { + // Find a decl with this function as the parent, with a name matching `name` + for (Walk.decls.items, 0..) |*candidate, i| { + if (candidate.parent != .none and candidate.parent.get() == decl and std.mem.eql(u8, candidate.extra_info().name, name)) { + return @enumFromInt(i); + } + } + + return null; + }, + else => return null, + } +} + +/// If the type function returns another type function, return the index of that type function. 
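+/// A minimal sketch of the shape being detected (hypothetical declarations,
+/// not from this codebase):
+///
+/// ```
+/// fn Managed(comptime T: type) type {
+///     return ArrayListAligned(T, null); // ArrayListAligned is itself a type function
+/// }
+/// ```
+///
+/// Looking up `Managed` then yields the `Decl.Index` of `ArrayListAligned`.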
+pub fn get_type_fn_return_type_fn(decl: *const Decl) ?Decl.Index { + if (decl.get_type_fn_return_expr()) |return_expr| { + const ast = decl.file.get_ast(); + var buffer: [1]Ast.Node.Index = undefined; + const call = ast.fullCall(&buffer, return_expr) orelse return null; + const token = ast.nodeMainToken(call.ast.fn_expr); + const name = ast.tokenSlice(token); + if (decl.lookup(name)) |function_decl| { + return function_decl; + } + } + return null; +} + +/// Gets the expression after the `return` keyword in a type function declaration. +pub fn get_type_fn_return_expr(decl: *const Decl) ?Ast.Node.Index { + switch (decl.categorize()) { + .type_function => { + const ast = decl.file.get_ast(); + + const body_node = ast.nodeData(decl.ast_node).node_and_node[1]; + + var buf: [2]Ast.Node.Index = undefined; + const statements = ast.blockStatements(&buf, body_node) orelse return null; + + for (statements) |stmt| { + if (ast.nodeTag(stmt) == .@"return") { + return ast.nodeData(stmt).node; + } + } + return null; + }, + else => return null, + } +} + +/// Looks up a decl by name accessible in `decl`'s namespace. +pub fn lookup(decl: *const Decl, name: []const u8) ?Decl.Index { + const namespace_node = switch (decl.categorize()) { + .namespace, .container => |node| node, + else => decl.parent.get().ast_node, + }; + const file = decl.file.get(); + const scope = file.scopes.get(namespace_node) orelse return null; + const resolved_node = scope.lookup(&file.ast, name) orelse return null; + return file.node_decls.get(resolved_node); +} + +/// Appends the fully qualified name to `out`. +pub fn fqn(decl: *const Decl, out: *std.ArrayListUnmanaged(u8)) Oom!void { + try decl.append_path(out); + if (decl.parent != .none) { + try append_parent_ns(out, decl.parent); + try out.appendSlice(gpa, decl.extra_info().name); + } else { + out.items.len -= 1; // remove the trailing '.' + } +} + +pub fn reset_with_path(decl: *const Decl, list: *std.ArrayListUnmanaged(u8)) Oom!void { + list.clearRetainingCapacity(); + try append_path(decl, list); +} + +pub fn append_path(decl: *const Decl, list: *std.ArrayListUnmanaged(u8)) Oom!void { + const start = list.items.len; + // Prefer the module name alias. + for (Walk.modules.keys(), Walk.modules.values()) |pkg_name, pkg_file| { + if (pkg_file == decl.file) { + try list.ensureUnusedCapacity(gpa, pkg_name.len + 1); + list.appendSliceAssumeCapacity(pkg_name); + list.appendAssumeCapacity('.'); + return; + } + } + + const file_path = decl.file.path(); + try list.ensureUnusedCapacity(gpa, file_path.len + 1); + list.appendSliceAssumeCapacity(file_path); + for (list.items[start..]) |*byte| switch (byte.*) { + '/' => byte.* = '.', + else => continue, + }; + if (std.mem.endsWith(u8, list.items, ".zig")) { + list.items.len -= 3; + } else { + list.appendAssumeCapacity('.'); + } +} + +pub fn append_parent_ns(list: *std.ArrayListUnmanaged(u8), parent: Decl.Index) Oom!void { + assert(parent != .none); + const decl = parent.get(); + if (decl.parent != .none) { + try append_parent_ns(list, decl.parent); + try list.appendSlice(gpa, decl.extra_info().name); + try list.append(gpa, '.'); + } +} + +pub fn findFirstDocComment(ast: *const Ast, token: Ast.TokenIndex) Ast.OptionalTokenIndex { + var it = token; + while (it > 0) { + it -= 1; + if (ast.tokenTag(it) != .doc_comment) { + return .fromToken(it + 1); + } + } + return .none; +} + +/// Successively looks up each component. 
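+/// For example, `find("std.ArrayList")` resolves the module registered as
+/// "std" in `Walk.modules`, then descends child namespaces one component at
+/// a time, following aliases along the way ("std" is an illustrative module
+/// name; any registered module works). Returns `.none` if any component
+/// fails to resolve.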
+pub fn find(search_string: []const u8) Decl.Index { + var path_components = std.mem.splitScalar(u8, search_string, '.'); + const file = Walk.modules.get(path_components.first()) orelse return .none; + var current_decl_index = file.findRootDecl(); + while (path_components.next()) |component| { + while (true) switch (current_decl_index.get().categorize()) { + .alias => |aliasee| current_decl_index = aliasee, + else => break, + }; + current_decl_index = current_decl_index.get().get_child(component) orelse return .none; + } + return current_decl_index; +} diff --git a/docs/wasm/Walk.zig b/docs/wasm/Walk.zig new file mode 100644 index 0000000..fcda9ee --- /dev/null +++ b/docs/wasm/Walk.zig @@ -0,0 +1,1104 @@ +//! Find and annotate identifiers with links to their declarations. + +const Walk = @This(); +const std = @import("std"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; +const log = std.log; +const gpa = std.heap.wasm_allocator; +const Oom = error{OutOfMemory}; + +pub const Decl = @import("Decl.zig"); + +pub var files: std.StringArrayHashMapUnmanaged(File) = .empty; +pub var decls: std.ArrayListUnmanaged(Decl) = .empty; +pub var modules: std.StringArrayHashMapUnmanaged(File.Index) = .empty; + +file: File.Index, + +/// keep in sync with "CAT_" constants in main.js +pub const Category = union(enum(u8)) { + /// A struct type used only to group declarations. + namespace: Ast.Node.Index, + /// A container type (struct, union, enum, opaque). + container: Ast.Node.Index, + global_variable: Ast.Node.Index, + /// A function that has not been detected as returning a type. + function: Ast.Node.Index, + primitive: Ast.Node.Index, + error_set: Ast.Node.Index, + global_const: Ast.Node.Index, + alias: Decl.Index, + /// A primitive identifier that is also a type. + type, + /// Specifically it is the literal `type`. + type_type, + /// A function that returns a type. + type_function: Ast.Node.Index, + + pub const Tag = @typeInfo(Category).@"union".tag_type.?; +}; + +pub const File = struct { + ast: Ast, + /// Maps identifiers to the declarations they point to. + ident_decls: std.AutoArrayHashMapUnmanaged(Ast.TokenIndex, Ast.Node.Index) = .empty, + /// Maps field access identifiers to the containing field access node. + token_parents: std.AutoArrayHashMapUnmanaged(Ast.TokenIndex, Ast.Node.Index) = .empty, + /// Maps declarations to their global index. + node_decls: std.AutoArrayHashMapUnmanaged(Ast.Node.Index, Decl.Index) = .empty, + /// Maps function declarations to doctests. 
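+    /// For example, a `test foo { ... }` declared next to `fn foo` in the
+    /// same container is recorded as the doctest of `foo` (see `scanDecls`).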
+ doctests: std.AutoArrayHashMapUnmanaged(Ast.Node.Index, Ast.Node.Index) = .empty, + /// root node => its namespace scope + /// struct/union/enum/opaque decl node => its namespace scope + /// local var decl node => its local variable scope + scopes: std.AutoArrayHashMapUnmanaged(Ast.Node.Index, *Scope) = .empty, + + pub fn lookup_token(file: *File, token: Ast.TokenIndex) Decl.Index { + const decl_node = file.ident_decls.get(token) orelse return .none; + return file.node_decls.get(decl_node) orelse return .none; + } + + pub const Index = enum(u32) { + _, + + fn add_decl(i: Index, node: Ast.Node.Index, parent_decl: Decl.Index) Oom!Decl.Index { + try decls.append(gpa, .{ + .ast_node = node, + .file = i, + .parent = parent_decl, + }); + const decl_index: Decl.Index = @enumFromInt(decls.items.len - 1); + try i.get().node_decls.put(gpa, node, decl_index); + return decl_index; + } + + pub fn get(i: File.Index) *File { + return &files.values()[@intFromEnum(i)]; + } + + pub fn get_ast(i: File.Index) *Ast { + return &i.get().ast; + } + + pub fn path(i: File.Index) []const u8 { + return files.keys()[@intFromEnum(i)]; + } + + pub fn findRootDecl(file_index: File.Index) Decl.Index { + return file_index.get().node_decls.values()[0]; + } + + pub fn categorize_decl(file_index: File.Index, node: Ast.Node.Index) Category { + const ast = file_index.get_ast(); + switch (ast.nodeTag(node)) { + .root => { + for (ast.rootDecls()) |member| { + switch (ast.nodeTag(member)) { + .container_field_init, + .container_field_align, + .container_field, + => return .{ .container = node }, + else => {}, + } + } + return .{ .namespace = node }; + }, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + const var_decl = ast.fullVarDecl(node).?; + if (ast.tokenTag(var_decl.ast.mut_token) == .keyword_var) + return .{ .global_variable = node }; + const init_node = var_decl.ast.init_node.unwrap() orelse + return .{ .global_const = node }; + + return categorize_expr(file_index, init_node); + }, + + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + .fn_decl, + => { + var buf: [1]Ast.Node.Index = undefined; + const full = ast.fullFnProto(&buf, node).?; + return categorize_func(file_index, node, full); + }, + + else => unreachable, + } + } + + pub fn categorize_func( + file_index: File.Index, + node: Ast.Node.Index, + full: Ast.full.FnProto, + ) Category { + return switch (categorize_expr(file_index, full.ast.return_type.unwrap().?)) { + .namespace, .container, .error_set, .type_type => .{ .type_function = node }, + else => .{ .function = node }, + }; + } + + pub fn categorize_expr_deep(file_index: File.Index, node: Ast.Node.Index) Category { + return switch (categorize_expr(file_index, node)) { + .alias => |aliasee| aliasee.get().categorize(), + else => |result| result, + }; + } + + pub fn categorize_expr(file_index: File.Index, node: Ast.Node.Index) Category { + const file = file_index.get(); + const ast = file_index.get_ast(); + //log.debug("categorize_expr tag {s}", .{@tagName(ast.nodeTag(node))}); + return switch (ast.nodeTag(node)) { + .container_decl, + .container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + => { + var buf: [2]Ast.Node.Index = undefined; + const container_decl = ast.fullContainerDecl(&buf, node).?; + if 
(ast.tokenTag(container_decl.ast.main_token) != .keyword_struct) { + return .{ .container = node }; + } + for (container_decl.ast.members) |member| { + switch (ast.nodeTag(member)) { + .container_field_init, + .container_field_align, + .container_field, + => return .{ .container = node }, + else => {}, + } + } + return .{ .namespace = node }; + }, + + .error_set_decl, + .merge_error_sets, + => .{ .error_set = node }, + + .identifier => { + const name_token = ast.nodeMainToken(node); + const ident_name = ast.tokenSlice(name_token); + if (std.mem.eql(u8, ident_name, "type")) + return .type_type; + + if (isPrimitiveNonType(ident_name)) + return .{ .primitive = node }; + + if (std.zig.primitives.isPrimitive(ident_name)) + return .type; + + if (file.ident_decls.get(name_token)) |decl_node| { + const decl_index = file.node_decls.get(decl_node) orelse .none; + if (decl_index != .none) return .{ .alias = decl_index }; + return categorize_decl(file_index, decl_node); + } + + return .{ .global_const = node }; + }, + + .field_access => { + const object_node, const field_ident = ast.nodeData(node).node_and_token; + const field_name = ast.tokenSlice(field_ident); + + switch (categorize_expr(file_index, object_node)) { + .alias => |aliasee| if (aliasee.get().get_child(field_name)) |decl_index| { + return .{ .alias = decl_index }; + }, + else => {}, + } + + return .{ .global_const = node }; + }, + + .builtin_call_two, + .builtin_call_two_comma, + .builtin_call, + .builtin_call_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + const params = ast.builtinCallParams(&buf, node).?; + return categorize_builtin_call(file_index, node, params); + }, + + .call_one, + .call_one_comma, + .call, + .call_comma, + => { + var buf: [1]Ast.Node.Index = undefined; + return categorize_call(file_index, node, ast.fullCall(&buf, node).?); + }, + + .if_simple, + .@"if", + => { + const if_full = ast.fullIf(node).?; + if (if_full.ast.else_expr.unwrap()) |else_expr| { + const then_cat = categorize_expr_deep(file_index, if_full.ast.then_expr); + const else_cat = categorize_expr_deep(file_index, else_expr); + if (then_cat == .type_type and else_cat == .type_type) { + return .type_type; + } else if (then_cat == .error_set and else_cat == .error_set) { + return .{ .error_set = node }; + } else if (then_cat == .type or else_cat == .type or + then_cat == .namespace or else_cat == .namespace or + then_cat == .container or else_cat == .container or + then_cat == .error_set or else_cat == .error_set or + then_cat == .type_function or else_cat == .type_function) + { + return .type; + } + } + return .{ .global_const = node }; + }, + + .@"switch", .switch_comma => return categorize_switch(file_index, node), + + .optional_type, + .array_type, + .array_type_sentinel, + .ptr_type_aligned, + .ptr_type_sentinel, + .ptr_type, + .ptr_type_bit_range, + .anyframe_type, + => .type, + + else => .{ .global_const = node }, + }; + } + + fn categorize_call( + file_index: File.Index, + node: Ast.Node.Index, + call: Ast.full.Call, + ) Category { + return switch (categorize_expr(file_index, call.ast.fn_expr)) { + .type_function => .type, + .alias => |aliasee| categorize_decl_as_callee(aliasee, node), + else => .{ .global_const = node }, + }; + } + + fn categorize_decl_as_callee(decl_index: Decl.Index, call_node: Ast.Node.Index) Category { + return switch (decl_index.get().categorize()) { + .type_function => .type, + .alias => |aliasee| categorize_decl_as_callee(aliasee, call_node), + else => .{ .global_const = call_node }, + }; + } + + fn 
categorize_builtin_call( + file_index: File.Index, + node: Ast.Node.Index, + params: []const Ast.Node.Index, + ) Category { + const ast = file_index.get_ast(); + const builtin_token = ast.nodeMainToken(node); + const builtin_name = ast.tokenSlice(builtin_token); + if (std.mem.eql(u8, builtin_name, "@import")) { + const str_lit_token = ast.nodeMainToken(params[0]); + const str_bytes = ast.tokenSlice(str_lit_token); + const file_path = std.zig.string_literal.parseAlloc(gpa, str_bytes) catch @panic("OOM"); + defer gpa.free(file_path); + if (modules.get(file_path)) |imported_file_index| { + return .{ .alias = File.Index.findRootDecl(imported_file_index) }; + } + const base_path = file_index.path(); + const resolved_path = std.fs.path.resolvePosix(gpa, &.{ + base_path, "..", file_path, + }) catch @panic("OOM"); + defer gpa.free(resolved_path); + log.debug("from '{s}' @import '{s}' resolved='{s}'", .{ + base_path, file_path, resolved_path, + }); + if (files.getIndex(resolved_path)) |imported_file_index| { + return .{ .alias = File.Index.findRootDecl(@enumFromInt(imported_file_index)) }; + } else { + log.warn("import target '{s}' did not resolve to any file", .{resolved_path}); + } + } else if (std.mem.eql(u8, builtin_name, "@This")) { + if (file_index.get().node_decls.get(node)) |decl_index| { + return .{ .alias = decl_index }; + } else { + log.warn("@This() is missing link to Decl.Index", .{}); + } + } + + return .{ .global_const = node }; + } + + fn categorize_switch(file_index: File.Index, node: Ast.Node.Index) Category { + const ast = file_index.get_ast(); + const full = ast.fullSwitch(node).?; + var all_type_type = true; + var all_error_set = true; + var any_type = false; + if (full.ast.cases.len == 0) return .{ .global_const = node }; + for (full.ast.cases) |case_node| { + const case = ast.fullSwitchCase(case_node).?; + switch (categorize_expr_deep(file_index, case.ast.target_expr)) { + .type_type => { + any_type = true; + all_error_set = false; + }, + .error_set => { + any_type = true; + all_type_type = false; + }, + .type, .namespace, .container, .type_function => { + any_type = true; + all_error_set = false; + all_type_type = false; + }, + else => { + all_error_set = false; + all_type_type = false; + }, + } + } + if (all_type_type) return .type_type; + if (all_error_set) return .{ .error_set = node }; + if (any_type) return .type; + return .{ .global_const = node }; + } + }; +}; + +pub const ModuleIndex = enum(u32) { + _, +}; + +pub fn add_file(file_name: []const u8, bytes: []u8) !File.Index { + const ast = try parse(file_name, bytes); + assert(ast.errors.len == 0); + const file_index: File.Index = @enumFromInt(files.entries.len); + try files.put(gpa, file_name, .{ .ast = ast }); + + var w: Walk = .{ + .file = file_index, + }; + const scope = try gpa.create(Scope); + scope.* = .{ .tag = .top }; + + const decl_index = try file_index.add_decl(.root, .none); + try struct_decl(&w, scope, decl_index, .root, ast.containerDeclRoot()); + + const file = file_index.get(); + shrinkToFit(&file.ident_decls); + shrinkToFit(&file.token_parents); + shrinkToFit(&file.node_decls); + shrinkToFit(&file.doctests); + shrinkToFit(&file.scopes); + + return file_index; +} + +/// Parses a file and returns its `Ast`. If the file cannot be parsed, returns +/// the `Ast` of an empty file, so that the rest of the Autodoc logic does not +/// need to handle parse errors. 
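+/// Parse errors are logged with file, line, and column information before
+/// falling back to the empty `Ast`.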
+fn parse(file_name: []const u8, source: []u8) Oom!Ast { + // Require every source file to end with a newline so that Zig's tokenizer + // can continue to require null termination and Autodoc implementation can + // avoid copying source bytes from the decompressed tar file buffer. + const adjusted_source: [:0]const u8 = s: { + if (source.len == 0) + break :s ""; + if (source[source.len - 1] != '\n') { + log.err("{s}: expected newline at end of file", .{file_name}); + break :s ""; + } + source[source.len - 1] = 0; + break :s source[0 .. source.len - 1 :0]; + }; + + var ast = try Ast.parse(gpa, adjusted_source, .zig); + if (ast.errors.len > 0) { + defer ast.deinit(gpa); + + const token_offsets = ast.tokens.items(.start); + var rendered_err: std.Io.Writer.Allocating = .init(gpa); + defer rendered_err.deinit(); + for (ast.errors) |err| { + const err_offset = token_offsets[err.token] + ast.errorOffset(err); + const err_loc = std.zig.findLineColumn(ast.source, err_offset); + rendered_err.clearRetainingCapacity(); + ast.renderError(err, &rendered_err.writer) catch |e| switch (e) { + error.WriteFailed => return error.OutOfMemory, + }; + log.err("{s}:{d}:{d}: {s}", .{ + file_name, err_loc.line + 1, err_loc.column + 1, rendered_err.getWritten(), + }); + } + return Ast.parse(gpa, "", .zig); + } + return ast; +} + +pub const Scope = struct { + tag: Tag, + + const Tag = enum { top, local, namespace }; + + const Local = struct { + base: Scope = .{ .tag = .local }, + parent: *Scope, + var_node: Ast.Node.Index, + }; + + const Namespace = struct { + base: Scope = .{ .tag = .namespace }, + parent: *Scope, + names: std.StringArrayHashMapUnmanaged(Ast.Node.Index) = .empty, + doctests: std.StringArrayHashMapUnmanaged(Ast.Node.Index) = .empty, + decl_index: Decl.Index, + }; + + fn getNamespaceDecl(start_scope: *Scope) Decl.Index { + var it: *Scope = start_scope; + while (true) switch (it.tag) { + .top => unreachable, + .local => { + const local: *Local = @alignCast(@fieldParentPtr("base", it)); + it = local.parent; + }, + .namespace => { + const namespace: *Namespace = @alignCast(@fieldParentPtr("base", it)); + return namespace.decl_index; + }, + }; + } + + pub fn get_child(scope: *Scope, name: []const u8) ?Ast.Node.Index { + switch (scope.tag) { + .top, .local => return null, + .namespace => { + const namespace: *Namespace = @alignCast(@fieldParentPtr("base", scope)); + return namespace.names.get(name); + }, + } + } + + pub fn lookup(start_scope: *Scope, ast: *const Ast, name: []const u8) ?Ast.Node.Index { + var it: *Scope = start_scope; + while (true) switch (it.tag) { + .top => break, + .local => { + const local: *Local = @alignCast(@fieldParentPtr("base", it)); + const name_token = ast.nodeMainToken(local.var_node) + 1; + const ident_name = ast.tokenSlice(name_token); + if (std.mem.eql(u8, ident_name, name)) { + return local.var_node; + } + it = local.parent; + }, + .namespace => { + const namespace: *Namespace = @alignCast(@fieldParentPtr("base", it)); + if (namespace.names.get(name)) |node| { + return node; + } + it = namespace.parent; + }, + }; + return null; + } +}; + +fn struct_decl( + w: *Walk, + scope: *Scope, + parent_decl: Decl.Index, + node: Ast.Node.Index, + container_decl: Ast.full.ContainerDecl, +) Oom!void { + const ast = w.file.get_ast(); + + const namespace = try gpa.create(Scope.Namespace); + namespace.* = .{ + .parent = scope, + .decl_index = parent_decl, + }; + try w.file.get().scopes.putNoClobber(gpa, node, &namespace.base); + try w.scanDecls(namespace, container_decl.ast.members); + + 
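+    // Member names were pre-scanned above so that references between members
+    // resolve regardless of declaration order; now walk each member.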
for (container_decl.ast.members) |member| switch (ast.nodeTag(member)) { + .container_field_init, + .container_field_align, + .container_field, + => try w.container_field(&namespace.base, parent_decl, ast.fullContainerField(member).?), + + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + .fn_decl, + => { + var buf: [1]Ast.Node.Index = undefined; + const full = ast.fullFnProto(&buf, member).?; + const fn_name_token = full.ast.fn_token + 1; + const fn_name = ast.tokenSlice(fn_name_token); + if (namespace.doctests.get(fn_name)) |doctest_node| { + try w.file.get().doctests.put(gpa, member, doctest_node); + } + const decl_index = try w.file.add_decl(member, parent_decl); + const body = if (ast.nodeTag(member) == .fn_decl) ast.nodeData(member).node_and_node[1].toOptional() else .none; + try w.fn_decl(&namespace.base, decl_index, body, full); + }, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + const decl_index = try w.file.add_decl(member, parent_decl); + try w.global_var_decl(&namespace.base, decl_index, ast.fullVarDecl(member).?); + }, + + .@"comptime", + => try w.expr(&namespace.base, parent_decl, ast.nodeData(member).node), + + .test_decl => try w.expr(&namespace.base, parent_decl, ast.nodeData(member).opt_token_and_node[1]), + + else => unreachable, + }; +} + +fn comptime_decl( + w: *Walk, + scope: *Scope, + parent_decl: Decl.Index, + full: Ast.full.VarDecl, +) Oom!void { + try w.expr(scope, parent_decl, full.ast.type_node); + try w.maybe_expr(scope, parent_decl, full.ast.align_node); + try w.maybe_expr(scope, parent_decl, full.ast.addrspace_node); + try w.maybe_expr(scope, parent_decl, full.ast.section_node); + try w.expr(scope, parent_decl, full.ast.init_node); +} + +fn global_var_decl( + w: *Walk, + scope: *Scope, + parent_decl: Decl.Index, + full: Ast.full.VarDecl, +) Oom!void { + try w.maybe_expr(scope, parent_decl, full.ast.type_node); + try w.maybe_expr(scope, parent_decl, full.ast.align_node); + try w.maybe_expr(scope, parent_decl, full.ast.addrspace_node); + try w.maybe_expr(scope, parent_decl, full.ast.section_node); + try w.maybe_expr(scope, parent_decl, full.ast.init_node); +} + +fn container_field( + w: *Walk, + scope: *Scope, + parent_decl: Decl.Index, + full: Ast.full.ContainerField, +) Oom!void { + try w.maybe_expr(scope, parent_decl, full.ast.type_expr); + try w.maybe_expr(scope, parent_decl, full.ast.align_expr); + try w.maybe_expr(scope, parent_decl, full.ast.value_expr); +} + +fn fn_decl( + w: *Walk, + scope: *Scope, + parent_decl: Decl.Index, + body: Ast.Node.OptionalIndex, + full: Ast.full.FnProto, +) Oom!void { + for (full.ast.params) |param| { + try expr(w, scope, parent_decl, param); + } + try expr(w, scope, parent_decl, full.ast.return_type.unwrap().?); + try maybe_expr(w, scope, parent_decl, full.ast.align_expr); + try maybe_expr(w, scope, parent_decl, full.ast.addrspace_expr); + try maybe_expr(w, scope, parent_decl, full.ast.section_expr); + try maybe_expr(w, scope, parent_decl, full.ast.callconv_expr); + try maybe_expr(w, scope, parent_decl, body); +} + +fn maybe_expr(w: *Walk, scope: *Scope, parent_decl: Decl.Index, node: Ast.Node.OptionalIndex) Oom!void { + if (node.unwrap()) |n| return expr(w, scope, parent_decl, n); +} + +fn expr(w: *Walk, scope: *Scope, parent_decl: Decl.Index, node: Ast.Node.Index) Oom!void { + const ast = w.file.get_ast(); + switch (ast.nodeTag(node)) { + .root => unreachable, // Top-level declaration. + .test_decl => unreachable, // Top-level declaration. 
+ .container_field_init => unreachable, // Top-level declaration. + .container_field_align => unreachable, // Top-level declaration. + .container_field => unreachable, // Top-level declaration. + .fn_decl => unreachable, // Top-level declaration. + + .global_var_decl => unreachable, // Handled in `block`. + .local_var_decl => unreachable, // Handled in `block`. + .simple_var_decl => unreachable, // Handled in `block`. + .aligned_var_decl => unreachable, // Handled in `block`. + .@"defer" => unreachable, // Handled in `block`. + .@"errdefer" => unreachable, // Handled in `block`. + + .switch_case => unreachable, // Handled in `switchExpr`. + .switch_case_inline => unreachable, // Handled in `switchExpr`. + .switch_case_one => unreachable, // Handled in `switchExpr`. + .switch_case_inline_one => unreachable, // Handled in `switchExpr`. + + .asm_output => unreachable, // Handled in `asmExpr`. + .asm_input => unreachable, // Handled in `asmExpr`. + + .for_range => unreachable, // Handled in `forExpr`. + + .assign, + .assign_shl, + .assign_shl_sat, + .assign_shr, + .assign_bit_and, + .assign_bit_or, + .assign_bit_xor, + .assign_div, + .assign_sub, + .assign_sub_wrap, + .assign_sub_sat, + .assign_mod, + .assign_add, + .assign_add_wrap, + .assign_add_sat, + .assign_mul, + .assign_mul_wrap, + .assign_mul_sat, + .shl, + .shr, + .add, + .add_wrap, + .add_sat, + .sub, + .sub_wrap, + .sub_sat, + .mul, + .mul_wrap, + .mul_sat, + .div, + .mod, + .shl_sat, + + .bit_and, + .bit_or, + .bit_xor, + .bang_equal, + .equal_equal, + .greater_than, + .greater_or_equal, + .less_than, + .less_or_equal, + .array_cat, + + .array_mult, + .error_union, + .merge_error_sets, + .bool_and, + .bool_or, + .@"catch", + .@"orelse", + .array_type, + .array_access, + .switch_range, + => { + const lhs, const rhs = ast.nodeData(node).node_and_node; + try expr(w, scope, parent_decl, lhs); + try expr(w, scope, parent_decl, rhs); + }, + + .assign_destructure => { + const full = ast.assignDestructure(node); + for (full.ast.variables) |variable_node| try expr(w, scope, parent_decl, variable_node); + _ = try expr(w, scope, parent_decl, full.ast.value_expr); + }, + + .bool_not, + .bit_not, + .negation, + .negation_wrap, + .deref, + .address_of, + .optional_type, + .@"comptime", + .@"nosuspend", + .@"suspend", + .@"resume", + .@"try", + => try expr(w, scope, parent_decl, ast.nodeData(node).node), + .unwrap_optional, + .grouped_expression, + => try expr(w, scope, parent_decl, ast.nodeData(node).node_and_token[0]), + .@"return" => try maybe_expr(w, scope, parent_decl, ast.nodeData(node).opt_node), + + .anyframe_type => try expr(w, scope, parent_decl, ast.nodeData(node).token_and_node[1]), + .@"break" => try maybe_expr(w, scope, parent_decl, ast.nodeData(node).opt_token_and_opt_node[1]), + + .identifier => { + const ident_token = ast.nodeMainToken(node); + const ident_name = ast.tokenSlice(ident_token); + if (scope.lookup(ast, ident_name)) |var_node| { + try w.file.get().ident_decls.put(gpa, ident_token, var_node); + } + }, + .field_access => { + const object_node, const field_ident = ast.nodeData(node).node_and_token; + try w.file.get().token_parents.put(gpa, field_ident, node); + // This will populate the left-most field object if it is an + // identifier, allowing rendering code to piece together the link. 
+ try expr(w, scope, parent_decl, object_node); + }, + + .string_literal, + .multiline_string_literal, + .number_literal, + .unreachable_literal, + .enum_literal, + .error_value, + .anyframe_literal, + .@"continue", + .char_literal, + .error_set_decl, + => {}, + + .asm_simple, + .@"asm", + => { + const full = ast.fullAsm(node).?; + for (full.ast.items) |n| { + // There is a missing call here to expr() for .asm_input and + // .asm_output nodes. + _ = n; + } + try expr(w, scope, parent_decl, full.ast.template); + }, + + .builtin_call_two, + .builtin_call_two_comma, + .builtin_call, + .builtin_call_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + const params = ast.builtinCallParams(&buf, node).?; + return builtin_call(w, scope, parent_decl, node, params); + }, + + .call_one, + .call_one_comma, + .call, + .call_comma, + => { + var buf: [1]Ast.Node.Index = undefined; + const full = ast.fullCall(&buf, node).?; + try expr(w, scope, parent_decl, full.ast.fn_expr); + for (full.ast.params) |param| { + try expr(w, scope, parent_decl, param); + } + }, + + .if_simple, + .@"if", + => { + const full = ast.fullIf(node).?; + try expr(w, scope, parent_decl, full.ast.cond_expr); + try expr(w, scope, parent_decl, full.ast.then_expr); + try maybe_expr(w, scope, parent_decl, full.ast.else_expr); + }, + + .while_simple, + .while_cont, + .@"while", + => { + try while_expr(w, scope, parent_decl, ast.fullWhile(node).?); + }, + + .for_simple, .@"for" => { + const full = ast.fullFor(node).?; + for (full.ast.inputs) |input| { + if (ast.nodeTag(input) == .for_range) { + const start, const end = ast.nodeData(input).node_and_opt_node; + try expr(w, scope, parent_decl, start); + try maybe_expr(w, scope, parent_decl, end); + } else { + try expr(w, scope, parent_decl, input); + } + } + try expr(w, scope, parent_decl, full.ast.then_expr); + try maybe_expr(w, scope, parent_decl, full.ast.else_expr); + }, + + .slice => return slice(w, scope, parent_decl, ast.slice(node)), + .slice_open => return slice(w, scope, parent_decl, ast.sliceOpen(node)), + .slice_sentinel => return slice(w, scope, parent_decl, ast.sliceSentinel(node)), + + .block_two, + .block_two_semicolon, + .block, + .block_semicolon, + => { + var buf: [2]Ast.Node.Index = undefined; + const statements = ast.blockStatements(&buf, node).?; + return block(w, scope, parent_decl, statements); + }, + + .ptr_type_aligned, + .ptr_type_sentinel, + .ptr_type, + .ptr_type_bit_range, + => { + const full = ast.fullPtrType(node).?; + try maybe_expr(w, scope, parent_decl, full.ast.align_node); + try maybe_expr(w, scope, parent_decl, full.ast.addrspace_node); + try maybe_expr(w, scope, parent_decl, full.ast.sentinel); + try maybe_expr(w, scope, parent_decl, full.ast.bit_range_start); + try maybe_expr(w, scope, parent_decl, full.ast.bit_range_end); + try expr(w, scope, parent_decl, full.ast.child_type); + }, + + .container_decl, + .container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + => { + var buf: [2]Ast.Node.Index = undefined; + return struct_decl(w, scope, parent_decl, node, ast.fullContainerDecl(&buf, node).?); + }, + + .array_type_sentinel => { + const len_expr, const extra_index = ast.nodeData(node).node_and_extra; + const extra = ast.extraData(extra_index, Ast.Node.ArrayTypeSentinel); + try expr(w, scope, parent_decl, len_expr); + 
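+            // The element type and sentinel are stored out-of-line in extra
+            // data, so they are walked separately from the length expression.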
try expr(w, scope, parent_decl, extra.elem_type); + try expr(w, scope, parent_decl, extra.sentinel); + }, + .@"switch", .switch_comma => { + const full = ast.fullSwitch(node).?; + try expr(w, scope, parent_decl, full.ast.condition); + for (full.ast.cases) |case_node| { + const case = ast.fullSwitchCase(case_node).?; + for (case.ast.values) |value_node| { + try expr(w, scope, parent_decl, value_node); + } + try expr(w, scope, parent_decl, case.ast.target_expr); + } + }, + + .array_init_one, + .array_init_one_comma, + .array_init_dot_two, + .array_init_dot_two_comma, + .array_init_dot, + .array_init_dot_comma, + .array_init, + .array_init_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + const full = ast.fullArrayInit(&buf, node).?; + try maybe_expr(w, scope, parent_decl, full.ast.type_expr); + for (full.ast.elements) |elem| { + try expr(w, scope, parent_decl, elem); + } + }, + + .struct_init_one, + .struct_init_one_comma, + .struct_init_dot_two, + .struct_init_dot_two_comma, + .struct_init_dot, + .struct_init_dot_comma, + .struct_init, + .struct_init_comma, + => { + var buf: [2]Ast.Node.Index = undefined; + const full = ast.fullStructInit(&buf, node).?; + try maybe_expr(w, scope, parent_decl, full.ast.type_expr); + for (full.ast.fields) |field| { + try expr(w, scope, parent_decl, field); + } + }, + + .fn_proto_simple, + .fn_proto_multi, + .fn_proto_one, + .fn_proto, + => { + var buf: [1]Ast.Node.Index = undefined; + return fn_decl(w, scope, parent_decl, .none, ast.fullFnProto(&buf, node).?); + }, + } +} + +fn slice(w: *Walk, scope: *Scope, parent_decl: Decl.Index, full: Ast.full.Slice) Oom!void { + try expr(w, scope, parent_decl, full.ast.sliced); + try expr(w, scope, parent_decl, full.ast.start); + try maybe_expr(w, scope, parent_decl, full.ast.end); + try maybe_expr(w, scope, parent_decl, full.ast.sentinel); +} + +fn builtin_call( + w: *Walk, + scope: *Scope, + parent_decl: Decl.Index, + node: Ast.Node.Index, + params: []const Ast.Node.Index, +) Oom!void { + const ast = w.file.get_ast(); + const builtin_token = ast.nodeMainToken(node); + const builtin_name = ast.tokenSlice(builtin_token); + if (std.mem.eql(u8, builtin_name, "@This")) { + try w.file.get().node_decls.put(gpa, node, scope.getNamespaceDecl()); + } + + for (params) |param| { + try expr(w, scope, parent_decl, param); + } +} + +fn block( + w: *Walk, + parent_scope: *Scope, + parent_decl: Decl.Index, + statements: []const Ast.Node.Index, +) Oom!void { + const ast = w.file.get_ast(); + + var scope = parent_scope; + + for (statements) |node| { + switch (ast.nodeTag(node)) { + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + const full = ast.fullVarDecl(node).?; + try global_var_decl(w, scope, parent_decl, full); + const local = try gpa.create(Scope.Local); + local.* = .{ + .parent = scope, + .var_node = node, + }; + try w.file.get().scopes.putNoClobber(gpa, node, &local.base); + scope = &local.base; + }, + + .assign_destructure => { + log.debug("walk assign_destructure not implemented yet", .{}); + }, + + .grouped_expression => try expr(w, scope, parent_decl, ast.nodeData(node).node_and_token[0]), + + .@"defer" => try expr(w, scope, parent_decl, ast.nodeData(node).node), + .@"errdefer" => try expr(w, scope, parent_decl, ast.nodeData(node).opt_token_and_node[1]), + + else => try expr(w, scope, parent_decl, node), + } + } +} + +fn while_expr(w: *Walk, scope: *Scope, parent_decl: Decl.Index, full: Ast.full.While) Oom!void { + try expr(w, scope, parent_decl, full.ast.cond_expr); + try 
maybe_expr(w, scope, parent_decl, full.ast.cont_expr); + try expr(w, scope, parent_decl, full.ast.then_expr); + try maybe_expr(w, scope, parent_decl, full.ast.else_expr); +} + +fn scanDecls(w: *Walk, namespace: *Scope.Namespace, members: []const Ast.Node.Index) Oom!void { + const ast = w.file.get_ast(); + + for (members) |member_node| { + const name_token = switch (ast.nodeTag(member_node)) { + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => ast.nodeMainToken(member_node) + 1, + + .fn_proto_simple, + .fn_proto_multi, + .fn_proto_one, + .fn_proto, + .fn_decl, + => blk: { + const ident = ast.nodeMainToken(member_node) + 1; + if (ast.tokenTag(ident) != .identifier) continue; + break :blk ident; + }, + + .test_decl => { + const opt_ident_token = ast.nodeData(member_node).opt_token_and_node[0]; + if (opt_ident_token.unwrap()) |ident_token| { + const is_doctest = ast.tokenTag(ident_token) == .identifier; + if (is_doctest) { + const token_bytes = ast.tokenSlice(ident_token); + try namespace.doctests.put(gpa, token_bytes, member_node); + } + } + continue; + }, + + else => continue, + }; + + const token_bytes = ast.tokenSlice(name_token); + try namespace.names.put(gpa, token_bytes, member_node); + } +} + +pub fn isPrimitiveNonType(name: []const u8) bool { + return std.mem.eql(u8, name, "undefined") or + std.mem.eql(u8, name, "null") or + std.mem.eql(u8, name, "true") or + std.mem.eql(u8, name, "false"); +} + +//test { +// const gpa = std.testing.allocator; +// +// var arena_instance = std.heap.ArenaAllocator.init(gpa); +// defer arena_instance.deinit(); +// const arena = arena_instance.allocator(); +// +// // example test command: +// // zig test --dep input.zig -Mroot=src/Walk.zig -Minput.zig=/home/andy/dev/zig/lib/std/fs/File/zig +// var ast = try Ast.parse(gpa, @embedFile("input.zig"), .zig); +// defer ast.deinit(gpa); +// +// var w: Walk = .{ +// .arena = arena, +// .token_links = .{}, +// .ast = &ast, +// }; +// +// try w.root(); +//} + +fn shrinkToFit(m: anytype) void { + m.shrinkAndFree(gpa, m.entries.len); +} diff --git a/docs/wasm/html_render.zig b/docs/wasm/html_render.zig new file mode 100644 index 0000000..68b10a0 --- /dev/null +++ b/docs/wasm/html_render.zig @@ -0,0 +1,400 @@ +const std = @import("std"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; + +const Walk = @import("Walk"); +const Decl = Walk.Decl; + +const gpa = std.heap.wasm_allocator; +const Oom = error{OutOfMemory}; + +/// Delete this to find out where URL escaping needs to be added. +pub const missing_feature_url_escape = true; + +pub const RenderSourceOptions = struct { + skip_doc_comments: bool = false, + skip_comments: bool = false, + collapse_whitespace: bool = false, + fn_link: Decl.Index = .none, + /// Assumed to be sorted ascending. + source_location_annotations: []const Annotation = &.{}, + /// Concatenated with dom_id. + annotation_prefix: []const u8 = "l", +}; + +pub const Annotation = struct { + file_byte_offset: u32, + /// Concatenated with annotation_prefix. 
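+    /// e.g. the default prefix "l" combined with a `dom_id` of 42 produces
+    /// the DOM id "l42".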
+    dom_id: u32,
+};
+
+pub fn fileSourceHtml(
+    file_index: Walk.File.Index,
+    out: *std.ArrayListUnmanaged(u8),
+    root_node: Ast.Node.Index,
+    options: RenderSourceOptions,
+) !void {
+    const ast = file_index.get_ast();
+    const file = file_index.get();
+
+    const g = struct {
+        var field_access_buffer: std.ArrayListUnmanaged(u8) = .empty;
+    };
+
+    const start_token = ast.firstToken(root_node);
+    const end_token = ast.lastToken(root_node) + 1;
+
+    var cursor: usize = ast.tokenStart(start_token);
+
+    var indent: usize = 0;
+    if (std.mem.lastIndexOf(u8, ast.source[0..cursor], "\n")) |newline_index| {
+        for (ast.source[newline_index + 1 .. cursor]) |c| {
+            if (c == ' ') {
+                indent += 1;
+            } else {
+                break;
+            }
+        }
+    }
+
+    var next_annotate_index: usize = 0;
+
+    for (
+        ast.tokens.items(.tag)[start_token..end_token],
+        ast.tokens.items(.start)[start_token..end_token],
+        start_token..,
+    ) |tag, start, token_index| {
+        const between = ast.source[cursor..start];
+        if (std.mem.trim(u8, between, " \t\r\n").len > 0) {
+            if (!options.skip_comments) {
+                try out.appendSlice(gpa, "<span class=\"tok-comment\">");
+                try appendUnindented(out, between, indent);
+                try out.appendSlice(gpa, "</span>");
+            }
+        } else if (between.len > 0) {
+            if (options.collapse_whitespace) {
+                if (out.items.len > 0 and out.items[out.items.len - 1] != ' ')
+                    try out.append(gpa, ' ');
+            } else {
+                try appendUnindented(out, between, indent);
+            }
+        }
+        if (tag == .eof) break;
+        const slice = ast.tokenSlice(token_index);
+        cursor = start + slice.len;
+
+        // Insert annotations.
+        while (true) {
+            if (next_annotate_index >= options.source_location_annotations.len) break;
+            const next_annotation = options.source_location_annotations[next_annotate_index];
+            if (cursor <= next_annotation.file_byte_offset) break;
+            try out.writer(gpa).print("<span id=\"{s}{d}\"></span>", .{
+                options.annotation_prefix, next_annotation.dom_id,
+            });
+            next_annotate_index += 1;
+        }
+
+        switch (tag) {
+            .eof => unreachable,
+
+            .keyword_addrspace,
+            .keyword_align,
+            .keyword_and,
+            .keyword_asm,
+            .keyword_break,
+            .keyword_catch,
+            .keyword_comptime,
+            .keyword_const,
+            .keyword_continue,
+            .keyword_defer,
+            .keyword_else,
+            .keyword_enum,
+            .keyword_errdefer,
+            .keyword_error,
+            .keyword_export,
+            .keyword_extern,
+            .keyword_for,
+            .keyword_if,
+            .keyword_inline,
+            .keyword_noalias,
+            .keyword_noinline,
+            .keyword_nosuspend,
+            .keyword_opaque,
+            .keyword_or,
+            .keyword_orelse,
+            .keyword_packed,
+            .keyword_anyframe,
+            .keyword_pub,
+            .keyword_resume,
+            .keyword_return,
+            .keyword_linksection,
+            .keyword_callconv,
+            .keyword_struct,
+            .keyword_suspend,
+            .keyword_switch,
+            .keyword_test,
+            .keyword_threadlocal,
+            .keyword_try,
+            .keyword_union,
+            .keyword_unreachable,
+            .keyword_var,
+            .keyword_volatile,
+            .keyword_allowzero,
+            .keyword_while,
+            .keyword_anytype,
+            .keyword_fn,
+            => {
+                try out.appendSlice(gpa, "<span class=\"tok-kw\">");
+                try appendEscaped(out, slice);
+                try out.appendSlice(gpa, "</span>");
+            },
+
+            .string_literal,
+            .char_literal,
+            .multiline_string_literal_line,
+            => {
+                try out.appendSlice(gpa, "<span class=\"tok-str\">");
+                try appendEscaped(out, slice);
+                try out.appendSlice(gpa, "</span>");
+            },
+
+            .builtin => {
+                try out.appendSlice(gpa, "<span class=\"tok-builtin\">");
+                try appendEscaped(out, slice);
+                try out.appendSlice(gpa, "</span>");
+            },
+
+            .doc_comment,
+            .container_doc_comment,
+            => {
+                if (!options.skip_doc_comments) {
+                    try out.appendSlice(gpa, "<span class=\"tok-comment\">");
+                    try appendEscaped(out, slice);
+                    try out.appendSlice(gpa, "</span>");
+                }
+            },
+
+            .identifier => i: {
+                if (options.fn_link != .none) {
+                    const fn_link = options.fn_link.get();
+                    const fn_token = ast.nodeMainToken(fn_link.ast_node);
+                    if (token_index == fn_token + 1) {
+                        try out.appendSlice(gpa, "<a class=\"tok-fn\" href=\"#");
+                        _ = missing_feature_url_escape;
+                        try fn_link.fqn(out);
+                        try out.appendSlice(gpa, "\">");
+                        try appendEscaped(out, slice);
+                        try out.appendSlice(gpa, "</a>");
+                        break :i;
+                    }
+                }
+
+                if (token_index > 0 and ast.tokenTag(token_index - 1) == .keyword_fn) {
+                    try out.appendSlice(gpa, "<span class=\"tok-fn\">");
+                    try appendEscaped(out, slice);
+                    try out.appendSlice(gpa, "</span>");
+                    break :i;
+                }
+
+                if (Walk.isPrimitiveNonType(slice)) {
+                    try out.appendSlice(gpa, "<span class=\"tok-null\">");
+                    try appendEscaped(out, slice);
+                    try out.appendSlice(gpa, "</span>");
+                    break :i;
+                }
+
+                if (std.zig.primitives.isPrimitive(slice)) {
+                    try out.appendSlice(gpa, "<span class=\"tok-type\">");
+                    try appendEscaped(out, slice);
+                    try out.appendSlice(gpa, "</span>");
+                    break :i;
+                }
+
+                if (file.token_parents.get(token_index)) |field_access_node| {
+                    g.field_access_buffer.clearRetainingCapacity();
+                    try walkFieldAccesses(file_index, &g.field_access_buffer, field_access_node);
+                    if (g.field_access_buffer.items.len > 0) {
+                        try out.appendSlice(gpa, "<a href=\"#");
+                        _ = missing_feature_url_escape;
+                        try out.appendSlice(gpa, g.field_access_buffer.items);
+                        try out.appendSlice(gpa, "\">");
+                        try appendEscaped(out, slice);
+                        try out.appendSlice(gpa, "</a>");
+                    } else {
+                        try appendEscaped(out, slice);
+                    }
+                    break :i;
+                }
+
+                {
+                    g.field_access_buffer.clearRetainingCapacity();
+                    try resolveIdentLink(file_index, &g.field_access_buffer, token_index);
+                    if (g.field_access_buffer.items.len > 0) {
+                        try out.appendSlice(gpa, "<a href=\"#");
+                        _ = missing_feature_url_escape;
+                        try out.appendSlice(gpa, g.field_access_buffer.items);
+                        try out.appendSlice(gpa, "\">");
+                        try appendEscaped(out, slice);
+                        try out.appendSlice(gpa, "</a>");
+                        break :i;
+                    }
+                }
+
+                try appendEscaped(out, slice);
+            },
+
+            .number_literal => {
+                try out.appendSlice(gpa, "<span class=\"tok-number\">");
+                try appendEscaped(out, slice);
+                try out.appendSlice(gpa, "</span>");
+            },
+
+            .bang,
+            .pipe,
+            .pipe_pipe,
+            .pipe_equal,
+            .equal,
+            .equal_equal,
+            .equal_angle_bracket_right,
+            .bang_equal,
+            .l_paren,
+            .r_paren,
+            .semicolon,
+            .percent,
+            .percent_equal,
+            .l_brace,
+            .r_brace,
+            .l_bracket,
+            .r_bracket,
+            .period,
+            .period_asterisk,
+            .ellipsis2,
+            .ellipsis3,
+            .caret,
+            .caret_equal,
+            .plus,
+            .plus_plus,
+            .plus_equal,
+            .plus_percent,
+            .plus_percent_equal,
+            .plus_pipe,
+            .plus_pipe_equal,
+            .minus,
+            .minus_equal,
+            .minus_percent,
+            .minus_percent_equal,
+            .minus_pipe,
+            .minus_pipe_equal,
+            .asterisk,
+            .asterisk_equal,
+            .asterisk_asterisk,
+            .asterisk_percent,
+            .asterisk_percent_equal,
+            .asterisk_pipe,
+            .asterisk_pipe_equal,
+            .arrow,
+            .colon,
+            .slash,
+            .slash_equal,
+            .comma,
+            .ampersand,
+            .ampersand_equal,
+            .question_mark,
+            .angle_bracket_left,
+            .angle_bracket_left_equal,
+            .angle_bracket_angle_bracket_left,
+            .angle_bracket_angle_bracket_left_equal,
+            .angle_bracket_angle_bracket_left_pipe,
+            .angle_bracket_angle_bracket_left_pipe_equal,
+            .angle_bracket_right,
+            .angle_bracket_right_equal,
+            .angle_bracket_angle_bracket_right,
+            .angle_bracket_angle_bracket_right_equal,
+            .tilde,
+            => try appendEscaped(out, slice),
+
+            .invalid, .invalid_periodasterisks => return error.InvalidToken,
+        }
+    }
+}
+
+fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void {
+    var it = std.mem.splitScalar(u8, s, '\n');
+    var is_first_line = true;
+    while (it.next()) |line| {
+        if (is_first_line) {
+            try appendEscaped(out, line);
+            is_first_line = false;
+        } else {
+            try out.appendSlice(gpa, "\n");
+            try appendEscaped(out, unindent(line, indent));
+        }
+    }
+}
+
+pub fn appendEscaped(out: *std.ArrayListUnmanaged(u8), s: []const u8) !void {
+    for (s) |c| {
+        try out.ensureUnusedCapacity(gpa, 6);
+        switch (c) {
+            '&' => out.appendSliceAssumeCapacity("&amp;"),
+            '<' => out.appendSliceAssumeCapacity("&lt;"),
+            '>' => out.appendSliceAssumeCapacity("&gt;"),
+            '"' => out.appendSliceAssumeCapacity("&quot;"),
+
else => out.appendAssumeCapacity(c), + } + } +} + +fn walkFieldAccesses( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + node: Ast.Node.Index, +) Oom!void { + const ast = file_index.get_ast(); + assert(ast.nodeTag(node) == .field_access); + const object_node, const field_ident = ast.nodeData(node).node_and_token; + switch (ast.nodeTag(object_node)) { + .identifier => { + const lhs_ident = ast.nodeMainToken(object_node); + try resolveIdentLink(file_index, out, lhs_ident); + }, + .field_access => { + try walkFieldAccesses(file_index, out, object_node); + }, + else => {}, + } + if (out.items.len > 0) { + try out.append(gpa, '.'); + try out.appendSlice(gpa, ast.tokenSlice(field_ident)); + } +} + +fn resolveIdentLink( + file_index: Walk.File.Index, + out: *std.ArrayListUnmanaged(u8), + ident_token: Ast.TokenIndex, +) Oom!void { + const decl_index = file_index.get().lookup_token(ident_token); + if (decl_index == .none) return; + try resolveDeclLink(decl_index, out); +} + +fn unindent(s: []const u8, indent: usize) []const u8 { + var indent_idx: usize = 0; + for (s) |c| { + if (c == ' ' and indent_idx < indent) { + indent_idx += 1; + } else { + break; + } + } + return s[indent_idx..]; +} + +pub fn resolveDeclLink(decl_index: Decl.Index, out: *std.ArrayListUnmanaged(u8)) Oom!void { + const decl = decl_index.get(); + switch (decl.categorize()) { + .alias => |alias_decl| try alias_decl.get().fqn(out), + else => try decl.fqn(out), + } +} diff --git a/docs/wasm/main.zig b/docs/wasm/main.zig new file mode 100644 index 0000000..7e9ffa5 --- /dev/null +++ b/docs/wasm/main.zig @@ -0,0 +1,931 @@ +const std = @import("std"); +const log = std.log; +const assert = std.debug.assert; +const Ast = std.zig.Ast; +const Walk = @import("Walk"); +const markdown = @import("markdown.zig"); +const Decl = Walk.Decl; + +const fileSourceHtml = @import("html_render.zig").fileSourceHtml; +const appendEscaped = @import("html_render.zig").appendEscaped; +const resolveDeclLink = @import("html_render.zig").resolveDeclLink; +const missing_feature_url_escape = @import("html_render.zig").missing_feature_url_escape; + +const gpa = std.heap.wasm_allocator; + +const js = struct { + /// Keep in sync with the `LOG_` constants in `main.js`. 
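+    /// These integer values are what cross the wasm boundary in `js.log`.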
+ const LogLevel = enum(u8) { + err, + warn, + info, + debug, + }; + + extern "js" fn log(level: LogLevel, ptr: [*]const u8, len: usize) void; +}; + +pub const std_options: std.Options = .{ + .logFn = logFn, + //.log_level = .debug, +}; + +pub fn panic(msg: []const u8, st: ?*std.builtin.StackTrace, addr: ?usize) noreturn { + _ = st; + _ = addr; + log.err("panic: {s}", .{msg}); + @trap(); +} + +fn logFn( + comptime message_level: log.Level, + comptime scope: @TypeOf(.enum_literal), + comptime format: []const u8, + args: anytype, +) void { + const prefix = if (scope == .default) "" else @tagName(scope) ++ ": "; + var buf: [500]u8 = undefined; + const line = std.fmt.bufPrint(&buf, prefix ++ format, args) catch l: { + buf[buf.len - 3 ..][0..3].* = "...".*; + break :l &buf; + }; + js.log(@field(js.LogLevel, @tagName(message_level)), line.ptr, line.len); +} + +export fn alloc(n: usize) [*]u8 { + const slice = gpa.alloc(u8, n) catch @panic("OOM"); + return slice.ptr; +} + +export fn unpack(tar_ptr: [*]u8, tar_len: usize) void { + const tar_bytes = tar_ptr[0..tar_len]; + //log.debug("received {d} bytes of tar file", .{tar_bytes.len}); + + unpackInner(tar_bytes) catch |err| { + std.debug.panic("unable to unpack tar: {s}", .{@errorName(err)}); + }; +} + +var query_string: std.ArrayListUnmanaged(u8) = .empty; +var query_results: std.ArrayListUnmanaged(Decl.Index) = .empty; + +/// Resizes the query string to be the correct length; returns the pointer to +/// the query string. +export fn query_begin(query_string_len: usize) [*]u8 { + query_string.resize(gpa, query_string_len) catch @panic("OOM"); + return query_string.items.ptr; +} + +/// Executes the query. Returns the pointer to the query results which is an +/// array of u32. +/// The first element is the length of the array. +/// Subsequent elements are Decl.Index values which are all public +/// declarations. +export fn query_exec(ignore_case: bool) [*]Decl.Index { + const query = query_string.items; + log.debug("querying '{s}'", .{query}); + query_exec_fallible(query, ignore_case) catch |err| switch (err) { + error.OutOfMemory => @panic("OOM"), + }; + query_results.items[0] = @enumFromInt(query_results.items.len - 1); + return query_results.items.ptr; +} + +const max_matched_items = 1000; + +fn query_exec_fallible(query: []const u8, ignore_case: bool) !void { + const Score = packed struct(u32) { + points: u16, + segments: u16, + }; + const g = struct { + var full_path_search_text: std.ArrayListUnmanaged(u8) = .empty; + var full_path_search_text_lower: std.ArrayListUnmanaged(u8) = .empty; + var doc_search_text: std.ArrayListUnmanaged(u8) = .empty; + /// Each element matches a corresponding query_results element. + var scores: std.ArrayListUnmanaged(Score) = .empty; + }; + + // First element stores the size of the list. + try query_results.resize(gpa, 1); + // Corresponding point value is meaningless and therefore undefined. + try g.scores.resize(gpa, 1); + + decl_loop: for (Walk.decls.items, 0..) 
|*decl, decl_index| { + const info = decl.extra_info(); + if (!info.is_pub) continue; + + try decl.reset_with_path(&g.full_path_search_text); + if (decl.parent != .none) + try Decl.append_parent_ns(&g.full_path_search_text, decl.parent); + try g.full_path_search_text.appendSlice(gpa, info.name); + + try g.full_path_search_text_lower.resize(gpa, g.full_path_search_text.items.len); + @memcpy(g.full_path_search_text_lower.items, g.full_path_search_text.items); + + const ast = decl.file.get_ast(); + if (info.first_doc_comment.unwrap()) |first_doc_comment| { + try collect_docs(&g.doc_search_text, ast, first_doc_comment); + } + + if (ignore_case) { + ascii_lower(g.full_path_search_text_lower.items); + ascii_lower(g.doc_search_text.items); + } + + var it = std.mem.tokenizeScalar(u8, query, ' '); + var points: u16 = 0; + var bypass_limit = false; + while (it.next()) |term| { + // exact, case sensitive match of full decl path + if (std.mem.eql(u8, g.full_path_search_text.items, term)) { + points += 4; + bypass_limit = true; + continue; + } + // exact, case sensitive match of just decl name + if (std.mem.eql(u8, info.name, term)) { + points += 3; + bypass_limit = true; + continue; + } + // substring, case insensitive match of full decl path + if (std.mem.indexOf(u8, g.full_path_search_text_lower.items, term) != null) { + points += 2; + continue; + } + if (std.mem.indexOf(u8, g.doc_search_text.items, term) != null) { + points += 1; + continue; + } + continue :decl_loop; + } + + if (query_results.items.len < max_matched_items or bypass_limit) { + try query_results.append(gpa, @enumFromInt(decl_index)); + try g.scores.append(gpa, .{ + .points = points, + .segments = @intCast(count_scalar(g.full_path_search_text.items, '.')), + }); + } + } + + const sort_context: struct { + pub fn swap(sc: @This(), a_index: usize, b_index: usize) void { + _ = sc; + std.mem.swap(Score, &g.scores.items[a_index], &g.scores.items[b_index]); + std.mem.swap(Decl.Index, &query_results.items[a_index], &query_results.items[b_index]); + } + + pub fn lessThan(sc: @This(), a_index: usize, b_index: usize) bool { + _ = sc; + const a_score = g.scores.items[a_index]; + const b_score = g.scores.items[b_index]; + if (b_score.points < a_score.points) { + return true; + } else if (b_score.points > a_score.points) { + return false; + } else if (a_score.segments < b_score.segments) { + return true; + } else if (a_score.segments > b_score.segments) { + return false; + } else { + const a_decl = query_results.items[a_index]; + const b_decl = query_results.items[b_index]; + const a_file_path = a_decl.get().file.path(); + const b_file_path = b_decl.get().file.path(); + // This neglects to check the local namespace inside the file. 
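+                // Note the reversed operands: ties are broken by file path
+                // in descending lexicographic order.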
+                return std.mem.lessThan(u8, b_file_path, a_file_path);
+            }
+        }
+    } = .{};
+
+    std.mem.sortUnstableContext(1, query_results.items.len, sort_context);
+
+    if (query_results.items.len > max_matched_items)
+        query_results.shrinkRetainingCapacity(max_matched_items);
+}
+
+const String = Slice(u8);
+
+fn Slice(T: type) type {
+    return packed struct(u64) {
+        ptr: u32,
+        len: u32,
+
+        fn init(s: []const T) @This() {
+            return .{
+                .ptr = @intFromPtr(s.ptr),
+                .len = s.len,
+            };
+        }
+    };
+}
+
+const ErrorIdentifier = packed struct(u64) {
+    token_index: Ast.TokenIndex,
+    decl_index: Decl.Index,
+
+    fn hasDocs(ei: ErrorIdentifier) bool {
+        const decl_index = ei.decl_index;
+        const ast = decl_index.get().file.get_ast();
+        const token_index = ei.token_index;
+        if (token_index == 0) return false;
+        return ast.tokenTag(token_index - 1) == .doc_comment;
+    }
+
+    fn html(ei: ErrorIdentifier, base_decl: Decl.Index, out: *std.ArrayListUnmanaged(u8)) Oom!void {
+        const decl_index = ei.decl_index;
+        const ast = decl_index.get().file.get_ast();
+        const name = ast.tokenSlice(ei.token_index);
+        const has_link = base_decl != decl_index;
+
+        try out.appendSlice(gpa, "<dt>");
+        try out.appendSlice(gpa, name);
+        if (has_link) {
+            try out.appendSlice(gpa, " <a href=\"#");
+            try decl_index.get().fqn(out);
+            try out.appendSlice(gpa, "\">");
+            try out.appendSlice(gpa, decl_index.get().extra_info().name);
+            try out.appendSlice(gpa, "</a>");
+        }
+        try out.appendSlice(gpa, "</dt>");
+
+        if (Decl.findFirstDocComment(ast, ei.token_index).unwrap()) |first_doc_comment| {
+            try out.appendSlice(gpa, "<dd>");
+            try render_docs(out, decl_index, first_doc_comment, false);
+            try out.appendSlice(gpa, "</dd>");
+        }
+    }
+};
+
+var string_result: std.ArrayListUnmanaged(u8) = .empty;
+var error_set_result: std.StringArrayHashMapUnmanaged(ErrorIdentifier) = .empty;
+
+export fn decl_error_set(decl_index: Decl.Index) Slice(ErrorIdentifier) {
+    return Slice(ErrorIdentifier).init(decl_error_set_fallible(decl_index) catch @panic("OOM"));
+}
+
+export fn error_set_node_list(base_decl: Decl.Index, node: Ast.Node.Index) Slice(ErrorIdentifier) {
+    error_set_result.clearRetainingCapacity();
+    addErrorsFromExpr(base_decl, &error_set_result, node) catch @panic("OOM");
+    sort_error_set_result();
+    return Slice(ErrorIdentifier).init(error_set_result.values());
+}
+
+export fn fn_error_set_decl(decl_index: Decl.Index, node: Ast.Node.Index) Decl.Index {
+    return switch (decl_index.get().file.categorize_expr(node)) {
+        .alias => |aliasee| fn_error_set_decl(aliasee, aliasee.get().ast_node),
+        else => decl_index,
+    };
+}
+
+fn decl_error_set_fallible(decl_index: Decl.Index) Oom![]ErrorIdentifier {
+    error_set_result.clearRetainingCapacity();
+    try addErrorsFromDecl(decl_index, &error_set_result);
+    sort_error_set_result();
+    return error_set_result.values();
+}
+
+fn sort_error_set_result() void {
+    const sort_context: struct {
+        pub fn lessThan(sc: @This(), a_index: usize, b_index: usize) bool {
+            _ = sc;
+            const a_name = error_set_result.keys()[a_index];
+            const b_name = error_set_result.keys()[b_index];
+            return std.mem.lessThan(u8, a_name, b_name);
+        }
+    } = .{};
+    error_set_result.sortUnstable(sort_context);
+}
+
+fn addErrorsFromDecl(
+    decl_index: Decl.Index,
+    out: *std.StringArrayHashMapUnmanaged(ErrorIdentifier),
+) Oom!void {
+    switch (decl_index.get().categorize()) {
+        .error_set => |node| try addErrorsFromExpr(decl_index, out, node),
+        .alias => |aliasee| try addErrorsFromDecl(aliasee, out),
+        else => |cat| log.debug("unable to addErrorsFromDecl: {any}", .{cat}),
+    }
+}
+
+fn addErrorsFromExpr(
+    decl_index: Decl.Index,
+    out: *std.StringArrayHashMapUnmanaged(ErrorIdentifier),
+    node: Ast.Node.Index,
+) Oom!void {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+
+    switch (decl.file.categorize_expr(node)) {
+        .error_set => |n| switch (ast.nodeTag(n)) {
+            .error_set_decl => {
+                try addErrorsFromNode(decl_index, out, node);
+            },
+            .merge_error_sets => {
+                const lhs, const rhs = ast.nodeData(n).node_and_node;
+                try addErrorsFromExpr(decl_index, out, lhs);
+                try addErrorsFromExpr(decl_index, out, rhs);
+            },
+            else => unreachable,
+        },
+        .alias => |aliasee| {
+            try addErrorsFromDecl(aliasee, out);
+        },
+        else => return,
+    }
+}
+
+fn addErrorsFromNode(
+    decl_index: Decl.Index,
+    out: *std.StringArrayHashMapUnmanaged(ErrorIdentifier),
+    node: Ast.Node.Index,
+) Oom!void {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    const error_token = ast.nodeMainToken(node);
+    var tok_i = error_token + 2;
+    while (true) : (tok_i += 1) switch (ast.tokenTag(tok_i)) {
+        .doc_comment, .comma => {},
+        .identifier => {
+            const name = ast.tokenSlice(tok_i);
+            const gop = try out.getOrPut(gpa, name);
+            // If there is more than one error with this name, take the one
+            // with doc comments. If both have doc comments, prefer the
+            // existing one.
+ const new: ErrorIdentifier = .{ + .token_index = tok_i, + .decl_index = decl_index, + }; + if (!gop.found_existing or + (!gop.value_ptr.hasDocs() and new.hasDocs())) + { + gop.value_ptr.* = new; + } + }, + .r_brace => break, + else => unreachable, + }; +} + +export fn type_fn_fields(decl_index: Decl.Index) Slice(Ast.Node.Index) { + return decl_fields(decl_index); +} + +export fn decl_fields(decl_index: Decl.Index) Slice(Ast.Node.Index) { + return Slice(Ast.Node.Index).init(decl_fields_fallible(decl_index) catch @panic("OOM")); +} + +export fn decl_params(decl_index: Decl.Index) Slice(Ast.Node.Index) { + return Slice(Ast.Node.Index).init(decl_params_fallible(decl_index) catch @panic("OOM")); +} + +fn decl_fields_fallible(decl_index: Decl.Index) ![]Ast.Node.Index { + const decl = decl_index.get(); + const ast = decl.file.get_ast(); + + switch (decl.categorize()) { + .type_function => { + // If the type function returns a reference to another type function, get the fields from there + if (decl.get_type_fn_return_type_fn()) |function_decl| { + return decl_fields_fallible(function_decl); + } + // If the type function returns a container, such as a `struct`, read that container's fields + if (decl.get_type_fn_return_expr()) |return_expr| { + switch (ast.nodeTag(return_expr)) { + .container_decl, .container_decl_trailing, .container_decl_two, .container_decl_two_trailing, .container_decl_arg, .container_decl_arg_trailing => { + return ast_decl_fields_fallible(ast, return_expr); + }, + else => {}, + } + } + return &.{}; + }, + else => { + const value_node = decl.value_node() orelse return &.{}; + return ast_decl_fields_fallible(ast, value_node); + }, + } +} + +fn ast_decl_fields_fallible(ast: *Ast, ast_index: Ast.Node.Index) ![]Ast.Node.Index { + const g = struct { + var result: std.ArrayListUnmanaged(Ast.Node.Index) = .empty; + }; + g.result.clearRetainingCapacity(); + var buf: [2]Ast.Node.Index = undefined; + const container_decl = ast.fullContainerDecl(&buf, ast_index) orelse return &.{}; + for (container_decl.ast.members) |member_node| switch (ast.nodeTag(member_node)) { + .container_field_init, + .container_field_align, + .container_field, + => try g.result.append(gpa, member_node), + + else => continue, + }; + return g.result.items; +} + +fn decl_params_fallible(decl_index: Decl.Index) ![]Ast.Node.Index { + const g = struct { + var result: std.ArrayListUnmanaged(Ast.Node.Index) = .empty; + }; + g.result.clearRetainingCapacity(); + const decl = decl_index.get(); + const ast = decl.file.get_ast(); + const value_node = decl.value_node() orelse return &.{}; + var buf: [1]Ast.Node.Index = undefined; + const fn_proto = ast.fullFnProto(&buf, value_node) orelse return &.{}; + try g.result.appendSlice(gpa, fn_proto.ast.params); + return g.result.items; +} + +export fn error_html(base_decl: Decl.Index, error_identifier: ErrorIdentifier) String { + string_result.clearRetainingCapacity(); + error_identifier.html(base_decl, &string_result) catch @panic("OOM"); + return String.init(string_result.items); +} + +export fn decl_field_html(decl_index: Decl.Index, field_node: Ast.Node.Index) String { + string_result.clearRetainingCapacity(); + decl_field_html_fallible(&string_result, decl_index, field_node) catch @panic("OOM"); + return String.init(string_result.items); +} + +export fn decl_param_html(decl_index: Decl.Index, param_node: Ast.Node.Index) String { + string_result.clearRetainingCapacity(); + decl_param_html_fallible(&string_result, decl_index, param_node) catch @panic("OOM"); + return 
+        String.init(string_result.items);
+}
+
+fn decl_field_html_fallible(
+    out: *std.ArrayListUnmanaged(u8),
+    decl_index: Decl.Index,
+    field_node: Ast.Node.Index,
+) !void {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    try out.appendSlice(gpa, "<pre><code>");
+    try fileSourceHtml(decl.file, out, field_node, .{});
+    try out.appendSlice(gpa, "</code></pre>");
+
+    const field = ast.fullContainerField(field_node).?;
+
+    if (Decl.findFirstDocComment(ast, field.firstToken()).unwrap()) |first_doc_comment| {
+        try out.appendSlice(gpa, "<div class=\"fieldDocs\">");
+        try render_docs(out, decl_index, first_doc_comment, false);
+        try out.appendSlice(gpa, "</div>");
+    }
+}
+
+fn decl_param_html_fallible(
+    out: *std.ArrayListUnmanaged(u8),
+    decl_index: Decl.Index,
+    param_node: Ast.Node.Index,
+) !void {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    const colon = ast.firstToken(param_node) - 1;
+    const name_token = colon - 1;
+    const first_doc_comment = f: {
+        var it = ast.firstToken(param_node);
+        while (it > 0) {
+            it -= 1;
+            switch (ast.tokenTag(it)) {
+                .doc_comment, .colon, .identifier, .keyword_comptime, .keyword_noalias => {},
+                else => break,
+            }
+        }
+        break :f it + 1;
+    };
+    const name = ast.tokenSlice(name_token);
+
+    try out.appendSlice(gpa, "<pre><code>");
+    try appendEscaped(out, name);
+    try out.appendSlice(gpa, ": ");
+    try fileSourceHtml(decl.file, out, param_node, .{});
+    try out.appendSlice(gpa, "</code></pre>");
+
+    if (ast.tokenTag(first_doc_comment) == .doc_comment) {
+        try out.appendSlice(gpa, "<div class=\"fieldDocs\">");
+        try render_docs(out, decl_index, first_doc_comment, false);
+        try out.appendSlice(gpa, "</div>");
+    }
+}
+
+export fn decl_fn_proto_html(decl_index: Decl.Index, linkify_fn_name: bool) String {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    const proto_node = switch (ast.nodeTag(decl.ast_node)) {
+        .fn_decl => ast.nodeData(decl.ast_node).node_and_node[0],
+
+        .fn_proto,
+        .fn_proto_one,
+        .fn_proto_simple,
+        .fn_proto_multi,
+        => decl.ast_node,
+
+        else => unreachable,
+    };
+
+    string_result.clearRetainingCapacity();
+    fileSourceHtml(decl.file, &string_result, proto_node, .{
+        .skip_doc_comments = true,
+        .skip_comments = true,
+        .collapse_whitespace = true,
+        .fn_link = if (linkify_fn_name) decl_index else .none,
+    }) catch |err| {
+        std.debug.panic("unable to render source: {s}", .{@errorName(err)});
+    };
+    return String.init(string_result.items);
+}
+
+export fn decl_source_html(decl_index: Decl.Index) String {
+    const decl = decl_index.get();
+
+    string_result.clearRetainingCapacity();
+    fileSourceHtml(decl.file, &string_result, decl.ast_node, .{}) catch |err| {
+        std.debug.panic("unable to render source: {s}", .{@errorName(err)});
+    };
+    return String.init(string_result.items);
+}
+
+export fn decl_doctest_html(decl_index: Decl.Index) String {
+    const decl = decl_index.get();
+    const doctest_ast_node = decl.file.get().doctests.get(decl.ast_node) orelse
+        return String.init("");
+
+    string_result.clearRetainingCapacity();
+    fileSourceHtml(decl.file, &string_result, doctest_ast_node, .{}) catch |err| {
+        std.debug.panic("unable to render source: {s}", .{@errorName(err)});
+    };
+    return String.init(string_result.items);
+}
+
+export fn decl_fqn(decl_index: Decl.Index) String {
+    const decl = decl_index.get();
+    string_result.clearRetainingCapacity();
+    decl.fqn(&string_result) catch @panic("OOM");
+    return String.init(string_result.items);
+}
+
+export fn decl_parent(decl_index: Decl.Index) Decl.Index {
+    const decl = decl_index.get();
+    return decl.parent;
+}
+
+export fn fn_error_set(decl_index: Decl.Index) Ast.Node.OptionalIndex {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    var buf: [1]Ast.Node.Index = undefined;
+    const full = ast.fullFnProto(&buf, decl.ast_node).?;
+    const return_type = full.ast.return_type.unwrap().?;
+    return switch (ast.nodeTag(return_type)) {
+        .error_set_decl => return_type.toOptional(),
+        .error_union => ast.nodeData(return_type).node_and_node[0].toOptional(),
+        else => .none,
+    };
+}
+
+export fn decl_file_path(decl_index: Decl.Index) String {
+    string_result.clearRetainingCapacity();
+    string_result.appendSlice(gpa, decl_index.get().file.path()) catch @panic("OOM");
+    return String.init(string_result.items);
+}
+
+export fn decl_category_name(decl_index: Decl.Index) String {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    const name = switch (decl.categorize()) {
+        .namespace, .container => |node| {
+            if (ast.nodeTag(decl.ast_node) == .root)
+                return String.init("struct");
+            string_result.clearRetainingCapacity();
+            var buf: [2]Ast.Node.Index = undefined;
+            const container_decl = ast.fullContainerDecl(&buf, node).?;
+            if (container_decl.layout_token) |t| {
+                if (ast.tokenTag(t) == .keyword_extern) {
+                    string_result.appendSlice(gpa, "extern ") catch @panic("OOM");
+                }
+            }
+            const main_token_tag = ast.tokenTag(container_decl.ast.main_token);
+            string_result.appendSlice(gpa, main_token_tag.lexeme().?) catch @panic("OOM");
+            return String.init(string_result.items);
+        },
+        .global_variable => "Global Variable",
+        .function => "Function",
+        .type_function => "Type Function",
+        .type, .type_type => "Type",
+        .error_set => "Error Set",
+        .global_const => "Constant",
+        .primitive => "Primitive Value",
+        .alias => "Alias",
+    };
+    return String.init(name);
+}
+
+export fn decl_name(decl_index: Decl.Index) String {
+    const decl = decl_index.get();
+    string_result.clearRetainingCapacity();
+    const name = n: {
+        if (decl.parent == .none) {
+            // Then it is the root struct of a file.
+            break :n std.fs.path.stem(decl.file.path());
+        }
+        break :n decl.extra_info().name;
+    };
+    string_result.appendSlice(gpa, name) catch @panic("OOM");
+    return String.init(string_result.items);
+}
+
+export fn decl_docs_html(decl_index: Decl.Index, short: bool) String {
+    const decl = decl_index.get();
+    string_result.clearRetainingCapacity();
+    if (decl.extra_info().first_doc_comment.unwrap()) |first_doc_comment| {
+        render_docs(&string_result, decl_index, first_doc_comment, short) catch @panic("OOM");
+    }
+    return String.init(string_result.items);
+}
+
+fn collect_docs(
+    list: *std.ArrayListUnmanaged(u8),
+    ast: *const Ast,
+    first_doc_comment: Ast.TokenIndex,
+) Oom!void {
+    list.clearRetainingCapacity();
+    var it = first_doc_comment;
+    while (true) : (it += 1) switch (ast.tokenTag(it)) {
+        .doc_comment, .container_doc_comment => {
+            // It is tempting to trim this string but think carefully about how
+            // that will affect the markdown parser.
+            const line = ast.tokenSlice(it)[3..];
+            try list.appendSlice(gpa, line);
+        },
+        else => break,
+    };
+}
+
+fn render_docs(
+    out: *std.ArrayListUnmanaged(u8),
+    decl_index: Decl.Index,
+    first_doc_comment: Ast.TokenIndex,
+    short: bool,
+) Oom!void {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+
+    var parser = try markdown.Parser.init(gpa);
+    defer parser.deinit();
+    var it = first_doc_comment;
+    while (true) : (it += 1) switch (ast.tokenTag(it)) {
+        .doc_comment, .container_doc_comment => {
+            const line = ast.tokenSlice(it)[3..];
+            if (short and line.len == 0) break;
+            try parser.feedLine(line);
+        },
+        else => break,
+    };
+
+    var parsed_doc = try parser.endInput();
+    defer parsed_doc.deinit(gpa);
+
+    const g = struct {
+        var link_buffer: std.ArrayListUnmanaged(u8) = .empty;
+    };
+
+    const Writer = std.ArrayListUnmanaged(u8).Writer;
+    const Renderer = markdown.Renderer(Writer, Decl.Index);
+    const renderer: Renderer = .{
+        .context = decl_index,
+        .renderFn = struct {
+            fn render(
+                r: Renderer,
+                doc: markdown.Document,
+                node: markdown.Document.Node.Index,
+                writer: Writer,
+            ) !void {
+                const data = doc.nodes.items(.data)[@intFromEnum(node)];
+                switch (doc.nodes.items(.tag)[@intFromEnum(node)]) {
+                    .code_span => {
+                        try writer.writeAll("<code>");
+                        const content = doc.string(data.text.content);
+                        if (resolve_decl_path(r.context, content)) |resolved_decl_index| {
+                            g.link_buffer.clearRetainingCapacity();
+                            try resolveDeclLink(resolved_decl_index, &g.link_buffer);
+
+                            try writer.writeAll("<a href=\"#");
+                            try writer.writeAll(g.link_buffer.items);
+                            try writer.print("\">{f}</a>", .{markdown.fmtHtml(content)});
+                        } else {
+                            try writer.print("{f}", .{markdown.fmtHtml(content)});
+                        }
+
+                        try writer.writeAll("</code>");
+                    },
+
+                    else => try Renderer.renderDefault(r, doc, node, writer),
+                }
+            }
+        }.render,
+    };
+    try renderer.render(parsed_doc, out.writer(gpa));
+}
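+
+// Note: identifiers in doc-comment code spans (e.g. `std.ArrayList`) are
+// resolved by `resolve_decl_path` below. When resolution succeeds, the code
+// span above is emitted as a link to that declaration; otherwise it falls
+// back to a plain <code> span.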
+
+fn resolve_decl_path(decl_index: Decl.Index, path: []const u8) ?Decl.Index {
+    var path_components = std.mem.splitScalar(u8, path, '.');
+    var current_decl_index = decl_index.get().lookup(path_components.first()) orelse return null;
+    while (path_components.next()) |component| {
+        switch (current_decl_index.get().categorize()) {
+            .alias => |aliasee| current_decl_index = aliasee,
+            else => {},
+        }
+        current_decl_index = current_decl_index.get().get_child(component) orelse return null;
+    }
+    return current_decl_index;
+}
+
+export fn decl_type_html(decl_index: Decl.Index) String {
+    const decl = decl_index.get();
+    const ast = decl.file.get_ast();
+    string_result.clearRetainingCapacity();
+    t: {
+        // If there is an explicit type, use it.
+        if (ast.fullVarDecl(decl.ast_node)) |var_decl| {
+            if (var_decl.ast.type_node.unwrap()) |type_node| {
+                string_result.appendSlice(gpa, "<code>") catch @panic("OOM");
+                fileSourceHtml(decl.file, &string_result, type_node, .{
+                    .skip_comments = true,
+                    .collapse_whitespace = true,
+                }) catch |e| {
+                    std.debug.panic("unable to render html: {s}", .{@errorName(e)});
+                };
+                string_result.appendSlice(gpa, "</code>") catch @panic("OOM");
+                break :t;
+            }
+        }
+    }
+    return String.init(string_result.items);
+}
+
+const Oom = error{OutOfMemory};
+
+fn unpackInner(tar_bytes: []u8) !void {
+    var fbs = std.io.fixedBufferStream(tar_bytes);
+    var file_name_buffer: [1024]u8 = undefined;
+    var link_name_buffer: [1024]u8 = undefined;
+    var it = std.tar.iterator(fbs.reader(), .{
+        .file_name_buffer = &file_name_buffer,
+        .link_name_buffer = &link_name_buffer,
+    });
+    while (try it.next()) |tar_file| {
+        switch (tar_file.kind) {
+            .file => {
+                if (tar_file.size == 0 and tar_file.name.len == 0) break;
+                if (std.mem.endsWith(u8, tar_file.name, ".zig")) {
+                    log.debug("found file: '{s}'", .{tar_file.name});
+                    const file_name = try gpa.dupe(u8, tar_file.name);
+                    if (std.mem.indexOfScalar(u8, file_name, '/')) |pkg_name_end| {
+                        const pkg_name = file_name[0..pkg_name_end];
+                        const gop = try Walk.modules.getOrPut(gpa, pkg_name);
+                        const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len);
+                        if (!gop.found_existing or
+                            std.mem.eql(u8, file_name[pkg_name_end..], "/root.zig") or
+                            std.mem.eql(u8, file_name[pkg_name_end + 1 .. file_name.len - ".zig".len], pkg_name))
+                        {
+                            gop.value_ptr.* = file;
+                        }
+                        const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)];
+                        assert(file == try Walk.add_file(file_name, file_bytes));
+                    }
+                } else {
+                    log.warn("skipping: '{s}' - the tar creation should have done that", .{
+                        tar_file.name,
+                    });
+                }
+            },
+            else => continue,
+        }
+    }
+}
+
+fn ascii_lower(bytes: []u8) void {
+    for (bytes) |*b| b.* = std.ascii.toLower(b.*);
+}
+
+export fn module_name(index: u32) String {
+    const names = Walk.modules.keys();
+    return String.init(if (index >= names.len) "" else names[index]);
+}
+
+export fn find_module_root(pkg: Walk.ModuleIndex) Decl.Index {
+    const root_file = Walk.modules.values()[@intFromEnum(pkg)];
+    const result = root_file.findRootDecl();
+    assert(result != .none);
+    return result;
+}
+
+/// Set by `set_input_string`.
+var input_string: std.ArrayListUnmanaged(u8) = .empty;
+
+export fn set_input_string(len: usize) [*]u8 {
+    input_string.resize(gpa, len) catch @panic("OOM");
+    return input_string.items.ptr;
+}
+
+/// Looks up the root struct decl corresponding to a file by path.
+/// Uses `input_string`.
+export fn find_file_root() Decl.Index {
+    const file: Walk.File.Index = @enumFromInt(Walk.files.getIndex(input_string.items) orelse return .none);
+    return file.findRootDecl();
+}
+
+/// Uses `input_string`.
+/// Tries to look up the Decl component-wise but then falls back to a file path
+/// based scan.
+export fn find_decl() Decl.Index { + const result = Decl.find(input_string.items); + if (result != .none) return result; + + const g = struct { + var match_fqn: std.ArrayListUnmanaged(u8) = .empty; + }; + for (Walk.decls.items, 0..) |*decl, decl_index| { + g.match_fqn.clearRetainingCapacity(); + decl.fqn(&g.match_fqn) catch @panic("OOM"); + if (std.mem.eql(u8, g.match_fqn.items, input_string.items)) { + //const path = @as(Decl.Index, @enumFromInt(decl_index)).get().file.path(); + //log.debug("find_decl '{s}' found in {s}", .{ input_string.items, path }); + return @enumFromInt(decl_index); + } + } + return .none; +} + +/// Set only by `categorize_decl`; read only by `get_aliasee`, valid only +/// when `categorize_decl` returns `.alias`. +var global_aliasee: Decl.Index = .none; + +export fn get_aliasee() Decl.Index { + return global_aliasee; +} +export fn categorize_decl(decl_index: Decl.Index, resolve_alias_count: usize) Walk.Category.Tag { + global_aliasee = .none; + var chase_alias_n = resolve_alias_count; + var decl = decl_index.get(); + while (true) { + const result = decl.categorize(); + switch (result) { + .alias => |new_index| { + assert(new_index != .none); + global_aliasee = new_index; + if (chase_alias_n > 0) { + chase_alias_n -= 1; + decl = new_index.get(); + continue; + } + }, + else => {}, + } + return result; + } +} + +export fn type_fn_members(parent: Decl.Index, include_private: bool) Slice(Decl.Index) { + const decl = parent.get(); + + // If the type function returns another type function, get the members of that function + if (decl.get_type_fn_return_type_fn()) |function_decl| { + return namespace_members(function_decl, include_private); + } + + return namespace_members(parent, include_private); +} + +export fn namespace_members(parent: Decl.Index, include_private: bool) Slice(Decl.Index) { + const g = struct { + var members: std.ArrayListUnmanaged(Decl.Index) = .empty; + }; + + g.members.clearRetainingCapacity(); + + for (Walk.decls.items, 0..) |*decl, i| { + if (decl.parent == parent) { + if (include_private or decl.is_pub()) { + g.members.append(gpa, @enumFromInt(i)) catch @panic("OOM"); + } + } + } + + return Slice(Decl.Index).init(g.members.items); +} + +fn count_scalar(haystack: []const u8, needle: u8) usize { + var total: usize = 0; + for (haystack) |elem| { + if (elem == needle) + total += 1; + } + return total; +} diff --git a/docs/wasm/markdown.zig b/docs/wasm/markdown.zig new file mode 100644 index 0000000..3293b68 --- /dev/null +++ b/docs/wasm/markdown.zig @@ -0,0 +1,1127 @@ +//! Markdown parsing and rendering support. +//! +//! A Markdown document consists of a series of blocks. Depending on its type, +//! each block may contain other blocks, inline content, or nothing. The +//! supported blocks are as follows: +//! +//! - **List** - a sequence of list items of the same type. +//! +//! - **List item** - unordered list items start with `-`, `*`, or `+` followed +//! by a space. Ordered list items start with a number between 0 and +//! 999,999,999, followed by a `.` or `)` and a space. The number of an +//! ordered list item only matters for the first item in the list (to +//! determine the starting number of the list). All subsequent ordered list +//! items will have sequentially increasing numbers. +//! +//! All list items may contain block content. Any content indented at least as +//! far as the end of the list item marker (including the space after it) is +//! considered part of the list item. +//! +//! 
+//!
+//!   Lists which have no blank lines between items or between direct children
+//!   of items are considered _tight_, and direct child paragraphs of tight list
+//!   items are rendered without `<p>` tags.
+//!
+//! - **Table** - a sequence of adjacent table row lines, where each line starts
+//!   and ends with a `|`, and cells within the row are delimited by `|`s.
+//!
+//!   The first or second row of a table may be a _header delimiter row_, which
+//!   is a row consisting of cells of the pattern `---` (for unset column
+//!   alignment), `:--` (for left alignment), `:-:` (for center alignment), or
+//!   `--:` (for right alignment). The number of `-`s must be at least one, but
+//!   is otherwise arbitrary. If there is a row just before the header delimiter
+//!   row, it becomes the header row for the table (a table need not have a
+//!   header row at all).
+//!
+//! - **Heading** - a sequence of between 1 and 6 `#` characters, followed by a
+//!   space and further inline content on the same line.
+//!
+//! - **Code block** - a sequence of at least 3 `` ` `` characters (a _fence_),
+//!   optionally followed by a "tag" on the same line, and continuing until a
+//!   line consisting only of a closing fence whose length matches the opening
+//!   fence, or until the end of the containing block.
+//!
+//!   The content of a code block is not parsed as inline content. It is
+//!   included verbatim in the output document (minus leading indentation up to
+//!   the position of the opening fence).
+//!
+//! - **Blockquote** - a sequence of lines preceded by `>` characters.
+//!
+//! - **Paragraph** - ordinary text, parsed as inline content, ending with a
+//!   blank line or the end of the containing block.
+//!
+//!   Paragraphs which are part of another block may be "lazily" continued by
+//!   subsequent paragraph lines even if those lines would not ordinarily be
+//!   considered part of the containing block. For example, this is a single
+//!   list item, not a list item followed by a paragraph:
+//!
+//!   ```markdown
+//!   - First line of content.
+//!   This content is still part of the paragraph,
+//!   even though it isn't indented far enough.
+//!   ```
+//!
+//! - **Thematic break** - a line consisting of at least three matching `-`,
+//!   `_`, or `*` characters and, optionally, spaces.
+//!
+//! Indentation may consist of spaces and tabs. The use of tabs is not
+//! recommended: a tab is treated the same as a single space for the purpose of
+//! determining the indentation level, and is not recognized as a space for
+//! block starters which require one (for example, `-` followed by a tab is not
+//! a valid list item).
+//!
+//! The supported inlines are as follows:
+//!
+//! - **Link** - of the format `[text](target)`. `text` may contain inline
+//!   content. `target` may contain `\`-escaped characters and balanced
+//!   parentheses.
+//!
+//! - **Autolink** - an abbreviated link, of the format `<target>`, where
+//!   `target` serves as both the link target and text. `target` may not
+//!   contain spaces or `<`, and any `\` in it are interpreted literally (not as
+//!   escapes). `target` is expected to be an absolute URI: an autolink will not
+//!   be recognized unless `target` starts with a URI scheme followed by a `:`.
+//!
+//!   For convenience, autolinks may also be recognized in plain text without
+//!   any `<>` delimiters. Such autolinks are restricted to start with `http://`
+//!   or `https://` followed by at least one other character, not including any
+//!   trailing punctuation after the link.
+//!
+//! - **Image** - a link directly preceded by a `!`. The link text is
+//!   interpreted as the alt text of the image.
+//!
+//! - **Emphasis** - a run of `*` or `_` characters may be an emphasis opener,
+//!   closer, or both.
+//!   For `*` characters, the run may be an opener as long as
+//!   it is not directly followed by a whitespace character (or the end of the
+//!   inline content) and a closer as long as it is not directly preceded by
+//!   one. For `_` characters, this rule is strengthened by requiring that the
+//!   run also be preceded by a whitespace or punctuation character (for
+//!   openers) or followed by one (for closers), to avoid mangling `snake_case`
+//!   words.
+//!
+//!   The rule for emphasis handling is greedy: any run that can close existing
+//!   emphasis will do so, otherwise it will open emphasis. A single run may
+//!   serve both functions: the middle `**` in the following example both closes
+//!   the initial emphasis and opens a new one:
+//!
+//!   ```markdown
+//!   *one**two*
+//!   ```
+//!
+//!   A single `*` or `_` is used for normal emphasis (HTML `<em>`), and a
+//!   double `**` or `__` is used for strong emphasis (HTML `<strong>`). Even
+//!   longer runs may be used to produce further nested emphasis (though only
+//!   `***` and `___` to produce `<em><strong>` is really useful).
+//!
+//! - **Code span** - a run of `` ` `` characters, terminated by a matching run
+//!   or the end of inline content. The content of a code span is not parsed
+//!   further.
+//!
+//! - **Text** - normal text is interpreted as-is, except that `\` may be used
+//!   to escape any punctuation character, preventing it from being interpreted
+//!   according to other syntax rules. A `\` followed by a line break within a
+//!   paragraph is interpreted as a hard line break.
+//!
+//! Any null bytes or invalid UTF-8 bytes within text are replaced with Unicode
+//! replacement characters, `U+FFFD`.
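+//!
+//! A minimal end-to-end sketch (assuming an `Allocator` named `gpa` and any
+//! `writer`; the HTML in the comment is what the tests below expect):
+//!
+//! ```zig
+//! var parser = try Parser.init(gpa);
+//! defer parser.deinit();
+//! try parser.feedLine("- Spam");
+//! var doc = try parser.endInput();
+//! defer doc.deinit(gpa);
+//! try doc.render(writer); // writes "<ul>\n<li>Spam</li>\n</ul>\n"
+//! ```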
+
+const std = @import("std");
+const testing = std.testing;
+
+pub const Document = @import("markdown/Document.zig");
+pub const Parser = @import("markdown/Parser.zig");
+pub const Renderer = @import("markdown/renderer.zig").Renderer;
+pub const renderNodeInlineText = @import("markdown/renderer.zig").renderNodeInlineText;
+pub const fmtHtml = @import("markdown/renderer.zig").fmtHtml;
+
+// Avoid exposing main to other files merely importing this one.
+pub const main = if (@import("root") == @This())
+    mainImpl
+else
+    @compileError("only available as root source file");
+
+fn mainImpl() !void {
+    const gpa = std.heap.c_allocator;
+
+    var parser = try Parser.init(gpa);
+    defer parser.deinit();
+
+    var stdin_buf = std.io.bufferedReader(std.fs.File.stdin().deprecatedReader());
+    var line_buf = std.ArrayList(u8).init(gpa);
+    defer line_buf.deinit();
+    while (stdin_buf.reader().streamUntilDelimiter(line_buf.writer(), '\n', null)) {
+        if (line_buf.getLastOrNull() == '\r') _ = line_buf.pop();
+        try parser.feedLine(line_buf.items);
+        line_buf.clearRetainingCapacity();
+    } else |err| switch (err) {
+        error.EndOfStream => {},
+        else => |e| return e,
+    }
+
+    var doc = try parser.endInput();
+    defer doc.deinit(gpa);
+
+    var stdout_buf = std.io.bufferedWriter(std.fs.File.stdout().deprecatedWriter());
+    try doc.render(stdout_buf.writer());
+    try stdout_buf.flush();
+}
+
+test "empty document" {
+    try testRender("", "");
+    try testRender(" ", "");
+    try testRender("\n \n\t\n \n", "");
+}
+
+test "unordered lists" {
+    try testRender(
+        \\- Spam
+        \\- Spam
+        \\- Spam
+        \\- Eggs
+        \\- Bacon
+        \\- Spam
+        \\
+        \\* Spam
+        \\* Spam
+        \\* Spam
+        \\* Eggs
+        \\* Bacon
+        \\* Spam
+        \\
+        \\+ Spam
+        \\+ Spam
+        \\+ Spam
+        \\+ Eggs
+        \\+ Bacon
+        \\+ Spam
+        \\
+    ,
+        \\<ul>
+        \\<li>Spam</li>
+        \\<li>Spam</li>
+        \\<li>Spam</li>
+        \\<li>Eggs</li>
+        \\<li>Bacon</li>
+        \\<li>Spam</li>
+        \\</ul>
+        \\<ul>
+        \\<li>Spam</li>
+        \\<li>Spam</li>
+        \\<li>Spam</li>
+        \\<li>Eggs</li>
+        \\<li>Bacon</li>
+        \\<li>Spam</li>
+        \\</ul>
+        \\<ul>
+        \\<li>Spam</li>
+        \\<li>Spam</li>
+        \\<li>Spam</li>
+        \\<li>Eggs</li>
+        \\<li>Bacon</li>
+        \\<li>Spam</li>
+        \\</ul>
+        \\
+    );
+}
+
+test "ordered lists" {
+    try testRender(
+        \\1. Breakfast
+        \\2. Second breakfast
+        \\3. Lunch
+        \\2. Afternoon snack
+        \\1. Dinner
+        \\6. Dessert
+        \\7. Midnight snack
+        \\
+        \\1) Breakfast
+        \\2) Second breakfast
+        \\3) Lunch
+        \\2) Afternoon snack
+        \\1) Dinner
+        \\6) Dessert
+        \\7) Midnight snack
+        \\
+        \\1001. Breakfast
+        \\2. Second breakfast
+        \\3. Lunch
+        \\2. Afternoon snack
+        \\1. Dinner
+        \\6. Dessert
+        \\7. Midnight snack
+        \\
+        \\1001) Breakfast
+        \\2) Second breakfast
+        \\3) Lunch
+        \\2) Afternoon snack
+        \\1) Dinner
+        \\6) Dessert
+        \\7) Midnight snack
+        \\
+    ,
+        \\<ol>
+        \\<li>Breakfast</li>
+        \\<li>Second breakfast</li>
+        \\<li>Lunch</li>
+        \\<li>Afternoon snack</li>
+        \\<li>Dinner</li>
+        \\<li>Dessert</li>
+        \\<li>Midnight snack</li>
+        \\</ol>
+        \\<ol>
+        \\<li>Breakfast</li>
+        \\<li>Second breakfast</li>
+        \\<li>Lunch</li>
+        \\<li>Afternoon snack</li>
+        \\<li>Dinner</li>
+        \\<li>Dessert</li>
+        \\<li>Midnight snack</li>
+        \\</ol>
+        \\<ol start="1001">
+        \\<li>Breakfast</li>
+        \\<li>Second breakfast</li>
+        \\<li>Lunch</li>
+        \\<li>Afternoon snack</li>
+        \\<li>Dinner</li>
+        \\<li>Dessert</li>
+        \\<li>Midnight snack</li>
+        \\</ol>
+        \\<ol start="1001">
+        \\<li>Breakfast</li>
+        \\<li>Second breakfast</li>
+        \\<li>Lunch</li>
+        \\<li>Afternoon snack</li>
+        \\<li>Dinner</li>
+        \\<li>Dessert</li>
+        \\<li>Midnight snack</li>
+        \\</ol>
+        \\
+    );
+}
+
+test "nested lists" {
+    try testRender(
+        \\- - Item 1.
+        \\  - Item 2.
+        \\Item 2 continued.
+        \\  * New list.
+        \\
+    ,
+        \\<ul>
+        \\<li><ul>
+        \\<li>Item 1.</li>
+        \\<li>Item 2.
+        \\Item 2 continued.</li>
+        \\</ul>
+        \\<ul>
+        \\<li>New list.</li>
+        \\</ul>
+        \\</li>
+        \\</ul>
+        \\
+    );
+}
+
+test "lists with block content" {
+    try testRender(
+        \\1. Item 1.
+        \\2. Item 2.
+        \\
+        \\   This one has another paragraph.
+        \\3. Item 3.
+        \\
+        \\- > Blockquote.
+        \\- - Sub-list.
+        \\  - Sub-list continued.
+        \\  * Different sub-list.
+        \\- ## Heading.
+        \\
+        \\  Some contents below the heading.
+        \\  1. Item 1.
+        \\  2. Item 2.
+        \\  3. Item 3.
+        \\
+    ,
+        \\<ol>
+        \\<li><p>Item 1.</p>
+        \\</li>
+        \\<li><p>Item 2.</p>
+        \\<p>This one has another paragraph.</p>
+        \\</li>
+        \\<li><p>Item 3.</p>
+        \\</li>
+        \\</ol>
+        \\<ul>
+        \\<li><blockquote>
+        \\<p>Blockquote.</p>
+        \\</blockquote>
+        \\</li>
+        \\<li><ul>
+        \\<li>Sub-list.</li>
+        \\<li>Sub-list continued.</li>
+        \\</ul>
+        \\<ul>
+        \\<li>Different sub-list.</li>
+        \\</ul>
+        \\</li>
+        \\<li><h2>Heading.</h2>
+        \\<p>Some contents below the heading.</p>
+        \\<ol>
+        \\<li>Item 1.</li>
+        \\<li>Item 2.</li>
+        \\<li>Item 3.</li>
+        \\</ol>
+        \\</li>
+        \\</ul>
+        \\
+    );
+}
+
+test "indented lists" {
+    try testRender(
+        \\Test:
+        \\ * a1
+        \\ * a2
+        \\   * b1
+        \\   * b2
+        \\
+        \\---
+        \\
+        \\  Test:
+        \\   - One
+        \\Two
+        \\     - Three
+        \\Four
+        \\  Five
+        \\Six
+        \\
+        \\---
+        \\
+        \\None of these items are indented far enough from the previous one to
+        \\start a nested list:
+        \\- One
+        \\ - Two
+        \\  - Three
+        \\   - Four
+        \\    - Five
+        \\     - Six
+        \\      - Seven
+        \\       - Eight
+        \\        - Nine
+        \\
+        \\---
+        \\
+        \\ - One
+        \\   - Two
+        \\     - Three
+        \\       - Four
+        \\   - Five
+        \\     - Six
+        \\- Seven
+        \\
+    ,
+        \\<p>Test:</p>
+        \\<ul>
+        \\<li>a1</li>
+        \\<li>a2<ul>
+        \\<li>b1</li>
+        \\<li>b2</li>
+        \\</ul>
+        \\</li>
+        \\</ul>
+        \\<hr />
+        \\<p>Test:</p>
+        \\<ul>
+        \\<li>One
+        \\Two<ul>
+        \\<li>Three
+        \\Four
+        \\Five
+        \\Six</li>
+        \\</ul>
+        \\</li>
+        \\</ul>
+        \\<hr />
+        \\<p>None of these items are indented far enough from the previous one to
+        \\start a nested list:</p>
+        \\<ul>
+        \\<li>One</li>
+        \\<li>Two</li>
+        \\<li>Three</li>
+        \\<li>Four</li>
+        \\<li>Five</li>
+        \\<li>Six</li>
+        \\<li>Seven</li>
+        \\<li>Eight</li>
+        \\<li>Nine</li>
+        \\</ul>
+        \\<hr />
+        \\<ul>
+        \\<li>One<ul>
+        \\<li>Two<ul>
+        \\<li>Three<ul>
+        \\<li>Four</li>
+        \\</ul>
+        \\</li>
+        \\</ul>
+        \\</li>
+        \\<li>Five<ul>
+        \\<li>Six</li>
+        \\</ul>
+        \\</li>
+        \\</ul>
+        \\</li>
+        \\<li>Seven</li>
+        \\</ul>
+        \\
+    );
+}
+
+test "tables" {
+    try testRender(
+        \\| Operator | Meaning |
+        \\| :------: | ---------------- |
+        \\| `+` | Add |
+        \\| `-` | Subtract |
+        \\| `*` | Multiply |
+        \\| `/` | Divide |
+        \\| `??` | **Not sure yet** |
+        \\
+        \\| Item 1 | Value 1 |
+        \\| Item 2 | Value 2 |
+        \\| Item 3 | Value 3 |
+        \\| Item 4 | Value 4 |
+        \\
+        \\| :--- | :----: | ----: |
+        \\| Left | Center | Right |
+        \\
+        \\ | One | Two |
+        \\ | Three | Four |
+        \\ | Five | Six |
+        \\
+    ,
+        \\<table>
+        \\<tr>
+        \\<th style="text-align: center">Operator</th>
+        \\<th>Meaning</th>
+        \\</tr>
+        \\<tr>
+        \\<td style="text-align: center"><code>+</code></td>
+        \\<td>Add</td>
+        \\</tr>
+        \\<tr>
+        \\<td style="text-align: center"><code>-</code></td>
+        \\<td>Subtract</td>
+        \\</tr>
+        \\<tr>
+        \\<td style="text-align: center"><code>*</code></td>
+        \\<td>Multiply</td>
+        \\</tr>
+        \\<tr>
+        \\<td style="text-align: center"><code>/</code></td>
+        \\<td>Divide</td>
+        \\</tr>
+        \\<tr>
+        \\<td style="text-align: center"><code>??</code></td>
+        \\<td><strong>Not sure yet</strong></td>
+        \\</tr>
+        \\</table>
+        \\<table>
+        \\<tr>
+        \\<td>Item 1</td>
+        \\<td>Value 1</td>
+        \\</tr>
+        \\<tr>
+        \\<td>Item 2</td>
+        \\<td>Value 2</td>
+        \\</tr>
+        \\<tr>
+        \\<td>Item 3</td>
+        \\<td>Value 3</td>
+        \\</tr>
+        \\<tr>
+        \\<td>Item 4</td>
+        \\<td>Value 4</td>
+        \\</tr>
+        \\</table>
+        \\<table>
+        \\<tr>
+        \\<td style="text-align: left">Left</td>
+        \\<td style="text-align: center">Center</td>
+        \\<td style="text-align: right">Right</td>
+        \\</tr>
+        \\</table>
+        \\<table>
+        \\<tr>
+        \\<td>One</td>
+        \\<td>Two</td>
+        \\</tr>
+        \\<tr>
+        \\<td>Three</td>
+        \\<td>Four</td>
+        \\</tr>
+        \\<tr>
+        \\<td>Five</td>
+        \\<td>Six</td>
+        \\</tr>
+        \\</table>
+        \\
+    );
+}
+
+test "table with uneven number of columns" {
+    try testRender(
+        \\| One |
+        \\| :-- | :--: |
+        \\| One | Two | Three |
+        \\
+    ,
+        \\<table>
+        \\<tr>
+        \\<th style="text-align: left">One</th>
+        \\</tr>
+        \\<tr>
+        \\<td style="text-align: left">One</td>
+        \\<td style="text-align: center">Two</td>
+        \\<td>Three</td>
+        \\</tr>
+        \\</table>
+        \\
+    );
+}
+
+test "table with escaped pipes" {
+    try testRender(
+        \\| One \| Two |
+        \\| --- | --- |
+        \\| One \| Two |
+        \\
+    ,
+        \\<table>
+        \\<tr>
+        \\<th>One | Two</th>
+        \\</tr>
+        \\<tr>
+        \\<td>One | Two</td>
+        \\</tr>
+        \\</table>
+        \\
+    );
+}
+
+test "table with pipes in code spans" {
+    try testRender(
+        \\| `|` | Bitwise _OR_ |
+        \\| `||` | Combines error sets |
+        \\| `` `||` `` | Escaped version |
+        \\| ` ``||`` ` | Another escaped version |
+        \\| `Oops unterminated code span |
+        \\
+    ,
+        \\<table>
+        \\<tr>
+        \\<td><code>|</code></td>
+        \\<td>Bitwise <em>OR</em></td>
+        \\</tr>
+        \\<tr>
+        \\<td><code>||</code></td>
+        \\<td>Combines error sets</td>
+        \\</tr>
+        \\<tr>
+        \\<td><code>`||`</code></td>
+        \\<td>Escaped version</td>
+        \\</tr>
+        \\<tr>
+        \\<td><code>``||``</code></td>
+        \\<td>Another escaped version</td>
+        \\</tr>
+        \\</table>
+        \\<p>| <code>Oops unterminated code span |</code></p>
+        \\
+    );
+}
+
+test "tables require leading and trailing pipes" {
+    try testRender(
+        \\Not | a | table
+        \\
+        \\| But | this | is |
+        \\
+        \\Also not a table:
+        \\|
+        \\ |
+        \\
+    ,
+        \\<p>Not | a | table</p>
+        \\<table>
+        \\<tr>
+        \\<td>But</td>
+        \\<td>this</td>
+        \\<td>is</td>
+        \\</tr>
+        \\</table>
+        \\<p>Also not a table:
+        \\|
+        \\|</p>
+        \\
+    );
+}
+
+test "headings" {
+    try testRender(
+        \\# Level one
+        \\## Level two
+        \\### Level three
+        \\#### Level four
+        \\##### Level five
+        \\###### Level six
+        \\####### Not a heading
+        \\
+    ,
+        \\<h1>Level one</h1>
+        \\<h2>Level two</h2>
+        \\<h3>Level three</h3>
+        \\<h4>Level four</h4>
+        \\<h5>Level five</h5>
+        \\<h6>Level six</h6>
+        \\<p>####### Not a heading</p>
+        \\
+    );
+}
+
+test "headings with inline content" {
+    try testRender(
+        \\# Outline of `std.zig`
+        \\## **Important** notes
+        \\### ***Nested* inline content**
+        \\
+    ,
+        \\<h1>Outline of <code>std.zig</code></h1>
+        \\<h2><strong>Important</strong> notes</h2>
+        \\<h3><strong><em>Nested</em> inline content</strong></h3>
+        \\
+    );
+}
+
+test "code blocks" {
+    try testRender(
+        \\```
+        \\Hello, world!
+        \\This is some code.
+        \\```
+        \\``` zig test
+        \\const std = @import("std");
+        \\
+        \\test {
+        \\    try std.testing.expect(2 + 2 == 4);
+        \\}
+        \\```
+        \\   ```
+        \\   Indentation up to the fence is removed.
+        \\        Like this.
+        \\  Doesn't need to be fully indented.
+        \\   ```
+        \\```
+        \\Overly indented closing fence is fine:
+        \\   ```
+        \\
+    ,
+        \\<pre><code>Hello, world!
+        \\This is some code.
+        \\</code></pre>
+        \\<pre><code>const std = @import(&quot;std&quot;);
+        \\
+        \\test {
+        \\    try std.testing.expect(2 + 2 == 4);
+        \\}
+        \\</code></pre>
+        \\<pre><code>Indentation up to the fence is removed.
+        \\     Like this.
+        \\Doesn't need to be fully indented.
+        \\</code></pre>
+        \\<pre><code>Overly indented closing fence is fine:
+        \\</code></pre>
+        \\
+    );
+}
+
+test "blockquotes" {
+    try testRender(
+        \\> > You miss 100% of the shots you don't take.
+        \\> >
+        \\> > ~ Wayne Gretzky
+        \\>
+        \\> ~ Michael Scott
+        \\
+    ,
+        \\<blockquote>
+        \\<blockquote>
+        \\<p>You miss 100% of the shots you don't take.</p>
+        \\<p>~ Wayne Gretzky</p>
+        \\</blockquote>
+        \\<p>~ Michael Scott</p>
+        \\</blockquote>
+        \\
+    );
+}
+
+test "blockquote lazy continuation lines" {
+    try testRender(
+        \\>>>>Deeply nested blockquote
+        \\>>which continues on another line
+        \\and then yet another one.
+        \\>>
+        \\>> But now two of them have been closed.
+        \\
+        \\And then there were none.
+        \\
+    ,
+        \\<blockquote>
+        \\<blockquote>
+        \\<blockquote>
+        \\<blockquote>
+        \\<p>Deeply nested blockquote
+        \\which continues on another line
+        \\and then yet another one.</p>
+        \\</blockquote>
+        \\</blockquote>
+        \\<p>But now two of them have been closed.</p>
+        \\</blockquote>
+        \\</blockquote>
+        \\<p>And then there were none.</p>
+        \\
+    );
+}
+
+test "paragraphs" {
+    try testRender(
+        \\Paragraph one.
+        \\
+        \\Paragraph two.
+        \\Still in the paragraph.
+        \\ So is this.
+        \\
+        \\
+        \\
+        \\
+        \\ Last paragraph.
+        \\
+    ,
+        \\<p>Paragraph one.</p>
+        \\<p>Paragraph two.
+        \\Still in the paragraph.
+        \\So is this.</p>
+        \\<p>Last paragraph.</p>
+        \\
+    );
+}
+
+test "thematic breaks" {
+    try testRender(
+        \\---
+        \\***
+        \\___
+        \\ ---
+        \\ - - - - - - - - - - -
+        \\
+    ,
+        \\<hr />
+        \\<hr />
+        \\<hr />
+        \\<hr />
+        \\<hr />
+        \\
+    );
+}
+
+test "links" {
+    try testRender(
+        \\[Link](https://example.com)
+        \\[Link *with inlines*](https://example.com)
+        \\[Nested parens](https://example.com/nested(parens(inside)))
+        \\[Escaped parens](https://example.com/\)escaped\()
+        \\[Line break in target](test\
+        \\target)
+        \\
+    ,
+        \\<p><a href="https://example.com">Link</a>
+        \\<a href="https://example.com">Link <em>with inlines</em></a>
+        \\<a href="https://example.com/nested(parens(inside))">Nested parens</a>
+        \\<a href="https://example.com/)escaped(">Escaped parens</a>
+        \\<a href="test
+        \\target">Line break in target</a></p>
+        \\
+    );
+}
+
+test "autolinks" {
+    try testRender(
+        \\<https://example.com>
+        \\**This is important: <https://example.com/strong>**
+        \\<https://example.com?query=abc.123#page(parens)>
+        \\<placeholder>
+        \\<data:>
+        \\1 < 2
+        \\4 > 3
+        \\Unclosed: <
+        \\
+    ,
+        \\<p><a href="https://example.com">https://example.com</a>
+        \\<strong>This is important: <a href="https://example.com/strong">https://example.com/strong</a></strong>
+        \\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>
+        \\&lt;placeholder&gt;
+        \\<a href="data:">data:</a>
+        \\1 &lt; 2
+        \\4 &gt; 3
+        \\Unclosed: &lt;</p>
+        \\
+    );
+}
+
+test "text autolinks" {
+    try testRender(
+        \\Text autolinks must start with http:// or https://.
+        \\This doesn't count: ftp://example.com.
+        \\Example: https://ziglang.org.
+        \\Here is an important link: **http://example.com**
+        \\(Links may be in parentheses: https://example.com/?q=(parens))
+        \\Escaping a link so it's plain text: https\://example.com
+        \\
+    ,
+        \\<p>Text autolinks must start with http:// or https://.
+        \\This doesn't count: ftp://example.com.
+        \\Example: <a href="https://ziglang.org">https://ziglang.org</a>.
+        \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong>
+        \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)
+        \\Escaping a link so it's plain text: https://example.com</p>
+        \\
+    );
+}
+
+test "images" {
+    try testRender(
+        \\![Alt text](https://example.com/image.png)
+        \\![Alt text *with inlines*](https://example.com/image.png)
+        \\![Nested parens](https://example.com/nested(parens(inside)).png)
+        \\![Escaped parens](https://example.com/\)escaped\(.png)
+        \\![Line break in target](test\
+        \\target)
+        \\
+    ,
+        \\<p><img src="https://example.com/image.png" alt="Alt text" />
+        \\<img src="https://example.com/image.png" alt="Alt text with inlines" />
+        \\<img src="https://example.com/nested(parens(inside)).png" alt="Nested parens" />
+        \\<img src="https://example.com/)escaped(.png" alt="Escaped parens" />
+        \\<img src="test
+        \\target" alt="Line break in target" /></p>
+        \\
+    );
+}
+
+test "emphasis" {
+    try testRender(
+        \\*Emphasis.*
+        \\**Strong.**
+        \\***Strong emphasis.***
+        \\****More...****
+        \\*****MORE...*****
+        \\******Even more...******
+        \\*******OK, this is enough.*******
+        \\
+    ,
+        \\<p><em>Emphasis.</em>
+        \\<strong>Strong.</strong>
+        \\<em><strong>Strong emphasis.</strong></em>
+        \\<strong><strong>More...</strong></strong>
+        \\<em><strong><strong>MORE...</strong></strong></em>
+        \\<strong><strong><strong>Even more...</strong></strong></strong>
+        \\<em><strong><strong><strong>OK, this is enough.</strong></strong></strong></em></p>
+        \\
+    );
+    try testRender(
+        \\_Emphasis._
+        \\__Strong.__
+        \\___Strong emphasis.___
+        \\____More...____
+        \\_____MORE..._____
+        \\______Even more...______
+        \\_______OK, this is enough._______
+        \\
+    ,
+        \\<p><em>Emphasis.</em>
+        \\<strong>Strong.</strong>
+        \\<em><strong>Strong emphasis.</strong></em>
+        \\<strong><strong>More...</strong></strong>
+        \\<em><strong><strong>MORE...</strong></strong></em>
+        \\<strong><strong><strong>Even more...</strong></strong></strong>
+        \\<em><strong><strong><strong>OK, this is enough.</strong></strong></strong></em></p>
+        \\
+    );
+}
+
+test "nested emphasis" {
+    try testRender(
+        \\**Hello, *world!***
+        \\*Hello, **world!***
+        \\**Hello, _world!_**
+        \\_Hello, **world!**_
+        \\*Hello, **nested** *world!**
+        \\***Hello,* world!**
+        \\__**Hello, world!**__
+        \\****Hello,** world!**
+        \\__Hello,_ world!_
+        \\*Test**123*
+        \\__Test____123__
+        \\
+    ,
+        \\<p><strong>Hello, <em>world!</em></strong>
+        \\<em>Hello, <strong>world!</strong></em>
+        \\<strong>Hello, <em>world!</em></strong>
+        \\<em>Hello, <strong>world!</strong></em>
+        \\<em>Hello, <strong>nested</strong> <em>world!</em></em>
+        \\<strong><em>Hello,</em> world!</strong>
+        \\<strong><strong>Hello, world!</strong></strong>
+        \\<strong><strong>Hello,</strong> world!</strong>
+        \\<em><em>Hello,</em> world!</em>
+        \\<em>Test</em><em>123</em>
+        \\<strong>Test____123</strong></p>
+        \\
+    );
+}
+
+test "emphasis precedence" {
+    try testRender(
+        \\*First one _wins*_.
+        \\_*No other __rule matters.*_
+        \\
+    ,
+        \\<p><em>First one _wins</em>_.
+        \\<em><em>No other __rule matters.</em></em></p>
+        \\
+    );
+}
+
+test "emphasis open and close" {
+    try testRender(
+        \\Cannot open: *
+        \\Cannot open: _
+        \\*Cannot close: *
+        \\_Cannot close: _
+        \\
+        \\foo*bar*baz
+        \\foo_bar_baz
+        \\foo**bar**baz
+        \\foo__bar__baz
+        \\
+    ,
+        \\<p>Cannot open: *
+        \\Cannot open: _
+        \\*Cannot close: *
+        \\_Cannot close: _</p>
+        \\<p>foo<em>bar</em>baz
+        \\foo_bar_baz
+        \\foo<strong>bar</strong>baz
+        \\foo__bar__baz</p>
+        \\
+    );
+}
+
+test "code spans" {
+    try testRender(
+        \\`Hello, world!`
+        \\```Multiple `backticks` can be used.```
+        \\`**This** does not produce emphasis.`
+        \\`` `Backtick enclosed string.` ``
+        \\`Delimiter lengths ```must``` match.`
+        \\
+        \\Unterminated ``code...
+        \\
+        \\Weird empty code span: `
+        \\
+        \\**Very important code: `hi`**
+        \\
+    ,
+        \\<p><code>Hello, world!</code>
+        \\<code>Multiple `backticks` can be used.</code>
+        \\<code>**This** does not produce emphasis.</code>
+        \\<code>`Backtick enclosed string.`</code>
+        \\<code>Delimiter lengths ```must``` match.</code></p>
+        \\<p>Unterminated <code>code...</code></p>
+        \\<p>Weird empty code span: <code></code></p>
+        \\<p><strong>Very important code: <code>hi</code></strong></p>
+        \\
+    );
+}
+
+test "backslash escapes" {
+    try testRender(
+        \\Not \*emphasized\*.
+        \\Literal \\backslashes\\.
+        \\Not code: \`hi\`.
+        \\\# Not a title.
+        \\#\# Also not a title.
+        \\\> Not a blockquote.
+        \\\- Not a list item.
+        \\\| Not a table. |
+        \\| Also not a table. \|
+        \\Any \punctuation\ characte\r can be escaped:
+        \\\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~
+        \\
+    ,
+        \\<p>Not *emphasized*.
+        \\Literal \backslashes\.
+        \\Not code: `hi`.
+        \\# Not a title.
+        \\## Also not a title.
+        \\&gt; Not a blockquote.
+        \\- Not a list item.
+        \\| Not a table. |
+        \\| Also not a table. |
+        \\Any \punctuation\ characte\r can be escaped:
+        \\!&quot;#$%&amp;'()*+,-./:;&lt;=&gt;?@[\]^_`{|}~</p>
+        \\
+    );
+}
+
+test "hard line breaks" {
+    try testRender(
+        \\The iguana sits\
+        \\Perched atop a short desk chair\
+        \\Writing code in Zig
+        \\
+    ,
+        \\<p>The iguana sits<br />
+        \\Perched atop a short desk chair<br />
+        \\Writing code in Zig</p>
+        \\
+    );
+}
+
+test "Unicode handling" {
+    // Null bytes must be replaced.
+    try testRender("\x00\x00\x00", "<p>\u{FFFD}\u{FFFD}\u{FFFD}</p>\n");
+
+    // Invalid UTF-8 must be replaced.
+    try testRender("\xC0\x80\xE0\x80\x80\xF0\x80\x80\x80", "<p>\u{FFFD}\u{FFFD}\u{FFFD}</p>\n");
+    try testRender("\xED\xA0\x80\xED\xBF\xBF", "<p>\u{FFFD}\u{FFFD}</p>\n");
+
+    // Incomplete UTF-8 must be replaced.
+    try testRender("\xE2\x82", "<p>\u{FFFD}</p>\n");
+}
+
+fn testRender(input: []const u8, expected: []const u8) !void {
+    var parser = try Parser.init(testing.allocator);
+    defer parser.deinit();
+
+    var lines = std.mem.splitScalar(u8, input, '\n');
+    while (lines.next()) |line| {
+        try parser.feedLine(line);
+    }
+    var doc = try parser.endInput();
+    defer doc.deinit(testing.allocator);
+
+    var actual = std.ArrayList(u8).init(testing.allocator);
+    defer actual.deinit();
+    try doc.render(actual.writer());
+
+    try testing.expectEqualStrings(expected, actual.items);
+}
diff --git a/docs/wasm/markdown/Document.zig b/docs/wasm/markdown/Document.zig
new file mode 100644
index 0000000..59a4013
--- /dev/null
+++ b/docs/wasm/markdown/Document.zig
@@ -0,0 +1,194 @@
+//! An abstract tree representation of a Markdown document.
+
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const Renderer = @import("renderer.zig").Renderer;
+
+nodes: Node.List.Slice,
+extra: []u32,
+string_bytes: []u8,
+
+const Document = @This();
+
+pub const Node = struct {
+    tag: Tag,
+    data: Data,
+
+    pub const Index = enum(u32) {
+        root = 0,
+        _,
+    };
+    pub const List = std.MultiArrayList(Node);
+
+    pub const Tag = enum {
+        /// Data is `container`.
+        root,
+
+        // Blocks
+        /// Data is `list`.
+        list,
+        /// Data is `list_item`.
+        list_item,
+        /// Data is `container`.
+        table,
+        /// Data is `container`.
+        table_row,
+        /// Data is `table_cell`.
+        table_cell,
+        /// Data is `heading`.
+        heading,
+        /// Data is `code_block`.
+        code_block,
+        /// Data is `container`.
+        blockquote,
+        /// Data is `container`.
+        paragraph,
+        /// Data is `none`.
+        thematic_break,
+
+        // Inlines
+        /// Data is `link`.
+        link,
+        /// Data is `text`.
+        autolink,
+        /// Data is `link`.
+        image,
+        /// Data is `container`.
+        strong,
+        /// Data is `container`.
+        emphasis,
+        /// Data is `text`.
+        code_span,
+        /// Data is `text`.
+        text,
+        /// Data is `none`.
+        line_break,
+    };
+
+    pub const Data = union {
+        none: void,
+        container: struct {
+            children: ExtraIndex,
+        },
+        text: struct {
+            content: StringIndex,
+        },
+        list: struct {
+            start: ListStart,
+            children: ExtraIndex,
+        },
+        list_item: struct {
+            tight: bool,
+            children: ExtraIndex,
+        },
+        table_cell: struct {
+            info: packed struct {
+                alignment: TableCellAlignment,
+                header: bool,
+            },
+            children: ExtraIndex,
+        },
+        heading: struct {
+            /// Between 1 and 6, inclusive.
+            level: u3,
+            children: ExtraIndex,
+        },
+        code_block: struct {
+            tag: StringIndex,
+            content: StringIndex,
+        },
+        link: struct {
+            target: StringIndex,
+            children: ExtraIndex,
+        },
+
+        comptime {
+            // In Debug and ReleaseSafe builds, there may be hidden extra fields
+            // included for safety checks. Without such safety checks enabled,
+            // we always want this union to be 8 bytes.
+            if (builtin.mode != .Debug and builtin.mode != .ReleaseSafe) {
+                assert(@sizeOf(Data) == 8);
+            }
+        }
+    };
+
+    /// The starting number of a list. This is either a number between 0 and
+    /// 999,999,999, inclusive, or `unordered` to indicate an unordered list.
+    pub const ListStart = enum(u30) {
+        // When https://github.com/ziglang/zig/issues/104 is implemented, this
+        // type can be more naturally expressed as ?u30. As it is, we want
+        // values to fit within 4 bytes, so ?u30 does not yet suffice for
+        // storage.
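+        //
+        // For example, a list beginning `4. item` is stored as
+        // `@enumFromInt(4)` and `asNumber` returns 4 for it, while `- item`
+        // is stored as `.unordered` and `asNumber` returns null.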
+ unordered = std.math.maxInt(u30), + _, + + pub fn asNumber(start: ListStart) ?u30 { + if (start == .unordered) return null; + assert(@intFromEnum(start) <= 999_999_999); + return @intFromEnum(start); + } + }; + + pub const TableCellAlignment = enum(u2) { + unset, + left, + center, + right, + }; + + /// Trailing: `len` times `Node.Index` + pub const Children = struct { + len: u32, + }; +}; + +pub const ExtraIndex = enum(u32) { _ }; + +/// The index of a null-terminated string in `string_bytes`. +pub const StringIndex = enum(u32) { + empty = 0, + _, +}; + +pub fn deinit(doc: *Document, allocator: Allocator) void { + doc.nodes.deinit(allocator); + allocator.free(doc.extra); + allocator.free(doc.string_bytes); + doc.* = undefined; +} + +/// Renders a document directly to a writer using the default renderer. +pub fn render(doc: Document, writer: anytype) @TypeOf(writer).Error!void { + const renderer: Renderer(@TypeOf(writer), void) = .{ .context = {} }; + try renderer.render(doc, writer); +} + +pub fn ExtraData(comptime T: type) type { + return struct { data: T, end: usize }; +} + +pub fn extraData(doc: Document, comptime T: type, index: ExtraIndex) ExtraData(T) { + const fields = @typeInfo(T).@"struct".fields; + var i: usize = @intFromEnum(index); + var result: T = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => doc.extra[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return .{ .data = result, .end = i }; +} + +pub fn extraChildren(doc: Document, index: ExtraIndex) []const Node.Index { + const children = doc.extraData(Node.Children, index); + return @ptrCast(doc.extra[children.end..][0..children.data.len]); +} + +pub fn string(doc: Document, index: StringIndex) [:0]const u8 { + const start = @intFromEnum(index); + return std.mem.span(@as([*:0]u8, @ptrCast(doc.string_bytes[start..].ptr))); +} diff --git a/docs/wasm/markdown/Parser.zig b/docs/wasm/markdown/Parser.zig new file mode 100644 index 0000000..ce8db08 --- /dev/null +++ b/docs/wasm/markdown/Parser.zig @@ -0,0 +1,1660 @@ +//! A Markdown parser producing `Document`s. +//! +//! The parser operates at two levels: at the outer level, the parser accepts +//! the content of an input document line by line and begins building the _block +//! structure_ of the document. This creates a stack of currently open blocks. +//! +//! When the parser detects the end of a block, it closes the block, popping it +//! from the open block stack and completing any additional parsing of the +//! block's content. For blocks which contain parseable inline content, this +//! invokes the inner level of the parser, handling the _inline structure_ of +//! the block. +//! +//! Inline parsing scans through the collected inline content of a block. When +//! it encounters a character that could indicate the beginning of an inline, it +//! either handles the inline right away (if possible) or adds it to a pending +//! inlines stack. When an inline is completed, it is added to a list of +//! completed inlines, which (along with any surrounding text nodes) will become +//! the children of the parent inline or the block whose inline content is being +//! parsed. 
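+//!
+//! A minimal usage sketch (assuming an `Allocator` named `gpa`; error
+//! handling elided):
+//!
+//! ```zig
+//! var parser = try Parser.init(gpa);
+//! defer parser.deinit();
+//! try parser.feedLine("# Heading");
+//! try parser.feedLine("Body text.");
+//! var doc = try parser.endInput();
+//! defer doc.deinit(gpa);
+//! ```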
+ +const std = @import("std"); +const mem = std.mem; +const assert = std.debug.assert; +const isWhitespace = std.ascii.isWhitespace; +const Allocator = mem.Allocator; +const expectEqual = std.testing.expectEqual; +const Document = @import("Document.zig"); +const Node = Document.Node; +const ExtraIndex = Document.ExtraIndex; +const ExtraData = Document.ExtraData; +const StringIndex = Document.StringIndex; + +nodes: Node.List = .{}, +extra: std.ArrayListUnmanaged(u32) = .empty, +scratch_extra: std.ArrayListUnmanaged(u32) = .empty, +string_bytes: std.ArrayListUnmanaged(u8) = .empty, +scratch_string: std.ArrayListUnmanaged(u8) = .empty, +pending_blocks: std.ArrayListUnmanaged(Block) = .empty, +allocator: Allocator, + +const Parser = @This(); + +/// An arbitrary limit on the maximum number of columns in a table so that +/// table-related metadata maintained by the parser does not require dynamic +/// memory allocation. +const max_table_columns = 128; + +/// A block element which is still receiving children. +const Block = struct { + tag: Tag, + data: Data, + extra_start: usize, + string_start: usize, + + const Tag = enum { + /// Data is `list`. + list, + /// Data is `list_item`. + list_item, + /// Data is `table`. + table, + /// Data is `none`. + table_row, + /// Data is `heading`. + heading, + /// Data is `code_block`. + code_block, + /// Data is `none`. + blockquote, + /// Data is `none`. + paragraph, + /// Data is `none`. + thematic_break, + }; + + const Data = union { + none: void, + list: struct { + marker: ListMarker, + /// Between 0 and 999,999,999, inclusive. + start: u30, + tight: bool, + last_line_blank: bool = false, + }, + list_item: struct { + continuation_indent: usize, + }, + table: struct { + column_alignments: std.BoundedArray(Node.TableCellAlignment, max_table_columns) = .{}, + }, + heading: struct { + /// Between 1 and 6, inclusive. + level: u3, + }, + code_block: struct { + tag: StringIndex, + fence_len: usize, + indent: usize, + }, + + const ListMarker = enum { + @"-", + @"*", + @"+", + number_dot, + number_paren, + }; + }; + + const ContentType = enum { + blocks, + inlines, + raw_inlines, + nothing, + }; + + fn canAccept(b: Block) ContentType { + return switch (b.tag) { + .list, + .list_item, + .table, + .blockquote, + => .blocks, + + .heading, + .paragraph, + => .inlines, + + .code_block, + => .raw_inlines, + + .table_row, + .thematic_break, + => .nothing, + }; + } + + /// Attempts to continue `b` using the contents of `line`. If successful, + /// returns the remaining portion of `line` to be considered part of `b` + /// (e.g. for a blockquote, this would be everything except the leading + /// `>`). If unsuccessful, returns null. + fn match(b: Block, line: []const u8) ?[]const u8 { + const unindented = mem.trimStart(u8, line, " \t"); + const indent = line.len - unindented.len; + return switch (b.tag) { + .list => line, + .list_item => if (indent >= b.data.list_item.continuation_indent) + line[b.data.list_item.continuation_indent..] + else if (unindented.len == 0) + // Blank lines should not close list items, since there may be + // more indented contents to follow after the blank line. 
+ "" + else + null, + .table => if (unindented.len > 0) line else null, + .table_row => null, + .heading => null, + .code_block => code_block: { + const trimmed = mem.trimEnd(u8, unindented, " \t"); + if (mem.indexOfNone(u8, trimmed, "`") != null or trimmed.len != b.data.code_block.fence_len) { + const effective_indent = @min(indent, b.data.code_block.indent); + break :code_block line[effective_indent..]; + } else { + break :code_block null; + } + }, + .blockquote => if (mem.startsWith(u8, unindented, ">")) + unindented[1..] + else + null, + .paragraph => if (unindented.len > 0) line else null, + .thematic_break => null, + }; + } +}; + +pub fn init(allocator: Allocator) Allocator.Error!Parser { + var p: Parser = .{ .allocator = allocator }; + try p.nodes.append(allocator, .{ + .tag = .root, + .data = undefined, + }); + try p.string_bytes.append(allocator, 0); + return p; +} + +pub fn deinit(p: *Parser) void { + p.nodes.deinit(p.allocator); + p.extra.deinit(p.allocator); + p.scratch_extra.deinit(p.allocator); + p.string_bytes.deinit(p.allocator); + p.scratch_string.deinit(p.allocator); + p.pending_blocks.deinit(p.allocator); + p.* = undefined; +} + +/// Accepts a single line of content. `line` should not have a trailing line +/// ending character. +pub fn feedLine(p: *Parser, line: []const u8) Allocator.Error!void { + var rest_line = line; + const first_unmatched = for (p.pending_blocks.items, 0..) |b, i| { + if (b.match(rest_line)) |rest| { + rest_line = rest; + } else { + break i; + } + } else p.pending_blocks.items.len; + + const in_code_block = p.pending_blocks.items.len > 0 and + p.pending_blocks.getLast().tag == .code_block; + const code_block_end = in_code_block and + first_unmatched + 1 == p.pending_blocks.items.len; + // New blocks cannot be started if we are actively inside a code block or + // are just closing one (to avoid interpreting the closing ``` as a new code + // block start). + var maybe_block_start = if (!in_code_block or first_unmatched + 2 <= p.pending_blocks.items.len) + try p.startBlock(rest_line) + else + null; + + // This is a lazy continuation line if there are no new blocks to open and + // the last open block is a paragraph. + if (maybe_block_start == null and + !isBlank(rest_line) and + p.pending_blocks.items.len > 0 and + p.pending_blocks.getLast().tag == .paragraph) + { + try p.addScratchStringLine(mem.trimStart(u8, rest_line, " \t")); + return; + } + + // If a new block needs to be started, any paragraph needs to be closed, + // even though this isn't detected as part of the closing condition for + // paragraphs. + if (maybe_block_start != null and + p.pending_blocks.items.len > 0 and + p.pending_blocks.getLast().tag == .paragraph) + { + try p.closeLastBlock(); + } + + while (p.pending_blocks.items.len > first_unmatched) { + try p.closeLastBlock(); + } + + while (maybe_block_start) |block_start| : (maybe_block_start = try p.startBlock(rest_line)) { + try p.appendBlockStart(block_start); + // There may be more blocks to start within the same line. + rest_line = block_start.rest; + // Headings may only contain inline content. + if (block_start.tag == .heading) break; + // An opening code fence does not contain any additional block or inline + // content to process. + if (block_start.tag == .code_block) return; + } + + // Do not append the end of a code block (```) as textual content. 
+ if (code_block_end) return; + + const can_accept = if (p.pending_blocks.getLastOrNull()) |last_pending_block| + last_pending_block.canAccept() + else + .blocks; + const rest_line_trimmed = mem.trimStart(u8, rest_line, " \t"); + switch (can_accept) { + .blocks => { + // If we're inside a list item and the rest of the line is blank, it + // means that any subsequent child of the list item (or subsequent + // item in the list) will cause the containing list to be considered + // loose. However, we can't immediately declare that the list is + // loose, since we might just be looking at a blank line after the + // end of the last item in the list. The final determination will be + // made when appending the next child of the list or list item. + const maybe_containing_list_index = if (p.pending_blocks.items.len > 0 and p.pending_blocks.getLast().tag == .list_item) + p.pending_blocks.items.len - 2 + else + null; + + if (rest_line_trimmed.len > 0) { + try p.appendBlockStart(.{ + .tag = .paragraph, + .data = .{ .none = {} }, + .rest = undefined, + }); + try p.addScratchStringLine(rest_line_trimmed); + } + + if (maybe_containing_list_index) |containing_list_index| { + p.pending_blocks.items[containing_list_index].data.list.last_line_blank = rest_line_trimmed.len == 0; + } + }, + .inlines => try p.addScratchStringLine(rest_line_trimmed), + .raw_inlines => try p.addScratchStringLine(rest_line), + .nothing => {}, + } +} + +/// Completes processing of the input and returns the parsed document. +pub fn endInput(p: *Parser) Allocator.Error!Document { + while (p.pending_blocks.items.len > 0) { + try p.closeLastBlock(); + } + // There should be no inline content pending after closing the last open + // block. + assert(p.scratch_string.items.len == 0); + + const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items)); + p.nodes.items(.data)[0] = .{ .container = .{ .children = children } }; + p.scratch_string.items.len = 0; + p.scratch_extra.items.len = 0; + + var nodes = p.nodes.toOwnedSlice(); + errdefer nodes.deinit(p.allocator); + const extra = try p.extra.toOwnedSlice(p.allocator); + errdefer p.allocator.free(extra); + const string_bytes = try p.string_bytes.toOwnedSlice(p.allocator); + errdefer p.allocator.free(string_bytes); + + return .{ + .nodes = nodes, + .extra = extra, + .string_bytes = string_bytes, + }; +} + +/// Data describing the start of a new block element. +const BlockStart = struct { + tag: Tag, + data: Data, + rest: []const u8, + + const Tag = enum { + /// Data is `list_item`. + list_item, + /// Data is `table_row`. + table_row, + /// Data is `heading`. + heading, + /// Data is `code_block`. + code_block, + /// Data is `none`. + blockquote, + /// Data is `none`. + paragraph, + /// Data is `none`. + thematic_break, + }; + + const Data = union { + none: void, + list_item: struct { + marker: Block.Data.ListMarker, + number: u30, + continuation_indent: usize, + }, + table_row: struct { + cells: std.BoundedArray([]const u8, max_table_columns), + }, + heading: struct { + /// Between 1 and 6, inclusive. + level: u3, + }, + code_block: struct { + tag: StringIndex, + fence_len: usize, + indent: usize, + }, + }; +}; + +fn appendBlockStart(p: *Parser, block_start: BlockStart) !void { + if (p.pending_blocks.getLastOrNull()) |last_pending_block| { + // Close the last block if it is a list and the new block is not a list item + // or not of the same marker type. 
+ const should_close_list = last_pending_block.tag == .list and + (block_start.tag != .list_item or + block_start.data.list_item.marker != last_pending_block.data.list.marker); + // The last block should also be closed if the new block is not a table + // row, which is the only allowed child of a table. + const should_close_table = last_pending_block.tag == .table and + block_start.tag != .table_row; + if (should_close_list or should_close_table) { + try p.closeLastBlock(); + } + } + + if (p.pending_blocks.getLastOrNull()) |last_pending_block| { + // If the last block is a list or list item, check for tightness based + // on the last line. + const maybe_containing_list = switch (last_pending_block.tag) { + .list => &p.pending_blocks.items[p.pending_blocks.items.len - 1], + .list_item => &p.pending_blocks.items[p.pending_blocks.items.len - 2], + else => null, + }; + if (maybe_containing_list) |containing_list| { + if (containing_list.data.list.last_line_blank) { + containing_list.data.list.tight = false; + } + } + } + + // Start a new list if the new block is a list item and there is no + // containing list yet. + if (block_start.tag == .list_item and + (p.pending_blocks.items.len == 0 or p.pending_blocks.getLast().tag != .list)) + { + try p.pending_blocks.append(p.allocator, .{ + .tag = .list, + .data = .{ .list = .{ + .marker = block_start.data.list_item.marker, + .start = block_start.data.list_item.number, + .tight = true, + } }, + .string_start = p.scratch_string.items.len, + .extra_start = p.scratch_extra.items.len, + }); + } + + if (block_start.tag == .table_row) { + // Likewise, table rows start a table implicitly. + if (p.pending_blocks.items.len == 0 or p.pending_blocks.getLast().tag != .table) { + try p.pending_blocks.append(p.allocator, .{ + .tag = .table, + .data = .{ .table = .{ + .column_alignments = .{}, + } }, + .string_start = p.scratch_string.items.len, + .extra_start = p.scratch_extra.items.len, + }); + } + + const current_row = p.scratch_extra.items.len - p.pending_blocks.getLast().extra_start; + if (current_row <= 1) { + if (parseTableHeaderDelimiter(block_start.data.table_row.cells)) |alignments| { + p.pending_blocks.items[p.pending_blocks.items.len - 1].data.table.column_alignments = alignments; + if (current_row == 1) { + // We need to go back and mark the header row and its column + // alignments. + const datas = p.nodes.items(.data); + const header_data = datas[p.scratch_extra.getLast()]; + for (p.extraChildren(header_data.container.children), 0..) 
|header_cell, i| { + const alignment = if (i < alignments.len) alignments.buffer[i] else .unset; + const cell_data = &datas[@intFromEnum(header_cell)].table_cell; + cell_data.info.alignment = alignment; + cell_data.info.header = true; + } + } + return; + } + } + } + + const tag: Block.Tag, const data: Block.Data = switch (block_start.tag) { + .list_item => .{ .list_item, .{ .list_item = .{ + .continuation_indent = block_start.data.list_item.continuation_indent, + } } }, + .table_row => .{ .table_row, .{ .none = {} } }, + .heading => .{ .heading, .{ .heading = .{ + .level = block_start.data.heading.level, + } } }, + .code_block => .{ .code_block, .{ .code_block = .{ + .tag = block_start.data.code_block.tag, + .fence_len = block_start.data.code_block.fence_len, + .indent = block_start.data.code_block.indent, + } } }, + .blockquote => .{ .blockquote, .{ .none = {} } }, + .paragraph => .{ .paragraph, .{ .none = {} } }, + .thematic_break => .{ .thematic_break, .{ .none = {} } }, + }; + + try p.pending_blocks.append(p.allocator, .{ + .tag = tag, + .data = data, + .string_start = p.scratch_string.items.len, + .extra_start = p.scratch_extra.items.len, + }); + + if (tag == .table_row) { + // Table rows are unique, since we already have all the children + // available in the BlockStart. We can immediately parse and append + // these children now. + const containing_table = p.pending_blocks.items[p.pending_blocks.items.len - 2]; + const column_alignments = containing_table.data.table.column_alignments.slice(); + for (block_start.data.table_row.cells.slice(), 0..) |cell_content, i| { + const cell_children = try p.parseInlines(cell_content); + const alignment = if (i < column_alignments.len) column_alignments[i] else .unset; + const cell = try p.addNode(.{ + .tag = .table_cell, + .data = .{ .table_cell = .{ + .info = .{ + .alignment = alignment, + .header = false, + }, + .children = cell_children, + } }, + }); + try p.addScratchExtraNode(cell); + } + } +} + +fn startBlock(p: *Parser, line: []const u8) !?BlockStart { + const unindented = mem.trimStart(u8, line, " \t"); + const indent = line.len - unindented.len; + if (isThematicBreak(line)) { + // Thematic breaks take precedence over list items. 
+ return .{ + .tag = .thematic_break, + .data = .{ .none = {} }, + .rest = "", + }; + } else if (startListItem(unindented)) |list_item| { + return .{ + .tag = .list_item, + .data = .{ .list_item = .{ + .marker = list_item.marker, + .number = list_item.number, + .continuation_indent = indent + list_item.marker_len, + } }, + .rest = list_item.rest, + }; + } else if (startTableRow(unindented)) |table_row| { + return .{ + .tag = .table_row, + .data = .{ .table_row = .{ + .cells = table_row.cells, + } }, + .rest = "", + }; + } else if (startHeading(unindented)) |heading| { + return .{ + .tag = .heading, + .data = .{ .heading = .{ + .level = heading.level, + } }, + .rest = heading.rest, + }; + } else if (try p.startCodeBlock(unindented)) |code_block| { + return .{ + .tag = .code_block, + .data = .{ .code_block = .{ + .tag = code_block.tag, + .fence_len = code_block.fence_len, + .indent = indent, + } }, + .rest = "", + }; + } else if (startBlockquote(unindented)) |rest| { + return .{ + .tag = .blockquote, + .data = .{ .none = {} }, + .rest = rest, + }; + } else { + return null; + } +} + +const ListItemStart = struct { + marker: Block.Data.ListMarker, + number: u30, + marker_len: usize, + rest: []const u8, +}; + +fn startListItem(unindented_line: []const u8) ?ListItemStart { + if (mem.startsWith(u8, unindented_line, "- ")) { + return .{ + .marker = .@"-", + .number = undefined, + .marker_len = 2, + .rest = unindented_line[2..], + }; + } else if (mem.startsWith(u8, unindented_line, "* ")) { + return .{ + .marker = .@"*", + .number = undefined, + .marker_len = 2, + .rest = unindented_line[2..], + }; + } else if (mem.startsWith(u8, unindented_line, "+ ")) { + return .{ + .marker = .@"+", + .number = undefined, + .marker_len = 2, + .rest = unindented_line[2..], + }; + } + + const number_end = mem.indexOfNone(u8, unindented_line, "0123456789") orelse return null; + const after_number = unindented_line[number_end..]; + const marker: Block.Data.ListMarker = if (mem.startsWith(u8, after_number, ". ")) + .number_dot + else if (mem.startsWith(u8, after_number, ") ")) + .number_paren + else + return null; + const number = std.fmt.parseInt(u30, unindented_line[0..number_end], 10) catch return null; + if (number > 999_999_999) return null; + return .{ + .marker = marker, + .number = number, + .marker_len = number_end + 2, + .rest = after_number[2..], + }; +} + +const TableRowStart = struct { + cells: std.BoundedArray([]const u8, max_table_columns), +}; + +fn startTableRow(unindented_line: []const u8) ?TableRowStart { + if (unindented_line.len < 2 or + !mem.startsWith(u8, unindented_line, "|") or + mem.endsWith(u8, unindented_line, "\\|") or + !mem.endsWith(u8, unindented_line, "|")) return null; + + var cells: std.BoundedArray([]const u8, max_table_columns) = .{}; + const table_row_content = unindented_line[1 .. unindented_line.len - 1]; + var cell_start: usize = 0; + var i: usize = 0; + while (i < table_row_content.len) : (i += 1) { + switch (table_row_content[i]) { + '\\' => i += 1, + '|' => { + cells.append(table_row_content[cell_start..i]) catch return null; + cell_start = i + 1; + }, + '`' => { + // Ignoring pipes in code spans allows table cells to contain + // code using ||, for example. 
+ const open_start = i; + i = mem.indexOfNonePos(u8, table_row_content, i, "`") orelse return null; + const open_len = i - open_start; + while (mem.indexOfScalarPos(u8, table_row_content, i, '`')) |close_start| { + i = mem.indexOfNonePos(u8, table_row_content, close_start, "`") orelse return null; + const close_len = i - close_start; + if (close_len == open_len) break; + } else return null; + }, + else => {}, + } + } + cells.append(table_row_content[cell_start..]) catch return null; + + return .{ .cells = cells }; +} + +fn parseTableHeaderDelimiter( + row_cells: std.BoundedArray([]const u8, max_table_columns), +) ?std.BoundedArray(Node.TableCellAlignment, max_table_columns) { + var alignments: std.BoundedArray(Node.TableCellAlignment, max_table_columns) = .{}; + for (row_cells.slice()) |content| { + const alignment = parseTableHeaderDelimiterCell(content) orelse return null; + alignments.appendAssumeCapacity(alignment); + } + return alignments; +} + +fn parseTableHeaderDelimiterCell(content: []const u8) ?Node.TableCellAlignment { + var state: enum { + before_rule, + after_left_anchor, + in_rule, + after_right_anchor, + after_rule, + } = .before_rule; + var left_anchor = false; + var right_anchor = false; + for (content) |c| { + switch (state) { + .before_rule => switch (c) { + ' ' => {}, + ':' => { + left_anchor = true; + state = .after_left_anchor; + }, + '-' => state = .in_rule, + else => return null, + }, + .after_left_anchor => switch (c) { + '-' => state = .in_rule, + else => return null, + }, + .in_rule => switch (c) { + '-' => {}, + ':' => { + right_anchor = true; + state = .after_right_anchor; + }, + ' ' => state = .after_rule, + else => return null, + }, + .after_right_anchor => switch (c) { + ' ' => state = .after_rule, + else => return null, + }, + .after_rule => switch (c) { + ' ' => {}, + else => return null, + }, + } + } + + switch (state) { + .before_rule, + .after_left_anchor, + => return null, + + .in_rule, + .after_right_anchor, + .after_rule, + => {}, + } + + return if (left_anchor and right_anchor) + .center + else if (left_anchor) + .left + else if (right_anchor) + .right + else + .unset; +} + +test parseTableHeaderDelimiterCell { + try expectEqual(null, parseTableHeaderDelimiterCell("")); + try expectEqual(null, parseTableHeaderDelimiterCell(" ")); + try expectEqual(.unset, parseTableHeaderDelimiterCell("-")); + try expectEqual(.unset, parseTableHeaderDelimiterCell(" - ")); + try expectEqual(.unset, parseTableHeaderDelimiterCell("----")); + try expectEqual(.unset, parseTableHeaderDelimiterCell(" ---- ")); + try expectEqual(null, parseTableHeaderDelimiterCell(":")); + try expectEqual(null, parseTableHeaderDelimiterCell("::")); + try expectEqual(.left, parseTableHeaderDelimiterCell(":-")); + try expectEqual(.left, parseTableHeaderDelimiterCell(" :----")); + try expectEqual(.center, parseTableHeaderDelimiterCell(":-:")); + try expectEqual(.center, parseTableHeaderDelimiterCell(":----:")); + try expectEqual(.center, parseTableHeaderDelimiterCell(" :----: ")); + try expectEqual(.right, parseTableHeaderDelimiterCell("-:")); + try expectEqual(.right, parseTableHeaderDelimiterCell("----:")); + try expectEqual(.right, parseTableHeaderDelimiterCell(" ----: ")); +} + +const HeadingStart = struct { + level: u3, + rest: []const u8, +}; + +fn startHeading(unindented_line: []const u8) ?HeadingStart { + var level: u3 = 0; + return for (unindented_line, 0..) 
|c, i| { + switch (c) { + '#' => { + if (level == 6) break null; + level += 1; + }, + ' ' => { + // We must have seen at least one # by this point, since + // unindented_line has no leading spaces. + assert(level > 0); + break .{ + .level = level, + .rest = unindented_line[i + 1 ..], + }; + }, + else => break null, + } + } else null; +} + +const CodeBlockStart = struct { + tag: StringIndex, + fence_len: usize, +}; + +fn startCodeBlock(p: *Parser, unindented_line: []const u8) !?CodeBlockStart { + var fence_len: usize = 0; + const tag_bytes = for (unindented_line, 0..) |c, i| { + switch (c) { + '`' => fence_len += 1, + else => break unindented_line[i..], + } + } else ""; + // Code block tags may not contain backticks, since that would create + // potential confusion with inline code spans. + if (fence_len < 3 or mem.indexOfScalar(u8, tag_bytes, '`') != null) return null; + return .{ + .tag = try p.addString(mem.trim(u8, tag_bytes, " ")), + .fence_len = fence_len, + }; +} + +fn startBlockquote(unindented_line: []const u8) ?[]const u8 { + return if (mem.startsWith(u8, unindented_line, ">")) + unindented_line[1..] + else + null; +} + +fn isThematicBreak(line: []const u8) bool { + var char: ?u8 = null; + var count: usize = 0; + for (line) |c| { + switch (c) { + ' ' => {}, + '-', '_', '*' => { + if (char != null and c != char.?) return false; + char = c; + count += 1; + }, + else => return false, + } + } + return count >= 3; +} + +fn closeLastBlock(p: *Parser) !void { + const b = p.pending_blocks.pop().?; + const node = switch (b.tag) { + .list => list: { + assert(b.string_start == p.scratch_string.items.len); + + // Although tightness is parsed as a property of the list, it is + // stored at the list item level to make it possible to render each + // node without any context from its parents. 
+ const list_items = p.scratch_extra.items[b.extra_start..]; + const node_datas = p.nodes.items(.data); + if (!b.data.list.tight) { + for (list_items) |list_item| { + node_datas[list_item].list_item.tight = false; + } + } + + const children = try p.addExtraChildren(@ptrCast(list_items)); + break :list try p.addNode(.{ + .tag = .list, + .data = .{ .list = .{ + .start = switch (b.data.list.marker) { + .number_dot, .number_paren => @enumFromInt(b.data.list.start), + .@"-", .@"*", .@"+" => .unordered, + }, + .children = children, + } }, + }); + }, + .list_item => list_item: { + assert(b.string_start == p.scratch_string.items.len); + const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..])); + break :list_item try p.addNode(.{ + .tag = .list_item, + .data = .{ .list_item = .{ + .tight = true, + .children = children, + } }, + }); + }, + .table => table: { + assert(b.string_start == p.scratch_string.items.len); + const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..])); + break :table try p.addNode(.{ + .tag = .table, + .data = .{ .container = .{ + .children = children, + } }, + }); + }, + .table_row => table_row: { + assert(b.string_start == p.scratch_string.items.len); + const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..])); + break :table_row try p.addNode(.{ + .tag = .table_row, + .data = .{ .container = .{ + .children = children, + } }, + }); + }, + .heading => heading: { + const children = try p.parseInlines(p.scratch_string.items[b.string_start..]); + break :heading try p.addNode(.{ + .tag = .heading, + .data = .{ .heading = .{ + .level = b.data.heading.level, + .children = children, + } }, + }); + }, + .code_block => code_block: { + const content = try p.addString(p.scratch_string.items[b.string_start..]); + break :code_block try p.addNode(.{ + .tag = .code_block, + .data = .{ .code_block = .{ + .tag = b.data.code_block.tag, + .content = content, + } }, + }); + }, + .blockquote => blockquote: { + assert(b.string_start == p.scratch_string.items.len); + const children = try p.addExtraChildren(@ptrCast(p.scratch_extra.items[b.extra_start..])); + break :blockquote try p.addNode(.{ + .tag = .blockquote, + .data = .{ .container = .{ + .children = children, + } }, + }); + }, + .paragraph => paragraph: { + const children = try p.parseInlines(p.scratch_string.items[b.string_start..]); + break :paragraph try p.addNode(.{ + .tag = .paragraph, + .data = .{ .container = .{ + .children = children, + } }, + }); + }, + .thematic_break => try p.addNode(.{ + .tag = .thematic_break, + .data = .{ .none = {} }, + }), + }; + p.scratch_string.items.len = b.string_start; + p.scratch_extra.items.len = b.extra_start; + try p.addScratchExtraNode(node); +} + +const InlineParser = struct { + parent: *Parser, + content: []const u8, + pos: usize = 0, + pending_inlines: std.ArrayListUnmanaged(PendingInline) = .empty, + completed_inlines: std.ArrayListUnmanaged(CompletedInline) = .empty, + + const PendingInline = struct { + tag: Tag, + data: Data, + start: usize, + + const Tag = enum { + /// Data is `emphasis`. + emphasis, + /// Data is `none`. + link, + /// Data is `none`. 
+ image, + }; + + const Data = union { + none: void, + emphasis: struct { + underscore: bool, + run_len: usize, + }, + }; + }; + + const CompletedInline = struct { + node: Node.Index, + start: usize, + len: usize, + }; + + fn deinit(ip: *InlineParser) void { + ip.pending_inlines.deinit(ip.parent.allocator); + ip.completed_inlines.deinit(ip.parent.allocator); + } + + /// Parses all of `ip.content`, returning the children of the node + /// containing the inline content. + fn parse(ip: *InlineParser) Allocator.Error!ExtraIndex { + while (ip.pos < ip.content.len) : (ip.pos += 1) { + switch (ip.content[ip.pos]) { + '\\' => ip.pos += 1, + '[' => try ip.pending_inlines.append(ip.parent.allocator, .{ + .tag = .link, + .data = .{ .none = {} }, + .start = ip.pos, + }), + '!' => if (ip.pos + 1 < ip.content.len and ip.content[ip.pos + 1] == '[') { + try ip.pending_inlines.append(ip.parent.allocator, .{ + .tag = .image, + .data = .{ .none = {} }, + .start = ip.pos, + }); + ip.pos += 1; + }, + ']' => try ip.parseLink(), + '<' => try ip.parseAutolink(), + '*', '_' => try ip.parseEmphasis(), + '`' => try ip.parseCodeSpan(), + 'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) { + try ip.parseTextAutolink(); + }, + else => {}, + } + } + + const children = try ip.encodeChildren(0, ip.content.len); + // There may be pending inlines after parsing (e.g. unclosed emphasis + // runs), but there must not be any completed inlines, since those + // should all be part of `children`. + assert(ip.completed_inlines.items.len == 0); + return children; + } + + /// Parses a link, starting at the `]` at the end of the link text. `ip.pos` + /// is left at the closing `)` of the link target or at the closing `]` if + /// there is none. + fn parseLink(ip: *InlineParser) !void { + var i = ip.pending_inlines.items.len; + while (i > 0) { + i -= 1; + if (ip.pending_inlines.items[i].tag == .link or + ip.pending_inlines.items[i].tag == .image) break; + } else return; + const opener = ip.pending_inlines.items[i]; + ip.pending_inlines.shrinkRetainingCapacity(i); + const text_start = switch (opener.tag) { + .link => opener.start + 1, + .image => opener.start + 2, + else => unreachable, + }; + + if (ip.pos + 1 >= ip.content.len or ip.content[ip.pos + 1] != '(') return; + const text_end = ip.pos; + + const target_start = text_end + 2; + var target_end = target_start; + var nesting_level: usize = 1; + while (target_end < ip.content.len) : (target_end += 1) { + switch (ip.content[target_end]) { + '\\' => target_end += 1, + '(' => nesting_level += 1, + ')' => { + if (nesting_level == 1) break; + nesting_level -= 1; + }, + else => {}, + } + } else return; + ip.pos = target_end; + + const children = try ip.encodeChildren(text_start, text_end); + const target = try ip.encodeLinkTarget(target_start, target_end); + + const link = try ip.parent.addNode(.{ + .tag = switch (opener.tag) { + .link => .link, + .image => .image, + else => unreachable, + }, + .data = .{ .link = .{ + .target = target, + .children = children, + } }, + }); + try ip.completed_inlines.append(ip.parent.allocator, .{ + .node = link, + .start = opener.start, + .len = ip.pos - opener.start + 1, + }); + } + + fn encodeLinkTarget(ip: *InlineParser, start: usize, end: usize) !StringIndex { + // For efficiency, we can encode directly into string_bytes rather than + // creating a temporary string and then encoding it, since this process + // is entirely linear. 
+ const string_top = ip.parent.string_bytes.items.len; + errdefer ip.parent.string_bytes.shrinkRetainingCapacity(string_top); + + var text_iter: TextIterator = .{ .content = ip.content[start..end] }; + while (text_iter.next()) |content| { + switch (content) { + .char => |c| try ip.parent.string_bytes.append(ip.parent.allocator, c), + .text => |s| try ip.parent.string_bytes.appendSlice(ip.parent.allocator, s), + .line_break => try ip.parent.string_bytes.appendSlice(ip.parent.allocator, "\\\n"), + } + } + try ip.parent.string_bytes.append(ip.parent.allocator, 0); + return @enumFromInt(string_top); + } + + /// Parses an autolink, starting at the opening `<`. `ip.pos` is left at the + /// closing `>`, or remains unchanged at the opening `<` if there is none. + fn parseAutolink(ip: *InlineParser) !void { + const start = ip.pos; + ip.pos += 1; + var state: enum { + start, + scheme, + target, + } = .start; + while (ip.pos < ip.content.len) : (ip.pos += 1) { + switch (state) { + .start => switch (ip.content[ip.pos]) { + 'A'...'Z', 'a'...'z' => state = .scheme, + else => break, + }, + .scheme => switch (ip.content[ip.pos]) { + 'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {}, + ':' => state = .target, + else => break, + }, + .target => switch (ip.content[ip.pos]) { + '<', ' ', '\t', '\n' => break, // Not allowed in autolinks + '>' => { + // Backslash escapes are not recognized in autolink targets. + const target = try ip.parent.addString(ip.content[start + 1 .. ip.pos]); + const node = try ip.parent.addNode(.{ + .tag = .autolink, + .data = .{ .text = .{ + .content = target, + } }, + }); + try ip.completed_inlines.append(ip.parent.allocator, .{ + .node = node, + .start = start, + .len = ip.pos - start + 1, + }); + return; + }, + else => {}, + }, + } + } + ip.pos = start; + } + + /// Parses a plain text autolink (not delimited by `<>`), starting at the + /// first character in the link (an `h`). `ip.pos` is left at the last + /// character of the link, or remains unchanged if there is no valid link. + fn parseTextAutolink(ip: *InlineParser) !void { + const start = ip.pos; + var state: union(enum) { + /// Inside `http`. Contains the rest of the text to be matched. + http: []const u8, + after_http, + after_https, + /// Inside `://`. Contains the rest of the text to be matched. + authority: []const u8, + /// Inside link content. + content: struct { + start: usize, + paren_nesting: usize, + }, + } = .{ .http = "http" }; + + while (ip.pos < ip.content.len) : (ip.pos += 1) { + switch (state) { + .http => |rest| { + if (ip.content[ip.pos] != rest[0]) break; + if (rest.len > 1) { + state = .{ .http = rest[1..] }; + } else { + state = .after_http; + } + }, + .after_http => switch (ip.content[ip.pos]) { + 's' => state = .after_https, + ':' => state = .{ .authority = "//" }, + else => break, + }, + .after_https => switch (ip.content[ip.pos]) { + ':' => state = .{ .authority = "//" }, + else => break, + }, + .authority => |rest| { + if (ip.content[ip.pos] != rest[0]) break; + if (rest.len > 1) { + state = .{ .authority = rest[1..] 
}; + } else { + state = .{ .content = .{ + .start = ip.pos + 1, + .paren_nesting = 0, + } }; + } + }, + .content => |*content| switch (ip.content[ip.pos]) { + ' ', '\t', '\n' => break, + '(' => content.paren_nesting += 1, + ')' => if (content.paren_nesting == 0) { + break; + } else { + content.paren_nesting -= 1; + }, + else => {}, + }, + } + } + + switch (state) { + .http, .after_http, .after_https, .authority => { + ip.pos = start; + }, + .content => |content| { + while (ip.pos > content.start and isPostTextAutolink(ip.content[ip.pos - 1])) { + ip.pos -= 1; + } + if (ip.pos == content.start) { + ip.pos = start; + return; + } + + const target = try ip.parent.addString(ip.content[start..ip.pos]); + const node = try ip.parent.addNode(.{ + .tag = .autolink, + .data = .{ .text = .{ + .content = target, + } }, + }); + try ip.completed_inlines.append(ip.parent.allocator, .{ + .node = node, + .start = start, + .len = ip.pos - start, + }); + ip.pos -= 1; + }, + } + } + + /// Returns whether `c` may appear before a text autolink is recognized. + fn isPreTextAutolink(c: u8) bool { + return switch (c) { + ' ', '\t', '\n', '*', '_', '(' => true, + else => false, + }; + } + + /// Returns whether `c` is punctuation that may appear after a text autolink + /// and not be considered part of it. + fn isPostTextAutolink(c: u8) bool { + return switch (c) { + '?', '!', '.', ',', ':', '*', '_' => true, + else => false, + }; + } + + /// Parses emphasis, starting at the beginning of a run of `*` or `_` + /// characters. `ip.pos` is left at the last character in the run after + /// parsing. + fn parseEmphasis(ip: *InlineParser) !void { + const char = ip.content[ip.pos]; + var start = ip.pos; + while (ip.pos + 1 < ip.content.len and ip.content[ip.pos + 1] == char) { + ip.pos += 1; + } + var len = ip.pos - start + 1; + const underscore = char == '_'; + const space_before = start == 0 or isWhitespace(ip.content[start - 1]); + const space_after = start + len == ip.content.len or isWhitespace(ip.content[start + len]); + const punct_before = start == 0 or isPunctuation(ip.content[start - 1]); + const punct_after = start + len == ip.content.len or isPunctuation(ip.content[start + len]); + // The rules for when emphasis may be closed or opened are stricter for + // underscores to avoid inappropriately interpreting snake_case words as + // containing emphasis markers. + const can_open = if (underscore) + !space_after and (space_before or punct_before) + else + !space_after; + const can_close = if (underscore) + !space_before and (space_after or punct_after) + else + !space_before; + + if (can_close and ip.pending_inlines.items.len > 0) { + var i = ip.pending_inlines.items.len; + while (i > 0 and len > 0) { + i -= 1; + const opener = &ip.pending_inlines.items[i]; + if (opener.tag != .emphasis or + opener.data.emphasis.underscore != underscore) continue; + + const close_len = @min(opener.data.emphasis.run_len, len); + const opener_end = opener.start + opener.data.emphasis.run_len; + + const emphasis = try ip.encodeEmphasis(opener_end, start, close_len); + const emphasis_start = opener_end - close_len; + const emphasis_len = start - emphasis_start + close_len; + try ip.completed_inlines.append(ip.parent.allocator, .{ + .node = emphasis, + .start = emphasis_start, + .len = emphasis_len, + }); + + // There may still be other openers further down in the + // stack to close, or part of this run might serve as an + // opener itself. 
+ start += close_len; + len -= close_len; + + // Remove any pending inlines above this on the stack, since + // closing this emphasis will prevent them from being closed. + // Additionally, if this opener is completely consumed by + // being closed, it can be removed. + opener.data.emphasis.run_len -= close_len; + if (opener.data.emphasis.run_len == 0) { + ip.pending_inlines.shrinkRetainingCapacity(i); + } else { + ip.pending_inlines.shrinkRetainingCapacity(i + 1); + } + } + } + + if (can_open and len > 0) { + try ip.pending_inlines.append(ip.parent.allocator, .{ + .tag = .emphasis, + .data = .{ .emphasis = .{ + .underscore = underscore, + .run_len = len, + } }, + .start = start, + }); + } + } + + /// Encodes emphasis specified by a run of `run_len` emphasis characters, + /// with `start..end` being the range of content contained within the + /// emphasis. + fn encodeEmphasis(ip: *InlineParser, start: usize, end: usize, run_len: usize) !Node.Index { + const children = try ip.encodeChildren(start, end); + var inner = switch (run_len % 3) { + 1 => try ip.parent.addNode(.{ + .tag = .emphasis, + .data = .{ .container = .{ + .children = children, + } }, + }), + 2 => try ip.parent.addNode(.{ + .tag = .strong, + .data = .{ .container = .{ + .children = children, + } }, + }), + 0 => strong_emphasis: { + const strong = try ip.parent.addNode(.{ + .tag = .strong, + .data = .{ .container = .{ + .children = children, + } }, + }); + break :strong_emphasis try ip.parent.addNode(.{ + .tag = .emphasis, + .data = .{ .container = .{ + .children = try ip.parent.addExtraChildren(&.{strong}), + } }, + }); + }, + else => unreachable, + }; + + var run_left = run_len; + while (run_left > 3) : (run_left -= 3) { + const strong = try ip.parent.addNode(.{ + .tag = .strong, + .data = .{ .container = .{ + .children = try ip.parent.addExtraChildren(&.{inner}), + } }, + }); + inner = try ip.parent.addNode(.{ + .tag = .emphasis, + .data = .{ .container = .{ + .children = try ip.parent.addExtraChildren(&.{strong}), + } }, + }); + } + + return inner; + } + + /// Parses a code span, starting at the beginning of the opening backtick + /// run. `ip.pos` is left at the last character in the closing run after + /// parsing. + fn parseCodeSpan(ip: *InlineParser) !void { + const opener_start = ip.pos; + ip.pos = mem.indexOfNonePos(u8, ip.content, ip.pos, "`") orelse ip.content.len; + const opener_len = ip.pos - opener_start; + + const start = ip.pos; + const end = while (mem.indexOfScalarPos(u8, ip.content, ip.pos, '`')) |closer_start| { + ip.pos = mem.indexOfNonePos(u8, ip.content, closer_start, "`") orelse ip.content.len; + const closer_len = ip.pos - closer_start; + + if (closer_len == opener_len) break closer_start; + } else unterminated: { + ip.pos = ip.content.len; + break :unterminated ip.content.len; + }; + + var content = if (start < ip.content.len) + ip.content[start..end] + else + ""; + // This single space removal rule allows code spans to be written which + // start or end with backticks. + if (mem.startsWith(u8, content, " `")) content = content[1..]; + if (mem.endsWith(u8, content, "` ")) content = content[0 .. content.len - 1]; + + const text = try ip.parent.addNode(.{ + .tag = .code_span, + .data = .{ .text = .{ + .content = try ip.parent.addString(content), + } }, + }); + try ip.completed_inlines.append(ip.parent.allocator, .{ + .node = text, + .start = opener_start, + .len = ip.pos - opener_start, + }); + // Ensure ip.pos is pointing at the last character of the + // closer, not after it. 
+ ip.pos -= 1; + } + + /// Encodes children parsed in the content range `start..end`. The children + /// will be text nodes and any completed inlines within the range. + fn encodeChildren(ip: *InlineParser, start: usize, end: usize) !ExtraIndex { + const scratch_extra_top = ip.parent.scratch_extra.items.len; + defer ip.parent.scratch_extra.shrinkRetainingCapacity(scratch_extra_top); + + var child_index = ip.completed_inlines.items.len; + while (child_index > 0 and ip.completed_inlines.items[child_index - 1].start >= start) { + child_index -= 1; + } + const start_child_index = child_index; + + var pos = start; + while (child_index < ip.completed_inlines.items.len) : (child_index += 1) { + const child_inline = ip.completed_inlines.items[child_index]; + // Completed inlines must be strictly nested within the encodable + // content. + assert(child_inline.start >= pos and child_inline.start + child_inline.len <= end); + + if (child_inline.start > pos) { + try ip.encodeTextNode(pos, child_inline.start); + } + try ip.parent.addScratchExtraNode(child_inline.node); + + pos = child_inline.start + child_inline.len; + } + ip.completed_inlines.shrinkRetainingCapacity(start_child_index); + + if (pos < end) { + try ip.encodeTextNode(pos, end); + } + + const children = ip.parent.scratch_extra.items[scratch_extra_top..]; + return try ip.parent.addExtraChildren(@ptrCast(children)); + } + + /// Encodes textual content `ip.content[start..end]` to `scratch_extra`. The + /// encoded content may include both `text` and `line_break` nodes. + fn encodeTextNode(ip: *InlineParser, start: usize, end: usize) !void { + // For efficiency, we can encode directly into string_bytes rather than + // creating a temporary string and then encoding it, since this process + // is entirely linear. + const string_top = ip.parent.string_bytes.items.len; + errdefer ip.parent.string_bytes.shrinkRetainingCapacity(string_top); + + var string_start = string_top; + var text_iter: TextIterator = .{ .content = ip.content[start..end] }; + while (text_iter.next()) |content| { + switch (content) { + .char => |c| try ip.parent.string_bytes.append(ip.parent.allocator, c), + .text => |s| try ip.parent.string_bytes.appendSlice(ip.parent.allocator, s), + .line_break => { + if (ip.parent.string_bytes.items.len > string_start) { + try ip.parent.string_bytes.append(ip.parent.allocator, 0); + try ip.parent.addScratchExtraNode(try ip.parent.addNode(.{ + .tag = .text, + .data = .{ .text = .{ + .content = @enumFromInt(string_start), + } }, + })); + string_start = ip.parent.string_bytes.items.len; + } + try ip.parent.addScratchExtraNode(try ip.parent.addNode(.{ + .tag = .line_break, + .data = .{ .none = {} }, + })); + }, + } + } + if (ip.parent.string_bytes.items.len > string_start) { + try ip.parent.string_bytes.append(ip.parent.allocator, 0); + try ip.parent.addScratchExtraNode(try ip.parent.addNode(.{ + .tag = .text, + .data = .{ .text = .{ + .content = @enumFromInt(string_start), + } }, + })); + } + } + + /// An iterator over parts of textual content, handling unescaping of + /// escaped characters and line breaks. 
+ const TextIterator = struct { + content: []const u8, + pos: usize = 0, + + const Content = union(enum) { + char: u8, + text: []const u8, + line_break, + }; + + const replacement = "\u{FFFD}"; + + fn next(iter: *TextIterator) ?Content { + if (iter.pos >= iter.content.len) return null; + if (iter.content[iter.pos] == '\\') { + iter.pos += 1; + if (iter.pos == iter.content.len) { + return .{ .char = '\\' }; + } else if (iter.content[iter.pos] == '\n') { + iter.pos += 1; + return .line_break; + } else if (isPunctuation(iter.content[iter.pos])) { + const c = iter.content[iter.pos]; + iter.pos += 1; + return .{ .char = c }; + } else { + return .{ .char = '\\' }; + } + } + return iter.nextCodepoint(); + } + + fn nextCodepoint(iter: *TextIterator) ?Content { + switch (iter.content[iter.pos]) { + 0 => { + iter.pos += 1; + return .{ .text = replacement }; + }, + 1...127 => |c| { + iter.pos += 1; + return .{ .char = c }; + }, + else => |b| { + const cp_len = std.unicode.utf8ByteSequenceLength(b) catch { + iter.pos += 1; + return .{ .text = replacement }; + }; + const is_valid = iter.pos + cp_len <= iter.content.len and + std.unicode.utf8ValidateSlice(iter.content[iter.pos..][0..cp_len]); + const cp_encoded = if (is_valid) + iter.content[iter.pos..][0..cp_len] + else + replacement; + iter.pos += cp_len; + return .{ .text = cp_encoded }; + }, + } + } + }; +}; + +fn parseInlines(p: *Parser, content: []const u8) !ExtraIndex { + var ip: InlineParser = .{ + .parent = p, + .content = mem.trim(u8, content, " \t\n"), + }; + defer ip.deinit(); + return try ip.parse(); +} + +pub fn extraData(p: Parser, comptime T: type, index: ExtraIndex) ExtraData(T) { + const fields = @typeInfo(T).@"struct".fields; + var i: usize = @intFromEnum(index); + var result: T = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => p.extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return .{ .data = result, .end = i }; +} + +pub fn extraChildren(p: Parser, index: ExtraIndex) []const Node.Index { + const children = p.extraData(Node.Children, index); + return @ptrCast(p.extra.items[children.end..][0..children.data.len]); +} + +fn addNode(p: *Parser, node: Node) !Node.Index { + const index: Node.Index = @enumFromInt(@as(u32, @intCast(p.nodes.len))); + try p.nodes.append(p.allocator, node); + return index; +} + +fn addString(p: *Parser, s: []const u8) !StringIndex { + if (s.len == 0) return .empty; + + const index: StringIndex = @enumFromInt(@as(u32, @intCast(p.string_bytes.items.len))); + try p.string_bytes.ensureUnusedCapacity(p.allocator, s.len + 1); + p.string_bytes.appendSliceAssumeCapacity(s); + p.string_bytes.appendAssumeCapacity(0); + return index; +} + +fn addExtraChildren(p: *Parser, nodes: []const Node.Index) !ExtraIndex { + const index: ExtraIndex = @enumFromInt(@as(u32, @intCast(p.extra.items.len))); + try p.extra.ensureUnusedCapacity(p.allocator, nodes.len + 1); + p.extra.appendAssumeCapacity(@intCast(nodes.len)); + p.extra.appendSliceAssumeCapacity(@ptrCast(nodes)); + return index; +} + +fn addScratchExtraNode(p: *Parser, node: Node.Index) !void { + try p.scratch_extra.append(p.allocator, @intFromEnum(node)); +} + +fn addScratchStringLine(p: *Parser, line: []const u8) !void { + try p.scratch_string.ensureUnusedCapacity(p.allocator, line.len + 1); + p.scratch_string.appendSliceAssumeCapacity(line); + p.scratch_string.appendAssumeCapacity('\n'); +} + +fn isBlank(line: []const u8) bool { + return mem.indexOfNone(u8, line, " \t") == null; +} + 
+fn isPunctuation(c: u8) bool { + return switch (c) { + '!', + '"', + '#', + '$', + '%', + '&', + '\'', + '(', + ')', + '*', + '+', + ',', + '-', + '.', + '/', + ':', + ';', + '<', + '=', + '>', + '?', + '@', + '[', + '\\', + ']', + '^', + '_', + '`', + '{', + '|', + '}', + '~', + => true, + else => false, + }; +} diff --git a/docs/wasm/markdown/renderer.zig b/docs/wasm/markdown/renderer.zig new file mode 100644 index 0000000..cba857d --- /dev/null +++ b/docs/wasm/markdown/renderer.zig @@ -0,0 +1,247 @@ +const std = @import("std"); +const Document = @import("Document.zig"); +const Node = Document.Node; +const assert = std.debug.assert; + +/// A Markdown document renderer. +/// +/// Each concrete `Renderer` type has a `renderDefault` function, with the +/// intention that custom `renderFn` implementations can call `renderDefault` +/// for node types for which they require no special rendering. +pub fn Renderer(comptime Writer: type, comptime Context: type) type { + return struct { + renderFn: *const fn ( + r: Self, + doc: Document, + node: Node.Index, + writer: Writer, + ) Writer.Error!void = renderDefault, + context: Context, + + const Self = @This(); + + pub fn render(r: Self, doc: Document, writer: Writer) Writer.Error!void { + try r.renderFn(r, doc, .root, writer); + } + + pub fn renderDefault( + r: Self, + doc: Document, + node: Node.Index, + writer: Writer, + ) Writer.Error!void { + const data = doc.nodes.items(.data)[@intFromEnum(node)]; + switch (doc.nodes.items(.tag)[@intFromEnum(node)]) { + .root => { + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + }, + .list => { + if (data.list.start.asNumber()) |start| { + if (start == 1) { + try writer.writeAll("
    \n"); + } else { + try writer.print("
      \n", .{start}); + } + } else { + try writer.writeAll("
        \n"); + } + for (doc.extraChildren(data.list.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + if (data.list.start.asNumber() != null) { + try writer.writeAll("
    \n"); + } else { + try writer.writeAll("\n"); + } + }, + .list_item => { + try writer.writeAll("
  1. "); + for (doc.extraChildren(data.list_item.children)) |child| { + if (data.list_item.tight and doc.nodes.items(.tag)[@intFromEnum(child)] == .paragraph) { + const para_data = doc.nodes.items(.data)[@intFromEnum(child)]; + for (doc.extraChildren(para_data.container.children)) |para_child| { + try r.renderFn(r, doc, para_child, writer); + } + } else { + try r.renderFn(r, doc, child, writer); + } + } + try writer.writeAll("
  2. \n"); + }, + .table => { + try writer.writeAll("\n"); + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll("
    \n"); + }, + .table_row => { + try writer.writeAll("\n"); + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll("\n"); + }, + .table_cell => { + if (data.table_cell.info.header) { + try writer.writeAll(" try writer.writeAll(">"), + else => |a| try writer.print(" style=\"text-align: {s}\">", .{@tagName(a)}), + } + + for (doc.extraChildren(data.table_cell.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + + if (data.table_cell.info.header) { + try writer.writeAll("\n"); + } else { + try writer.writeAll("\n"); + } + }, + .heading => { + try writer.print("", .{data.heading.level}); + for (doc.extraChildren(data.heading.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.print("\n", .{data.heading.level}); + }, + .code_block => { + const content = doc.string(data.code_block.content); + try writer.print("
    {f}
    \n", .{fmtHtml(content)}); + }, + .blockquote => { + try writer.writeAll("
    \n"); + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll("
    \n"); + }, + .paragraph => { + try writer.writeAll("

    "); + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll("

    \n"); + }, + .thematic_break => { + try writer.writeAll("
    \n"); + }, + .link => { + const target = doc.string(data.link.target); + try writer.print("", .{fmtHtml(target)}); + for (doc.extraChildren(data.link.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll(""); + }, + .autolink => { + const target = doc.string(data.text.content); + try writer.print("{0f}", .{fmtHtml(target)}); + }, + .image => { + const target = doc.string(data.link.target); + try writer.print("\"","); + }, + .strong => { + try writer.writeAll(""); + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll(""); + }, + .emphasis => { + try writer.writeAll(""); + for (doc.extraChildren(data.container.children)) |child| { + try r.renderFn(r, doc, child, writer); + } + try writer.writeAll(""); + }, + .code_span => { + const content = doc.string(data.text.content); + try writer.print("{f}", .{fmtHtml(content)}); + }, + .text => { + const content = doc.string(data.text.content); + try writer.print("{f}", .{fmtHtml(content)}); + }, + .line_break => { + try writer.writeAll("
    \n"); + }, + } + } + }; +} + +/// Renders an inline node as plain text. Asserts that the node is an inline and +/// has no non-inline children. +pub fn renderInlineNodeText( + doc: Document, + node: Node.Index, + writer: anytype, +) @TypeOf(writer).Error!void { + const data = doc.nodes.items(.data)[@intFromEnum(node)]; + switch (doc.nodes.items(.tag)[@intFromEnum(node)]) { + .root, + .list, + .list_item, + .table, + .table_row, + .table_cell, + .heading, + .code_block, + .blockquote, + .paragraph, + .thematic_break, + => unreachable, // Blocks + + .link, .image => { + for (doc.extraChildren(data.link.children)) |child| { + try renderInlineNodeText(doc, child, writer); + } + }, + .strong => { + for (doc.extraChildren(data.container.children)) |child| { + try renderInlineNodeText(doc, child, writer); + } + }, + .emphasis => { + for (doc.extraChildren(data.container.children)) |child| { + try renderInlineNodeText(doc, child, writer); + } + }, + .autolink, .code_span, .text => { + const content = doc.string(data.text.content); + try writer.print("{f}", .{fmtHtml(content)}); + }, + .line_break => { + try writer.writeAll("\n"); + }, + } +} + +pub fn fmtHtml(bytes: []const u8) std.fmt.Formatter([]const u8, formatHtml) { + return .{ .data = bytes }; +} + +fn formatHtml(bytes: []const u8, writer: *std.io.Writer) std.io.Writer.Error!void { + for (bytes) |b| { + switch (b) { + '<' => try writer.writeAll("<"), + '>' => try writer.writeAll(">"), + '&' => try writer.writeAll("&"), + '"' => try writer.writeAll("""), + else => try writer.writeByte(b), + } + } +} From 785bfcb266e5b0ee6a059f41b102872c24aa405b Mon Sep 17 00:00:00 2001 From: Afirium Date: Sat, 19 Jul 2025 03:23:19 +0300 Subject: [PATCH 4/6] feat(std): Add markdown rendering --- docs/wasm/html_render.zig | 133 +++++++++----------------------- docs/wasm/main.zig | 56 +++++--------- docs/wasm/markdown.zig | 1 - docs/wasm/markdown/renderer.zig | 104 ++++++++----------------- 4 files changed, 86 insertions(+), 208 deletions(-) diff --git a/docs/wasm/html_render.zig b/docs/wasm/html_render.zig index 68b10a0..16fb072 100644 --- a/docs/wasm/html_render.zig +++ b/docs/wasm/html_render.zig @@ -20,6 +20,8 @@ pub const RenderSourceOptions = struct { source_location_annotations: []const Annotation = &.{}, /// Concatenated with dom_id. annotation_prefix: []const u8 = "l", + /// Whether to add markdown code fence wrapper. 
+ add_code_fence: bool = true, }; pub const Annotation = struct { @@ -35,11 +37,6 @@ pub fn fileSourceHtml( options: RenderSourceOptions, ) !void { const ast = file_index.get_ast(); - const file = file_index.get(); - - const g = struct { - var field_access_buffer: std.ArrayListUnmanaged(u8) = .empty; - }; const start_token = ast.firstToken(root_node); const end_token = ast.lastToken(root_node) + 1; @@ -59,6 +56,11 @@ pub fn fileSourceHtml( var next_annotate_index: usize = 0; + // Add markdown code fence if requested + if (options.add_code_fence) { + try out.appendSlice(gpa, "```zig\n"); + } + for ( ast.tokens.items(.tag)[start_token..end_token], ast.tokens.items(.start)[start_token..end_token], @@ -67,30 +69,25 @@ pub fn fileSourceHtml( const between = ast.source[cursor..start]; if (std.mem.trim(u8, between, " \t\r\n").len > 0) { if (!options.skip_comments) { - try out.appendSlice(gpa, ""); - try appendUnindented(out, between, indent); - try out.appendSlice(gpa, ""); + try appendUnindentedPlain(out, between, indent); } } else if (between.len > 0) { if (options.collapse_whitespace) { if (out.items.len > 0 and out.items[out.items.len - 1] != ' ') try out.append(gpa, ' '); } else { - try appendUnindented(out, between, indent); + try appendUnindentedPlain(out, between, indent); } } if (tag == .eof) break; const slice = ast.tokenSlice(token_index); cursor = start + slice.len; - // Insert annotations. + // Skip annotations in markdown mode while (true) { if (next_annotate_index >= options.source_location_annotations.len) break; const next_annotation = options.source_location_annotations[next_annotate_index]; if (cursor <= next_annotation.file_byte_offset) break; - try out.writer(gpa).print("", .{ - options.annotation_prefix, next_annotation.dom_id, - }); next_annotate_index += 1; } @@ -144,109 +141,34 @@ pub fn fileSourceHtml( .keyword_anytype, .keyword_fn, => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); + try out.appendSlice(gpa, slice); }, .string_literal, .char_literal, .multiline_string_literal_line, => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); + try out.appendSlice(gpa, slice); }, .builtin => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); + try out.appendSlice(gpa, slice); }, .doc_comment, .container_doc_comment, => { if (!options.skip_doc_comments) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); + try out.appendSlice(gpa, slice); } }, - .identifier => i: { - if (options.fn_link != .none) { - const fn_link = options.fn_link.get(); - const fn_token = ast.nodeMainToken(fn_link.ast_node); - if (token_index == fn_token + 1) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - } - - if (token_index > 0 and ast.tokenTag(token_index - 1) == .keyword_fn) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (Walk.isPrimitiveNonType(slice)) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (std.zig.primitives.isPrimitive(slice)) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - - if (file.token_parents.get(token_index)) |field_access_node| { - g.field_access_buffer.clearRetainingCapacity(); - try 
walkFieldAccesses(file_index, &g.field_access_buffer, field_access_node); - if (g.field_access_buffer.items.len > 0) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - } else { - try appendEscaped(out, slice); - } - break :i; - } - - { - g.field_access_buffer.clearRetainingCapacity(); - try resolveIdentLink(file_index, &g.field_access_buffer, token_index); - if (g.field_access_buffer.items.len > 0) { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); - break :i; - } - } - - try appendEscaped(out, slice); + .identifier => { + try out.appendSlice(gpa, slice); }, .number_literal => { - try out.appendSlice(gpa, ""); - try appendEscaped(out, slice); - try out.appendSlice(gpa, ""); + try out.appendSlice(gpa, slice); }, .bang, @@ -311,11 +233,16 @@ pub fn fileSourceHtml( .angle_bracket_angle_bracket_right, .angle_bracket_angle_bracket_right_equal, .tilde, - => try appendEscaped(out, slice), + => try out.appendSlice(gpa, slice), .invalid, .invalid_periodasterisks => return error.InvalidToken, } } + + // Add closing markdown code fence if requested + if (options.add_code_fence) { + try out.appendSlice(gpa, "\n```"); + } } fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void { @@ -332,6 +259,20 @@ fn appendUnindented(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usi } } +fn appendUnindentedPlain(out: *std.ArrayListUnmanaged(u8), s: []const u8, indent: usize) !void { + var it = std.mem.splitScalar(u8, s, '\n'); + var is_first_line = true; + while (it.next()) |line| { + if (is_first_line) { + try out.appendSlice(gpa, line); + is_first_line = false; + } else { + try out.appendSlice(gpa, "\n"); + try out.appendSlice(gpa, unindent(line, indent)); + } + } +} + pub fn appendEscaped(out: *std.ArrayListUnmanaged(u8), s: []const u8) !void { for (s) |c| { try out.ensureUnusedCapacity(gpa, 6); diff --git a/docs/wasm/main.zig b/docs/wasm/main.zig index 7e9ffa5..36baa43 100644 --- a/docs/wasm/main.zig +++ b/docs/wasm/main.zig @@ -240,22 +240,19 @@ const ErrorIdentifier = packed struct(u64) { const name = ast.tokenSlice(ei.token_index); const has_link = base_decl != decl_index; - try out.appendSlice(gpa, "
    "); + try out.appendSlice(gpa, "**"); try out.appendSlice(gpa, name); + try out.appendSlice(gpa, "**"); if (has_link) { - try out.appendSlice(gpa, " "); + try out.appendSlice(gpa, " (from "); try out.appendSlice(gpa, decl_index.get().extra_info().name); - try out.appendSlice(gpa, ""); + try out.appendSlice(gpa, ")"); } - try out.appendSlice(gpa, "
    "); + try out.appendSlice(gpa, "\n\n"); if (Decl.findFirstDocComment(ast, ei.token_index).unwrap()) |first_doc_comment| { - try out.appendSlice(gpa, "
    "); try render_docs(out, decl_index, first_doc_comment, false); - try out.appendSlice(gpa, "
    "); + try out.appendSlice(gpa, "\n\n"); } } }; @@ -466,16 +463,13 @@ fn decl_field_html_fallible( ) !void { const decl = decl_index.get(); const ast = decl.file.get_ast(); - try out.appendSlice(gpa, "
    ");
         try fileSourceHtml(decl.file, out, field_node, .{});
    -    try out.appendSlice(gpa, "
    "); const field = ast.fullContainerField(field_node).?; if (Decl.findFirstDocComment(ast, field.firstToken()).unwrap()) |first_doc_comment| { - try out.appendSlice(gpa, "
    "); + try out.appendSlice(gpa, "\n\n"); try render_docs(out, decl_index, first_doc_comment, false); - try out.appendSlice(gpa, "
    "); } } @@ -501,16 +495,15 @@ fn decl_param_html_fallible( }; const name = ast.tokenSlice(name_token); - try out.appendSlice(gpa, "
    ");
    -    try appendEscaped(out, name);
    +    try out.appendSlice(gpa, "```zig\n");
    +    try out.appendSlice(gpa, name);
         try out.appendSlice(gpa, ": ");
    -    try fileSourceHtml(decl.file, out, param_node, .{});
    -    try out.appendSlice(gpa, "
    "); + try fileSourceHtml(decl.file, out, param_node, .{ .add_code_fence = false }); + try out.appendSlice(gpa, "\n```"); if (ast.tokenTag(first_doc_comment) == .doc_comment) { - try out.appendSlice(gpa, "
    "); + try out.appendSlice(gpa, "\n\n"); try render_docs(out, decl_index, first_doc_comment, false); - try out.appendSlice(gpa, "
    "); } } @@ -690,9 +683,6 @@ fn render_docs( var parsed_doc = try parser.endInput(); defer parsed_doc.deinit(gpa); - const g = struct { - var link_buffer: std.ArrayListUnmanaged(u8) = .empty; - }; const Writer = std.ArrayListUnmanaged(u8).Writer; const Renderer = markdown.Renderer(Writer, Decl.Index); @@ -708,21 +698,10 @@ fn render_docs( const data = doc.nodes.items(.data)[@intFromEnum(node)]; switch (doc.nodes.items(.tag)[@intFromEnum(node)]) { .code_span => { - try writer.writeAll(""); + try writer.writeAll("`"); const content = doc.string(data.text.content); - if (resolve_decl_path(r.context, content)) |resolved_decl_index| { - g.link_buffer.clearRetainingCapacity(); - try resolveDeclLink(resolved_decl_index, &g.link_buffer); - - try writer.writeAll("{f}", .{markdown.fmtHtml(content)}); - } else { - try writer.print("{f}", .{markdown.fmtHtml(content)}); - } - - try writer.writeAll(""); + try writer.writeAll(content); + try writer.writeAll("`"); }, else => try Renderer.renderDefault(r, doc, node, writer), @@ -754,14 +733,15 @@ export fn decl_type_html(decl_index: Decl.Index) String { // If there is an explicit type, use it. if (ast.fullVarDecl(decl.ast_node)) |var_decl| { if (var_decl.ast.type_node.unwrap()) |type_node| { - string_result.appendSlice(gpa, "") catch @panic("OOM"); + string_result.appendSlice(gpa, "`") catch @panic("OOM"); fileSourceHtml(decl.file, &string_result, type_node, .{ .skip_comments = true, .collapse_whitespace = true, + .add_code_fence = false, }) catch |e| { std.debug.panic("unable to render html: {s}", .{@errorName(e)}); }; - string_result.appendSlice(gpa, "") catch @panic("OOM"); + string_result.appendSlice(gpa, "`") catch @panic("OOM"); break :t; } } diff --git a/docs/wasm/markdown.zig b/docs/wasm/markdown.zig index 3293b68..68c58d4 100644 --- a/docs/wasm/markdown.zig +++ b/docs/wasm/markdown.zig @@ -131,7 +131,6 @@ pub const Document = @import("markdown/Document.zig"); pub const Parser = @import("markdown/Parser.zig"); pub const Renderer = @import("markdown/renderer.zig").Renderer; pub const renderNodeInlineText = @import("markdown/renderer.zig").renderNodeInlineText; -pub const fmtHtml = @import("markdown/renderer.zig").fmtHtml; // Avoid exposing main to other files merely importing this one. pub const main = if (@import("root") == @This()) diff --git a/docs/wasm/markdown/renderer.zig b/docs/wasm/markdown/renderer.zig index cba857d..98c2939 100644 --- a/docs/wasm/markdown/renderer.zig +++ b/docs/wasm/markdown/renderer.zig @@ -38,26 +38,12 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type { } }, .list => { - if (data.list.start.asNumber()) |start| { - if (start == 1) { - try writer.writeAll("
      \n"); - } else { - try writer.print("
        \n", .{start}); - } - } else { - try writer.writeAll("
          \n"); - } for (doc.extraChildren(data.list.children)) |child| { try r.renderFn(r, doc, child, writer); } - if (data.list.start.asNumber() != null) { - try writer.writeAll("
      \n"); - } else { - try writer.writeAll("\n"); - } }, .list_item => { - try writer.writeAll("
    1. "); + try writer.writeAll("- "); for (doc.extraChildren(data.list_item.children)) |child| { if (data.list_item.tight and doc.nodes.items(.tag)[@intFromEnum(child)] == .paragraph) { const para_data = doc.nodes.items(.data)[@intFromEnum(child)]; @@ -68,115 +54,103 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type { try r.renderFn(r, doc, child, writer); } } - try writer.writeAll("
    2. \n"); + try writer.writeAll("\n"); }, .table => { - try writer.writeAll("\n"); for (doc.extraChildren(data.container.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll("
      \n"); }, .table_row => { - try writer.writeAll("\n"); + try writer.writeAll("|"); for (doc.extraChildren(data.container.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll("\n"); + try writer.writeAll("\n"); }, .table_cell => { - if (data.table_cell.info.header) { - try writer.writeAll(" try writer.writeAll(">"), - else => |a| try writer.print(" style=\"text-align: {s}\">", .{@tagName(a)}), - } - + try writer.writeAll(" "); for (doc.extraChildren(data.table_cell.children)) |child| { try r.renderFn(r, doc, child, writer); } - - if (data.table_cell.info.header) { - try writer.writeAll("\n"); - } else { - try writer.writeAll("\n"); - } + try writer.writeAll(" |"); }, .heading => { - try writer.print("", .{data.heading.level}); + var i: u8 = 0; + while (i < data.heading.level) : (i += 1) { + try writer.writeAll("#"); + } + try writer.writeAll(" "); for (doc.extraChildren(data.heading.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.print("\n", .{data.heading.level}); + try writer.writeAll("\n"); }, .code_block => { const content = doc.string(data.code_block.content); - try writer.print("
      {f}
      \n", .{fmtHtml(content)}); + try writer.writeAll("```\n"); + try writer.print("{s}", .{content}); + try writer.writeAll("\n```\n"); }, .blockquote => { - try writer.writeAll("
      \n"); + try writer.writeAll("> "); for (doc.extraChildren(data.container.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll("
      \n"); }, .paragraph => { - try writer.writeAll("

      "); for (doc.extraChildren(data.container.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll("

      \n"); + try writer.writeAll("\n\n"); }, .thematic_break => { - try writer.writeAll("
      \n"); + try writer.writeAll("---\n"); }, .link => { const target = doc.string(data.link.target); - try writer.print("", .{fmtHtml(target)}); + try writer.writeAll("["); for (doc.extraChildren(data.link.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll(""); + try writer.print("]({s})", .{target}); }, .autolink => { const target = doc.string(data.text.content); - try writer.print("{0f}", .{fmtHtml(target)}); + try writer.print("<{s}>", .{target}); }, .image => { const target = doc.string(data.link.target); - try writer.print("\"","); + try writer.print("]({s})", .{target}); }, .strong => { - try writer.writeAll(""); + try writer.writeAll("**"); for (doc.extraChildren(data.container.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll(""); + try writer.writeAll("**"); }, .emphasis => { - try writer.writeAll(""); + try writer.writeAll("*"); for (doc.extraChildren(data.container.children)) |child| { try r.renderFn(r, doc, child, writer); } - try writer.writeAll(""); + try writer.writeAll("*"); }, .code_span => { const content = doc.string(data.text.content); - try writer.print("{f}", .{fmtHtml(content)}); + try writer.print("`{s}`", .{content}); }, .text => { const content = doc.string(data.text.content); - try writer.print("{f}", .{fmtHtml(content)}); + try writer.print("{s}", .{content}); }, .line_break => { - try writer.writeAll("
      \n"); + try writer.writeAll("\\\n"); }, } } @@ -222,26 +196,10 @@ pub fn renderInlineNodeText( }, .autolink, .code_span, .text => { const content = doc.string(data.text.content); - try writer.print("{f}", .{fmtHtml(content)}); + try writer.print("{s}", .{content}); }, .line_break => { try writer.writeAll("\n"); }, } } - -pub fn fmtHtml(bytes: []const u8) std.fmt.Formatter([]const u8, formatHtml) { - return .{ .data = bytes }; -} - -fn formatHtml(bytes: []const u8, writer: *std.io.Writer) std.io.Writer.Error!void { - for (bytes) |b| { - switch (b) { - '<' => try writer.writeAll("<"), - '>' => try writer.writeAll(">"), - '&' => try writer.writeAll("&"), - '"' => try writer.writeAll("""), - else => try writer.writeByte(b), - } - } -} From 6b7bdd837890f486064ed70e013f119888b2de00 Mon Sep 17 00:00:00 2001 From: Afirium Date: Sat, 19 Jul 2025 03:25:36 +0300 Subject: [PATCH 5/6] feat(std): Add HTML and JS files for local documentation interface --- biome.json | 5 + build.zig | 3 + docs/index.html | 44 +++ docs/main.js | 783 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 835 insertions(+) create mode 100644 docs/index.html create mode 100644 docs/main.js diff --git a/biome.json b/biome.json index 2b91b80..a2185c7 100644 --- a/biome.json +++ b/biome.json @@ -39,6 +39,11 @@ "formatter": { "indentWidth": 2 } + }, + { + "includes": ["docs/main.js"], + "formatter": { "enabled": false }, + "linter": { "enabled": false } } ] } diff --git a/build.zig b/build.zig index 84ac954..cdf5a69 100644 --- a/build.zig +++ b/build.zig @@ -38,5 +38,8 @@ pub fn build(b: *std.Build) !void { .dest_dir = .{ .override = .prefix }, }); + b.installFile("docs/index.html", "index.html"); + b.installFile("docs/main.js", "main.js"); + b.getInstallStep().dependOn(&install_wasm.step); } diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..aa0033d --- /dev/null +++ b/docs/index.html @@ -0,0 +1,44 @@ + + + + + + Zig Documentation + + + + +
      + + + + \ No newline at end of file diff --git a/docs/main.js b/docs/main.js new file mode 100644 index 0000000..1389441 --- /dev/null +++ b/docs/main.js @@ -0,0 +1,783 @@ +(function() { + const CAT_namespace = 0; + const CAT_container = 1; + const CAT_global_variable = 2; + const CAT_function = 3; + const CAT_primitive = 4; + const CAT_error_set = 5; + const CAT_global_const = 6; + const CAT_alias = 7; + const CAT_type = 8; + const CAT_type_type = 9; + const CAT_type_function = 10; + + const LOG_err = 0; + const LOG_warn = 1; + const LOG_info = 2; + const LOG_debug = 3; + + const domContent = document.getElementById("content"); + const domSearch = document.getElementById("search"); + const domErrors = document.getElementById("errors"); + const domErrorsText = document.getElementById("errorsText"); + + var searchTimer = null; + + const curNav = { + tag: 0, + decl: null, + path: null, + }; + var curNavSearch = ""; + + const moduleList = []; + + let wasm_promise = fetch("main.wasm"); + let sources_promise = fetch("sources.tar").then(function(response) { + if (!response.ok) throw new Error("unable to download sources"); + return response.arrayBuffer(); + }); + var wasm_exports = null; + + const text_decoder = new TextDecoder(); + const text_encoder = new TextEncoder(); + + WebAssembly.instantiateStreaming(wasm_promise, { + js: { + log: function(level, ptr, len) { + const msg = decodeString(ptr, len); + switch (level) { + case LOG_err: + console.error(msg); + domErrorsText.textContent += msg + "\n"; + domErrors.classList.remove("hidden"); + break; + case LOG_warn: + console.warn(msg); + break; + case LOG_info: + console.info(msg); + break; + case LOG_debug: + console.debug(msg); + break; + } + }, + }, + }).then(function(obj) { + wasm_exports = obj.instance.exports; + window.wasm = obj; // for debugging + + sources_promise.then(function(buffer) { + const js_array = new Uint8Array(buffer); + const ptr = wasm_exports.alloc(js_array.length); + const wasm_array = new Uint8Array(wasm_exports.memory.buffer, ptr, js_array.length); + wasm_array.set(js_array); + wasm_exports.unpack(ptr, js_array.length); + + updateModuleList(); + + window.addEventListener('popstate', onPopState, false); + domSearch.addEventListener('keydown', onSearchKeyDown, false); + domSearch.addEventListener('input', onSearchChange, false); + window.addEventListener('keydown', onWindowKeyDown, false); + onHashChange(null); + }); + }); + + function renderTitle() { + const suffix = " - Zig Documentation"; + if (curNavSearch.length > 0) { + document.title = curNavSearch + " - Search" + suffix; + } else if (curNav.decl != null) { + document.title = fullyQualifiedName(curNav.decl) + suffix; + } else if (curNav.path != null) { + document.title = curNav.path + suffix; + } else { + document.title = moduleList[0] + suffix; + } + } + + function render() { + renderTitle(); + domContent.textContent = ""; + + if (curNavSearch !== "") return renderSearch(); + + switch (curNav.tag) { + case 0: return renderHome(); + case 1: + if (curNav.decl == null) { + return renderNotFound(); + } else { + return renderDecl(curNav.decl); + } + case 2: return renderSource(curNav.path); + default: throw new Error("invalid navigation state"); + } + } + + function renderHome() { + if (moduleList.length == 0) { + domContent.textContent = "# Error\n\nsources.tar contains no modules"; + return; + } + return renderModule(0); + } + + function renderModule(pkg_index) { + const root_decl = wasm_exports.find_module_root(pkg_index); + return renderDecl(root_decl); + } 
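+  // Dispatch on the CAT_* category code reported by the wasm module;
+  // aliases (CAT_alias) are resolved through get_aliasee() before rendering.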
+ + function renderDecl(decl_index) { + const category = wasm_exports.categorize_decl(decl_index, 0); + switch (category) { + case CAT_namespace: + case CAT_container: + return renderNamespacePage(decl_index); + case CAT_global_variable: + case CAT_primitive: + case CAT_global_const: + case CAT_type: + case CAT_type_type: + return renderGlobal(decl_index); + case CAT_function: + return renderFunction(decl_index); + case CAT_type_function: + return renderTypeFunction(decl_index); + case CAT_error_set: + return renderErrorSetPage(decl_index); + case CAT_alias: + return renderDecl(wasm_exports.get_aliasee()); + default: + throw new Error("unrecognized category " + category); + } + } + + function renderSource(path) { + const decl_index = findFileRoot(path); + if (decl_index == null) return renderNotFound(); + + let markdown = ""; + markdown += "# " + path + "\n\n"; + markdown += unwrapString(wasm_exports.decl_source_html(decl_index)); + + domContent.textContent = markdown; + } + + function renderNamespacePage(decl_index) { + let markdown = ""; + + // Add navigation breadcrumb + markdown += renderNavMarkdown(decl_index); + + // Add title + const name = unwrapString(wasm_exports.decl_category_name(decl_index)); + markdown += "# " + name + "\n\n"; + + // Add documentation + const docs = unwrapString(wasm_exports.decl_docs_html(decl_index, false)); + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + + // Add namespace content + const members = namespaceMembers(decl_index, false).slice(); + const fields = declFields(decl_index).slice(); + markdown += renderNamespaceMarkdown(decl_index, members, fields); + + domContent.textContent = markdown; + } + + function renderFunction(decl_index) { + let markdown = ""; + + // Add navigation breadcrumb + markdown += renderNavMarkdown(decl_index); + + // Add title + const name = unwrapString(wasm_exports.decl_category_name(decl_index)); + markdown += "# " + name + "\n\n"; + + // Add documentation + const docs = unwrapString(wasm_exports.decl_docs_html(decl_index, false)); + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + + // Add function prototype + const proto = unwrapString(wasm_exports.decl_fn_proto_html(decl_index, false)); + if (proto.length > 0) { + markdown += "## Function Signature\n\n" + proto + "\n\n"; + } + + // Add parameters + const params = declParams(decl_index).slice(); + if (params.length > 0) { + markdown += "## Parameters\n\n"; + for (let i = 0; i < params.length; i++) { + const param_html = unwrapString(wasm_exports.decl_param_html(decl_index, params[i])); + markdown += param_html + "\n\n"; + } + } + + // Add errors + const errorSetNode = fnErrorSet(decl_index); + if (errorSetNode != null) { + const base_decl = wasm_exports.fn_error_set_decl(decl_index, errorSetNode); + const errorList = errorSetNodeList(decl_index, errorSetNode); + if (errorList != null && errorList.length > 0) { + markdown += "## Errors\n\n"; + for (let i = 0; i < errorList.length; i++) { + const error_html = unwrapString(wasm_exports.error_html(base_decl, errorList[i])); + markdown += error_html + "\n\n"; + } + } + } + + // Add doctest + const doctest = unwrapString(wasm_exports.decl_doctest_html(decl_index)); + if (doctest.length > 0) { + markdown += "## Example Usage\n\n" + doctest + "\n\n"; + } + + // Add source code + const source = unwrapString(wasm_exports.decl_source_html(decl_index)); + if (source.length > 0) { + markdown += "## Source Code\n\n" + source + "\n\n"; + } + + domContent.textContent = markdown; + } + + function 
renderGlobal(decl_index) { + let markdown = ""; + + // Add navigation breadcrumb + markdown += renderNavMarkdown(decl_index); + + // Add title + const name = unwrapString(wasm_exports.decl_category_name(decl_index)); + markdown += "# " + name + "\n\n"; + + // Add documentation + const docs = unwrapString(wasm_exports.decl_docs_html(decl_index, true)); + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + + // Add source code + const source = unwrapString(wasm_exports.decl_source_html(decl_index)); + if (source.length > 0) { + markdown += "## Source Code\n\n" + source + "\n\n"; + } + + domContent.textContent = markdown; + } + + function renderTypeFunction(decl_index) { + let markdown = ""; + + // Add navigation breadcrumb + markdown += renderNavMarkdown(decl_index); + + // Add title + const name = unwrapString(wasm_exports.decl_category_name(decl_index)); + markdown += "# " + name + "\n\n"; + + // Add documentation + const docs = unwrapString(wasm_exports.decl_docs_html(decl_index, false)); + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + + // Add parameters + const params = declParams(decl_index).slice(); + if (params.length > 0) { + markdown += "## Parameters\n\n"; + for (let i = 0; i < params.length; i++) { + const param_html = unwrapString(wasm_exports.decl_param_html(decl_index, params[i])); + markdown += param_html + "\n\n"; + } + } + + // Add doctest + const doctest = unwrapString(wasm_exports.decl_doctest_html(decl_index)); + if (doctest.length > 0) { + markdown += "## Example Usage\n\n" + doctest + "\n\n"; + } + + // Add namespace content or source + const members = unwrapSlice32(wasm_exports.type_fn_members(decl_index, false)).slice(); + const fields = unwrapSlice32(wasm_exports.type_fn_fields(decl_index)).slice(); + if (members.length !== 0 || fields.length !== 0) { + markdown += renderNamespaceMarkdown(decl_index, members, fields); + } else { + const source = unwrapString(wasm_exports.decl_source_html(decl_index)); + if (source.length > 0) { + markdown += "## Source Code\n\n" + source + "\n\n"; + } + } + + domContent.textContent = markdown; + } + + function renderErrorSetPage(decl_index) { + let markdown = ""; + + // Add navigation breadcrumb + markdown += renderNavMarkdown(decl_index); + + // Add title + const name = unwrapString(wasm_exports.decl_category_name(decl_index)); + markdown += "# " + name + "\n\n"; + + // Add documentation + const docs = unwrapString(wasm_exports.decl_docs_html(decl_index, false)); + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + + // Add errors + const errorSetList = declErrorSet(decl_index).slice(); + if (errorSetList != null && errorSetList.length > 0) { + markdown += "## Errors\n\n"; + for (let i = 0; i < errorSetList.length; i++) { + const error_html = unwrapString(wasm_exports.error_html(decl_index, errorSetList[i])); + markdown += error_html + "\n\n"; + } + } + + domContent.textContent = markdown; + } + + function renderNavMarkdown(decl_index) { + let markdown = ""; + const list = []; + + // Walk backwards through decl parents + let decl_it = decl_index; + while (decl_it != null) { + list.push(declIndexName(decl_it)); + decl_it = declParent(decl_it); + } + + // Walk backwards through file path segments + if (decl_index != null) { + const file_path = fullyQualifiedName(decl_index); + const parts = file_path.split("."); + parts.pop(); // skip last + for (let i = parts.length - 1; i >= 0; i--) { + if (parts[i]) { + list.push(parts[i]); + } + } + } + + list.reverse(); + + if (list.length > 0) { + markdown += 
"*Navigation: " + list.join(" > ") + "*\n\n"; + } + + return markdown; + } + + function renderNamespaceMarkdown(base_decl, members, fields) { + let markdown = ""; + + const typesList = []; + const namespacesList = []; + const errSetsList = []; + const fnsList = []; + const varsList = []; + const valsList = []; + + // Categorize members + for (let i = 0; i < members.length; i++) { + let member = members[i]; + const original = member; + while (true) { + const member_category = wasm_exports.categorize_decl(member, 0); + switch (member_category) { + case CAT_namespace: + namespacesList.push({original: original, member: member}); + break; + case CAT_container: + typesList.push({original: original, member: member}); + break; + case CAT_global_variable: + varsList.push(member); + break; + case CAT_function: + fnsList.push(member); + break; + case CAT_type: + case CAT_type_type: + case CAT_type_function: + typesList.push({original: original, member: member}); + break; + case CAT_error_set: + errSetsList.push({original: original, member: member}); + break; + case CAT_global_const: + case CAT_primitive: + valsList.push({original: original, member: member}); + break; + case CAT_alias: + member = wasm_exports.get_aliasee(); + continue; + default: + throw new Error("unknown category: " + member_category); + } + break; + } + } + + // Render each category + if (typesList.length > 0) { + markdown += "## Types\n\n"; + for (let i = 0; i < typesList.length; i++) { + const name = declIndexName(typesList[i].original); + markdown += "- " + name + "\n"; + } + markdown += "\n"; + } + + if (namespacesList.length > 0) { + markdown += "## Namespaces\n\n"; + for (let i = 0; i < namespacesList.length; i++) { + const name = declIndexName(namespacesList[i].original); + markdown += "- " + name + "\n"; + } + markdown += "\n"; + } + + if (errSetsList.length > 0) { + markdown += "## Error Sets\n\n"; + for (let i = 0; i < errSetsList.length; i++) { + const name = declIndexName(errSetsList[i].original); + markdown += "- " + name + "\n"; + } + markdown += "\n"; + } + + if (fnsList.length > 0) { + markdown += "## Functions\n\n"; + for (let i = 0; i < fnsList.length; i++) { + const decl = fnsList[i]; + const name = declIndexName(decl); + const proto = unwrapString(wasm_exports.decl_fn_proto_html(decl, true)); + const docs = unwrapString(wasm_exports.decl_docs_html(decl, true)); + + markdown += "### " + name + "\n\n"; + if (proto.length > 0) { + markdown += proto + "\n\n"; + } + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + } + } + + if (fields.length > 0) { + markdown += "## Fields\n\n"; + for (let i = 0; i < fields.length; i++) { + const field_html = unwrapString(wasm_exports.decl_field_html(base_decl, fields[i])); + markdown += field_html + "\n\n"; + } + } + + if (varsList.length > 0) { + markdown += "## Global Variables\n\n"; + for (let i = 0; i < varsList.length; i++) { + const decl = varsList[i]; + const name = declIndexName(decl); + const type_html = unwrapString(wasm_exports.decl_type_html(decl)); + const docs = unwrapString(wasm_exports.decl_docs_html(decl, true)); + + markdown += "### " + name + "\n\n"; + if (type_html.length > 0) { + markdown += "Type: " + type_html + "\n\n"; + } + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + } + } + + if (valsList.length > 0) { + markdown += "## Values\n\n"; + for (let i = 0; i < valsList.length; i++) { + const original_decl = valsList[i].original; + const decl = valsList[i].member; + const name = declIndexName(original_decl); + const type_html = 
unwrapString(wasm_exports.decl_type_html(decl)); + const docs = unwrapString(wasm_exports.decl_docs_html(decl, true)); + + markdown += "### " + name + "\n\n"; + if (type_html.length > 0) { + markdown += "Type: " + type_html + "\n\n"; + } + if (docs.length > 0) { + markdown += docs + "\n\n"; + } + } + } + + return markdown; + } + + function renderNotFound() { + domContent.textContent = "# Error\n\nDeclaration not found."; + } + + function renderSearch() { + const ignoreCase = (curNavSearch.toLowerCase() === curNavSearch); + const results = executeQuery(curNavSearch, ignoreCase); + + let markdown = "# Search Results\n\n"; + markdown += "Query: \"" + curNavSearch + "\"\n\n"; + + if (results.length > 0) { + markdown += "Found " + results.length + " results:\n\n"; + for (let i = 0; i < results.length; i++) { + const match = results[i]; + const full_name = fullyQualifiedName(match); + markdown += "- " + full_name + "\n"; + } + } else { + markdown += "No results found.\n\nPress escape to exit search."; + } + + domContent.textContent = markdown; + } + + // Event handlers and utility functions (unchanged from original) + function updateCurNav(location_hash) { + curNav.tag = 0; + curNav.decl = null; + curNav.path = null; + curNavSearch = ""; + + if (location_hash.length > 1 && location_hash[0] === '#') { + const query = location_hash.substring(1); + const qpos = query.indexOf("?"); + let nonSearchPart; + if (qpos === -1) { + nonSearchPart = query; + } else { + nonSearchPart = query.substring(0, qpos); + curNavSearch = decodeURIComponent(query.substring(qpos + 1)); + } + + if (nonSearchPart.length > 0) { + const source_mode = nonSearchPart.startsWith("src/"); + if (source_mode) { + curNav.tag = 2; + curNav.path = nonSearchPart.substring(4); + } else { + curNav.tag = 1; + curNav.decl = findDecl(nonSearchPart); + } + } + } + } + + function onHashChange(state) { + history.replaceState({}, ""); + navigate(location.hash); + if (state == null) window.scrollTo({top: 0}); + } + + function onPopState(ev) { + onHashChange(ev.state); + } + + function navigate(location_hash) { + updateCurNav(location_hash); + if (domSearch.value !== curNavSearch) { + domSearch.value = curNavSearch; + } + render(); + } + + function onSearchKeyDown(ev) { + switch (ev.code) { + case "Enter": + if (ev.shiftKey || ev.ctrlKey || ev.altKey) return; + clearAsyncSearch(); + location.hash = computeSearchHash(); + ev.preventDefault(); + ev.stopPropagation(); + return; + case "Escape": + if (ev.shiftKey || ev.ctrlKey || ev.altKey) return; + domSearch.value = ""; + domSearch.blur(); + ev.preventDefault(); + ev.stopPropagation(); + startSearch(); + return; + default: + ev.stopPropagation(); + return; + } + } + + function onSearchChange(ev) { + startAsyncSearch(); + } + + function onWindowKeyDown(ev) { + switch (ev.code) { + case "KeyS": + if (ev.shiftKey || ev.ctrlKey || ev.altKey) return; + domSearch.focus(); + domSearch.select(); + ev.preventDefault(); + ev.stopPropagation(); + startAsyncSearch(); + break; + } + } + + function clearAsyncSearch() { + if (searchTimer != null) { + clearTimeout(searchTimer); + searchTimer = null; + } + } + + function startAsyncSearch() { + clearAsyncSearch(); + searchTimer = setTimeout(startSearch, 10); + } + + function computeSearchHash() { + const oldWatHash = location.hash; + const oldHash = oldWatHash.startsWith("#") ? oldWatHash : "#" + oldWatHash; + const parts = oldHash.split("?"); + const newPart2 = (domSearch.value === "") ? "" : ("?" 
+ domSearch.value); + return parts[0] + newPart2; + } + + function startSearch() { + clearAsyncSearch(); + navigate(computeSearchHash()); + } + + function updateModuleList() { + moduleList.length = 0; + for (let i = 0;; i += 1) { + const name = unwrapString(wasm_exports.module_name(i)); + if (name.length == 0) break; + moduleList.push(name); + } + } + + // Utility functions (unchanged from original) + function decodeString(ptr, len) { + if (len === 0) return ""; + return text_decoder.decode(new Uint8Array(wasm_exports.memory.buffer, ptr, len)); + } + + function unwrapString(bigint) { + const ptr = Number(bigint & 0xffffffffn); + const len = Number(bigint >> 32n); + return decodeString(ptr, len); + } + + function fullyQualifiedName(decl_index) { + return unwrapString(wasm_exports.decl_fqn(decl_index)); + } + + function declIndexName(decl_index) { + return unwrapString(wasm_exports.decl_name(decl_index)); + } + + function setQueryString(s) { + const jsArray = text_encoder.encode(s); + const len = jsArray.length; + const ptr = wasm_exports.query_begin(len); + const wasmArray = new Uint8Array(wasm_exports.memory.buffer, ptr, len); + wasmArray.set(jsArray); + } + + function executeQuery(query_string, ignore_case) { + setQueryString(query_string); + const ptr = wasm_exports.query_exec(ignore_case); + const head = new Uint32Array(wasm_exports.memory.buffer, ptr, 1); + const len = head[0]; + return new Uint32Array(wasm_exports.memory.buffer, ptr + 4, len); + } + + function namespaceMembers(decl_index, include_private) { + return unwrapSlice32(wasm_exports.namespace_members(decl_index, include_private)); + } + + function declFields(decl_index) { + return unwrapSlice32(wasm_exports.decl_fields(decl_index)); + } + + function declParams(decl_index) { + return unwrapSlice32(wasm_exports.decl_params(decl_index)); + } + + function declErrorSet(decl_index) { + return unwrapSlice64(wasm_exports.decl_error_set(decl_index)); + } + + function errorSetNodeList(base_decl, err_set_node) { + return unwrapSlice64(wasm_exports.error_set_node_list(base_decl, err_set_node)); + } + + function unwrapSlice32(bigint) { + const ptr = Number(bigint & 0xffffffffn); + const len = Number(bigint >> 32n); + if (len === 0) return []; + return new Uint32Array(wasm_exports.memory.buffer, ptr, len); + } + + function unwrapSlice64(bigint) { + const ptr = Number(bigint & 0xffffffffn); + const len = Number(bigint >> 32n); + if (len === 0) return []; + return new BigUint64Array(wasm_exports.memory.buffer, ptr, len); + } + + function findDecl(fqn) { + setInputString(fqn); + const result = wasm_exports.find_decl(); + if (result === -1) return null; + return result; + } + + function findFileRoot(path) { + setInputString(path); + const result = wasm_exports.find_file_root(); + if (result === -1) return null; + return result; + } + + function declParent(decl_index) { + const result = wasm_exports.decl_parent(decl_index); + if (result === -1) return null; + return result; + } + + function fnErrorSet(decl_index) { + const result = wasm_exports.fn_error_set(decl_index); + if (result === 0) return null; + return result; + } + + function setInputString(s) { + const jsArray = text_encoder.encode(s); + const len = jsArray.length; + const ptr = wasm_exports.set_input_string(len); + const wasmArray = new Uint8Array(wasm_exports.memory.buffer, ptr, len); + wasmArray.set(jsArray); + } +})(); \ No newline at end of file From 4782963fae29f042540868fa78f1d81a35de91fe Mon Sep 17 00:00:00 2001 From: Afirium Date: Sat, 19 Jul 2025 03:26:10 +0300 Subject: 
[PATCH 6/6] feat(docs): Add usage tip

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index ac969aa..cfe5477 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,9 @@
 
 Model Context Protocol (MCP) server that provides up-to-date documentation for the Zig programming language standard library and builtin functions.
 
+> [!TIP]
+> Add `use zigdocs` to your prompt to explicitly instruct the LLM to use the Zig docs tools. Otherwise, the LLM will automatically decide when to use the MCP tools based on the context of your questions.
+
 ## Installation
 
 ### Claude Code
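As a purely illustrative example of the tip above (the prompt wording is hypothetical, nothing here is required by the server), a user message might look like:

```text
use zigdocs: what error set can std.ArrayList(u8).appendSlice return, and when?
```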