Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate html by allowlist #5823

Merged
merged 11 commits into from
Apr 28, 2020
10 changes: 10 additions & 0 deletions docs/specs/markdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,11 @@ inputs:
outputs:
docs/a.json: |
{ "conceptual": "<div></div><p>body</p>" }
.errors.log: |
["info","disallowed-html","HTML tag 'link' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/a.md"]
["info","disallowed-html","HTML attribute 'style' on tag 'div' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/a.md"]
["info","disallowed-html","HTML tag 'script' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/a.md"]
["info","disallowed-html","HTML tag 'style' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/a.md"]
---
# markdown table with styles is allowed
inputs:
Expand Down Expand Up @@ -481,6 +486,11 @@ outputs:
{ "conceptual": "<p><a onclick=\"javascript:window.open('https://shell.azure.com', '_blank', 'toolbar=no,scrollbars=yes,resizable=yes,menubar=no,location=no,status=no')\"><image src='https://shell.azure.com/images/launchcloudshell.png'></image></a></p>\n" }
docs/c.json: |
{ "conceptual": "<p><a onclick='javascript:window.open(\"https://shell.azure.com\", \"_blank\", \"toolbar=no,scrollbars=yes,resizable=yes,menubar=no,location=no,status=no\")'><image src=\"https://shell.azure.com/images/launchcloudshell.png\"></image></a></p>\n" }
.errors.log: |
["info","disallowed-html","HTML attribute 'style' on tag 'a' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/b.md"]
["info","disallowed-html","HTML attribute 'style' on tag 'a' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/c.md"]
["info","disallowed-html","HTML attribute 'onclick' on tag 'a' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/b.md"]
["info","disallowed-html","HTML attribute 'onclick' on tag 'a' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","docs/c.md"]
---
# yaml header shouldn't be array
inputs:
Expand Down
16 changes: 16 additions & 0 deletions docs/specs/validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -714,3 +714,19 @@ outputs:
["suggestion","path-duplication","Article URL '/docs/folder/duplicatedword/folder1/duplicatedword' contains duplicated text 'duplicatedword'. For SEO, article URLs should not contain duplication.", "v2/duplicatedword/folder1/DuplicatedWord.md"]
["suggestion","path-duplication","Article URL '/docs/folder/duplicated-base-path/docs/a' contains duplicated text 'docs'. For SEO, article URLs should not contain duplication.", "folder0/duplicated-base-path/docs/a.md"]
---
# Validate embed html
inputs:
docfx.yml:
a.md: |
<H2 id="head" TITLE="HEAD"></H2>
<BUTTON>click</BUTTON>
<img height="docs"></img>
<h1 height="docs"></h1>
<div onclick="alert();" data-source="docs"></div>
outputs:
a.json:
.errors.log: |
["info","disallowed-html","HTML attribute 'onclick' on tag 'div' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","a.md"]
["info","disallowed-html","HTML attribute 'height' on tag 'h1' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","a.md"]
["info","disallowed-html","HTML tag 'button' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.","a.md"]
---
12 changes: 12 additions & 0 deletions src/docfx/Errors.cs
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,18 @@ public static Error BookmarkNotFound(SourceInfo? source, Document reference, str
/// </summary>
public static Error Custom404Page(Document file)
=> new Error(ErrorLevel.Warning, "custom-404-page", $"Custom 404 page will be deprecated in future. Please remove the 404.md file to resolve this warning", file.FilePath);

/// <summary>
/// Html Tag value must be in allowed list
/// </summary>
public static Error DisallowedHTMLTag(Document file, string tag)
=> new Error(ErrorLevel.Info, "disallowed-html", $"HTML tag '{tag}' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.", file.FilePath, 0, 0, 0, 0, $"tag '{tag}'");
fkpwolf marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Html Attribute value must be in allowed list
/// </summary>
public static Error DisallowedHTMLAttribute(Document file, string tag, string attribute)
=> new Error(ErrorLevel.Info, "disallowed-html", $"HTML attribute '{attribute}' on tag '{tag}' isn't allowed. Disallowed HTML poses a security risk and must be replaced with approved Docs Markdown syntax.", file.FilePath, 0, 0, 0, 0, $"'{attribute}' on tag '{tag}'");
fkpwolf marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
9 changes: 8 additions & 1 deletion src/docfx/build/page/BuildPage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,14 @@ private static (List<Error> errors, JObject model) LoadMarkdown(Context context,
var (markupErrors, html) = context.MarkdownEngine.ToHtml(content, file, MarkdownPipelineType.Markdown);
errors.AddRange(markupErrors);

var htmlDom = HtmlUtility.LoadHtml(html).PostMarkup(context.Config.DryRun);
var rawHtmlDom = HtmlUtility.LoadHtml(html);
if (file.ContentType == ContentType.Page && TemplateEngine.IsConceptual(file.Mime))
{
var htmlErrors = HtmlUtility.SecurityScan(rawHtmlDom, file);
errors.AddRange(htmlErrors);
}

var htmlDom = rawHtmlDom.PostMarkup(context.Config.DryRun);
ValidateBookmarks(context, file, htmlDom);
if (!HtmlUtility.TryExtractTitle(htmlDom, out var title, out var rawTitle))
{
Expand Down
116 changes: 116 additions & 0 deletions src/docfx/lib/HtmlUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,87 @@ internal static class HtmlUtility

private static readonly string[] s_allowedStyles = new[] { "text-align: right;", "text-align: left;", "text-align: center;" };

// todo remove `class` & `hidden` & `tabindex` after we can know it is is generated by ourself
private static readonly HashSet<string> s_globalAllowedAttributes = new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "id", "itemid", "itemprop", "itemref", "itemscope", "itemtype", "part", "slot", "spellcheck", "title", "role", "class", "hidden", "tabindex" };

// ref https://developer.mozilla.org/en-US/docs/Web/HTML/Element
private static readonly Dictionary<string, HashSet<string>?> s_allowedTagAtrributeMap = new Dictionary<string, HashSet<string>?>()
{
// Content sectioning
{ "address", null },
{ "h1", null },
{ "h2", null },
{ "h3", null },
{ "h4", null },
{ "h5", null },
{ "h6", null },
{ "section", null },

// Text content
{ "blockquote", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "cite" } },
{ "dd", null },
{ "div", null },
{ "dl", null },
{ "dt", null },
{ "figcaption", null },
{ "figure", null },
{ "hr", null },
{ "li", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "value" } },
{ "ol", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "reversed", "start", "type" } },
{ "p", null },
{ "pre", null },
{ "ul", null },

// Inline text semantics
{ "a", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "download", "href", "hreflang", "ping", "rel", "target", "type" } },
{ "abbr", null },
{ "b", null },
{ "bdi", null },
{ "bdo", null },
{ "br", null },
{ "cite", null },
{ "code", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "name" } },
{ "data", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "value" } },
{ "dfn", null },
{ "em", null },
{ "i", null },
{ "mark", null },
{ "q", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "cite" } },
{ "s", null },
{ "samp", null },
{ "small", null },
{ "span", null },
{ "strong", null },
{ "sub", null },
{ "sup", null },
{ "time", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "datetime" } },
{ "u", null },
{ "var", null },

// Image and multimedia
{ "img", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "alt", "decoding", "height", "intrinsicsize", "loading", "sizes", "src", "width" } },
{ "image", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "alt", "decoding", "height", "intrinsicsize", "loading", "sizes", "src", "width" } },

// Demarcating edits
{ "del", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "cite", "datetime" } },
{ "ins", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "cite", "datetime" } },

// table
{ "caption", null },
{ "col", null },
{ "colgroup", null },
{ "table", null },
{ "tbody", null },
{ "td", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "style" } },
{ "tfoot", null },
{ "th", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "style" } },
{ "thead", null },
{ "tr", null },

// other
{ "iframe", new HashSet<string>(StringComparer.OrdinalIgnoreCase) { "allow", "allowfullscreen", "allowpaymentrequest", "height", "name", "referrerpolicy", "sandbox", "src", "srcdoc", "width" } },
};

public static string TransformHtml(string html, TransformHtmlDelegate transform)
{
var result = new ArrayBufferWriter<char>(html.Length + 64);
Expand Down Expand Up @@ -302,6 +383,41 @@ internal static HtmlNode StripTags(this HtmlNode html)
return html;
}

internal static List<Error> SecurityScan(this HtmlNode html, Document file)
{
var errors = new List<Error>();

foreach (var node in html.DescendantsAndSelf())
{
if (node.NodeType != HtmlNodeType.Element)
{
continue;
}

if (s_allowedTagAtrributeMap.TryGetValue(node.Name, out var additionalAttributes))
{
foreach (var attribute in node.Attributes)
{
if (attribute.Name.StartsWith("data-", StringComparison.OrdinalIgnoreCase) ||
attribute.Name.StartsWith("aria-", StringComparison.OrdinalIgnoreCase) ||
s_globalAllowedAttributes.Contains(attribute.Name))
{
continue;
}
else if (additionalAttributes == null || !additionalAttributes.Contains(attribute.Name))
{
errors.Add(Errors.Content.DisallowedHTMLAttribute(file, node.Name, attribute.Name));
}
}
}
else
{
errors.Add(Errors.Content.DisallowedHTMLTag(file, node.Name));
}
}
return errors;
}

private static void AddLinkType(this HtmlNode html, string tag, string attribute, string locale)
{
foreach (var node in html.Descendants(tag))
Expand Down