Skip to content

Add absolute links support #1802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions guide/src/format/configuration/renderers.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ edit-url-template = "https://github.com/rust-lang/mdBook/edit/master/guide/{path
site-url = "/example-book/"
cname = "myproject.rs"
input-404 = "not-found.md"
use-site-url-as-root = false
```

The following configuration options are available:
Expand Down Expand Up @@ -164,6 +165,7 @@ The following configuration options are available:
navigation links and script/css imports in the 404 file work correctly, even when accessing
urls in subdirectories. Defaults to `/`. If `site-url` is set,
make sure to use document relative links for your assets, meaning they should not start with `/`.
- **use-site-url-as-root:** When `true`, prepend `site-url` to links in chapter content whose destination is an absolute path (one that starts with `/`). Defaults to `false`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add this to the TOML summary up above?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, done,

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

links with absolute path

As a user, I would wonder "what links are those"? It could be

  • links generated by templates, such as those referencing style sheets and JavaScript code
  • links generated between pages of the book
  • links I insert using Markdown syntax

- **cname:** The DNS subdomain or apex domain at which your book will be hosted.
This string will be written to a file named CNAME in the root of your site, as
required by GitHub Pages (see [*Managing a custom domain for your GitHub Pages
Expand Down
3 changes: 3 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,8 @@ pub struct HtmlConfig {
pub input_404: Option<String>,
/// Absolute url to site, used to emit correct paths for the 404 page, which might be accessed in a deeply nested directory
pub site_url: Option<String>,
/// Whether to prepend `site_url` to links whose destination is an absolute path (one that starts with `/`).
pub use_site_url_as_root: bool,
Comment on lines +585 to +586
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, so technically adding this is a breaking change since HtmlConfig is in the public API. However, we have been adding new fields to this struct for a while now, and nobody has complained. That is something we should definitely fix in the future, but for now I guess we can let it slide. 😦

/// The DNS subdomain or apex domain at which your book will be hosted. This
/// string will be written to a file named CNAME in the root of your site,
/// as required by GitHub Pages (see [*Managing a custom domain for your
Expand Down Expand Up @@ -632,6 +634,7 @@ impl Default for HtmlConfig {
edit_url_template: None,
input_404: None,
site_url: None,
use_site_url_as_root: false,
cname: None,
live_reload_endpoint: None,
redirect: HashMap::new(),
Expand Down
11 changes: 10 additions & 1 deletion src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,16 @@ impl HtmlHandlebars {
.insert("git_repository_edit_url".to_owned(), json!(edit_url));
}

let content = utils::render_markdown(&ch.content, ctx.html_config.smart_punctuation());
let content = if ctx.html_config.use_site_url_as_root {
utils::render_markdown_with_abs_path(
&ch.content,
ctx.html_config.smart_punctuation(),
None,
ctx.html_config.site_url.as_deref(),
)
} else {
utils::render_markdown(&ch.content, ctx.html_config.smart_punctuation())
};

let fixed_content = utils::render_markdown_with_path(
&ch.content,
Expand Down
131 changes: 119 additions & 12 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,13 @@ pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, us
/// page go to the original location. Normal page rendering sets `path` to
/// None. Ideally, print page links would link to anchors on the print page,
/// but that is very difficult.
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>, abs_url: Option<&str>) -> Event<'a> {
static SCHEME_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap());
static MD_LINK: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap());

fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>, abs_url: Option<&str>) -> CowStr<'a> {
if dest.starts_with('#') {
// Fragment-only link.
if let Some(path) = path {
Expand Down Expand Up @@ -127,20 +127,32 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
}

if let Some(caps) = MD_LINK.captures(&dest) {
fixed_link.push_str(&caps["link"]);
fixed_link.push_str(&caps["link"].trim_start_matches('/'));
fixed_link.push_str(".html");
if let Some(anchor) = caps.name("anchor") {
fixed_link.push_str(anchor.as_str());
}
} else if !fixed_link.is_empty() {
// prevent links with double slashes
fixed_link.push_str(&dest.trim_start_matches('/'));
} else {
fixed_link.push_str(&dest);
};
return CowStr::from(fixed_link);
if dest.starts_with('/') || path.is_some() {
if let Some(abs_url) = abs_url {
fixed_link = format!(
"{}/{}",
abs_url.trim_end_matches('/'),
&fixed_link.trim_start_matches('/')
);
}
}
return CowStr::from(fixed_link.to_string());
}
dest
}

fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>, abs_url: Option<&str>) -> CowStr<'a> {
// This is a terrible hack, but should be reasonably reliable. Nobody
// should ever parse a tag with a regex. However, there isn't anything
// in Rust that I know of that is suitable for handling partial html
Expand All @@ -154,7 +166,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {

HTML_LINK
.replace_all(&html, |caps: &regex::Captures<'_>| {
let fixed = fix(caps[2].into(), path);
let fixed = fix(caps[2].into(), path, abs_url);
format!("{}{}\"", &caps[1], fixed)
})
.into_owned()
Expand All @@ -169,7 +181,7 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
id,
}) => Event::Start(Tag::Link {
link_type,
dest_url: fix(dest_url, path),
dest_url: fix(dest_url, path, abs_url),
title,
id,
}),
Expand All @@ -180,12 +192,12 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
id,
}) => Event::Start(Tag::Image {
link_type,
dest_url: fix(dest_url, path),
dest_url: fix(dest_url, path, abs_url),
title,
id,
}),
Event::Html(html) => Event::Html(fix_html(html, path)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
Event::Html(html) => Event::Html(fix_html(html, path, abs_url)),
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path, abs_url)),
_ => event,
}
}
Expand Down Expand Up @@ -214,10 +226,28 @@ pub fn new_cmark_parser(text: &str, smart_punctuation: bool) -> Parser<'_> {
/// `path` should only be set if this is being generated for the consolidated
/// print page. It should point to the page being rendered relative to the
/// root of the book.
pub fn render_markdown_with_path(
pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
// Thin wrapper over `render_markdown_with_abs_path`: `curly_quotes` is passed
// through as that function's `smart_punctuation` argument, and `abs_url` is
// always `None`, so no absolute URL is prepended to any link.
render_markdown_with_abs_path(text, curly_quotes, path, None)
}

/// Renders markdown to HTML.
///
/// `path` should only be set if this is being generated for the consolidated
/// print page. It should point to the page being rendered relative to the
/// root of the book.
/// `abs_url` is the absolute URL to use for links that start with `/`.
/// If `abs_url` is `None`, then links that start with `/` will be
/// rendered relative to the current path.
/// If `abs_url` is `Some`, then links that start with `/` will be
/// rendered as absolute links using the provided URL.
///
/// This is useful for generating links in the print page, where the
/// links should point to the original location of the page, not the
/// print page itself.
pub fn render_markdown_with_abs_path(
text: &str,
smart_punctuation: bool,
path: Option<&Path>,
abs_url: Option<&str>,
) -> String {
let mut body = String::with_capacity(text.len() * 3 / 2);

Expand Down Expand Up @@ -250,7 +280,7 @@ pub fn render_markdown_with_path(

let events = new_cmark_parser(text, smart_punctuation)
.map(clean_codeblock_headers)
.map(|event| adjust_links(event, path))
.map(|event| adjust_links(event, path, abs_url))
.flat_map(|event| {
let (a, b) = wrap_tables(event);
a.into_iter().chain(b)
Expand Down Expand Up @@ -600,6 +630,83 @@ more text with spaces
}
}

mod render_markdown_with_abs_path {
use super::super::render_markdown_with_abs_path;
use std::path::Path;

#[test]
fn preserves_external_links() {
assert_eq!(
render_markdown_with_abs_path(
"[example](https://www.rust-lang.org/)",
false,
None,
Some("ABS_PATH")
),
"<p><a href=\"https://www.rust-lang.org/\">example</a></p>\n"
);
}

#[test]
fn replace_root_links() {
assert_eq!(
render_markdown_with_abs_path("[example](/testing)", false, None, Some("ABS_PATH")),
"<p><a href=\"ABS_PATH/testing\">example</a></p>\n"
);
}

#[test]
fn replace_root_links_using_path() {
assert_eq!(
render_markdown_with_abs_path(
"[example](bar.md)",
false,
Some(Path::new("foo/chapter.md")),
Some("ABS_PATH")
),
"<p><a href=\"ABS_PATH/foo/bar.html\">example</a></p>\n"
);
assert_eq!(
render_markdown_with_abs_path(
"[example](/bar.md)",
false,
Some(Path::new("foo/chapter.md")),
Some("ABS_PATH")
),
"<p><a href=\"ABS_PATH/foo/bar.html\">example</a></p>\n"
);
assert_eq!(
render_markdown_with_abs_path(
"[example](/bar.html)",
false,
Some(Path::new("foo/chapter.md")),
None
),
"<p><a href=\"foo/bar.html\">example</a></p>\n"
);
}

#[test]
fn preserves_relative_links() {
assert_eq!(
render_markdown_with_abs_path(
"[example](../testing)",
false,
None,
Some("ABS_PATH")
),
"<p><a href=\"../testing\">example</a></p>\n"
);
}

#[test]
fn preserves_root_links() {
assert_eq!(
render_markdown_with_abs_path("[example](/testing)", false, None, None),
"<p><a href=\"/testing\">example</a></p>\n"
);
}
}
#[allow(deprecated)]
mod id_from_content {
use super::super::id_from_content;
Expand Down