Skip to content

Commit

Permalink
Allow extended set for <num> and ignore ref style links, already link…
Browse files Browse the repository at this point in the history
…ed items, and attr_list cases with '#' before the ref (#6)

* fix: update to regex matching to resolve issues

* Use negative lookbehind to ignore ref links and already linked refs
* Handle word characters and hyphen for <num>
* Add tests for new cases

Refs: #1
Refs: #4

* docs: add some comments to test cases

* fix: removed code left by mistake
  • Loading branch information
yqbear committed Dec 22, 2023
1 parent 56493ae commit fe9c5be
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 30 deletions.
11 changes: 11 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.formatting.provider": "none"
}
27 changes: 8 additions & 19 deletions autolink_references/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,15 @@
from mkdocs.config import config_options


def replace_autolink_references(markdown, ref_prefix, target_url):
    """Turn bare references such as ``TAG-123`` into Markdown links.

    Args:
        markdown: The Markdown source text to process.
        ref_prefix: The reference format, e.g. ``"TAG-<num>"``. If it does
            not contain the ``<num>`` placeholder, ``<num>`` is appended.
        target_url: The link target; every ``<num>`` in it is replaced by
            the identifier captured from the reference.

    Returns:
        The Markdown text with every matching reference rewritten as
        ``[<reference>](<url>)``. A reference is left untouched when it is
        directly preceded by ``#``, ``[`` or ``/`` (attr_list ids, existing
        or reference-style links, and URLs).

    The identifier may contain word characters and hyphens. Matching is
    case-insensitive; the link text always uses the spelling of
    ``ref_prefix``.
    """
    if "<num>" not in ref_prefix:
        ref_prefix = ref_prefix + "<num>"

    # Escape the literal pieces of the prefix so that regex metacharacters
    # in it (e.g. "C++-<num>") are matched verbatim, and splice the capture
    # group in where the placeholder was.
    prefix_pattern = r"(?P<num>[-\w]+)".join(
        re.escape(part) for part in ref_prefix.split("<num>")
    )
    # The flag belongs on the compiled pattern: passing it positionally to
    # re.sub() would be taken as ``count`` and cap the replacements at two.
    find_regex = re.compile(r"(?<![#\[/])" + prefix_pattern, flags=re.IGNORECASE)

    def link_reference(matchobj):
        # A callable replacement keeps backslashes in the prefix or URL
        # from being interpreted as replacement-template escapes.
        num = matchobj.group("num")
        return "[{}]({})".format(
            ref_prefix.replace("<num>", num),
            target_url.replace("<num>", num),
        )

    return find_regex.sub(link_reference, markdown)


Expand Down
77 changes: 66 additions & 11 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,74 @@

from autolink_references.main import replace_autolink_references as autolink

markdown_samples = [
("#1", "[#1](http://gh/1)"),
("hello #1", "hello [#1](http://gh/1)"),
("(#2)", "([#2](http://gh/2))"),
("x (#2)", "x ([#2](http://gh/2))"),
("x (#2) y", "x ([#2](http://gh/2)) y"),
("(#2)", "([#2](http://gh/2))"),
# Bare references that must be rewritten as links. Each entry is
# (ref_prefix, target_url, test_input, expected).
simple_replace = [
    ("TAG-<num>", "http://gh/<num>", "TAG-123", "[TAG-123](http://gh/123)"),
    ("TAG-<num>", "http://gh/<num>", "x TAG-123", "x [TAG-123](http://gh/123)"),
    ("TAG-<num>", "http://gh/<num>", "TAG-123 x", "[TAG-123](http://gh/123) x"),
    ("TAG-<num>", "http://gh/<num>", "x TAG-123 y", "x [TAG-123](http://gh/123) y"),
    ("TAG-<num>", "http://gh/TAG-<num>", "(TAG-123)", "([TAG-123](http://gh/TAG-123))"),
    # Prefix given without the <num> placeholder.
    ("TAG-", "http://forgot-num/<num>", "TAG-543", "[TAG-543](http://forgot-num/543)"),
    # Identifier containing an underscore and a hyphen.
    (
        "TAG-<num>",
        "http://gh/TAG-<num>",
        "(TAG-12_3-4)",
        "([TAG-12_3-4](http://gh/TAG-12_3-4))",
    ),
    # Two references in the same text.
    (
        "TAG-<num>",
        "http://gh/<num>",
        "x TAG-123 y TAG-456 z",
        "x [TAG-123](http://gh/123) y [TAG-456](http://gh/456) z",
    ),
    # Identifier mixing letters and digits.
    (
        "TAG-<num>",
        "http://gh/TAG-<num>",
        "TAG-Ab123dD",
        "[TAG-Ab123dD](http://gh/TAG-Ab123dD)",
    ),
]

# Inputs that are already inline Markdown links: the expected output is the
# input itself, so each case is expanded from (ref_prefix, target_url, text).
ignore_already_linked = [
    (prefix, url, text, text)
    for prefix, url, text in (
        ("TAG-<num>", "http://gh/<num>", "[TAG-789](http://gh/789)"),
        ("TAG-<num>", "http://gh/TAG-<num>", "[TAG-789](http://gh/TAG-789)"),
    )
]

# These test cases address #4. Reference-style links should be ignored.
# Each case is expanded from (ref_prefix, target_url, text); the expected
# output equals the input because nothing may be rewritten.
ignore_ref_links = [
    (prefix, url, text, text)
    for prefix, url, text in (
        ("TAG-<num>", "http://gh/<num>", "[TAG-456]"),
        ("TAG-<num>", "http://gh/<num>", "[TAG-456][test456]"),
        ("TAG-<num>", "http://gh/<num>", "[TAG-456] [tag456]"),
        ("TAG-<num>", "http://gh/TAG-<num>", "[tag456]: http://gh/TAG-456"),
    )
]

@pytest.mark.parametrize("test_input,expected", markdown_samples)
def test_parser(test_input, expected):
ref_prefix = "#<num>"
target_url = "http://gh/<num>"

@pytest.mark.parametrize(
    "ref_prefix, target_url, test_input, expected",
    [*simple_replace, *ignore_already_linked, *ignore_ref_links],
)
def test_parser(ref_prefix, target_url, test_input, expected):
    """Check that autolinking ``test_input`` produces ``expected``."""
    rendered = autolink(test_input, ref_prefix, target_url)
    assert rendered == expected


# This test addresses #5. It currently only checks for '#' before the link.
def test_with_attr_list():
    """A reference preceded by '#' inside an attr_list is left unchanged."""
    heading = "## Feature 1 { #F-001 .class-feature }"
    assert autolink(heading, "F-<num>", "http://gh/<num>") == heading

0 comments on commit fe9c5be

Please sign in to comment.