Allow extended set for <num> and ignore ref style links, already linked items, and attr_list cases with '#' before the ref (#6)

* fix: update to regex matching to resolve issues
* Use negative lookbehind to ignore ref links and already linked refs
* Handle word characters and hyphen for <num>
* Add tests for new cases

Refs: #1
Refs: #4

* docs: add some comments to test cases
* fix: removed code left by mistake
theskumar · Dec 22, 2023 · fe9c5be · fe9c5be
1 parent 56493ae
commit fe9c5be
Show file tree

Hide file tree

Showing 3 changed files with 85 additions and 30 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,11 @@
+{
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter"
+    },
+    "python.formatting.provider": "none"
+}
diff --git a/autolink_references/main.py b/autolink_references/main.py
@@ -3,26 +3,15 @@
 from mkdocs.config import config_options
 
 
def replace_autolink_references(markdown, ref_prefix, target_url):
    """Turn bare references in *markdown* into Markdown links.

    Args:
        markdown: The Markdown text to scan.
        ref_prefix: The reference form to look for, e.g. ``"TAG-<num>"``.
            ``<num>`` marks where the identifier sits; when it is missing it
            is appended to the end of the prefix.  Everything other than
            ``<num>`` is matched literally (case-insensitively).
        target_url: URL template for the link; every ``<num>`` in it is
            replaced by the identifier that was matched.

    Returns:
        The text with each matching reference rewritten as
        ``[<ref_prefix with identifier>](<target_url with identifier>)``.
        References directly preceded by ``#``, ``[`` or ``/`` are left
        untouched, which skips attr_list ids, already linked or
        reference-style links, and occurrences inside URLs.
    """
    if "<num>" not in ref_prefix:
        ref_prefix = ref_prefix + "<num>"

    # Escape the literal pieces of the prefix so characters such as "." or
    # "+" are matched verbatim instead of being interpreted as regex syntax.
    literal_parts = (re.escape(part) for part in ref_prefix.split("<num>"))
    # The flag is given to compile(): passing re.IGNORECASE positionally to
    # re.sub() would land in the ``count`` slot and cap replacements at 2.
    find_regex = re.compile(
        r"(?<![#\[/])" + r"(?P<num>[-\w]+)".join(literal_parts),
        re.IGNORECASE,
    )

    def make_link(match):
        # Build the link with plain string substitution so backslashes in
        # the prefix or URL are never treated as regex template escapes.
        num = match.group("num")
        link_text = ref_prefix.replace("<num>", num)
        link_url = target_url.replace("<num>", num)
        return f"[{link_text}]({link_url})"

    return find_regex.sub(make_link, markdown)
 
 

diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -2,19 +2,74 @@
 
 from autolink_references.main import replace_autolink_references as autolink
 
-markdown_samples = [
-    ("#1", "[#1](http://gh/1)"),
-    ("hello #1", "hello [#1](http://gh/1)"),
-    ("(#2)", "([#2](http://gh/2))"),
-    ("x (#2)", "x ([#2](http://gh/2))"),
-    ("x (#2) y", "x ([#2](http://gh/2)) y"),
-    ("(#2)", "([#2](http://gh/2))"),
+simple_replace = [
+    ("TAG-<num>", "http://gh/<num>", "TAG-123", "[TAG-123](http://gh/123)"),
+    ("TAG-<num>", "http://gh/<num>", "x TAG-123", "x [TAG-123](http://gh/123)"),
+    ("TAG-<num>", "http://gh/<num>", "TAG-123 x", "[TAG-123](http://gh/123) x"),
+    ("TAG-<num>", "http://gh/<num>", "x TAG-123 y", "x [TAG-123](http://gh/123) y"),
+    ("TAG-<num>", "http://gh/<num>", "x TAG-123 y", "x [TAG-123](http://gh/123) y"),
+    ("TAG-<num>", "http://gh/TAG-<num>", "(TAG-123)", "([TAG-123](http://gh/TAG-123))"),
+    ("TAG-", "http://forgot-num/<num>", "TAG-543", "[TAG-543](http://forgot-num/543)"),
+    (
+        "TAG-<num>",
+        "http://gh/TAG-<num>",
+        "(TAG-12_3-4)",
+        "([TAG-12_3-4](http://gh/TAG-12_3-4))",
+    ),
+    (
+        "TAG-<num>",
+        "http://gh/<num>",
+        "x TAG-123 y TAG-456 z",
+        "x [TAG-123](http://gh/123) y [TAG-456](http://gh/456) z",
+    ),
+    (
+        "TAG-<num>",
+        "http://gh/TAG-<num>",
+        "TAG-Ab123dD",
+        "[TAG-Ab123dD](http://gh/TAG-Ab123dD)",
+    ),
 ]
 
+ignore_already_linked = [
+    (
+        "TAG-<num>",
+        "http://gh/<num>",
+        "[TAG-789](http://gh/789)",
+        "[TAG-789](http://gh/789)",
+    ),
+    (
+        "TAG-<num>",
+        "http://gh/TAG-<num>",
+        "[TAG-789](http://gh/TAG-789)",
+        "[TAG-789](http://gh/TAG-789)",
+    ),
+]
+
+# These test cases address #4. Reference style links should be ignored.
+ignore_ref_links = [
+    ("TAG-<num>", "http://gh/<num>", "[TAG-456]", "[TAG-456]"),
+    ("TAG-<num>", "http://gh/<num>", "[TAG-456][test456]", "[TAG-456][test456]"),
+    ("TAG-<num>", "http://gh/<num>", "[TAG-456] [tag456]", "[TAG-456] [tag456]"),
+    (
+        "TAG-<num>",
+        "http://gh/TAG-<num>",
+        "[tag456]: http://gh/TAG-456",
+        "[tag456]: http://gh/TAG-456",
+    ),
+]
 
-@pytest.mark.parametrize("test_input,expected", markdown_samples)
-def test_parser(test_input, expected):
-    ref_prefix = "#<num>"
-    target_url = "http://gh/<num>"
 
+@pytest.mark.parametrize(
+    "ref_prefix, target_url, test_input, expected",
+    simple_replace + ignore_already_linked + ignore_ref_links,
+)
+def test_parser(ref_prefix, target_url, test_input, expected):
     assert autolink(test_input, ref_prefix, target_url) == expected
+
+
+# This test addresses #5. It currently only checks for '#' before the link.
+def test_with_attr_list():
+    text = "## Feature 1 { #F-001 .class-feature }"
+    ref_prefix = "F-<num>"
+    target_url = "http://gh/<num>"
+    assert autolink(text, ref_prefix, target_url) == text