From fe9c5be0addc17a22255adef4d65bbcf788f3e0e Mon Sep 17 00:00:00 2001 From: yqbear Date: Fri, 22 Dec 2023 05:15:37 -0800 Subject: [PATCH] Allow extended set for and ignore ref style links, already linked items, and attr_list cases with '#' before the ref (#6) * fix: update to regex matching to resolve issues * Use negative lookbehind to ignore ref links and already linked refs * Handle word characters and hyphen for * Add tests for new cases Refs: #1 Refs: #4 * docs: add some comments to test cases * fix: removed code left by mistake --- .vscode/settings.json | 11 ++++++ autolink_references/main.py | 27 ++++--------- tests/test_parser.py | 77 +++++++++++++++++++++++++++++++------ 3 files changed, 85 insertions(+), 30 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..5259ece --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/autolink_references/main.py b/autolink_references/main.py index 1d624bb..4bf5cb0 100755 --- a/autolink_references/main.py +++ b/autolink_references/main.py @@ -3,26 +3,15 @@ from mkdocs.config import config_options -def replace_autolink_references(markdown, reference_prefix, target_url): - if "" not in reference_prefix: - reference_prefix = reference_prefix + "" - - find_regex = reference_prefix.replace("", "(?P[0-9]+)") - find_regex = ( - "(?P\\[)?(?P" + find_regex + ")(?(b)\\])(?(b)(?:\\((?P.*?)\\))?)" +def replace_autolink_references(markdown, ref_prefix, target_url): + if "" not in ref_prefix: + ref_prefix = ref_prefix + "" + find_regex = re.compile( + r"(?", r"(?P[-\w]+)") ) - - def ref_replace(matchobj): - if matchobj.group("url"): - return 
matchobj.group(0) - - return "[{}]({})".format( - reference_prefix.replace("", matchobj.group("num")), - target_url.replace("", matchobj.group("num")), - ) - - markdown = re.sub(find_regex, ref_replace, markdown, flags=re.IGNORECASE) - + linked_ref = rf"[{ref_prefix}](" + target_url + r")" + replace_text = linked_ref.replace(r"", r"\g") + markdown = re.sub(find_regex, replace_text, markdown, re.IGNORECASE) return markdown diff --git a/tests/test_parser.py b/tests/test_parser.py index 152382a..cca7ab3 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2,19 +2,74 @@ from autolink_references.main import replace_autolink_references as autolink -markdown_samples = [ - ("#1", "[#1](http://gh/1)"), - ("hello #1", "hello [#1](http://gh/1)"), - ("(#2)", "([#2](http://gh/2))"), - ("x (#2)", "x ([#2](http://gh/2))"), - ("x (#2) y", "x ([#2](http://gh/2)) y"), - ("(#2)", "([#2](http://gh/2))"), +simple_replace = [ + ("TAG-", "http://gh/", "TAG-123", "[TAG-123](http://gh/123)"), + ("TAG-", "http://gh/", "x TAG-123", "x [TAG-123](http://gh/123)"), + ("TAG-", "http://gh/", "TAG-123 x", "[TAG-123](http://gh/123) x"), + ("TAG-", "http://gh/", "x TAG-123 y", "x [TAG-123](http://gh/123) y"), + ("TAG-", "http://gh/", "x TAG-123 y", "x [TAG-123](http://gh/123) y"), + ("TAG-", "http://gh/TAG-", "(TAG-123)", "([TAG-123](http://gh/TAG-123))"), + ("TAG-", "http://forgot-num/", "TAG-543", "[TAG-543](http://forgot-num/543)"), + ( + "TAG-", + "http://gh/TAG-", + "(TAG-12_3-4)", + "([TAG-12_3-4](http://gh/TAG-12_3-4))", + ), + ( + "TAG-", + "http://gh/", + "x TAG-123 y TAG-456 z", + "x [TAG-123](http://gh/123) y [TAG-456](http://gh/456) z", + ), + ( + "TAG-", + "http://gh/TAG-", + "TAG-Ab123dD", + "[TAG-Ab123dD](http://gh/TAG-Ab123dD)", + ), ] +ignore_already_linked = [ + ( + "TAG-", + "http://gh/", + "[TAG-789](http://gh/789)", + "[TAG-789](http://gh/789)", + ), + ( + "TAG-", + "http://gh/TAG-", + "[TAG-789](http://gh/TAG-789)", + "[TAG-789](http://gh/TAG-789)", + ), +] + +# 
These test cases address #4. Reference style links should be ignored. +ignore_ref_links = [ + ("TAG-", "http://gh/", "[TAG-456]", "[TAG-456]"), + ("TAG-", "http://gh/", "[TAG-456][test456]", "[TAG-456][test456]"), + ("TAG-", "http://gh/", "[TAG-456] [tag456]", "[TAG-456] [tag456]"), + ( + "TAG-", + "http://gh/TAG-", + "[tag456]: http://gh/TAG-456", + "[tag456]: http://gh/TAG-456", + ), +] -@pytest.mark.parametrize("test_input,expected", markdown_samples) -def test_parser(test_input, expected): - ref_prefix = "#" - target_url = "http://gh/" +@pytest.mark.parametrize( + "ref_prefix, target_url, test_input, expected", + simple_replace + ignore_already_linked + ignore_ref_links, +) +def test_parser(ref_prefix, target_url, test_input, expected): assert autolink(test_input, ref_prefix, target_url) == expected + + +# This test addresses #5. It currently only checks for '#' before the link. +def test_with_attr_list(): + text = "## Feature 1 { #F-001 .class-feature }" + ref_prefix = "F-" + target_url = "http://gh/" + assert autolink(text, ref_prefix, target_url) == text