Skip to content

Commit

Permalink
Remove duplicate token matcher Python code (#205)
Browse files Browse the repository at this point in the history
  • Loading branch information
kieran-ryan committed Jan 4, 2024
1 parent 99041f5 commit 26b0f70
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 87 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ This project adheres to [Semantic Versioning](http://semver.org).
This document is formatted according to the principles of [Keep A CHANGELOG](http://keepachangelog.com).

## [Unreleased]
### Changed
- [Python] Removed duplicate code in markdown token matcher ([#205](https://github.com/cucumber/gherkin/pull/205))

### Added
- (i18n) Added Vietnamese translation of "Rule"

Expand Down
1 change: 1 addition & 0 deletions python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ acceptance/
*.pyc
dist/
build/
venv/
.tox/
.coverage
.coverage.*
1 change: 1 addition & 0 deletions python/MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ gherkin/parser.py
gherkin/token.py
gherkin/token_formatter_builder.py
gherkin/token_matcher.py
gherkin/token_matcher_markdown.py
gherkin/token_scanner.py
gherkin/pickles/__init__.py
gherkin/pickles/compiler.py
Expand Down
6 changes: 5 additions & 1 deletion python/gherkin/token_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,15 @@ def match_DocStringSeparator(self, token):
# close
return self._match_DocStringSeparator(token, self._active_doc_string_separator, False)

@staticmethod
def _default_docstring_content_type():
return None

def _match_DocStringSeparator(self, token, separator, is_open):
if not token.line.startswith(separator):
return False

content_type = None
content_type = self._default_docstring_content_type()
if is_open:
content_type = token.line.get_rest_trimmed(len(separator))
self._active_doc_string_separator = separator
Expand Down
94 changes: 8 additions & 86 deletions python/gherkin/token_matcher_markdown.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,16 @@
import re
from collections import defaultdict
from .dialect import Dialect
from .errors import NoSuchLanguageException
from .token_matcher import TokenMatcher

KEYWORD_PREFIX_BULLET = '^(\\s*[*+-]\\s*)'
KEYWORD_PREFIX_HEADER = '^(#{1,6}\\s)'

class GherkinInMarkdownTokenMatcher(object):
LANGUAGE_RE = re.compile(r"^\s*#\s*language\s*:\s*([a-zA-Z\-_]+)\s*$")
class GherkinInMarkdownTokenMatcher(TokenMatcher):

def __init__(self, dialect_name='en'):
self._default_dialect_name = dialect_name
self._change_dialect(dialect_name)
self.reset()
super(GherkinInMarkdownTokenMatcher, self).__init__(dialect_name)

def reset(self):
if self.dialect_name != self._default_dialect_name:
self._change_dialect(self._default_dialect_name)
self._indent_to_remove = 0
self._active_doc_string_separator = None
super(GherkinInMarkdownTokenMatcher, self).reset()
self.matched_feature_line=False

def match_FeatureLine(self, token):
Expand Down Expand Up @@ -52,7 +44,7 @@ def match_ExamplesLine(self, token):
return self._match_title_line(KEYWORD_PREFIX_HEADER, self.dialect.examples_keywords, ':', token, 'ExamplesLine')

def match_TableRow(self, token):
# Gherkin tables must be indented 2-5 spaces in order to be distinguidedn from non-Gherkin tables
# Gherkin tables must be indented 2-5 spaces to be distinguishable from non-Gherkin tables

if re.match('^\\s\\s\\s?\\s?\\s?\\|',token.line.get_line_text(0)):
table_cells = token.line.table_cells
Expand Down Expand Up @@ -119,7 +111,6 @@ def match_Language(self, token):
return False

def match_TagLine(self, token):

tags = []
matching_tags = re.finditer('`(@[^`]+)`', token.line.get_line_text())
idx=0
Expand All @@ -144,88 +135,19 @@ def match_DocStringSeparator(self, token):
# close
return self._match_DocStringSeparator(token, self._active_doc_string_separator, False)

def _match_DocStringSeparator(self, token, separator, is_open):
if not token.line.startswith(separator):
return False

content_type = ''
if is_open:
content_type = token.line.get_rest_trimmed(len(separator))
self._active_doc_string_separator = separator
self._indent_to_remove = token.line.indent
else:
self._active_doc_string_separator = None
self._indent_to_remove = 0

# TODO: Use the separator as keyword. That's needed for pretty printing.
self._set_token_matched(token, 'DocStringSeparator', content_type, separator)
return True

def match_Other(self, token):
# take the entire line, except removing DocString indents
text = token.line.get_line_text(self._indent_to_remove)
self._set_token_matched(token, 'Other', self._unescaped_docstring(text), indent=0)
return True

def match_EOF(self, token):
if not token.eof():
return False

self._set_token_matched(token, 'EOF')
return True
@staticmethod
def _default_docstring_content_type():
return ''

def _match_title_line(self, prefix, keywords, keywordSuffix, token, token_type):

keywords_or_list="|".join(map(lambda x: re.escape(x), keywords))
match = re.search(u'{}({}){}(.*)'.format(prefix, keywords_or_list, keywordSuffix), token.line.get_line_text())
indent = token.line.indent
result = False

if(match):
matchedKeyword = match.group(2)
indent += len(match.group(1))
self._set_token_matched(token, token_type, match.group(3).strip(), matchedKeyword, indent=indent)
return True
return False

def _set_token_matched(self, token, matched_type, text=None,
keyword=None, keyword_type=None, indent=None, items=None):
if items is None:
items = []
token.matched_type = matched_type
# text == '' should not result in None
token.matched_text = text.rstrip('\r\n') if text is not None else None
token.matched_keyword = keyword
token.matched_keyword_type = keyword_type
if indent is not None:
token.matched_indent = indent
else:
token.matched_indent = token.line.indent if token.line else 0
token.matched_items = items
token.location['column'] = token.matched_indent + 1
token.matched_gherkin_dialect = self.dialect_name

def _change_dialect(self, dialect_name, location=None):
dialect = Dialect.for_name(dialect_name)
if not dialect:
raise NoSuchLanguageException(dialect_name, location)

self.dialect_name = dialect_name
self.dialect = dialect
self.keyword_types = defaultdict(list)
for keyword in self.dialect.given_keywords:
self.keyword_types[keyword].append('Context')
for keyword in self.dialect.when_keywords:
self.keyword_types[keyword].append('Action')
for keyword in self.dialect.then_keywords:
self.keyword_types[keyword].append('Outcome')
for keyword in self.dialect.and_keywords + self.dialect.but_keywords:
self.keyword_types[keyword].append('Conjunction')

def _unescaped_docstring(self, text):
if self._active_doc_string_separator == '"""':
return text.replace('\\"\\"\\"', '"""')
elif self._active_doc_string_separator == '```':
return text.replace('\\`\\`\\`', '```')
else:
return text

0 comments on commit 26b0f70

Please sign in to comment.