From e6c0cbb1277bb4a085c228e7564f5014ccdf011f Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Wed, 12 Mar 2025 18:25:30 +0000 Subject: [PATCH 1/9] deal with () in filenames by escaping them (NO_JIRA_ --- main/githooks.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index ecfda0c..760968e 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -56,6 +56,14 @@ # File types that need a terminating newline TERMINATING_NEWLINE_EXTS = ['.c', '.cpp', '.h', '.inl'] +esc_re = re.compile(r'\s|[]()[]') +def _esc_char(match): + return '\\' + match.group(0) + + +def _escape_filename(filename): + return esc_re.sub(_esc_char, filename) + def _get_output(command, cwd='.'): return subprocess.check_output(command, shell=True, cwd=cwd).decode(errors='replace') @@ -121,7 +129,7 @@ def get_file_content_as_binary(filename): _skip(filename, 'File is not UTF-8 encoded') data = None else: - data = _get_output(f'git show :{filename}') + data = _get_output(f'git show :{_escape_filename(filename)}') return data @@ -134,7 +142,7 @@ def get_text_file_content(filename): if _is_github_event() or 'pytest' in sys.modules: data = Path(filename).read_text() else: - data = _get_output(f'git show :{filename}') + data = _get_output(f'git show :{_escape_filename(filename)}') return data @@ -166,7 +174,7 @@ def get_branch_files(): def add_file_to_index(filename): '''Add file to current commit''' - return _get_output(f'git add {filename}') + return _get_output(f'git add {_escape_filename(filename)}') def get_commit_files(): @@ -244,13 +252,13 @@ def get_changed_lines(modified_file): if _is_github_event(): if _is_pull_request(): output = _get_output( - f'git diff --unified=0 remotes/origin/{os.environ["GITHUB_BASE_REF"]}..remotes/origin/{os.environ["GITHUB_HEAD_REF"]} -- {modified_file}') + f'git diff --unified=0 remotes/origin/{os.environ["GITHUB_BASE_REF"]}..remotes/origin/{os.environ["GITHUB_HEAD_REF"]} -- {_escape_filename(modified_file)}') else: output = _get_output( - f'git diff --unified=0 HEAD~ {modified_file}') + f'git diff --unified=0 HEAD~ {_escape_filename(modified_file)}') else: output = _get_output( - f'git diff-index HEAD --unified=0 {modified_file}') + f'git diff-index HEAD --unified=0 {_escape_filename(modified_file)}') lines = [] for line in output.splitlines(): From c90fcf11cde73a0de9810eb770f6abb077b845f5 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Thu, 13 Mar 2025 13:47:22 +0000 Subject: [PATCH 2/9] Comments added (NO_JIRA) --- main/githooks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index 760968e..ad05a9b 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -56,13 +56,17 @@ # File types that need a terminating newline TERMINATING_NEWLINE_EXTS = ['.c', '.cpp', '.h', '.inl'] -esc_re = re.compile(r'\s|[]()[]') +_esc_re = re.compile(r'\s|[]()[]') def _esc_char(match): + ''' Lambda function to add in back-slashes to escape special chars as compiled in esc_re above + which makes filenames work with subprocess commands + ''' return '\\' + match.group(0) - def _escape_filename(filename): - return esc_re.sub(_esc_char, filename) + ''' Return an escaped filename - for example fi(1)le.txt would be changed to fi\\(1\\)le.txt + ''' + return _esc_re.sub(_esc_char, filename) def _get_output(command, cwd='.'): From 4a9c30fc818e6138cdc44a3d7e66d2d2bedd6231 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Thu, 13 Mar 2025 15:22:08 +0000 Subject: [PATCH 3/9] Set python version to 9.11 (NO_JIRA) --- .github/workflows/quality_check.yml | 2 +- .github/workflows/status_check.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/quality_check.yml b/.github/workflows/quality_check.yml index eb20453..0a41308 100644 --- a/.github/workflows/quality_check.yml +++ b/.github/workflows/quality_check.yml @@ -7,7 +7,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.11" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/status_check.yml b/.github/workflows/status_check.yml index 4259a8c..3e89078 100644 --- a/.github/workflows/status_check.yml +++ b/.github/workflows/status_check.yml @@ -13,7 +13,7 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.11" - name: Get the commit message run: | echo 'commit_message<> $GITHUB_ENV From 44e5f084fb9a2dda08249432cbb52e8b6fced012 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Fri, 14 Mar 2025 11:08:18 +0000 Subject: [PATCH 4/9] instead of escaping, use subprocess without Shell set to true to avoid file nastiness (NO_JIRA) --- main/githooks.py | 85 ++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 46 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index ad05a9b..c5496a3 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -56,21 +56,8 @@ # File types that need a terminating newline TERMINATING_NEWLINE_EXTS = ['.c', '.cpp', '.h', '.inl'] -_esc_re = re.compile(r'\s|[]()[]') -def _esc_char(match): - ''' Lambda function to add in back-slashes to escape special chars as compiled in esc_re above - which makes filenames work with subprocess commands - ''' - return '\\' + match.group(0) - -def _escape_filename(filename): - ''' Return an escaped filename - for example fi(1)le.txt would be changed to fi\\(1\\)le.txt - ''' - return _esc_re.sub(_esc_char, filename) - - -def _get_output(command, cwd='.'): - return subprocess.check_output(command, shell=True, cwd=cwd).decode(errors='replace') +def _get_output(command_list, cwd='.'): + return subprocess.check_output(command_list, cwd=cwd).decode(errors='replace') def _is_github_event(): @@ -103,7 +90,7 @@ def get_user(): if _is_github_event(): return os.environ['GITHUB_ACTOR'] else: - output = _get_output('git var GIT_AUTHOR_IDENT') + output = _get_output(['git', 'var', 'GIT_AUTHOR_IDENT']) match = re.match(r'^(.+) <', output) return match.group(1) @@ -116,7 +103,7 @@ def get_branch(): else: return os.environ['GITHUB_REF'].split('/')[-1] else: - return _get_output('git branch').split()[-1] + return _get_output(['git', 'branch']).split()[-1] def get_file_content_as_binary(filename): @@ -133,7 +120,7 @@ def get_file_content_as_binary(filename): _skip(filename, 'File is not UTF-8 encoded') data = None else: - data = _get_output(f'git show :{_escape_filename(filename)}') + data = _get_output(['git','show', f':{filename}']) return data @@ -146,7 +133,7 @@ def get_text_file_content(filename): if _is_github_event() or 'pytest' in sys.modules: data = Path(filename).read_text() else: - data = _get_output(f'git show :{_escape_filename(filename)}') + data = _get_output('git', 'show', f':{filename}') return data @@ -159,7 +146,7 @@ def get_sha(): GITHUB_SHA cannot be used because in a pull request it gives the sha of the fake merge commit. ''' - return _get_output(f'git rev-parse {get_branch()}') + return _get_output(['git','rev-parse', f'{get_branch()}']) def get_event(): @@ -173,12 +160,12 @@ def get_event(): def get_branch_files(): '''Get all files in branch''' branch = get_branch() - return _get_output(f'git ls-tree -r {branch} --name-only').splitlines() + return _get_output(['git','ls-tree', '-r', f'{branch}','--name-only']).splitlines() def add_file_to_index(filename): '''Add file to current commit''' - return _get_output(f'git add {_escape_filename(filename)}') + return _get_output(['git','add',f'{filename}']) def get_commit_files(): @@ -190,12 +177,15 @@ def get_commit_files(): ''' if _is_github_event(): + commands = ['git','diff','--ignore-submodules','--name-status'] if _is_pull_request(): - output = _get_output(f'git diff --ignore-submodules --name-status remotes/origin/{os.environ["GITHUB_BASE_REF"]}..remotes/origin/{os.environ["GITHUB_HEAD_REF"]} --') + commands += [f'remotes/origin/{os.environ["GITHUB_BASE_REF"]}..remotes/origin/{os.environ["GITHUB_HEAD_REF"]}','--'] else: - output = _get_output('git diff --ignore-submodules --name-status HEAD~.. --') + commands += ['HEAD~..', '--'] else: - output = _get_output('git diff-index --ignore-submodules HEAD --cached') + commands = ['git', 'diff-index', '--ignore-submodules', 'HEAD', '--cached'] + + output = _get_output(commands) result = defaultdict(list) for line in output.splitlines(): parts = line.split() @@ -254,15 +244,14 @@ def get_changed_lines(modified_file): :returns: A list of line number (integers and ranges) of changed lines ''' if _is_github_event(): + commands = ['git','diff','--unified=0'] if _is_pull_request(): - output = _get_output( - f'git diff --unified=0 remotes/origin/{os.environ["GITHUB_BASE_REF"]}..remotes/origin/{os.environ["GITHUB_HEAD_REF"]} -- {_escape_filename(modified_file)}') + commands += [f'remotes/origin/{os.environ["GITHUB_BASE_REF"]}..remotes/origin/{os.environ["GITHUB_HEAD_REF"]}', '--',f'{modified_file}'] else: - output = _get_output( - f'git diff --unified=0 HEAD~ {_escape_filename(modified_file)}') + commands += ['HEAD~', f'{modified_file}'] else: - output = _get_output( - f'git diff-index HEAD --unified=0 {_escape_filename(modified_file)}') + commands = [f'git', 'diff-index', 'HEAD', '--unified=0', f'{modified_file}'] + output = _get_output(commands) lines = [] for line in output.splitlines(): @@ -295,7 +284,7 @@ def _test(input, output): def get_config_setting(setting): '''Get the value of a config setting''' try: - return _get_output(f'git config --get {setting}').strip() + return _get_output(['git', 'config', '--get', f'{setting}']).strip() except subprocess.CalledProcessError: return None @@ -472,20 +461,24 @@ class TestTrimTrailingWhitespace(unittest.TestCase): def test_trim_trailing_whitespace(self): content = 'first line\nsecond line \nthird line ' trimmed_content = 'first line\nsecond line\nthird line' - with NamedTemporaryFile() as tmp: - Path(tmp.name).write_text(content) - - # Trailing whitespace found - retval = trim_trailing_whitespace_in_file(tmp.name, True, True) - self.assertEqual(retval, 1) - self.assertEqual(Path(tmp.name).read_text(), content) - - # Now remove the trailing whitespace - trim_trailing_whitespace_in_file(tmp.name, True, False, False) - # Trailing whitespace no longer found - self.assertEqual(Path(tmp.name).read_text(), trimmed_content) - retval = trim_trailing_whitespace_in_file(tmp.name, True, True) - self.assertEqual(retval, 0) + name = NamedTemporaryFile().name + Path(name).write_text(content) + # Trailing whitespace found + retval = trim_trailing_whitespace_in_file(name, True, True) + self.assertEqual(retval, 1) + self.assertEqual(Path(name).read_text(), content) + + # Now remove the trailing whitespace + trim_trailing_whitespace_in_file(name, True, False, False) + # Trailing whitespace no longer found + self.assertEqual(Path(name).read_text(), trimmed_content) + retval = trim_trailing_whitespace_in_file(name, True, True) + self.assertEqual(retval, 0) + + try: + Path(name).unlink(name) + except Exception as e: + pass def test_decodeerror(self): # A text file that is not utf-8 encoded - report and skip From 9c85798bb8b925bfdf1ba123744ae6aeae62cf2e Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Fri, 14 Mar 2025 11:27:22 +0000 Subject: [PATCH 5/9] correct syntax issue (NO_JIRA) --- main/githooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/githooks.py b/main/githooks.py index c5496a3..7a358f0 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -133,7 +133,7 @@ def get_text_file_content(filename): if _is_github_event() or 'pytest' in sys.modules: data = Path(filename).read_text() else: - data = _get_output('git', 'show', f':{filename}') + data = _get_output(['git', 'show', f':{filename}']) return data From d8d9e47d838bad1265fc85e1046d6021b3ff5896 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Fri, 14 Mar 2025 12:07:37 +0000 Subject: [PATCH 6/9] Make sure unlink is always executed using try...finally - allow any error to propagate rather than catching (NO_JIRA) --- main/githooks.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index 7a358f0..23ec8a1 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -461,24 +461,24 @@ class TestTrimTrailingWhitespace(unittest.TestCase): def test_trim_trailing_whitespace(self): content = 'first line\nsecond line \nthird line ' trimmed_content = 'first line\nsecond line\nthird line' - name = NamedTemporaryFile().name - Path(name).write_text(content) - # Trailing whitespace found - retval = trim_trailing_whitespace_in_file(name, True, True) - self.assertEqual(retval, 1) - self.assertEqual(Path(name).read_text(), content) - - # Now remove the trailing whitespace - trim_trailing_whitespace_in_file(name, True, False, False) - # Trailing whitespace no longer found - self.assertEqual(Path(name).read_text(), trimmed_content) - retval = trim_trailing_whitespace_in_file(name, True, True) - self.assertEqual(retval, 0) - + try: + name = NamedTemporaryFile().name + Path(name).write_text(content) + # Trailing whitespace found + retval = trim_trailing_whitespace_in_file(name, True, True) + self.assertEqual(retval, 1) + self.assertEqual(Path(name).read_text(), content) + + # Now remove the trailing whitespace + trim_trailing_whitespace_in_file(name, True, False, False) + # Trailing whitespace no longer found + self.assertEqual(Path(name).read_text(), trimmed_content) + retval = trim_trailing_whitespace_in_file(name, True, True) + self.assertEqual(retval, 0) + finally: Path(name).unlink(name) - except Exception as e: - pass + def test_decodeerror(self): # A text file that is not utf-8 encoded - report and skip From 3ffff0bbcee929db0b76e222860c6ee5c3007ce9 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Fri, 14 Mar 2025 12:09:02 +0000 Subject: [PATCH 7/9] make sure name is outside try block (NO_JIRA) --- main/githooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index 23ec8a1..387cc62 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -461,9 +461,9 @@ class TestTrimTrailingWhitespace(unittest.TestCase): def test_trim_trailing_whitespace(self): content = 'first line\nsecond line \nthird line ' trimmed_content = 'first line\nsecond line\nthird line' - + + name = NamedTemporaryFile().name try: - name = NamedTemporaryFile().name Path(name).write_text(content) # Trailing whitespace found retval = trim_trailing_whitespace_in_file(name, True, True) From 4b62aa976abc357f6a0ca06e78fdc6a6e2fb2a08 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Fri, 14 Mar 2025 12:10:12 +0000 Subject: [PATCH 8/9] and last - dont need an argument to unlink() (NO_JIRA) --- main/githooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/githooks.py b/main/githooks.py index 387cc62..78cc60f 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -477,7 +477,7 @@ def test_trim_trailing_whitespace(self): retval = trim_trailing_whitespace_in_file(name, True, True) self.assertEqual(retval, 0) finally: - Path(name).unlink(name) + Path(name).unlink() def test_decodeerror(self): From 6a5aa0e6c4ef81c1b822eb67a9a7d82752dafdb9 Mon Sep 17 00:00:00 2001 From: Jason Cole Date: Fri, 14 Mar 2025 12:35:41 +0000 Subject: [PATCH 9/9] remove unnecessary variables and f-strings (NO_JIRA) --- main/githooks.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/main/githooks.py b/main/githooks.py index 78cc60f..7be88b3 100755 --- a/main/githooks.py +++ b/main/githooks.py @@ -146,7 +146,7 @@ def get_sha(): GITHUB_SHA cannot be used because in a pull request it gives the sha of the fake merge commit. ''' - return _get_output(['git','rev-parse', f'{get_branch()}']) + return _get_output(['git','rev-parse', get_branch()]) def get_event(): @@ -159,13 +159,12 @@ def get_event(): def get_branch_files(): '''Get all files in branch''' - branch = get_branch() - return _get_output(['git','ls-tree', '-r', f'{branch}','--name-only']).splitlines() + return _get_output(['git','ls-tree', '-r', get_branch(),'--name-only']).splitlines() def add_file_to_index(filename): '''Add file to current commit''' - return _get_output(['git','add',f'{filename}']) + return _get_output(['git','add',filename]) def get_commit_files(): @@ -284,7 +283,7 @@ def _test(input, output): def get_config_setting(setting): '''Get the value of a config setting''' try: - return _get_output(['git', 'config', '--get', f'{setting}']).strip() + return _get_output(['git', 'config', '--get', setting]).strip() except subprocess.CalledProcessError: return None