From c73916b6ad6d1fdd6f60a7c9339fd7a64d61f5d2 Mon Sep 17 00:00:00 2001 From: Christian Herdtweck Date: Fri, 25 Nov 2022 13:51:05 +0100 Subject: [PATCH] tests: Re-enable samples skipped because of #215 Some samples triggered antivirus engines, issues #215 and #217 ended with the agreement to encapsulate problematic samples in encrypted zip containers and decrypt them on-the-fly. Initial support for this was added but that did not cover 5 tests. Create on-the-fly decryption for these tests as well and re-enable them. --- tests/msodde/test_basic.py | 91 +++++++++++++---------------- tests/test_utils/__init__.py | 1 + tests/test_utils/testdata_reader.py | 44 +++++++++++++- 3 files changed, 86 insertions(+), 50 deletions(-) diff --git a/tests/msodde/test_basic.py b/tests/msodde/test_basic.py index 72aa424a5..2b54fbe90 100644 --- a/tests/msodde/test_basic.py +++ b/tests/msodde/test_basic.py @@ -12,13 +12,11 @@ import sys import os from os.path import join, basename -from traceback import print_exc -import json -from collections import OrderedDict from oletools import msodde from oletools.crypto import \ WrongEncryptionPassword, CryptoLibNotImported, check_msoffcrypto -from tests.test_utils import call_and_capture, DATA_BASE_DIR as BASE_DIR +from tests.test_utils import call_and_capture, decrypt_sample,\ + DATA_BASE_DIR as BASE_DIR class TestReturnCode(unittest.TestCase): @@ -26,14 +24,13 @@ class TestReturnCode(unittest.TestCase): def test_valid_doc(self): """ check that a valid doc file leads to 0 exit status """ for filename in ( - 'harmless-clean', - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED: - # 'dde-test-from-office2003', - # 'dde-test-from-office2016', - # 'dde-test-from-office2013-utf_16le-korean' + 'harmless-clean.doc', + 'dde-test-from-office2003.doc.zip', + 'dde-test-from-office2016.doc.zip', + 'dde-test-from-office2013-utf_16le-korean.doc.zip', ): - self.do_test_validity(join(BASE_DIR, 'msodde', - filename + '.doc')) + with decrypt_sample(join('msodde', filename)) as temp_name: + self.do_test_validity(temp_name) def test_valid_docx(self): """ check that a valid docx file leads to 0 exit status """ @@ -52,11 +49,11 @@ def test_valid_xml(self): for filename in ( 'harmless-clean-2003.xml', 'dde-in-excel2003.xml', - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED: - # 'dde-in-word2003.xml', - # 'dde-in-word2007.xml' + 'dde-in-word2003.xml.zip', + 'dde-in-word2007.xml.zip' ): - self.do_test_validity(join(BASE_DIR, 'msodde', filename)) + with decrypt_sample(join('msodde', filename)) as temp_name: + self.do_test_validity(temp_name) def test_invalid_none(self): """ check that no file argument leads to non-zero exit status """ @@ -99,13 +96,11 @@ def test_encrypted(self): def do_test_validity(self, filename, expect_error=None): """ helper for test_[in]valid_* """ found_error = None - # DEBUG: print('Testing file {}'.format(filename)) try: msodde.process_maybe_encrypted(filename, - field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) except Exception as exc: found_error = exc - # DEBUG: print_exc() if expect_error and not found_error: self.fail('Expected {} but msodde finished without errors for {}' @@ -145,15 +140,14 @@ def get_dde_from_output(output): """ return [o for o in output.splitlines()] - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED: - # def test_with_dde(self): - # """ check that dde links appear on stdout """ - # filename = 'dde-test-from-office2003.doc' - # output = msodde.process_maybe_encrypted( - # join(BASE_DIR, 'msodde', filename), - # field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) - # self.assertNotEqual(len(self.get_dde_from_output(output)), 0, - # msg='Found no dde links in output of ' + filename) + def test_with_dde(self): + """ check that dde links appear on stdout """ + filename = 'dde-test-from-office2003.doc.zip' + with decrypt_sample(join('msodde', filename)) as temp_file: + output = msodde.process_maybe_encrypted(temp_file, + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) + self.assertNotEqual(len(self.get_dde_from_output(output)), 0, + msg='Found no dde links in output of ' + filename) def test_no_dde(self): """ check that no dde links appear on stdout """ @@ -164,15 +158,14 @@ def test_no_dde(self): self.assertEqual(len(self.get_dde_from_output(output)), 0, msg='Found dde links in output of ' + filename) - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED: - # def test_with_dde_utf16le(self): - # """ check that dde links appear on stdout """ - # filename = 'dde-test-from-office2013-utf_16le-korean.doc' - # output = msodde.process_maybe_encrypted( - # join(BASE_DIR, 'msodde', filename), - # field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) - # self.assertNotEqual(len(self.get_dde_from_output(output)), 0, - # msg='Found no dde links in output of ' + filename) + def test_with_dde_utf16le(self): + """ check that dde links appear on stdout """ + filename = 'dde-test-from-office2013-utf_16le-korean.doc.zip' + with decrypt_sample(join('msodde', filename)) as temp_file: + output = msodde.process_maybe_encrypted(temp_file, + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) + self.assertNotEqual(len(self.get_dde_from_output(output)), 0, + msg='Found no dde links in output of ' + filename) def test_excel(self): """ check that dde links are found in excel 2007+ files """ @@ -188,19 +181,19 @@ def test_excel(self): def test_xml(self): """ check that dde in xml from word / excel is found """ - # TODO: TEMPORARILY DISABLED UNTIL ISSUE #215 IS FIXED: - for name_part in ('excel2003',): #, 'word2003', 'word2007': - filename = 'dde-in-' + name_part + '.xml' - output = msodde.process_maybe_encrypted( - join(BASE_DIR, 'msodde', filename), - field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) - links = self.get_dde_from_output(output) - self.assertEqual(len(links), 1, 'found {0} dde-links in {1}' - .format(len(links), filename)) - self.assertTrue('cmd' in links[0], 'no "cmd" in dde-link for {0}' - .format(filename)) - self.assertTrue('calc' in links[0], 'no "calc" in dde-link for {0}' - .format(filename)) + for filename in ('dde-in-excel2003.xml', + 'dde-in-word2003.xml.zip', + 'dde-in-word2007.xml.zip'): + with decrypt_sample(join('msodde', filename)) as temp_file: + output = msodde.process_maybe_encrypted(temp_file, + field_filter_mode=msodde.FIELD_FILTER_BLACKLIST) + links = self.get_dde_from_output(output) + self.assertEqual(len(links), 1, 'found {0} dde-links in {1}' + .format(len(links), filename)) + self.assertTrue('cmd' in links[0], 'no "cmd" in dde-link for {0}' + .format(filename)) + self.assertTrue('calc' in links[0], 'no "calc" in dde-link for {0}' + .format(filename)) def test_clean_rtf_blacklist(self): """ find a lot of hyperlinks in rtf spec """ diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index 16281fe0b..ad8234168 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -1 +1,2 @@ from .utils import * +from .testdata_reader import * \ No newline at end of file diff --git a/tests/test_utils/testdata_reader.py b/tests/test_utils/testdata_reader.py index cc96bca0e..5f1a6baad 100644 --- a/tests/test_utils/testdata_reader.py +++ b/tests/test_utils/testdata_reader.py @@ -7,7 +7,10 @@ """ import os, sys, zipfile -from os.path import dirname, abspath, normpath, relpath, join, basename +from os.path import relpath, join, isfile +from contextlib import contextmanager +from tempfile import mkstemp + from . import DATA_BASE_DIR # Passwort used to encrypt problematic test samples inside a zip container @@ -82,3 +85,42 @@ def loop_over_files(subdir=''): yield relative_path, read_encrypted(relative_path) else: yield relative_path, read(relative_path) + + +@contextmanager +def decrypt_sample(relpath): + """ + Decrypt test sample, save to tempfile, yield temp file name. + + Use as context-manager, deletes tempfile after use. + + If sample is not encrypted at all (filename does not end in '.zip'), + yields absolute path to sample itself, so can apply this code also + to non-encrypted samples. + + Code based on test_encoding_handler.temp_file(). + + :param relpath: path inside `DATA_BASE_DIR`, should end in '.zip' + :return: absolute path name to decrypted sample. + """ + if not relpath.endswith('.zip'): + yield get_path_from_root(relpath) + else: + tmp_descriptor = None + tmp_name = None + try: + tmp_descriptor, tmp_name = mkstemp(text=False) + with zipfile.ZipFile(get_path_from_root(relpath), 'r') as unzipper: + # no need to iterate over blobs, our test files are all small + os.write(tmp_descriptor, unzipper.read(unzipper.namelist()[0], + pwd=ENCRYPTED_FILES_PASSWORD)) + os.close(tmp_descriptor) + tmp_descriptor = None + yield tmp_name + except Exception: + raise + finally: + if tmp_descriptor is not None: + os.close(tmp_descriptor) + if tmp_name is not None and isfile(tmp_name): + os.unlink(tmp_name) \ No newline at end of file