Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Asynchronous extractor module #741

Merged
merged 22 commits into from
Jun 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: Run tests
run: |
python setup.py test
pytest -v test/test_scanner.py test/test_checkers.py
pytest -v test/test_scanner.py test/test_checkers.py test/test_extractor.py
long_tests:
name: Long tests on python3.8
runs-on: ubuntu-latest
Expand Down Expand Up @@ -73,6 +73,7 @@ jobs:
pip install coverage
coverage run --append setup.py test
pytest --cov --cov-append -n auto -v test/test_scanner.py test/test_checkers.py
pytest --cov --cov-append -v test/test_extractor.py
coverage xml -o py38_cov_report.xml
- name: upload code coverage to codecov
uses: codecov/codecov-action@v1
Expand Down Expand Up @@ -121,7 +122,7 @@ jobs:
python -m unittest test.test_output_engine
- name: Run extract tests
run: |
python -m unittest test.test_extract
pytest -v test/test_extractor.py
- name: Run file tests
run: |
python -m unittest test.test_file
Expand Down
190 changes: 109 additions & 81 deletions cve_bin_tool/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,34 @@
"""
Extraction of archives
"""
import glob
import asyncio
import itertools
import os
import shutil
import subprocess
import sys
import tempfile
from contextlib import contextmanager

from .log import LOGGER
from .util import inpath
from .util import (
aio_unpack_archive,
aio_run_command,
aio_glob,
aio_inpath,
aio_rmdir,
aio_makedirs,
aio_mkdtemp,
file_writer,
ChangeDirContext,
)


class ExtractionFailed(ValueError):
    """Signals that an extractor could not unpack the given archive."""


class UnknownArchiveType(ValueError):
    """Signals that no known extractor handles the file's extension."""


@contextmanager
def popen_ctx(*args, **kwargs):
    """Run ``subprocess.Popen`` as a context manager with guaranteed cleanup.

    Exists because Python 2's ``Popen`` could not be used in a ``with``
    statement. All positional and keyword arguments are forwarded unchanged
    to ``subprocess.Popen``.

    Yields:
        The started ``Popen`` object.

    On exit (normal or via exception) the stdout pipe, if one was created,
    is closed and the child process is terminated and reaped.
    """
    proc = subprocess.Popen(*args, **kwargs)
    try:
        yield proc
    finally:
        try:
            # ``proc.stdout`` is only a pipe object when stdout=PIPE was
            # requested; for DEVNULL, None or a real file it is None.  Test
            # the attribute itself rather than the presence of the keyword,
            # otherwise cleanup would crash before the process is reaped.
            if proc.stdout is not None:
                proc.stdout.close()
        finally:
            # Always stop and reap the child, even if closing the pipe fails.
            proc.terminate()
            proc.wait()


class BaseExtractor(object):
"""Extracts tar, rpm, etc. files"""
Expand All @@ -50,101 +41,118 @@ def __init__(self, logger=None):
logger = LOGGER.getChild(self.__class__.__name__)
self.logger = logger
self.file_extractors = {
self.extract_file_tar: [".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"],
self.extract_file_rpm: [".rpm"],
self.extract_file_deb: [".deb", ".ipk"],
self.extract_file_cab: [".cab"],
self.extract_file_zip: [".exe", ".zip", ".jar", ".apk", ".msi"],
self.extract_file_tar: {".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"},
self.extract_file_rpm: {".rpm"},
self.extract_file_deb: {".deb", ".ipk"},
self.extract_file_cab: {".cab"},
self.extract_file_zip: {".exe", ".zip", ".jar", ".apk", ".msi"},
}

def can_extract(self, filename):
""" Check if the filename is something we know how to extract """
for extention in itertools.chain(*self.file_extractors.values()):
if filename[::-1].startswith(extention[::-1]):
for extension in itertools.chain(*self.file_extractors.values()):
if filename.endswith(extension):
return True
return False

@classmethod
def extract_file_tar(cls, filename, extraction_path):
async def extract_file_tar(cls, filename, extraction_path):
""" Extract tar files """
try:
shutil.unpack_archive(filename, extraction_path)
await aio_unpack_archive(filename, extraction_path)
return 0
except Exception:
return 1

@classmethod
def extract_file_rpm(cls, filename, extraction_path):
async def extract_file_rpm(cls, filename, extraction_path):
""" Extract rpm packages """
if sys.platform.startswith("linux"):
if not inpath("rpm2cpio") or not inpath("cpio"):
if not await aio_inpath("rpm2cpio") or not await aio_inpath("cpio"):
raise Exception(
"'rpm2cpio' and 'cpio' are required to extract rpm files"
)
else:
with popen_ctx(["rpm2cpio", filename], stdout=subprocess.PIPE) as proc:
return subprocess.call(
["cpio", "-idm", "--quiet"],
stdin=proc.stdout,
cwd=extraction_path,
)
stdout, stderr = await aio_run_command(["rpm2cpio", filename])
if stderr or not stdout:
return 1
cpio_path = os.path.join(extraction_path, "data.cpio")
await file_writer(cpio_path, "wb", stdout)
stdout, stderr = await aio_run_command(
["cpio", "-idm", "--file", cpio_path]
)
if stderr or not stdout:
return 1
else:
if not inpath("7z"):
if not await aio_inpath("7z"):
raise Exception("7z is required to extract rpm files")
else:
cpio_path = filename.split("\\")
cpio_path = "\\".join(cpio_path[: len(cpio_path) - 1])
subprocess.call(f'7z x {filename} -o"{cpio_path}"')
stdout, stderr = await aio_run_command(["7z", "x", filename])
if stderr or not stdout:
return 1
filenames = await aio_glob(os.path.join(extraction_path, "*.cpio"))
filename = filenames[0]

for file in os.listdir(cpio_path):
if "cpio" in file:
filename = cpio_path + "\\" + file

subprocess.call(f'7z x {filename} -o"{extraction_path}"')
if os.path.isfile(filename):
os.remove(filename)
stdout, stderr = await aio_run_command(["7z", "x", filename])
if stderr or not stdout:
return 1
return 0

@classmethod
def extract_file_deb(cls, filename, extraction_path):
async def extract_file_deb(cls, filename, extraction_path):
""" Extract debian packages """
if not inpath("ar"):
if not await aio_inpath("ar"):
raise Exception("'ar' is required to extract deb files")
else:
result = subprocess.call(["ar", "x", filename], cwd=extraction_path)
if result != 0:
return result
datafile = glob.glob(os.path.join(extraction_path, "data.tar.*"))[0]
stdout, stderr = await aio_run_command(["ar", "x", filename])
if stderr:
return 1
datafile = await aio_glob(os.path.join(extraction_path, "data.tar.*"))
try:
shutil.unpack_archive(datafile, extraction_path)
await aio_unpack_archive(datafile[0], extraction_path)
except Exception:
return 1
return 0

@classmethod
def extract_file_cab(cls, filename, extraction_path):
async def extract_file_cab(cls, filename, extraction_path):
""" Extract cab files """
if sys.platform.startswith("linux"):
if not inpath("cabextract"):
if not await aio_inpath("cabextract"):
raise Exception("'cabextract' is required to extract cab files")
else:
return subprocess.call(["cabextract", "-d", extraction_path, filename])
stdout, stderr = await aio_run_command(
["cabextract", "-d", extraction_path, filename]
)
if stderr or not stdout:
return 1
else:
subprocess.call(["Expand", filename, "-F:*", extraction_path])
stdout, stderr = await aio_run_command(
["Expand", filename, "-F:*", extraction_path]
)
if stderr or not stdout:
return 1
return 0

@classmethod
def extract_file_zip(cls, filename, extraction_path):
async def extract_file_zip(cls, filename, extraction_path):
""" Extract zip files """
if inpath("unzip"):
return subprocess.call(
["unzip", "-qq", "-n", "-d", extraction_path, filename]
if await aio_inpath("unzip"):
stdout, stderr = await aio_run_command(
["unzip", "-n", "-d", extraction_path, filename]
)
elif inpath("7z"):
return subprocess.call(f'7z x {filename} -o"{extraction_path}"')
if stderr or not stdout:
return 1
elif await aio_inpath("7z"):
stdout, stderr = await aio_run_command(["7z", "x", filename])
if stderr or not stdout:
return 1
else:
try:
shutil.unpack_archive(filename, extraction_path)
await aio_unpack_archive(filename, extraction_path)
except Exception:
return 1
return 0


class TempDirExtractorContext(BaseExtractor):
Expand All @@ -155,36 +163,56 @@ def __init__(self, raise_failure=False, *args, **kwargs):
self.tempdir = None
self.raise_failure = raise_failure

def extract(self, filename):
async def aio_extract(self, filename):
""" Run the extractor """
# Resolve path in case of cwd change
filename = os.path.abspath(filename)
for extractor in self.file_extractors:
for extention in self.file_extractors[extractor]:
if filename[::-1].startswith(extention[::-1]):
for extension in self.file_extractors[extractor]:
if filename.endswith(extension):
extracted_path = os.path.join(
self.tempdir, f"{os.path.basename(filename)}.extracted"
)
if os.path.exists(extracted_path):
shutil.rmtree(extracted_path)
os.makedirs(extracted_path, 0o700)
if extractor(filename, extracted_path) != 0:
if self.raise_failure:
raise ExtractionFailed(filename)
await aio_rmdir(extracted_path)
await aio_makedirs(extracted_path, 0o700)
async with ChangeDirContext(extracted_path):
if await extractor(filename, extracted_path) != 0:
if self.raise_failure:
raise ExtractionFailed(filename)
else:
self.logger.warning(f"Failure extracting {filename}")
else:
self.logger.warning(f"Failure extracting {filename}")
else:
self.logger.debug(f"Extracted {filename} to {extracted_path}")
self.logger.debug(
f"Extracted {filename} to {extracted_path}"
)
return extracted_path
raise UnknownArchiveType(filename)

def __enter__(self):
async def __aenter__(self):
""" Create a temporary directory to extract files to. """
self.tempdir = tempfile.mkdtemp(prefix="cve-bin-tool-")
self.tempdir = await aio_mkdtemp(prefix="cve-bin-tool-")
return self

def __exit__(self, exc_type, exc, exc_tb):
async def __aexit__(self, exc_type, exc, exc_tb):
""" Removes all extraction directories that need to be cleaned up."""
await aio_rmdir(self.tempdir)

def extract(self, filename):
aws = asyncio.ensure_future(self.aio_extract(filename))
result = self.loop.run_until_complete(aws)
return result

def __enter__(self):
self.tempdir = tempfile.mkdtemp(prefix="cve-bin-tool-")
if sys.platform.startswith("win"):
self.loop = asyncio.ProactorEventLoop()
asyncio.set_event_loop(self.loop)
else:
self.loop = asyncio.get_event_loop()
return self

def __exit__(self, exc_type, exc_val, exc_tb):
shutil.rmtree(self.tempdir)


Expand Down
54 changes: 52 additions & 2 deletions cve_bin_tool/util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,50 @@
# pylint: disable=too-many-arguments
""" Utility classes for the CVE Binary Tool """
import asyncio
import fnmatch
import glob
import os
import re
import shutil
import sys
import fnmatch
import tempfile
from functools import partial, wraps


def async_wrap(func):
    """Turn a blocking callable into a coroutine function run in an executor.

    The returned coroutine function accepts the same positional and keyword
    arguments as ``func``, plus two optional keyword-only arguments:

    * ``loop`` - event loop to schedule on; when ``None`` the result of
      ``asyncio.get_event_loop()`` is used.
    * ``executor`` - forwarded as the first argument of
      ``loop.run_in_executor``.

    Awaiting it yields whatever ``func`` returns.
    """

    @wraps(func)
    async def wrapper(*args, loop=None, executor=None, **kwargs):
        event_loop = asyncio.get_event_loop() if loop is None else loop
        # Bind all arguments up front so a zero-argument callable is handed
        # to the executor.
        bound_call = partial(func, *args, **kwargs)
        return await event_loop.run_in_executor(executor, bound_call)

    return wrapper


async def aio_run_command(args):
    """Run a command asynchronously and capture everything it prints.

    ``args`` is the argument vector (program followed by its arguments).  It
    is unpacked into ``asyncio.create_subprocess_exec`` with both output
    streams piped.

    Returns:
        A ``(stdout, stderr)`` tuple of bytes, available once
        ``communicate()`` has completed.
    """
    pipe = asyncio.subprocess.PIPE
    proc = await asyncio.create_subprocess_exec(*args, stdout=pipe, stderr=pipe)
    captured_out, captured_err = await proc.communicate()
    return captured_out, captured_err  # raw bytes, not decoded


@async_wrap
def file_writer(name, mode, content):
    """Write ``content`` to the file ``name``, opened with ``mode``.

    Because of the ``async_wrap`` decorator the blocking write is executed
    through ``loop.run_in_executor`` and callers must ``await`` the call.
    The file is closed when the ``with`` block ends.
    """
    with open(name, mode) as handle:
        handle.write(content)


class ChangeDirContext:
    """Async context manager that temporarily switches the working directory.

    On entry the process changes into ``destination_dir``; on exit it changes
    back to the directory that was current when the block was entered.

    NOTE: ``os.chdir`` affects the whole process, so overlapping uses from
    concurrently running tasks would interfere with each other.
    """

    def __init__(self, destination_dir):
        # Kept for backward compatibility with code that reads the attribute
        # before entering; the value is refreshed in ``__aenter__``.
        self.current_dir = os.getcwd()
        self.destination_dir = destination_dir

    async def __aenter__(self):
        # Record the directory at entry time, not construction time, so the
        # exit restores the right place even if the working directory moved
        # in between or the same object is entered more than once.
        self.current_dir = os.getcwd()
        os.chdir(self.destination_dir)

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so exceptions raised in the body propagate.
        os.chdir(self.current_dir)


def regex_find(lines, version_patterns):
Expand Down Expand Up @@ -110,3 +151,12 @@ def pattern_match(text, patterns):
if fnmatch.fnmatch(text, pattern):
return True
return False


# Awaitable counterparts of blocking helpers.  Each one is produced by
# ``async_wrap``, so it takes the same arguments as the wrapped callable
# (plus optional ``loop=`` / ``executor=`` keywords) and must be awaited.
aio_rmdir = async_wrap(shutil.rmtree)
aio_rmfile = async_wrap(os.remove)
aio_unpack_archive = async_wrap(shutil.unpack_archive)
aio_glob = async_wrap(glob.glob)
aio_mkdtemp = async_wrap(tempfile.mkdtemp)
aio_makedirs = async_wrap(os.makedirs)
aio_inpath = async_wrap(inpath)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ pygal
jinja2
pytest
pytest-xdist
pytest-cov
pytest-cov
pytest-asyncio
Loading