Skip to content

Commit

Permalink
extractor: Make asynchronous
Browse files Browse the repository at this point in the history
  • Loading branch information
Niraj-Kamdar committed Jun 10, 2020
1 parent 87d2bfb commit 21903dc
Show file tree
Hide file tree
Showing 5 changed files with 228 additions and 120 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: Run tests
run: |
python setup.py test
pytest -v test/test_scanner.py test/test_checkers.py
pytest -v test/test_scanner.py test/test_checkers.py test/test_extractor.py
long_tests:
name: Long tests on python3.8
runs-on: ubuntu-latest
Expand Down Expand Up @@ -73,6 +73,7 @@ jobs:
pip install coverage
coverage run --append setup.py test
pytest --cov --cov-append -n auto -v test/test_scanner.py test/test_checkers.py
pytest --cov --cov-append -v test/test_extractor.py
coverage xml -o py38_cov_report.xml
- name: upload code coverage to codecov
uses: codecov/codecov-action@v1
Expand Down Expand Up @@ -121,7 +122,7 @@ jobs:
python -m unittest test.test_output_engine
- name: Run extract tests
run: |
python -m unittest test.test_extract
pytest -v test/test_extractor.py
- name: Run file tests
run: |
python -m unittest test.test_file
Expand Down
190 changes: 109 additions & 81 deletions cve_bin_tool/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,34 @@
"""
Extraction of archives
"""
import glob
import asyncio
import itertools
import os
import shutil
import subprocess
import sys
import tempfile
from contextlib import contextmanager

from .log import LOGGER
from .util import inpath
from .util import (
aio_unpack_archive,
aio_run_command,
aio_glob,
aio_inpath,
aio_rmdir,
aio_makedirs,
aio_mkdtemp,
file_writer,
ChangeDirContext,
)


class ExtractionFailed(ValueError):
    """Signal that extracting an archive failed.

    Subclasses ``ValueError`` so existing ``except ValueError`` handlers
    keep catching it.
    """


class UnknownArchiveType(ValueError):
    """Signal that a file does not match any supported archive type.

    Subclasses ``ValueError`` so existing ``except ValueError`` handlers
    keep catching it.
    """


@contextmanager
def popen_ctx(*args, **kwargs):
    """Run ``subprocess.Popen`` as a context manager with guaranteed cleanup.

    All positional and keyword arguments are forwarded to
    ``subprocess.Popen`` unchanged.

    Yields:
        The started ``Popen`` object.

    On exit (normal or exceptional) the child's stdout pipe is closed if one
    was opened, the child is terminated, and it is waited for so that it is
    reaped.
    """
    proc = subprocess.Popen(*args, **kwargs)
    try:
        yield proc
    finally:
        # ``proc.stdout`` is only a file object when stdout=PIPE was given;
        # for DEVNULL, an inherited stream or a caller-supplied file it is
        # None.  Checking the attribute (rather than the kwarg's presence)
        # keeps the terminate/wait below from being skipped by an
        # AttributeError.
        if proc.stdout is not None:
            proc.stdout.close()
        proc.terminate()
        proc.wait()


class BaseExtractor(object):
"""Extracts tar, rpm, etc. files"""
Expand All @@ -50,101 +41,118 @@ def __init__(self, logger=None):
logger = LOGGER.getChild(self.__class__.__name__)
self.logger = logger
self.file_extractors = {
self.extract_file_tar: [".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"],
self.extract_file_rpm: [".rpm"],
self.extract_file_deb: [".deb", ".ipk"],
self.extract_file_cab: [".cab"],
self.extract_file_zip: [".exe", ".zip", ".jar", ".apk", ".msi"],
self.extract_file_tar: {".tgz", ".tar.gz", ".tar", ".tar.xz", ".tar.bz2"},
self.extract_file_rpm: {".rpm"},
self.extract_file_deb: {".deb", ".ipk"},
self.extract_file_cab: {".cab"},
self.extract_file_zip: {".exe", ".zip", ".jar", ".apk", ".msi"},
}

def can_extract(self, filename):
""" Check if the filename is something we know how to extract """
for extention in itertools.chain(*self.file_extractors.values()):
if filename[::-1].startswith(extention[::-1]):
for extension in itertools.chain(*self.file_extractors.values()):
if filename.endswith(extension):
return True
return False

@classmethod
def extract_file_tar(cls, filename, extraction_path):
async def extract_file_tar(cls, filename, extraction_path):
""" Extract tar files """
try:
shutil.unpack_archive(filename, extraction_path)
await aio_unpack_archive(filename, extraction_path)
return 0
except Exception:
return 1

@classmethod
def extract_file_rpm(cls, filename, extraction_path):
async def extract_file_rpm(cls, filename, extraction_path):
""" Extract rpm packages """
if sys.platform.startswith("linux"):
if not inpath("rpm2cpio") or not inpath("cpio"):
if not await aio_inpath("rpm2cpio") or not await aio_inpath("cpio"):
raise Exception(
"'rpm2cpio' and 'cpio' are required to extract rpm files"
)
else:
with popen_ctx(["rpm2cpio", filename], stdout=subprocess.PIPE) as proc:
return subprocess.call(
["cpio", "-idm", "--quiet"],
stdin=proc.stdout,
cwd=extraction_path,
)
stdout, stderr = await aio_run_command(["rpm2cpio", filename])
if stderr or not stdout:
return 1
cpio_path = os.path.join(extraction_path, "data.cpio")
await file_writer(cpio_path, "wb", stdout)
stdout, stderr = await aio_run_command(
["cpio", "-idm", "--file", cpio_path]
)
if stderr or not stdout:
return 1
else:
if not inpath("7z"):
if not await aio_inpath("7z"):
raise Exception("7z is required to extract rpm files")
else:
cpio_path = filename.split("\\")
cpio_path = "\\".join(cpio_path[: len(cpio_path) - 1])
subprocess.call(f'7z x {filename} -o"{cpio_path}"')
stdout, stderr = await aio_run_command(["7z", "x", filename])
if stderr or not stdout:
return 1
filenames = await aio_glob(os.path.join(extraction_path, "*.cpio"))
filename = filenames[0]

for file in os.listdir(cpio_path):
if "cpio" in file:
filename = cpio_path + "\\" + file

subprocess.call(f'7z x {filename} -o"{extraction_path}"')
if os.path.isfile(filename):
os.remove(filename)
stdout, stderr = await aio_run_command(["7z", "x", filename])
if stderr or not stdout:
return 1
return 0

@classmethod
def extract_file_deb(cls, filename, extraction_path):
async def extract_file_deb(cls, filename, extraction_path):
""" Extract debian packages """
if not inpath("ar"):
if not await aio_inpath("ar"):
raise Exception("'ar' is required to extract deb files")
else:
result = subprocess.call(["ar", "x", filename], cwd=extraction_path)
if result != 0:
return result
datafile = glob.glob(os.path.join(extraction_path, "data.tar.*"))[0]
stdout, stderr = await aio_run_command(["ar", "x", filename])
if stderr:
return 1
datafile = await aio_glob(os.path.join(extraction_path, "data.tar.*"))
try:
shutil.unpack_archive(datafile, extraction_path)
await aio_unpack_archive(datafile[0], extraction_path)
except Exception:
return 1
return 0

@classmethod
def extract_file_cab(cls, filename, extraction_path):
async def extract_file_cab(cls, filename, extraction_path):
""" Extract cab files """
if sys.platform.startswith("linux"):
if not inpath("cabextract"):
if not await aio_inpath("cabextract"):
raise Exception("'cabextract' is required to extract cab files")
else:
return subprocess.call(["cabextract", "-d", extraction_path, filename])
stdout, stderr = await aio_run_command(
["cabextract", "-d", extraction_path, filename]
)
if stderr or not stdout:
return 1
else:
subprocess.call(["Expand", filename, "-F:*", extraction_path])
stdout, stderr = await aio_run_command(
["Expand", filename, "-F:*", extraction_path]
)
if stderr or not stdout:
return 1
return 0

@classmethod
def extract_file_zip(cls, filename, extraction_path):
async def extract_file_zip(cls, filename, extraction_path):
""" Extract zip files """
if inpath("unzip"):
return subprocess.call(
["unzip", "-qq", "-n", "-d", extraction_path, filename]
if await aio_inpath("unzip"):
stdout, stderr = await aio_run_command(
["unzip", "-n", "-d", extraction_path, filename]
)
elif inpath("7z"):
return subprocess.call(f'7z x {filename} -o"{extraction_path}"')
if stderr or not stdout:
return 1
elif await aio_inpath("7z"):
stdout, stderr = await aio_run_command(["7z", "x", filename])
if stderr or not stdout:
return 1
else:
try:
shutil.unpack_archive(filename, extraction_path)
await aio_unpack_archive(filename, extraction_path)
except Exception:
return 1
return 0


class TempDirExtractorContext(BaseExtractor):
Expand All @@ -155,36 +163,56 @@ def __init__(self, raise_failure=False, *args, **kwargs):
self.tempdir = None
self.raise_failure = raise_failure

def extract(self, filename):
async def aio_extract(self, filename):
""" Run the extractor """
# Resolve path in case of cwd change
filename = os.path.abspath(filename)
for extractor in self.file_extractors:
for extention in self.file_extractors[extractor]:
if filename[::-1].startswith(extention[::-1]):
for extension in self.file_extractors[extractor]:
if filename.endswith(extension):
extracted_path = os.path.join(
self.tempdir, f"{os.path.basename(filename)}.extracted"
)
if os.path.exists(extracted_path):
shutil.rmtree(extracted_path)
os.makedirs(extracted_path, 0o700)
if extractor(filename, extracted_path) != 0:
if self.raise_failure:
raise ExtractionFailed(filename)
await aio_rmdir(extracted_path)
await aio_makedirs(extracted_path, 0o700)
async with ChangeDirContext(extracted_path):
if await extractor(filename, extracted_path) != 0:
if self.raise_failure:
raise ExtractionFailed(filename)
else:
self.logger.warning(f"Failure extracting {filename}")
else:
self.logger.warning(f"Failure extracting {filename}")
else:
self.logger.debug(f"Extracted {filename} to {extracted_path}")
self.logger.debug(
f"Extracted {filename} to {extracted_path}"
)
return extracted_path
raise UnknownArchiveType(filename)

def __enter__(self):
async def __aenter__(self):
""" Create a temporary directory to extract files to. """
self.tempdir = tempfile.mkdtemp(prefix="cve-bin-tool-")
self.tempdir = await aio_mkdtemp(prefix="cve-bin-tool-")
return self

def __exit__(self, exc_type, exc, exc_tb):
async def __aexit__(self, exc_type, exc, exc_tb):
""" Removes all extraction directories that need to be cleaned up."""
await aio_rmdir(self.tempdir)

def extract(self, filename):
    """Synchronously extract ``filename`` and return the extraction result.

    Blocking wrapper around the ``aio_extract`` coroutine: it drives the
    coroutine to completion on ``self.loop`` and returns whatever the
    coroutine returns.  Exceptions raised by the coroutine propagate.
    """
    # Hand the coroutine straight to our own loop.  Wrapping it with
    # asyncio.ensure_future() first would attach the task to whatever loop
    # asyncio considers "current", which is not guaranteed to be self.loop.
    return self.loop.run_until_complete(self.aio_extract(filename))

def __enter__(self):
    """Prepare an event loop and a temporary extraction directory.

    Sets ``self.loop`` (the loop used by the synchronous ``extract``) and
    ``self.tempdir`` (a fresh directory with the ``cve-bin-tool-`` prefix),
    then returns ``self``.
    """
    if sys.platform.startswith("win"):
        # A proactor loop is installed explicitly on Windows.
        # NOTE(review): presumably because subprocess support there depends
        # on it -- confirm against the asyncio platform-support notes.
        loop = asyncio.ProactorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current loop is set for this thread.
            loop = None
        if loop is None or loop.is_closed():
            # A missing or already-closed loop cannot run anything; install
            # a fresh one instead of failing later in run_until_complete().
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
    self.loop = loop
    # Create the directory last so a failure while setting up the loop does
    # not leave a stray temporary directory behind.
    self.tempdir = tempfile.mkdtemp(prefix="cve-bin-tool-")
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Delete the temporary directory tree stored in ``self.tempdir``.

    Returns ``None``, so an exception raised inside the ``with`` body is
    not suppressed.
    """
    shutil.rmtree(self.tempdir)


Expand Down
54 changes: 52 additions & 2 deletions cve_bin_tool/util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,50 @@
# pylint: disable=too-many-arguments
""" Utility classes for the CVE Binary Tool """
import asyncio
import fnmatch
import glob
import os
import re
import shutil
import sys
import fnmatch
import tempfile
from functools import partial, wraps


def async_wrap(func):
    """Turn a blocking callable into a coroutine function.

    The returned coroutine function forwards its positional and keyword
    arguments to ``func`` and runs the call through
    ``loop.run_in_executor``, returning the call's result.

    Two keyword-only arguments are consumed by the wrapper itself and are
    therefore never forwarded to ``func``:

    * ``loop`` -- event loop to use; defaults to ``asyncio.get_event_loop()``.
    * ``executor`` -- executor passed to ``run_in_executor``; ``None``
      selects the loop's default executor.
    """

    @wraps(func)
    async def run(*args, loop=None, executor=None, **kwargs):
        if loop is None:
            loop = asyncio.get_event_loop()
        # run_in_executor() only accepts positional arguments for the
        # callable, so bind everything up front with partial().
        pfunc = partial(func, *args, **kwargs)
        return await loop.run_in_executor(executor, pfunc)

    return run


async def aio_run_command(args):
    """Run an external command and capture its output.

    Args:
        args: Sequence whose first item is the program and whose remaining
            items are its arguments; it is unpacked into
            ``asyncio.create_subprocess_exec`` (no shell is involved).

    Returns:
        A ``(stdout, stderr)`` tuple of ``bytes`` collected after the
        process has finished.  The exit status is not returned.
    """
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await process.communicate()
    return stdout, stderr  # binary encoded


@async_wrap
def file_writer(name, mode, content):
    """Write ``content`` to the file ``name`` opened with ``mode``.

    Because of the ``@async_wrap`` decorator, callers receive a coroutine
    function and must ``await`` it.  ``content`` has to match ``mode``
    (``bytes`` for binary modes, ``str`` for text modes).
    """
    with open(name, mode) as f:
        f.write(content)


class ChangeDirContext:
    """Async context manager that temporarily changes the working directory.

    On entry the process switches to ``destination_dir``; on exit it
    switches back to the directory that was current at entry.

    NOTE: ``os.chdir`` affects the whole process, so any other task or
    thread that runs while the context is active sees the changed
    directory as well.
    """

    def __init__(self, destination_dir):
        # Kept for backward compatibility with code that reads the
        # attribute right after construction; refreshed on entry.
        self.current_dir = os.getcwd()
        self.destination_dir = destination_dir

    async def __aenter__(self):
        # Record the directory at entry time, not construction time, so a
        # context object created earlier still restores the right place.
        self.current_dir = os.getcwd()
        os.chdir(self.destination_dir)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        os.chdir(self.current_dir)


def regex_find(lines, version_patterns):
Expand Down Expand Up @@ -110,3 +151,12 @@ def pattern_match(text, patterns):
if fnmatch.fnmatch(text, pattern):
return True
return False


# Awaitable counterparts of blocking filesystem helpers.  Each name is the
# coroutine function produced by async_wrap(), so calling it returns an
# awaitable that runs the wrapped callable via loop.run_in_executor().
aio_rmdir = async_wrap(shutil.rmtree)  # removes a whole directory tree
aio_rmfile = async_wrap(os.remove)
aio_unpack_archive = async_wrap(shutil.unpack_archive)
aio_glob = async_wrap(glob.glob)
aio_mkdtemp = async_wrap(tempfile.mkdtemp)
aio_makedirs = async_wrap(os.makedirs)
# NOTE(review): `inpath` is not defined in the visible part of this file;
# presumably it is the helper defined earlier in util.py -- confirm.
aio_inpath = async_wrap(inpath)
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ pygal
jinja2
pytest
pytest-xdist
pytest-cov
pytest-cov
pytest-asyncio
Loading

0 comments on commit 21903dc

Please sign in to comment.