Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cleaner] Separate process of preparing obfuscations #3262

Merged
merged 6 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 62 additions & 37 deletions sos/cleaner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import logging
import os
import shutil
import sos.cleaner.preppers
import tempfile

from concurrent.futures import ThreadPoolExecutor
Expand All @@ -31,7 +32,7 @@
SoSCollectorDirectory)
from sos.cleaner.archives.generic import DataDirArchive, TarballArchive
from sos.cleaner.archives.insights import InsightsArchive
from sos.utilities import get_human_readable
from sos.utilities import get_human_readable, import_module, ImporterHelper
from textwrap import fill


Expand Down Expand Up @@ -583,49 +584,73 @@ def generate_parser_item_regexes(self):
for parser in self.parsers:
parser.generate_item_regexes()

def _prepare_archive_with_prepper(self, archive, prepper):
    """
    Seed every parser's mapping from one archive using one prepper.

    For each archive we've determined we need to operate on, pass it to
    each prepper so that we can extract necessary files and/or items for
    direct regex replacement. Preppers define these methods per parser,
    so it is possible that a single prepper will read the same file for
    different parsers/mappings. This is preferable to the alternative of
    building up monolithic lists of file paths, as we'd still need to
    manipulate these on a per-archive basis.

    For each parser, in order:

    1. every file the prepper lists for that parser is read from the
       archive and fed line by line to ``parse_line()``;
    2. every item the prepper returns for that parser's map is added to
       the mapping directly;
    3. every regex item the prepper holds for that parser is added to
       the mapping as a regex item.

    :param archive: The archive we are currently using to prepare our
                    mappings with
    :type archive:  ``SoSObfuscationArchive`` subclass

    :param prepper: The individual prepper we're using to source items
    :type prepper:  ``SoSPrepper`` subclass
    """
    for _parser in self.parsers:
        # The lookup key is the first whitespace-separated word of the
        # parser's name, lowercased.
        pname = _parser.name.lower().split()[0].strip()

        for _file in prepper.get_parser_file_list(pname, archive):
            content = archive.get_file_content(_file)
            if not content:
                continue
            self.log_debug(f"Prepping {pname} parser with file {_file} "
                           f"from {archive.ui_name}")
            for line in content.splitlines():
                # A single bad line must not stop the rest of the file
                # from being used for prepping.
                try:
                    _parser.parse_line(line)
                except Exception as err:
                    self.log_debug(
                        f"Failed to prep {pname} map from {_file}: {err}"
                    )

        map_items = prepper.get_items_for_map(pname, archive)
        if map_items:
            self.log_debug(f"Prepping {pname} mapping with items from "
                           f"{archive.ui_name}")
            for item in map_items:
                _parser.mapping.add(item)

        # A prepper may not carry a regex_items entry for every parser
        # name; treat a missing key as "nothing to add" rather than
        # aborting the prep of all remaining parsers with a KeyError.
        for ritem in prepper.regex_items.get(pname, ()):
            _parser.mapping.add_regex_item(ritem)

def get_preppers(self):
    """
    Yield an instance of every prepper class found under
    ``sos.cleaner.preppers``, lowest ``priority`` value first, so that the
    mappings are prepared with obfuscation matches in a controlled order.

    :returns:   All preppers that can be leveraged locally
    :rtype:     A generator of `SoSPrepper` items
    """
    module_names = ImporterHelper(sos.cleaner.preppers).get_modules()
    prepper_classes = [
        klass
        for modname in module_names
        for klass in import_module(f"sos.cleaner.preppers.{modname}")
    ]
    # list.sort() is stable, so classes sharing a priority keep their
    # discovery order.
    prepper_classes.sort(key=lambda klass: klass.priority)
    yield from (klass() for klass in prepper_classes)

def preload_all_archives_into_maps(self):
    """Before doing the actual obfuscation, if we have multiple archives
    to obfuscate then we need to preload each of them into the mappings
    to ensure that node1 is obfuscated in node2 as well as node2 being
    obfuscated in node1's archive.

    Preparation is delegated to preppers: each prepper yielded by
    ``get_preppers()`` is applied to every archive in ``report_paths``
    before the next prepper runs.
    """
    self.log_info("Pre-loading all archives into obfuscation maps")
    # Prepper-major order: one prepper finishes across all archives
    # before the next prepper starts.
    for prepper in self.get_preppers():
        for archive in self.report_paths:
            self._prepare_archive_with_prepper(archive, prepper)

def obfuscate_report(self, archive):
"""Individually handle each archive or directory we've discovered by
Expand Down
8 changes: 6 additions & 2 deletions sos/cleaner/archives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,12 @@ def get_file_content(self, fname):
)
return ''
else:
with open(self.format_file_name(fname), 'r') as to_read:
return to_read.read()
try:
with open(self.format_file_name(fname), 'r') as to_read:
return to_read.read()
except Exception as err:
self.log_debug(f"Failed to get contents of {fname}: {err}")
return ''

def extract(self, quiet=False):
if self.is_tarfile:
Expand Down
125 changes: 125 additions & 0 deletions sos/cleaner/preppers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Copyright 2023 Red Hat, Inc. Jake Hunsaker <jhunsake@redhat.com>

# This file is part of the sos project: https://github.com/sosreport/sos
#
# This copyrighted material is made available to anyone wishing to use,
# modify, copy, or redistribute it subject to the terms and conditions of
# version 2 of the GNU General Public License.
#
# See the LICENSE file in the source distribution for further information.

import logging


class SoSPrepper:
    """
    A prepper is a way to prepare loaded mappings with selected items within
    an sos report prior to beginning the full obfuscation routine.

    This was previously handled directly within archives, however this is a bit
    cumbersome and doesn't allow for all the flexibility we could use in this
    effort.

    Preppers are separated from parsers but will leverage them in order to feed
    parser-matched strings from files highlighted by a Prepper() to the
    appropriate mapping for initial obfuscation.

    Preppers may specify their own priority in order to influence the order in
    which mappings are prepped. A lower priority value means the prepper should
    run sooner than those with higher values.

    Further, Preppers have two ways to prepare the maps - either by generating
    a list of filenames or via directly pulling content out of select files
    without the assistance of a parser.

    For the former approach, `Prepper._get_$parser_file_list()` should be used
    and should yield filenames that exist in target archives. For the latter,
    the `Prepper._get_items_for_$map()` should be used.

    Finally, a `regex_items` dict is available for storing individual regex
    items for parsers that rely on them. These items will be added after all
    files and other individual items are handled. This dict has keys set to
    parser/mapping names, and the values should be sets of items, so preppers
    should add to them like so:

        self.regex_items['hostname'].add('myhostname')
    """

    # Label used in this prepper's log prefix; subclasses should override.
    name = 'Undefined'
    # Lower values run earlier.
    priority = 100

    def __init__(self):
        # One independent set per parser/mapping name.
        self.regex_items = {
            'hostname': set(),
            'ip': set(),
            'ipv6': set(),
            'keyword': set(),
            'mac': set(),
            'username': set()
        }
        self.soslog = logging.getLogger('sos')
        self.ui_log = logging.getLogger('sos_ui')

    def _fmt_log_msg(self, msg):
        """Prefix ``msg`` with this prepper's name for log output."""
        return f"[prepper:{self.name}] {msg}"

    def log_debug(self, msg):
        """Log ``msg`` at debug level on the 'sos' logger."""
        self.soslog.debug(self._fmt_log_msg(msg))

    def log_info(self, msg):
        """Log ``msg`` at info level on the 'sos' logger."""
        self.soslog.info(self._fmt_log_msg(msg))

    def log_error(self, msg):
        """Log ``msg`` at error level on the 'sos' logger."""
        self.soslog.error(self._fmt_log_msg(msg))

    def _dispatch(self, method_name, archive):
        """
        Call the optional per-parser/per-map hook ``method_name`` with
        ``archive`` if this prepper defines it, otherwise return an empty
        list so that individual Preppers need not define every hook.

        :param method_name: Name of the hook method to look up on ``self``
        :type method_name:  ``str``

        :param archive: The archive handed through to the hook
        :type archive:  ``SoSObfuscationArchive``

        :returns:   Whatever the hook returns, or ``[]`` when it is absent
        """
        handler = getattr(self, method_name, None)
        if handler is None:
            return []
        return handler(archive)

    def get_parser_file_list(self, parser, archive):
        """
        Helper that calls the appropriate Prepper method for the specified
        parser. This allows Preppers to be able to provide items for multiple
        types of parsers without needing to handle repetitious logic to
        determine which parser we're interested within each individual call.

        The convention to use is to define `_get_$parser_file_list()` methods
        within Preppers, e.g. `_get_hostname_file_list()` would be used to
        provide filenames for the hostname parser. If such a method is not
        defined within a Prepper for a given parser, we handle that here so
        that individual Preppers do not need to.

        :param parser:  The _name_ of the parser to get a file list for
        :type parser:   ``str``

        :param archive: The archive we are operating on currently for the
                        specified parser
        :type archive:  ``SoSObfuscationArchive``

        :returns:   A list of filenames within the archive to prep with
        :rtype:     ``list``
        """
        return self._dispatch(f"_get_{parser}_file_list", archive)

    def get_items_for_map(self, mapping, archive):
        """
        Similar to `get_parser_file_list()`, a helper for calling the specific
        method for generating items for the given `map`. This allows Preppers
        to be able to provide items for multiple types of maps, without the
        need to handle repetitious logic to determine which parser we're
        interested in within each individual call.

        The convention is to define `_get_items_for_$map()` methods within
        Preppers; when no such method exists an empty list is returned.

        :param mapping: The _name_ of the mapping to get items for
        :type mapping:  ``str``

        :param archive: The archive we are operating on currently for the
                        specified parser
        :type archive:  ``SoSObfuscationArchive``

        :returns:   A list of distinct items to obfuscate without using a parser
        :rtype:     ``list``
        """
        return self._dispatch(f"_get_items_for_{mapping}", archive)

# vim: set et ts=4 sw=4 :
57 changes: 47 additions & 10 deletions tests/unittests/cleaner_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
from sos.cleaner.mappings.hostname_map import SoSHostnameMap
from sos.cleaner.mappings.keyword_map import SoSKeywordMap
from sos.cleaner.mappings.ipv6_map import SoSIPv6Map
from sos.cleaner.preppers import SoSPrepper
from sos.cleaner.preppers.hostname import HostnamePrepper
from sos.cleaner.preppers.ip import IPPrepper
from sos.cleaner.archives.sos import SoSReportArchive


class CleanerMapTests(unittest.TestCase):
Expand All @@ -28,7 +32,7 @@ def setUp(self):
self.mac_map = SoSMacMap()
self.ip_map = SoSIPMap()
self.host_map = SoSHostnameMap()
self.host_map.load_domains_from_options(['redhat.com'])
self.host_map.sanitize_item('redhat.com')
self.kw_map = SoSKeywordMap()
self.ipv6_map = SoSIPv6Map()

Expand Down Expand Up @@ -152,13 +156,14 @@ def setUp(self):
self.ip_parser = SoSIPParser(config={})
self.ipv6_parser = SoSIPv6Parser(config={})
self.mac_parser = SoSMacParser(config={})
self.host_parser = SoSHostnameParser(config={},
opt_domains=['foobar.com'])
self.kw_parser = SoSKeywordParser(config={}, keywords=['foobar'])
self.host_parser = SoSHostnameParser(config={})
self.host_parser.mapping.add('foobar.com')
self.kw_parser = SoSKeywordParser(config={})
self.kw_parser.mapping.add('foobar')
self.kw_parser_none = SoSKeywordParser(config={})
self.kw_parser.generate_item_regexes()
self.uname_parser = SoSUsernameParser(config={},
opt_names=['DOMAIN\myusername'])
self.uname_parser = SoSUsernameParser(config={})
self.uname_parser.mapping.add('DOMAIN\myusername')

def test_ip_parser_valid_ipv4_line(self):
line = 'foobar foo 10.0.0.1/24 barfoo bar'
Expand Down Expand Up @@ -210,22 +215,22 @@ def test_mac_parser_with_quotes_ipv6_quad(self):

def test_hostname_load_hostname_string(self):
    """Smoke test: add an FQDN straight to the hostname parser's mapping."""
    fqdn = 'myhost.subnet.example.com'
    # The mapping is fed directly; the older parser-level loader call
    # that sat alongside this line is the retired form of the same step.
    self.host_parser.mapping.add(fqdn)

def test_hostname_valid_domain_line(self):
    """A line containing a mapped FQDN must come back changed."""
    # Register the FQDN once, directly on the mapping.
    self.host_parser.mapping.add('myhost.subnet.example.com')
    line = 'testing myhost.subnet.example.com in a string'
    _test = self.host_parser.parse_line(line)[0]
    self.assertNotEqual(line, _test)

def test_hostname_short_name_in_line(self):
    """A line containing only the short host name must come back changed."""
    # Register the FQDN once, directly on the mapping.
    self.host_parser.mapping.add('myhost.subnet.example.com')
    line = 'testing just myhost in a line'
    _test = self.host_parser.parse_line(line)[0]
    self.assertNotEqual(line, _test)

def test_obfuscate_whole_fqdn_for_given_domainname(self):
self.host_parser.load_hostname_into_map('sostestdomain.domain')
self.host_parser.mapping.add('sostestdomain.domain')
line = 'let obfuscate soshost.sostestdomain.domain'
_test = self.host_parser.parse_line(line)[0]
self.assertFalse('soshost' in _test)
Expand Down Expand Up @@ -274,3 +279,35 @@ def test_ad_username(self):
line = "DOMAIN\myusername"
_test = self.uname_parser.parse_line(line)[0]
self.assertNotEqual(line, _test)


class PrepperTests(unittest.TestCase):
    """
    Ensure that the translations for different parser/mapping methods are
    working
    """

    def setUp(self):
        """Build a base prepper, two concrete preppers and a test archive."""
        self.prepper = SoSPrepper()
        self.archive = SoSReportArchive(
            archive_path=(
                'tests/test_data/'
                'sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz'
            ),
            tmpdir='/tmp'
        )
        self.host_prepper = HostnamePrepper()
        self.ipv4_prepper = IPPrepper()

    def test_parser_method_translation(self):
        """The base prepper has no hostname file hook, so we get []."""
        self.assertEqual(
            [],
            self.prepper.get_parser_file_list('hostname', None)
        )

    def test_mapping_method_translation(self):
        """The base prepper has no hook for an unknown map, so we get []."""
        self.assertEqual(
            [],
            self.prepper.get_items_for_map('foobar', None)
        )

    def test_hostname_prepper_map_items(self):
        """The hostname prepper returns the test archive's hostname item."""
        self.assertEqual(
            ['cleanertest'],
            self.host_prepper.get_items_for_map('hostname', self.archive)
        )

    def test_ipv4_prepper_parser_files(self):
        """The IP prepper lists the `ip -o addr` output for the ip parser."""
        self.assertEqual(
            ['sos_commands/networking/ip_-o_addr'],
            self.ipv4_prepper.get_parser_file_list('ip', self.archive)
        )

    def test_ipv4_prepper_invalid_parser_files(self):
        """An unknown parser name yields no files from the IP prepper."""
        self.assertEqual(
            [],
            self.ipv4_prepper.get_parser_file_list('foobar', self.archive)
        )
pmoravec marked this conversation as resolved.
Show resolved Hide resolved