From 13ce9b48347087fddf24ea6106cb24b7f9f23562 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 25 May 2023 14:21:57 -0400 Subject: [PATCH 1/6] [cleaner] Separate cleaner prepping from archives Up until now, the archive abstractions have defined what files `sos clean` will use to prepare the mappings for obfuscation before entering the normal obfuscation loop over every file in every archive. While this is straightforward enough, it is not particularly flexible, and prevents us from easily using other approaches for preparing the mappings beyond what is directly obtained via the parsers (which in some cases need special handling to be prepared at all). Change this by introducing `SoSPrepper`s which will be used to determine not only what files to pass to which parsers on an archive-by-archive basis, but will also allow for manually retrieving items from disparate sources within the archive(s) and handing those directly to the mappings, without the need for those items to first pass the parser check. 
Related: RH: SUPDEV-135 Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 99 +++++++++++++++--------- sos/cleaner/archives/__init__.py | 8 +- sos/cleaner/preppers/__init__.py | 125 +++++++++++++++++++++++++++++++ tests/unittests/cleaner_tests.py | 57 +++++++++++--- 4 files changed, 240 insertions(+), 49 deletions(-) create mode 100644 sos/cleaner/preppers/__init__.py diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index b8e4aafd9f..d440185d66 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -13,6 +13,7 @@ import logging import os import shutil +import sos.cleaner.preppers import tempfile from concurrent.futures import ThreadPoolExecutor @@ -31,7 +32,7 @@ SoSCollectorDirectory) from sos.cleaner.archives.generic import DataDirArchive, TarballArchive from sos.cleaner.archives.insights import InsightsArchive -from sos.utilities import get_human_readable +from sos.utilities import get_human_readable, import_module, ImporterHelper from textwrap import fill @@ -583,6 +584,63 @@ def generate_parser_item_regexes(self): for parser in self.parsers: parser.generate_item_regexes() + def _prepare_archive_with_prepper(self, archive, prepper): + """ + For each archive we've determined we need to operate on, pass it to + each prepper so that we can extract necessary files and/or items for + direct regex replacement. Preppers define these methods per parser, + so it is possible that a single prepper will read the same file for + different parsers/mappings. This is preferable to the alternative of + building up monolithic lists of file paths, as we'd still need to + manipulate these on a per-archive basis. 
+ + :param archive: The archive we are currently using to prepare our + mappings with + :type archive: ``SoSObfuscationArchive`` subclass + + :param prepper: The individual prepper we're using to source items + :type prepper: ``SoSPrepper`` subclass + """ + for _parser in self.parsers: + pname = _parser.name.lower().split()[0].strip() + for _file in prepper.get_parser_file_list(pname, archive): + content = archive.get_file_content(_file) + if not content: + continue + self.log_debug(f"Prepping {pname} parser with file {_file} " + f"from {archive.ui_name}") + for line in content.splitlines(): + try: + _parser.parse_line(line) + except Exception as err: + self.log_debug( + f"Failed to prep {pname} map from {_file}: {err}" + ) + map_items = prepper.get_items_for_map(pname, archive) + if map_items: + self.log_debug(f"Prepping {pname} mapping with items from " + f"{archive.ui_name}") + for item in map_items: + _parser.mapping.add(item) + + for ritem in prepper.regex_items[pname]: + _parser.mapping.add_regex_item(ritem) + + def get_preppers(self): + """ + Discover all locally available preppers so that we can prepare the + mappings with obfuscation matches in a controlled manner + + :returns: All preppers that can be leveraged locally + :rtype: A generator of `SoSPrepper` items + """ + helper = ImporterHelper(sos.cleaner.preppers) + preps = [] + for _prep in helper.get_modules(): + preps.extend(import_module(f"sos.cleaner.preppers.{_prep}")) + for prepper in sorted(preps, key=lambda x: x.priority): + yield prepper() + def preload_all_archives_into_maps(self): """Before doing the actual obfuscation, if we have multiple archives to obfuscate then we need to preload each of them into the mappings @@ -590,42 +648,9 @@ def preload_all_archives_into_maps(self): obfuscated in node1's archive. 
""" self.log_info("Pre-loading all archives into obfuscation maps") - for _arc in self.report_paths: - for _parser in self.parsers: - try: - pfile = _arc.prep_files[_parser.name.lower().split()[0]] - if not pfile: - continue - except (IndexError, KeyError): - continue - if isinstance(pfile, str): - pfile = [pfile] - for parse_file in pfile: - self.log_debug("Attempting to load %s" % parse_file) - try: - content = _arc.get_file_content(parse_file) - if not content: - continue - if isinstance(_parser, SoSUsernameParser): - _parser.load_usernames_into_map(content) - elif isinstance(_parser, SoSHostnameParser): - if 'hostname' in parse_file: - _parser.load_hostname_into_map( - content.splitlines()[0] - ) - elif 'etc/hosts' in parse_file: - _parser.load_hostname_from_etc_hosts( - content - ) - else: - for line in content.splitlines(): - self.obfuscate_line(line) - except Exception as err: - self.log_info( - "Could not prepare %s from %s (archive: %s): %s" - % (_parser.name, parse_file, _arc.archive_name, - err) - ) + for prepper in self.get_preppers(): + for archive in self.report_paths: + self._prepare_archive_with_prepper(archive, prepper) def obfuscate_report(self, archive): """Individually handle each archive or directory we've discovered by diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index 6a6f46d989..a185ae349b 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -166,8 +166,12 @@ def get_file_content(self, fname): ) return '' else: - with open(self.format_file_name(fname), 'r') as to_read: - return to_read.read() + try: + with open(self.format_file_name(fname), 'r') as to_read: + return to_read.read() + except Exception as err: + self.log_debug(f"Failed to get contents of {fname}: {err}") + return '' def extract(self, quiet=False): if self.is_tarfile: diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py new file mode 100644 index 0000000000..b14873545c --- 
/dev/null +++ b/sos/cleaner/preppers/__init__.py @@ -0,0 +1,125 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +import logging + + +class SoSPrepper(): + """ + A prepper is a way to prepare loaded mappings with selected items within + an sos report prior to beginning the full obfuscation routine. + + This was previously handled directly within archives, however this is a bit + cumbersome and doesn't allow for all the flexibility we could use in this + effort. + + Preppers are separated from parsers but will leverage them in order to feed + parser-matched strings from files highlighted by a Prepper() to the + appropriate mapping for initial obfuscation. + + Preppers may specify their own priority in order to influence the order in + which mappings are prepped. Further, Preppers have two ways to prepare + the maps - either by generating a list of filenames or via directly pulling + content out of select files without the assistance of a parser. A lower + priority value means the prepper should run sooner than those with higher + values. + + For the former approach, `Prepper._get_$parser_file_list()` should be used + and should yield filenames that exist in target archives. For the latter, + the `Prepper._get_items_for_$map()` should be used. + + Finally, a `regex_items` dict is available for storing individual regex + items for parsers that rely on them. These items will be added after all + files and other individual items are handled. 
This dict has keys set to + parser/mapping names, and the values should be sets of items, so preppers + should add to them like so: + + self.regex_items['hostname'].add('myhostname') + """ + + name = 'Undefined' + priority = 100 + + def __init__(self): + self.regex_items = { + 'hostname': set(), + 'ip': set(), + 'ipv6': set(), + 'keyword': set(), + 'mac': set(), + 'username': set() + } + self.soslog = logging.getLogger('sos') + self.ui_log = logging.getLogger('sos_ui') + + def _fmt_log_msg(self, msg): + return f"[prepper:{self.name}] {msg}" + + def log_debug(self, msg): + self.soslog.debug(self._fmt_log_msg(msg)) + + def log_info(self, msg): + self.soslog.info(self._fmt_log_msg(msg)) + + def log_error(self, msg): + self.soslog.error(self._fmt_log_msg(msg)) + + def get_parser_file_list(self, parser, archive): + """ + Helper that calls the appropriate Prepper method for the specified + parser. This allows Preppers to be able to provide items for multiple + types of parsers without needing to handle repetitious logic to + determine which parser we're interested within each individual call. + + The convention to use is to define `_get_$parser_file_list()` methods + within Preppers, e.g. `_get_hostname_file_list()` would be used to + provide filenames for the hostname parser. If such a method is not + defined within a Prepper for a given parser, we handle that here so + that individual Preppers do not need to. 
+ + :param parser: The _name_ of the parser to get a file list for + :type parser: ``str`` + + :param archive: The archive we are operating on currently for the + specified parser + :type archive: ``SoSObfuscationArchive`` + + :returns: A list of filenames within the archive to prep with + :rtype: ``list`` + """ + _check = f"_get_{parser}_file_list" + if hasattr(self, _check): + return getattr(self, _check)(archive) + return [] + + def get_items_for_map(self, mapping, archive): + """ + Similar to `get_parser_file_list()`, a helper for calling the specific + method for generating items for the given `map`. This allows Preppers + to be able to provide items for multiple types of maps, without the + need to handle repetitious logic to determine which parser we're + interested in within each individual call. + + :param mapping: The _name_ of the mapping to get items for + :type mapping: ``str`` + + :param archive: The archive we are operating on currently for the + specified parser + :type archive: ``SoSObfuscationArchive`` + + :returns: A list of distinct items to obfuscate without using a parser + :rtype: ``list`` + """ + _check = f"_get_items_for_{mapping}" + if hasattr(self, _check): + return getattr(self, _check)(archive) + return [] + +# vim: set et ts=4 sw=4 : diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py index c28239a7a4..6e0be6c813 100644 --- a/tests/unittests/cleaner_tests.py +++ b/tests/unittests/cleaner_tests.py @@ -20,6 +20,10 @@ from sos.cleaner.mappings.hostname_map import SoSHostnameMap from sos.cleaner.mappings.keyword_map import SoSKeywordMap from sos.cleaner.mappings.ipv6_map import SoSIPv6Map +from sos.cleaner.preppers import SoSPrepper +from sos.cleaner.preppers.hostname import HostnamePrepper +from sos.cleaner.preppers.ip import IPPrepper +from sos.cleaner.archives.sos import SoSReportArchive class CleanerMapTests(unittest.TestCase): @@ -28,7 +32,7 @@ def setUp(self): self.mac_map = SoSMacMap() self.ip_map = 
SoSIPMap() self.host_map = SoSHostnameMap() - self.host_map.load_domains_from_options(['redhat.com']) + self.host_map.sanitize_item('redhat.com') self.kw_map = SoSKeywordMap() self.ipv6_map = SoSIPv6Map() @@ -152,13 +156,14 @@ def setUp(self): self.ip_parser = SoSIPParser(config={}) self.ipv6_parser = SoSIPv6Parser(config={}) self.mac_parser = SoSMacParser(config={}) - self.host_parser = SoSHostnameParser(config={}, - opt_domains=['foobar.com']) - self.kw_parser = SoSKeywordParser(config={}, keywords=['foobar']) + self.host_parser = SoSHostnameParser(config={}) + self.host_parser.mapping.add('foobar.com') + self.kw_parser = SoSKeywordParser(config={}) + self.kw_parser.mapping.add('foobar') self.kw_parser_none = SoSKeywordParser(config={}) self.kw_parser.generate_item_regexes() - self.uname_parser = SoSUsernameParser(config={}, - opt_names=['DOMAIN\myusername']) + self.uname_parser = SoSUsernameParser(config={}) + self.uname_parser.mapping.add('DOMAIN\myusername') def test_ip_parser_valid_ipv4_line(self): line = 'foobar foo 10.0.0.1/24 barfoo bar' @@ -210,22 +215,22 @@ def test_mac_parser_with_quotes_ipv6_quad(self): def test_hostname_load_hostname_string(self): fqdn = 'myhost.subnet.example.com' - self.host_parser.load_hostname_into_map(fqdn) + self.host_parser.mapping.add(fqdn) def test_hostname_valid_domain_line(self): - self.host_parser.load_hostname_into_map('myhost.subnet.example.com') + self.host_parser.mapping.add('myhost.subnet.example.com') line = 'testing myhost.subnet.example.com in a string' _test = self.host_parser.parse_line(line)[0] self.assertNotEqual(line, _test) def test_hostname_short_name_in_line(self): - self.host_parser.load_hostname_into_map('myhost.subnet.example.com') + self.host_parser.mapping.add('myhost.subnet.example.com') line = 'testing just myhost in a line' _test = self.host_parser.parse_line(line)[0] self.assertNotEqual(line, _test) def test_obfuscate_whole_fqdn_for_given_domainname(self): - 
self.host_parser.load_hostname_into_map('sostestdomain.domain') + self.host_parser.mapping.add('sostestdomain.domain') line = 'let obfuscate soshost.sostestdomain.domain' _test = self.host_parser.parse_line(line)[0] self.assertFalse('soshost' in _test) @@ -274,3 +279,35 @@ def test_ad_username(self): line = "DOMAIN\myusername" _test = self.uname_parser.parse_line(line)[0] self.assertNotEqual(line, _test) + + +class PrepperTests(unittest.TestCase): + """ + Ensure that the translations for different parser/mapping methods are + working + """ + + def setUp(self): + self.prepper = SoSPrepper() + self.archive = SoSReportArchive( + archive_path='tests/test_data/sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz', + tmpdir='/tmp' + ) + self.host_prepper = HostnamePrepper() + self.ipv4_prepper = IPPrepper() + + def test_parser_method_translation(self): + self.assertEqual([], self.prepper.get_parser_file_list('hostname', None)) + + def test_mapping_method_translation(self): + self.assertEqual([], self.prepper.get_items_for_map('foobar', None)) + + def test_hostname_prepper_map_items(self): + self.assertEqual(['cleanertest'], self.host_prepper.get_items_for_map('hostname', self.archive)) + + def test_ipv4_prepper_parser_files(self): + self.assertEqual(['sos_commands/networking/ip_-o_addr'], self.ipv4_prepper.get_parser_file_list('ip', self.archive)) + + def test_ipv4_prepper_invalid_parser_files(self): + self.assertEqual([], self.ipv4_prepper.get_parser_file_list('foobar', self.archive)) + From a8cf9c3f536ec6f623fac6a1d41d4ee46336c1c8 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Thu, 25 May 2023 15:00:23 -0400 Subject: [PATCH 2/6] [ip] Add new prepper Adds a new prepper for IP network address sourcing. 
Signed-off-by: Jake Hunsaker --- sos/cleaner/archives/__init__.py | 8 ++++++++ sos/cleaner/archives/insights.py | 1 - sos/cleaner/archives/sos.py | 1 - sos/cleaner/preppers/ip.py | 34 ++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 sos/cleaner/preppers/ip.py diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index a185ae349b..70e168d8b9 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -69,6 +69,14 @@ def check_is_type(cls, arc_path): """Check if the archive is a well-known type we directly support""" return False + @property + def is_sos(self): + return 'sos' in self.__class__.__name__.lower() + + @property + def is_insights(self): + return 'insights' in self.type_name + def _load_self(self): if self.is_tarfile: self.tarobj = tarfile.open(self.archive_path) diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py index dab48b16b5..c8cf6a97db 100644 --- a/sos/cleaner/archives/insights.py +++ b/sos/cleaner/archives/insights.py @@ -24,7 +24,6 @@ class InsightsArchive(SoSObfuscationArchive): prep_files = { 'hostname': 'data/insights_commands/hostname_-f', - 'ip': 'data/insights_commands/ip_addr', 'mac': 'data/insights_commands/ip_addr' } diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py index 12766496bc..9248a18d05 100644 --- a/sos/cleaner/archives/sos.py +++ b/sos/cleaner/archives/sos.py @@ -27,7 +27,6 @@ class SoSReportArchive(SoSObfuscationArchive): 'sos_commands/host/hostname', 'etc/hosts' ], - 'ip': 'sos_commands/networking/ip_-o_addr', 'mac': 'sos_commands/networking/ip_-d_address', 'username': [ 'sos_commands/login/lastlog_-u_1000-60000', diff --git a/sos/cleaner/preppers/ip.py b/sos/cleaner/preppers/ip.py new file mode 100644 index 0000000000..0bb1bdc153 --- /dev/null +++ b/sos/cleaner/preppers/ip.py @@ -0,0 +1,34 @@ +# Copyright 2023 Red Hat, Inc. 
Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.preppers import SoSPrepper + + +class IPPrepper(SoSPrepper): + """ + This prepper is for IP network addresses. The aim of this prepper is to + provide the file path for where the output of `ip addr` is saved. + """ + + name = 'ip' + + def _get_ipv6_file_list(self, archive): + return self._get_ip_file_list(archive) + + def _get_ip_file_list(self, archive): + _files = [] + if archive.is_sos: + _files = ['sos_commands/networking/ip_-o_addr'] + elif archive.is_insights: + _files = ['data/insights_commands/ip_addr'] + + return _files + +# vim: set et ts=4 sw=4 : From 978cff36bd0c4a4d3a406af88d3c21e3cb72f06d Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Fri, 26 May 2023 15:26:20 -0400 Subject: [PATCH 3/6] [hostname] Add new prepper Adds a new Prepper for handling hostname determination for preparing the mapping and parser. As part of this new prepper, pass the CLI options to each prepper for use. 
Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 9 +++- sos/cleaner/archives/insights.py | 1 - sos/cleaner/archives/sos.py | 4 -- sos/cleaner/mappings/hostname_map.py | 6 +-- sos/cleaner/parsers/hostname_parser.py | 59 +----------------------- sos/cleaner/preppers/__init__.py | 3 +- sos/cleaner/preppers/hostname.py | 63 ++++++++++++++++++++++++++ tests/unittests/cleaner_tests.py | 8 ++-- 8 files changed, 78 insertions(+), 75 deletions(-) create mode 100644 sos/cleaner/preppers/hostname.py diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index d440185d66..8783e8925a 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -126,7 +126,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, self.cleaner_md = self.manifest.components.add_section('cleaner') self.parsers = [ - SoSHostnameParser(self.cleaner_mapping, self.opts.domains), + SoSHostnameParser(self.cleaner_mapping), SoSIPParser(self.cleaner_mapping), SoSIPv6Parser(self.cleaner_mapping), SoSMacParser(self.cleaner_mapping), @@ -364,6 +364,11 @@ def execute(self): # we have at least one valid target to obfuscate self.completed_reports = [] + # TODO: as we separate mappings and parsers further, do this in a less + # janky manner + for parser in self.parsers: + if parser.name == 'Hostname Parser': + parser.mapping.set_initial_counts() self.preload_all_archives_into_maps() self.generate_parser_item_regexes() self.obfuscate_report_paths() @@ -639,7 +644,7 @@ def get_preppers(self): for _prep in helper.get_modules(): preps.extend(import_module(f"sos.cleaner.preppers.{_prep}")) for prepper in sorted(preps, key=lambda x: x.priority): - yield prepper() + yield prepper(options=self.opts) def preload_all_archives_into_maps(self): """Before doing the actual obfuscation, if we have multiple archives diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py index c8cf6a97db..30cddce1c6 100644 --- a/sos/cleaner/archives/insights.py +++ 
b/sos/cleaner/archives/insights.py @@ -23,7 +23,6 @@ class InsightsArchive(SoSObfuscationArchive): description = 'insights-client archive' prep_files = { - 'hostname': 'data/insights_commands/hostname_-f', 'mac': 'data/insights_commands/ip_addr' } diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py index 9248a18d05..3090174c0b 100644 --- a/sos/cleaner/archives/sos.py +++ b/sos/cleaner/archives/sos.py @@ -23,10 +23,6 @@ class SoSReportArchive(SoSObfuscationArchive): type_name = 'report' description = 'sos report archive' prep_files = { - 'hostname': [ - 'sos_commands/host/hostname', - 'etc/hosts' - ], 'mac': 'sos_commands/networking/ip_-d_address', 'username': [ 'sos_commands/login/lastlog_-u_1000-60000', diff --git a/sos/cleaner/mappings/hostname_map.py b/sos/cleaner/mappings/hostname_map.py index c997ce333c..4bdad103c6 100644 --- a/sos/cleaner/mappings/hostname_map.py +++ b/sos/cleaner/mappings/hostname_map.py @@ -41,7 +41,7 @@ class SoSHostnameMap(SoSMap): ] strip_exts = ('.yaml', '.yml', '.crt', '.key', '.pem', '.log', '.repo', - '.rules') + '.rules', '.conf', '.cfg') host_count = 0 domain_count = 0 @@ -80,10 +80,6 @@ def load_domains_from_map(self): self._domains[_domain_to_inject] = _ob_domain self.set_initial_counts() - def load_domains_from_options(self, domains): - for domain in domains: - self.sanitize_domain(domain.split('.')) - def get_regex_result(self, item): """Override the base get_regex_result() to provide a regex that, if this is an FQDN or a straight domain, will include an underscore diff --git a/sos/cleaner/parsers/hostname_parser.py b/sos/cleaner/parsers/hostname_parser.py index 07eb40f68f..a739629844 100644 --- a/sos/cleaner/parsers/hostname_parser.py +++ b/sos/cleaner/parsers/hostname_parser.py @@ -21,14 +21,9 @@ class SoSHostnameParser(SoSCleanerParser): r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))' ] - def __init__(self, config, opt_domains=None): + def __init__(self, config): self.mapping = SoSHostnameMap() 
super(SoSHostnameParser, self).__init__(config) - self.mapping.load_domains_from_map() - self.mapping.load_domains_from_options(opt_domains) - self.short_names = [] - self.load_short_names_from_mapping() - self.mapping.set_initial_counts() def parse_line(self, line): """This will be called for every line in every file we process, so that @@ -47,55 +42,3 @@ def parse_line(self, line): line, _rcount = self._parse_line_with_compiled_regexes(line) count += _rcount return line, count - - def load_short_names_from_mapping(self): - """When we load the mapping file into the hostname map, we have to do - some dancing to get those loaded properly into the "intermediate" dicts - that the map uses to hold hosts and domains. Similarly, we need to also - extract shortnames known to the map here. - """ - for hname in self.mapping.dataset.keys(): - if len(hname.split('.')) == 1: - # we have a short name only with no domain - if hname not in self.short_names: - self.short_names.append(hname) - - def load_hostname_into_map(self, hostname_string): - """Force add the domainname found in /sos_commands/host/hostname into - the map. We have to do this here since the normal map prep approach - from the parser would be ignored since the system's hostname is not - guaranteed - """ - if 'localhost' in hostname_string: - return - domains = hostname_string.split('.') - if len(domains) > 1: - self.short_names.append(domains[0]) - else: - self.short_names.append(hostname_string) - if len(domains) > 3: - # make sure we implicitly get example.com if the system's hostname - # is something like foo.bar.example.com - high_domain = '.'.join(domains[-2:]) - self.mapping.add(high_domain) - self.mapping.add(hostname_string) - - def load_hostname_from_etc_hosts(self, content): - """Parse an archive's copy of /etc/hosts, which requires handling that - is separate from the output of the `hostname` command. 
Just like - load_hostname_into_map(), this has to be done explicitly and we - cannot rely upon the more generic methods to do this reliably. - """ - lines = content.splitlines() - for line in lines: - if line.startswith('#') or 'localhost' in line: - continue - hostln = line.split()[1:] - for host in hostln: - if len(host.split('.')) == 1: - # only generate a mapping for fqdns but still record the - # short name here for later obfuscation with parse_line() - self.short_names.append(host) - self.mapping.add_regex_item(host) - else: - self.mapping.add(host) diff --git a/sos/cleaner/preppers/__init__.py b/sos/cleaner/preppers/__init__.py index b14873545c..790c9e1525 100644 --- a/sos/cleaner/preppers/__init__.py +++ b/sos/cleaner/preppers/__init__.py @@ -47,7 +47,7 @@ class SoSPrepper(): name = 'Undefined' priority = 100 - def __init__(self): + def __init__(self, options): self.regex_items = { 'hostname': set(), 'ip': set(), @@ -56,6 +56,7 @@ def __init__(self): 'mac': set(), 'username': set() } + self.opts = options self.soslog = logging.getLogger('sos') self.ui_log = logging.getLogger('sos_ui') diff --git a/sos/cleaner/preppers/hostname.py b/sos/cleaner/preppers/hostname.py new file mode 100644 index 0000000000..0812597e50 --- /dev/null +++ b/sos/cleaner/preppers/hostname.py @@ -0,0 +1,63 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.preppers import SoSPrepper + + +class HostnamePrepper(SoSPrepper): + """ + Prepper for providing domain and hostname information to the hostname + mapping. 
+ + The items from hostname sources are handled manually via the _get_items + method, rather than passing the file directly, as the parser does not know + what hostnames or domains to match on initially. + + This will also populate the regex_items list with local short names. + """ + + name = 'hostname' + + def _get_items_for_hostname(self, archive): + items = [] + _file = 'hostname' + if archive.is_sos: + _file = 'sos_commands/host/hostname' + elif archive.is_insights: + _file = 'data/insights_commands/hostname_-f' + + content = archive.get_file_content(_file) + if content and content != 'localhost': + domains = content.split('.') + if len(domains) > 1: + items.append(domains[0]) + self.regex_items['hostname'].add((domains[0])) + if len(domains) > 3: + # make sure we get example.com if the system's hostname + # is something like foo.bar.example.com + top_domain = '.'.join(domains[-2:]) + items.append(top_domain.strip()) + items.append(content.strip()) + + _hosts = archive.get_file_content('etc/hosts') + for line in _hosts.splitlines(): + if line.startswith('#') or 'localhost' in line: + continue + hostln = line.split()[1:] + for host in hostln: + if len(host.split('.')) == 1: + self.regex_items['hostname'].add(host) + else: + items.append(host) + + for domain in self.opts.domains: + items.append(domain) + + return items diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py index 6e0be6c813..8bf1b239ca 100644 --- a/tests/unittests/cleaner_tests.py +++ b/tests/unittests/cleaner_tests.py @@ -24,7 +24,7 @@ from sos.cleaner.preppers.hostname import HostnamePrepper from sos.cleaner.preppers.ip import IPPrepper from sos.cleaner.archives.sos import SoSReportArchive - +from sos.options import SoSOptions class CleanerMapTests(unittest.TestCase): @@ -288,13 +288,13 @@ class PrepperTests(unittest.TestCase): """ def setUp(self): - self.prepper = SoSPrepper() + self.prepper = SoSPrepper(SoSOptions()) self.archive = SoSReportArchive( 
archive_path='tests/test_data/sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz', tmpdir='/tmp' ) - self.host_prepper = HostnamePrepper() - self.ipv4_prepper = IPPrepper() + self.host_prepper = HostnamePrepper(SoSOptions(domains=[])) + self.ipv4_prepper = IPPrepper(SoSOptions()) def test_parser_method_translation(self): self.assertEqual([], self.prepper.get_parser_file_list('hostname', None)) From bab03a0684d964f4a38367734bcff8c2d4e7085a Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 30 May 2023 12:36:56 -0400 Subject: [PATCH 4/6] [mac] Add new Prepper Adds a new Prepper to handle feeding relevant files to the mac parser for initial preparation of the mappings. Signed-off-by: Jake Hunsaker --- sos/cleaner/archives/insights.py | 4 ---- sos/cleaner/archives/sos.py | 1 - sos/cleaner/preppers/mac.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 sos/cleaner/preppers/mac.py diff --git a/sos/cleaner/archives/insights.py b/sos/cleaner/archives/insights.py index 30cddce1c6..872c6b36a1 100644 --- a/sos/cleaner/archives/insights.py +++ b/sos/cleaner/archives/insights.py @@ -22,10 +22,6 @@ class InsightsArchive(SoSObfuscationArchive): type_name = 'insights' description = 'insights-client archive' - prep_files = { - 'mac': 'data/insights_commands/ip_addr' - } - @classmethod def check_is_type(cls, arc_path): try: diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py index 3090174c0b..5200bc0dd5 100644 --- a/sos/cleaner/archives/sos.py +++ b/sos/cleaner/archives/sos.py @@ -23,7 +23,6 @@ class SoSReportArchive(SoSObfuscationArchive): type_name = 'report' description = 'sos report archive' prep_files = { - 'mac': 'sos_commands/networking/ip_-d_address', 'username': [ 'sos_commands/login/lastlog_-u_1000-60000', 'sos_commands/login/lastlog_-u_60001-65536', diff --git a/sos/cleaner/preppers/mac.py b/sos/cleaner/preppers/mac.py new file mode 100644 index 0000000000..75f6607631 --- /dev/null +++ 
b/sos/cleaner/preppers/mac.py @@ -0,0 +1,28 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.preppers import SoSPrepper + + +class MacPrepper(SoSPrepper): + """ + Prepper for sourcing the host's MAC address in order to prep the mapping. + """ + + name = 'mac' + + def _get_mac_file_list(self, archive): + if archive.is_sos: + return ['sos_commands/networking/ip_-d_address'] + elif archive.is_insights: + return ['data/insights_commands/ip_addr'] + return [] + +# vim: set et ts=4 sw=4 : From 1f1c5ec9d4e3cd4263c45075c5907680c0643c45 Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Tue, 30 May 2023 15:51:15 -0400 Subject: [PATCH 5/6] [username] Add new Prepper Adds a new Prepper for usernames, and removes the bits from the parser and mapping that otherwise handled the initial preparation of the mapping. The prepper will source from the same initial files, as well as from the `--usernames` command line option. 
Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 2 +- sos/cleaner/archives/sos.py | 12 ----- sos/cleaner/mappings/username_map.py | 5 -- sos/cleaner/parsers/username_parser.py | 33 +----------- sos/cleaner/preppers/usernames.py | 69 ++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 50 deletions(-) create mode 100644 sos/cleaner/preppers/usernames.py diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 8783e8925a..87001aece1 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -132,7 +132,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, SoSMacParser(self.cleaner_mapping), SoSKeywordParser(self.cleaner_mapping, self.opts.keywords, self.opts.keyword_file), - SoSUsernameParser(self.cleaner_mapping, self.opts.usernames) + SoSUsernameParser(self.cleaner_mapping) ] for _parser in self.opts.disable_parsers: diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py index 5200bc0dd5..4c58d09d5e 100644 --- a/sos/cleaner/archives/sos.py +++ b/sos/cleaner/archives/sos.py @@ -22,18 +22,6 @@ class SoSReportArchive(SoSObfuscationArchive): type_name = 'report' description = 'sos report archive' - prep_files = { - 'username': [ - 'sos_commands/login/lastlog_-u_1000-60000', - 'sos_commands/login/lastlog_-u_60001-65536', - 'sos_commands/login/lastlog_-u_65537-4294967295', - # AD users will be reported here, but favor the lastlog files since - # those will include local users who have not logged in - 'sos_commands/login/last', - 'etc/cron.allow', - 'etc/cron.deny' - ] - } @classmethod def check_is_type(cls, arc_path): diff --git a/sos/cleaner/mappings/username_map.py b/sos/cleaner/mappings/username_map.py index ed6dc09122..db12e78817 100644 --- a/sos/cleaner/mappings/username_map.py +++ b/sos/cleaner/mappings/username_map.py @@ -22,11 +22,6 @@ class SoSUsernameMap(SoSMap): name_count = 0 - def load_names_from_options(self, opt_names): - for name in opt_names: - if name and name not in 
self.dataset.keys(): - self.add(name) - def sanitize_item(self, username): """Obfuscate a new username not currently found in the map """ diff --git a/sos/cleaner/parsers/username_parser.py b/sos/cleaner/parsers/username_parser.py index da72f380e1..5909f52d39 100644 --- a/sos/cleaner/parsers/username_parser.py +++ b/sos/cleaner/parsers/username_parser.py @@ -25,41 +25,10 @@ class SoSUsernameParser(SoSCleanerParser): name = 'Username Parser' map_file_key = 'username_map' regex_patterns = [] - skip_list = [ - 'core', - 'nobody', - 'nfsnobody', - 'shutdown', - 'stack', - 'reboot', - 'root', - 'ubuntu', - 'username', - 'wtmp' - ] - def __init__(self, config, opt_names=None): + def __init__(self, config): self.mapping = SoSUsernameMap() super(SoSUsernameParser, self).__init__(config) - self.mapping.load_names_from_options(opt_names) - - def load_usernames_into_map(self, content): - """Since we don't get the list of usernames from a straight regex for - this parser, we need to override the initial parser prepping here. - """ - users = set() - for line in content.splitlines(): - try: - user = line.split()[0] - except Exception: - continue - if not user or user.lower() in self.skip_list: - continue - users.add(user.lower()) - for each in sorted(users, key=len, reverse=True): - self.mapping.get(each) - if '\\' in each: - self.mapping.get(each.split('\\')[-1]) def _parse_line(self, line): return line, 0 diff --git a/sos/cleaner/preppers/usernames.py b/sos/cleaner/preppers/usernames.py new file mode 100644 index 0000000000..0f059596db --- /dev/null +++ b/sos/cleaner/preppers/usernames.py @@ -0,0 +1,69 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. 
+# +# See the LICENSE file in the source distribution for further information. + +from sos.cleaner.preppers import SoSPrepper + + +class UsernamePrepper(SoSPrepper): + """ + This prepper is used to source usernames from various `last` output content + as well as a couple select files. This prepper will also leverage the + --usernames option. + """ + + name = 'username' + + skip_list = [ + 'core', + 'nobody', + 'nfsnobody', + 'shutdown', + 'stack', + 'reboot', + 'root', + 'ubuntu', + 'username', + 'wtmp' + ] + + def _get_items_for_username(self, archive): + items = set() + _files = [ + 'sos_commands/login/lastlog_-u_1000-60000', + 'sos_commands/login/lastlog_-u_60001-65536', + 'sos_commands/login/lastlog_-u_65537-4294967295', + # AD users will be reported here, but favor the lastlog files since + # those will include local users who have not logged in + 'sos_commands/login/last', + 'etc/cron.allow', + 'etc/cron.deny' + ] + for _file in _files: + content = archive.get_file_content(_file) + if not content: + continue + for line in content.splitlines(): + try: + user = line.split()[0].lower() + if user and user not in self.skip_list: + items.add(user) + if '\\' in user: + items.add(user.split('\\')[-1]) + except Exception: + # empty line or otherwise unusable for name sourcing + pass + + for opt_user in self.opts.usernames: + if opt_user not in self.skip_list: + items.add(opt_user) + + return items + +# vim: set et ts=4 sw=4 : From 359aaa4784e5920adfe667e94b80de36fcba1f6a Mon Sep 17 00:00:00 2001 From: Jake Hunsaker Date: Wed, 31 May 2023 10:18:19 -0400 Subject: [PATCH 6/6] [keywords] Add new Prepper Adds a new Prepper to handle keyword preparation. This is slightly inefficient since we will only realistically need this once, but baking it into the archive loop does not pose any other problems, and it would be more fragile to break out a special flow just for keywords. 
Signed-off-by: Jake Hunsaker --- sos/cleaner/__init__.py | 3 +-- sos/cleaner/parsers/keyword_parser.py | 17 +----------- sos/cleaner/preppers/keywords.py | 37 +++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 18 deletions(-) create mode 100644 sos/cleaner/preppers/keywords.py diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 87001aece1..feeedf6680 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -130,8 +130,7 @@ def __init__(self, parser=None, args=None, cmdline=None, in_place=False, SoSIPParser(self.cleaner_mapping), SoSIPv6Parser(self.cleaner_mapping), SoSMacParser(self.cleaner_mapping), - SoSKeywordParser(self.cleaner_mapping, self.opts.keywords, - self.opts.keyword_file), + SoSKeywordParser(self.cleaner_mapping), SoSUsernameParser(self.cleaner_mapping) ] diff --git a/sos/cleaner/parsers/keyword_parser.py b/sos/cleaner/parsers/keyword_parser.py index 9a0f65ba8a..f611ccd2b1 100644 --- a/sos/cleaner/parsers/keyword_parser.py +++ b/sos/cleaner/parsers/keyword_parser.py @@ -8,7 +8,6 @@ # # See the LICENSE file in the source distribution for further information. 
-import os from sos.cleaner.parsers import SoSCleanerParser from sos.cleaner.mappings.keyword_map import SoSKeywordMap @@ -21,23 +20,9 @@ class SoSKeywordParser(SoSCleanerParser): name = 'Keyword Parser' map_file_key = 'keyword_map' - def __init__(self, config, keywords=None, keyword_file=None): + def __init__(self, config): self.mapping = SoSKeywordMap() - self.user_keywords = [] super(SoSKeywordParser, self).__init__(config) - for _keyword in self.mapping.dataset.keys(): - self.user_keywords.append(_keyword) - if keywords: - for keyword in keywords: - if keyword not in self.user_keywords: - # pre-generate an obfuscation mapping for each keyword - # this is necessary for cases where filenames are being - # obfuscated before or instead of file content - self.mapping.get(keyword.lower()) - self.user_keywords.append(keyword) - if keyword_file and os.path.exists(keyword_file): - with open(keyword_file, 'r') as kwf: - self.user_keywords.extend(kwf.read().splitlines()) def _parse_line(self, line): return line, 0 diff --git a/sos/cleaner/preppers/keywords.py b/sos/cleaner/preppers/keywords.py new file mode 100644 index 0000000000..9baf86a2a8 --- /dev/null +++ b/sos/cleaner/preppers/keywords.py @@ -0,0 +1,37 @@ +# Copyright 2023 Red Hat, Inc. Jake Hunsaker + +# This file is part of the sos project: https://github.com/sosreport/sos +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions of +# version 2 of the GNU General Public License. +# +# See the LICENSE file in the source distribution for further information. + +import os + +from sos.cleaner.preppers import SoSPrepper + + +class KeywordPrepper(SoSPrepper): + """ + Prepper to handle keywords passed to cleaner via either the `--keywords` + or `--keyword-file` options. 
+ """ + + name = 'keyword' + + def _get_items_for_keyword(self, archive): + items = [] + for kw in self.opts.keywords: + items.append(kw) + if self.opts.keyword_file and os.path.exists(self.opts.keyword_file): + with open(self.opts.keyword_file, 'r') as kwf: + items.extend(kwf.read().splitlines()) + + for item in items: + self.regex_items['keyword'].add(item) + + return items + +# vim: set et ts=4 sw=4 :