
Commit

black
Smattr committed Feb 2, 2024
1 parent a1af7f4 commit 18a40a0
Showing 11 changed files with 314 additions and 222 deletions.
17 changes: 11 additions & 6 deletions feeders/base.py
@@ -15,15 +15,19 @@ def add(self, name, item):
def __iter__(self):
raise NotImplementedError


class Entry(object):
def __init__(self, name=None, subject=None, content=None, date=None, html=False, files=None):
self.name = name or ''
self.subject = subject or ''
self.content = content or ''
def __init__(
self, name=None, subject=None, content=None, date=None, html=False, files=None
):
self.name = name or ""
self.subject = subject or ""
self.content = content or ""
self.date = date
self.html = html
self.files = files or []


def download(url):
RETRIES = 3
for i in range(RETRIES):
@@ -33,14 +37,15 @@ def download(url):
except urllib.error.URLError as e:
if i == RETRIES - 1:
raise
if getattr(e, 'code', None) == 403:
if getattr(e, "code", None) == 403:
# Some sites explicitly block urllib to prevent crawling (e.g.
# Microsoft). Since we're not really a crawler, sidestep this by
# twiddling our user agent.
request = urllib.request.Request(url, headers={'User-Agent':''})
request = urllib.request.Request(url, headers={"User-Agent": ""})
response = urllib.request.urlopen(request)
return response.read()


# Sentinel class used by feeders to ask the main logic to write back state to
# disk. Feeders should use this following processing of each feed. The purpose
# of this is to minimise the resending of entries when a feeder is interrupted
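
Taken together, the pieces in this file suggest the overall shape of a feeder. The following is an illustrative sketch only, not code from the repository: it assumes the `self.feeds` and `self.resource` attributes and the `Entry`/`SyncRequest` usage seen in the other feeders of this commit.

import base  # assumes the feeders/ directory is on the import path, as the real feeders do


class Feeder(base.Feeder):
    def __iter__(self):
        for name, feed in self.feeds.items():
            url = feed["url"]  # "url" is the config key the other feeders use
            try:
                # download() handles retries and the 403 user-agent fallback internally
                text = base.download(url).decode("utf-8", "replace")
            except Exception as e:
                yield Exception(f"Error while loading {url}: {e}")
                continue
            if self.resource.get(url) != text:
                yield base.Entry(name, f"{url} fetched", text)
                self.resource[url] = text
            # ask the main loop to persist state now, so an interruption does
            # not cause already-sent entries to be resent
            yield base.SyncRequest()
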
27 changes: 16 additions & 11 deletions feeders/diff.py
@@ -8,25 +8,30 @@
class Feeder(base.Feeder):
def __iter__(self):
for n, i in self.feeds.items():
assert 'url' in i
url = i['url']
assert "url" in i
url = i["url"]
if url in self.resource:
old = self.resource[url].splitlines()
oldurl = url
else:
old = []
oldurl = '/dev/null'
oldurl = "/dev/null"
try:
new = bs4.BeautifulSoup(base.download(url).strip(), 'html.parser').get_text().splitlines()
new = (
bs4.BeautifulSoup(base.download(url).strip(), "html.parser")
.get_text()
.splitlines()
)
except Exception as e:
yield Exception(f'Error while loading {url}: {e}')
yield Exception(f"Error while loading {url}: {e}")
continue
lines = list(difflib.unified_diff(old, new, fromfile=oldurl,
tofile=url, lineterm=''))
if i.get('ignore_white_space', 'yes').lower() == 'yes':
lines = list(
difflib.unified_diff(old, new, fromfile=oldurl, tofile=url, lineterm="")
)
if i.get("ignore_white_space", "yes").lower() == "yes":
lines = list(diffcommon.suppress_whitespace(lines))
if len(lines) > 2:
content = '\n'.join(lines)
yield base.Entry(n, f'{url} changes', content)
self.resource[url] = '\n'.join(new)
content = "\n".join(lines)
yield base.Entry(n, f"{url} changes", content)
self.resource[url] = "\n".join(new)
yield base.SyncRequest()
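
A side note on the `len(lines) > 2` guard in the hunk above: `difflib.unified_diff` yields nothing at all for identical inputs, and otherwise yields the two `---`/`+++` file-header lines followed by one or more hunks, so once whitespace-only hunks have been stripped, anything beyond two lines means there is a real change to report. A small standalone illustration (the file labels are placeholders):

import difflib

old = ["first line", "second line"]

# Identical inputs: unified_diff yields nothing.
assert list(difflib.unified_diff(old, old, lineterm="")) == []

# A real change: two file-header lines plus at least one hunk.
new = ["first line", "second line, edited"]
lines = list(
    difflib.unified_diff(
        old, new, fromfile="/dev/null", tofile="http://example.com", lineterm=""
    )
)
assert lines[0].startswith("---") and lines[1].startswith("+++")
assert len(lines) > 2
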
13 changes: 7 additions & 6 deletions feeders/diffcommon.py
@@ -1,7 +1,7 @@
def suppress_whitespace(lines):
'''
"""
Remove hunks from a unified diff that only contain white space changes.
'''
"""

# States for following state machine.
IDLE, IN_HUNK = list(range(2))
@@ -15,7 +15,7 @@ def suppress_whitespace(lines):
if state == IDLE:
assert len(accumulated) == 0

if line.startswith('@@'):
if line.startswith("@@"):
# Encountered a new hunk.
accumulated = [line]
state = IN_HUNK
@@ -29,15 +29,16 @@ def suppress_whitespace(lines):
assert state == IN_HUNK
assert len(accumulated) > 0

if (line.startswith('+') or line.startswith('-')) and \
line[1:].strip() != '':
if (line.startswith("+") or line.startswith("-")) and line[
1:
].strip() != "":
# This is a non-empty change line. Decide to keep this hunk.
for a in accumulated:
yield a
accumulated = []
state = IDLE

elif line.startswith('@@'):
elif line.startswith("@@"):
# Encountered a new hunk without finding anything interesting in
# the current hunk. Ditch the current hunk (the prior contents
# of `accumulated`).
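
To make the intended behaviour of `suppress_whitespace` concrete, here is a hedged usage sketch. The input and assertions are illustrative: per the docstring, a hunk whose only added or removed lines are blank once stripped is dropped, while a hunk with a substantive change survives. The module is assumed to be importable as `diffcommon`, as the feeders above do.

import diffcommon  # assumes the feeders/ directory is on the import path

diff = [
    "--- old",
    "+++ new",
    "@@ -1,2 +1,3 @@",
    " unchanged line",
    "+   ",  # the only change in this hunk is a blank/whitespace line
    " unchanged line",
    "@@ -10 +11 @@",
    "-old wording",
    "+new wording",  # a real change, so this hunk should be kept
]

kept = list(diffcommon.suppress_whitespace(diff))
assert "@@ -10 +11 @@" in kept  # the hunk with a real change survives
assert "@@ -1,2 +1,3 @@" not in kept  # the whitespace-only hunk is dropped
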
61 changes: 35 additions & 26 deletions feeders/git.py
@@ -9,22 +9,21 @@


def run(cmd, cwd):
p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
stdout = stdout.decode('utf-8', 'replace')
stderr = stderr.decode('utf-8', 'replace')
stdout = stdout.decode("utf-8", "replace")
stderr = stderr.decode("utf-8", "replace")
return p.returncode, stdout.strip(), stderr.strip()

class Feeder(base.Feeder):

class Feeder(base.Feeder):
def __iter__(self):
for n, i in self.feeds.items():

assert 'url' in i
remote = i['url']
assert "url" in i
remote = i["url"]

branch = i.get('branch', 'master')
branch = i.get("branch", "master")

state = self.resource.get((remote, branch))

@@ -34,10 +33,11 @@ def __iter__(self):
if state is None:
# This is the first time we've encountered this repository. We
# need to clone it.
ret, _, stderr = run(['git', 'clone', '--bare', '--branch',
branch, remote, '.'], tmp)
ret, _, stderr = run(
["git", "clone", "--bare", "--branch", branch, remote, "."], tmp
)
if ret != 0:
yield Exception(f'failed to clone {remote}:\n{stderr}')
yield Exception(f"failed to clone {remote}:\n{stderr}")
shutil.rmtree(tmp)
continue

@@ -53,19 +53,23 @@ def __iter__(self):
t.extractall(tmp)

# Update the history in the working directory.
ret, _, stderr = run(['git', 'fetch', remote, f'{branch}:{branch}'], tmp)
ret, _, stderr = run(
["git", "fetch", remote, f"{branch}:{branch}"], tmp
)
if ret != 0:
yield Exception('failed to update temporary working '
f'directory for {remote}:\n{stderr}')
yield Exception(
"failed to update temporary working "
f"directory for {remote}:\n{stderr}"
)
shutil.rmtree(tmp)
continue

# Now retrieve the log and look for new commits.
ret, stdout, stderr = run(['git', 'log', '--reverse', '--pretty=%H',
branch], tmp)
ret, stdout, stderr = run(
["git", "log", "--reverse", "--pretty=%H", branch], tmp
)
if ret != 0:
yield Exception('failed to retrieve Git log of '
f'{remote}:\n{stderr}')
yield Exception("failed to retrieve Git log of " f"{remote}:\n{stderr}")
shutil.rmtree(tmp)
continue

@@ -76,17 +80,22 @@ def __iter__(self):
if last_commit is None or seen_last_commit:
# This is a new commit.

ret, summary, stderr = run(['git', 'log', '-n', '1',
'--format=%s', commit], tmp)
ret, summary, stderr = run(
["git", "log", "-n", "1", "--format=%s", commit], tmp
)
if ret != 0:
yield Exception('failed to retrieve summary for Git '
f'commit {commit} of {remote}:\n{stderr}')
yield Exception(
"failed to retrieve summary for Git "
f"commit {commit} of {remote}:\n{stderr}"
)
continue

ret, diff, stderr = run(['git', 'show', commit], tmp)
ret, diff, stderr = run(["git", "show", commit], tmp)
if ret != 0:
yield Exception('failed to retrieve diff for Git '
f'commit {commit} of {remote}:\n{stderr}')
yield Exception(
"failed to retrieve diff for Git "
f"commit {commit} of {remote}:\n{stderr}"
)
continue

yield base.Entry(n, summary, diff)
@@ -101,7 +110,7 @@ def __iter__(self):
# bother compressing it because the resources as a whole are
# compressed.
buffer = io.BytesIO()
with tarfile.open(fileobj=buffer, mode='w') as t:
with tarfile.open(fileobj=buffer, mode="w") as t:
for item in Path(tmp).iterdir():
t.add(item, item.name)
data = buffer.getvalue()
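
The state-persistence trick above (packing the bare clone into an uncompressed in-memory tar so it can be stored alongside the feed state, then unpacking it into a fresh temporary directory on the next run) can be illustrated with a standalone stdlib round trip; the file name and contents below are placeholders:

import io
import tarfile
import tempfile
from pathlib import Path

# Pack a directory into an in-memory tar, as the feeder does when saving state.
src = tempfile.mkdtemp()
Path(src, "HEAD").write_text("ref: refs/heads/master\n")

buffer = io.BytesIO()
with tarfile.open(fileobj=buffer, mode="w") as t:
    for item in Path(src).iterdir():
        t.add(item, item.name)
state = buffer.getvalue()  # this is what would be stored in self.resource

# On the next run, unpack the saved bytes into a fresh temporary directory.
dst = tempfile.mkdtemp()
with tarfile.open(fileobj=io.BytesIO(state)) as t:
    t.extractall(dst)
assert (Path(dst) / "HEAD").read_text() == "ref: refs/heads/master\n"
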
23 changes: 12 additions & 11 deletions feeders/htmldiff.py
@@ -6,25 +6,26 @@
class Feeder(base.Feeder):
def __iter__(self):
for n, i in self.feeds.items():
assert 'url' in i
url = i['url']
assert "url" in i
url = i["url"]
if url in self.resource:
old = self.resource[url].splitlines()
oldurl = url
else:
old = []
oldurl = '/dev/null'
oldurl = "/dev/null"
try:
new = base.download(url).decode('utf-8', 'replace').strip().splitlines()
new = base.download(url).decode("utf-8", "replace").strip().splitlines()
except Exception as e:
yield Exception(f'Error while loading {url}: {e}')
yield Exception(f"Error while loading {url}: {e}")
continue
lines = list(difflib.unified_diff(old, new, fromfile=oldurl,
tofile=url, lineterm=''))
if i.get('ignore_white_space', 'yes').lower() == 'yes':
lines = list(
difflib.unified_diff(old, new, fromfile=oldurl, tofile=url, lineterm="")
)
if i.get("ignore_white_space", "yes").lower() == "yes":
lines = list(diffcommon.suppress_whitespace(lines))
if len(lines) > 2:
content = '\n'.join(lines)
yield base.Entry(n, f'{url} changes', content)
self.resource[url] = '\n'.join(new)
content = "\n".join(lines)
yield base.Entry(n, f"{url} changes", content)
self.resource[url] = "\n".join(new)
yield base.SyncRequest()
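
For contrast with feeders/diff.py above: this feeder diffs the raw decoded markup, whereas diff.py first reduces the page to its visible text with BeautifulSoup. A small illustration of the difference (the HTML is a placeholder):

import bs4

raw = b"<html><body><h1>Title</h1><p>Hello <b>world</b></p></body></html>"

# What feeders/diff.py compares: the visible text only.
text = bs4.BeautifulSoup(raw.strip(), "html.parser").get_text()
# What feeders/htmldiff.py compares: the markup itself.
markup = raw.decode("utf-8", "replace").strip()

print(text)    # e.g. "TitleHello world"
print(markup)  # the original tags, so markup-only edits also show up in the diff
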
27 changes: 14 additions & 13 deletions feeders/jumpthrough.py
@@ -6,14 +6,14 @@
class Feeder(base.Feeder):
def __iter__(self):
for n, i in self.feeds.items():
assert 'url' in i
url = i['url']
assert "url" in i
url = i["url"]
data = self.resource.get(url, {})
if isinstance(data, dict): # new scheme
etag = data.get('etag')
modified = data.get('modified')
seen = data.get('seen', [])[:]
else: # old scheme
if isinstance(data, dict): # new scheme
etag = data.get("etag")
modified = data.get("modified")
seen = data.get("seen", [])[:]
else: # old scheme
assert isinstance(data, list)
etag = None
modified = None
@@ -26,18 +26,19 @@ def __iter__(self):
if id not in seen:
try:
data = base.download(e.link)
yield base.Entry(n, e.title, data, \
date=rsscommon.get_date(e), html=True)
yield base.Entry(
n, e.title, data, date=rsscommon.get_date(e), html=True
)
except urllib.error.HTTPError:
# Suppress 404s from broken links.
pass
seen.append(id)
# save in new scheme
self.resource[url] = {
'etag':etag,
'modified':modified,
'seen':seen,
"etag": etag,
"modified": modified,
"seen": seen,
}
yield base.SyncRequest()
except Exception as e:
yield Exception(f'Error from feed {n}: {e}')
yield Exception(f"Error from feed {n}: {e}")
44 changes: 26 additions & 18 deletions feeders/rss.py
@@ -4,14 +4,14 @@
class Feeder(base.Feeder):
def __iter__(self):
for n, i in self.feeds.items():
assert 'url' in i
url = i['url']
assert "url" in i
url = i["url"]
data = self.resource.get(url, {})
if isinstance(data, dict): # new scheme
etag = data.get('etag')
modified = data.get('modified')
seen = data.get('seen', [])[:]
else: # old scheme
if isinstance(data, dict): # new scheme
etag = data.get("etag")
modified = data.get("modified")
seen = data.get("seen", [])[:]
else: # old scheme
assert isinstance(data, list)
etag = None
modified = None
@@ -24,21 +24,29 @@ def __iter__(self):
id = rsscommon.get_id(e)
if id not in seen:
links = rsscommon.get_links(e)
yield base.Entry(n, e.title,
'<p><b>%(title)s</b><br/><font size="-1">%(links)s</font></p>%(content)s' % {
'title':rsscommon.get_title(e),
'links':'<br/>'.join(f'<a href="{x}">{x}</a>' for x in links),
'content':rsscommon.get_content(e),
}, date=rsscommon.get_date(e), html=True)
yield base.Entry(
n,
e.title,
'<p><b>%(title)s</b><br/><font size="-1">%(links)s</font></p>%(content)s'
% {
"title": rsscommon.get_title(e),
"links": "<br/>".join(
f'<a href="{x}">{x}</a>' for x in links
),
"content": rsscommon.get_content(e),
},
date=rsscommon.get_date(e),
html=True,
)
seen.append(id)
except Exception as e:
yield Exception(f'Error from feed {n}: {e}')
yield Exception(f"Error from feed {n}: {e}")
# save in new scheme
self.resource[url] = {
'etag':etag,
'modified':modified,
'seen':seen,
"etag": etag,
"modified": modified,
"seen": seen,
}
yield base.SyncRequest()
except Exception as e:
yield Exception(f'Error from feed {n}: {e}')
yield Exception(f"Error from feed {n}: {e}")