Skip to content

Commit

Permalink
mastodon: suppress backfeed from blocked accounts
Browse files Browse the repository at this point in the history
for #895
  • Loading branch information
snarfed committed Oct 30, 2019
1 parent 9913721 commit 73e8e91
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 33 deletions.
1 change: 1 addition & 0 deletions mastodon.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class Mastodon(models.Source):
OAUTH_START_HANDLER = StartHandler
SHORT_NAME = 'mastodon'
CAN_PUBLISH = True
HAS_BLOCKS = True
TYPE_LABELS = {
'post': 'toot',
'comment': 'reply',
Expand Down
35 changes: 33 additions & 2 deletions models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@

REFETCH_HFEED_TRIGGER = datetime.datetime.utcfromtimestamp(-1)

BLOCKLIST_CACHE_TIME = 60 * 60 * 2 # 2h
# limit size of cached block lists to try to stay under memcache 1MB value limit:
# https://github.com/snarfed/bridgy/issues/764
# https://cloud.google.com/appengine/docs/standard/python/memcache/#limits
BLOCKLIST_MAX_IDS = 35000

# maps string short name to Source subclass. populated by SourceMeta.
sources = {}

Expand Down Expand Up @@ -110,7 +116,8 @@ class Source(with_metaclass(SourceMeta, StringIdModel)):
RATE_LIMIT_HTTP_CODES = ('429',)
DISABLE_HTTP_CODES = ('401',)
TRANSIENT_ERROR_HTTP_CODES = ()

# whether granary supports fetching block lists
HAS_BLOCKS = False
# whether to require a u-syndication link for backfeed
BACKFEED_REQUIRES_SYNDICATION_LINK = False

Expand Down Expand Up @@ -174,6 +181,10 @@ class Source(with_metaclass(SourceMeta, StringIdModel)):
# datastore transactionally. set this to {} before beginning.
updates = None

# manually cache id blocklist (returned by granary's get_blocklist_ids()) in
# this attribute, per instance. set and used by is_blocked().
blocked_ids = None

# gr_source is *not* set to None by default here, since it needs to be unset
# for __getattr__ to run when it's accessed.

Expand Down Expand Up @@ -749,8 +760,28 @@ def is_blocked(self, obj):
"""Returns True if an object's author is being blocked.
...ie they're in this user's block list.
Note that this method is tested in test_twitter.py, not test_models.py, for
historical reasons.
"""
return False
if not self.HAS_BLOCKS:
return False

if self.blocked_ids is None:
cache_key = 'B %s' % self.bridgy_path()
self.blocked_ids = memcache.get(cache_key)
if self.blocked_ids is None:
try:
ids = self.gr_source.get_blocklist_ids()
except gr_source.RateLimited as e:
ids = e.partial or []
self.blocked_ids = ids[:BLOCKLIST_MAX_IDS]
memcache.set(cache_key, self.blocked_ids, time=BLOCKLIST_CACHE_TIME)

for o in [obj] + util.get_list(obj, 'object'):
for field in 'author', 'actor':
if o.get(field, {}).get('numeric_id') in self.blocked_ids:
return True


class Webmentions(StringIdModel):
Expand Down
2 changes: 1 addition & 1 deletion templates/about.html
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ <h3 id="listen">Pulling back responses</h3>
</ul>
<li>Instagram likes are even worse than Twitter likes. They <a href="https://github.com/snarfed/bridgy/issues/722">only show the first 10 individual likes per photo on the web</a>, and a total count after that, so Bridgy can only backfeed the first 10 likes.
</li>
<li>Have you blocked the author inside the silo? If so, Bridgy won't send you any of their responses. (This is <a href="https://github.com/snarfed/bridgy/issues/764">best effort only for Twitter</a>, due to <a href="https://developer.twitter.com/en/docs/accounts-and-users/mute-block-report-users/api-reference/get-blocks-ids#resource-information">their API's rate limits</a>. In practice, Bridgy will only filter out responses from the first 40,000 users on your Twitter block list.)
<li>Have you blocked the author inside the silo? If so, Bridgy won't send you any of their responses. (This is <a href="https://github.com/snarfed/bridgy/issues/764">best effort only for Twitter</a>, due to <a href="https://developer.twitter.com/en/docs/accounts-and-users/mute-block-report-users/api-reference/get-blocks-ids#resource-information">their API's rate limits</a>. In practice, Bridgy will only filter out responses from the first 35,000 users on your Twitter block list.)
</li>
<li>Currently, GitHub responses require a <a href="#link">syndication link on the original post on your web site</a> (<a href="https://chat.indieweb.org/dev/2018-02-26#t1519610412751400">background</a>). They're also currently best effort only. <a href="https://developer.github.com/v3/">Their API</a> is almost entirely broken down by user and repository, so there's no good way to ask for all recent responses to your posts. We get close by using their <a href="https://developer.github.com/v3/activity/notifications/">Notifications API</a>, which is good, but not always comprehensive. Apologies!
</li>
Expand Down
2 changes: 1 addition & 1 deletion tests/test_twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def test_is_blocked_rate_limited(self):

def test_is_blocked_size_limit(self):
"""Test that we cap block list sizes in memcache."""
self.mox.stubs.Set(twitter, 'BLOCKLIST_MAX_IDS', 2)
self.mox.stubs.Set(models, 'BLOCKLIST_MAX_IDS', 2)
api_url = gr_twitter.API_BASE + gr_twitter.API_BLOCK_IDS % '-1'
self.expect_urlopen(api_url, json_dumps({
'ids': ['1', '2', '3'],
Expand Down
30 changes: 1 addition & 29 deletions twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@
import models
import util

BLOCKLIST_CACHE_TIME = 60 * 60 * 2 # 2h
# limit size of cached block lists to try to stay under memcache 1MB value limit:
# https://github.com/snarfed/bridgy/issues/764
# https://cloud.google.com/appengine/docs/standard/python/memcache/#limits
BLOCKLIST_MAX_IDS = 35000


class Twitter(models.Source):
"""A Twitter account.
Expand All @@ -45,11 +39,10 @@ class Twitter(models.Source):
}
TRANSIENT_ERROR_HTTP_CODES = ('404',)
CAN_PUBLISH = True
HAS_BLOCKS = True
URL_CANONICALIZER = gr_twitter.Twitter.URL_CANONICALIZER
URL_CANONICALIZER.headers = util.REQUEST_HEADERS

blocked_ids = None

@staticmethod
def new(handler, auth_entity=None, **kwargs):
"""Creates and returns a :class:`Twitter` entity.
Expand Down Expand Up @@ -156,27 +149,6 @@ def canonicalize_url(self, url, activity=None, **kwargs):
url = url.replace('/statuses/', '/status/')
return super(Twitter, self).canonicalize_url(url, **kwargs)

def is_blocked(self, obj):
    """Returns True if an object's author is being blocked.

    ...i.e. they're in this user's block list.

    The block list is loaded lazily. It is looked up first in this
    instance's blocked_ids attribute, then in memcache, and finally fetched
    with self.gr_source.get_blocklist_ids(). If that fetch is rate limited,
    the partial list attached to the exception (if any) is used instead. At
    most BLOCKLIST_MAX_IDS ids are kept, and the result is stored in
    memcache with an expiration of BLOCKLIST_CACHE_TIME.

    Args:
      obj: dict. Its own 'author' and 'actor' fields are checked, along with
        those of each value returned by util.get_list(obj, 'object').

    Returns:
      True if any checked author/actor has a 'numeric_id' in the block list,
      False otherwise.
    """
    if self.blocked_ids is None:
        cache_key = 'B %s' % self.bridgy_path()
        self.blocked_ids = memcache.get(cache_key)
        if self.blocked_ids is None:
            try:
                ids = self.gr_source.get_blocklist_ids()
            except gr_source.RateLimited as e:
                # best effort: keep whatever ids were fetched before the
                # rate limit hit
                ids = e.partial or []
            # cap the list size before caching it
            self.blocked_ids = ids[:BLOCKLIST_MAX_IDS]
            memcache.set(cache_key, self.blocked_ids, time=BLOCKLIST_CACHE_TIME)

    for o in [obj] + util.get_list(obj, 'object'):
        for field in 'author', 'actor':
            if o.get(field, {}).get('numeric_id') in self.blocked_ids:
                return True

    # explicit negative result instead of implicitly returning None
    return False


class AuthHandler(util.Handler):
"""Base OAuth handler class."""
Expand Down

0 comments on commit 73e8e91

Please sign in to comment.