From a09593bae1bdabafbaf7c2f8bc73cbf768c30f7f Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 1 Sep 2020 14:49:11 +0100 Subject: [PATCH 1/6] Add config options for controlling 3PID email address obfuscation --- sydent/sydent.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sydent/sydent.py b/sydent/sydent.py index 482efd42..02f8287e 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -124,6 +124,21 @@ 'email.smtppassword': '', 'email.hostname': '', 'email.tlsmode': '0', + # When a user is invited to a room via their email address, that invite is + # displayed in the room list using an obfuscated version of the user's email + # address. These config options determine how much of the email address to + # obfuscate. Note that the '@' sign is always included. + # + # If the given username or domain is shorter than the threshold defined here, + # the string is then redacted based on its length. This ensure that a full email + # address is never shown, even if it is extremely short. 
+ # + # The number of characters from the beginning to reveal of the email's username + # portion (left of the '@' sign) + 'email.third_party_invite_username_obfuscate_characters': '3', + # The number of characters from the beginning to reveal of the email's domain + # portion (right of the '@' sign) + 'email.third_party_invite_domain_obfuscate_characters': '3', }, 'sms': { 'bodyTemplate': 'Your code is {token}', @@ -182,6 +197,13 @@ def __init__(self, cfg, reactor=twisted.internet.reactor): self.cfg.get("general", "delete_tokens_on_bind") ) + self.username_obfuscate_characters = int(self.cfg.get( + "email", "email.third_party_invite_username_obfuscate_characters" + )) + self.domain_obfuscate_characters = int(self.cfg.get( + "email", "email.third_party_invite_domain_obfuscate_characters" + )) + # See if a pepper already exists in the database # Note: This MUST be run before we start serving requests, otherwise lookups for # 3PID hashes may come in before we've completed generating them From 1666aaaa58180100b1e9b9cf48c7b3ac24235e29 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 1 Sep 2020 14:49:46 +0100 Subject: [PATCH 2/6] Make use of the new config options This endpoint is currently only accepting email addresses, which is why I didn't make it generic (or support msisdn's). 
--- sydent/http/servlets/store_invite_servlet.py | 47 +++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/sydent/http/servlets/store_invite_servlet.py b/sydent/http/servlets/store_invite_servlet.py index 1174d02c..a7c3d913 100644 --- a/sydent/http/servlets/store_invite_servlet.py +++ b/sydent/http/servlets/store_invite_servlet.py @@ -126,16 +126,15 @@ def render_POST(self, request): "token": token, "public_key": pubKeyBase64, "public_keys": keysToReturn, - "display_name": self.redact(address), + "display_name": self.redact_email_address(address), } return resp - def redact(self, address): + def redact_email_address(self, address): """ - Redacts the content of a 3PID address. If the address is an email address, - then redacts both the address's localpart and domain independently. Otherwise, - redacts the whole address. + Redacts the content of a 3PID address. Redacts both the email's username and + domain independently. :param address: The address to redact. :type address: unicode @@ -143,27 +142,41 @@ def redact(self, address): :return: The redacted address. :rtype: unicode """ - return u"@".join(map(self._redact, address.split(u"@", 1))) + # Extract strings from the address + username, domain = address.split(u"@", 1) - def _redact(self, s): + # Obfuscate strings + redacted_username = self._redact(username, self.sydent.username_obfuscate_characters) + redacted_domain = self._redact(domain, self.sydent.domain_obfuscate_characters) + + return redacted_username + u"@" + redacted_domain + + def _redact(self, s, characters_to_reveal): """ - Redacts the content of a 3PID address. If the address is an email address, - then redacts both the address's localpart and domain independently. Otherwise, - redacts the whole address. + Redacts the content of a string, using a given amount of characters to reveal. + If the string is shorter than the given threshold, redact it based on length. - :param s: The address to redact. 
+ :param s: The string to redact. :type s: unicode - :return: The redacted address. + :param characters_to_reveal: How many characters of the string to leave before + the '...' + :type characters_to_reveal: int + + :return: The redacted string. :rtype: unicode """ - if len(s) > 5: - return s[:3] + u"..." - elif len(s) > 1: - return s[0] + u"..." - else: + # If the string is shorter than the defined threshold, redact based on length + if len(s) <= characters_to_reveal: + if len(s) > 5: + return s[:3] + u"..." + if len(s) > 1: + return s[0] + u"..." return u"..." + # Otherwise truncate it and add an ellipsis + return s[:characters_to_reveal] + u"..." + def _randomString(self, length): """ Generate a random string of the given length. From 2ab4225842e675773dbf003e8d7b8288beecac45 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 1 Sep 2020 14:49:58 +0100 Subject: [PATCH 3/6] Add a test --- tests/test_invites.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/test_invites.py b/tests/test_invites.py index 2deda95c..ec03b14a 100644 --- a/tests/test_invites.py +++ b/tests/test_invites.py @@ -4,6 +4,7 @@ from tests.utils import make_sydent from twisted.web.client import Response from twisted.trial import unittest +from sydent.http.servlets.store_invite_servlet import StoreInviteServlet class ThreepidInvitesTestCase(unittest.TestCase): @@ -11,7 +12,14 @@ class ThreepidInvitesTestCase(unittest.TestCase): def setUp(self): # Create a new sydent - self.sydent = make_sydent() + config = { + "email": { + # Used by test_invited_email_address_obfuscation + "email.third_party_invite_username_obfuscate_characters": "6", + "email.third_party_invite_domain_obfuscate_characters": "8", + }, + } + self.sydent = make_sydent(test_config=config) def test_delete_on_bind(self): """Tests that 3PID invite tokens are deleted upon delivery after a successful @@ -65,6 +73,23 @@ def post_json_get_nothing(uri, post_json, opts): # Check that we 
didn't get any result. self.assertEqual(len(rows), 0, rows) + def test_invited_email_address_obfuscation(self): + """Test that email addresses included in third-party invites are properly + obfuscated according to the relevant config options + """ + store_invite_servlet = StoreInviteServlet(self.sydent) + + email_address = "1234567890@1234567890.com" + redacted_address = store_invite_servlet.redact_email_address(email_address) + + self.assertEqual(redacted_address, "123456...@12345678...") + + # Even short addresses are redacted + short_email_address = "1@1.com" + redacted_address = store_invite_servlet.redact_email_address(short_email_address) + + self.assertEqual(redacted_address, "...@1...") + class ThreepidInvitesNoDeleteTestCase(unittest.TestCase): """Test that invite tokens are not deleted when that is disabled. From 2f92dd2626ba9602fc096e2c5b881d085a37d3d2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 1 Sep 2020 15:44:43 +0100 Subject: [PATCH 4/6] Changelog --- changelog.d/311.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/311.feature diff --git a/changelog.d/311.feature b/changelog.d/311.feature new file mode 100644 index 00000000..42266b24 --- /dev/null +++ b/changelog.d/311.feature @@ -0,0 +1 @@ +Add config options for controlling how email addresses are obfuscated in third party invites. \ No newline at end of file From 95f37d2ff2a332847c5e02bf65e088ebe24a7712 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 1 Sep 2020 16:17:12 +0100 Subject: [PATCH 5/6] Explain what truncated based on length entails --- sydent/sydent.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sydent/sydent.py b/sydent/sydent.py index 02f8287e..3c4b1c6b 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -130,8 +130,17 @@ # obfuscate. Note that the '@' sign is always included. 
# # If the given username or domain is shorter than the threshold defined here, - # the string is then redacted based on its length. This ensure that a full email - # address is never shown, even if it is extremely short. + # the string is then redacted based on its length. The rules are as follows: + # + # If the string is longer than a configured limit below, it is truncated to that limit + # with '...' added. Otherwise: + # + # * If the string is longer than 5 characters, it is truncated to 3 characters + '...' + # * If the string is longer than 1 character, it is truncated to 1 character + '...' + # * If the string is 1 character long, it is converted to '...' + # + # This ensures that a full email address is never shown, even if it is extremely + # short. # # The number of characters from the beginning to reveal of the email's username # portion (left of the '@' sign) From 2b09cf3a90572fb9847316ad60732bf983ea975e Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 1 Sep 2020 18:29:14 +0100 Subject: [PATCH 6/6] Remove unnecessary line in config file explanation --- sydent/sydent.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sydent/sydent.py b/sydent/sydent.py index 3c4b1c6b..6d93eb20 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -129,9 +129,6 @@ # address. These config options determine how much of the email address to # obfuscate. Note that the '@' sign is always included. # - # If the given username or domain is shorter than the threshold defined here, - # the string is then redacted based on its length. The rules are as follows: - # # If the string is longer than a configured limit below, it is truncated to that limit # with '...' added. Otherwise: #