From 229eefe6f6a39d03427c2cbe4ca57992880d43d4 Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 16 Sep 2021 09:22:46 +0200 Subject: [PATCH 1/6] Extend ModuleApi with the methods we'll need to reject spam based on IP - resolves #10832 Signed-off-by: David Teller --- changelog.d/10833.misc | 1 + synapse/module_api/__init__.py | 49 +++++++++++++++++++++++++++++++++- tests/module_api/test_api.py | 13 +++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 changelog.d/10833.misc diff --git a/changelog.d/10833.misc b/changelog.d/10833.misc new file mode 100644 index 000000000000..f23c0a1a023a --- /dev/null +++ b/changelog.d/10833.misc @@ -0,0 +1 @@ +Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data. diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 2d403532fadb..ab378bb02a5a 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -27,6 +27,7 @@ ) import jinja2 +from typing_extensions import TypedDict from twisted.internet import defer from twisted.web.resource import IResource @@ -46,7 +47,14 @@ from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter -from synapse.types import JsonDict, Requester, UserID, UserInfo, create_requester +from synapse.types import ( + DomainSpecificString, + JsonDict, + Requester, + UserID, + UserInfo, + create_requester, +) from synapse.util import Clock from synapse.util.caches.descriptors import cached @@ -79,6 +87,13 @@ logger = logging.getLogger(__name__) +class UserIpAndAgent(TypedDict): + ip: str + user_agent: str + # The time at which this user agent/ip was last seen. + last_seen: int + + class ModuleApi: """A proxy object that gets passed to various plugin modules so they can register new users etc if necessary. @@ -700,6 +715,38 @@ def read_templates( (td for td in (self.custom_template_dir, custom_template_directory) if td), ) + def ts(self) -> int: + """Return a timestamp for the current time, in milliseconds since the epoch""" + return self._hs.get_clock().time_msec() + + def is_mine(self, domain_specific_string: DomainSpecificString) -> bool: + """Checks whether an ID comes from this homeserver.""" + return self._hs.is_mine(domain_specific_string) + + def is_mine_id(self, string: str) -> bool: + """Checks whether an ID comes from this homeserver.""" + return string.split(":", 1)[1] == self._server_name + + async def get_user_ip_and_agents( + self, user: UserID, since_ts: Optional[int] = None + ) -> List[UserIpAndAgent]: + """ + Return the list of user IPs and agents for a user. + + Only useful for local users. + """ + raw_data = await self._store.get_user_ip_and_agents(user) + # Sanitize some of the data. We don't want to return tokens. + return [ + { + "ip": str(data["ip"]), + "user_agent": str(data["user_agent"]), + "last_seen": int(data["last_seen"]), + } + for data in raw_data + if since_ts is None or int(data["last_seen"]) >= since_ts + ] + class PublicRoomListManager: """Contains methods for adding to, removing from and querying whether a room diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py index 7dd519cd44a4..c2a2f0344467 100644 --- a/tests/module_api/test_api.py +++ b/tests/module_api/test_api.py @@ -89,6 +89,19 @@ def test_get_userinfo_by_id__no_user_found(self): found_user = self.get_success(self.module_api.get_userinfo_by_id("@alice:test")) self.assertIsNone(found_user) + def test_get_user_ip_and_agents(self): + user_id = self.register_user("test_get_user_ip_and_agents_user", "1234") + info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) + self.assertIdentical(info, []) + + def test_get_user_ip_and_agents__no_user_found(self): + info = self.get_success( + self.module_api.get_user_ip_and_agents( + "@test_get_user_ip_and_agents_user_nonexistent" + ) + ) + self.assertIdentical(info, []) + def test_sending_events_into_room(self): """Tests that a module can send events into a room""" # Mock out create_and_send_nonmember_event to check whether events are being sent From da94cdca47c2e7cbc6d1b0f59f2f40082e995d5c Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 16 Sep 2021 15:20:24 +0200 Subject: [PATCH 2/6] FIXUP --- synapse/module_api/__init__.py | 65 +++++++++++++++++++++------------- tests/module_api/test_api.py | 32 +++++++++++++++-- 2 files changed, 70 insertions(+), 27 deletions(-) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index ab378bb02a5a..51ef46e6ff6b 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -24,10 +24,10 @@ List, Optional, Tuple, + Union, ) import jinja2 -from typing_extensions import TypedDict from twisted.internet import defer from twisted.web.resource import IResource @@ -87,12 +87,21 @@ logger = logging.getLogger(__name__) -class UserIpAndAgent(TypedDict): +class UserIpAndAgent: + """ + A ip, user_agent pair used by a user to connect to this homeserver. + """ + ip: str user_agent: str # The time at which this user agent/ip was last seen. last_seen: int + def __init__(self, ip: str, user_agent: str, last_seen: int): + self.ip = ip + self.user_agent = user_agent + self.last_seen = last_seen + class ModuleApi: """A proxy object that gets passed to various plugin modules so they @@ -715,37 +724,45 @@ def read_templates( (td for td in (self.custom_template_dir, custom_template_directory) if td), ) - def ts(self) -> int: - """Return a timestamp for the current time, in milliseconds since the epoch""" - return self._hs.get_clock().time_msec() - - def is_mine(self, domain_specific_string: DomainSpecificString) -> bool: + def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: """Checks whether an ID comes from this homeserver.""" - return self._hs.is_mine(domain_specific_string) - - def is_mine_id(self, string: str) -> bool: - """Checks whether an ID comes from this homeserver.""" - return string.split(":", 1)[1] == self._server_name + if isinstance(id, DomainSpecificString): + return self._hs.is_mine(id) + else: + return self._hs.is_mine_id(id) async def get_user_ip_and_agents( - self, user: UserID, since_ts: Optional[int] = None + self, user_id: str, since_ts: Optional[float] = None ) -> List[UserIpAndAgent]: """ Return the list of user IPs and agents for a user. Only useful for local users. """ - raw_data = await self._store.get_user_ip_and_agents(user) - # Sanitize some of the data. We don't want to return tokens. - return [ - { - "ip": str(data["ip"]), - "user_agent": str(data["user_agent"]), - "last_seen": int(data["last_seen"]), - } - for data in raw_data - if since_ts is None or int(data["last_seen"]) >= since_ts - ] + # Don't hit the db if this is not a local user. + is_mine = False + try: + # Let's be defensive against ill-formed strings. + if self.is_mine(user_id): + is_mine = True + except Exception: + pass + if is_mine: + raw_data = await self._store.get_user_ip_and_agents( + UserID.from_string(user_id) + ) + # Sanitize some of the data. We don't want to return tokens. + return [ + UserIpAndAgent( + ip=str(data["ip"]), + user_agent=str(data["user_agent"]), + last_seen=int(data["last_seen"]), + ) + for data in raw_data + if since_ts is None or int(data["last_seen"]) >= since_ts + ] + else: + return [] class PublicRoomListManager: diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py index c2a2f0344467..7bd2c64f95b6 100644 --- a/tests/module_api/test_api.py +++ b/tests/module_api/test_api.py @@ -43,6 +43,7 @@ def prepare(self, reactor, clock, homeserver): self.module_api = homeserver.get_module_api() self.event_creation_handler = homeserver.get_event_creation_handler() self.sync_handler = homeserver.get_sync_handler() + self.auth_handler = homeserver.get_auth_handler() def make_homeserver(self, reactor, clock): return self.setup_test_homeserver( @@ -92,15 +93,40 @@ def test_get_userinfo_by_id__no_user_found(self): def test_get_user_ip_and_agents(self): user_id = self.register_user("test_get_user_ip_and_agents_user", "1234") info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) - self.assertIdentical(info, []) + self.assertEqual(info, []) + + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip_1", "user_agent_1", None + ) + ) + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip_2", "user_agent_2", None + ) + ) + info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) + + self.assertEqual(len(info), 2) + ip_1_seen = False + ip_2_seen = False + for i in info: + if i.ip == "ip_1": + ip_1_seen = True + self.assertEqual(i.user_agent, "user_agent_1") + elif i.ip == "ip_2": + ip_2_seen = True + self.assertEqual(i.user_agent, "user_agent_2") + self.assertTrue(ip_1_seen) + self.assertTrue(ip_2_seen) def test_get_user_ip_and_agents__no_user_found(self): info = self.get_success( self.module_api.get_user_ip_and_agents( - "@test_get_user_ip_and_agents_user_nonexistent" + "@test_get_user_ip_and_agents_user_nonexistent:example.com" ) ) - self.assertIdentical(info, []) + self.assertEqual(info, []) def test_sending_events_into_room(self): """Tests that a module can send events into a room""" From 3d3cf8e858e479932490f1407166cc59822706d7 Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 16 Sep 2021 18:41:35 +0200 Subject: [PATCH 3/6] FIXUP --- synapse/module_api/__init__.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 51ef46e6ff6b..917a0514252e 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -27,6 +27,7 @@ Union, ) +import attr import jinja2 from twisted.internet import defer @@ -87,9 +88,10 @@ logger = logging.getLogger(__name__) +@attr.s(auto_attribs=True) class UserIpAndAgent: """ - A ip, user_agent pair used by a user to connect to this homeserver. + An IP address and user agent used by a user to connect to this homeserver. """ ip: str @@ -97,11 +99,6 @@ class UserIpAndAgent: # The time at which this user agent/ip was last seen. last_seen: int - def __init__(self, ip: str, user_agent: str, last_seen: int): - self.ip = ip - self.user_agent = user_agent - self.last_seen = last_seen - class ModuleApi: """A proxy object that gets passed to various plugin modules so they @@ -725,7 +722,11 @@ def read_templates( ) def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: - """Checks whether an ID comes from this homeserver.""" + """ + Checks whether an ID comes from this homeserver. + + Added in Synapse v1.44.0. + """ if isinstance(id, DomainSpecificString): return self._hs.is_mine(id) else: @@ -738,6 +739,8 @@ async def get_user_ip_and_agents( Return the list of user IPs and agents for a user. Only useful for local users. + + Added in Synapse v1.44.0. """ # Don't hit the db if this is not a local user. is_mine = False From 916b92b55c0983b9f169cedda964eb97501a6602 Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 20 Sep 2021 11:31:49 +0200 Subject: [PATCH 4/6] FIXUP: store.get_user_ip_and_agents now supports additional argument since_ts --- synapse/module_api/__init__.py | 10 ++++---- synapse/storage/databases/main/client_ips.py | 27 +++++++++++++++----- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 917a0514252e..c4abc14c62c8 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -724,7 +724,7 @@ def read_templates( def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: """ Checks whether an ID comes from this homeserver. - + Added in Synapse v1.44.0. """ if isinstance(id, DomainSpecificString): @@ -733,12 +733,13 @@ def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: return self._hs.is_mine_id(id) async def get_user_ip_and_agents( - self, user_id: str, since_ts: Optional[float] = None + self, user_id: str, since_ts: Optional[float] = 0 ) -> List[UserIpAndAgent]: """ Return the list of user IPs and agents for a user. - Only useful for local users. + Only useful for local users. If since_ts is not specified, + return the list since the epoch. Added in Synapse v1.44.0. """ @@ -752,7 +753,7 @@ async def get_user_ip_and_agents( pass if is_mine: raw_data = await self._store.get_user_ip_and_agents( - UserID.from_string(user_id) + UserID.from_string(user_id), since_ts ) # Sanitize some of the data. We don't want to return tokens. return [ @@ -762,7 +763,6 @@ async def get_user_ip_and_agents( last_seen=int(data["last_seen"]), ) for data in raw_data - if since_ts is None or int(data["last_seen"]) >= since_ts ] else: return [] diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 7a98275d927f..be2f3cd12809 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -555,8 +555,11 @@ async def get_last_client_ip_by_device( return ret async def get_user_ip_and_agents( - self, user: UserID + self, user: UserID, since_ts: Optional[float] = 0 ) -> List[Dict[str, Union[str, int]]]: + """ + Fetch IP/User Agent connection since a given timestamp. + """ user_id = user.to_string() results = {} @@ -568,13 +571,23 @@ async def get_user_ip_and_agents( ) = key if uid == user_id: user_agent, _, last_seen = self._batch_row_update[key] - results[(access_token, ip)] = (user_agent, last_seen) + if last_seen >= since_ts: + results[(access_token, ip)] = (user_agent, last_seen) - rows = await self.db_pool.simple_select_list( - table="user_ips", - keyvalues={"user_id": user_id}, - retcols=["access_token", "ip", "user_agent", "last_seen"], - desc="get_user_ip_and_agents", + def get_recent(txn): + txn.execute( + """ + SELECT access_token, ip, user_agent, last_seen FROM user_ips + WHERE last_seen >= ? AND user_id = ? + ORDER BY last_seen + DESC + """, + (since_ts, user_id), + ) + return txn.fetchall() + + rows = await self.db_pool.runInteraction( + desc="get_user_ip_and_agents", func=get_recent ) results.update( From ef6681fcead21ce87227dff05854b81d20dbc180 Mon Sep 17 00:00:00 2001 From: David Teller Date: Tue, 21 Sep 2021 11:49:06 +0200 Subject: [PATCH 5/6] FIXUP --- synapse/module_api/__init__.py | 3 +- synapse/storage/databases/main/client_ips.py | 2 +- tests/module_api/test_api.py | 37 ++++++++++++++++++-- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index c4abc14c62c8..d679ae53fd23 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -733,7 +733,7 @@ def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: return self._hs.is_mine_id(id) async def get_user_ip_and_agents( - self, user_id: str, since_ts: Optional[float] = 0 + self, user_id: str, since_ts: int = 0 ) -> List[UserIpAndAgent]: """ Return the list of user IPs and agents for a user. @@ -751,6 +751,7 @@ async def get_user_ip_and_agents( is_mine = True except Exception: pass + if is_mine: raw_data = await self._store.get_user_ip_and_agents( UserID.from_string(user_id), since_ts diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index be2f3cd12809..7e33ae578c7b 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -555,7 +555,7 @@ async def get_last_client_ip_by_device( return ret async def get_user_ip_and_agents( - self, user: UserID, since_ts: Optional[float] = 0 + self, user: UserID, since_ts: int = 0 ) -> List[Dict[str, Union[str, int]]]: """ Fetch IP/User Agent connection since a given timestamp. diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py index 7bd2c64f95b6..9d38974fba93 100644 --- a/tests/module_api/test_api.py +++ b/tests/module_api/test_api.py @@ -92,17 +92,28 @@ def test_get_userinfo_by_id__no_user_found(self): def test_get_user_ip_and_agents(self): user_id = self.register_user("test_get_user_ip_and_agents_user", "1234") + + # Initially, we should have no ip/agent for our user. info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) self.assertEqual(info, []) + # Insert a first ip, agent. We should be able to retrieve it. self.get_success( self.store.insert_client_ip( - user_id, "access_token", "ip_1", "user_agent_1", None + user_id, "access_token", "ip_1", "user_agent_1", "device_1", None ) ) + info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) + + self.assertEqual(len(info), 1) + last_seen_1 = info[0].last_seen + + # Insert a second ip, agent at a later date. We should be able to retrieve it. + last_seen_2 = last_seen_1 + 10000 + print("%s => %s" % (last_seen_1, last_seen_2)) self.get_success( self.store.insert_client_ip( - user_id, "access_token", "ip_2", "user_agent_2", None + user_id, "access_token", "ip_2", "user_agent_2", "device_2", last_seen_2 ) ) info = self.get_success(self.module_api.get_user_ip_and_agents(user_id)) @@ -110,16 +121,38 @@ def test_get_user_ip_and_agents(self): self.assertEqual(len(info), 2) ip_1_seen = False ip_2_seen = False + for i in info: if i.ip == "ip_1": ip_1_seen = True self.assertEqual(i.user_agent, "user_agent_1") + self.assertEqual(i.last_seen, last_seen_1) elif i.ip == "ip_2": ip_2_seen = True self.assertEqual(i.user_agent, "user_agent_2") + self.assertEqual(i.last_seen, last_seen_2) self.assertTrue(ip_1_seen) self.assertTrue(ip_2_seen) + # If we fetch from a midpoint between last_seen_1 and last_seen_2, + # we should only find the second ip, agent. + info = self.get_success( + self.module_api.get_user_ip_and_agents( + user_id, (last_seen_1 + last_seen_2) / 2 + ) + ) + self.assertEqual(len(info), 1) + self.assertEqual(info[0].ip, "ip_2") + self.assertEqual(info[0].user_agent, "user_agent_2") + self.assertEqual(info[0].last_seen, last_seen_2) + + # If we fetch from a point later than last_seen_2, we shouldn't + # find anything. + info = self.get_success( + self.module_api.get_user_ip_and_agents(user_id, last_seen_2 + 10000) + ) + self.assertEqual(info, []) + def test_get_user_ip_and_agents__no_user_found(self): info = self.get_success( self.module_api.get_user_ip_and_agents( From 6d63029fdc9d3ee561218b2e3a929e2497ada14d Mon Sep 17 00:00:00 2001 From: David Teller Date: Wed, 22 Sep 2021 14:45:55 +0200 Subject: [PATCH 6/6] FIXUP --- synapse/module_api/__init__.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index d679ae53fd23..d40adb773148 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -723,7 +723,13 @@ def read_templates( def is_mine(self, id: Union[str, DomainSpecificString]) -> bool: """ - Checks whether an ID comes from this homeserver. + Checks whether an ID (user id, room, ...) comes from this homeserver. + + Args: + id: any Matrix id (e.g. user id, room id, ...), either as a raw id, + e.g. string "@user:example.com" or as a parsed UserID, RoomID, ... + Returns: + True if id comes from this homeserver, False otherwise. Added in Synapse v1.44.0. """ @@ -738,8 +744,14 @@ async def get_user_ip_and_agents( """ Return the list of user IPs and agents for a user. - Only useful for local users. If since_ts is not specified, - return the list since the epoch. + Args: + user_id: the id of a user, local or remote + since_ts: a timestamp in seconds since the epoch, + or the epoch itself if not specified. + Returns: + The list of all UserIpAndAgent that the user has + used to connect to this homeserver since `since_ts`. + If the user is remote, this list is empty. Added in Synapse v1.44.0. """