Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Avoid unnecessary work in the spaces summary. #10085

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10085.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve the performance of the spaces summary endpoint by only recursing into spaces.
6 changes: 6 additions & 0 deletions synapse/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ class EventContentFields:
ROOM_TYPE = "type"


class RoomTypes:
clokep marked this conversation as resolved.
Show resolved Hide resolved
"""Understood values of the room_type field of m.room.create events."""

SPACE = "m.space"


class RoomEncryptionAlgorithms:
MEGOLM_V1_AES_SHA2 = "m.megolm.v1.aes-sha2"
DEFAULT = MEGOLM_V1_AES_SHA2
Expand Down
92 changes: 54 additions & 38 deletions synapse/handlers/space_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import logging
import re
from collections import deque
from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Set, Tuple
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Set, Tuple

import attr

Expand All @@ -25,6 +25,7 @@
EventTypes,
HistoryVisibility,
Membership,
RoomTypes,
)
from synapse.events import EventBase
from synapse.events.utils import format_event_for_client_v2
Expand Down Expand Up @@ -87,12 +88,13 @@ async def get_space_summary(
# the queue of rooms to process
room_queue = deque((_RoomQueueEntry(room_id, ()),))

# rooms we have already processed
processed_rooms = set() # type: Set[str]
# A map of rooms that have been processed to whether that room was
# returned to the requesting user.
processed_rooms: Dict[str, bool] = {}

# events we have already processed. We don't necessarily have their event ids,
# so instead we key on (room id, state key)
processed_events = set() # type: Set[Tuple[str, str]]
# A map of room ID to a list of events which are pending the processing
# of that room.
pending_events: Dict[str, List[JsonDict]] = {}

rooms_result = [] # type: List[JsonDict]
events_result = [] # type: List[JsonDict]
Expand Down Expand Up @@ -126,21 +128,27 @@ async def get_space_summary(

if room:
rooms_result.append(room)

# Track whether this room is returned to the user or not.
processed_rooms[room_id] = bool(room)
else:
fed_rooms, fed_events = await self._summarize_remote_room(
fed_rooms, events = await self._summarize_remote_room(
queue_entry,
suggested_only,
max_children,
exclude_rooms=processed_rooms,
)

# The queried room may or may not have been returned, but don't process
# it again, anyway. (This may be overridden below if it is accessible.)
processed_rooms[room_id] = False

# The results over federation might include rooms that the we,
# as the requesting server, are allowed to see, but the requesting
# user is not permitted see.
#
# Filter the returned results to only what is accessible to the user.
room_ids = set()
events = []
for room in fed_rooms:
fed_room_id = room.get("room_id")
if not fed_room_id or not isinstance(fed_room_id, str):
Expand Down Expand Up @@ -184,42 +192,45 @@ async def get_space_summary(

# All rooms returned don't need visiting again (even if the user
# didn't have access to them).
processed_rooms.add(fed_room_id)

for event in fed_events:
if event.get("room_id") in room_ids:
events.append(event)
processed_rooms[fed_room_id] = include_room

logger.debug(
"Query of %s returned rooms %s, events %s",
room_id,
[room.get("room_id") for room in fed_rooms],
["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in fed_events],
["%s->%s" % (ev["room_id"], ev["state_key"]) for ev in events],
)

# the room we queried may or may not have been returned, but don't process
# it again, anyway.
processed_rooms.add(room_id)

# XXX: is it ok that we blindly iterate through any events returned by
# a remote server, whether or not they actually link to any rooms in our
# tree?
for ev in events:
# remote servers might return events we have already processed
# (eg, Dendrite returns inward pointers as well as outward ones), so
# we need to filter them out, to avoid returning duplicate links to the
# client.
ev_key = (ev["room_id"], ev["state_key"])
if ev_key in processed_events:
continue
events_result.append(ev)
# There might be events which point to this room which are waiting
# to see if it is accessible.
room_pending_events = pending_events.pop(room_id, ())
if not processed_rooms[room_id]:
# If the room was not accessible, we don't need to include these
# events in the results.
room_pending_events = ()
clokep marked this conversation as resolved.
Show resolved Hide resolved

# Any events which reference rooms that are known to be accessible
# should be included in the result. Events which reference rooms which
# have not yet been processed should be queued.
for ev in itertools.chain(events, room_pending_events):
parent_room_id = ev["room_id"]
child_room_id = ev["state_key"]

try:
if (
processed_rooms[parent_room_id]
and processed_rooms[child_room_id]
):
events_result.append(ev)
except KeyError:
# Note that events are pending processing of the child room.
# If the parent room was not accessible the event shouldn't
# be included at all.
pending_events.setdefault(child_room_id, []).append(ev)

# add the child to the queue. we have already validated
# that the vias are a list of server names.
room_queue.append(
_RoomQueueEntry(ev["state_key"], ev["content"]["via"])
)
processed_events.add(ev_key)
room_queue.append(_RoomQueueEntry(child_room_id, ev["content"]["via"]))

# Before returning to the client, remove the allowed_spaces key for any
# rooms.
Expand Down Expand Up @@ -318,7 +329,8 @@ async def _summarize_local_room(

Returns:
A tuple of:
An iterable of a single value of the room.
The room information, if it the room should be returned to the
user. None, otherwise.

An iterable of the sorted children events. This may be limited
to a maximum size or may include all children.
Expand All @@ -328,7 +340,11 @@ async def _summarize_local_room(

room_entry = await self._build_room_entry(room_id)

# look for child rooms/spaces.
# If the the room is not a space, return just the room information.
if room_entry.get("room_type") != RoomTypes.SPACE:
return room_entry, ()

# Otherwise, look for child rooms/spaces.
child_events = await self._get_child_events(room_id)

if suggested_only:
Expand All @@ -338,8 +354,8 @@ async def _summarize_local_room(
if max_children is None or max_children > MAX_ROOMS_PER_SPACE:
max_children = MAX_ROOMS_PER_SPACE

now = self._clock.time_msec()
events_result = [] # type: List[JsonDict]
now = self._clock.time_msec()
for edge_event in itertools.islice(child_events, max_children):
events_result.append(
await self._event_serializer.serialize_event(
Expand All @@ -348,6 +364,7 @@ async def _summarize_local_room(
event_format=format_event_for_client_v2,
)
)

return room_entry, events_result

async def _summarize_remote_room(
Expand Down Expand Up @@ -445,7 +462,6 @@ async def _is_room_accessible(
member_event_id = state_ids.get((EventTypes.Member, requester), None)

# If they're in the room they can see info on it.
member_event = None
if member_event_id:
member_event = await self._store.get_event(member_event_id)
if member_event.membership in (Membership.JOIN, Membership.INVITE):
Expand Down