Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add config options for media retention #12732

Merged
merged 14 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -1407,7 +1407,7 @@ federation_rr_transactions_per_room_per_second: 40
```
---
## Media Store ##
Config options relating to Synapse media store.
Config options related to Synapse's media store.

---
Config option: `enable_media_repo`
Expand Down Expand Up @@ -1511,6 +1511,43 @@ thumbnail_sizes:
height: 600
method: scale
```
---
Config option: `media_retention`

Controls whether local media and entries in the remote media cache
(media that is downloaded from other homeservers) should be removed
under certain conditions, typically for the purpose of saving space.

Purging media files will be carried out by the media worker
(that is, the worker that has the `enable_media_repo` homeserver config
option set to 'true'). This may be the main process.

The `media_retention.enabled` option globally controls whether media
retention is enabled.

The `media_retention.purge_period` option dictates how often Synapse should
scan and purge media to be removed according to the configured thresholds.
For example, if set to "6h", Synapse will check every 6 hours for media
that can be purged. The default value is "24h" meaning 24 hours.

The `media_retention.local_media_lifetime` and
`media_retention.remote_media_lifetime` config options control whether
media will be purged if it has not been accessed in a given amount of
time. Note that media is 'accessed' when loaded in a room in a client, or
otherwise downloaded by a local or remote user. If the media has never
been accessed, the media's creation time is used instead. Both thumbnails
and the original media will be removed. If either of these options is unset,
then media of that type will not be purged.

Example configuration:
```yaml
media_retention:
    enabled: true
    purge_period: 24h
    local_media_lifetime: 30d
    remote_media_lifetime: 7d
```
---
Config option: `url_preview_enabled`

This setting determines whether the preview URL API is enabled.
Expand Down
21 changes: 21 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,27 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
"url_preview_accept_language"
) or ["en"]

media_retention = config.get("media_retention") or {}
self.media_retention_enabled = media_retention.get("enabled", False)

self.media_retention_purge_period = self.parse_duration(
media_retention.get("purge_period", "24h")
)

self.media_retention_local_media_lifetime_ms = None
local_media_lifetime = media_retention.get("local_media_lifetime")
if local_media_lifetime is not None:
self.media_retention_local_media_lifetime_ms = self.parse_duration(
local_media_lifetime
)

self.media_retention_remote_media_lifetime_ms = None
remote_media_lifetime = media_retention.get("remote_media_lifetime")
if remote_media_lifetime is not None:
self.media_retention_remote_media_lifetime_ms = self.parse_duration(
remote_media_lifetime
)

def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
assert data_dir_path is not None
media_store = os.path.join(data_dir_path, "media_store")
Expand Down
67 changes: 66 additions & 1 deletion synapse/rest/media/v1/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@
logger = logging.getLogger(__name__)


UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000
# How often to run the background job to update the "recently accessed"
# attribute of local and remote media.
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute


class MediaRepository:
Expand Down Expand Up @@ -122,11 +124,32 @@ def __init__(self, hs: "HomeServer"):
self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
)

# Media retention configuration options
self._media_retention_local_media_lifetime_ms = (
hs.config.media.media_retention_local_media_lifetime_ms
)
self._media_retention_remote_media_lifetime_ms = (
hs.config.media.media_retention_remote_media_lifetime_ms
)

if hs.config.media.media_retention_enabled:
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
# Run the background job to apply media retention rules routinely,
# with the duration between runs dictated by the homeserver config.
self.clock.looping_call(
self._start_apply_media_retention_rules,
hs.config.media.media_retention_purge_period,
)

def _start_update_recently_accessed(self) -> Deferred:
    """Kick off the "recently accessed" media update as a background process.

    Returns:
        The value returned by `run_as_background_process` for the
        "update_recently_accessed_media" job.
    """
    job_name = "update_recently_accessed_media"
    return run_as_background_process(job_name, self._update_recently_accessed)

def _start_apply_media_retention_rules(self) -> Deferred:
    """Kick off a media retention purge run as a background process.

    Returns:
        The value returned by `run_as_background_process` for the
        "apply_media_retention_rules" job.
    """
    job_name = "apply_media_retention_rules"
    return run_as_background_process(job_name, self._apply_media_retention_rules)

async def _update_recently_accessed(self) -> None:
remote_media = self.recently_accessed_remotes
self.recently_accessed_remotes = set()
Expand Down Expand Up @@ -835,6 +858,48 @@ async def _generate_thumbnails(

return {"width": m_width, "height": m_height}

async def _apply_media_retention_rules(self) -> None:
    """
    Purge old local and remote media according to the media retention rules
    defined in the homeserver config.

    Remote media is handled first, then local media. Each kind is only
    purged when its corresponding lifetime is configured (i.e. not None);
    otherwise that kind is left untouched.

    This method does not raise anything itself; any exception raised by
    `delete_old_remote_media` or `delete_old_local_media` propagates to the
    caller.
    """
    # Purge remote media
    if self._media_retention_remote_media_lifetime_ms is not None:
        # Calculate a threshold timestamp derived from the configured lifetime. Any
        # media that has not been accessed since this timestamp will be removed.
        remote_media_threshold_timestamp_ms = (
            self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms
        )

        # Pass the timestamp as a logging argument so that the message is only
        # formatted if the record is actually emitted.
        logger.info(
            "Purging remote media last accessed before %s",
            remote_media_threshold_timestamp_ms,
        )

        await self.delete_old_remote_media(
            before_ts=remote_media_threshold_timestamp_ms
        )

    # And now do the same for local media
    if self._media_retention_local_media_lifetime_ms is not None:
        # This works the same as the remote media threshold. The current time is
        # read again here, after the remote purge above has completed.
        local_media_threshold_timestamp_ms = (
            self.clock.time_msec() - self._media_retention_local_media_lifetime_ms
        )

        logger.info(
            "Purging local media last accessed before %s",
            local_media_threshold_timestamp_ms,
        )

        # NOTE(review): keep_profiles=True presumably exempts media used in
        # user profiles (e.g. avatars) from deletion -- confirm against
        # delete_old_local_media.
        await self.delete_old_local_media(
            before_ts=local_media_threshold_timestamp_ms,
            keep_profiles=True,
        )

async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
old_media = await self.store.get_remote_media_before(before_ts)

Expand Down