From 99ab65af2f6976bb9ea15f637bb04b6f6dd7690c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 24 Oct 2019 20:59:06 +0300 Subject: [PATCH 01/49] fix up logging to use rapidjson --- synapse/logging/_structured.py | 9 ++++- synapse/logging/_terse_json.py | 73 +++++++++++++++++++++++++--------- synapse/python_dependencies.py | 1 + tests/server.py | 2 + tests/unittest.py | 16 +++++--- tox.ini | 9 +++++ 6 files changed, 85 insertions(+), 25 deletions(-) diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 3220e985a9e4..c6a01577a693 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -261,6 +261,13 @@ def parse_drain_configs( ) +class StoppableLogPublisher(LogPublisher): + def stop(self): + for obs in self._observers: + if hasattr(obs, "stop"): + obs.stop() + + def setup_structured_logging( hs, config, @@ -336,7 +343,7 @@ def setup_structured_logging( # We should never get here, but, just in case, throw an error. raise ConfigError("%s drain type cannot be configured" % (observer.type,)) - publisher = LogPublisher(*observers) + publisher = StoppableLogPublisher(*observers) log_filter = LogLevelFilterPredicate() for namespace, namespace_config in log_config.get( diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 0ebbde06f217..f3b27d5bfd9e 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -24,7 +24,7 @@ from typing import IO import attr -from simplejson import dumps +from rapidjson import dumps from zope.interface import implementer from twisted.application.internet import ClientService @@ -33,9 +33,9 @@ TCP4ClientEndpoint, TCP6ClientEndpoint, ) +from twisted.internet.interfaces import IPushProducer from twisted.internet.protocol import Factory, Protocol from twisted.logger import FileLogObserver, ILogObserver, Logger -from twisted.python.failure import Failure def flatten_event(event: dict, metadata: dict, include_time: bool = False): @@ 
-141,11 +141,40 @@ def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogOb def formatEvent(_event: dict) -> str: flattened = flatten_event(_event, metadata) - return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" + return dumps(flattened, ensure_ascii=False) + "\n" return FileLogObserver(outFile, formatEvent) +@attr.s +@implementer(IPushProducer) +class LogProducer(object): + + _buffer = attr.ib() + _transport = attr.ib() + paused = attr.ib(default=False) + + def pauseProducing(self): + self.paused = True + + def stopProducing(self): + self.paused = True + self._buffer = None + + def resumeProducing(self): + self.paused = False + + while self.paused is False and (self._buffer and self._transport.connected): + try: + event = self._buffer.popleft() + self._transport.write(dumps(event, ensure_ascii=False).encode("utf8")) + self._transport.write(b"\n") + except Exception: + import traceback + + traceback.print_exc(file=sys.__stderr__) + + @attr.s @implementer(ILogObserver) class TerseJSONToTCPLogObserver(object): @@ -167,6 +196,7 @@ class TerseJSONToTCPLogObserver(object): _buffer = attr.ib(default=attr.Factory(deque), type=deque) _writer = attr.ib(default=None) _logger = attr.ib(default=attr.Factory(Logger)) + _producer = attr.ib(default=None) def start(self) -> None: @@ -188,6 +218,9 @@ def start(self) -> None: self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor()) self._service.startService() + def stop(self): + self._service.stopService() + def _write_loop(self) -> None: """ Implement the write loop. 
@@ -195,27 +228,29 @@ def _write_loop(self) -> None: if self._writer: return + if self._producer and self._producer._transport.connected: + self._producer.resumeProducing() + return + self._writer = self._service.whenConnected() - @self._writer.addBoth + @self._writer.addErrback + def fail(r): + r.printTraceback(file=sys.__stderr__) + self._writer = None + self.hs.get_reactor().callLater(1, self._write_loop) + return + + @self._writer.addCallback def writer(r): - if isinstance(r, Failure): - r.printTraceback(file=sys.__stderr__) - self._writer = None + def connectionLost(_self, reason): + self._producer.pauseProducing() + self._producer = None self.hs.get_reactor().callLater(1, self._write_loop) - return - try: - for event in self._buffer: - r.transport.write( - dumps(event, ensure_ascii=False, separators=(",", ":")).encode( - "utf8" - ) - ) - r.transport.write(b"\n") - self._buffer.clear() - except Exception as e: - sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) + self._producer = LogProducer(self._buffer, r.transport) + r.transport.registerProducer(self._producer, True) + self._producer.resumeProducing() self._writer = False self.hs.get_reactor().callLater(1, self._write_loop) diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index aa7da1c54399..3c0040482cd8 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -74,6 +74,7 @@ "Jinja2>=2.9", "bleach>=1.4.3", "typing-extensions>=3.7.4", + "python-rapidjson>=0.4" ] CONDITIONAL_REQUIREMENTS = { diff --git a/tests/server.py b/tests/server.py index e397ebe8fa9b..bd647906ba65 100644 --- a/tests/server.py +++ b/tests/server.py @@ -375,6 +375,7 @@ class FakeTransport(object): disconnecting = False disconnected = False + connected = True buffer = attr.ib(default=b"") producer = attr.ib(default=None) autoflush = attr.ib(default=True) @@ -392,6 +393,7 @@ def loseConnection(self, reason=None): if self._protocol: 
self._protocol.connectionLost(reason) self.disconnected = True + self.connected = False def abortConnection(self): logger.info("FakeTransport: abortConnection()") diff --git a/tests/unittest.py b/tests/unittest.py index 561cebc223a4..dc251b17022d 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -178,11 +178,14 @@ class HomeserverTestCase(TestCase): needs_threadpool = False def __init__(self, methodName, *args, **kwargs): - super().__init__(methodName, *args, **kwargs) + if methodName: + super().__init__(methodName, *args, **kwargs) - # see if we have any additional config for this test - method = getattr(self, methodName) - self._extra_config = getattr(method, "_extra_config", None) + # see if we have any additional config for this test + method = getattr(self, methodName) + self._extra_config = getattr(method, "_extra_config", None) + else: + self._extra_config = None def setUp(self): """ @@ -190,7 +193,7 @@ def setUp(self): hijacking the authentication system to return a fixed user, and then calling the prepare function. """ - self.reactor, self.clock = get_clock() + self.reactor, self.clock = self.get_clock() self._hs_args = {"clock": self.clock, "reactor": self.reactor} self.hs = self.make_homeserver(self.reactor, self.clock) @@ -240,6 +243,9 @@ def get_user_by_req(request, allow_guest=False, rights="access"): if hasattr(self, "prepare"): self.prepare(self.reactor, self.clock, self.hs) + def get_clock(self): + return get_clock() + def wait_on_thread(self, deferred, timeout=10): """ Wait until a Deferred is done, where it's waiting on a real thread. 
diff --git a/tox.ini b/tox.ini index 3cd2c5e63394..4249f9df5a0c 100644 --- a/tox.ini +++ b/tox.ini @@ -102,6 +102,15 @@ commands = {envbindir}/coverage run "{envbindir}/trial" {env:TRIAL_FLAGS:} {posargs:tests} {env:TOXSUFFIX:} +[testenv:benchmark] +deps = + {[base]deps} + pyperf +setenv = + SYNAPSE_POSTGRES = 1 +commands = + python -m synapse.benchmarks {posargs:} + [testenv:packaging] skip_install=True deps = From d684ec8a2bbb682fde86d3b17c4897d40ac71b58 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 24 Oct 2019 22:09:43 +0300 Subject: [PATCH 02/49] benchmarks --- synapse/benchmarks/__init__.py | 63 +++++++++++ synapse/benchmarks/__main__.py | 33 ++++++ synapse/benchmarks/suites/__init__.py | 1 + synapse/benchmarks/suites/logging.py | 147 ++++++++++++++++++++++++++ synapse/logging/_terse_json.py | 8 +- synapse/python_dependencies.py | 1 - 6 files changed, 249 insertions(+), 4 deletions(-) create mode 100644 synapse/benchmarks/__init__.py create mode 100644 synapse/benchmarks/__main__.py create mode 100644 synapse/benchmarks/suites/__init__.py create mode 100644 synapse/benchmarks/suites/logging.py diff --git a/synapse/benchmarks/__init__.py b/synapse/benchmarks/__init__.py new file mode 100644 index 000000000000..d0450c4d9d84 --- /dev/null +++ b/synapse/benchmarks/__init__.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from twisted.internet.defer import Deferred + +from synapse.config.homeserver import HomeServerConfig +from synapse.util import Clock + +from tests.utils import default_config, setup_test_homeserver, setupdb + +DB_SETUP = False + + +def setup_database(): + global DB_SETUP + if not DB_SETUP: + setupdb() + DB_SETUP = True + + +async def make_homeserver(reactor, config=None): + def wait(time): + d = Deferred() + reactor.callLater(time, d.callback, True) + return d + + cleanup_tasks = [] + + clock = Clock(reactor) + + if not config: + config = default_config("test") + + config_obj = HomeServerConfig() + config_obj.parse_config_dict(config, "", "") + + hs = await setup_test_homeserver( + cleanup_tasks.append, config=config_obj, reactor=reactor, clock=clock + ) + stor = hs.get_datastore() + + # Run the database background updates. + if hasattr(stor, "do_next_background_update"): + while not await stor.has_completed_background_updates(): + await stor.do_next_background_update(1) + + def cleanup(): + for i in cleanup_tasks: + i() + + return hs, wait, cleanup diff --git a/synapse/benchmarks/__main__.py b/synapse/benchmarks/__main__.py new file mode 100644 index 000000000000..d67a1cf43a4f --- /dev/null +++ b/synapse/benchmarks/__main__.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pyperf + +from twisted.python import reflect + +from synapse.benchmarks import setupdb +from synapse.benchmarks.suites import SUITES + +if __name__ == "__main__": + + runner = pyperf.Runner(processes=5, values=1, warmups=0) + runner.parse_args() + runner.args.inherit_environ = ["SYNAPSE_POSTGRES"] + + for suite, loops in SUITES: + + func = reflect.namedAny("synapse.benchmarks.suites.%s.main" % (suite.lower(),)) + runner.args.loops = loops + runner.bench_time_func(suite + "_" + str(loops), func) diff --git a/synapse/benchmarks/suites/__init__.py b/synapse/benchmarks/suites/__init__.py new file mode 100644 index 000000000000..641e3299c213 --- /dev/null +++ b/synapse/benchmarks/suites/__init__.py @@ -0,0 +1 @@ +SUITES = [("LOGGING", 1000), ("LOGGING", 10000)] diff --git a/synapse/benchmarks/suites/logging.py b/synapse/benchmarks/suites/logging.py new file mode 100644 index 000000000000..37e7229b6c2b --- /dev/null +++ b/synapse/benchmarks/suites/logging.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import warnings +from contextlib import redirect_stderr +from io import StringIO + +from mock import Mock + +from pyperf import perf_counter + +from twisted.internet.defer import ensureDeferred +from twisted.internet.protocol import ServerFactory +from twisted.logger import ( + LogBeginner, + Logger, + LogPublisher, + globalLogBeginner, + textFileLogObserver, +) +from twisted.protocols.basic import LineOnlyReceiver +from twisted.python.failure import Failure + +from synapse.benchmarks import make_homeserver, setup_database +from synapse.logging._structured import setup_structured_logging + + +class LineCounter(LineOnlyReceiver): + + delimiter = b"\n" + + def __init__(self, *args, **kwargs): + self.count = 0 + super().__init__(*args, **kwargs) + + def lineReceived(self, line): + self.count += 1 + + +async def _main(reactor, loops): + + servers = [] + + def protocol(): + p = LineCounter() + servers.append(p) + return p + + logger_factory = ServerFactory.forProtocol(protocol) + port = reactor.listenTCP(0, logger_factory, interface="127.0.0.1") + + hs, wait, cleanup = await make_homeserver(reactor) + + errors = StringIO() + publisher = LogPublisher() + mock_sys = Mock() + beginner = LogBeginner( + publisher, errors, mock_sys, warnings, initialBufferSize=loops + ) + + log_config = { + "loggers": {"synapse": {"level": "DEBUG"}}, + "drains": { + "tersejson": { + "type": "network_json_terse", + "host": "127.0.0.1", + "port": port.getHost().port, + "maximum_buffer": 100, + } + }, + } + + logger = Logger(namespace="synapse.logging.test_terse_json", observer=publisher) + + start = perf_counter() + + logging_system = setup_structured_logging( + hs, hs.config, log_config, logBeginner=beginner, redirect_stdlib_logging=False + ) + + # Wait for it to connect... 
+ await logging_system._observers[0]._service.whenConnected() + + # Send a bunch of useful messages + for i in range(0, loops): + logger.info("test message %s" % (i,)) + + if ( + len(logging_system._observers[0]._buffer) + == logging_system._observers[0].maximum_buffer + ): + while ( + len(logging_system._observers[0]._buffer) + > logging_system._observers[0].maximum_buffer / 2 + ): + await wait(0.01) + + while servers[0].count != loops: + await wait(0.01) + + end = perf_counter() - start + + logging_system.stop() + port.stopListening() + cleanup() + + return end + + +def main(loops): + + setup_database() + + if globalLogBeginner._temporaryObserver: + globalLogBeginner.beginLoggingTo([lambda event: None]) + + file_out = StringIO() + with redirect_stderr(file_out): + + from twisted.internet import epollreactor + + reactor = epollreactor.EPollReactor() + d = ensureDeferred(_main(reactor, loops)) + + def on_done(_): + if isinstance(_, Failure): + _.printTraceback() + reactor.stop() + return _ + + d.addBoth(on_done) + reactor.run() + + return d.result diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index f3b27d5bfd9e..b8e8ea500350 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -17,6 +17,7 @@ Log formatters that output terse JSON. 
""" +import json import sys from collections import deque from ipaddress import IPv4Address, IPv6Address, ip_address @@ -24,7 +25,6 @@ from typing import IO import attr -from rapidjson import dumps from zope.interface import implementer from twisted.application.internet import ClientService @@ -37,6 +37,8 @@ from twisted.internet.protocol import Factory, Protocol from twisted.logger import FileLogObserver, ILogObserver, Logger +_encoder = json.JSONEncoder(ensure_ascii=False, separators=(",", ":")) + def flatten_event(event: dict, metadata: dict, include_time: bool = False): """ @@ -141,7 +143,7 @@ def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogOb def formatEvent(_event: dict) -> str: flattened = flatten_event(_event, metadata) - return dumps(flattened, ensure_ascii=False) + "\n" + return _encoder.encode(flattened) + "\n" return FileLogObserver(outFile, formatEvent) @@ -167,7 +169,7 @@ def resumeProducing(self): while self.paused is False and (self._buffer and self._transport.connected): try: event = self._buffer.popleft() - self._transport.write(dumps(event, ensure_ascii=False).encode("utf8")) + self._transport.write(_encoder.encode(event).encode("utf8")) self._transport.write(b"\n") except Exception: import traceback diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 3c0040482cd8..aa7da1c54399 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -74,7 +74,6 @@ "Jinja2>=2.9", "bleach>=1.4.3", "typing-extensions>=3.7.4", - "python-rapidjson>=0.4" ] CONDITIONAL_REQUIREMENTS = { From 135fdaae0d0ff62d295a9a3fbe2f89e19890e269 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 31 Oct 2019 21:01:37 +1100 Subject: [PATCH 03/49] update linting --- changelog.d/6266.misc | 1 + synapse/benchmarks/__main__.py | 1 - synapse/benchmarks/suites/logging.py | 2 -- 3 files changed, 1 insertion(+), 3 deletions(-) create mode 100644 changelog.d/6266.misc diff --git a/changelog.d/6266.misc b/changelog.d/6266.misc new file mode 100644 index 000000000000..634e421a793c --- /dev/null +++ b/changelog.d/6266.misc @@ -0,0 +1 @@ +Add benchmarks for structured logging and improve output performance. diff --git a/synapse/benchmarks/__main__.py b/synapse/benchmarks/__main__.py index d67a1cf43a4f..0d490791fc8b 100644 --- a/synapse/benchmarks/__main__.py +++ b/synapse/benchmarks/__main__.py @@ -17,7 +17,6 @@ from twisted.python import reflect -from synapse.benchmarks import setupdb from synapse.benchmarks.suites import SUITES if __name__ == "__main__": diff --git a/synapse/benchmarks/suites/logging.py b/synapse/benchmarks/suites/logging.py index 37e7229b6c2b..875668210641 100644 --- a/synapse/benchmarks/suites/logging.py +++ b/synapse/benchmarks/suites/logging.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import warnings from contextlib import redirect_stderr from io import StringIO @@ -29,7 +28,6 @@ Logger, LogPublisher, globalLogBeginner, - textFileLogObserver, ) from twisted.protocols.basic import LineOnlyReceiver from twisted.python.failure import Failure From 73cfdebfec0ed341048e2963497e35801009f8c7 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 31 Oct 2019 21:15:07 +1100 Subject: [PATCH 04/49] fix --- synapse/benchmarks/suites/logging.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/synapse/benchmarks/suites/logging.py b/synapse/benchmarks/suites/logging.py index 875668210641..6dd63c69a26c 100644 --- a/synapse/benchmarks/suites/logging.py +++ b/synapse/benchmarks/suites/logging.py @@ -23,12 +23,7 @@ from twisted.internet.defer import ensureDeferred from twisted.internet.protocol import ServerFactory -from twisted.logger import ( - LogBeginner, - Logger, - LogPublisher, - globalLogBeginner, -) +from twisted.logger import LogBeginner, Logger, LogPublisher, globalLogBeginner from twisted.protocols.basic import LineOnlyReceiver from twisted.python.failure import Failure From c580eb32ba1be71abf0ade53fc11bf5044908f29 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Tue, 5 Nov 2019 00:51:51 +1100 Subject: [PATCH 05/49] revert this --- tests/unittest.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/tests/unittest.py b/tests/unittest.py index dc251b17022d..561cebc223a4 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -178,14 +178,11 @@ class HomeserverTestCase(TestCase): needs_threadpool = False def __init__(self, methodName, *args, **kwargs): - if methodName: - super().__init__(methodName, *args, **kwargs) + super().__init__(methodName, *args, **kwargs) - # see if we have any additional config for this test - method = getattr(self, methodName) - self._extra_config = getattr(method, "_extra_config", None) - else: - self._extra_config = None + # see if we have any additional config for this test + method = getattr(self, methodName) + self._extra_config = getattr(method, "_extra_config", None) def setUp(self): """ @@ -193,7 +190,7 @@ def setUp(self): hijacking the authentication system to return a fixed user, and then calling the prepare function. 
""" - self.reactor, self.clock = self.get_clock() + self.reactor, self.clock = get_clock() self._hs_args = {"clock": self.clock, "reactor": self.reactor} self.hs = self.make_homeserver(self.reactor, self.clock) @@ -243,9 +240,6 @@ def get_user_by_req(request, allow_guest=False, rights="access"): if hasattr(self, "prepare"): self.prepare(self.reactor, self.clock, self.hs) - def get_clock(self): - return get_clock() - def wait_on_thread(self, deferred, timeout=10): """ Wait until a Deferred is done, where it's waiting on a real thread. From a14831d209d6d5b3cc29503768e6a406d8d40522 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Thu, 21 Nov 2019 00:35:49 +1100 Subject: [PATCH 06/49] Move cache configuration into a homeserver config option, instead of environment variables. --- changelog.d/6391.feature | 1 + synapse/api/auth.py | 4 +- synapse/app/homeserver.py | 3 +- synapse/config/cache.py | 100 ++++++++++++++++++ synapse/config/homeserver.py | 2 + synapse/http/client.py | 4 +- synapse/metrics/_exposition.py | 12 ++- synapse/push/bulk_push_rule_evaluator.py | 4 +- synapse/push/push_rule_evaluator.py | 4 +- .../replication/slave/storage/client_ips.py | 3 +- synapse/server.py | 1 + synapse/state/__init__.py | 5 +- .../storage/data_stores/main/client_ips.py | 3 +- synapse/storage/data_stores/main/state.py | 6 +- synapse/util/caches/__init__.py | 40 ++++--- synapse/util/caches/descriptors.py | 9 +- synapse/util/caches/expiringcache.py | 2 +- synapse/util/caches/lrucache.py | 27 ++++- synapse/util/caches/response_cache.py | 2 +- synapse/util/caches/stream_change_cache.py | 38 +++++-- synapse/util/caches/ttlcache.py | 2 +- tests/storage/test__base.py | 8 +- tests/test_metrics.py | 34 ++++++ tests/util/test_lrucache.py | 2 +- tests/util/test_stream_change_cache.py | 5 +- tests/utils.py | 1 + 26 files changed, 253 insertions(+), 69 deletions(-) create mode 100644 changelog.d/6391.feature create mode 100644 synapse/config/cache.py diff --git a/changelog.d/6391.feature 
b/changelog.d/6391.feature new file mode 100644 index 000000000000..f123426e23ae --- /dev/null +++ b/changelog.d/6391.feature @@ -0,0 +1 @@ +Synapse's cache factor can now be configured in `homeserver.yaml` by the `caches.global_factor` setting. Additionally, `caches.per_cache_factors` controls the cache factors for individual caches. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 5d0b7d2801eb..a1fc542c7640 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -41,7 +41,7 @@ ) from synapse.config.server import is_threepid_reserved from synapse.types import UserID -from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache +from synapse.util.caches import register_cache from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import Measure @@ -77,7 +77,7 @@ def __init__(self, hs): self.store = hs.get_datastore() self.state = hs.get_state_handler() - self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000) + self.token_cache = LruCache(10000) register_cache("cache", "token_cache", self.token_cache) self._account_validity = hs.config.account_validity diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 73e2c29d0679..98e528187a29 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -71,7 +71,6 @@ from synapse.storage import DataStore, are_all_users_on_domain from synapse.storage.engines import IncorrectDatabaseSetup, create_engine from synapse.storage.prepare_database import UpgradeDatabaseException, prepare_database -from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole from synapse.util.module_loader import load_module @@ -516,7 +515,7 @@ def phone_stats_home(hs, stats, stats_process=_stats_process): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages - stats["cache_factor"] = CACHE_SIZE_FACTOR + 
stats["cache_factor"] = hs.config.caches.global_factor stats["event_cache_size"] = hs.config.event_cache_size # diff --git a/synapse/config/cache.py b/synapse/config/cache.py new file mode 100644 index 000000000000..e72c6ff3976b --- /dev/null +++ b/synapse/config/cache.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from collections import defaultdict +from typing import DefaultDict + +from twisted.logger import Logger + +from ._base import Config, ConfigError + +log = Logger() + +_CACHES = {} +DEFAULT_CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) + + +_DEFAULT_CONFIG = """\ +# Cache configuration +# +# 'global_factor' controls the global cache factor. This overrides the +# "SYNAPSE_CACHE_FACTOR" environment variable. +# +# 'per_cache_factors' is a dictionary of cache name to cache factor for that +# individual cache. 
+# +#caches: +# global_factor: 0.5 +# per_cache_factors: +# get_users_who_share_room_with_user: 2 +# +""" + + +def add_resizable_cache(cache_name, cache_resize_callback): + _CACHES[cache_name.lower()] = cache_resize_callback + + +class CacheConfig(Config): + section = "caches" + + def read_config(self, config, **kwargs): + global DEFAULT_CACHE_SIZE_FACTOR + + cache_config = config.get("caches", {}) + + self.global_factor = cache_config.get( + "global_factor", DEFAULT_CACHE_SIZE_FACTOR + ) + if not isinstance(self.global_factor, (int, float)): + raise ConfigError("caches.global_factor must be a number.") + + # Set the global one so that it's reflected in new caches + DEFAULT_CACHE_SIZE_FACTOR = self.global_factor + + individual_factors = cache_config.get("per_cache_factors", {}) or {} + if not isinstance(individual_factors, dict): + raise ConfigError("caches.per_cache_factors must be a dictionary") + + self.cache_factors = defaultdict( + lambda: self.global_factor + ) # type: DefaultDict[str, float] + + for cache, factor in individual_factors.items(): + if not isinstance(factor, (int, float)): + raise ConfigError( + "caches.per_cache_factors.%s must be a number" % (cache.lower(),) + ) + self.cache_factors[cache.lower()] = factor + + def resize_caches(self): + for cache_name, cache_resize_callback in _CACHES.items(): + cache_factor = self.cache_factors[cache_name] + log.debug( + "Setting cache factor for {cache_name} to {new_cache_factor}", + cache_name=cache_name, + new_cache_factor=cache_factor, + ) + changed = cache_resize_callback(cache_factor) + if changed: + log.info( + "Cache factor for {cache_name} set to {new_cache_factor}", + cache_name=cache_name, + new_cache_factor=cache_factor, + ) + + def get_factor_for(self, cache_name): + return self.cache_factors[cache_name.lower()] diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index 6e348671c7f3..0372d19836eb 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ 
-17,6 +17,7 @@ from ._base import RootConfig from .api import ApiConfig from .appservice import AppServiceConfig +from .cache import CacheConfig from .captcha import CaptchaConfig from .cas import CasConfig from .consent_config import ConsentConfig @@ -80,4 +81,5 @@ class HomeServerConfig(RootConfig): RoomDirectoryConfig, ThirdPartyRulesConfig, TracerConfig, + CacheConfig, ] diff --git a/synapse/http/client.py b/synapse/http/client.py index d4c285445e1d..b15aad067fc6 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -49,7 +49,6 @@ from synapse.logging.context import make_deferred_yieldable from synapse.logging.opentracing import set_tag, start_active_span, tags from synapse.util.async_helpers import timeout_deferred -from synapse.util.caches import CACHE_SIZE_FACTOR logger = logging.getLogger(__name__) @@ -241,7 +240,8 @@ def __getattr__(_self, attr): # tends to do so in batches, so we need to allow the pool to keep # lots of idle connections around. pool = HTTPConnectionPool(self.reactor) - pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) + # XXX: Why does this use the cache factor???? 
+ pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5)) pool.cachedConnectionTimeout = 2 * 60 # The default context factory in Twisted 14.0.0 (which we require) is diff --git a/synapse/metrics/_exposition.py b/synapse/metrics/_exposition.py index a248103191ad..ab7f948ed453 100644 --- a/synapse/metrics/_exposition.py +++ b/synapse/metrics/_exposition.py @@ -33,6 +33,8 @@ from twisted.web.resource import Resource +from synapse.util import caches + try: from prometheus_client.samples import Sample except ImportError: @@ -103,13 +105,15 @@ def nameify_sample(sample): def generate_latest(registry, emit_help=False): - output = [] - for metric in registry.collect(): + # Trigger the cache metrics to be rescraped, which updates the common + # metrics but do not produce metrics themselves + for collector in caches.collectors_by_name.values(): + collector.collect() - if metric.name.startswith("__unused"): - continue + output = [] + for metric in registry.collect(): if not metric.samples: # No samples, don't bother. 
continue diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1ba7bcd4d881..1400f51691fe 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -51,6 +51,7 @@ "cache", "push_rules_delta_state_cache_metric", cache=[], # Meaningless size, as this isn't a cache that stores values + resizable=False, ) @@ -67,7 +68,8 @@ def __init__(self, hs): self.room_push_rule_cache_metrics = register_cache( "cache", "room_push_rule_cache", - cache=[], # Meaningless size, as this isn't a cache that stores values + cache=[], # Meaningless size, as this isn't a cache that stores values, + resizable=False, ) @defer.inlineCallbacks diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index b1587183a8e7..a70e6772812f 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -20,7 +20,7 @@ from six import string_types from synapse.types import UserID -from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache +from synapse.util.caches import register_cache from synapse.util.caches.lrucache import LruCache logger = logging.getLogger(__name__) @@ -149,7 +149,7 @@ def _get_value(self, dotted_key): # Caches (glob, word_boundary) -> regex for push. See _glob_matches -regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR) +regex_cache = LruCache(50000) register_cache("cache", "regex_push_cache", regex_cache) diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index b4f58cea19f4..268014f9800c 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -14,7 +14,6 @@ # limitations under the License. 
from synapse.storage.data_stores.main.client_ips import LAST_SEEN_GRANULARITY -from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.caches.descriptors import Cache from ._base import BaseSlavedStore @@ -25,7 +24,7 @@ def __init__(self, db_conn, hs): super(SlavedClientIpStore, self).__init__(db_conn, hs) self.client_ip_last_seen = Cache( - name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR + name="client_ip_last_seen", keylen=4, max_entries=50000 ) def insert_client_ip(self, user_id, access_token, ip, user_agent, device_id): diff --git a/synapse/server.py b/synapse/server.py index 90c3b072e8d3..9f06b83f9495 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -252,6 +252,7 @@ def setup_master(self): """ for i in self.REQUIRED_ON_MASTER_STARTUP: getattr(self, "get_" + i)() + self.config.caches.resize_caches() def get_reactor(self): """ diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 139beef8ede7..e5630c7e1b10 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,6 @@ from synapse.logging.utils import log_function from synapse.state import v1, v2 from synapse.util.async_helpers import Linearizer -from synapse.util.caches import get_cache_factor_for from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.metrics import Measure, measure_func @@ -51,7 +50,6 @@ KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key")) -SIZE_OF_CACHE = 100000 * get_cache_factor_for("state_cache") EVICTION_TIMEOUT_SECONDS = 60 * 60 @@ -441,10 +439,11 @@ def __init__(self, hs): self._state_cache = None self.resolve_linearizer = Linearizer(name="state_resolve_lock") + cache_size = 100000 * hs.config.caches.get_factor_for("state_cache") self._state_cache = ExpiringCache( cache_name="state_cache", clock=self.clock, - max_len=SIZE_OF_CACHE, + max_len=cache_size, expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, iterable=True, reset_expiry_on_get=True, diff --git 
a/synapse/storage/data_stores/main/client_ips.py b/synapse/storage/data_stores/main/client_ips.py index 706c6a1f3fac..2f9ad008b579 100644 --- a/synapse/storage/data_stores/main/client_ips.py +++ b/synapse/storage/data_stores/main/client_ips.py @@ -22,7 +22,6 @@ from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage import background_updates from synapse.storage._base import Cache -from synapse.util.caches import CACHE_SIZE_FACTOR logger = logging.getLogger(__name__) @@ -366,7 +365,7 @@ class ClientIpStore(ClientIpBackgroundUpdateStore): def __init__(self, db_conn, hs): self.client_ip_last_seen = Cache( - name="client_ip_last_seen", keylen=4, max_entries=50000 * CACHE_SIZE_FACTOR + name="client_ip_last_seen", keylen=4, max_entries=50000 ) super(ClientIpStore, self).__init__(db_conn, hs) diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 6a90daea31bc..2c8f2c184f8b 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -31,7 +31,7 @@ from synapse.storage.data_stores.main.events_worker import EventsWorkerStore from synapse.storage.engines import PostgresEngine from synapse.storage.state import StateFilter -from synapse.util.caches import get_cache_factor_for, intern_string +from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.stringutils import to_ascii @@ -249,11 +249,11 @@ def __init__(self, db_conn, hs): self._state_group_cache = DictionaryCache( "*stateGroupCache*", # TODO: this hasn't been tuned yet - 50000 * get_cache_factor_for("stateGroupCache"), + 50000 * hs.config.caches.get_factor_for("stateGroupCache"), ) self._state_group_members_cache = DictionaryCache( "*stateGroupMembersCache*", - 500000 * get_cache_factor_for("stateGroupMembersCache"), + 500000 * 
hs.config.caches.get_factor_for("stateGroupMembersCache"), ) @defer.inlineCallbacks diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index da5077b47105..d7cfb83624f5 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -15,27 +15,16 @@ # limitations under the License. import logging -import os from typing import Dict import six from six.moves import intern -from prometheus_client.core import REGISTRY, Gauge, GaugeMetricFamily +from prometheus_client.core import Gauge -logger = logging.getLogger(__name__) - -CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) - - -def get_cache_factor_for(cache_name): - env_var = "SYNAPSE_CACHE_FACTOR_" + cache_name.upper() - factor = os.environ.get(env_var) - if factor: - return float(factor) - - return CACHE_SIZE_FACTOR +from synapse.config.cache import add_resizable_cache +logger = logging.getLogger(__name__) caches_by_name = {} collectors_by_name = {} # type: Dict @@ -44,6 +33,7 @@ def get_cache_factor_for(cache_name): cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"]) cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"]) +cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"]) response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"]) @@ -53,8 +43,15 @@ def get_cache_factor_for(cache_name): response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"]) -def register_cache(cache_type, cache_name, cache, collect_callback=None): - """Register a cache object for metric collection. +def register_cache( + cache_type, + cache_name, + cache, + collect_callback=None, + resizable=True, + resize_callback=None, +): + """Register a cache object for metric collection and resizing. 
Args: cache_type (str): @@ -66,13 +63,15 @@ def register_cache(cache_type, cache_name, cache, collect_callback=None): Returns: CacheMetric: an object which provides inc_{hits,misses,evictions} methods """ + if resizable: + if not resize_callback: + resize_callback = getattr(cache, "set_cache_factor") + add_resizable_cache(cache_name, resize_callback) # Check if the metric is already registered. Unregister it, if so. # This usually happens during tests, as at runtime these caches are # effectively singletons. metric_name = "cache_%s_%s" % (cache_type, cache_name) - if metric_name in collectors_by_name.keys(): - REGISTRY.unregister(collectors_by_name[metric_name]) class CacheMetric(object): @@ -104,16 +103,15 @@ def collect(self): cache_hits.labels(cache_name).set(self.hits) cache_evicted.labels(cache_name).set(self.evicted_size) cache_total.labels(cache_name).set(self.hits + self.misses) + if hasattr(cache, "max_size"): + cache_max_size.labels(cache_name).set(cache.max_size) if collect_callback: collect_callback() except Exception as e: logger.warning("Error calculating metrics for %s: %s", cache_name, e) raise - yield GaugeMetricFamily("__unused", "") - metric = CacheMetric() - REGISTRY.register(metric) caches_by_name[cache_name] = cache collectors_by_name[metric_name] = metric return metric diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 84f5ae22c374..f1e525399f7a 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import functools import inspect import logging @@ -30,7 +31,6 @@ from synapse.logging.context import make_deferred_yieldable, preserve_fn from synapse.util import unwrapFirstError from synapse.util.async_helpers import ObservableDeferred -from synapse.util.caches import get_cache_factor_for from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry @@ -81,7 +81,6 @@ def invalidate(self): class Cache(object): __slots__ = ( "cache", - "max_entries", "name", "keylen", "thread", @@ -111,6 +110,10 @@ def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False) collect_callback=self._metrics_collection_callback, ) + @property + def max_entries(self): + return self.cache.max_size + def _on_evicted(self, evicted_count): self.metrics.inc_evictions(evicted_count) @@ -370,8 +373,6 @@ def __init__( cache_context=cache_context, ) - max_entries = int(max_entries * get_cache_factor_for(orig.__name__)) - self.max_entries = max_entries self.tree = tree self.iterable = iterable diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index cddf1ed51521..6d41d9dc65fc 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -66,7 +66,7 @@ def __init__( self.iterable = iterable - self.metrics = register_cache("expiring", cache_name, self) + self.metrics = register_cache("expiring", cache_name, self, resizable=False) if not self._expiry_ms: # Don't bother starting the loop if things never expire diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 1536cb64f3bd..3779617ff9b0 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -14,9 +14,11 @@ # limitations under the License. 
+import math import threading from functools import wraps +from synapse.config import cache as cache_config from synapse.util.caches.treecache import TreeCache @@ -76,6 +78,11 @@ def __init__( """ cache = cache_type() self.cache = cache # Used for introspection. + + # Save the original max size, and apply the default size factor. + self._original_max_size = max_size + self.max_size = math.floor(max_size * cache_config.DEFAULT_CACHE_SIZE_FACTOR) + list_root = _Node(None, None, None, None) list_root.next_node = list_root list_root.prev_node = list_root @@ -83,7 +90,7 @@ def __init__( lock = threading.Lock() def evict(): - while cache_len() > max_size: + while cache_len() > self.max_size: todelete = list_root.prev_node evicted_len = delete_node(todelete) cache.pop(todelete.key, None) @@ -236,6 +243,7 @@ def cache_contains(key): return key in cache self.sentinel = object() + self._on_resize = evict self.get = cache_get self.set = cache_set self.setdefault = cache_set_default @@ -266,3 +274,20 @@ def __len__(self): def __contains__(self, key): return self.contains(key) + + def set_cache_factor(self, factor: float) -> bool: + """ + Set the cache factor for this individual cache. + + This will trigger a resize if it changes, which may require evicting + items from the cache. + + Returns: + bool: Whether the cache changed size or not. 
+ """ + new_size = math.floor(self._original_max_size * factor) + if new_size != self.max_size: + self.max_size = new_size + self._on_resize() + return True + return False diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index 82d3eefe0e43..b395255c1b26 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -38,7 +38,7 @@ def __init__(self, hs, name, timeout_ms=0): self.timeout_sec = timeout_ms / 1000.0 self._name = name - self._metrics = register_cache("response_cache", name, self) + self._metrics = register_cache("response_cache", name, self, resizable=False) def size(self): return len(self.pending_result_cache) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 235f64049c95..fe0b99ae24b1 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -14,11 +14,13 @@ # limitations under the License. import logging +import math from six import integer_types from sortedcontainers import SortedDict +from synapse.config import cache as cache_config from synapse.util import caches logger = logging.getLogger(__name__) @@ -35,17 +37,37 @@ class StreamChangeCache(object): """ def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None): - self._max_size = int(max_size * caches.CACHE_SIZE_FACTOR) + self._original_max_size = max_size + self.max_size = math.floor(max_size * cache_config.DEFAULT_CACHE_SIZE_FACTOR) self._entity_to_key = {} self._cache = SortedDict() self._earliest_known_stream_pos = current_stream_pos self.name = name - self.metrics = caches.register_cache("cache", self.name, self._cache) + self.metrics = caches.register_cache( + "cache", self.name, self._cache, resize_callback=self.set_cache_factor + ) if prefilled_cache: for entity, stream_pos in prefilled_cache.items(): self.entity_has_changed(entity, stream_pos) + def set_cache_factor(self, factor: 
float) -> bool: + """ + Set the cache factor for this individual cache. + + This will trigger a resize if it changes, which may require evicting + items from the cache. + + Returns: + bool: Whether the cache changed size or not. + """ + new_size = math.floor(self._original_max_size * factor) + if new_size != self.max_size: + self.max_size = new_size + self._evict() + return True + return False + def has_entity_changed(self, entity, stream_pos): """Returns True if the entity may have been updated since stream_pos """ @@ -133,13 +155,13 @@ def entity_has_changed(self, entity, stream_pos): self._cache.pop(old_pos, None) self._cache[stream_pos] = entity self._entity_to_key[entity] = stream_pos + self._evict() - while len(self._cache) > self._max_size: - k, r = self._cache.popitem(0) - self._earliest_known_stream_pos = max( - k, self._earliest_known_stream_pos - ) - self._entity_to_key.pop(r, None) + def _evict(self): + while len(self._cache) > self.max_size: + k, r = self._cache.popitem(0) + self._earliest_known_stream_pos = max(k, self._earliest_known_stream_pos) + self._entity_to_key.pop(r, None) def get_max_pos_of_last_change(self, entity): """Returns an upper bound of the stream id of the last change to an diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py index 99646c7cf09b..6437aa907e0f 100644 --- a/synapse/util/caches/ttlcache.py +++ b/synapse/util/caches/ttlcache.py @@ -38,7 +38,7 @@ def __init__(self, cache_name, timer=time.time): self._timer = timer - self._metrics = register_cache("ttl", cache_name, self) + self._metrics = register_cache("ttl", cache_name, self, resizable=False) def set(self, key, value, ttl): """Add/update an entry in the cache diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 9b81b536f546..fdb3f6156123 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -25,8 +25,8 @@ from tests import unittest -class CacheTestCase(unittest.TestCase): - def setUp(self): +class 
CacheTestCase(unittest.HomeserverTestCase): + def prepare(self, reactor, clock, homeserver): self.cache = Cache("test") def test_empty(self): @@ -96,7 +96,7 @@ def test_eviction_lru(self): cache.get(3) -class CacheDecoratorTestCase(unittest.TestCase): +class CacheDecoratorTestCase(unittest.HomeserverTestCase): @defer.inlineCallbacks def test_passthrough(self): class A(object): @@ -239,7 +239,7 @@ def test_eviction_context(self): callcount2 = [0] class A(object): - @cached(max_entries=4) # HACK: This makes it 2 due to cache factor + @cached(max_entries=2) def func(self, key): callcount[0] += 1 return key diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 270f853d602e..f5f63d8ed699 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -15,6 +15,7 @@ # limitations under the License. from synapse.metrics import REGISTRY, InFlightGauge, generate_latest +from synapse.util.caches.descriptors import Cache from tests import unittest @@ -129,3 +130,36 @@ def test_get_build(self): self.assertTrue(b"osversion=" in items[0]) self.assertTrue(b"pythonversion=" in items[0]) self.assertTrue(b"version=" in items[0]) + + +class CacheMetricsTests(unittest.HomeserverTestCase): + def test_cache_metric(self): + """ + Caches produce metrics reflecting their state when scraped. 
+ """ + CACHE_NAME = "cache_metrics_test_fgjkbdfg" + cache = Cache(CACHE_NAME, max_entries=777) + + items = { + x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii") + for x in filter( + lambda x: b"cache_metrics_test_fgjkbdfg" in x, + generate_latest(REGISTRY).split(b"\n"), + ) + } + + self.assertEqual(items["synapse_util_caches_cache_size"], "0.0") + self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0") + + cache.prefill("1", "hi") + + items = { + x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii") + for x in filter( + lambda x: b"cache_metrics_test_fgjkbdfg" in x, + generate_latest(REGISTRY).split(b"\n"), + ) + } + + self.assertEqual(items["synapse_util_caches_cache_size"], "1.0") + self.assertEqual(items["synapse_util_caches_cache_max_size"], "777.0") diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py index 786947375df2..1cee7ba91a36 100644 --- a/tests/util/test_lrucache.py +++ b/tests/util/test_lrucache.py @@ -22,7 +22,7 @@ from .. import unittest -class LruCacheTestCase(unittest.TestCase): +class LruCacheTestCase(unittest.HomeserverTestCase): def test_get_set(self): cache = LruCache(1) cache["key"] = "value" diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py index f2be63706bba..97af491af6e4 100644 --- a/tests/util/test_stream_change_cache.py +++ b/tests/util/test_stream_change_cache.py @@ -1,11 +1,9 @@ -from mock import patch - from synapse.util.caches.stream_change_cache import StreamChangeCache from tests import unittest -class StreamChangeCacheTests(unittest.TestCase): +class StreamChangeCacheTests(unittest.HomeserverTestCase): """ Tests for StreamChangeCache. 
""" @@ -46,7 +44,6 @@ def test_has_entity_changed(self): self.assertTrue(cache.has_entity_changed("user@foo.com", 0)) self.assertTrue(cache.has_entity_changed("not@here.website", 0)) - @patch("synapse.util.caches.CACHE_SIZE_FACTOR", 1.0) def test_has_entity_changed_pops_off_start(self): """ StreamChangeCache.entity_has_changed will respect the max size and diff --git a/tests/utils.py b/tests/utils.py index 7dc9bdc505de..5c9543db7189 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -162,6 +162,7 @@ def default_config(name, parse=False): # disable user directory updates, because they get done in the # background, which upsets the test runner. "update_user_directory": False, + "caches": {"global_factor": 1}, } if parse: From 50fcb4a8c53b969d1bed0a73449eb4bde8d8daf9 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 22 Nov 2019 00:05:12 +1100 Subject: [PATCH 07/49] Re-sync every so often, in case caches appear --- synapse/server.py | 2 +- synapse/storage/data_stores/main/state.py | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index 9f06b83f9495..d0e6f850a497 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -242,6 +242,7 @@ def setup(self): self.datastores = DataStores(datastore, conn, self) conn.commit() self.start_time = int(self.get_clock().time()) + self.get_clock().looping_call(self.config.caches.resize_caches, 10000) logger.info("Finished setting up.") def setup_master(self): @@ -252,7 +253,6 @@ def setup_master(self): """ for i in self.REQUIRED_ON_MASTER_STARTUP: getattr(self, "get_" + i)() - self.config.caches.resize_caches() def get_reactor(self): """ diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 2c8f2c184f8b..180b75c71067 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -246,14 +246,10 @@ def __init__(self, db_conn, hs): # We size the non-members cache to be 
smaller than the members cache as the # vast majority of state in Matrix (today) is member events. - self._state_group_cache = DictionaryCache( - "*stateGroupCache*", - # TODO: this hasn't been tuned yet - 50000 * hs.config.caches.get_factor_for("stateGroupCache"), - ) + # TODO: this hasn't been tuned yet + self._state_group_cache = DictionaryCache("*stateGroupCache*", 50000) self._state_group_members_cache = DictionaryCache( - "*stateGroupMembersCache*", - 500000 * hs.config.caches.get_factor_for("stateGroupMembersCache"), + "*stateGroupMembersCache*", 500000 ) @defer.inlineCallbacks From 92d4d1342a060e82359299fa83570f9420fac033 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Mon, 2 Dec 2019 17:42:24 +1100 Subject: [PATCH 08/49] fixes --- synapse/benchmarks/suites/__init__.py | 1 - {synapse/benchmarks => synmark}/__init__.py | 7 +------ {synapse/benchmarks => synmark}/__main__.py | 7 +++---- synmark/suites/__init__.py | 6 ++++++ .../benchmarks => synmark}/suites/logging.py | 17 ++++++++++++++--- tox.ini | 2 +- 6 files changed, 25 insertions(+), 15 deletions(-) delete mode 100644 synapse/benchmarks/suites/__init__.py rename {synapse/benchmarks => synmark}/__init__.py (92%) rename {synapse/benchmarks => synmark}/__main__.py (82%) create mode 100644 synmark/suites/__init__.py rename {synapse/benchmarks => synmark}/suites/logging.py (92%) diff --git a/synapse/benchmarks/suites/__init__.py b/synapse/benchmarks/suites/__init__.py deleted file mode 100644 index 641e3299c213..000000000000 --- a/synapse/benchmarks/suites/__init__.py +++ /dev/null @@ -1 +0,0 @@ -SUITES = [("LOGGING", 1000), ("LOGGING", 10000)] diff --git a/synapse/benchmarks/__init__.py b/synmark/__init__.py similarity index 92% rename from synapse/benchmarks/__init__.py rename to synmark/__init__.py index d0450c4d9d84..64c2744ae7aa 100644 --- a/synapse/benchmarks/__init__.py +++ b/synmark/__init__.py @@ -31,13 +31,8 @@ def setup_database(): async def make_homeserver(reactor, config=None): - def 
wait(time): - d = Deferred() - reactor.callLater(time, d.callback, True) - return d cleanup_tasks = [] - clock = Clock(reactor) if not config: @@ -60,4 +55,4 @@ def cleanup(): for i in cleanup_tasks: i() - return hs, wait, cleanup + return hs, clock.sleep, cleanup diff --git a/synapse/benchmarks/__main__.py b/synmark/__main__.py similarity index 82% rename from synapse/benchmarks/__main__.py rename to synmark/__main__.py index 0d490791fc8b..e208b04a0b6c 100644 --- a/synapse/benchmarks/__main__.py +++ b/synmark/__main__.py @@ -17,7 +17,7 @@ from twisted.python import reflect -from synapse.benchmarks.suites import SUITES +from synmark.suites import SUITES if __name__ == "__main__": @@ -26,7 +26,6 @@ runner.args.inherit_environ = ["SYNAPSE_POSTGRES"] for suite, loops in SUITES: - - func = reflect.namedAny("synapse.benchmarks.suites.%s.main" % (suite.lower(),)) + print(suite, loops) runner.args.loops = loops - runner.bench_time_func(suite + "_" + str(loops), func) + runner.bench_time_func(suite.__name__ + "_" + str(loops), suite.main) diff --git a/synmark/suites/__init__.py b/synmark/suites/__init__.py new file mode 100644 index 000000000000..412c15f52c75 --- /dev/null +++ b/synmark/suites/__init__.py @@ -0,0 +1,6 @@ +from . 
import logging + + +SUITES = [(logging, 1000), (logging, 10000)] + + diff --git a/synapse/benchmarks/suites/logging.py b/synmark/suites/logging.py similarity index 92% rename from synapse/benchmarks/suites/logging.py rename to synmark/suites/logging.py index 6dd63c69a26c..bc967e97abf9 100644 --- a/synapse/benchmarks/suites/logging.py +++ b/synmark/suites/logging.py @@ -18,16 +18,17 @@ from io import StringIO from mock import Mock +import sys from pyperf import perf_counter from twisted.internet.defer import ensureDeferred from twisted.internet.protocol import ServerFactory -from twisted.logger import LogBeginner, Logger, LogPublisher, globalLogBeginner +from twisted.logger import LogBeginner, Logger, LogPublisher, globalLogBeginner, textFileLogObserver from twisted.protocols.basic import LineOnlyReceiver from twisted.python.failure import Failure -from synapse.benchmarks import make_homeserver, setup_database +from synmark import make_homeserver, setup_database from synapse.logging._structured import setup_structured_logging @@ -47,6 +48,9 @@ async def _main(reactor, loops): servers = [] + print("?") + + def protocol(): p = LineCounter() servers.append(p) @@ -84,12 +88,15 @@ def protocol(): hs, hs.config, log_config, logBeginner=beginner, redirect_stdlib_logging=False ) + print("hi") + # Wait for it to connect... 
await logging_system._observers[0]._service.whenConnected() # Send a bunch of useful messages for i in range(0, loops): logger.info("test message %s" % (i,)) + print(i) if ( len(logging_system._observers[0]._buffer) @@ -102,6 +109,7 @@ def protocol(): await wait(0.01) while servers[0].count != loops: + print(servers[0].count, loops) await wait(0.01) end = perf_counter() - start @@ -115,10 +123,13 @@ def protocol(): def main(loops): + print("hi?") + print(loops) + setup_database() if globalLogBeginner._temporaryObserver: - globalLogBeginner.beginLoggingTo([lambda event: None]) + globalLogBeginner.beginLoggingTo([textFileLogObserver(sys.__stderr__)]) file_out = StringIO() with redirect_stderr(file_out): diff --git a/tox.ini b/tox.ini index 7e981bbc682e..903a245fb027 100644 --- a/tox.ini +++ b/tox.ini @@ -109,7 +109,7 @@ deps = setenv = SYNAPSE_POSTGRES = 1 commands = - python -m synapse.benchmarks {posargs:} + python -m synmark {posargs:} [testenv:packaging] skip_install=True From c11c8ad39f11a8205778b0bf41bd143e288528f0 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Mon, 2 Dec 2019 18:30:11 +1100 Subject: [PATCH 09/49] more fixes --- synapse/logging/_terse_json.py | 1 + synmark/__init__.py | 25 ++++++++---- synmark/__main__.py | 69 +++++++++++++++++++++++++++++++--- synmark/suites/__init__.py | 5 +-- synmark/suites/logging.py | 63 +++++++------------------------ 5 files changed, 97 insertions(+), 66 deletions(-) diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 05fc64f409f3..03934956f49d 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -256,6 +256,7 @@ def writer(r): # transport is the same, just trigger a resumeProducing. if self._producer and r.transport is self._producer.transport: self._producer.resumeProducing() + self._connection_waiter = None return # If the producer is still producing, stop it. 
diff --git a/synmark/__init__.py b/synmark/__init__.py index 64c2744ae7aa..5cc8ed28ee5d 100644 --- a/synmark/__init__.py +++ b/synmark/__init__.py @@ -13,21 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet.defer import Deferred +import sys + +from twisted.internet import epollreactor +from twisted.internet.main import installReactor from synapse.config.homeserver import HomeServerConfig from synapse.util import Clock from tests.utils import default_config, setup_test_homeserver, setupdb -DB_SETUP = False - def setup_database(): - global DB_SETUP - if not DB_SETUP: - setupdb() - DB_SETUP = True + setupdb() async def make_homeserver(reactor, config=None): @@ -56,3 +54,16 @@ def cleanup(): i() return hs, clock.sleep, cleanup + + +def make_reactor(): + """ + Make an install a Twisted reactor. + """ + reactor = epollreactor.EPollReactor() + + if "twisted.internet.reactor" in sys.modules: + del sys.modules["twisted.internet.reactor"] + installReactor(reactor) + + return reactor diff --git a/synmark/__main__.py b/synmark/__main__.py index e208b04a0b6c..8a8fbdf6c742 100644 --- a/synmark/__main__.py +++ b/synmark/__main__.py @@ -13,19 +13,76 @@ # See the License for the specific language governing permissions and # limitations under the License. +import sys +from contextlib import redirect_stderr +from io import StringIO + import pyperf +from synmark import make_reactor, setup_database +from synmark.suites import SUITES -from twisted.python import reflect +from twisted.internet.defer import ensureDeferred +from twisted.logger import globalLogBeginner, textFileLogObserver +from twisted.python.failure import Failure + + +def make_test(main): + """ + Take main, the test function, and wrap it in a reactor start and stop. 
+ """ + + def _main(loops): + + reactor = make_reactor() + + file_out = StringIO() + with redirect_stderr(file_out): + + d = ensureDeferred(main(reactor, loops)) + + def on_done(_): + if isinstance(_, Failure): + _.printTraceback() + print(file_out.getvalue()) + reactor.stop() + return _ + + d.addBoth(on_done) + reactor.run() + + return d.result + + return _main -from synmark.suites import SUITES if __name__ == "__main__": - runner = pyperf.Runner(processes=5, values=1, warmups=0) + def add_cmdline_args(cmd, args): + if args.log: + cmd.extend(["--log"]) + + runner = pyperf.Runner( + processes=3, min_time=2, show_name=True, add_cmdline_args=add_cmdline_args + ) + runner.argparser.add_argument("--log", action="store_true") runner.parse_args() + + orig_loops = runner.args.loops runner.args.inherit_environ = ["SYNAPSE_POSTGRES"] + if runner.args.worker: + if runner.args.log: + globalLogBeginner.beginLoggingTo( + [textFileLogObserver(sys.__stdout__)], redirectStandardIO=False + ) + setup_database() + for suite, loops in SUITES: - print(suite, loops) - runner.args.loops = loops - runner.bench_time_func(suite.__name__ + "_" + str(loops), suite.main) + if loops: + runner.args.loops = loops + else: + runner.args.loops = orig_loops + loops = "auto" + runner.bench_time_func( + suite.__name__ + "_" + str(loops), make_test(suite.main), + ) diff --git a/synmark/suites/__init__.py b/synmark/suites/__init__.py index 412c15f52c75..cfa3b0ba3823 100644 --- a/synmark/suites/__init__.py +++ b/synmark/suites/__init__.py @@ -1,6 +1,3 @@ from . import logging - -SUITES = [(logging, 1000), (logging, 10000)] - - +SUITES = [(logging, 1000), (logging, 10000), (logging, None)] diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py index bc967e97abf9..26b78e4569bf 100644 --- a/synmark/suites/logging.py +++ b/synmark/suites/logging.py @@ -14,21 +14,18 @@ # limitations under the License. 
import warnings -from contextlib import redirect_stderr from io import StringIO from mock import Mock -import sys from pyperf import perf_counter +from synmark import make_homeserver -from twisted.internet.defer import ensureDeferred +from twisted.internet.defer import Deferred from twisted.internet.protocol import ServerFactory -from twisted.logger import LogBeginner, Logger, LogPublisher, globalLogBeginner, textFileLogObserver +from twisted.logger import LogBeginner, Logger, LogPublisher from twisted.protocols.basic import LineOnlyReceiver -from twisted.python.failure import Failure -from synmark import make_homeserver, setup_database from synapse.logging._structured import setup_structured_logging @@ -43,13 +40,15 @@ def __init__(self, *args, **kwargs): def lineReceived(self, line): self.count += 1 + if self.count >= self.factory.wait_for and self.factory.on_done: + on_done = self.factory.on_done + self.factory.on_done = None + on_done.callback(True) -async def _main(reactor, loops): - servers = [] - - print("?") +async def main(reactor, loops): + servers = [] def protocol(): p = LineCounter() @@ -57,6 +56,8 @@ def protocol(): return p logger_factory = ServerFactory.forProtocol(protocol) + logger_factory.wait_for = loops + logger_factory.on_done = Deferred() port = reactor.listenTCP(0, logger_factory, interface="127.0.0.1") hs, wait, cleanup = await make_homeserver(reactor) @@ -81,22 +82,18 @@ def protocol(): } logger = Logger(namespace="synapse.logging.test_terse_json", observer=publisher) - - start = perf_counter() - logging_system = setup_structured_logging( hs, hs.config, log_config, logBeginner=beginner, redirect_stdlib_logging=False ) - print("hi") - # Wait for it to connect... 
await logging_system._observers[0]._service.whenConnected() + start = perf_counter() + # Send a bunch of useful messages for i in range(0, loops): logger.info("test message %s" % (i,)) - print(i) if ( len(logging_system._observers[0]._buffer) @@ -108,9 +105,7 @@ def protocol(): ): await wait(0.01) - while servers[0].count != loops: - print(servers[0].count, loops) - await wait(0.01) + await logger_factory.on_done end = perf_counter() - start @@ -119,33 +114,3 @@ def protocol(): cleanup() return end - - -def main(loops): - - print("hi?") - print(loops) - - setup_database() - - if globalLogBeginner._temporaryObserver: - globalLogBeginner.beginLoggingTo([textFileLogObserver(sys.__stderr__)]) - - file_out = StringIO() - with redirect_stderr(file_out): - - from twisted.internet import epollreactor - - reactor = epollreactor.EPollReactor() - d = ensureDeferred(_main(reactor, loops)) - - def on_done(_): - if isinstance(_, Failure): - _.printTraceback() - reactor.stop() - return _ - - d.addBoth(on_done) - reactor.run() - - return d.result From 0e368eec58d87143be8e4b19a0faf12fd6d1e3a0 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Mon, 2 Dec 2019 19:17:19 +1100 Subject: [PATCH 10/49] more fixes --- synmark/__init__.py | 15 +++++++++------ synmark/__main__.py | 8 +++++--- synmark/suites/logging.py | 4 +++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/synmark/__init__.py b/synmark/__init__.py index 5cc8ed28ee5d..570eb818d9c5 100644 --- a/synmark/__init__.py +++ b/synmark/__init__.py @@ -21,15 +21,17 @@ from synapse.config.homeserver import HomeServerConfig from synapse.util import Clock -from tests.utils import default_config, setup_test_homeserver, setupdb - - -def setup_database(): - setupdb() +from tests.utils import default_config, setup_test_homeserver async def make_homeserver(reactor, config=None): + """ + Make a Homeserver suitable for running benchmarks against. + Args: + reactor: A Twisted reactor to run under. 
+ config: A HomeServerConfig to use, or None. + """ cleanup_tasks = [] clock = Clock(reactor) @@ -58,7 +60,8 @@ def cleanup(): def make_reactor(): """ - Make an install a Twisted reactor. + Instantiate and install a Twisted reactor suitable for testing (i.e. not the + default global one). """ reactor = epollreactor.EPollReactor() diff --git a/synmark/__main__.py b/synmark/__main__.py index 8a8fbdf6c742..ac59befbd497 100644 --- a/synmark/__main__.py +++ b/synmark/__main__.py @@ -18,17 +18,19 @@ from io import StringIO import pyperf -from synmark import make_reactor, setup_database +from synmark import make_reactor from synmark.suites import SUITES from twisted.internet.defer import ensureDeferred from twisted.logger import globalLogBeginner, textFileLogObserver from twisted.python.failure import Failure +from tests.utils import setupdb + def make_test(main): """ - Take main, the test function, and wrap it in a reactor start and stop. + Take a benchmark function and wrap it in a reactor start and stop. """ def _main(loops): @@ -75,7 +77,7 @@ def add_cmdline_args(cmd, args): globalLogBeginner.beginLoggingTo( [textFileLogObserver(sys.__stdout__)], redirectStandardIO=False ) - setup_database() + setupdb() for suite, loops in SUITES: if loops: diff --git a/synmark/suites/logging.py b/synmark/suites/logging.py index 26b78e4569bf..d8e4c7d58f26 100644 --- a/synmark/suites/logging.py +++ b/synmark/suites/logging.py @@ -47,7 +47,9 @@ def lineReceived(self, line): async def main(reactor, loops): - + """ + Benchmark how long it takes to send `loops` messages. + """ servers = [] def protocol(): From f7ec52670b0399ebcc3349932fac867a55e78e7a Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Mon, 2 Dec 2019 19:45:20 +1100 Subject: [PATCH 11/49] add some LruCache benchmarks --- synmark/__main__.py | 14 ++++++++--- synmark/suites/__init__.py | 4 +-- synmark/suites/lrucache.py | 41 +++++++++++++++++++++++++++++++ synmark/suites/lrucache_evict.py | 42 ++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 synmark/suites/lrucache.py create mode 100644 synmark/suites/lrucache_evict.py diff --git a/synmark/__main__.py b/synmark/__main__.py index ac59befbd497..d3f4b5ba0558 100644 --- a/synmark/__main__.py +++ b/synmark/__main__.py @@ -18,10 +18,11 @@ from io import StringIO import pyperf +from argparse import REMAINDER from synmark import make_reactor from synmark.suites import SUITES -from twisted.internet.defer import ensureDeferred +from twisted.internet.defer import ensureDeferred, Deferred from twisted.logger import globalLogBeginner, textFileLogObserver from twisted.python.failure import Failure @@ -40,7 +41,8 @@ def _main(loops): file_out = StringIO() with redirect_stderr(file_out): - d = ensureDeferred(main(reactor, loops)) + d = Deferred() + d.addCallback(lambda _: ensureDeferred(main(reactor, loops))) def on_done(_): if isinstance(_, Failure): @@ -50,6 +52,7 @@ def on_done(_): return _ d.addBoth(on_done) + reactor.callWhenRunning(lambda: d.callback(True)) reactor.run() return d.result @@ -62,11 +65,13 @@ def on_done(_): def add_cmdline_args(cmd, args): if args.log: cmd.extend(["--log"]) + cmd.extend(args.tests) runner = pyperf.Runner( - processes=3, min_time=2, show_name=True, add_cmdline_args=add_cmdline_args + processes=3, min_time=1.5, show_name=True, add_cmdline_args=add_cmdline_args ) runner.argparser.add_argument("--log", action="store_true") + runner.argparser.add_argument("tests", nargs=REMAINDER) runner.parse_args() orig_loops = runner.args.loops @@ -79,6 +84,9 @@ def add_cmdline_args(cmd, args): ) setupdb() + if runner.args.tests: + SUITES = list(filter(lambda x: 
x[0].__name__.split(".")[-1] in runner.args.tests, SUITES)) + for suite, loops in SUITES: if loops: runner.args.loops = loops diff --git a/synmark/suites/__init__.py b/synmark/suites/__init__.py index cfa3b0ba3823..39b872762ebd 100644 --- a/synmark/suites/__init__.py +++ b/synmark/suites/__init__.py @@ -1,3 +1,3 @@ -from . import logging +from . import logging, lrucache, lrucache_evict -SUITES = [(logging, 1000), (logging, 10000), (logging, None)] +SUITES = [(logging, 1000), (logging, 10000), (logging, None), (lrucache, None), (lrucache_evict, None)] diff --git a/synmark/suites/lrucache.py b/synmark/suites/lrucache.py new file mode 100644 index 000000000000..b7a96132c7ad --- /dev/null +++ b/synmark/suites/lrucache.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +from io import StringIO + +from mock import Mock + +from pyperf import perf_counter +from synmark import make_homeserver + +from synapse.util.caches.lrucache import LruCache +from synapse.logging._structured import setup_structured_logging + + +async def main(reactor, loops): + """ + Benchmark `loops` number of insertions into LruCache without eviction. 
+ """ + cache = LruCache(loops) + + start = perf_counter() + + for i in range(loops): + cache[i] = True + + end = perf_counter() - start + + return end diff --git a/synmark/suites/lrucache_evict.py b/synmark/suites/lrucache_evict.py new file mode 100644 index 000000000000..c88ff5a3f88c --- /dev/null +++ b/synmark/suites/lrucache_evict.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +from io import StringIO + +from mock import Mock + +from pyperf import perf_counter +from synmark import make_homeserver + +from synapse.util.caches.lrucache import LruCache +from synapse.logging._structured import setup_structured_logging + + +async def main(reactor, loops): + """ + Benchmark `loops` number of insertions into LruCache where half of them are + evicted. + """ + cache = LruCache(loops // 2) + + start = perf_counter() + + for i in range(loops): + cache[i] = True + + end = perf_counter() - start + + return end From e174b2d19c5d703cd01240e2a31a96f0c89d4bc0 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Mon, 2 Dec 2019 19:49:13 +1100 Subject: [PATCH 12/49] fix style --- synmark/__main__.py | 8 +++++--- synmark/suites/__init__.py | 8 +++++++- synmark/suites/lrucache.py | 7 ------- synmark/suites/lrucache_evict.py | 7 ------- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/synmark/__main__.py b/synmark/__main__.py index d3f4b5ba0558..17df9ddeb797 100644 --- a/synmark/__main__.py +++ b/synmark/__main__.py @@ -14,15 +14,15 @@ # limitations under the License. import sys +from argparse import REMAINDER from contextlib import redirect_stderr from io import StringIO import pyperf -from argparse import REMAINDER from synmark import make_reactor from synmark.suites import SUITES -from twisted.internet.defer import ensureDeferred, Deferred +from twisted.internet.defer import Deferred, ensureDeferred from twisted.logger import globalLogBeginner, textFileLogObserver from twisted.python.failure import Failure @@ -85,7 +85,9 @@ def add_cmdline_args(cmd, args): setupdb() if runner.args.tests: - SUITES = list(filter(lambda x: x[0].__name__.split(".")[-1] in runner.args.tests, SUITES)) + SUITES = list( + filter(lambda x: x[0].__name__.split(".")[-1] in runner.args.tests, SUITES) + ) for suite, loops in SUITES: if loops: diff --git a/synmark/suites/__init__.py b/synmark/suites/__init__.py index 39b872762ebd..d8445fc3df75 100644 --- a/synmark/suites/__init__.py +++ b/synmark/suites/__init__.py @@ -1,3 +1,9 @@ from . 
import logging, lrucache, lrucache_evict -SUITES = [(logging, 1000), (logging, 10000), (logging, None), (lrucache, None), (lrucache_evict, None)] +SUITES = [ + (logging, 1000), + (logging, 10000), + (logging, None), + (lrucache, None), + (lrucache_evict, None), +] diff --git a/synmark/suites/lrucache.py b/synmark/suites/lrucache.py index b7a96132c7ad..69ab042ccc48 100644 --- a/synmark/suites/lrucache.py +++ b/synmark/suites/lrucache.py @@ -13,16 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings -from io import StringIO - -from mock import Mock - from pyperf import perf_counter -from synmark import make_homeserver from synapse.util.caches.lrucache import LruCache -from synapse.logging._structured import setup_structured_logging async def main(reactor, loops): diff --git a/synmark/suites/lrucache_evict.py b/synmark/suites/lrucache_evict.py index c88ff5a3f88c..532b1cc70220 100644 --- a/synmark/suites/lrucache_evict.py +++ b/synmark/suites/lrucache_evict.py @@ -13,16 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings -from io import StringIO - -from mock import Mock - from pyperf import perf_counter -from synmark import make_homeserver from synapse.util.caches.lrucache import LruCache -from synapse.logging._structured import setup_structured_logging async def main(reactor, loops): From 9735a08f04b02683f1ee6c86c59d83a9370cba36 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Tue, 3 Dec 2019 20:24:43 +1100 Subject: [PATCH 13/49] newsfile --- changelog.d/6446.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6446.misc diff --git a/changelog.d/6446.misc b/changelog.d/6446.misc new file mode 100644 index 000000000000..c42df16f1aa3 --- /dev/null +++ b/changelog.d/6446.misc @@ -0,0 +1 @@ +Add benchmarks for LruCache. 
From 0a02b2a1c5dc25d04a5b3cb536bc0a3a0bf7cf19 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 17 Jan 2020 01:05:50 +1100 Subject: [PATCH 14/49] cleanup --- synapse/api/auth.py | 7 +------ synapse/app/homeserver.py | 7 ------- synapse/config/cache.py | 1 + synapse/replication/slave/storage/client_ips.py | 1 - synapse/server.py | 4 ---- synapse/storage/data_stores/main/client_ips.py | 1 - 6 files changed, 2 insertions(+), 19 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 86f7f5b144e0..be81ebccef06 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -34,13 +34,8 @@ ResourceLimitError, ) from synapse.config.server import is_threepid_reserved -<<<<<<< HEAD -from synapse.types import UserID -from synapse.util.caches import register_cache -======= from synapse.types import StateMap, UserID -from synapse.util.caches import CACHE_SIZE_FACTOR, register_cache ->>>>>>> origin/develop +from synapse.util.caches import register_cache from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import Measure diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index d86d924bea86..0feb5bfa8642 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -66,16 +66,9 @@ from synapse.rest.key.v2 import KeyApiV2Resource from synapse.rest.well_known import WellKnownResource from synapse.server import HomeServer -<<<<<<< HEAD -from synapse.storage import DataStore, are_all_users_on_domain -from synapse.storage.engines import IncorrectDatabaseSetup, create_engine -from synapse.storage.prepare_database import UpgradeDatabaseException, prepare_database -======= from synapse.storage import DataStore from synapse.storage.engines import IncorrectDatabaseSetup from synapse.storage.prepare_database import UpgradeDatabaseException -from synapse.util.caches import CACHE_SIZE_FACTOR ->>>>>>> origin/develop from synapse.util.httpresourcetree import create_resource_tree from synapse.util.manhole import manhole 
from synapse.util.module_loader import load_module diff --git a/synapse/config/cache.py b/synapse/config/cache.py index e72c6ff3976b..d7921ce050f0 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -46,6 +46,7 @@ def add_resizable_cache(cache_name, cache_resize_callback): _CACHES[cache_name.lower()] = cache_resize_callback + cache_resize_callback(DEFAULT_CACHE_SIZE_FACTOR) class CacheConfig(Config): diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index f26258e677b1..1a38f53dfb8b 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -15,7 +15,6 @@ from synapse.storage.data_stores.main.client_ips import LAST_SEEN_GRANULARITY from synapse.storage.database import Database -from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.caches.descriptors import Cache from ._base import BaseSlavedStore diff --git a/synapse/server.py b/synapse/server.py index 4247675224aa..7926867b777e 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -239,11 +239,7 @@ def __init__(self, hostname: str, config: HomeServerConfig, reactor=None, **kwar def setup(self): logger.info("Setting up.") self.start_time = int(self.get_clock().time()) -<<<<<<< HEAD - self.get_clock().looping_call(self.config.caches.resize_caches, 10000) -======= self.datastores = DataStores(self.DATASTORE_CLASS, self) ->>>>>>> origin/develop logger.info("Finished setting up.") def setup_master(self): diff --git a/synapse/storage/data_stores/main/client_ips.py b/synapse/storage/data_stores/main/client_ips.py index ae188d0f498e..73acc162ed1d 100644 --- a/synapse/storage/data_stores/main/client_ips.py +++ b/synapse/storage/data_stores/main/client_ips.py @@ -22,7 +22,6 @@ from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore from synapse.storage.database import Database -from 
synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.caches.descriptors import Cache logger = logging.getLogger(__name__) From a21702fe764db8737d35a895bf5c364edbdf66f7 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Fri, 17 Jan 2020 01:48:27 +1100 Subject: [PATCH 15/49] cleanup so it can refer to late-config --- synapse/app/homeserver.py | 2 +- synapse/config/cache.py | 2 ++ synapse/config/database.py | 2 -- synapse/storage/data_stores/main/events_worker.py | 2 +- synapse/storage/data_stores/main/relations.py | 4 ++-- synapse/storage/data_stores/state/store.py | 6 ++---- synapse/util/caches/descriptors.py | 5 ++++- tests/storage/test_appservice.py | 10 +++++----- tests/storage/test_base.py | 3 ++- tests/util/test_lrucache.py | 4 ++-- 10 files changed, 21 insertions(+), 19 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 0feb5bfa8642..2b114551a6e8 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -484,7 +484,7 @@ def phone_stats_home(hs, stats, stats_process=_stats_process): daily_sent_messages = yield hs.get_datastore().count_daily_sent_messages() stats["daily_sent_messages"] = daily_sent_messages stats["cache_factor"] = hs.config.caches.global_factor - stats["event_cache_size"] = hs.config.event_cache_size + stats["event_cache_size"] = hs.config.caches.event_cache_size # # Performance statistics diff --git a/synapse/config/cache.py b/synapse/config/cache.py index d7921ce050f0..1d65c64e1a0b 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -53,6 +53,8 @@ class CacheConfig(Config): section = "caches" def read_config(self, config, **kwargs): + self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) + global DEFAULT_CACHE_SIZE_FACTOR cache_config = config.get("caches", {}) diff --git a/synapse/config/database.py b/synapse/config/database.py index 219b32f67084..2244ebff3af3 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ 
-57,8 +57,6 @@ class DatabaseConfig(Config): section = "database" def read_config(self, config, **kwargs): - self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) - # We *experimentally* support specifying multiple databases via the # `databases` key. This is a map from a label to database config in the # same format as the `database` config option, plus an extra diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py index 0cce5232f55b..c34a2efe2726 100644 --- a/synapse/storage/data_stores/main/events_worker.py +++ b/synapse/storage/data_stores/main/events_worker.py @@ -72,7 +72,7 @@ def __init__(self, database: Database, db_conn, hs): super(EventsWorkerStore, self).__init__(database, db_conn, hs) self._get_event_cache = Cache( - "*getEvent*", keylen=3, max_entries=hs.config.event_cache_size + "*getEvent*", keylen=3, max_entries=hs.config.caches.event_cache_size ) self._event_fetch_lock = threading.Condition() diff --git a/synapse/storage/data_stores/main/relations.py b/synapse/storage/data_stores/main/relations.py index 046c2b4845bb..98e6149bc668 100644 --- a/synapse/storage/data_stores/main/relations.py +++ b/synapse/storage/data_stores/main/relations.py @@ -31,7 +31,7 @@ class RelationsWorkerStore(SQLBaseStore): - @cached(tree=True) + @cached(tree=True, max_entries="event_cache_size") def get_relations_for_event( self, event_id, @@ -133,7 +133,7 @@ def _get_recent_references_for_event_txn(txn): "get_recent_references_for_event", _get_recent_references_for_event_txn ) - @cached(tree=True) + @cached(tree=True, max_entries="event_cache_size") def get_aggregation_groups_for_event( self, event_id, diff --git a/synapse/storage/data_stores/state/store.py b/synapse/storage/data_stores/state/store.py index c4ee9b7ccb36..e4c5b904e184 100644 --- a/synapse/storage/data_stores/state/store.py +++ b/synapse/storage/data_stores/state/store.py @@ -28,7 +28,6 @@ from synapse.storage.database 
import Database from synapse.storage.state import StateFilter from synapse.types import StateMap -from synapse.util.caches import get_cache_factor_for from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache @@ -90,11 +89,10 @@ def __init__(self, database: Database, db_conn, hs): self._state_group_cache = DictionaryCache( "*stateGroupCache*", # TODO: this hasn't been tuned yet - 50000 * get_cache_factor_for("stateGroupCache"), + 50000, ) self._state_group_members_cache = DictionaryCache( - "*stateGroupMembersCache*", - 500000 * get_cache_factor_for("stateGroupMembersCache"), + "*stateGroupMembersCache*", 500000, ) @cached(max_entries=10000, iterable=True) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index c4cf38abc75b..8c24965bd812 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -377,7 +377,10 @@ def __init__( self.tree = tree self.iterable = iterable - def __get__(self, obj, objtype=None): + def __get__(self, obj, owner): + if isinstance(self.max_entries, str): + self.max_entries = getattr(obj.hs.config.caches, self.max_entries) + cache = Cache( name=self.orig.__name__, max_entries=self.max_entries, diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index fd5251269645..62a4b31fb85d 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -43,7 +43,7 @@ def setUp(self): ) hs.config.app_service_config_files = self.as_yaml_files - hs.config.event_cache_size = 1 + hs.config.caches.event_cache_size = 1 hs.config.password_providers = [] self.as_token = "token1" @@ -110,7 +110,7 @@ def setUp(self): ) hs.config.app_service_config_files = self.as_yaml_files - hs.config.event_cache_size = 1 + hs.config.caches.event_cache_size = 1 hs.config.password_providers = [] self.as_list = [ @@ -422,7 +422,7 @@ def test_unique_works(self): ) hs.config.app_service_config_files = [f1, 
f2] - hs.config.event_cache_size = 1 + hs.config.caches.event_cache_size = 1 hs.config.password_providers = [] database = hs.get_datastores().databases[0] @@ -440,7 +440,7 @@ def test_duplicate_ids(self): ) hs.config.app_service_config_files = [f1, f2] - hs.config.event_cache_size = 1 + hs.config.caches.event_cache_size = 1 hs.config.password_providers = [] with self.assertRaises(ConfigError) as cm: @@ -464,7 +464,7 @@ def test_duplicate_as_tokens(self): ) hs.config.app_service_config_files = [f1, f2] - hs.config.event_cache_size = 1 + hs.config.caches.event_cache_size = 1 hs.config.password_providers = [] with self.assertRaises(ConfigError) as cm: diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index cdee0a9e60b4..278961c33144 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -51,7 +51,8 @@ def runWithConnection(func, *args, **kwargs): config = Mock() config._disable_native_upserts = True - config.event_cache_size = 1 + config.caches = Mock() + config.caches.event_cache_size = 1 hs = TestHomeServer("test", config=config) sqlite_config = {"name": "sqlite3"} diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py index 1cee7ba91a36..0adb2174af98 100644 --- a/tests/util/test_lrucache.py +++ b/tests/util/test_lrucache.py @@ -84,7 +84,7 @@ def test_clear(self): self.assertEquals(len(cache), 0) -class LruCacheCallbacksTestCase(unittest.TestCase): +class LruCacheCallbacksTestCase(unittest.HomeserverTestCase): def test_get(self): m = Mock() cache = LruCache(1) @@ -233,7 +233,7 @@ def test_eviction(self): self.assertEquals(m3.call_count, 1) -class LruCacheSizedTestCase(unittest.TestCase): +class LruCacheSizedTestCase(unittest.HomeserverTestCase): def test_evict(self): cache = LruCache(5, size_callback=len) cache["key1"] = [0] From 0b069b70a1cca8bc6b341ca941af1ac745698d51 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Fri, 17 Jan 2020 02:14:33 +1100 Subject: [PATCH 16/49] make expiring caches resizeable --- synapse/state/__init__.py | 3 +-- synapse/util/caches/__init__.py | 2 +- synapse/util/caches/expiringcache.py | 29 ++++++++++++++++++++++++---- synapse/util/caches/lrucache.py | 6 ++---- tests/util/test_expiring_cache.py | 2 +- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 48324b24370f..8c84156678a4 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -442,11 +442,10 @@ def __init__(self, hs): self._state_cache = None self.resolve_linearizer = Linearizer(name="state_resolve_lock") - cache_size = 100000 * hs.config.caches.get_factor_for("state_cache") self._state_cache = ExpiringCache( cache_name="state_cache", clock=self.clock, - max_len=cache_size, + max_len=100000, expiry_ms=EVICTION_TIMEOUT_SECONDS * 1000, iterable=True, reset_expiry_on_get=True, diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index d7cfb83624f5..07f725a4f7be 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -103,7 +103,7 @@ def collect(self): cache_hits.labels(cache_name).set(self.hits) cache_evicted.labels(cache_name).set(self.evicted_size) cache_total.labels(cache_name).set(self.hits + self.misses) - if hasattr(cache, "max_size"): + if getattr(cache, "max_size", None): cache_max_size.labels(cache_name).set(cache.max_size) if collect_callback: collect_callback() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 6d41d9dc65fc..043289d2d767 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -18,6 +18,7 @@ from six import iteritems, itervalues +from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import register_cache @@ -55,18 +56,19 @@ def 
__init__( """ self._cache_name = cache_name + self.max_size = int(max_len * cache_config.DEFAULT_CACHE_SIZE_FACTOR) + self._original_max_size = max_len + self._clock = clock - self._max_len = max_len self._expiry_ms = expiry_ms - self._reset_expiry_on_get = reset_expiry_on_get self._cache = OrderedDict() self.iterable = iterable - self.metrics = register_cache("expiring", cache_name, self, resizable=False) + self.metrics = register_cache("expiring", cache_name, self) if not self._expiry_ms: # Don't bother starting the loop if things never expire @@ -82,9 +84,11 @@ def f(): def __setitem__(self, key, value): now = self._clock.time_msec() self._cache[key] = _CacheEntry(now, value) + self.evict() + def evict(self): # Evict if there are now too many items - while self._max_len and len(self) > self._max_len: + while self.max_size and len(self) > self.max_size: _key, value = self._cache.popitem(last=False) if self.iterable: self.metrics.inc_evictions(len(value.value)) @@ -170,6 +174,23 @@ def __len__(self): else: return len(self._cache) + def set_cache_factor(self, factor: float) -> bool: + """ + Set the cache factor for this individual cache. + + This will trigger a resize if it changes, which may require evicting + items from the cache. + + Returns: + bool: Whether the cache changed size or not. + """ + new_size = int(self._original_max_size * factor) + if new_size != self.max_size: + self.max_size = new_size + self.evict() + return True + return False + class _CacheEntry(object): __slots__ = ["time", "value"] diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 3779617ff9b0..8ff23a72daec 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- -import math import threading from functools import wraps @@ -81,7 +79,7 @@ def __init__( # Save the original max size, and apply the default size factor. self._original_max_size = max_size - self.max_size = math.floor(max_size * cache_config.DEFAULT_CACHE_SIZE_FACTOR) + self.max_size = int(max_size * cache_config.DEFAULT_CACHE_SIZE_FACTOR) list_root = _Node(None, None, None, None) list_root.next_node = list_root @@ -285,7 +283,7 @@ def set_cache_factor(self, factor: float) -> bool: Returns: bool: Whether the cache changed size or not. """ - new_size = math.floor(self._original_max_size * factor) + new_size = int(self._original_max_size * factor) if new_size != self.max_size: self.max_size = new_size self._on_resize() diff --git a/tests/util/test_expiring_cache.py b/tests/util/test_expiring_cache.py index 50bc7702d20b..49ffeebd0ed5 100644 --- a/tests/util/test_expiring_cache.py +++ b/tests/util/test_expiring_cache.py @@ -21,7 +21,7 @@ from .. import unittest -class ExpiringCacheTestCase(unittest.TestCase): +class ExpiringCacheTestCase(unittest.HomeserverTestCase): def test_get_set(self): clock = MockClock() cache = ExpiringCache("test", clock, max_len=1) From a96b5d9ca78dbf2e96f4a09a25effb702e7bc25d Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Tue, 21 Jan 2020 23:46:08 +1100 Subject: [PATCH 17/49] Load cache factors from the environment, and add a test. 
--- synapse/config/cache.py | 42 +++++++++---------------- tests/config/test_cache.py | 63 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 27 deletions(-) create mode 100644 tests/config/test_cache.py diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 1d65c64e1a0b..d6dadc5fde60 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -17,15 +17,11 @@ from collections import defaultdict from typing import DefaultDict -from twisted.logger import Logger - from ._base import Config, ConfigError -log = Logger() - _CACHES = {} -DEFAULT_CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) - +_CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR" +DEFAULT_CACHE_SIZE_FACTOR = float(os.environ.get(_CACHE_PREFIX, 0.5)) _DEFAULT_CONFIG = """\ # Cache configuration @@ -51,6 +47,7 @@ def add_resizable_cache(cache_name, cache_resize_callback): class CacheConfig(Config): section = "caches" + _environ = os.environ def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) @@ -68,10 +65,20 @@ def read_config(self, config, **kwargs): # Set the global one so that it's reflected in new caches DEFAULT_CACHE_SIZE_FACTOR = self.global_factor - individual_factors = cache_config.get("per_cache_factors", {}) or {} - if not isinstance(individual_factors, dict): + # Load cache factors from the environment, but override them with the + # ones in the config file if they exist + individual_factors = { + key[len(_CACHE_PREFIX) + 1 :].lower(): float(val) + for key, val in self._environ.items() + if key.startswith(_CACHE_PREFIX + "_") + } + + individual_factors_config = cache_config.get("per_cache_factors", {}) or {} + if not isinstance(individual_factors_config, dict): raise ConfigError("caches.per_cache_factors must be a dictionary") + individual_factors.update(individual_factors_config) + self.cache_factors = defaultdict( lambda: self.global_factor ) # type: DefaultDict[str, 
float] @@ -82,22 +89,3 @@ def read_config(self, config, **kwargs): "caches.per_cache_factors.%s must be a number" % (cache.lower(),) ) self.cache_factors[cache.lower()] = factor - - def resize_caches(self): - for cache_name, cache_resize_callback in _CACHES.items(): - cache_factor = self.cache_factors[cache_name] - log.debug( - "Setting cache factor for {cache_name} to {new_cache_factor}", - cache_name=cache_name, - new_cache_factor=cache_factor, - ) - changed = cache_resize_callback(cache_factor) - if changed: - log.info( - "Cache factor for {cache_name} set to {new_cache_factor}", - cache_name=cache_name, - new_cache_factor=cache_factor, - ) - - def get_factor_for(self, cache_name): - return self.cache_factors[cache_name.lower()] diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py new file mode 100644 index 000000000000..3b3a49c779ec --- /dev/null +++ b/tests/config/test_cache.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from synapse.config._base import Config, RootConfig +from synapse.config.cache import CacheConfig + +from tests.unittest import TestCase + + +class FakeServer(Config): + section = "server" + + +class TestConfig(RootConfig): + config_classes = [FakeServer, CacheConfig] + + +class CacheConfigTests(TestCase): + def test_individual_caches_from_environ(self): + """ + Individual cache factors will be loaded from the environment. 
+ """ + config = {} + t = TestConfig() + t.caches._environ = { + "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2", + "SYNAPSE_NOT_CACHE": "BLAH", + } + t.read_config(config, config_dir_path="", data_dir_path="") + + self.assertEqual(dict(t.caches.cache_factors), {"something_or_other": 2.0}) + + def test_config_overrides_environ(self): + """ + Individual cache factors defined in config will take precedence over + ones in the environment. + """ + config = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}} + t = TestConfig() + t.caches._environ = { + "SYNAPSE_CACHE_FACTOR_SOMETHING_OR_OTHER": "2", + "SYNAPSE_CACHE_FACTOR_FOO": 1, + } + t.read_config(config, config_dir_path="", data_dir_path="") + + self.assertEqual( + dict(t.caches.cache_factors), + {"foo": 2.0, "bar": 3.0, "something_or_other": 2.0}, + ) From 2f4dbfa3e1f11862f3ad66c37c05ffaca4d105ad Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 22 Jan 2020 00:00:10 +1100 Subject: [PATCH 18/49] document as well as refactor so that CacheMetric is not nested --- synapse/util/caches/__init__.py | 117 +++++++++++++++++--------------- 1 file changed, 64 insertions(+), 53 deletions(-) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 07f725a4f7be..b675da44dd99 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019, 2020 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,11 +15,12 @@ # limitations under the License. 
import logging -from typing import Dict +from typing import Callable, Dict, Optional import six from six.moves import intern +import attr from prometheus_client.core import Gauge from synapse.config.cache import add_resizable_cache @@ -43,22 +44,71 @@ response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"]) +@attr.s +class CacheMetric(object): + + _cache = attr.ib() + _cache_type = attr.ib(type=str) + _cache_name = attr.ib(type=str) + _collect_callback = attr.ib(type=Optional[Callable]) + + hits = attr.ib(default=0) + misses = attr.ib(default=0) + evicted_size = attr.ib(default=0) + + def inc_hits(self): + self.hits += 1 + + def inc_misses(self): + self.misses += 1 + + def inc_evictions(self, size=1): + self.evicted_size += size + + def describe(self): + return [] + + def collect(self): + try: + if self._cache_type == "response_cache": + response_cache_size.labels(self._cache_name).set(len(self._cache)) + response_cache_hits.labels(self._cache_name).set(self.hits) + response_cache_evicted.labels(self._cache_name).set(self.evicted_size) + response_cache_total.labels(self._cache_name).set( + self.hits + self.misses + ) + else: + cache_size.labels(self._cache_name).set(len(self._cache)) + cache_hits.labels(self._cache_name).set(self.hits) + cache_evicted.labels(self._cache_name).set(self.evicted_size) + cache_total.labels(self._cache_name).set(self.hits + self.misses) + if getattr(self._cache, "max_size", None): + cache_max_size.labels(self._cache_name).set(self._cache.max_size) + if self._collect_callback: + self._collect_callback() + except Exception as e: + logger.warning("Error calculating metrics for %s: %s", self._cache_name, e) + raise + + def register_cache( - cache_type, - cache_name, + cache_type: str, + cache_name: str, cache, - collect_callback=None, - resizable=True, - resize_callback=None, -): + collect_callback: Optional[Callable] = None, + resizable: bool = True, + resize_callback: Optional[Callable] = None, +) -> 
CacheMetric: """Register a cache object for metric collection and resizing. Args: - cache_type (str): - cache_name (str): name of the cache - cache (object): cache itself - collect_callback (callable|None): if not None, a function which is called during - metric collection to update additional metrics. + cache_type + cache_name: name of the cache + cache: cache itself + collect_callback: If given, a function which is called during metric + collection to update additional metrics. + resizable: Whether this cache supports being resized. + resize_callback: A function which can be called to resize the cache. Returns: CacheMetric: an object which provides inc_{hits,misses,evictions} methods @@ -71,47 +121,8 @@ def register_cache( # Check if the metric is already registered. Unregister it, if so. # This usually happens during tests, as at runtime these caches are # effectively singletons. + metric = CacheMetric(cache, cache_type, cache_name, collect_callback) metric_name = "cache_%s_%s" % (cache_type, cache_name) - - class CacheMetric(object): - - hits = 0 - misses = 0 - evicted_size = 0 - - def inc_hits(self): - self.hits += 1 - - def inc_misses(self): - self.misses += 1 - - def inc_evictions(self, size=1): - self.evicted_size += size - - def describe(self): - return [] - - def collect(self): - try: - if cache_type == "response_cache": - response_cache_size.labels(cache_name).set(len(cache)) - response_cache_hits.labels(cache_name).set(self.hits) - response_cache_evicted.labels(cache_name).set(self.evicted_size) - response_cache_total.labels(cache_name).set(self.hits + self.misses) - else: - cache_size.labels(cache_name).set(len(cache)) - cache_hits.labels(cache_name).set(self.hits) - cache_evicted.labels(cache_name).set(self.evicted_size) - cache_total.labels(cache_name).set(self.hits + self.misses) - if getattr(cache, "max_size", None): - cache_max_size.labels(cache_name).set(cache.max_size) - if collect_callback: - collect_callback() - except Exception as e: - 
logger.warning("Error calculating metrics for %s: %s", cache_name, e) - raise - - metric = CacheMetric() caches_by_name[cache_name] = cache collectors_by_name[metric_name] = metric return metric From ac020dee7afca21004a5782d66150cba982b42fd Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 22 Jan 2020 02:38:50 +1100 Subject: [PATCH 19/49] fix style --- tests/config/test_cache.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index 3b3a49c779ec..94370265f122 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -13,8 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os - from synapse.config._base import Config, RootConfig from synapse.config.cache import CacheConfig From 18c1dbfbac3462cc3903aa9f438881804b5cda24 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Wed, 22 Jan 2020 02:57:15 +1100 Subject: [PATCH 20/49] don't need this comment --- synapse/util/caches/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index b675da44dd99..4b8a0c7a8fac 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -118,9 +118,6 @@ def register_cache( resize_callback = getattr(cache, "set_cache_factor") add_resizable_cache(cache_name, resize_callback) - # Check if the metric is already registered. Unregister it, if so. - # This usually happens during tests, as at runtime these caches are - # effectively singletons. metric = CacheMetric(cache, cache_type, cache_name, collect_callback) metric_name = "cache_%s_%s" % (cache_type, cache_name) caches_by_name[cache_name] = cache From 5f508e728af3f842f78ec435fd0393dd3a07e854 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Mon, 17 Feb 2020 17:47:56 +1100 Subject: [PATCH 21/49] add tests for individual cache sizing, and fix up the individual cache sizing logic that got deleted when resizing-on-the-fly did --- synapse/config/cache.py | 30 ++++++++++++++--- tests/config/test_cache.py | 66 +++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 5 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index d6dadc5fde60..dbc2971eebf9 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -39,20 +39,35 @@ # """ +# Callback to ensure that all caches are the correct size, registered when the +# configuration has been loaded. +_ENSURE_CORRECT_CACHE_SIZING = None + def add_resizable_cache(cache_name, cache_resize_callback): _CACHES[cache_name.lower()] = cache_resize_callback - cache_resize_callback(DEFAULT_CACHE_SIZE_FACTOR) + if _ENSURE_CORRECT_CACHE_SIZING: + _ENSURE_CORRECT_CACHE_SIZING() class CacheConfig(Config): section = "caches" _environ = os.environ + @staticmethod + def _reset(): + global DEFAULT_CACHE_SIZE_FACTOR + global _ENSURE_CORRECT_CACHE_SIZING + + DEFAULT_CACHE_SIZE_FACTOR = float(os.environ.get(_CACHE_PREFIX, 0.5)) + _ENSURE_CORRECT_CACHE_SIZING = None + _CACHES.clear() + def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) global DEFAULT_CACHE_SIZE_FACTOR + global _ENSURE_CORRECT_CACHE_SIZING cache_config = config.get("caches", {}) @@ -79,9 +94,7 @@ def read_config(self, config, **kwargs): individual_factors.update(individual_factors_config) - self.cache_factors = defaultdict( - lambda: self.global_factor - ) # type: DefaultDict[str, float] + self.cache_factors = dict() # type: Dict[str, float] for cache, factor in individual_factors.items(): if not isinstance(factor, (int, float)): @@ -89,3 +102,12 @@ def read_config(self, config, **kwargs): "caches.per_cache_factors.%s must be a number" % (cache.lower(),) ) self.cache_factors[cache.lower()] 
= factor + + # Register the global callback so that the individual cache sizes get set. + def ensure_cache_sizes(): + for cache_name, callback in _CACHES.items(): + new_factor = self.cache_factors.get(cache_name, self.global_factor) + callback(new_factor) + + _ENSURE_CORRECT_CACHE_SIZING = ensure_cache_sizes + _ENSURE_CORRECT_CACHE_SIZING() diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index 94370265f122..f8901a4612fd 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -14,7 +14,8 @@ # limitations under the License. from synapse.config._base import Config, RootConfig -from synapse.config.cache import CacheConfig +from synapse.config.cache import CacheConfig, add_resizable_cache +from synapse.util.caches.lrucache import LruCache from tests.unittest import TestCase @@ -28,6 +29,9 @@ class TestConfig(RootConfig): class CacheConfigTests(TestCase): + def setUp(self): + CacheConfig._reset() + def test_individual_caches_from_environ(self): """ Individual cache factors will be loaded from the environment. @@ -59,3 +63,63 @@ def test_config_overrides_environ(self): dict(t.caches.cache_factors), {"foo": 2.0, "bar": 3.0, "something_or_other": 2.0}, ) + + def test_individual_instantiated_before_config_load(self): + """ + If a cache is instantiated before the config is read, it will be given + the default cache size in the interim, and then resized once the config + is loaded. + """ + cache = LruCache(100) + add_resizable_cache("foo", cache.set_cache_factor) + self.assertEqual(cache.max_size, 50) + + config = {"caches": {"per_cache_factors": {"foo": 3}}} + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + self.assertEqual(cache.max_size, 300) + + def test_individual_instantiated_after_config_load(self): + """ + If a cache is instantiated after the config is read, it will be + immediately resized to the correct size given the per_cache_factor if + there is one. 
+ """ + config = {"caches": {"per_cache_factors": {"foo": 2}}} + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + cache = LruCache(100) + add_resizable_cache("foo", cache.set_cache_factor) + self.assertEqual(cache.max_size, 200) + + def test_global_instantiated_before_config_load(self): + """ + If a cache is instantiated before the config is read, it will be given + the default cache size in the interim, and then resized to the new + default cache size once the config is loaded. + """ + cache = LruCache(100) + add_resizable_cache("foo", cache.set_cache_factor) + self.assertEqual(cache.max_size, 50) + + config = {"caches": {"global_factor": 4}} + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + self.assertEqual(cache.max_size, 400) + + def test_global_instantiated_after_config_load(self): + """ + If a cache is instantiated after the config is read, it will be + immediately resized to the correct size given the global factor if there + is no per-cache factor. + """ + config = {"caches": {"global_factor": 1.5}} + t = TestConfig() + t.read_config(config, config_dir_path="", data_dir_path="") + + cache = LruCache(100) + add_resizable_cache("foo", cache.set_cache_factor) + self.assertEqual(cache.max_size, 150) From 2619891343cc55b307ae36e6dda6247b2d6a5f73 Mon Sep 17 00:00:00 2001 From: "Amber H. Brown" Date: Mon, 17 Feb 2020 17:50:35 +1100 Subject: [PATCH 22/49] fix --- synapse/config/cache.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index dbc2971eebf9..bb68551954bb 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -14,8 +14,7 @@ # limitations under the License. import os -from collections import defaultdict -from typing import DefaultDict +from typing import Dict from ._base import Config, ConfigError From 0fc0a7deb814c9b24816a144852f7b41e4b7d248 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 27 Feb 2020 23:15:45 +1100 Subject: [PATCH 23/49] fix --- synapse/storage/data_stores/main/relations.py | 4 ++-- synapse/util/caches/descriptors.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/synapse/storage/data_stores/main/relations.py b/synapse/storage/data_stores/main/relations.py index 98e6149bc668..046c2b4845bb 100644 --- a/synapse/storage/data_stores/main/relations.py +++ b/synapse/storage/data_stores/main/relations.py @@ -31,7 +31,7 @@ class RelationsWorkerStore(SQLBaseStore): - @cached(tree=True, max_entries="event_cache_size") + @cached(tree=True) def get_relations_for_event( self, event_id, @@ -133,7 +133,7 @@ def _get_recent_references_for_event_txn(txn): "get_recent_references_for_event", _get_recent_references_for_event_txn ) - @cached(tree=True, max_entries="event_cache_size") + @cached(tree=True) def get_aggregation_groups_for_event( self, event_id, diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 8c24965bd812..2c8ea764b54c 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -378,9 +378,6 @@ def __init__( self.iterable = iterable def __get__(self, obj, owner): - if isinstance(self.max_entries, str): - self.max_entries = getattr(obj.hs.config.caches, self.max_entries) - cache = Cache( name=self.orig.__name__, max_entries=self.max_entries, From 965e25942637a597775099582b718db593ccd7e0 Mon Sep 17 00:00:00 2001 From: "Amber H. 
Brown" Date: Thu, 27 Feb 2020 23:18:12 +1100 Subject: [PATCH 24/49] fixes --- synapse/util/caches/lrucache.py | 3 +-- synapse/util/caches/stream_change_cache.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 8ff23a72daec..5802898c6d43 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -16,7 +16,6 @@ import threading from functools import wraps -from synapse.config import cache as cache_config from synapse.util.caches.treecache import TreeCache @@ -79,7 +78,7 @@ def __init__( # Save the original max size, and apply the default size factor. self._original_max_size = max_size - self.max_size = int(max_size * cache_config.DEFAULT_CACHE_SIZE_FACTOR) + self.max_size = int(max_size) list_root = _Node(None, None, None, None) list_root.next_node = list_root diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index fe0b99ae24b1..9b7b0308eb2b 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -20,7 +20,6 @@ from sortedcontainers import SortedDict -from synapse.config import cache as cache_config from synapse.util import caches logger = logging.getLogger(__name__) @@ -38,7 +37,7 @@ class StreamChangeCache(object): def __init__(self, name, current_stream_pos, max_size=10000, prefilled_cache=None): self._original_max_size = max_size - self.max_size = math.floor(max_size * cache_config.DEFAULT_CACHE_SIZE_FACTOR) + self.max_size = math.floor(max_size) self._entity_to_key = {} self._cache = SortedDict() self._earliest_known_stream_pos = current_stream_pos From 7117b89b583e284328f3f1cdeea12e44ae3e9421 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 3 Apr 2020 17:25:13 +0100 Subject: [PATCH 25/49] Update synapse/config/cache.py Co-Authored-By: Erik Johnston --- synapse/config/cache.py | 4 ++++ 1 
file changed, 4 insertions(+) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index bb68551954bb..bc90d820b018 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -55,6 +55,10 @@ class CacheConfig(Config): @staticmethod def _reset(): + """Resets the caches to their defaults. + + Used for tests. + """ global DEFAULT_CACHE_SIZE_FACTOR global _ENSURE_CORRECT_CACHE_SIZING From 4ed7aa1b342c4f4f287d3d0b7c013e259df355b8 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 3 Apr 2020 17:59:09 +0100 Subject: [PATCH 26/49] Move separate CacheConfig global vars into a single global dict --- synapse/config/cache.py | 41 ++++++++++++++-------------- synapse/util/caches/expiringcache.py | 2 -- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index bb68551954bb..cc5ca4141761 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -20,7 +20,16 @@ _CACHES = {} _CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR" -DEFAULT_CACHE_SIZE_FACTOR = float(os.environ.get(_CACHE_PREFIX, 0.5)) + +# Wrap all global vars into a single object to eliminate `global` calls +CACHE_PROPERTIES = { + "prefix": _CACHE_PREFIX, + "default_size_factor": float(os.environ.get(_CACHE_PREFIX, 0.5)), + + # Callback to ensure that all caches are the correct size, registered when the + # configuration has been loaded. + "ensure_correct_cache_sizing": None, +} _DEFAULT_CONFIG = """\ # Cache configuration @@ -38,15 +47,11 @@ # """ -# Callback to ensure that all caches are the correct size, registered when the -# configuration has been loaded. 
-_ENSURE_CORRECT_CACHE_SIZING = None - def add_resizable_cache(cache_name, cache_resize_callback): _CACHES[cache_name.lower()] = cache_resize_callback - if _ENSURE_CORRECT_CACHE_SIZING: - _ENSURE_CORRECT_CACHE_SIZING() + if CACHE_PROPERTIES["ensure_correct_cache_sizing"]: + CACHE_PROPERTIES["ensure_correct_cache_sizing"]() class CacheConfig(Config): @@ -55,36 +60,30 @@ class CacheConfig(Config): @staticmethod def _reset(): - global DEFAULT_CACHE_SIZE_FACTOR - global _ENSURE_CORRECT_CACHE_SIZING - - DEFAULT_CACHE_SIZE_FACTOR = float(os.environ.get(_CACHE_PREFIX, 0.5)) - _ENSURE_CORRECT_CACHE_SIZING = None + CACHE_PROPERTIES["default_size_factor"] = float(os.environ.get(_CACHE_PREFIX, 0.5)) + CACHE_PROPERTIES["ensure_correct_cache_sizing"] = None _CACHES.clear() def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) - global DEFAULT_CACHE_SIZE_FACTOR - global _ENSURE_CORRECT_CACHE_SIZING - cache_config = config.get("caches", {}) self.global_factor = cache_config.get( - "global_factor", DEFAULT_CACHE_SIZE_FACTOR + "global_factor", CACHE_PROPERTIES["default_cache_size_factor"] ) if not isinstance(self.global_factor, (int, float)): raise ConfigError("caches.global_factor must be a number.") # Set the global one so that it's reflected in new caches - DEFAULT_CACHE_SIZE_FACTOR = self.global_factor + CACHE_PROPERTIES["default_cache_size_factor"] = self.global_factor # Load cache factors from the environment, but override them with the # ones in the config file if they exist individual_factors = { - key[len(_CACHE_PREFIX) + 1 :].lower(): float(val) + key[len(CACHE_PROPERTIES["prefix"]) + 1:].lower(): float(val) for key, val in self._environ.items() - if key.startswith(_CACHE_PREFIX + "_") + if key.startswith(CACHE_PROPERTIES["prefix"] + "_") } individual_factors_config = cache_config.get("per_cache_factors", {}) or {} @@ -108,5 +107,5 @@ def ensure_cache_sizes(): new_factor = self.cache_factors.get(cache_name, 
self.global_factor) callback(new_factor) - _ENSURE_CORRECT_CACHE_SIZING = ensure_cache_sizes - _ENSURE_CORRECT_CACHE_SIZING() + CACHE_PROPERTIES["ensure_correct_cache_sizing"] = ensure_cache_sizes + CACHE_PROPERTIES["ensure_correct_cache_sizing"]() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 043289d2d767..f2fefa0eed52 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -18,7 +18,6 @@ from six import iteritems, itervalues -from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import register_cache @@ -56,7 +55,6 @@ def __init__( """ self._cache_name = cache_name - self.max_size = int(max_len * cache_config.DEFAULT_CACHE_SIZE_FACTOR) self._original_max_size = max_len self._clock = clock From be0737adc3369e65468f8ae5595896aeecf9f3ba Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 3 Apr 2020 18:00:53 +0100 Subject: [PATCH 27/49] Move _DEFAULT_CONFIG to class method --- synapse/config/cache.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index cc5ca4141761..1ffb3d17e11b 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -31,22 +31,6 @@ "ensure_correct_cache_sizing": None, } -_DEFAULT_CONFIG = """\ -# Cache configuration -# -# 'global_factor' controls the global cache factor. This overrides the -# "SYNAPSE_CACHE_FACTOR" environment variable. -# -# 'per_cache_factors' is a dictionary of cache name to cache factor for that -# individual cache. 
-# -#caches: -# global_factor: 0.5 -# per_cache_factors: -# get_users_who_share_room_with_user: 2 -# -""" - def add_resizable_cache(cache_name, cache_resize_callback): _CACHES[cache_name.lower()] = cache_resize_callback @@ -64,6 +48,23 @@ def _reset(): CACHE_PROPERTIES["ensure_correct_cache_sizing"] = None _CACHES.clear() + def generate_config_section(self, **kwargs): + return """\ + # Cache configuration + # + # 'global_factor' controls the global cache factor. This overrides the + # "SYNAPSE_CACHE_FACTOR" environment variable. + # + # 'per_cache_factors' is a dictionary of cache name to cache factor for that + # individual cache. + # + #caches: + # global_factor: 0.5 + # per_cache_factors: + # get_users_who_share_room_with_user: 2 + # + """ + def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) From cc1dd98362fad83a206e23a3f3fd75d4664fc5de Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 3 Apr 2020 18:09:55 +0100 Subject: [PATCH 28/49] lint --- synapse/config/cache.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 741e3dae781f..bd9c5e5aaadf 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -25,7 +25,6 @@ CACHE_PROPERTIES = { "prefix": _CACHE_PREFIX, "default_size_factor": float(os.environ.get(_CACHE_PREFIX, 0.5)), - # Callback to ensure that all caches are the correct size, registered when the # configuration has been loaded. "ensure_correct_cache_sizing": None, @@ -48,7 +47,9 @@ def _reset(): Used for tests. 
""" - CACHE_PROPERTIES["default_size_factor"] = float(os.environ.get(_CACHE_PREFIX, 0.5)) + CACHE_PROPERTIES["default_size_factor"] = float( + os.environ.get(_CACHE_PREFIX, 0.5) + ) CACHE_PROPERTIES["ensure_correct_cache_sizing"] = None _CACHES.clear() @@ -86,7 +87,7 @@ def read_config(self, config, **kwargs): # Load cache factors from the environment, but override them with the # ones in the config file if they exist individual_factors = { - key[len(CACHE_PROPERTIES["prefix"]) + 1:].lower(): float(val) + key[len(CACHE_PROPERTIES["prefix"]) + 1 :].lower(): float(val) for key, val in self._environ.items() if key.startswith(CACHE_PROPERTIES["prefix"] + "_") } From de2565959f4ecac309bb120c3d9dd41c29b770b0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 3 Apr 2020 18:42:05 +0100 Subject: [PATCH 29/49] Fix typing --- synapse/config/cache.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index bd9c5e5aaadf..29338739b883 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -14,7 +14,7 @@ # limitations under the License. 
import os -from typing import Dict +from typing import Callable, Dict, Optional, Union from ._base import Config, ConfigError @@ -22,7 +22,7 @@ _CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR" # Wrap all global vars into a single object to eliminate `global` calls -CACHE_PROPERTIES = { +CACHE_PROPERTIES: Dict[str, Union[str, float, Optional[Callable[[], None]]]] = { "prefix": _CACHE_PREFIX, "default_size_factor": float(os.environ.get(_CACHE_PREFIX, 0.5)), # Callback to ensure that all caches are the correct size, registered when the @@ -33,6 +33,7 @@ def add_resizable_cache(cache_name, cache_resize_callback): _CACHES[cache_name.lower()] = cache_resize_callback + if CACHE_PROPERTIES["ensure_correct_cache_sizing"]: CACHE_PROPERTIES["ensure_correct_cache_sizing"]() From 6e979a17405292a2a42430bb0d35ed7745dbd54c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 3 Apr 2020 18:42:14 +0100 Subject: [PATCH 30/49] sample config --- docs/sample_config.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index be742969cc0f..7fbd9cfe2a48 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1927,3 +1927,18 @@ opentracing: # # logging: # false + + +# Cache configuration +# +# 'global_factor' controls the global cache factor. This overrides the +# "SYNAPSE_CACHE_FACTOR" environment variable. +# +# 'per_cache_factors' is a dictionary of cache name to cache factor for that +# individual cache. 
+# +#caches: +# global_factor: 0.5 +# per_cache_factors: +# get_users_who_share_room_with_user: 2 +# From fec13bedd6bc52cef651c9842e8430f856384f6b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 6 Apr 2020 17:43:05 +0100 Subject: [PATCH 31/49] Fix lint --- synapse/config/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 29338739b883..5699c54500de 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -99,7 +99,7 @@ def read_config(self, config, **kwargs): individual_factors.update(individual_factors_config) - self.cache_factors = dict() # type: Dict[str, float] + self.cache_factors = {} # type: Dict[str, float] for cache, factor in individual_factors.items(): if not isinstance(factor, (int, float)): From 905c83352d75e2810af21a641e3632f9687b0622 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 6 Apr 2020 17:52:38 +0100 Subject: [PATCH 32/49] Add some mypy ignores --- synapse/config/cache.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 5699c54500de..2eeaf146fd18 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -22,7 +22,7 @@ _CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR" # Wrap all global vars into a single object to eliminate `global` calls -CACHE_PROPERTIES: Dict[str, Union[str, float, Optional[Callable[[], None]]]] = { +CACHE_PROPERTIES = { "prefix": _CACHE_PREFIX, "default_size_factor": float(os.environ.get(_CACHE_PREFIX, 0.5)), # Callback to ensure that all caches are the correct size, registered when the @@ -35,7 +35,7 @@ def add_resizable_cache(cache_name, cache_resize_callback): _CACHES[cache_name.lower()] = cache_resize_callback if CACHE_PROPERTIES["ensure_correct_cache_sizing"]: - CACHE_PROPERTIES["ensure_correct_cache_sizing"]() + CACHE_PROPERTIES["ensure_correct_cache_sizing"]() # type: ignore[operator] class CacheConfig(Config): @@ -88,9 +88,9 @@ 
def read_config(self, config, **kwargs): # Load cache factors from the environment, but override them with the # ones in the config file if they exist individual_factors = { - key[len(CACHE_PROPERTIES["prefix"]) + 1 :].lower(): float(val) + key[len(CACHE_PROPERTIES["prefix"]) + 1 :].lower(): float(val) # type: ignore[arg-type] for key, val in self._environ.items() - if key.startswith(CACHE_PROPERTIES["prefix"] + "_") + if key.startswith(CACHE_PROPERTIES["prefix"] + "_") # type: ignore[operator] } individual_factors_config = cache_config.get("per_cache_factors", {}) or {} @@ -115,4 +115,4 @@ def ensure_cache_sizes(): callback(new_factor) CACHE_PROPERTIES["ensure_correct_cache_sizing"] = ensure_cache_sizes - CACHE_PROPERTIES["ensure_correct_cache_sizing"]() + CACHE_PROPERTIES["ensure_correct_cache_sizing"]() # type: ignore[operator] From 28f7f59d0f106d5217ae90b3127685d6f009b8d2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 24 Apr 2020 15:43:05 +0100 Subject: [PATCH 33/49] Re-add default_size_factor multiplier. 
Revert _max_size modification --- synapse/util/caches/expiringcache.py | 3 ++- synapse/util/caches/lrucache.py | 9 +++++---- tests/config/test_cache.py | 14 +++++++------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 7dabe0936326..c30734f4e59e 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -20,6 +20,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import register_cache +from synapse.config import cache as cache_config logger = logging.getLogger(__name__) @@ -56,7 +57,7 @@ def __init__( self._cache_name = cache_name self._original_max_size = max_len - self._max_size = max_len + self._max_size = int(max_len * cache_config.CACHE_PROPERTIES["default_size_factor"]) self._clock = clock diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index e44244b31a29..5830fe220421 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -17,6 +17,7 @@ from functools import wraps from synapse.util.caches.treecache import TreeCache +from synapse.config import cache as cache_config def enumerate_leaves(node, depth): @@ -78,7 +79,7 @@ def __init__( # Save the original max size, and apply the default size factor. self._original_max_size = max_size - self._max_size = int(max_size) + self.max_size = int(max_size * cache_config.CACHE_PROPERTIES["default_size_factor"]) list_root = _Node(None, None, None, None) list_root.next_node = list_root @@ -87,7 +88,7 @@ def __init__( lock = threading.Lock() def evict(): - while cache_len() > self._max_size: + while cache_len() > self.max_size: todelete = list_root.prev_node evicted_len = delete_node(todelete) cache.pop(todelete.key, None) @@ -283,8 +284,8 @@ def set_cache_factor(self, factor: float) -> bool: bool: Whether the cache changed size or not. 
""" new_size = int(self._original_max_size * factor) - if new_size != self._max_size: - self._max_size = new_size + if new_size != self.max_size: + self.max_size = new_size self._on_resize() return True return False diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index c6dc4e900ee9..f8901a4612fd 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -14,7 +14,7 @@ # limitations under the License. from synapse.config._base import Config, RootConfig -from synapse.config.cache import CacheConfig, add_resizable_cache, _CACHES, CACHE_PROPERTIES +from synapse.config.cache import CacheConfig, add_resizable_cache from synapse.util.caches.lrucache import LruCache from tests.unittest import TestCase @@ -72,13 +72,13 @@ def test_individual_instantiated_before_config_load(self): """ cache = LruCache(100) add_resizable_cache("foo", cache.set_cache_factor) - self.assertEqual(cache._max_size, 50) + self.assertEqual(cache.max_size, 50) config = {"caches": {"per_cache_factors": {"foo": 3}}} t = TestConfig() t.read_config(config, config_dir_path="", data_dir_path="") - self.assertEqual(cache._max_size, 300) + self.assertEqual(cache.max_size, 300) def test_individual_instantiated_after_config_load(self): """ @@ -92,7 +92,7 @@ def test_individual_instantiated_after_config_load(self): cache = LruCache(100) add_resizable_cache("foo", cache.set_cache_factor) - self.assertEqual(cache._max_size, 200) + self.assertEqual(cache.max_size, 200) def test_global_instantiated_before_config_load(self): """ @@ -102,13 +102,13 @@ def test_global_instantiated_before_config_load(self): """ cache = LruCache(100) add_resizable_cache("foo", cache.set_cache_factor) - self.assertEqual(cache._max_size, 50) + self.assertEqual(cache.max_size, 50) config = {"caches": {"global_factor": 4}} t = TestConfig() t.read_config(config, config_dir_path="", data_dir_path="") - self.assertEqual(cache._max_size, 400) + self.assertEqual(cache.max_size, 400) def 
test_global_instantiated_after_config_load(self): """ @@ -122,4 +122,4 @@ def test_global_instantiated_after_config_load(self): cache = LruCache(100) add_resizable_cache("foo", cache.set_cache_factor) - self.assertEqual(cache._max_size, 150) + self.assertEqual(cache.max_size, 150) From 5f3eddcdc35987ae135e478e54cea055a21f3eb5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 24 Apr 2020 15:58:16 +0100 Subject: [PATCH 34/49] Refactor globals in CacheConfig --- synapse/config/cache.py | 81 ++++++++++++---------- synapse/util/caches/expiringcache.py | 4 +- synapse/util/caches/lrucache.py | 4 +- synapse/util/caches/stream_change_cache.py | 2 +- tests/config/test_cache.py | 4 +- 5 files changed, 53 insertions(+), 42 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 5b831def2b31..253133028c77 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -14,28 +14,40 @@ # limitations under the License. import os -from typing import Callable, Dict, Optional, Union +from typing import Callable, Dict from ._base import Config, ConfigError +# The prefix for all cache factor-related environment variables _CACHES = {} _CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR" -# Wrap all global vars into a single object to eliminate `global` calls -CACHE_PROPERTIES = { - "prefix": _CACHE_PREFIX, - "default_size_factor": float(os.environ.get(_CACHE_PREFIX, 0.5)), - # Callback to ensure that all caches are the correct size, registered when the - # configuration has been loaded. 
- "ensure_correct_cache_sizing": None, -} +class CacheProperties(object): + def __init__(self): + # The default size factor for all caches + self.default_size_factor = 0.5 + self.resize_all_caches = None -def add_resizable_cache(cache_name, cache_resize_callback): + +properties = CacheProperties() + + +def add_resizable_cache(cache_name: str, cache_resize_callback: Callable): + """Register a cache that's size can dynamically change + + Args: + cache_name: A reference to the cache + cache_resize_callback: A callback function that will be ran whenever + the cache needs to be resized + """ _CACHES[cache_name.lower()] = cache_resize_callback - if CACHE_PROPERTIES["ensure_correct_cache_sizing"]: - CACHE_PROPERTIES["ensure_correct_cache_sizing"]() # type: ignore[operator] + # Ensure all loaded caches are resized + # This method should only run once the config has been read, + # as it uses variables from it + if properties.resize_all_caches: + properties.resize_all_caches() class CacheConfig(Config): @@ -43,15 +55,10 @@ class CacheConfig(Config): _environ = os.environ @staticmethod - def _reset(): - """Resets the caches to their defaults. - - Used for tests. - """ - CACHE_PROPERTIES["default_size_factor"] = float( - os.environ.get(_CACHE_PREFIX, 0.5) - ) - CACHE_PROPERTIES["ensure_correct_cache_sizing"] = None + def reset(): + """Resets the caches to their defaults. 
Used for tests.""" + properties.default_size_factor = float(os.environ.get(_CACHE_PREFIX, 0.5)) + properties.resize_all_caches = None _CACHES.clear() def generate_config_section(self, **kwargs): @@ -73,34 +80,31 @@ def generate_config_section(self, **kwargs): def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) + self.cache_factors = dict() # type: Dict[str, float] cache_config = config.get("caches", {}) self.global_factor = cache_config.get( - "global_factor", CACHE_PROPERTIES["default_size_factor"] + "global_factor", properties.default_size_factor ) if not isinstance(self.global_factor, (int, float)): raise ConfigError("caches.global_factor must be a number.") # Set the global one so that it's reflected in new caches - CACHE_PROPERTIES["default_size_factor"] = self.global_factor + properties.default_size_factor = self.global_factor # Load cache factors from the environment, but override them with the # ones in the config file if they exist individual_factors = { - key[len(CACHE_PROPERTIES["prefix"]) + 1 :].lower(): float(val) # type: ignore[arg-type] + key[len(_CACHE_PREFIX) + 1 :].lower(): float(val) for key, val in self._environ.items() - if key.startswith(CACHE_PROPERTIES["prefix"] + "_") # type: ignore[operator] + if key.startswith(_CACHE_PREFIX + "_") } - individual_factors_config = cache_config.get("per_cache_factors", {}) or {} if not isinstance(individual_factors_config, dict): raise ConfigError("caches.per_cache_factors must be a dictionary") - individual_factors.update(individual_factors_config) - self.cache_factors = {} # type: Dict[str, float] - for cache, factor in individual_factors.items(): if not isinstance(factor, (int, float)): raise ConfigError( @@ -108,11 +112,16 @@ def read_config(self, config, **kwargs): ) self.cache_factors[cache.lower()] = factor - # Register the global callback so that the individual cache sizes get set. 
- def ensure_cache_sizes(): - for cache_name, callback in _CACHES.items(): - new_factor = self.cache_factors.get(cache_name, self.global_factor) - callback(new_factor) + # Resize all caches (if necessary) with the new factors we've loaded + properties.resize_all_caches = self.resize_all_caches + self.resize_all_caches() + + def resize_all_caches(self): + """Ensure all cache sizes are up to date - CACHE_PROPERTIES["ensure_correct_cache_sizing"] = ensure_cache_sizes - CACHE_PROPERTIES["ensure_correct_cache_sizing"]() # type: ignore[operator] + For each cache, run the mapped callback function with either + a specific cache factor or the default, global one. + """ + for cache_name, callback in _CACHES.items(): + new_factor = self.cache_factors.get(cache_name, self.global_factor) + callback(new_factor) diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index c30734f4e59e..3acb02a7bd17 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -18,9 +18,9 @@ from six import iteritems, itervalues +from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import run_as_background_process from synapse.util.caches import register_cache -from synapse.config import cache as cache_config logger = logging.getLogger(__name__) @@ -57,7 +57,7 @@ def __init__( self._cache_name = cache_name self._original_max_size = max_len - self._max_size = int(max_len * cache_config.CACHE_PROPERTIES["default_size_factor"]) + self._max_size = int(max_len * cache_config.properties.default_size_factor) self._clock = clock diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 5830fe220421..226b41ae3ca8 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -16,8 +16,8 @@ import threading from functools import wraps -from synapse.util.caches.treecache import TreeCache from synapse.config import cache as cache_config +from 
synapse.util.caches.treecache import TreeCache def enumerate_leaves(node, depth): @@ -79,7 +79,7 @@ def __init__( # Save the original max size, and apply the default size factor. self._original_max_size = max_size - self.max_size = int(max_size * cache_config.CACHE_PROPERTIES["default_size_factor"]) + self.max_size = int(max_size * cache_config.properties.default_size_factor) list_root = _Node(None, None, None, None) list_root.next_node = list_root diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index c81793d80477..d21f9881c074 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -14,8 +14,8 @@ # limitations under the License. import logging -from typing import Dict, Iterable, List, Mapping, Optional, Set import math +from typing import Dict, Iterable, List, Mapping, Optional, Set from six import integer_types diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index f8901a4612fd..c9e8f4e90ebf 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -30,7 +30,8 @@ class TestConfig(RootConfig): class CacheConfigTests(TestCase): def setUp(self): - CacheConfig._reset() + # Reset caches before each test + TestConfig().caches.reset() def test_individual_caches_from_environ(self): """ @@ -71,6 +72,7 @@ def test_individual_instantiated_before_config_load(self): is loaded. 
""" cache = LruCache(100) + add_resizable_cache("foo", cache.set_cache_factor) self.assertEqual(cache.max_size, 50) From fcc9c3ac01718dd629788f241420412aadc10ae3 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 24 Apr 2020 21:37:31 +0100 Subject: [PATCH 35/49] dict() -> {} --- synapse/config/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 253133028c77..09e4ab26bc6b 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -80,7 +80,7 @@ def generate_config_section(self, **kwargs): def read_config(self, config, **kwargs): self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) - self.cache_factors = dict() # type: Dict[str, float] + self.cache_factors = {} # type: Dict[str, float] cache_config = config.get("caches", {}) From 5acca120641f8891e3bdfacf505fa36a6115f7c1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 27 Apr 2020 20:08:45 +0100 Subject: [PATCH 36/49] Some variable cleanups in cache.py --- synapse/config/cache.py | 33 ++++++++++++++++------------ synapse/http/client.py | 4 +++- synapse/util/caches/expiringcache.py | 2 +- synapse/util/caches/lrucache.py | 2 +- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 09e4ab26bc6b..8278a6e1aaef 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -21,13 +21,15 @@ # The prefix for all cache factor-related environment variables _CACHES = {} _CACHE_PREFIX = "SYNAPSE_CACHE_FACTOR" +_DEFAULT_FACTOR_SIZE = 0.5 +_DEFAULT_EVENT_CACHE_SIZE = "10K" class CacheProperties(object): def __init__(self): - # The default size factor for all caches - self.default_size_factor = 0.5 - self.resize_all_caches = None + # The default factor size for all caches + self.default_factor_size = _DEFAULT_FACTOR_SIZE + self.resize_all_caches_func = None properties = CacheProperties() @@ -43,11 +45,12 @@ def 
add_resizable_cache(cache_name: str, cache_resize_callback: Callable): """ _CACHES[cache_name.lower()] = cache_resize_callback - # Ensure all loaded caches are resized + # Ensure all loaded caches are sized appropriately + # # This method should only run once the config has been read, - # as it uses variables from it - if properties.resize_all_caches: - properties.resize_all_caches() + # as it uses values read from it + if properties.resize_all_caches_func: + properties.resize_all_caches_func() class CacheConfig(Config): @@ -57,8 +60,8 @@ class CacheConfig(Config): @staticmethod def reset(): """Resets the caches to their defaults. Used for tests.""" - properties.default_size_factor = float(os.environ.get(_CACHE_PREFIX, 0.5)) - properties.resize_all_caches = None + properties.default_factor_size = float(os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)) + properties.resize_all_caches_func = None _CACHES.clear() def generate_config_section(self, **kwargs): @@ -79,19 +82,18 @@ def generate_config_section(self, **kwargs): """ def read_config(self, config, **kwargs): - self.event_cache_size = self.parse_size(config.get("event_cache_size", "10K")) + self.event_cache_size = self.parse_size(config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)) self.cache_factors = {} # type: Dict[str, float] cache_config = config.get("caches", {}) - self.global_factor = cache_config.get( - "global_factor", properties.default_size_factor + "global_factor", properties.default_factor_size ) if not isinstance(self.global_factor, (int, float)): raise ConfigError("caches.global_factor must be a number.") # Set the global one so that it's reflected in new caches - properties.default_size_factor = self.global_factor + properties.default_factor_size = self.global_factor # Load cache factors from the environment, but override them with the # ones in the config file if they exist @@ -113,9 +115,12 @@ def read_config(self, config, **kwargs): self.cache_factors[cache.lower()] = factor # Resize 
all caches (if necessary) with the new factors we've loaded - properties.resize_all_caches = self.resize_all_caches self.resize_all_caches() + # Store this function so that it can be called from other classes without + # needing an instance of Config + properties.resize_all_caches_func = self.resize_all_caches + def resize_all_caches(self): """Ensure all cache sizes are up to date diff --git a/synapse/http/client.py b/synapse/http/client.py index b79e2fbe79c8..ecab96d854b8 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -240,7 +240,9 @@ def __getattr__(_self, attr): # tends to do so in batches, so we need to allow the pool to keep # lots of idle connections around. pool = HTTPConnectionPool(self.reactor) - # XXX: Why does this use the cache factor???? + # XXX: The justification for using the cache factor here is that larger instances + # will need both more cache and more connections. + # Still, this should probably be a separate dial pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5)) pool.cachedConnectionTimeout = 2 * 60 diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 3acb02a7bd17..4238a0fd3365 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -57,7 +57,7 @@ def __init__( self._cache_name = cache_name self._original_max_size = max_len - self._max_size = int(max_len * cache_config.properties.default_size_factor) + self._max_size = int(max_len * cache_config.properties.default_factor_size) self._clock = clock diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 226b41ae3ca8..cfd2b330279a 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -79,7 +79,7 @@ def __init__( # Save the original max size, and apply the default size factor. 
self._original_max_size = max_size - self.max_size = int(max_size * cache_config.properties.default_size_factor) + self.max_size = int(max_size * cache_config.properties.default_factor_size) list_root = _Node(None, None, None, None) list_root.next_node = list_root From fdef3ada27cef8dea9ba11195fddb9b81140d7f1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 27 Apr 2020 20:09:50 +0100 Subject: [PATCH 37/49] Make add_resizable_cache args clearer in cache tests --- tests/config/test_cache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index c9e8f4e90ebf..88d530264371 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -73,7 +73,7 @@ def test_individual_instantiated_before_config_load(self): """ cache = LruCache(100) - add_resizable_cache("foo", cache.set_cache_factor) + add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) self.assertEqual(cache.max_size, 50) config = {"caches": {"per_cache_factors": {"foo": 3}}} @@ -93,7 +93,7 @@ def test_individual_instantiated_after_config_load(self): t.read_config(config, config_dir_path="", data_dir_path="") cache = LruCache(100) - add_resizable_cache("foo", cache.set_cache_factor) + add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) self.assertEqual(cache.max_size, 200) def test_global_instantiated_before_config_load(self): @@ -103,7 +103,7 @@ def test_global_instantiated_before_config_load(self): default cache size once the config is loaded. 
""" cache = LruCache(100) - add_resizable_cache("foo", cache.set_cache_factor) + add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) self.assertEqual(cache.max_size, 50) config = {"caches": {"global_factor": 4}} @@ -123,5 +123,5 @@ def test_global_instantiated_after_config_load(self): t.read_config(config, config_dir_path="", data_dir_path="") cache = LruCache(100) - add_resizable_cache("foo", cache.set_cache_factor) + add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) self.assertEqual(cache.max_size, 150) From ca98241e18082bf6ecd7fb11953053dba5bce9ca Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 27 Apr 2020 20:10:15 +0100 Subject: [PATCH 38/49] lint --- synapse/config/cache.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 8278a6e1aaef..583fcaaf6550 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -60,7 +60,9 @@ class CacheConfig(Config): @staticmethod def reset(): """Resets the caches to their defaults. 
Used for tests.""" - properties.default_factor_size = float(os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE)) + properties.default_factor_size = float( + os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE) + ) properties.resize_all_caches_func = None _CACHES.clear() @@ -82,7 +84,9 @@ def generate_config_section(self, **kwargs): """ def read_config(self, config, **kwargs): - self.event_cache_size = self.parse_size(config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)) + self.event_cache_size = self.parse_size( + config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE) + ) self.cache_factors = {} # type: Dict[str, float] cache_config = config.get("caches", {}) From 48709a9b2201f296ccac246050ad5f5be25a4f43 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 30 Apr 2020 16:00:42 +0100 Subject: [PATCH 39/49] Pull global cache prefix from environment by default --- synapse/config/cache.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 583fcaaf6550..2610ae9c2563 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -28,7 +28,9 @@ class CacheProperties(object): def __init__(self): # The default factor size for all caches - self.default_factor_size = _DEFAULT_FACTOR_SIZE + self.default_factor_size = float( + os.environ.get(_CACHE_PREFIX, _DEFAULT_FACTOR_SIZE) + ) self.resize_all_caches_func = None From 9e3a9ba4f56377f4f16920760cf66644cbd154d7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 15:11:14 +0100 Subject: [PATCH 40/49] Make config follow the guidelines --- docs/sample_config.yaml | 27 +++++++++++++++------------ synapse/config/cache.py | 26 ++++++++++++++------------ 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 7390377f3424..941c6e66e9af 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1975,16 +1975,19 @@ opentracing: # false -# Cache configuration 
-# -# 'global_factor' controls the global cache factor. This overrides the -# "SYNAPSE_CACHE_FACTOR" environment variable. -# -# 'per_cache_factors' is a dictionary of cache name to cache factor for that -# individual cache. -# -#caches: -# global_factor: 0.5 -# per_cache_factors: -# get_users_who_share_room_with_user: 2 +## Cache Configuration ## + +# Caching can be configured through the following options. # +caches: + # Controls the global cache factor. This overrides the "SYNAPSE_CACHE_FACTOR" + # environment variable. + # + # global_factor: 0.5 + + # A dictionary of cache name to cache factor for that + # individual cache. + # + # per_cache_factors: + # get_users_who_share_room_with_user: 2 + diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 2610ae9c2563..7a7dbb3d8097 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -70,19 +70,21 @@ def reset(): def generate_config_section(self, **kwargs): return """\ - # Cache configuration - # - # 'global_factor' controls the global cache factor. This overrides the - # "SYNAPSE_CACHE_FACTOR" environment variable. - # - # 'per_cache_factors' is a dictionary of cache name to cache factor for that - # individual cache. - # - #caches: - # global_factor: 0.5 - # per_cache_factors: - # get_users_who_share_room_with_user: 2 + ## Cache Configuration ## + + # Caching can be configured through the following options. # + caches: + # Controls the global cache factor. This overrides the "SYNAPSE_CACHE_FACTOR" + # environment variable. + # + # global_factor: 0.5 + + # A dictionary of cache name to cache factor for that + # individual cache. 
+ # + # per_cache_factors: + # get_users_who_share_room_with_user: 2 """ def read_config(self, config, **kwargs): From fa56e16c859c789e04ac5cae483f9ccdb09fda21 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 15:20:13 +0100 Subject: [PATCH 41/49] sample config --- docs/sample_config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 941c6e66e9af..6b0d7c2b0da9 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1990,4 +1990,3 @@ caches: # # per_cache_factors: # get_users_who_share_room_with_user: 2 - From 83cf583adfee9a20dc81780c30f75530fe7ca237 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 15:35:46 +0100 Subject: [PATCH 42/49] Allow creating an LruCache that's not affected by cache factor --- synapse/storage/data_stores/main/events_worker.py | 5 ++++- synapse/util/caches/descriptors.py | 11 ++++++++++- synapse/util/caches/expiringcache.py | 2 +- synapse/util/caches/lrucache.py | 12 +++++++++++- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py index 25511be5cbfd..e40a8999a94b 100644 --- a/synapse/storage/data_stores/main/events_worker.py +++ b/synapse/storage/data_stores/main/events_worker.py @@ -75,7 +75,10 @@ def __init__(self, database: Database, db_conn, hs): super(EventsWorkerStore, self).__init__(database, db_conn, hs) self._get_event_cache = Cache( - "*getEvent*", keylen=3, max_entries=hs.config.caches.event_cache_size + "*getEvent*", + keylen=3, + max_entries=hs.config.caches.event_cache_size, + apply_cache_factor_from_config=False, ) self._event_fetch_lock = threading.Condition() diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 2c8ea764b54c..296706a39634 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -88,7 +88,15 @@ class Cache(object): 
"_pending_deferred_cache", ) - def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False): + def __init__( + self, + name, + max_entries=1000, + keylen=1, + tree=False, + iterable=False, + apply_cache_factor_from_config=True, + ): cache_type = TreeCache if tree else dict self._pending_deferred_cache = cache_type() @@ -98,6 +106,7 @@ def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False) cache_type=cache_type, size_callback=(lambda d: len(d)) if iterable else None, evicted_callback=self._on_evicted, + apply_cache_factor_from_config=apply_cache_factor_from_config, ) self.name = name diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 4238a0fd3365..2726b67b6d5a 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -52,11 +52,11 @@ def __init__( an item on access. Defaults to False. iterable (bool): If true, the size is calculated by summing the sizes of all entries, rather than the number of entries. - """ self._cache_name = cache_name self._original_max_size = max_len + self._max_size = int(max_len * cache_config.properties.default_factor_size) self._clock = clock diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index cfd2b330279a..293a3f172ab8 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -57,6 +57,7 @@ def __init__( cache_type=dict, size_callback=None, evicted_callback=None, + apply_cache_factor_from_config=True, ): """ Args: @@ -73,13 +74,22 @@ def __init__( evicted_callback (func(int)|None): if not None, called on eviction with the size of the evicted entry + + apply_cache_factor_from_config (bool): If true, `max_size` will be + multiplied by a cache factor derived from the homeserver config. """ cache = cache_type() self.cache = cache # Used for introspection. # Save the original max size, and apply the default size factor. 
self._original_max_size = max_size - self.max_size = int(max_size * cache_config.properties.default_factor_size) + # We previously didn't apply the cache factor here, and as such some caches were + # not affected by the global cache factor. Add an option here to disable applying + # the cache factor when a cache is created + if apply_cache_factor_from_config: + self.max_size = int(max_size * cache_config.properties.default_factor_size) + else: + self.max_size = int(max_size) list_root = _Node(None, None, None, None) list_root.next_node = list_root From 424215a0c300c4a27bceb3e9e76fc8d6b5b4d56b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 15:36:02 +0100 Subject: [PATCH 43/49] Fix cache: config block parsing --- synapse/config/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 7a7dbb3d8097..403893a3d3d8 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -93,7 +93,7 @@ def read_config(self, config, **kwargs): ) self.cache_factors = {} # type: Dict[str, float] - cache_config = config.get("caches", {}) + cache_config = config.get("caches") or {} self.global_factor = cache_config.get( "global_factor", properties.default_factor_size ) From c7f3bf66c20fa5ea1550658a543ea1a8b4126b21 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 18:03:45 +0100 Subject: [PATCH 44/49] Environment takes precedence over config values --- synapse/config/cache.py | 23 +++++++++++++---------- synapse/util/caches/descriptors.py | 26 ++++++++++++++++++++------ synapse/util/caches/lrucache.py | 19 ++++++++++--------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 403893a3d3d8..a8dc93c225c5 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -103,17 +103,11 @@ def read_config(self, config, **kwargs): # Set the global one so that it's reflected in new caches 
properties.default_factor_size = self.global_factor - # Load cache factors from the environment, but override them with the - # ones in the config file if they exist - individual_factors = { - key[len(_CACHE_PREFIX) + 1 :].lower(): float(val) - for key, val in self._environ.items() - if key.startswith(_CACHE_PREFIX + "_") - } - individual_factors_config = cache_config.get("per_cache_factors", {}) or {} - if not isinstance(individual_factors_config, dict): + # Load cache factors from the config, but override them with the + # environment if they exist + individual_factors = cache_config.get("per_cache_factors", {}) or {} + if not isinstance(individual_factors, dict): raise ConfigError("caches.per_cache_factors must be a dictionary") - individual_factors.update(individual_factors_config) for cache, factor in individual_factors.items(): if not isinstance(factor, (int, float)): @@ -122,6 +116,15 @@ def read_config(self, config, **kwargs): ) self.cache_factors[cache.lower()] = factor + # Override with environment + individual_factors.update( + { + key[len(_CACHE_PREFIX) + 1 :].lower(): float(val) + for key, val in self._environ.items() + if key.startswith(_CACHE_PREFIX + "_") + } + ) + # Resize all caches (if necessary) with the new factors we've loaded self.resize_all_caches() diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 296706a39634..cd4826242035 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -90,13 +90,27 @@ class Cache(object): def __init__( self, - name, - max_entries=1000, - keylen=1, - tree=False, - iterable=False, - apply_cache_factor_from_config=True, + name: str, + max_entries: int = 1000, + keylen: int = 1, + tree: bool = False, + iterable: bool = False, + apply_cache_factor_from_config: bool = True, ): + """ + Args: + name: The name of the cache + max_entries: Maximum amount of entries that the cache will hold + keylen: The length of the tuple used as the cache key + 
tree: Use a TreeCache instead of a dict as the underlying cache type + iterable: If True, count each item in the cached object as an entry, + rather than each cached object + apply_cache_factor_from_config: Whether cache factors specified in the + config file affect `max_entries` + + Returns: + Cache + """ cache_type = TreeCache if tree else dict self._pending_deferred_cache = cache_type() diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 293a3f172ab8..29fabac3cdb7 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -15,6 +15,7 @@ import threading from functools import wraps +from typing import Callable, Optional, Type, Union from synapse.config import cache as cache_config from synapse.util.caches.treecache import TreeCache @@ -52,18 +53,18 @@ class LruCache(object): def __init__( self, - max_size, - keylen=1, - cache_type=dict, - size_callback=None, - evicted_callback=None, - apply_cache_factor_from_config=True, + max_size: int, + keylen: int = 1, + cache_type: Type[Union[dict, TreeCache]] = dict, + size_callback: Optional[Callable] = None, + evicted_callback: Optional[Callable] = None, + apply_cache_factor_from_config: bool = True, ): """ Args: - max_size (int): + max_size: The maximum amount of entries the cache can hold - keylen (int): + keylen: The length of the tuple used as the cache key cache_type (type): type of underlying cache to be used. Typically one of dict @@ -76,7 +77,7 @@ def __init__( entry apply_cache_factor_from_config (bool): If true, `max_size` will be - multiplied by a cache factor derived from the homeserver config. + multiplied by a cache factor derived from the homeserver config """ cache = cache_type() self.cache = cache # Used for introspection. 
From 485f17737bc0f138210044286d0ebfbeaf030e4d Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 18:22:15 +0100 Subject: [PATCH 45/49] fix tests --- synapse/config/cache.py | 19 +++++++++---------- tests/config/test_cache.py | 6 +++--- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/synapse/config/cache.py b/synapse/config/cache.py index a8dc93c225c5..ffce1d0a0d8b 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -103,20 +103,12 @@ def read_config(self, config, **kwargs): # Set the global one so that it's reflected in new caches properties.default_factor_size = self.global_factor - # Load cache factors from the config, but override them with the - # environment if they exist + # Load cache factors from the config individual_factors = cache_config.get("per_cache_factors", {}) or {} if not isinstance(individual_factors, dict): raise ConfigError("caches.per_cache_factors must be a dictionary") - for cache, factor in individual_factors.items(): - if not isinstance(factor, (int, float)): - raise ConfigError( - "caches.per_cache_factors.%s must be a number" % (cache.lower(),) - ) - self.cache_factors[cache.lower()] = factor - - # Override with environment + # Override factors from environment if necessary individual_factors.update( { key[len(_CACHE_PREFIX) + 1 :].lower(): float(val) @@ -125,6 +117,13 @@ def read_config(self, config, **kwargs): } ) + for cache, factor in individual_factors.items(): + if not isinstance(factor, (int, float)): + raise ConfigError( + "caches.per_cache_factors.%s must be a number" % (cache.lower(),) + ) + self.cache_factors[cache.lower()] = factor + # Resize all caches (if necessary) with the new factors we've loaded self.resize_all_caches() diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index 88d530264371..292027912569 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -49,8 +49,8 @@ def test_individual_caches_from_environ(self): def 
test_config_overrides_environ(self): """ - Individual cache factors defined in config will take precedence over - ones in the environment. + Individual cache factors defined in the environment will take precedence + over those in the config. """ config = {"caches": {"per_cache_factors": {"foo": 2, "bar": 3}}} t = TestConfig() @@ -62,7 +62,7 @@ def test_config_overrides_environ(self): self.assertEqual( dict(t.caches.cache_factors), - {"foo": 2.0, "bar": 3.0, "something_or_other": 2.0}, + {"foo": 1.0, "bar": 3.0, "something_or_other": 2.0}, ) def test_individual_instantiated_before_config_load(self): From fe890508cc8d5bf7139d9f01207db762fa636b69 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 19:43:31 +0100 Subject: [PATCH 46/49] Fix and clarify config comments --- docs/sample_config.yaml | 12 ++++++++---- synapse/config/cache.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 6b0d7c2b0da9..5a39d2c46970 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1980,13 +1980,17 @@ opentracing: # Caching can be configured through the following options. # caches: - # Controls the global cache factor. This overrides the "SYNAPSE_CACHE_FACTOR" - # environment variable. + # Controls the global cache factor. This can be overridden by the + # "SYNAPSE_CACHE_FACTOR" environment variable. # # global_factor: 0.5 - # A dictionary of cache name to cache factor for that - # individual cache. + # A dictionary of cache name to cache factor for that individual + # cache. This can be overridden by environment variables + # comprised of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in + # capital letters and underscores. + # + # Ex. 
SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2 # # per_cache_factors: # get_users_who_share_room_with_user: 2 diff --git a/synapse/config/cache.py b/synapse/config/cache.py index ffce1d0a0d8b..6b0dd0ab9f71 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -75,13 +75,17 @@ def generate_config_section(self, **kwargs): # Caching can be configured through the following options. # caches: - # Controls the global cache factor. This overrides the "SYNAPSE_CACHE_FACTOR" - # environment variable. + # Controls the global cache factor. This can be overridden by the + # "SYNAPSE_CACHE_FACTOR" environment variable. # # global_factor: 0.5 - # A dictionary of cache name to cache factor for that - # individual cache. + # A dictionary of cache name to cache factor for that individual + # cache. This can be overridden by environment variables + # comprised of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in + # capital letters and underscores. + # + # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2 # # per_cache_factors: # get_users_who_share_room_with_user: 2 From ae6070bc1227f07e65056fe3455d4683adc02576 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 1 May 2020 19:44:44 +0100 Subject: [PATCH 47/49] Move event_cache_size to CacheConfig --- docs/sample_config.yaml | 10 ++++++---- synapse/config/cache.py | 6 ++++++ synapse/config/database.py | 4 ---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 5a39d2c46970..18ed1daf16bb 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -634,10 +634,6 @@ database: args: database: DATADIR/homeserver.db -# Number of events to cache in memory. -# -#event_cache_size: 10K - ## Logging ## @@ -1979,6 +1975,12 @@ opentracing: # Caching can be configured through the following options. # + +# The number of events to cache in memory. Not affected by the +# caches.global_factor. 
+# +#event_cache_size: 10K + caches: # Controls the global cache factor. This can be overridden by the # "SYNAPSE_CACHE_FACTOR" environment variable. diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 6b0dd0ab9f71..017a5d528921 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -74,6 +74,12 @@ def generate_config_section(self, **kwargs): # Caching can be configured through the following options. # + + # The number of events to cache in memory. Not affected by the + # caches.global_factor. + # + #event_cache_size: 10K + caches: # Controls the global cache factor. This can be overridden by the # "SYNAPSE_CACHE_FACTOR" environment variable. diff --git a/synapse/config/database.py b/synapse/config/database.py index eaee71a8d74a..e330ddb0758d 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -68,10 +68,6 @@ name: sqlite3 args: database: %(database_path)s - -# Number of events to cache in memory. -# -#event_cache_size: 10K """ From 4448633c70968e643af7c045be9a6116a1a4cf19 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 May 2020 14:08:43 +0100 Subject: [PATCH 48/49] Follow config file conventions. Move cache config location --- docs/sample_config.yaml | 64 +++++++++++++++++++++--------------- synapse/config/cache.py | 32 +++++++++++------- synapse/config/homeserver.py | 2 +- 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 18ed1daf16bb..eeca1f408772 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -591,6 +591,43 @@ acme: +## Caching ## + +# Caching can be configured through the following options. +# +# A cache 'factor' is a multiplier that can be applied to each of +# Synapse's caches in order to increase or decrease the maximum +# number of entries that can be stored. + +# The number of events to cache in memory. Not affected by +# caches.global_factor. 
+# +#event_cache_size: 10K + +caches: + # Controls the global cache factor, which is the default cache factor + # for all caches if a specific factor for that cache is not otherwise + # set. + # + # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment + # variable. Setting by environment variable takes priority over + # setting through the config file. + # + #global_factor: 0.5 + + # A dictionary of cache name to cache factor for that individual + # cache. Overrides the global cache factor for a given cache. + # + # These can also be set through environment variables comprised + # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital + # letters and underscores. Setting by environment variable + # takes priority over setting through the config file. + # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2 + # + per_cache_factors: + #get_users_who_share_room_with_user: 2 + + ## Database ## # The 'database' setting defines the database that synapse uses to store all of @@ -1969,30 +2006,3 @@ opentracing: # # logging: # false - - -## Cache Configuration ## - -# Caching can be configured through the following options. -# - -# The number of events to cache in memory. Not affected by the -# caches.global_factor. -# -#event_cache_size: 10K - -caches: - # Controls the global cache factor. This can be overridden by the - # "SYNAPSE_CACHE_FACTOR" environment variable. - # - # global_factor: 0.5 - - # A dictionary of cache name to cache factor for that individual - # cache. This can be overridden by environment variables - # comprised of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in - # capital letters and underscores. - # - # Ex. 
SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2 - # - # per_cache_factors: - # get_users_who_share_room_with_user: 2 diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 017a5d528921..0a112c88ed44 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -70,31 +70,41 @@ def reset(): def generate_config_section(self, **kwargs): return """\ - ## Cache Configuration ## + ## Caching ## # Caching can be configured through the following options. # + # A cache 'factor' is a multiplier that can be applied to each of + # Synapse's caches in order to increase or decrease the maximum + # number of entries that can be stored. - # The number of events to cache in memory. Not affected by the + # The number of events to cache in memory. Not affected by # caches.global_factor. # #event_cache_size: 10K caches: - # Controls the global cache factor. This can be overridden by the - # "SYNAPSE_CACHE_FACTOR" environment variable. + # Controls the global cache factor, which is the default cache factor + # for all caches if a specific factor for that cache is not otherwise + # set. # - # global_factor: 0.5 + # This can also be set by the "SYNAPSE_CACHE_FACTOR" environment + # variable. Setting by environment variable takes priority over + # setting through the config file. + # + #global_factor: 0.5 # A dictionary of cache name to cache factor for that individual - # cache. This can be overridden by environment variables - # comprised of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in - # capital letters and underscores. + # cache. Overrides the global cache factor for a given cache. # + # These can also be set through environment variables comprised + # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital + # letters and underscores. Setting by environment variable + # takes priority over setting through the config file. # Ex. 
SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2
            #
-            # per_cache_factors:
-            #   get_users_who_share_room_with_user: 2
+            per_cache_factors:
+              #get_users_who_share_room_with_user: 2
            """

    def read_config(self, config, **kwargs):
@@ -114,7 +124,7 @@ def read_config(self, config, **kwargs):
         properties.default_factor_size = self.global_factor

         # Load cache factors from the config
-        individual_factors = cache_config.get("per_cache_factors", {}) or {}
+        individual_factors = cache_config.get("per_cache_factors") or {}
         if not isinstance(individual_factors, dict):
             raise ConfigError("caches.per_cache_factors must be a dictionary")

diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py
index 23afad79e246..ebd1644f09de 100644
--- a/synapse/config/homeserver.py
+++ b/synapse/config/homeserver.py
@@ -55,6 +55,7 @@ class HomeServerConfig(RootConfig):
     config_classes = [
         ServerConfig,
         TlsConfig,
+        CacheConfig,
         DatabaseConfig,
         LoggingConfig,
         RatelimitConfig,
@@ -85,5 +86,4 @@ class HomeServerConfig(RootConfig):
         ThirdPartyRulesConfig,
         TracerConfig,
         RedisConfig,
-        CacheConfig,
     ]
From f8ed4323ce72fde6ece5d97251558c5870b88065 Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Mon, 4 May 2020 14:16:46 +0100
Subject: [PATCH 49/49] Attempt to explain the default global cache factor

---
 docs/sample_config.yaml | 8 +++++---
 synapse/config/cache.py | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index eeca1f408772..e313cdc03811 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -613,7 +613,9 @@ caches:
   # variable. Setting by environment variable takes priority over
   # setting through the config file.
   #
-  #global_factor: 0.5
+  # Defaults to 0.5, which will halve the size of all caches.
+  #
+  #global_factor: 1.0

   # A dictionary of cache name to cache factor for that individual
   # cache. Overrides the global cache factor for a given cache.
@@ -622,10 +624,10 @@ caches:
   # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
   # letters and underscores. Setting by environment variable
   # takes priority over setting through the config file.
-  # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2
+  # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
   #
   per_cache_factors:
-    #get_users_who_share_room_with_user: 2
+    #get_users_who_share_room_with_user: 2.0


 ## Database ##
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
index 0a112c88ed44..91036a012e2b 100644
--- a/synapse/config/cache.py
+++ b/synapse/config/cache.py
@@ -92,7 +92,9 @@ def generate_config_section(self, **kwargs):
             # variable. Setting by environment variable takes priority over
             # setting through the config file.
             #
-            #global_factor: 0.5
+            # Defaults to 0.5, which will halve the size of all caches.
+            #
+            #global_factor: 1.0

             # A dictionary of cache name to cache factor for that individual
             # cache. Overrides the global cache factor for a given cache.
@@ -101,10 +103,10 @@ def generate_config_section(self, **kwargs):
             # of "SYNAPSE_CACHE_FACTOR_" + the name of the cache in capital
             # letters and underscores. Setting by environment variable
             # takes priority over setting through the config file.
-            # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2
+            # Ex. SYNAPSE_CACHE_FACTOR_GET_USERS_WHO_SHARE_ROOM_WITH_USER=2.0
             #
             per_cache_factors:
-              #get_users_who_share_room_with_user: 2
+              #get_users_who_share_room_with_user: 2.0
            """

    def read_config(self, config, **kwargs):