Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit cde8af9

Browse files
authoredMay 13, 2022
Add config flags to allow for cache auto-tuning (#12701)
1 parent e8ae472 commit cde8af9

File tree

7 files changed

+266
-54
lines changed

7 files changed

+266
-54
lines changed
 

‎changelog.d/12701.feature

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a config option to allow for auto-tuning of caches.

‎docs/sample_config.yaml

+18
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,24 @@ caches:
784784
#
785785
#cache_entry_ttl: 30m
786786

787+
# This flag enables cache autotuning, and is further specified by the sub-options `max_cache_memory_usage`,
788+
# `target_cache_memory_usage`, `min_cache_ttl`. These flags work in conjunction with each other to maintain
789+
# a balance between cache memory usage and cache entry availability. You must be using jemalloc to utilize
790+
# this option, and all three of the options must be specified for this feature to work.
791+
#cache_autotuning:
792+
# This flag sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted.
793+
# They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in
794+
# the flag below, or until the `min_cache_ttl` is hit.
795+
#max_cache_memory_usage: 1024M
796+
797+
# This flag sets a rough target for the desired memory usage of the caches.
798+
#target_cache_memory_usage: 758M
799+
800+
# `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when
801+
# caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches
802+
# from being emptied while Synapse is evicting due to memory.
803+
#min_cache_ttl: 5m
804+
787805
# Controls how long the results of a /sync request are cached for after
788806
# a successful response is returned. A higher duration can help clients with
789807
# intermittent connections, at the cost of higher memory usage.

‎docs/usage/configuration/config_documentation.md

+15-2
Original file line numberDiff line numberDiff line change
@@ -1119,16 +1119,29 @@ Caching can be configured through the following sub-options:
11191119
with intermittent connections, at the cost of higher memory usage.
11201120
By default, this is zero, which means that sync responses are not cached
11211121
at all.
1122-
1122+
* `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and
1123+
`min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory
1124+
usage and cache entry availability. You must be using [jemalloc](https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu)
1125+
to utilize this option, and all three of the options must be specified for this feature to work.
1126+
* `max_cache_memory_usage` sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted.
1127+
They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in
1128+
the flag below, or until the `min_cache_ttl` is hit.
1129+
* `target_cache_memory_usage` sets a rough target for the desired memory usage of the caches.
1130+
* `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when
1131+
caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches
1132+
from being emptied while Synapse is evicting due to memory.
11231133

11241134
Example configuration:
11251135
```yaml
11261136
caches:
11271137
global_factor: 1.0
11281138
per_cache_factors:
11291139
get_users_who_share_room_with_user: 2.0
1130-
expire_caches: false
11311140
sync_response_cache_duration: 2m
1141+
cache_autotuning:
1142+
max_cache_memory_usage: 1024M
1143+
target_cache_memory_usage: 758M
1144+
min_cache_ttl: 5m
11321145
```
11331146

11341147
### Reloading cache factors

‎synapse/config/cache.py

+33
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,24 @@ def generate_config_section(self, **kwargs: Any) -> str:
176176
#
177177
#cache_entry_ttl: 30m
178178
179+
# This flag enables cache autotuning, and is further specified by the sub-options `max_cache_memory_usage`,
180+
# `target_cache_memory_usage`, `min_cache_ttl`. These flags work in conjunction with each other to maintain
181+
# a balance between cache memory usage and cache entry availability. You must be using jemalloc to utilize
182+
# this option, and all three of the options must be specified for this feature to work.
183+
#cache_autotuning:
184+
# This flag sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted.
185+
# They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in
186+
# the flag below, or until the `min_cache_ttl` is hit.
187+
#max_cache_memory_usage: 1024M
188+
189+
# This flag sets a rough target for the desired memory usage of the caches.
190+
#target_cache_memory_usage: 758M
191+
192+
# `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when
193+
# caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches
194+
# from being emptied while Synapse is evicting due to memory.
195+
#min_cache_ttl: 5m
196+
179197
# Controls how long the results of a /sync request are cached for after
180198
# a successful response is returned. A higher duration can help clients with
181199
# intermittent connections, at the cost of higher memory usage.
@@ -263,6 +281,21 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
263281
)
264282
self.expiry_time_msec = self.parse_duration(expiry_time)
265283

284+
self.cache_autotuning = cache_config.get("cache_autotuning")
285+
if self.cache_autotuning:
286+
max_memory_usage = self.cache_autotuning.get("max_cache_memory_usage")
287+
self.cache_autotuning["max_cache_memory_usage"] = self.parse_size(
288+
max_memory_usage
289+
)
290+
291+
target_mem_size = self.cache_autotuning.get("target_cache_memory_usage")
292+
self.cache_autotuning["target_cache_memory_usage"] = self.parse_size(
293+
target_mem_size
294+
)
295+
296+
min_cache_ttl = self.cache_autotuning.get("min_cache_ttl")
297+
self.cache_autotuning["min_cache_ttl"] = self.parse_duration(min_cache_ttl)
298+
266299
self.sync_response_cache_duration = self.parse_duration(
267300
cache_config.get("sync_response_cache_duration", 0)
268301
)

‎synapse/metrics/jemalloc.py

+73-41
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import re
1919
from typing import Iterable, Optional, overload
2020

21+
import attr
2122
from prometheus_client import REGISTRY, Metric
2223
from typing_extensions import Literal
2324

@@ -27,52 +28,24 @@
2728
logger = logging.getLogger(__name__)
2829

2930

30-
def _setup_jemalloc_stats() -> None:
31-
"""Checks to see if jemalloc is loaded, and hooks up a collector to record
32-
statistics exposed by jemalloc.
33-
"""
34-
35-
# Try to find the loaded jemalloc shared library, if any. We need to
36-
# introspect into what is loaded, rather than loading whatever is on the
37-
# path, as if we load a *different* jemalloc version things will seg fault.
38-
39-
# We look in `/proc/self/maps`, which only exists on linux.
40-
if not os.path.exists("/proc/self/maps"):
41-
logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
42-
return
43-
44-
# We're looking for a path at the end of the line that includes
45-
# "libjemalloc".
46-
regex = re.compile(r"/\S+/libjemalloc.*$")
47-
48-
jemalloc_path = None
49-
with open("/proc/self/maps") as f:
50-
for line in f:
51-
match = regex.search(line.strip())
52-
if match:
53-
jemalloc_path = match.group()
54-
55-
if not jemalloc_path:
56-
# No loaded jemalloc was found.
57-
logger.debug("jemalloc not found")
58-
return
59-
60-
logger.debug("Found jemalloc at %s", jemalloc_path)
61-
62-
jemalloc = ctypes.CDLL(jemalloc_path)
31+
@attr.s(slots=True, frozen=True, auto_attribs=True)
32+
class JemallocStats:
33+
jemalloc: ctypes.CDLL
6334

6435
@overload
6536
def _mallctl(
66-
name: str, read: Literal[True] = True, write: Optional[int] = None
37+
self, name: str, read: Literal[True] = True, write: Optional[int] = None
6738
) -> int:
6839
...
6940

7041
@overload
71-
def _mallctl(name: str, read: Literal[False], write: Optional[int] = None) -> None:
42+
def _mallctl(
43+
self, name: str, read: Literal[False], write: Optional[int] = None
44+
) -> None:
7245
...
7346

7447
def _mallctl(
75-
name: str, read: bool = True, write: Optional[int] = None
48+
self, name: str, read: bool = True, write: Optional[int] = None
7649
) -> Optional[int]:
7750
"""Wrapper around `mallctl` for reading and writing integers to
7851
jemalloc.
@@ -120,7 +93,7 @@ def _mallctl(
12093
# Where oldp/oldlenp is a buffer where the old value will be written to
12194
# (if not null), and newp/newlen is the buffer with the new value to set
12295
# (if not null). Note that they're all references *except* newlen.
123-
result = jemalloc.mallctl(
96+
result = self.jemalloc.mallctl(
12497
name.encode("ascii"),
12598
input_var_ref,
12699
input_len_ref,
@@ -136,21 +109,80 @@ def _mallctl(
136109

137110
return input_var.value
138111

139-
def _jemalloc_refresh_stats() -> None:
112+
def refresh_stats(self) -> None:
140113
"""Request that jemalloc updates its internal statistics. This needs to
141114
be called before querying for stats, otherwise it will return stale
142115
values.
143116
"""
144117
try:
145-
_mallctl("epoch", read=False, write=1)
118+
self._mallctl("epoch", read=False, write=1)
146119
except Exception as e:
147120
logger.warning("Failed to reload jemalloc stats: %s", e)
148121

122+
def get_stat(self, name: str) -> int:
123+
"""Request the stat of the given name at the time of the last
124+
`refresh_stats` call. This may throw if we fail to read
125+
the stat.
126+
"""
127+
return self._mallctl(f"stats.{name}")
128+
129+
130+
_JEMALLOC_STATS: Optional[JemallocStats] = None
131+
132+
133+
def get_jemalloc_stats() -> Optional[JemallocStats]:
134+
"""Returns an interface to jemalloc, if it is being used.
135+
136+
Note that this will always return None until `setup_jemalloc_stats` has been
137+
called.
138+
"""
139+
return _JEMALLOC_STATS
140+
141+
142+
def _setup_jemalloc_stats() -> None:
143+
"""Checks to see if jemalloc is loaded, and hooks up a collector to record
144+
statistics exposed by jemalloc.
145+
"""
146+
147+
global _JEMALLOC_STATS
148+
149+
# Try to find the loaded jemalloc shared library, if any. We need to
150+
# introspect into what is loaded, rather than loading whatever is on the
151+
# path, as if we load a *different* jemalloc version things will seg fault.
152+
153+
# We look in `/proc/self/maps`, which only exists on linux.
154+
if not os.path.exists("/proc/self/maps"):
155+
logger.debug("Not looking for jemalloc as no /proc/self/maps exist")
156+
return
157+
158+
# We're looking for a path at the end of the line that includes
159+
# "libjemalloc".
160+
regex = re.compile(r"/\S+/libjemalloc.*$")
161+
162+
jemalloc_path = None
163+
with open("/proc/self/maps") as f:
164+
for line in f:
165+
match = regex.search(line.strip())
166+
if match:
167+
jemalloc_path = match.group()
168+
169+
if not jemalloc_path:
170+
# No loaded jemalloc was found.
171+
logger.debug("jemalloc not found")
172+
return
173+
174+
logger.debug("Found jemalloc at %s", jemalloc_path)
175+
176+
jemalloc_dll = ctypes.CDLL(jemalloc_path)
177+
178+
stats = JemallocStats(jemalloc_dll)
179+
_JEMALLOC_STATS = stats
180+
149181
class JemallocCollector(Collector):
150182
"""Metrics for internal jemalloc stats."""
151183

152184
def collect(self) -> Iterable[Metric]:
153-
_jemalloc_refresh_stats()
185+
stats.refresh_stats()
154186

155187
g = GaugeMetricFamily(
156188
"jemalloc_stats_app_memory_bytes",
@@ -184,7 +216,7 @@ def collect(self) -> Iterable[Metric]:
184216
"metadata",
185217
):
186218
try:
187-
value = _mallctl(f"stats.{t}")
219+
value = stats.get_stat(t)
188220
except Exception as e:
189221
# There was an error fetching the value, skip.
190222
logger.warning("Failed to read jemalloc stats.%s: %s", t, e)

‎synapse/util/caches/lrucache.py

+69-10
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import logging
16+
import math
1617
import threading
1718
import weakref
1819
from enum import Enum
@@ -40,6 +41,7 @@
4041

4142
from synapse.config import cache as cache_config
4243
from synapse.metrics.background_process_metrics import wrap_as_background_process
44+
from synapse.metrics.jemalloc import get_jemalloc_stats
4345
from synapse.util import Clock, caches
4446
from synapse.util.caches import CacheMetric, EvictionReason, register_cache
4547
from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
@@ -106,10 +108,16 @@ def update_last_access(self, clock: Clock) -> None:
106108

107109

108110
@wrap_as_background_process("LruCache._expire_old_entries")
109-
async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None:
111+
async def _expire_old_entries(
112+
clock: Clock, expiry_seconds: int, autotune_config: Optional[dict]
113+
) -> None:
110114
"""Walks the global cache list to find cache entries that haven't been
111-
accessed in the given number of seconds.
115+
accessed in the given number of seconds, or if a given memory threshold has been breached.
112116
"""
117+
if autotune_config:
118+
max_cache_memory_usage = autotune_config["max_cache_memory_usage"]
119+
target_cache_memory_usage = autotune_config["target_cache_memory_usage"]
120+
min_cache_ttl = autotune_config["min_cache_ttl"] / 1000
113121

114122
now = int(clock.time())
115123
node = GLOBAL_ROOT.prev_node
@@ -119,11 +127,36 @@ async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None:
119127

120128
logger.debug("Searching for stale caches")
121129

130+
evicting_due_to_memory = False
131+
132+
# determine if we're evicting due to memory
133+
jemalloc_interface = get_jemalloc_stats()
134+
if jemalloc_interface and autotune_config:
135+
try:
136+
jemalloc_interface.refresh_stats()
137+
mem_usage = jemalloc_interface.get_stat("allocated")
138+
if mem_usage > max_cache_memory_usage:
139+
logger.info("Begin memory-based cache eviction.")
140+
evicting_due_to_memory = True
141+
except Exception:
142+
logger.warning(
143+
"Unable to read allocated memory, skipping memory-based cache eviction."
144+
)
145+
122146
while node is not GLOBAL_ROOT:
123147
# Only the root node isn't a `_TimedListNode`.
124148
assert isinstance(node, _TimedListNode)
125149

126-
if node.last_access_ts_secs > now - expiry_seconds:
150+
# if node has not aged past expiry_seconds and we are not evicting due to memory usage, there's
151+
# nothing to do here
152+
if (
153+
node.last_access_ts_secs > now - expiry_seconds
154+
and not evicting_due_to_memory
155+
):
156+
break
157+
158+
# if entry is newer than min_cache_entry_ttl then do not evict and don't evict anything newer
159+
if evicting_due_to_memory and now - node.last_access_ts_secs < min_cache_ttl:
127160
break
128161

129162
cache_entry = node.get_cache_entry()
@@ -136,10 +169,29 @@ async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None:
136169
assert cache_entry is not None
137170
cache_entry.drop_from_cache()
138171

172+
# Check mem allocation periodically if we are evicting a bunch of caches
173+
if jemalloc_interface and evicting_due_to_memory and (i + 1) % 100 == 0:
174+
try:
175+
jemalloc_interface.refresh_stats()
176+
mem_usage = jemalloc_interface.get_stat("allocated")
177+
if mem_usage < target_cache_memory_usage:
178+
evicting_due_to_memory = False
179+
logger.info("Stop memory-based cache eviction.")
180+
except Exception:
181+
logger.warning(
182+
"Unable to read allocated memory, this may affect memory-based cache eviction."
183+
)
184+
# If we've failed to read the current memory usage then we
185+
# should stop trying to evict based on memory usage
186+
evicting_due_to_memory = False
187+
139188
# If we do lots of work at once we yield to allow other stuff to happen.
140189
if (i + 1) % 10000 == 0:
141190
logger.debug("Waiting during drop")
142-
await clock.sleep(0)
191+
if node.last_access_ts_secs > now - expiry_seconds:
192+
await clock.sleep(0.5)
193+
else:
194+
await clock.sleep(0)
143195
logger.debug("Waking during drop")
144196

145197
node = next_node
@@ -156,21 +208,28 @@ async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None:
156208

157209
def setup_expire_lru_cache_entries(hs: "HomeServer") -> None:
158210
"""Start a background job that expires all cache entries if they have not
159-
been accessed for the given number of seconds.
211+
been accessed for the given number of seconds, or if a given memory usage threshold has been
212+
breached.
160213
"""
161-
if not hs.config.caches.expiry_time_msec:
214+
if not hs.config.caches.expiry_time_msec and not hs.config.caches.cache_autotuning:
162215
return
163216

164-
logger.info(
165-
"Expiring LRU caches after %d seconds", hs.config.caches.expiry_time_msec / 1000
166-
)
217+
if hs.config.caches.expiry_time_msec:
218+
expiry_time = hs.config.caches.expiry_time_msec / 1000
219+
logger.info("Expiring LRU caches after %d seconds", expiry_time)
220+
else:
221+
expiry_time = math.inf
167222

168223
global USE_GLOBAL_LIST
169224
USE_GLOBAL_LIST = True
170225

171226
clock = hs.get_clock()
172227
clock.looping_call(
173-
_expire_old_entries, 30 * 1000, clock, hs.config.caches.expiry_time_msec / 1000
228+
_expire_old_entries,
229+
30 * 1000,
230+
clock,
231+
expiry_time,
232+
hs.config.caches.cache_autotuning,
174233
)
175234

176235

‎tests/util/test_lrucache.py

+57-1
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414

1515

1616
from typing import List
17-
from unittest.mock import Mock
17+
from unittest.mock import Mock, patch
1818

19+
from synapse.metrics.jemalloc import JemallocStats
1920
from synapse.util.caches.lrucache import LruCache, setup_expire_lru_cache_entries
2021
from synapse.util.caches.treecache import TreeCache
2122

@@ -316,3 +317,58 @@ def test_evict(self):
316317

317318
self.assertEqual(cache.get("key1"), None)
318319
self.assertEqual(cache.get("key2"), 3)
320+
321+
322+
class MemoryEvictionTestCase(unittest.HomeserverTestCase):
323+
@override_config(
324+
{
325+
"caches": {
326+
"cache_autotuning": {
327+
"max_cache_memory_usage": "700M",
328+
"target_cache_memory_usage": "500M",
329+
"min_cache_ttl": "5m",
330+
}
331+
}
332+
}
333+
)
334+
@patch("synapse.util.caches.lrucache.get_jemalloc_stats")
335+
def test_evict_memory(self, jemalloc_interface) -> None:
336+
mock_jemalloc_class = Mock(spec=JemallocStats)
337+
jemalloc_interface.return_value = mock_jemalloc_class
338+
339+
# set the return value of get_stat() to be greater than max_cache_memory_usage
340+
mock_jemalloc_class.get_stat.return_value = 924288000
341+
342+
setup_expire_lru_cache_entries(self.hs)
343+
cache = LruCache(4, clock=self.hs.get_clock())
344+
345+
cache["key1"] = 1
346+
cache["key2"] = 2
347+
348+
# advance the reactor less than the min_cache_ttl
349+
self.reactor.advance(60 * 2)
350+
351+
# our items should still be in the cache
352+
self.assertEqual(cache.get("key1"), 1)
353+
self.assertEqual(cache.get("key2"), 2)
354+
355+
# advance the reactor past the min_cache_ttl
356+
self.reactor.advance(60 * 6)
357+
358+
# the items should be cleared from cache
359+
self.assertEqual(cache.get("key1"), None)
360+
self.assertEqual(cache.get("key2"), None)
361+
362+
# add more stuff to caches
363+
cache["key1"] = 1
364+
cache["key2"] = 2
365+
366+
# set the return value of get_stat() to be lower than target_cache_memory_usage
367+
mock_jemalloc_class.get_stat.return_value = 10000
368+
369+
# advance the reactor past the min_cache_ttl
370+
self.reactor.advance(60 * 6)
371+
372+
# the items should still be in the cache
373+
self.assertEqual(cache.get("key1"), 1)
374+
self.assertEqual(cache.get("key2"), 2)

0 commit comments

Comments
 (0)
This repository has been archived.