Skip to content

Commit f6281f5

Browse files
ffelixgantonpirker
andauthored
Fix lru cache copying (#3883)
A simpler and better LRU Cache implementation that prevents data leaking between copied caches. Fixes #3852 --------- Co-authored-by: Anton Pirker <[email protected]>
1 parent 8ced660 commit f6281f5

File tree

3 files changed

+93
-161
lines changed

3 files changed

+93
-161
lines changed

Diff for: sentry_sdk/_lru_cache.py

+35-160
Original file line numberDiff line numberDiff line change
@@ -1,181 +1,56 @@
1-
"""
2-
A fork of Python 3.6's stdlib lru_cache (found in Python's 'cpython/Lib/functools.py')
3-
adapted into a data structure for single threaded uses.
1+
from typing import TYPE_CHECKING
42

5-
https://github.com/python/cpython/blob/v3.6.12/Lib/functools.py
3+
if TYPE_CHECKING:
4+
from typing import Any
65

76

8-
Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
9-
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation;
10-
11-
All Rights Reserved
12-
13-
14-
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
15-
--------------------------------------------
16-
17-
1. This LICENSE AGREEMENT is between the Python Software Foundation
18-
("PSF"), and the Individual or Organization ("Licensee") accessing and
19-
otherwise using this software ("Python") in source or binary form and
20-
its associated documentation.
21-
22-
2. Subject to the terms and conditions of this License Agreement, PSF hereby
23-
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
24-
analyze, test, perform and/or display publicly, prepare derivative works,
25-
distribute, and otherwise use Python alone or in any derivative version,
26-
provided, however, that PSF's License Agreement and PSF's notice of copyright,
27-
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
28-
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation;
29-
All Rights Reserved" are retained in Python alone or in any derivative version
30-
prepared by Licensee.
31-
32-
3. In the event Licensee prepares a derivative work that is based on
33-
or incorporates Python or any part thereof, and wants to make
34-
the derivative work available to others as provided herein, then
35-
Licensee hereby agrees to include in any such work a brief summary of
36-
the changes made to Python.
37-
38-
4. PSF is making Python available to Licensee on an "AS IS"
39-
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
40-
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
41-
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
42-
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
43-
INFRINGE ANY THIRD PARTY RIGHTS.
44-
45-
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
46-
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
47-
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
48-
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
49-
50-
6. This License Agreement will automatically terminate upon a material
51-
breach of its terms and conditions.
52-
53-
7. Nothing in this License Agreement shall be deemed to create any
54-
relationship of agency, partnership, or joint venture between PSF and
55-
Licensee. This License Agreement does not grant permission to use PSF
56-
trademarks or trade name in a trademark sense to endorse or promote
57-
products or services of Licensee, or any third party.
58-
59-
8. By copying, installing or otherwise using Python, Licensee
60-
agrees to be bound by the terms and conditions of this License
61-
Agreement.
62-
63-
"""
64-
65-
from copy import copy, deepcopy
66-
67-
SENTINEL = object()
68-
69-
70-
# aliases to the entries in a node
71-
PREV = 0
72-
NEXT = 1
73-
KEY = 2
74-
VALUE = 3
7+
_SENTINEL = object()
758

769

7710
class LRUCache:
7811
def __init__(self, max_size):
79-
assert max_size > 0
80-
12+
# type: (int) -> None
13+
if max_size <= 0:
14+
raise AssertionError(f"invalid max_size: {max_size}")
8115
self.max_size = max_size
82-
self.full = False
83-
84-
self.cache = {}
85-
86-
# root of the circularly linked list to keep track of
87-
# the least recently used key
88-
self.root = [] # type: ignore
89-
# the node looks like [PREV, NEXT, KEY, VALUE]
90-
self.root[:] = [self.root, self.root, None, None]
91-
16+
self._data = {} # type: dict[Any, Any]
9217
self.hits = self.misses = 0
18+
self.full = False
9319

9420
def __copy__(self):
95-
cache = LRUCache(self.max_size)
96-
cache.full = self.full
97-
cache.cache = copy(self.cache)
98-
cache.root = deepcopy(self.root)
99-
return cache
21+
# type: () -> LRUCache
22+
new = LRUCache(max_size=self.max_size)
23+
new.hits = self.hits
24+
new.misses = self.misses
25+
new.full = self.full
26+
new._data = self._data.copy()
27+
return new
10028

10129
def set(self, key, value):
102-
link = self.cache.get(key, SENTINEL)
103-
104-
if link is not SENTINEL:
105-
# have to move the node to the front of the linked list
106-
link_prev, link_next, _key, _value = link
107-
108-
# first remove the node from the lsnked list
109-
link_prev[NEXT] = link_next
110-
link_next[PREV] = link_prev
111-
112-
# insert the node between the root and the last
113-
last = self.root[PREV]
114-
last[NEXT] = self.root[PREV] = link
115-
link[PREV] = last
116-
link[NEXT] = self.root
117-
118-
# update the value
119-
link[VALUE] = value
120-
30+
# type: (Any, Any) -> None
31+
current = self._data.pop(key, _SENTINEL)
32+
if current is not _SENTINEL:
33+
self._data[key] = value
12134
elif self.full:
122-
# reuse the root node, so update its key/value
123-
old_root = self.root
124-
old_root[KEY] = key
125-
old_root[VALUE] = value
126-
127-
self.root = old_root[NEXT]
128-
old_key = self.root[KEY]
129-
130-
self.root[KEY] = self.root[VALUE] = None
131-
132-
del self.cache[old_key]
133-
134-
self.cache[key] = old_root
135-
35+
self._data.pop(next(iter(self._data)))
36+
self._data[key] = value
13637
else:
137-
# insert new node after last
138-
last = self.root[PREV]
139-
link = [last, self.root, key, value]
140-
last[NEXT] = self.root[PREV] = self.cache[key] = link
141-
self.full = len(self.cache) >= self.max_size
38+
self._data[key] = value
39+
self.full = len(self._data) >= self.max_size
14240

14341
def get(self, key, default=None):
144-
link = self.cache.get(key, SENTINEL)
145-
146-
if link is SENTINEL:
42+
# type: (Any, Any) -> Any
43+
try:
44+
ret = self._data.pop(key)
45+
except KeyError:
14746
self.misses += 1
148-
return default
149-
150-
# have to move the node to the front of the linked list
151-
link_prev, link_next, _key, _value = link
152-
153-
# first remove the node from the lsnked list
154-
link_prev[NEXT] = link_next
155-
link_next[PREV] = link_prev
156-
157-
# insert the node between the root and the last
158-
last = self.root[PREV]
159-
last[NEXT] = self.root[PREV] = link
160-
link[PREV] = last
161-
link[NEXT] = self.root
162-
163-
self.hits += 1
47+
ret = default
48+
else:
49+
self.hits += 1
50+
self._data[key] = ret
16451

165-
return link[VALUE]
52+
return ret
16653

16754
def get_all(self):
168-
nodes = []
169-
node = self.root[NEXT]
170-
171-
# To ensure the loop always terminates we iterate to the maximum
172-
# size of the LRU cache.
173-
for _ in range(self.max_size):
174-
# The cache may not be full. We exit early if we've wrapped
175-
# around to the head.
176-
if node is self.root:
177-
break
178-
nodes.append((node[KEY], node[VALUE]))
179-
node = node[NEXT]
180-
181-
return nodes
55+
# type: () -> list[tuple[Any, Any]]
56+
return list(self._data.items())

Diff for: tests/test_lru_cache.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import pytest
2-
from copy import copy
2+
from copy import copy, deepcopy
33

44
from sentry_sdk._lru_cache import LRUCache
55

@@ -76,3 +76,38 @@ def test_cache_copy():
7676
cache.get(1)
7777
assert copied.get_all() == [(1, 1), (2, 2), (3, 3)]
7878
assert cache.get_all() == [(2, 2), (3, 3), (1, 1)]
79+
80+
81+
def test_cache_deepcopy():
82+
cache = LRUCache(3)
83+
cache.set(0, 0)
84+
cache.set(1, 1)
85+
86+
copied = deepcopy(cache)
87+
cache.set(2, 2)
88+
cache.set(3, 3)
89+
assert copied.get_all() == [(0, 0), (1, 1)]
90+
assert cache.get_all() == [(1, 1), (2, 2), (3, 3)]
91+
92+
copied = deepcopy(cache)
93+
cache.get(1)
94+
assert copied.get_all() == [(1, 1), (2, 2), (3, 3)]
95+
assert cache.get_all() == [(2, 2), (3, 3), (1, 1)]
96+
97+
98+
def test_cache_pollution():
99+
cache1 = LRUCache(max_size=2)
100+
cache1.set(1, True)
101+
cache2 = copy(cache1)
102+
cache2.set(1, False)
103+
assert cache1.get(1) is True
104+
assert cache2.get(1) is False
105+
106+
107+
def test_cache_pollution_deepcopy():
108+
cache1 = LRUCache(max_size=2)
109+
cache1.set(1, True)
110+
cache2 = deepcopy(cache1)
111+
cache2.set(1, False)
112+
assert cache1.get(1) is True
113+
assert cache2.get(1) is False

Diff for: tests/test_scope.py

+22
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,28 @@ def test_all_slots_copied():
4343
assert getattr(scope_copy, attr) == getattr(scope, attr)
4444

4545

46+
def test_scope_flags_copy():
47+
# Assert forking creates a deepcopy of the flag buffer. The new
48+
# scope is free to mutate without consequence to the old scope. The
49+
# old scope is free to mutate without consequence to the new scope.
50+
old_scope = Scope()
51+
old_scope.flags.set("a", True)
52+
53+
new_scope = old_scope.fork()
54+
new_scope.flags.set("a", False)
55+
old_scope.flags.set("b", True)
56+
new_scope.flags.set("c", True)
57+
58+
assert old_scope.flags.get() == [
59+
{"flag": "a", "result": True},
60+
{"flag": "b", "result": True},
61+
]
62+
assert new_scope.flags.get() == [
63+
{"flag": "a", "result": False},
64+
{"flag": "c", "result": True},
65+
]
66+
67+
4668
def test_merging(sentry_init, capture_events):
4769
sentry_init()
4870

0 commit comments

Comments
 (0)