Skip to content

Commit 82691b1

Browse files
committed
evaluate using cattrs + orjson instead of custom serialization/deserialization
seems like as of cachew v0.13.0, on test_many, cattrs + orjson is

- 31% faster for writing
- 17% faster for reading

Note that this includes other overhead like writing to the DB, so actual serialization/deserialization is even faster than that

performance stats:

original version:

```
src/cachew/tests/test_cachew.py::test_many[1000000-False]
[INFO 2023-09-10 23:22:55,188 cachew __init__.py:1141] cachew.tests.test_cachew:test_many.<locals>.iter_data: wrote 1000000 objects to cachew (sqlite /tmp/pytest-of-adhoc/pytest-74/test_many_1000000_False_0/test_many)
test_many: initial write to cache took 7.2s
test_many: cache size is 23.06048Mb
[INFO 2023-09-10 23:22:55,306 cachew __init__.py:1005] cachew.tests.test_cachew:test_many.<locals>.iter_data: loading 1000000 objects from cachew (sqlite /tmp/pytest-of-adhoc/pytest-74/test_many_1000000_False_0/test_many)
test_many: reading from cache took 3.7s
[INFO 2023-09-10 23:22:58,995 cachew __init__.py:1005] cachew.tests.test_cachew:test_many.<locals>.iter_data: loading 1000000 objects from cachew (sqlite /tmp/pytest-of-adhoc/pytest-74/test_many_1000000_False_0/test_many)
PASSED
```

with cattrs + orjson:

```
src/cachew/tests/test_cachew.py::test_many[1000000-False]
[INFO 2023-09-10 23:23:35,704 cachew __init__.py:1055] cachew.tests.test_cachew:test_many.<locals>.iter_data: wrote 1000000 objects to cachew (sqlite /tmp/pytest-of-adhoc/pytest-75/test_many_1000000_False_0/test_many)
test_many: initial write to cache took 4.9s
test_many: cache size is 72.904704Mb
[INFO 2023-09-10 23:23:36,048 cachew __init__.py:916 ] cachew.tests.test_cachew:test_many.<locals>.iter_data: loading 1000000 objects from cachew (sqlite /tmp/pytest-of-adhoc/pytest-75/test_many_1000000_False_0/test_many)
test_many: reading from cache took 3.1s
[INFO 2023-09-10 23:23:39,114 cachew __init__.py:916 ] cachew.tests.test_cachew:test_many.<locals>.iter_data: loading 1000000 objects from cachew (sqlite /tmp/pytest-of-adhoc/pytest-75/test_many_1000000_False_0/test_many)
PASSED
```

This will break many other things at the moment though, so can't use it straightaway
1 parent 175afad commit 82691b1

File tree

2 files changed

+25
-10
lines changed

2 files changed

+25
-10
lines changed

src/cachew/__init__.py

+19-8
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717
import dataclasses
1818
import warnings
1919

20+
import cattrs
21+
import orjson
2022

2123
import appdirs
2224

@@ -562,9 +564,10 @@ def do_begin(conn):
562564

563565
self.binder = NTBinder.make(tp=cls)
564566
# actual cache
565-
self.table_cache = Table('cache' , self.meta, *self.binder.columns)
567+
# FIXME change table definition
568+
self.table_cache = Table('cache' , self.meta, Column('data', sqlalchemy.String))
566569
# temporary table, we use it to insert and then (atomically?) rename to the above table at the very end
567-
self.table_cache_tmp = Table('cache_tmp', self.meta, *self.binder.columns)
570+
self.table_cache_tmp = Table('cache_tmp', self.meta, Column('data', sqlalchemy.String))
568571

569572
def __enter__(self) -> 'DbHelper':
570573
return self
@@ -882,7 +885,7 @@ def composite_hash(self, *args, **kwargs) -> Dict[str, Any]:
882885
}
883886
kwargs = {**defaults, **kwargs}
884887
binder = NTBinder.make(tp=self.cls_)
885-
schema = str(binder.columns) # todo not super nice, but works fine for now
888+
schema = str('FIXME') # todo not super nice, but works fine for now
886889
hash_parts = {
887890
'cachew' : CACHEW_VERSION,
888891
'schema' : schema,
@@ -993,11 +996,14 @@ def cachew_wrapper(
993996

994997
logger.debug('old hash: %s', old_hash)
995998

999+
converter = cattrs.Converter()
9961000

9971001
def cached_items():
9981002
rows = conn.execute(table_cache.select())
999-
for row in rows:
1000-
yield binder.from_row(row)
1003+
for (js,) in rows:
1004+
xx = orjson.loads(js)
1005+
rr = converter.structure(xx, binder.type_)
1006+
yield rr
10011007

10021008
if new_hash == old_hash:
10031009
logger.debug('hash matched: loading from cache')
@@ -1107,9 +1113,11 @@ def flush() -> None:
11071113
dict(zip(column_names, row))
11081114
for row in chunk
11091115
]
1110-
conn.execute(insert_into_table_cache_tmp, chunk_dict)
1116+
conn.execute(insert_into_table_cache_tmp, [{'data': c} for c in chunk])
11111117
chunk = []
11121118

1119+
converter = cattrs.Converter()
1120+
11131121
total_objects = 0
11141122
for d in datas:
11151123
try:
@@ -1118,8 +1126,11 @@ def flush() -> None:
11181126
except GeneratorExit:
11191127
early_exit = True
11201128
return
1121-
1122-
chunk.append(binder.to_row(d))
1129+
1130+
1131+
js = converter.unstructure(d, binder.type_)
1132+
js = orjson.dumps(js) # .decode('utf8') # TODO just dump as bytes?? decoding takes time
1133+
chunk.append(js)
11231134
if len(chunk) >= chunk_by:
11241135
flush()
11251136
flush()

src/cachew/tests/test_cachew.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,8 @@ def test_ntbinder_primitive(tp, val) -> None:
7878
assert vv == val
7979

8080

81-
class UUU(NamedTuple):
81+
@dataclass
82+
class UUU:
8283
xx: int
8384
yy: int
8485

@@ -285,7 +286,10 @@ def fun2() -> Iterable[Union[UGood, UBad]]:
285286
list(fun2())
286287

287288

288-
class TE2(NamedTuple):
289+
# NOTE: had to change this from NamedTuple, since cattrs doesn't seem to handle NamedTuple correctly atm
290+
# performance for current cachew is same for dataclass/NamedTuple, so doesn't impact benchmarks
291+
@dataclass
292+
class TE2:
289293
value: int
290294
uuu: UUU
291295
value2: int

0 commit comments

Comments (0)