Skip to content

Commit 598b82a

Browse files
committed
Switch from pyrsistent to (my new) rpds bindings
This represents a big speedup: unfortunately, even with its C extension, pyrsistent was showing up quite high in profiling output. I still need to benchmark a bit more on PyPy -- pyrsistent has a pure-Python implementation which was very fast on PyPy, and I'm not sure whether rpds will beat that, so we may bring pyrsistent back into the mix for PyPy, but that is still TBD.
1 parent 6a2d5b9 commit 598b82a

File tree

6 files changed

+38
-75
lines changed

6 files changed

+38
-75
lines changed

docs/conf.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ def setup(app):
9494
"https://jsonschema-specifications.readthedocs.io/en/latest/",
9595
None,
9696
),
97-
"pyrsistent": ("https://pyrsistent.readthedocs.io/en/latest/", None),
9897
"python": ("https://docs.python.org/", None),
9998
"setuptools": ("https://setuptools.pypa.io/en/latest/", None),
10099
}

docs/requirements.txt

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ alabaster==0.7.13
88
# via sphinx
99
attrs==22.2.0
1010
# via referencing
11-
babel==2.11.0
11+
babel==2.12.1
1212
# via sphinx
1313
beautifulsoup4==4.11.2
1414
# via furo
@@ -49,14 +49,12 @@ pygments==2.14.0
4949
# sphinx
5050
pygments-github-lexers==0.0.5
5151
# via -r docs/requirements.in
52-
pyrsistent==0.19.3
53-
# via referencing
54-
pytz==2022.7.1
55-
# via babel
5652
file:.#egg=referencing
5753
# via -r docs/requirements.in
5854
requests==2.28.2
5955
# via sphinx
56+
rpds-py==0.4.1
57+
# via referencing
6058
snowballstemmer==2.2.0
6159
# via sphinx
6260
soupsieve==2.4

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ classifiers = [
3333
dynamic = ["version"]
3434
dependencies = [
3535
"attrs>=22.2.0",
36-
"pyrsistent>=0.19.3",
36+
"rpds-py>=0.4.1",
3737
]
3838

3939
[project.urls]

referencing/_core.py

Lines changed: 28 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,18 @@
11
from __future__ import annotations
22

33
from collections.abc import Iterable, Iterator, Sequence
4-
from typing import (
5-
Any,
6-
Callable,
7-
ClassVar,
8-
Generic,
9-
Protocol,
10-
Tuple,
11-
TypeVar,
12-
cast,
13-
)
4+
from typing import Any, Callable, ClassVar, Generic, Protocol, TypeVar
145
from urllib.parse import unquote, urldefrag, urljoin
156

167
from attrs import evolve, field
17-
from pyrsistent import PMap as PMapType, plist, pmap, pset
18-
from pyrsistent.typing import PList, PMap, PSet
8+
from rpds import HashTrieMap, HashTrieSet, List
199

2010
from referencing import exceptions
2111
from referencing._attrs import frozen
2212
from referencing.typing import URI, Anchor as AnchorType, D, Mapping, Retrieve
2313

24-
EMPTY_RESOURCES: PMap[URI, Resource[Any]] = pmap({}, pre_size=64)
25-
EMPTY_ANCHORS = cast(PMap[Tuple[URI, str], AnchorType[Any]], EMPTY_RESOURCES)
26-
EMPTY_UNCRAWLED: PSet[URI] = pset(pre_size=128)
27-
EMPTY_PREVIOUS_RESOLVERS: PList[URI] = plist()
14+
EMPTY_UNCRAWLED: HashTrieSet[URI] = HashTrieSet()
15+
EMPTY_PREVIOUS_RESOLVERS: List[URI] = List()
2816

2917

3018
class _MaybeInSubresource(Protocol[D]):
@@ -212,14 +200,6 @@ def _fail_to_retrieve(uri: URI):
212200
raise exceptions.NoSuchResource(ref=uri)
213201

214202

215-
def _to_pmap(
216-
value: dict[URI, Resource[D]]
217-
| PMap[URI, Resource[D]]
218-
| list[tuple[URI, Resource[D]]],
219-
):
220-
return value if isinstance(value, PMapType) else pmap(value)
221-
222-
223203
@frozen
224204
class Registry(Mapping[URI, Resource[D]]):
225205
r"""
@@ -245,14 +225,12 @@ class Registry(Mapping[URI, Resource[D]]):
245225
even according to the retrieval logic.
246226
"""
247227

248-
_resources: PMap[URI, Resource[D]] = field(
249-
default=EMPTY_RESOURCES,
250-
converter=_to_pmap,
228+
_resources: HashTrieMap[URI, Resource[D]] = field( # type: ignore[reportGeneralTypeIssues] # noqa: E501
229+
default=HashTrieMap(),
230+
converter=HashTrieMap.convert,
251231
)
252-
_anchors: PMap[tuple[URI, str], AnchorType[D]] = field(
253-
default=EMPTY_ANCHORS,
254-
)
255-
_uncrawled: PSet[URI] = field(default=EMPTY_UNCRAWLED)
232+
_anchors: HashTrieMap[tuple[URI, str], AnchorType[D]] = HashTrieMap() # type: ignore[reportGeneralTypeIssues] # noqa: E501
233+
_uncrawled: HashTrieSet[URI] = EMPTY_UNCRAWLED
256234
_retrieve: Retrieve[D] = field(default=_fail_to_retrieve)
257235

258236
def __getitem__(self, uri: URI) -> Resource[D]:
@@ -301,19 +279,15 @@ def __rmatmul__(self, new: Resource[D] | Iterable[Resource[D]]):
301279
if isinstance(new, Resource):
302280
new = (new,)
303281

304-
resources = self._resources.evolver()
305-
uncrawled = self._uncrawled.evolver()
282+
resources = self._resources
283+
uncrawled = self._uncrawled
306284
for resource in new:
307285
id = resource.id()
308286
if id is None:
309287
raise exceptions.NoInternalID(resource=resource)
310-
uncrawled.add(id)
311-
resources.set(id, resource)
312-
return evolve(
313-
self,
314-
resources=resources.persistent(),
315-
uncrawled=uncrawled.persistent(),
316-
)
288+
uncrawled = uncrawled.insert(id)
289+
resources = resources.insert(id, resource)
290+
return evolve(self, resources=resources, uncrawled=uncrawled)
317291

318292
def __repr__(self) -> str:
319293
size = len(self)
@@ -365,7 +339,7 @@ def remove(self, uri: URI):
365339
self,
366340
resources=self._resources.remove(uri),
367341
uncrawled=self._uncrawled.discard(uri),
368-
anchors=pmap(
342+
anchors=HashTrieMap(
369343
(k, v) for k, v in self._anchors.items() if k[0] != uri
370344
),
371345
)
@@ -394,23 +368,23 @@ def crawl(self) -> Registry[D]:
394368
"""
395369
Immediately crawl all added resources, discovering subresources.
396370
"""
397-
resources = self._resources.evolver()
398-
anchors = self._anchors.evolver()
371+
resources = self._resources
372+
anchors = self._anchors
399373
uncrawled = [(uri, resources[uri]) for uri in self._uncrawled]
400374
while uncrawled:
401375
uri, resource = uncrawled.pop()
402376

403377
id = resource.id()
404378
if id is not None:
405379
uri = urljoin(uri, id)
406-
resources[uri] = resource
380+
resources = resources.insert(uri, resource)
407381
for each in resource.anchors():
408-
anchors.set((uri, each.name), each)
382+
anchors = anchors.insert((uri, each.name), each)
409383
uncrawled.extend((uri, each) for each in resource.subresources())
410384
return evolve(
411385
self,
412-
resources=resources.persistent(),
413-
anchors=anchors.persistent(),
386+
resources=resources,
387+
anchors=anchors,
414388
uncrawled=EMPTY_UNCRAWLED,
415389
)
416390

@@ -427,16 +401,12 @@ def with_resources(
427401
r"""
428402
Add the given `Resource`\ s to the registry, without crawling them.
429403
"""
430-
resources = self._resources.evolver()
431-
uncrawled = self._uncrawled.evolver()
404+
resources = self._resources
405+
uncrawled = self._uncrawled
432406
for uri, resource in pairs:
433-
uncrawled.add(uri)
434-
resources[uri] = resource
435-
return evolve(
436-
self,
437-
resources=resources.persistent(),
438-
uncrawled=uncrawled.persistent(),
439-
)
407+
uncrawled = uncrawled.insert(uri)
408+
resources = resources.insert(uri, resource)
409+
return evolve(self, resources=resources, uncrawled=uncrawled)
440410

441411
def with_contents(
442412
self,
@@ -540,11 +510,7 @@ class Resolver(Generic[D]):
540510

541511
_base_uri: str = field(alias="base_uri")
542512
_registry: Registry[D] = field(alias="registry")
543-
_previous: PList[URI] = field(
544-
default=EMPTY_PREVIOUS_RESOLVERS,
545-
repr=False,
546-
alias="previous",
547-
)
513+
_previous: List[URI] = field(default=List(), repr=False, alias="previous")
548514

549515
def lookup(self, ref: URI) -> Resolved[D]:
550516
"""
@@ -614,7 +580,7 @@ def _evolve(self, base_uri: str, **kwargs: Any):
614580
"""
615581
previous = self._previous
616582
if self._base_uri and (not previous or base_uri != self._base_uri):
617-
previous = previous.cons(self._base_uri)
583+
previous = previous.push_front(self._base_uri)
618584
return evolve(self, base_uri=base_uri, previous=previous, **kwargs)
619585

620586

referencing/tests/test_core.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from pyrsistent import pmap
1+
from rpds import HashTrieMap
22
import pytest
33

44
from referencing import Anchor, Registry, Resource, Specification, exceptions
@@ -188,7 +188,7 @@ def test_init(self):
188188

189189
def test_dict_conversion(self):
190190
"""
191-
Passing a `dict` to `Registry` gets converted to a `pmap`.
191+
Passing a `dict` to `Registry` gets converted to a `HashTrieMap`.
192192
193193
So continuing to use the registry works.
194194
"""
@@ -244,7 +244,7 @@ def test_combine(self):
244244
("http://example.com/baz", three),
245245
("http://example.com/foo/quux", four),
246246
],
247-
anchors=pmap(
247+
anchors=HashTrieMap(
248248
{
249249
("http://example.com/foo/quux", "foo"): Anchor(
250250
name="foo",

test-requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ packaging==23.0
1515
# via pytest
1616
pluggy==1.0.0
1717
# via pytest
18-
pyrsistent==0.19.3
19-
# via referencing
20-
pytest==7.2.1
18+
pytest==7.2.2
2119
# via
2220
# -r test-requirements.in
2321
# pytest-subtests
2422
pytest-subtests==0.10.0
2523
# via -r test-requirements.in
2624
file:.#egg=referencing
2725
# via -r test-requirements.in
26+
rpds-py==0.4.1
27+
# via referencing

0 commit comments

Comments
 (0)