Skip to content

Commit 1a4bd96

Browse files
committed
Decide which keywords need processing once, rather than each validation.
On pathological examples (like the benchmark here) this avoids lots of iterating over useless keywords. In particular, on the aforementioned benchmark, this takes us (on my laptop) from -- beginning of schema: Mean +- std dev: 3.91 us +- 0.03 us; middle of schema: Mean +- std dev: 3.95 ms +- 0.04 ms; end of schema: Mean +- std dev: 4.03 ms +- 0.24 ms; valid: Mean +- std dev: 3.92 ms +- 0.05 ms -- to -- beginning of schema: Mean +- std dev: 3.94 us +- 0.02 us; middle of schema: Mean +- std dev: 6.59 us +- 0.06 us; end of schema: Mean +- std dev: 7.31 us +- 0.06 us; valid: Mean +- std dev: 5.18 us +- 0.03 us -- where clearly we now do essentially equivalent work no matter how many useless keywords are interspersed in the schema.
1 parent 4eee04b commit 1a4bd96

File tree

2 files changed

+49
-6
lines changed

2 files changed

+49
-6
lines changed
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
"""
A benchmark for validation of schemas containing lots of useless keywords.

Checks we filter them out once, ahead of time.
"""

from pyperf import Runner

from jsonschema import Draft202012Validator

NUM_USELESS = 100000

# Three real keywords sit at the beginning, middle and end of the schema,
# separated by two runs of unknown ("useless") keywords. The two runs use
# disjoint key names: repeating the first run's names would just overwrite
# the existing dict entries and leave nothing between "type" and "minimum".
schema = dict(
    [
        ("not", {"const": 42}),
        *((str(i), i) for i in range(NUM_USELESS)),
        ("type", "integer"),
        *((str(i), i) for i in range(NUM_USELESS, 2 * NUM_USELESS)),
        ("minimum", 37),
    ],
)
validator = Draft202012Validator(schema)

# An instance satisfying every real keyword, and one that only fails the
# last keyword ("minimum").
valid = 3737
invalid = 12


if __name__ == "__main__":
    runner = Runner()
    # 42 is rejected by "not", the first keyword in the schema.
    runner.bench_func("beginning of schema", lambda: validator.is_valid(42))
    # A string is rejected by "type", which follows the first useless run.
    runner.bench_func("middle of schema", lambda: validator.is_valid("foo"))
    # `invalid` is only rejected by "minimum", the final keyword.
    runner.bench_func("end of schema", lambda: validator.is_valid(invalid))
    runner.bench_func("valid", lambda: validator.is_valid(valid))

jsonschema/validators.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ class Validator:
230230
ID_OF = staticmethod(id_of)
231231

232232
_APPLICABLE_VALIDATORS = applicable_validators
233+
_validators = field(init=False, repr=False, eq=False)
233234

234235
schema: referencing.jsonschema.Schema = field(repr=reprlib.repr)
235236
_ref_resolver = field(default=None, repr=False, alias="resolver")
@@ -287,6 +288,15 @@ def __attrs_post_init__(self):
287288
resource = specification.create_resource(self.schema)
288289
self._resolver = registry.resolver_with_root(resource)
289290

291+
if self.schema is True or self.schema is False:
292+
self._validators = []
293+
else:
294+
self._validators = [
295+
(self.VALIDATORS[k], k, v)
296+
for k, v in applicable_validators(self.schema)
297+
if k in self.VALIDATORS
298+
]
299+
290300
# REMOVEME: Legacy ref resolution state management.
291301
push_scope = getattr(self._ref_resolver, "push_scope", None)
292302
if push_scope is not None:
@@ -349,8 +359,13 @@ def iter_errors(self, instance, _schema=None):
349359
DeprecationWarning,
350360
stacklevel=2,
351361
)
362+
validators = [
363+
(self.VALIDATORS[k], k, v)
364+
for k, v in applicable_validators(_schema)
365+
if k in self.VALIDATORS
366+
]
352367
else:
353-
_schema = self.schema
368+
_schema, validators = self.schema, self._validators
354369

355370
if _schema is True:
356371
return
@@ -364,11 +379,7 @@ def iter_errors(self, instance, _schema=None):
364379
)
365380
return
366381

367-
for k, v in applicable_validators(_schema):
368-
validator = self.VALIDATORS.get(k)
369-
if validator is None:
370-
continue
371-
382+
for validator, k, v in validators:
372383
errors = validator(self, v, instance, _schema) or ()
373384
for error in errors:
374385
# set details if not already set by the called fn

0 commit comments

Comments
 (0)