Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit c5d1e6d

Browse files
authored
Properly parse event_fields in filters (#15607)
The event_fields property in filters should use the proper escape rules, namely backslashes can be escaped with an additional backslash. This adds tests (adapted from matrix-js-sdk) and implements the logic to properly split the event_fields strings.
1 parent 201597f commit c5d1e6d

File tree

5 files changed

+99
-34
lines changed

5 files changed

+99
-34
lines changed

changelog.d/15607.bugfix

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a long-standing bug where filters with multiple backslashes were rejected.

synapse/api/filtering.py

+1-14
Original file line numberDiff line numberDiff line change
@@ -128,20 +128,7 @@
128128
"account_data": {"$ref": "#/definitions/filter"},
129129
"room": {"$ref": "#/definitions/room_filter"},
130130
"event_format": {"type": "string", "enum": ["client", "federation"]},
131-
"event_fields": {
132-
"type": "array",
133-
"items": {
134-
"type": "string",
135-
# Don't allow '\\' in event field filters. This makes matching
136-
# events a lot easier as we can then use a negative lookbehind
137-
# assertion to split '\.' If we allowed \\ then it would
138-
# incorrectly split '\\.' See synapse.events.utils.serialize_event
139-
#
140-
# Note that because this is a regular expression, we have to escape
141-
# each backslash in the pattern.
142-
"pattern": r"^((?!\\\\).)*$",
143-
},
144-
},
131+
"event_fields": {"type": "array", "items": {"type": "string"}},
145132
},
146133
"additionalProperties": True, # Allow new fields for forward compatibility
147134
}

synapse/events/utils.py

+58-14
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
Iterable,
2323
List,
2424
Mapping,
25+
Match,
2526
MutableMapping,
2627
Optional,
2728
Union,
@@ -46,12 +47,10 @@
4647
from synapse.handlers.relations import BundledAggregations
4748

4849

49-
# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
50-
# (?<!stuff) matches if the current position in the string is not preceded
51-
# by a match for 'stuff'.
52-
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
53-
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
54-
SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
50+
# Split strings on "." but not "\." (or "\\\.").
51+
SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
52+
# Find escaped characters, e.g. those with a \ in front of them.
53+
ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")
5554

5655
CANONICALJSON_MAX_INT = (2**53) - 1
5756
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
@@ -253,14 +252,65 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
253252
sub_out_dict[key_to_move] = sub_dict[key_to_move]
254253

255254

255+
def _escape_slash(m: Match[str]) -> str:
256+
"""
257+
Replacement function; replace a backslash-backslash or backslash-dot with the
258+
second character. Leaves any other string alone.
259+
"""
260+
if m.group(1) in ("\\", "."):
261+
return m.group(1)
262+
return m.group(0)
263+
264+
265+
def _split_field(field: str) -> List[str]:
266+
"""
267+
Splits strings on unescaped dots and removes escaping.
268+
269+
Args:
270+
field: A string representing a path to a field.
271+
272+
Returns:
273+
A list of nested fields to traverse.
274+
"""
275+
276+
# Convert the field and remove escaping:
277+
#
278+
# 1. "content.body.thing\.with\.dots"
279+
# 2. ["content", "body", "thing\.with\.dots"]
280+
# 3. ["content", "body", "thing.with.dots"]
281+
282+
# Find all dots (and their preceding backslashes). If the dot is unescaped
283+
# then emit a new field part.
284+
result = []
285+
prev_start = 0
286+
for match in SPLIT_FIELD_REGEX.finditer(field):
287+
# If the match is an *even* number of characters than the dot was escaped.
288+
if len(match.group()) % 2 == 0:
289+
continue
290+
291+
# Add a new part (up to the dot, exclusive) after escaping.
292+
result.append(
293+
ESCAPE_SEQUENCE_PATTERN.sub(
294+
_escape_slash, field[prev_start : match.end() - 1]
295+
)
296+
)
297+
prev_start = match.end()
298+
299+
# Add any part of the field after the last unescaped dot. (Note that if the
300+
# character is a dot this correctly adds a blank string.)
301+
result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:]))
302+
303+
return result
304+
305+
256306
def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
257307
"""Return a new dict with only the fields in 'dictionary' which are present
258308
in 'fields'.
259309
260310
If there are no event fields specified then all fields are included.
261311
The entries may include '.' characters to indicate sub-fields.
262312
So ['content.body'] will include the 'body' field of the 'content' object.
263-
A literal '.' character in a field name may be escaped using a '\'.
313+
A literal '.' or '\' character in a field name may be escaped using a '\'.
264314
265315
Args:
266316
dictionary: The dictionary to read from.
@@ -275,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
275325

276326
# for each field, convert it:
277327
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
278-
split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
279-
280-
# for each element of the output array of arrays:
281-
# remove escaping so we can use the right key names.
282-
split_fields[:] = [
283-
[f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
284-
]
328+
split_fields = [_split_field(f) for f in fields]
285329

286330
output: JsonDict = {}
287331
for field_array in split_fields:

tests/api/test_filtering.py

-6
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,6 @@ def test_errors_on_invalid_filters(self) -> None:
4848
invalid_filters: List[JsonDict] = [
4949
# `account_data` must be a dictionary
5050
{"account_data": "Hello World"},
51-
# `event_fields` entries must not contain backslashes
52-
{"event_fields": [r"\\foo"]},
5351
# `event_format` must be "client" or "federation"
5452
{"event_format": "other"},
5553
# `not_rooms` must contain valid room IDs
@@ -114,10 +112,6 @@ def test_valid_filters(self) -> None:
114112
"event_format": "client",
115113
"event_fields": ["type", "content", "sender"],
116114
},
117-
# a single backslash should be permitted (though it is debatable whether
118-
# it should be permitted before anything other than `.`, and what that
119-
# actually means)
120-
#
121115
# (note that event_fields is implemented in
122116
# synapse.events.utils.serialize_event, and so whether this actually works
123117
# is tested elsewhere. We just want to check that it is allowed through the

tests/events/test_utils.py

+39
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,15 @@
1616
from typing import Any, List, Mapping, Optional
1717

1818
import attr
19+
from parameterized import parameterized
1920

2021
from synapse.api.constants import EventContentFields
2122
from synapse.api.room_versions import RoomVersions
2223
from synapse.events import EventBase, make_event_from_dict
2324
from synapse.events.utils import (
2425
PowerLevelsContent,
2526
SerializeEventConfig,
27+
_split_field,
2628
copy_and_fixup_power_levels_contents,
2729
maybe_upsert_event_field,
2830
prune_event,
@@ -794,3 +796,40 @@ def test_invalid_types_raise_type_error(self) -> None:
794796
def test_invalid_nesting_raises_type_error(self) -> None:
795797
with self.assertRaises(TypeError):
796798
copy_and_fixup_power_levels_contents({"a": {"b": {"c": 1}}}) # type: ignore[dict-item]
799+
800+
801+
class SplitFieldTestCase(stdlib_unittest.TestCase):
802+
@parameterized.expand(
803+
[
804+
# A field with no dots.
805+
["m", ["m"]],
806+
# Simple dotted fields.
807+
["m.foo", ["m", "foo"]],
808+
["m.foo.bar", ["m", "foo", "bar"]],
809+
# Backslash is used as an escape character.
810+
[r"m\.foo", ["m.foo"]],
811+
[r"m\\.foo", ["m\\", "foo"]],
812+
[r"m\\\.foo", [r"m\.foo"]],
813+
[r"m\\\\.foo", ["m\\\\", "foo"]],
814+
[r"m\foo", [r"m\foo"]],
815+
[r"m\\foo", [r"m\foo"]],
816+
[r"m\\\foo", [r"m\\foo"]],
817+
[r"m\\\\foo", [r"m\\foo"]],
818+
# Ensure that escapes at the end don't cause issues.
819+
["m.foo\\", ["m", "foo\\"]],
820+
["m.foo\\", ["m", "foo\\"]],
821+
[r"m.foo\.", ["m", "foo."]],
822+
[r"m.foo\\.", ["m", "foo\\", ""]],
823+
[r"m.foo\\\.", ["m", r"foo\."]],
824+
# Empty parts (corresponding to properties which are an empty string) are allowed.
825+
[".m", ["", "m"]],
826+
["..m", ["", "", "m"]],
827+
["m.", ["m", ""]],
828+
["m..", ["m", "", ""]],
829+
["m..foo", ["m", "", "foo"]],
830+
# Invalid escape sequences.
831+
[r"\m", [r"\m"]],
832+
]
833+
)
834+
def test_split_field(self, input: str, expected: str) -> None:
835+
self.assertEqual(_split_field(input), expected)

0 commit comments

Comments
 (0)