22
22
Iterable ,
23
23
List ,
24
24
Mapping ,
25
+ Match ,
25
26
MutableMapping ,
26
27
Optional ,
27
28
Union ,
46
47
from synapse .handlers .relations import BundledAggregations
47
48
48
49
49
- # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
50
- # (?<!stuff) matches if the current position in the string is not preceded
51
- # by a match for 'stuff'.
52
- # TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
53
- # the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
54
- SPLIT_FIELD_REGEX = re .compile (r"(?<!\\)\." )
50
# Matches each "." together with the run of backslashes directly in front of it
# (so ".", "\." and "\\\." are all single matches). Whether the dot counts as
# escaped is decided by whoever inspects the match.
SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
# Matches one escape sequence: a backslash followed by any single character,
# which is captured as group 1.
ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")
55
54
56
55
# Integer bounds, symmetric around zero: +/-(2**53 - 1).
CANONICALJSON_MAX_INT = 2**53 - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
@@ -253,14 +252,65 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
253
252
sub_out_dict [key_to_move ] = sub_dict [key_to_move ]
254
253
255
254
255
def _escape_slash(m: Match[str]) -> str:
    """
    Replacement function; replace a backslash-backslash or backslash-dot with the
    second character. Leaves any other string alone.

    Args:
        m: A match whose group 1 is the character following the backslash and
            whose group 0 is the whole escape sequence.

    Returns:
        The bare backslash or dot for those two escape sequences, otherwise the
        full matched text unchanged.
    """
    escaped_char = m.group(1)
    # Compare against the single characters themselves: a literal containing
    # anything more than the one character could never equal group 1 when the
    # group captures exactly one character.
    if escaped_char in ("\\", "."):
        return escaped_char
    return m.group(0)
263
+
264
+
265
def _split_field(field: str) -> List[str]:
    """
    Splits strings on unescaped dots and removes escaping.

    Args:
        field: A string representing a path to a field.

    Returns:
        A list of nested fields to traverse.
    """

    # Convert the field and remove escaping:
    #
    # 1. "content.body.thing\.with\.dots"
    # 2. ["content", "body", "thing\.with\.dots"]
    # 3. ["content", "body", "thing.with.dots"]

    # Find all dots (and their preceding backslashes). If the dot is unescaped
    # then emit a new field part.
    result: List[str] = []
    prev_start = 0
    for match in SPLIT_FIELD_REGEX.finditer(field):
        # Each match is a run of backslashes followed by the dot. An *even*
        # total length means an odd number of backslashes, i.e. the last one
        # escapes the dot, so this is not a split point.
        if len(match.group()) % 2 == 0:
            continue

        # Add a new part (up to the dot, exclusive) after removing escaping.
        result.append(
            ESCAPE_SEQUENCE_PATTERN.sub(
                _escape_slash, field[prev_start : match.end() - 1]
            )
        )
        prev_start = match.end()

    # Add any part of the field after the last unescaped dot. (Note that if the
    # last character is a dot this correctly adds a blank string.) Use the same
    # compiled pattern as inside the loop so both paths unescape identically.
    result.append(ESCAPE_SEQUENCE_PATTERN.sub(_escape_slash, field[prev_start:]))

    return result
304
+
305
+
256
306
def only_fields (dictionary : JsonDict , fields : List [str ]) -> JsonDict :
257
307
"""Return a new dict with only the fields in 'dictionary' which are present
258
308
in 'fields'.
259
309
260
310
If there are no event fields specified then all fields are included.
261
311
The entries may include '.' characters to indicate sub-fields.
262
312
So ['content.body'] will include the 'body' field of the 'content' object.
263
- A literal '.' character in a field name may be escaped using a '\' .
313
+ A literal '.' or ' \' character in a field name may be escaped using a '\' .
264
314
265
315
Args:
266
316
dictionary: The dictionary to read from.
@@ -275,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
275
325
276
326
# for each field, convert it:
277
327
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
278
- split_fields = [SPLIT_FIELD_REGEX .split (f ) for f in fields ]
279
-
280
- # for each element of the output array of arrays:
281
- # remove escaping so we can use the right key names.
282
- split_fields [:] = [
283
- [f .replace (r"\." , r"." ) for f in field_array ] for field_array in split_fields
284
- ]
328
+ split_fields = [_split_field (f ) for f in fields ]
285
329
286
330
output : JsonDict = {}
287
331
for field_array in split_fields :
0 commit comments