101
101
--- Get a key to lookup index by `lookup_index_name` (part of `index_cache`).
102
102
---
103
103
--- @tparam table filter filter for objects, its keys (names of fields) will
104
- --- form the result
104
+ --- form the result of the function
105
105
---
106
106
--- @treturn string `name_list_str` (key for lookup by `lookup_index_name`)
107
107
local function filter_names_fingerprint (filter )
@@ -121,14 +121,86 @@ local function filter_names_fingerprint(filter)
121
121
return name_list_str
122
122
end
123
123
124
-- Recursive worker of get_best_matched_index().
--
-- `used` is the set of filter keys already consumed on the path from the root
-- to `node`. It replaces a per-branch copy of the filter: a key is marked
-- before descending into its successor and unmarked right after, so `filter`
-- itself is never modified.
local function find_best_index_in_subtree(node, filter, used)
    -- the first index registered on the node is the fallback answer; a node
    -- without `index_names` (the artificial root) gives nil
    local index_name = nil
    if node.index_names ~= nil then
        index_name = node.index_names[1]
    end

    -- optimization: don't run the loop below if there are no successors of
    -- the current node
    local successors = node.successors
    if successors == nil then
        return index_name, 1
    end

    local max_branch_len = 0
    for field_name in pairs(filter) do
        local successor_node = successors[field_name]
        if successor_node ~= nil and not used[field_name] then
            -- `used` is not the table being traversed, so it is safe to
            -- modify it inside the loop
            used[field_name] = true
            local branch_index_name, branch_len = find_best_index_in_subtree(
                successor_node, filter, used)
            used[field_name] = nil
            if branch_index_name ~= nil and branch_len > max_branch_len then
                index_name = branch_index_name
                max_branch_len = branch_len
            end
        end
    end

    return index_name, max_branch_len + 1
end

--- Get an index using parts tree built by @{build_index_parts_tree}.
---
--- The tree is descended only through successors whose index part is a key
--- of the filter, and the longest such chain wins. When several chains have
--- the same length, the winner depends on the pairs() traversal order of the
--- filter, which is unspecified.
---
--- @tparam table node root of the prefix tree for certain collection
---
--- @tparam table filter map of key-value to filter objects against (only its
--- keys are examined; the table is not modified)
---
--- @treturn string `index_name` is the name of the found index or `nil`
---
--- @treturn number `len` is the number of index parts that will be used at
--- lookup plus 1 (the artificial root node is counted like the others)
---
--- Complexity
--- ----------
---
--- In short: O(N * COUNT(index parts)).
---
--- Say we have N fields in filter. A node is visited only when every index
--- part on the path from the root to it is a key of the filter, so the count
--- of visited nodes is limited by the count of nodes in the tree, which is
--- COUNT(index parts) + 1 at most. Each visit scans the filter once (O(N))
--- and does not allocate, so we have O(N * COUNT(index parts)) overall.
---
--- Expectations
--- ------------
---
--- We expect typical filter size as 1 or 2 and tree depth (excluding
--- artificial root node) of the same order, so the lookup is expected to be
--- cheap. Our guess is that it is worth trying hard to find the best index
--- before a large request.
---
--- Future optimizations
--- --------------------
---
--- * cache index name between block requests of the same collection request
---   (when we'll have block executor) and maybe even between different
---   requests with the same filter keys.
local function get_best_matched_index(node, filter)
    assert(type(node) == 'table',
        'node must be a table, got ' .. type(node))
    assert(type(filter) == 'table',
        'filter must be a table, got ' .. type(filter))
    return find_best_index_in_subtree(node, filter, {})
end
197
+
124
198
-- XXX: raw idea: we can store field-to-field_no mapping when creating
125
199
-- `lookup_index_name` to faster form the value_list
126
200
127
201
--- Flatten filter values (transform to a list) against specific index to
128
202
--- passing it to index:pairs().
129
203
---
130
- --- Only full keys are supported for a compound index for now.
131
- ---
132
204
--- @tparam table self the data accessor
133
205
---
134
206
--- @tparam table filter filter for objects, its values will ordered to form
144
216
--- passed index
145
217
---
146
218
--- @treturn table `value_list` the value to pass to index:pairs()
219
+ ---
220
+ --- @treturn table `new_filter` the `filter` value w/o values extracted to
221
+ --- `value_list`
147
222
local function flatten_filter (self , filter , collection_name , index_name )
148
223
assert (type (self ) == ' table' ,
149
224
' self must be a table, got ' .. type (self ))
@@ -153,6 +228,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
153
228
' index_name must be a string, got ' .. type (index_name ))
154
229
155
230
local value_list = {}
231
+ local new_filter = table .copy (filter )
156
232
157
233
-- fill value_list
158
234
local index_meta = self .indexes [collection_name ][index_name ]
@@ -163,6 +239,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
163
239
local value = filter [field_name ]
164
240
if value == nil then break end
165
241
value_list [# value_list + 1 ] = value
242
+ new_filter [field_name ] = nil
166
243
end
167
244
168
245
-- check for correctness: non-empty value_list
@@ -172,26 +249,11 @@ local function flatten_filter(self, filter, collection_name, index_name)
172
249
json .encode (filter ), index_name ))
173
250
end
174
251
175
- -- check for correctness: all filter fields are used
176
- local count = 0
177
- for k , v in pairs (filter ) do
178
- count = count + 1
179
- end
180
- if count ~= # value_list then -- avoid extra json.encode()
181
- assert (count ~= # value_list ,
182
- (' filter items count does not match index fields count: ' ..
183
- ' filter: %s, index_name: %s' ):format (json .encode (filter ),
184
- index_name ))
185
- end
186
-
187
- local full_match = # value_list == # index_meta .fields
188
- return full_match , value_list
252
+ local full_match = # value_list == # index_meta .fields and
253
+ next (new_filter ) == nil
254
+ return full_match , value_list , new_filter
189
255
end
190
256
191
- -- XXX: support partial match for primary/secondary indexes and support to skip
192
- -- fields to get an index (full_match must be false in the case because
193
- -- returned items will be additionally filtered after unflatten).
194
-
195
257
--- Choose an index for lookup tuple(s) by a 'filter'. The filter holds fields
196
258
--- values of object(s) we want to find. It uses prebuilt `lookup_index_name`
197
259
--- table representing available indexes, which created by the
230
292
---
231
293
--- @treturn string `index_name` is name of the found index or nil
232
294
---
233
- --- @treturn table `value_list` is values list from the `filter` argument
234
- --- ordered in the such way that can be passed to the found index (has some
235
- --- meaning only when `index_name ~= nil`)
295
+ --- @treturn table `new_filter` is the filter value w/o values extracted into
296
+ --- `value_list`
297
+ ---
298
+ --- @treturn table `value_list` (optional) is values list from the `filter`
299
+ --- argument ordered in such a way that it can be passed to the found index
300
+ --- (has some meaning only when `index_name ~= nil`)
236
301
---
237
302
--- @treturn table `pivot` (optional) an offset argument represented depending
238
303
--- of a case: whether we'll lookup for the offset by an index; it is either
@@ -259,6 +324,10 @@ local get_index_name = function(self, collection_name, from, filter, args)
259
324
assert (type (lookup_index_name ) == ' table' ,
260
325
' lookup_index_name must be a table, got ' .. type (lookup_index_name ))
261
326
327
+ local parts_tree = index_cache .parts_tree
328
+ assert (type (parts_tree ) == ' table' ,
329
+ ' parts_tree must be a table, got ' .. type (parts_tree ))
330
+
262
331
local connection_indexes = index_cache .connection_indexes
263
332
assert (type (connection_indexes ) == ' table' ,
264
333
' connection_indexes must be a table, got ' .. type (connection_indexes ))
@@ -276,6 +345,7 @@ local get_index_name = function(self, collection_name, from, filter, args)
276
345
assert (connection_type ~= nil , ' connection_type must not be nil' )
277
346
local full_match = connection_type == ' 1:1' and next (filter ) == nil
278
347
local value_list = from .destination_args_values
348
+ local new_filter = filter
279
349
280
350
local pivot
281
351
if args .offset ~= nil then
@@ -296,21 +366,22 @@ local get_index_name = function(self, collection_name, from, filter, args)
296
366
pivot = {filter = pivot_filter }
297
367
end
298
368
299
- return full_match , index_name , value_list , pivot
369
+ return full_match , index_name , new_filter , value_list , pivot
300
370
end
301
371
302
372
-- The 'fast offset' case. Here we fetch top-level objects starting from
303
373
-- passed offset. Select will be performed by the primary index and
304
374
-- corresponding offset in `pivot.value_list`, then the result will be
305
- -- postprocessed using `filter `, if necessary.
375
+ -- postprocessed using `new_filter `, if necessary.
306
376
if args .offset ~= nil then
307
377
local index_name , index_meta = get_primary_index_meta (self ,
308
378
collection_name )
309
379
local full_match
310
380
local pivot_value_list
381
+ local new_filter = filter
311
382
if type (args .offset ) == ' table' then
312
- full_match , pivot_value_list = flatten_filter (self , args . offset ,
313
- collection_name , index_name )
383
+ full_match , pivot_value_list , new_filter = flatten_filter (self ,
384
+ args . offset , collection_name , index_name )
314
385
assert (full_match == true , ' offset by a partial key is forbidden' )
315
386
else
316
387
assert (# index_meta .fields == 1 ,
@@ -320,22 +391,34 @@ local get_index_name = function(self, collection_name, from, filter, args)
320
391
end
321
392
local pivot = {value_list = pivot_value_list }
322
393
full_match = full_match and next (filter ) == nil
323
- return full_match , index_name , filter , pivot
394
+ return full_match , index_name , new_filter , nil , pivot
324
395
end
325
396
326
397
-- The 'no offset' case. Here we fetch top-level object either by found
327
398
-- index or using full scan (if the index was not found).
399
+
400
+ -- try to find full index
328
401
local name_list_str = filter_names_fingerprint (filter )
329
402
assert (lookup_index_name [collection_name ] ~= nil ,
330
403
(' cannot find any index for collection "%s"' ):format (collection_name ))
331
404
local index_name = lookup_index_name [collection_name ][name_list_str ]
332
405
local full_match = false
333
406
local value_list = nil
407
+ local new_filter = filter
408
+
409
+ -- try to find partial index
410
+ if index_name == nil then
411
+ local root = parts_tree [collection_name ]
412
+ index_name = get_best_matched_index (root , filter )
413
+ end
414
+
415
+ -- fill full_match, value_list and new_filter appropriately
334
416
if index_name ~= nil then
335
- full_match , value_list = flatten_filter (self , filter , collection_name ,
336
- index_name )
417
+ full_match , value_list , new_filter = flatten_filter (self , filter ,
418
+ collection_name , index_name )
337
419
end
338
- return full_match , index_name , value_list
420
+
421
+ return full_match , index_name , new_filter , value_list
339
422
end
340
423
341
424
--- Build `lookup_index_name` table (part of `index_cache`) to use in the
@@ -402,6 +485,64 @@ local function build_lookup_index_name(indexes)
402
485
return lookup_index_name
403
486
end
404
487
488
+ --- Build `parts_tree` to use in @{get_index_name} to look up the best
489
+ --- matching index.
490
+ ---
491
+ --- @tparam table indexes indexes metainformation as defined in the @{new}
492
+ --- function
493
+ ---
494
+ --- Sketch example:
495
+ ---
496
+ --- * collection_1:
497
+ --- * index 1 parts: foo, bar, baz;
498
+ --- * index 2 parts: foo, abc;
499
+ --- * index 3 parts: abc, efg, hij.
500
+ ---
501
+ --- Resulting table of prefix trees (contains one field for collection_1):
502
+ ---
503
+ --- ```
504
+ --- * collection_1:
505
+ --- \
506
+ --- + --> root node --> foo --> bar --> baz
507
+ --- \ \
508
+ --- \ + --> abc
509
+ --- \
510
+ --- + ------> abc --> efg --> hij
511
+ --- ```
512
+ ---
513
+ --- @treturn table `roots` resulting table of prefix trees
514
+ ---
515
+ --- * `roots` is a table which maps `collection names` to `root nodes` of
516
+ --- prefix trees;
517
+ --- * `collection name` is a string (name of a collection);
518
+ --- * `root node` is a table with `successors` field;
519
+ --- * `successors` field value is a map from `index part` to `non-root node`;
520
+ --- * `index part` is a string (name of corresponding field in an object);
521
+ --- * `non-root node` is a table with `index_names` field and optional
522
+ --- `successors` field;
523
+ --- * `index_names` field value is a list of `index name`;
524
+ --- * `index name` is a string (name of an index).
525
local function build_index_parts_tree(indexes)
    local roots = {}

    for collection_name, indexes_meta in pairs(indexes) do
        -- Visit the indexes in a stable order (sorted by name). The order of
        -- pairs() is unspecified, while the position of a name within
        -- `index_names` matters for a node shared by several indexes: the
        -- lookup takes the first element of the list.
        local sorted_index_names = {}
        for index_name in pairs(indexes_meta) do
            sorted_index_names[#sorted_index_names + 1] = index_name
        end
        -- tostring() keeps the comparison valid even for non-string keys
        table.sort(sorted_index_names, function(a, b)
            return tostring(a) < tostring(b)
        end)

        -- the artificial root node: it has no `index_names` and gets
        -- `successors` only if the collection has an index with fields
        local root = {}
        roots[collection_name] = root

        for _, index_name in ipairs(sorted_index_names) do
            -- walk down from the root creating missing nodes, one node per
            -- index part; common prefixes of different indexes share nodes
            local cur = root
            for _, field in ipairs(indexes_meta[index_name].fields) do
                cur.successors = cur.successors or {}
                cur.successors[field] = cur.successors[field] or {}
                cur = cur.successors[field]
                -- each node on the path remembers the index passing
                -- through it
                cur.index_names = cur.index_names or {}
                cur.index_names[#cur.index_names + 1] = index_name
            end
        end
    end

    return roots
end
545
+
405
546
--- Build `connection_indexes` table (part of `index_cache`) to use in the
406
547
--- @{get_index_name} function.
407
548
---
491
632
local function build_index_cache(indexes, collections)
    -- Assemble the cache field by field; the builders are invoked in the same
    -- order as the fields are listed.
    local index_cache = {}
    index_cache.lookup_index_name = build_lookup_index_name(indexes)
    index_cache.parts_tree = build_index_parts_tree(indexes)
    index_cache.connection_indexes = build_connection_indexes(indexes,
        collections)
    return index_cache
end
@@ -682,8 +824,8 @@ local function select_internal(self, collection_name, from, filter, args, extra)
682
824
(' cannot find collection "%s"' ):format (collection_name ))
683
825
684
826
-- search for suitable index
685
- local full_match , index_name , index_value , pivot = get_index_name (
686
- self , collection_name , from , filter , args )
827
+ local full_match , index_name , filter , index_value , pivot = get_index_name (
828
+ self , collection_name , from , filter , args ) -- we redefine filter here
687
829
local index = index_name ~= nil and
688
830
self .funcs .get_index (collection_name , index_name ) or nil
689
831
if from ~= nil then
0 commit comments