101
101
--- Get a key to lookup index by `lookup_index_name` (part of `index_cache`).
102
102
---
103
103
--- @tparam table filter filter for objects, its keys (names of fields) will
104
- --- form the result
104
+ --- form the result of the function
105
105
---
106
106
--- @treturn string `name_list_str` (key for lookup by `lookup_index_name`)
107
107
local function filter_names_fingerprint (filter )
@@ -121,14 +121,91 @@ local function filter_names_fingerprint(filter)
121
121
return name_list_str
122
122
end
123
123
124
--- Get an index using the parts tree built by @{build_index_parts_tree}.
---
--- @tparam table node root of the prefix tree for a certain collection
---
--- @tparam table filter map of key-value pairs to filter objects against
---
--- @treturn string `index_name` name of the found index or `nil`
---
--- @treturn number `max_branch_len` the number of index parts that will be
--- used at lookup plus 1 (because it counts the artificial root node as well
--- as the other nodes)
---
--- Complexity
--- ----------
---
--- In short: O(SIZE(filter)^2 * COUNT(index parts for all indexes)).
---
--- Say we have N fields in the filter (N = SIZE(filter)), M indexes and K
--- index parts at max ((M * K) and COUNT(index parts for all indexes) are
--- both upper limits of the nodes count in the tree). We look for successors
--- of each filter item (<= N items) in each of the tree nodes (<= M * K
--- nodes), so we have O(I * N * (M * K)) of something we call an 'iteration'
--- of complexity I. The heaviest operation within an iteration is
--- table.copy(), which we can assume has O(N) complexity. So we have an
--- overall complexity of O(N^2 * (M * K)).
---
--- We can consider the worst case scenario, when any node has any of the
--- filter keys as a successor. In this case the nodes count is not a real
--- constraint for the recursion. Then we can calculate the complexity as an
--- iteration of weight I (calculated above as O(N^2)) with an iteration
--- count equal to the permutations of N filter items (N!). In such a case
--- we'll have O(N^2 * N!) or O(N^(3/2) * N^N) (by Stirling's approximation).
---
--- Expectations
--- ------------
---
--- We expect a typical filter size of 1 or 2 and a tree depth (excluding the
--- artificial root node) of the same order. So despite the horrible
--- complexity estimation it is expected to be not so heavy. Our guess is
--- that it is worth trying hard to find the best index before a large
--- request.
---
--- Future optimizations
--- --------------------
---
--- * replace table.copy() with something lighter: maybe a 'closed set' of
---   filter items or {remove filter[k], invoke the function, add back
---   filter[k]} (but it needs to be done in such a way that it will not
---   invalidate pairs());
--- * cache the index name between block requests of the same collection
---   request (when we'll have a block executor) and maybe even between
---   different requests with the same filter keys.
local function get_best_matched_index(node, filter)
    -- index attached directly to the current node (if any)
    local index_name = (node.index_names or {})[1]
    local max_branch_len = 1

    -- optimization: don't run the loop below if there are no successors of
    -- the current node
    if node.successors == nil then
        return index_name, 1
    end

    -- try to extend the branch through every filter field that matches a
    -- successor node; keep the longest branch that ends with an index
    for field_name, _ in pairs(filter) do
        local successor_node = (node.successors or {})[field_name]
        if successor_node ~= nil then
            -- recurse with the matched field removed from the filter
            local rest_filter = table.copy(filter)
            rest_filter[field_name] = nil
            local branch_index_name, branch_len =
                get_best_matched_index(successor_node, rest_filter)
            branch_len = branch_len + 1
            if branch_index_name ~= nil and branch_len > max_branch_len then
                index_name = branch_index_name
                max_branch_len = branch_len
            end
        end
    end

    return index_name, max_branch_len
end
202
+
124
203
-- XXX: raw idea: we can store field-to-field_no mapping when creating
125
204
-- `lookup_index_name` to faster form the value_list
126
205
127
206
--- Flatten filter values (transform to a list) against specific index to
128
207
--- passing it to index:pairs().
129
208
---
130
- --- Only full keys are supported for a compound index for now.
131
- ---
132
209
--- @tparam table self the data accessor
133
210
---
134
211
--- @tparam table filter filter for objects, its values will ordered to form
144
221
--- passed index
145
222
---
146
223
--- @treturn table `value_list` the value to pass to index:pairs()
224
+ ---
225
+ --- @treturn table `new_filter` the `filter` value w/o values extracted to
226
+ --- `value_list`
147
227
local function flatten_filter (self , filter , collection_name , index_name )
148
228
assert (type (self ) == ' table' ,
149
229
' self must be a table, got ' .. type (self ))
@@ -153,6 +233,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
153
233
' index_name must be a string, got ' .. type (index_name ))
154
234
155
235
local value_list = {}
236
+ local new_filter = table .copy (filter )
156
237
157
238
-- fill value_list
158
239
local index_meta = self .indexes [collection_name ][index_name ]
@@ -163,6 +244,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
163
244
local value = filter [field_name ]
164
245
if value == nil then break end
165
246
value_list [# value_list + 1 ] = value
247
+ new_filter [field_name ] = nil
166
248
end
167
249
168
250
-- check for correctness: non-empty value_list
@@ -172,26 +254,11 @@ local function flatten_filter(self, filter, collection_name, index_name)
172
254
json .encode (filter ), index_name ))
173
255
end
174
256
175
- -- check for correctness: all filter fields are used
176
- local count = 0
177
- for k , v in pairs (filter ) do
178
- count = count + 1
179
- end
180
- if count ~= # value_list then -- avoid extra json.encode()
181
- assert (count ~= # value_list ,
182
- (' filter items count does not match index fields count: ' ..
183
- ' filter: %s, index_name: %s' ):format (json .encode (filter ),
184
- index_name ))
185
- end
186
-
187
- local full_match = # value_list == # index_meta .fields
188
- return full_match , value_list
257
+ local full_match = # value_list == # index_meta .fields and
258
+ next (new_filter ) == nil
259
+ return full_match , value_list , new_filter
189
260
end
190
261
191
- -- XXX: support partial match for primary/secondary indexes and support to skip
192
- -- fields to get an index (full_match must be false in the case because
193
- -- returned items will be additionally filtered after unflatten).
194
-
195
262
--- Choose an index for lookup tuple(s) by a 'filter'. The filter holds fields
196
263
--- values of object(s) we want to find. It uses prebuilt `lookup_index_name`
197
264
--- table representing available indexes, which created by the
230
297
---
231
298
--- @treturn string `index_name` is name of the found index or nil
232
299
---
233
- --- @treturn table `value_list` is values list from the `filter` argument
234
- --- ordered in the such way that can be passed to the found index (has some
235
- --- meaning only when `index_name ~= nil`)
300
+ --- @treturn table `new_filter` is the filter value w/o values extracted into
301
+ --- `value_list`
302
+ ---
303
+ --- @treturn table `value_list` (optional) is values list from the `filter`
304
+ --- argument ordered in such a way that it can be passed to the found index
305
+ -- (has some meaning only when `index_name ~= nil`)
236
306
---
237
307
--- @treturn table `pivot` (optional) an offset argument represented depending
238
308
--- of a case: whether we'll lookup for the offset by an index; it is either
@@ -259,6 +329,10 @@ local get_index_name = function(self, collection_name, from, filter, args)
259
329
assert (type (lookup_index_name ) == ' table' ,
260
330
' lookup_index_name must be a table, got ' .. type (lookup_index_name ))
261
331
332
+ local parts_tree = index_cache .parts_tree
333
+ assert (type (parts_tree ) == ' table' ,
334
+ ' parts_tree must be a table, got ' .. type (parts_tree ))
335
+
262
336
local connection_indexes = index_cache .connection_indexes
263
337
assert (type (connection_indexes ) == ' table' ,
264
338
' connection_indexes must be a table, got ' .. type (connection_indexes ))
@@ -276,6 +350,7 @@ local get_index_name = function(self, collection_name, from, filter, args)
276
350
assert (connection_type ~= nil , ' connection_type must not be nil' )
277
351
local full_match = connection_type == ' 1:1' and next (filter ) == nil
278
352
local value_list = from .destination_args_values
353
+ local new_filter = filter
279
354
280
355
local pivot
281
356
if args .offset ~= nil then
@@ -296,21 +371,22 @@ local get_index_name = function(self, collection_name, from, filter, args)
296
371
pivot = {filter = pivot_filter }
297
372
end
298
373
299
- return full_match , index_name , value_list , pivot
374
+ return full_match , index_name , new_filter , value_list , pivot
300
375
end
301
376
302
377
-- The 'fast offset' case. Here we fetch top-level objects starting from
303
378
-- passed offset. Select will be performed by the primary index and
304
379
-- corresponding offset in `pivot.value_list`, then the result will be
305
- -- postprocessed using `filter `, if necessary.
380
+ -- postprocessed using `new_filter `, if necessary.
306
381
if args .offset ~= nil then
307
382
local index_name , index_meta = get_primary_index_meta (self ,
308
383
collection_name )
309
384
local full_match
310
385
local pivot_value_list
386
+ local new_filter = filter
311
387
if type (args .offset ) == ' table' then
312
- full_match , pivot_value_list = flatten_filter (self , args . offset ,
313
- collection_name , index_name )
388
+ full_match , pivot_value_list , new_filter = flatten_filter (self ,
389
+ args . offset , collection_name , index_name )
314
390
assert (full_match == true , ' offset by a partial key is forbidden' )
315
391
else
316
392
assert (# index_meta .fields == 1 ,
@@ -320,22 +396,34 @@ local get_index_name = function(self, collection_name, from, filter, args)
320
396
end
321
397
local pivot = {value_list = pivot_value_list }
322
398
full_match = full_match and next (filter ) == nil
323
- return full_match , index_name , filter , pivot
399
+ return full_match , index_name , new_filter , nil , pivot
324
400
end
325
401
326
402
-- The 'no offset' case. Here we fetch top-level object either by found
327
403
-- index or using full scan (if the index was not found).
404
+
405
+ -- try to find full index
328
406
local name_list_str = filter_names_fingerprint (filter )
329
407
assert (lookup_index_name [collection_name ] ~= nil ,
330
408
(' cannot find any index for collection "%s"' ):format (collection_name ))
331
409
local index_name = lookup_index_name [collection_name ][name_list_str ]
332
410
local full_match = false
333
411
local value_list = nil
412
+ local new_filter = filter
413
+
414
+ -- try to find partial index
415
+ if index_name == nil then
416
+ local root = parts_tree [collection_name ]
417
+ index_name = get_best_matched_index (root , filter )
418
+ end
419
+
420
+ -- fill full_match and value_list appropriately
334
421
if index_name ~= nil then
335
- full_match , value_list = flatten_filter (self , filter , collection_name ,
336
- index_name )
422
+ full_match , value_list , new_filter = flatten_filter (self , filter ,
423
+ collection_name , index_name )
337
424
end
338
- return full_match , index_name , value_list
425
+
426
+ return full_match , index_name , new_filter , value_list
339
427
end
340
428
341
429
--- Build `lookup_index_name` table (part of `index_cache`) to use in the
@@ -402,6 +490,67 @@ local function build_lookup_index_name(indexes)
402
490
return lookup_index_name
403
491
end
404
492
493
--- Build `parts_tree` for use in @{get_index_name} to look up the best
--- matching index.
---
--- @tparam table indexes indexes metainformation as defined in the @{new}
--- function
---
--- Sketch example:
---
--- * collection_1:
---   * index 1 parts: foo, bar, baz;
---   * index 2 parts: foo, abc;
---   * index 3 parts: abc, efg, hij;
---   * index 4 parts: abc.
---
--- Resulting table of prefix trees (contains one field for collection_1):
---
--- ```
--- * collection_1:
---   \
---    + --> root node --> foo --> bar --> baz ~~> index 1
---           \             \
---            \             + --> abc ~~> index 2
---             \
---              + ------> abc --> efg --> hij ~~> index 3
---                         \
---                          + ~~> index 4
--- ```
---
--- @treturn table `roots` resulting table of prefix trees
---
--- * `roots` is a table which maps `collection names` to `root nodes` of
---   prefix trees;
--- * `collection name` is a string (name of a collection);
--- * `root node` is a table with a `successors` field;
--- * the `successors` field value is a map from an `index part` to a
---   `non-root node`;
--- * `index part` is a string (name of the corresponding field in an
---   object);
--- * `non-root node` is a table with an `index_names` field and an optional
---   `successors` field;
--- * the `index_names` field value is a list of `index name`s;
--- * `index name` is a string (name of an index).
local function build_index_parts_tree(indexes)
    local roots = {}

    for collection_name, indexes_meta in pairs(indexes) do
        local root = {}
        roots[collection_name] = root
        for index_name, index_meta in pairs(indexes_meta) do
            -- walk down the prefix tree along the index parts, creating
            -- nodes as needed, and record the index name on every node of
            -- its path
            local node = root
            for _, field in ipairs(index_meta.fields) do
                local successors = node.successors
                if successors == nil then
                    successors = {}
                    node.successors = successors
                end
                if successors[field] == nil then
                    successors[field] = {}
                end
                node = successors[field]
                local names = node.index_names
                if names == nil then
                    names = {}
                    node.index_names = names
                end
                names[#names + 1] = index_name
            end
        end
    end

    return roots
end
553
+
405
554
--- Build `connection_indexes` table (part of `index_cache`) to use in the
406
555
--- @{get_index_name} function.
407
556
---
491
640
--- Assemble the `index_cache` table bundling the per-collection index
--- lookup structures consumed by @{get_index_name}.
---
--- @tparam table indexes indexes metainformation as defined in the @{new}
--- function
---
--- @tparam table collections collections metainformation
---
--- @treturn table `index_cache` with `lookup_index_name`, `parts_tree` and
--- `connection_indexes` fields
local function build_index_cache(indexes, collections)
    local cache = {}
    cache.lookup_index_name = build_lookup_index_name(indexes)
    cache.parts_tree = build_index_parts_tree(indexes)
    cache.connection_indexes = build_connection_indexes(indexes, collections)
    return cache
end
@@ -682,8 +832,8 @@ local function select_internal(self, collection_name, from, filter, args, extra)
682
832
(' cannot find collection "%s"' ):format (collection_name ))
683
833
684
834
-- search for suitable index
685
- local full_match , index_name , index_value , pivot = get_index_name (
686
- self , collection_name , from , filter , args )
835
+ local full_match , index_name , filter , index_value , pivot = get_index_name (
836
+ self , collection_name , from , filter , args ) -- we redefine filter here
687
837
local index = index_name ~= nil and
688
838
self .funcs .get_index (collection_name , index_name ) or nil
689
839
if from ~= nil then
0 commit comments