103
103
--- Get a key to lookup index by `lookup_index_name` (part of `index_cache`).
104
104
---
105
105
--- @tparam table filter filter for objects, its keys (names of fields) will
106
- --- form the result
106
+ --- form the result of the function
107
107
---
108
108
--- @treturn string `name_list_str` (key for lookup by `lookup_index_name`)
109
109
local function filter_names_fingerprint (filter )
@@ -123,14 +123,91 @@ local function filter_names_fingerprint(filter)
123
123
return name_list_str
124
124
end
125
125
126
--- Get an index using the parts tree built by @{build_index_parts_tree}.
---
--- @tparam table node root of the prefix tree for a certain collection
---
--- @tparam table filter map of key-value pairs to filter objects against
---
--- @treturn string `index_name` of the found index or `nil` when no index
--- matches the filter
---
--- @treturn number `max_branch_len` is the number of index parts that will be
--- used at lookup plus 1 (because it counts the artificial root node as well
--- as other nodes)
---
--- Complexity
--- ----------
---
--- In short: O(SIZE(filter)^2 * COUNT(index parts for all indexes)).
---
--- Say we have N fields in the filter (N = SIZE(filter)), M indexes and K
--- index parts at max ((M * K) and COUNT(index parts for all indexes) both
--- are upper limits of the nodes count in the tree). We look for successors
--- for each filter item (<= N items) in each of the tree nodes (<= M * K
--- nodes), so we have O(I * N * (M * K)) of somewhat we call an 'iteration'
--- of complexity I. The most heavy operation within an iteration is
--- table.copy(); we can assume it has O(N) complexity. So the overall
--- complexity is O(N^2 * (M * K)).
---
--- We can consider the worst case scenario when any node has any of the
--- filter keys as a successor. In this case the nodes count is not a real
--- constraint for the recursion. In such a case we can calculate the
--- complexity as an iteration of weight I (calculated above as O(N^2)) with
--- an iteration count as permutations of N filter items (N!). So we'll have
--- O(N^2 * N!) or O(N^(3/2) * N^N) (Stirling's approximation).
---
--- Expectations
--- ------------
---
--- We expect a typical filter size of 1 or 2 and a tree depth (excluding the
--- artificial root node) of the same order. So despite the horrible
--- complexity estimation it is expected to be not-so-heavy. Our guess is
--- that it is worth trying hard to find the best index before a large
--- request.
---
--- Future optimizations
--- --------------------
---
--- * replace table.copy() with something more light: maybe a 'closed set' of
---   filter items or {remove filter[k], invoke the function, add back
---   filter[k]} (but it needs to be done in such a way that will not
---   invalidate pairs());
--- * cache the index name between block requests of the same collection
---   request (when we'll have a block executor) and maybe even between
---   different requests with the same filter keys.
local function get_best_matched_index(node, filter)
    local index_name = (node.index_names or {})[1]
    local max_branch_len = 1

    -- optimization: don't run the loop below if there are no successors of
    -- the current node
    if node.successors == nil then
        return index_name, 1
    end

    -- Try to descend by each filter key; the deepest branch that ends on a
    -- node holding an index name wins.
    for k, _ in pairs(filter) do
        local successor_node = (node.successors or {})[k]
        if successor_node ~= nil then
            -- copy the filter w/o the matched key, so the same key cannot be
            -- matched twice deeper in the recursion
            local new_filter = table.copy(filter)
            new_filter[k] = nil
            local branch_index_name, branch_len =
                get_best_matched_index(successor_node, new_filter)
            branch_len = branch_len + 1
            if branch_index_name ~= nil and branch_len > max_branch_len then
                index_name = branch_index_name
                max_branch_len = branch_len
            end
        end
    end

    return index_name, max_branch_len
end
204
+
126
205
-- XXX: raw idea: we can store field-to-field_no mapping when creating
127
206
-- `lookup_index_name` to faster form the value_list
128
207
129
208
--- Flatten filter values (transform to a list) against specific index to
130
209
--- passing it to index:pairs().
131
210
---
132
- --- Only full keys are supported for a compound index for now.
133
- ---
134
211
--- @tparam table self the data accessor
135
212
---
136
213
--- @tparam table filter filter for objects, its values will ordered to form
146
223
--- passed index
147
224
---
148
225
--- @treturn table `value_list` the value to pass to index:pairs()
226
+ ---
227
+ --- @treturn table `new_filter` the `filter` value w/o values extracted to
228
+ --- `value_list`
149
229
local function flatten_filter (self , filter , collection_name , index_name )
150
230
assert (type (self ) == ' table' ,
151
231
' self must be a table, got ' .. type (self ))
@@ -155,6 +235,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
155
235
' index_name must be a string, got ' .. type (index_name ))
156
236
157
237
local value_list = {}
238
+ local new_filter = table .copy (filter )
158
239
159
240
-- fill value_list
160
241
local index_meta = self .indexes [collection_name ][index_name ]
@@ -165,6 +246,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
165
246
local value = filter [field_name ]
166
247
if value == nil then break end
167
248
value_list [# value_list + 1 ] = value
249
+ new_filter [field_name ] = nil
168
250
end
169
251
170
252
-- check for correctness: non-empty value_list
@@ -174,26 +256,11 @@ local function flatten_filter(self, filter, collection_name, index_name)
174
256
json .encode (filter ), index_name ))
175
257
end
176
258
177
- -- check for correctness: all filter fields are used
178
- local count = 0
179
- for k , v in pairs (filter ) do
180
- count = count + 1
181
- end
182
- if count ~= # value_list then -- avoid extra json.encode()
183
- assert (count ~= # value_list ,
184
- (' filter items count does not match index fields count: ' ..
185
- ' filter: %s, index_name: %s' ):format (json .encode (filter ),
186
- index_name ))
187
- end
188
-
189
- local full_match = # value_list == # index_meta .fields
190
- return full_match , value_list
259
+ local full_match = # value_list == # index_meta .fields and
260
+ next (new_filter ) == nil
261
+ return full_match , value_list , new_filter
191
262
end
192
263
193
- -- XXX: support partial match for primary/secondary indexes and support to skip
194
- -- fields to get an index (full_match must be false in the case because
195
- -- returned items will be additionally filtered after unflatten).
196
-
197
264
--- Choose an index for lookup tuple(s) by a 'filter'. The filter holds fields
198
265
--- values of object(s) we want to find. It uses prebuilt `lookup_index_name`
199
266
--- table representing available indexes, which created by the
232
299
---
233
300
--- @treturn string `index_name` is name of the found index or nil
234
301
---
235
- --- @treturn table `value_list` is values list from the `filter` argument
236
- --- ordered in the such way that can be passed to the found index (has some
237
- --- meaning only when `index_name ~= nil`)
302
+ --- @treturn table `new_filter` is the filter value w/o values extracted into
303
+ --- `value_list`
304
+ ---
305
+ --- @treturn table `value_list` (optional) is values list from the `filter`
306
+ --- argument ordered in the such way that it can be passed to the found index
307
+ --- (has some meaning only when `index_name ~= nil`)
238
308
---
239
309
--- @treturn table `pivot` (optional) an offset argument represented depending
240
310
--- of a case: whether we'll lookup for the offset by an index; it is either
@@ -261,6 +331,10 @@ local get_index_name = function(self, collection_name, from, filter, args)
261
331
assert (type (lookup_index_name ) == ' table' ,
262
332
' lookup_index_name must be a table, got ' .. type (lookup_index_name ))
263
333
334
+ local parts_tree = index_cache .parts_tree
335
+ assert (type (parts_tree ) == ' table' ,
336
+ ' parts_tree must be a table, got ' .. type (parts_tree ))
337
+
264
338
local connection_indexes = index_cache .connection_indexes
265
339
assert (type (connection_indexes ) == ' table' ,
266
340
' connection_indexes must be a table, got ' .. type (connection_indexes ))
@@ -278,6 +352,7 @@ local get_index_name = function(self, collection_name, from, filter, args)
278
352
assert (connection_type ~= nil , ' connection_type must not be nil' )
279
353
local full_match = connection_type == ' 1:1' and next (filter ) == nil
280
354
local value_list = from .destination_args_values
355
+ local new_filter = filter
281
356
282
357
local pivot
283
358
if args .offset ~= nil then
@@ -298,21 +373,22 @@ local get_index_name = function(self, collection_name, from, filter, args)
298
373
pivot = {filter = pivot_filter }
299
374
end
300
375
301
- return full_match , index_name , value_list , pivot
376
+ return full_match , index_name , new_filter , value_list , pivot
302
377
end
303
378
304
379
-- The 'fast offset' case. Here we fetch top-level objects starting from
305
380
-- passed offset. Select will be performed by the primary index and
306
381
-- corresponding offset in `pivot.value_list`, then the result will be
307
- -- postprocessed using `filter `, if necessary.
382
+ -- postprocessed using `new_filter `, if necessary.
308
383
if args .offset ~= nil then
309
384
local index_name , index_meta = get_primary_index_meta (self ,
310
385
collection_name )
311
386
local full_match
312
387
local pivot_value_list
388
+ local new_filter = filter
313
389
if type (args .offset ) == ' table' then
314
- full_match , pivot_value_list = flatten_filter (self , args . offset ,
315
- collection_name , index_name )
390
+ full_match , pivot_value_list , new_filter = flatten_filter (self ,
391
+ args . offset , collection_name , index_name )
316
392
assert (full_match == true , ' offset by a partial key is forbidden' )
317
393
else
318
394
assert (# index_meta .fields == 1 ,
@@ -322,22 +398,34 @@ local get_index_name = function(self, collection_name, from, filter, args)
322
398
end
323
399
local pivot = {value_list = pivot_value_list }
324
400
full_match = full_match and next (filter ) == nil
325
- return full_match , index_name , filter , pivot
401
+ return full_match , index_name , new_filter , nil , pivot
326
402
end
327
403
328
404
-- The 'no offset' case. Here we fetch top-level object either by found
329
405
-- index or using full scan (if the index was not found).
406
+
407
+ -- try to find full index
330
408
local name_list_str = filter_names_fingerprint (filter )
331
409
assert (lookup_index_name [collection_name ] ~= nil ,
332
410
(' cannot find any index for collection "%s"' ):format (collection_name ))
333
411
local index_name = lookup_index_name [collection_name ][name_list_str ]
334
412
local full_match = false
335
413
local value_list = nil
414
+ local new_filter = filter
415
+
416
+ -- try to find partial index
417
+ if index_name == nil then
418
+ local root = parts_tree [collection_name ]
419
+ index_name = get_best_matched_index (root , filter )
420
+ end
421
+
422
+ -- fill full_match and value_list appropriately
336
423
if index_name ~= nil then
337
- full_match , value_list = flatten_filter (self , filter , collection_name ,
338
- index_name )
424
+ full_match , value_list , new_filter = flatten_filter (self , filter ,
425
+ collection_name , index_name )
339
426
end
340
- return full_match , index_name , value_list
427
+
428
+ return full_match , index_name , new_filter , value_list
341
429
end
342
430
343
431
--- Build `lookup_index_name` table (part of `index_cache`) to use in the
@@ -404,6 +492,67 @@ local function build_lookup_index_name(indexes)
404
492
return lookup_index_name
405
493
end
406
494
495
--- Build `parts_tree` to use in @{get_index_name} for looking up the best
--- matching index.
---
--- @tparam table indexes indexes metainformation as defined in the @{new}
--- function
---
--- Sketch example:
---
--- * collection_1:
---   * index 1 parts: foo, bar, baz;
---   * index 2 parts: foo, abc;
---   * index 3 parts: abc, efg, hij;
---   * index 4 parts: abc.
---
--- Resulting table of prefix trees (contains one field for collection_1):
---
--- ```
--- * collection_1:
---  \
---   + --> root node --> foo --> bar --> baz ~~> index 1
---        \             \
---         \             + --> abc ~~> index 2
---          \
---           + ------> abc --> efg --> hij ~~> index 3
---                      \
---                       + ~~> index 4
--- ```
---
--- @treturn table `roots` resulting table of prefix trees
---
--- * `roots` is a table which maps `collection names` to `root nodes` of
---   prefix trees;
--- * `collection name` is a string (name of a collection);
--- * `root node` is a table with the `successors` field;
--- * `successors` field value is a map from `index part` to `non-root node`;
--- * `index part` is a string (name of the corresponding field in an object);
--- * `non-root node` is a table with the `index_names` field and an optional
---   `successors` field;
--- * `index_names` field value is a list of `index name`s;
--- * `index name` is a string (name of an index).
local function build_index_parts_tree(indexes)
    local roots = {}

    for collection_name, indexes_meta in pairs(indexes) do
        local root = {}
        for index_name, index_meta in pairs(indexes_meta) do
            -- walk down from the root creating a node per index part; every
            -- visited node remembers each index whose parts pass through it
            local node = root
            for _, part in ipairs(index_meta.fields) do
                local successors = node.successors or {}
                node.successors = successors
                local child = successors[part] or {}
                successors[part] = child
                local names = child.index_names or {}
                child.index_names = names
                names[#names + 1] = index_name
                node = child
            end
        end
        roots[collection_name] = root
    end

    return roots
end
555
+
407
556
--- Build `connection_indexes` table (part of `index_cache`) to use in the
408
557
--- @{get_index_name} function.
409
558
---
493
642
--- Build the `index_cache` table of prebuilt auxiliary structures used by
--- @{get_index_name} for index lookup.
---
--- @tparam table indexes indexes metainformation as defined in the @{new}
--- function
---
--- @tparam table collections collections metainformation (passed through to
--- @{build_connection_indexes})
---
--- @treturn table `index_cache` with the `lookup_index_name`, `parts_tree`
--- and `connection_indexes` fields
local function build_index_cache(indexes, collections)
    local cache = {}
    cache.lookup_index_name = build_lookup_index_name(indexes)
    cache.parts_tree = build_index_parts_tree(indexes)
    cache.connection_indexes = build_connection_indexes(indexes, collections)
    return cache
end
@@ -687,8 +837,8 @@ local function select_internal(self, collection_name, from, filter, args, extra)
687
837
(' cannot find collection "%s"' ):format (collection_name ))
688
838
689
839
-- search for suitable index
690
- local full_match , index_name , index_value , pivot = get_index_name (
691
- self , collection_name , from , filter , args )
840
+ local full_match , index_name , filter , index_value , pivot = get_index_name (
841
+ self , collection_name , from , filter , args ) -- we redefine filter here
692
842
local index = index_name ~= nil and
693
843
self .funcs .get_index (collection_name , index_name ) or nil
694
844
if from .collection_name ~= ' Query' then
0 commit comments