Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Commit b7ab8ee

Browse files
committed
Support lookup by a partial set of index parts
* Fixes #30. * Fixes #38.
1 parent d96bdae commit b7ab8ee

File tree

5 files changed

+317
-45
lines changed

5 files changed

+317
-45
lines changed

graphql/accessor_general.lua

Lines changed: 176 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ end
101101
--- Get a key to lookup index by `lookup_index_name` (part of `index_cache`).
102102
---
103103
--- @tparam table filter filter for objects, its keys (names of fields) will
104-
--- form the result
104+
--- form the result of the function
105105
---
106106
--- @treturn string `name_list_str` (key for lookup by `lookup_index_name`)
107107
local function filter_names_fingerprint(filter)
@@ -121,14 +121,86 @@ local function filter_names_fingerprint(filter)
121121
return name_list_str
122122
end
123123

124+
--- Get an index using parts tree built by @{build_index_parts_tree}.
125+
---
126+
--- @tparam table node root of the prefix tree for certain collection
127+
---
128+
--- @tparam table filter map of key-value to filter objects against
129+
---
130+
--- @treturn string `index_name` the name of the found index or `nil` if no
131+
---
132+
--- @treturn number `len` is a number of index parts will be used at lookup
133+
--- plus 1 (because it calculated artificial root node as others)
134+
---
135+
--- Complexity
136+
--- ----------
137+
---
138+
--- In short: O(N^2 * COUNT(index parts)).
139+
---
140+
--- Say we have N fields in filter, M indexes and K index parts at max. We look
141+
--- for successors for each filter item (<= N items) in each of the tree node
142+
--- (<= M * K nodes), so have O(I * N * (M * K)) of somewhat we call 'iteration'
143+
--- of I complexity. The heaviest operation within an iteration is table.copy(),
144+
--- we can assume it has O(N) complexity. So we have overall complexity O(N^2 *
145+
--- (M * K)). Nodes count (M * K) can be limited upside as count of index
146+
--- parts in all indexes, so we have O(N^2 * COUNT(index parts)).
147+
---
148+
--- We can consider worst case scenario when any node has any of filter keys as
149+
--- a successor. In this case nodes count is not real constraint for recursion.
150+
--- In such case we can calculate complexity as iteration of weight I
151+
--- (calculated above as O(N^2)) and iteration count as permutations of N
152+
--- filter items (N!). In such case we'll have O(N^2 * N!) or O(N^(3/2) * N^N)
153+
--- (Stirling's approximation).
154+
---
155+
--- Expectations
156+
--- ------------
157+
---
158+
--- We expect typical filter size as 1 or 2 and tree depth (excluding
159+
--- artificial root node) of the same order. So despite horrible complexity
160+
--- estimation it is expected to be not-so-heavy. Our guess is that it is worth
161+
--- try hard to find best index before a large request.
162+
---
163+
--- Future optimizations
164+
--- --------------------
165+
---
166+
--- * replace table.copy() with something more light (maybe 'closed set' of
167+
--- filter items);
168+
--- * cache index name btw block requests of the same collection request (when
169+
--- we'll have block executor) and maybe even btw different requests with the
170+
--- same filter keys.
171+
local function get_best_matched_index(node, filter)
    -- an index whose parts end exactly at this node (if any) is already a
    -- candidate answer
    local best_index_name = (node.index_names or {})[1]
    local best_depth = 0

    -- leaf node: no deeper match is possible, stop the recursion here
    if node.successors == nil then
        return best_index_name, 1
    end

    for field_name in pairs(filter) do
        local child = node.successors[field_name]
        if child ~= nil then
            -- consume this filter key and search the subtree with the rest
            local remaining = table.copy(filter)
            remaining[field_name] = nil
            local child_index_name, child_depth =
                get_best_matched_index(child, remaining)
            -- keep the deepest branch that actually ends in an index
            if child_index_name ~= nil and child_depth > best_depth then
                best_index_name = child_index_name
                best_depth = child_depth
            end
        end
    end

    return best_index_name, best_depth + 1
end
197+
124198
-- XXX: raw idea: we can store field-to-field_no mapping when creating
125199
-- `lookup_index_name` to faster form the value_list
126200

127201
--- Flatten filter values (transform to a list) against specific index to
128202
--- passing it to index:pairs().
129203
---
130-
--- Only full keys are supported for a compound index for now.
131-
---
132204
--- @tparam table self the data accessor
133205
---
134206
--- @tparam table filter filter for objects, its values will ordered to form
@@ -144,6 +216,9 @@ end
144216
--- passed index
145217
---
146218
--- @treturn table `value_list` the value to pass to index:pairs()
219+
---
220+
--- @treturn table `new_filter` the `filter` value w/o values extracted to
221+
--- `value_list`
147222
local function flatten_filter(self, filter, collection_name, index_name)
148223
assert(type(self) == 'table',
149224
'self must be a table, got ' .. type(self))
@@ -153,6 +228,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
153228
'index_name must be a string, got ' .. type(index_name))
154229

155230
local value_list = {}
231+
local new_filter = table.copy(filter)
156232

157233
-- fill value_list
158234
local index_meta = self.indexes[collection_name][index_name]
@@ -163,6 +239,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
163239
local value = filter[field_name]
164240
if value == nil then break end
165241
value_list[#value_list + 1] = value
242+
new_filter[field_name] = nil
166243
end
167244

168245
-- check for correctness: non-empty value_list
@@ -172,26 +249,11 @@ local function flatten_filter(self, filter, collection_name, index_name)
172249
json.encode(filter), index_name))
173250
end
174251

175-
-- check for correctness: all filter fields are used
176-
local count = 0
177-
for k, v in pairs(filter) do
178-
count = count + 1
179-
end
180-
if count ~= #value_list then -- avoid extra json.encode()
181-
assert(count ~= #value_list,
182-
('filter items count does not match index fields count: ' ..
183-
'filter: %s, index_name: %s'):format(json.encode(filter),
184-
index_name))
185-
end
186-
187-
local full_match = #value_list == #index_meta.fields
188-
return full_match, value_list
252+
local full_match = #value_list == #index_meta.fields and
253+
next(new_filter) == nil
254+
return full_match, value_list, new_filter
189255
end
190256

191-
-- XXX: support partial match for primary/secondary indexes and support to skip
192-
-- fields to get an index (full_match must be false in the case because
193-
-- returned items will be additionally filtered after unflatten).
194-
195257
--- Choose an index for lookup tuple(s) by a 'filter'. The filter holds fields
196258
--- values of object(s) we want to find. It uses prebuilt `lookup_index_name`
197259
--- table representing available indexes, which created by the
@@ -230,9 +292,12 @@ end
230292
---
231293
--- @treturn string `index_name` is name of the found index or nil
232294
---
233-
--- @treturn table `value_list` is values list from the `filter` argument
234-
--- ordered in the such way that can be passed to the found index (has some
235-
--- meaning only when `index_name ~= nil`)
295+
--- @treturn table `new_filter` is the filter value w/o values extracted into
296+
--- `value_list`
297+
---
298+
--- @treturn table `value_list` (optional) is values list from the `filter`
299+
--- argument ordered in the such way that it can be passed to the found index
300+
-- (has some meaning only when `index_name ~= nil`)
236301
---
237302
--- @treturn table `pivot` (optional) an offset argument represented depending
238303
--- of a case: whether we'll lookup for the offset by an index; it is either
@@ -259,6 +324,10 @@ local get_index_name = function(self, collection_name, from, filter, args)
259324
assert(type(lookup_index_name) == 'table',
260325
'lookup_index_name must be a table, got ' .. type(lookup_index_name))
261326

327+
local parts_tree = index_cache.parts_tree
328+
assert(type(parts_tree) == 'table',
329+
'parts_tree must be a table, got ' .. type(parts_tree))
330+
262331
local connection_indexes = index_cache.connection_indexes
263332
assert(type(connection_indexes) == 'table',
264333
'connection_indexes must be a table, got ' .. type(connection_indexes))
@@ -276,6 +345,7 @@ local get_index_name = function(self, collection_name, from, filter, args)
276345
assert(connection_type ~= nil, 'connection_type must not be nil')
277346
local full_match = connection_type == '1:1' and next(filter) == nil
278347
local value_list = from.destination_args_values
348+
local new_filter = filter
279349

280350
local pivot
281351
if args.offset ~= nil then
@@ -296,21 +366,22 @@ local get_index_name = function(self, collection_name, from, filter, args)
296366
pivot = {filter = pivot_filter}
297367
end
298368

299-
return full_match, index_name, value_list, pivot
369+
return full_match, index_name, new_filter, value_list, pivot
300370
end
301371

302372
-- The 'fast offset' case. Here we fetch top-level objects starting from
303373
-- passed offset. Select will be performed by the primary index and
304374
-- corresponding offset in `pivot.value_list`, then the result will be
305-
-- postprocessed using `filter`, if necessary.
375+
-- postprocessed using `new_filter`, if necessary.
306376
if args.offset ~= nil then
307377
local index_name, index_meta = get_primary_index_meta(self,
308378
collection_name)
309379
local full_match
310380
local pivot_value_list
381+
local new_filter = filter
311382
if type(args.offset) == 'table' then
312-
full_match, pivot_value_list = flatten_filter(self, args.offset,
313-
collection_name, index_name)
383+
full_match, pivot_value_list, new_filter = flatten_filter(self,
384+
args.offset, collection_name, index_name)
314385
assert(full_match == true, 'offset by a partial key is forbidden')
315386
else
316387
assert(#index_meta.fields == 1,
@@ -320,22 +391,34 @@ local get_index_name = function(self, collection_name, from, filter, args)
320391
end
321392
local pivot = {value_list = pivot_value_list}
322393
full_match = full_match and next(filter) == nil
323-
return full_match, index_name, filter, pivot
394+
return full_match, index_name, new_filter, nil, pivot
324395
end
325396

326397
-- The 'no offset' case. Here we fetch top-level object either by found
327398
-- index or using full scan (if the index was not found).
399+
400+
-- try to find full index
328401
local name_list_str = filter_names_fingerprint(filter)
329402
assert(lookup_index_name[collection_name] ~= nil,
330403
('cannot find any index for collection "%s"'):format(collection_name))
331404
local index_name = lookup_index_name[collection_name][name_list_str]
332405
local full_match = false
333406
local value_list = nil
407+
local new_filter = filter
408+
409+
-- try to find partial index
410+
if index_name == nil then
411+
local root = parts_tree[collection_name]
412+
index_name = get_best_matched_index(root, filter)
413+
end
414+
415+
-- fill full_match and value_list appropriately
334416
if index_name ~= nil then
335-
full_match, value_list = flatten_filter(self, filter, collection_name,
336-
index_name)
417+
full_match, value_list, new_filter = flatten_filter(self, filter,
418+
collection_name, index_name)
337419
end
338-
return full_match, index_name, value_list
420+
421+
return full_match, index_name, new_filter, value_list
339422
end
340423

341424
--- Build `lookup_index_name` table (part of `index_cache`) to use in the
@@ -402,6 +485,64 @@ local function build_lookup_index_name(indexes)
402485
return lookup_index_name
403486
end
404487

488+
--- Build `parts_tree` to use in @{get_index_name} for lookup best matching
489+
--- index.
490+
---
491+
--- @tparam table indexes indexes metainformation as defined in the @{new}
492+
--- function
493+
---
494+
--- Sketch example:
495+
---
496+
--- * collection_1:
497+
--- * index 1 parts: foo, bar, baz;
498+
--- * index 2 parts: foo, abc;
499+
--- * index 3 parts: abc, efg, hij.
500+
---
501+
--- Resulting table of prefix trees (contains one field for collection_1):
502+
---
503+
--- ```
504+
--- * collection_1:
505+
--- \
506+
--- + --> root node --> foo --> bar --> baz
507+
--- \ \
508+
--- \ + --> abc
509+
--- \
510+
--- + ------> abc --> efg --> hij
511+
--- ```
512+
---
513+
--- @treturn table `roots` resulting table of prefix trees
514+
---
515+
--- * `roots` is a table which maps `collection names` to `root nodes` of
516+
--- prefix trees;
517+
--- * 'collection name` is a string (name of a collection);
518+
--- * `root node` is a table with `successors` field;
519+
--- * `successors` field value is a map from `index part` to `non-root node`;
520+
--- * `index part` is a string (name of corresponding field in an object);
521+
--- * `non-root node` is a table with `index_names` field and optional
522+
--- `successors` field;
523+
--- * `index_names` field value is a list of `index name`;
524+
--- * `index name` is a string (name of an index).
525+
local function build_index_parts_tree(indexes)
    local trees = {}

    for collection_name, collection_indexes in pairs(indexes) do
        -- artificial root node: has only `successors`, no `index_names`
        local root_node = {}
        trees[collection_name] = root_node
        for index_name, index_meta in pairs(collection_indexes) do
            -- insert the index parts list as a path into the prefix tree,
            -- recording the index name on every node along the path
            local node = root_node
            for _, part in ipairs(index_meta.fields) do
                if node.successors == nil then
                    node.successors = {}
                end
                local next_node = node.successors[part]
                if next_node == nil then
                    next_node = {}
                    node.successors[part] = next_node
                end
                node = next_node
                node.index_names = node.index_names or {}
                table.insert(node.index_names, index_name)
            end
        end
    end

    return trees
end
545+
405546
--- Build `connection_indexes` table (part of `index_cache`) to use in the
406547
--- @{get_index_name} function.
407548
---
@@ -491,6 +632,7 @@ end
491632
--- Assemble all per-collection index lookup structures into one cache table.
local function build_index_cache(indexes, collections)
    local cache = {}
    -- filter-fields fingerprint -> index name map
    cache.lookup_index_name = build_lookup_index_name(indexes)
    -- prefix trees of index parts for partial-key lookup
    cache.parts_tree = build_index_parts_tree(indexes)
    -- indexes serving 1:1 / 1:N connections
    cache.connection_indexes = build_connection_indexes(indexes, collections)
    return cache
end
@@ -682,8 +824,8 @@ local function select_internal(self, collection_name, from, filter, args, extra)
682824
('cannot find collection "%s"'):format(collection_name))
683825

684826
-- search for suitable index
685-
local full_match, index_name, index_value, pivot = get_index_name(
686-
self, collection_name, from, filter, args)
827+
local full_match, index_name, filter, index_value, pivot = get_index_name(
828+
self, collection_name, from, filter, args) -- we redefine filter here
687829
local index = index_name ~= nil and
688830
self.funcs.get_index(collection_name, index_name) or nil
689831
if from ~= nil then

test/local/space_compound_index.result

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ user_collection:
77
user_num: 12
88
...
99

10+
RESULT
11+
---
12+
user_collection: []
13+
...
14+
1015
RESULT
1116
---
1217
user_collection:
@@ -117,6 +122,16 @@ user_collection:
117122
user_num: 20
118123
...
119124

125+
RESULT
126+
---
127+
user_collection: []
128+
...
129+
130+
RESULT
131+
---
132+
user_collection: []
133+
...
134+
120135
RESULT
121136
---
122137
user_collection:
@@ -157,6 +172,16 @@ user_collection:
157172
last_name: last name b
158173
...
159174

175+
RESULT
176+
---
177+
user_collection:
178+
- order_connection: []
179+
user_str: user_str_b
180+
first_name: first name b
181+
user_num: 12
182+
last_name: last name b
183+
...
184+
160185
RESULT
161186
---
162187
user_collection:

0 commit comments

Comments
 (0)