Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Commit c6df7d0

Browse files
committed
Support lookup by a partial set of index parts
* Fixes #30. * Fixes #38.
1 parent d96bdae commit c6df7d0

File tree

5 files changed

+325
-45
lines changed

5 files changed

+325
-45
lines changed

graphql/accessor_general.lua

Lines changed: 184 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ end
101101
--- Get a key to lookup index by `lookup_index_name` (part of `index_cache`).
102102
---
103103
--- @tparam table filter filter for objects, its keys (names of fields) will
104-
--- form the result
104+
--- form the result of the function
105105
---
106106
--- @treturn string `name_list_str` (key for lookup by `lookup_index_name`)
107107
local function filter_names_fingerprint(filter)
@@ -121,14 +121,91 @@ local function filter_names_fingerprint(filter)
121121
return name_list_str
122122
end
123123

124+
--- Get an index using parts tree built by @{build_index_parts_tree}.
125+
---
126+
--- @tparam table node root of the prefix tree for certain collection
127+
---
128+
--- @tparam table filter map of key-value to filter objects against
129+
---
130+
--- @treturn string `index_name` of the found index or `nil` if no index found
131+
---
132+
--- @treturn number `max_branch_len` is a number of index parts will be used at
133+
--- lookup plus 1 (because it counts the artificial root node as well as other
134+
--- nodes)
135+
---
136+
--- Complexity
137+
--- ----------
138+
---
139+
--- In short: O(SIZE(filter)^2 * COUNT(index parts for all indexes)).
140+
---
141+
--- Say we have N fields in filter (N = SIZE(filter), M indexes and K index
142+
--- parts at max ((M * K) and COUNT(index parts for all indexes) are both
143+
--- upper limits of the nodes count in the tree). We look for successors for
144+
--- each filter item (<= N items) in each of the tree node (<= M * K nodes),
145+
--- so have O(I * N * (M * K)) of somewhat we call 'iteration' of I
146+
--- complexity. The heaviest operation within an iteration is table.copy(); we
147+
--- can assume it has O(N) complexity. So we have overall complexity O(N^2 *
148+
--- (M * K)).
149+
---
150+
--- We can consider worst case scenario when any node has any of filter keys as
151+
--- a successor. In this case nodes count is not real constraint for recursion.
152+
--- In such case we can calculate complexity as iteration of weight I
153+
--- (calculated above as O(N^2)) and iteration count as permutations of N
154+
--- filter items (N!). In such case we'll have O(N^2 * N!) or O(N^(3/2) * N^N)
155+
--- (Stirling's approximation).
156+
---
157+
--- Expectations
158+
--- ------------
159+
---
160+
--- We expect typical filter size as 1 or 2 and tree depth (excluding
161+
--- artificial root node) of the same order. So despite horrible complexity
162+
--- estimation it is expected to be not so heavy. Our guess is that it is
163+
--- worth trying hard to find the best index before a large request.
164+
---
165+
--- Future optimizations
166+
--- --------------------
167+
---
168+
--- * replace table.copy() with something more light: maybe 'closed set' of
169+
-- filter items or {remove filter[k], invoke the function, add
170+
--- back filter[k]} (but it needed to be done in such way that will not
171+
--- invalidate pairs());
172+
--- * cache index name btw block requests of the same collection request (when
173+
--- we'll have block executor) and maybe even btw different requests with the
174+
-- same filter keys.
175+
local function get_best_matched_index(node, filter)
176+
local index_name = (node.index_names or {})[1]
177+
local max_branch_len = 1
178+
179+
-- optimization: don't run the loop below if there are no successors of the
180+
-- current node
181+
if node.successors == nil then
182+
return index_name, 1
183+
end
184+
185+
for k, v in pairs(filter) do
186+
local successor_node = (node.successors or {})[k]
187+
if successor_node ~= nil then
188+
local new_filter = table.copy(filter)
189+
new_filter[k] = nil
190+
local branch_index_name, branch_len =
191+
get_best_matched_index(successor_node, new_filter)
192+
branch_len = branch_len + 1
193+
if branch_index_name ~= nil and branch_len > max_branch_len then
194+
index_name = branch_index_name
195+
max_branch_len = branch_len
196+
end
197+
end
198+
end
199+
200+
return index_name, max_branch_len
201+
end
202+
124203
-- XXX: raw idea: we can store field-to-field_no mapping when creating
125204
-- `lookup_index_name` to faster form the value_list
126205

127206
--- Flatten filter values (transform to a list) against specific index to
128207
--- passing it to index:pairs().
129208
---
130-
--- Only full keys are supported for a compound index for now.
131-
---
132209
--- @tparam table self the data accessor
133210
---
134211
--- @tparam table filter filter for objects, its values will ordered to form
@@ -144,6 +221,9 @@ end
144221
--- passed index
145222
---
146223
--- @treturn table `value_list` the value to pass to index:pairs()
224+
---
225+
--- @treturn table `new_filter` the `filter` value w/o values extracted to
226+
--- `value_list`
147227
local function flatten_filter(self, filter, collection_name, index_name)
148228
assert(type(self) == 'table',
149229
'self must be a table, got ' .. type(self))
@@ -153,6 +233,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
153233
'index_name must be a string, got ' .. type(index_name))
154234

155235
local value_list = {}
236+
local new_filter = table.copy(filter)
156237

157238
-- fill value_list
158239
local index_meta = self.indexes[collection_name][index_name]
@@ -163,6 +244,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
163244
local value = filter[field_name]
164245
if value == nil then break end
165246
value_list[#value_list + 1] = value
247+
new_filter[field_name] = nil
166248
end
167249

168250
-- check for correctness: non-empty value_list
@@ -172,26 +254,11 @@ local function flatten_filter(self, filter, collection_name, index_name)
172254
json.encode(filter), index_name))
173255
end
174256

175-
-- check for correctness: all filter fields are used
176-
local count = 0
177-
for k, v in pairs(filter) do
178-
count = count + 1
179-
end
180-
if count ~= #value_list then -- avoid extra json.encode()
181-
assert(count ~= #value_list,
182-
('filter items count does not match index fields count: ' ..
183-
'filter: %s, index_name: %s'):format(json.encode(filter),
184-
index_name))
185-
end
186-
187-
local full_match = #value_list == #index_meta.fields
188-
return full_match, value_list
257+
local full_match = #value_list == #index_meta.fields and
258+
next(new_filter) == nil
259+
return full_match, value_list, new_filter
189260
end
190261

191-
-- XXX: support partial match for primary/secondary indexes and support to skip
192-
-- fields to get an index (full_match must be false in the case because
193-
-- returned items will be additionally filtered after unflatten).
194-
195262
--- Choose an index for lookup tuple(s) by a 'filter'. The filter holds fields
196263
--- values of object(s) we want to find. It uses prebuilt `lookup_index_name`
197264
--- table representing available indexes, which created by the
@@ -230,9 +297,12 @@ end
230297
---
231298
--- @treturn string `index_name` is name of the found index or nil
232299
---
233-
--- @treturn table `value_list` is values list from the `filter` argument
234-
--- ordered in the such way that can be passed to the found index (has some
235-
--- meaning only when `index_name ~= nil`)
300+
--- @treturn table `new_filter` is the filter value w/o values extracted into
301+
--- `value_list`
302+
---
303+
--- @treturn table `value_list` (optional) is values list from the `filter`
304+
--- argument ordered in the such way that it can be passed to the found index
305+
-- (has some meaning only when `index_name ~= nil`)
236306
---
237307
--- @treturn table `pivot` (optional) an offset argument represented depending
238308
--- of a case: whether we'll lookup for the offset by an index; it is either
@@ -259,6 +329,10 @@ local get_index_name = function(self, collection_name, from, filter, args)
259329
assert(type(lookup_index_name) == 'table',
260330
'lookup_index_name must be a table, got ' .. type(lookup_index_name))
261331

332+
local parts_tree = index_cache.parts_tree
333+
assert(type(parts_tree) == 'table',
334+
'parts_tree must be a table, got ' .. type(parts_tree))
335+
262336
local connection_indexes = index_cache.connection_indexes
263337
assert(type(connection_indexes) == 'table',
264338
'connection_indexes must be a table, got ' .. type(connection_indexes))
@@ -276,6 +350,7 @@ local get_index_name = function(self, collection_name, from, filter, args)
276350
assert(connection_type ~= nil, 'connection_type must not be nil')
277351
local full_match = connection_type == '1:1' and next(filter) == nil
278352
local value_list = from.destination_args_values
353+
local new_filter = filter
279354

280355
local pivot
281356
if args.offset ~= nil then
@@ -296,21 +371,22 @@ local get_index_name = function(self, collection_name, from, filter, args)
296371
pivot = {filter = pivot_filter}
297372
end
298373

299-
return full_match, index_name, value_list, pivot
374+
return full_match, index_name, new_filter, value_list, pivot
300375
end
301376

302377
-- The 'fast offset' case. Here we fetch top-level objects starting from
303378
-- passed offset. Select will be performed by the primary index and
304379
-- corresponding offset in `pivot.value_list`, then the result will be
305-
-- postprocessed using `filter`, if necessary.
380+
-- postprocessed using `new_filter`, if necessary.
306381
if args.offset ~= nil then
307382
local index_name, index_meta = get_primary_index_meta(self,
308383
collection_name)
309384
local full_match
310385
local pivot_value_list
386+
local new_filter = filter
311387
if type(args.offset) == 'table' then
312-
full_match, pivot_value_list = flatten_filter(self, args.offset,
313-
collection_name, index_name)
388+
full_match, pivot_value_list, new_filter = flatten_filter(self,
389+
args.offset, collection_name, index_name)
314390
assert(full_match == true, 'offset by a partial key is forbidden')
315391
else
316392
assert(#index_meta.fields == 1,
@@ -320,22 +396,34 @@ local get_index_name = function(self, collection_name, from, filter, args)
320396
end
321397
local pivot = {value_list = pivot_value_list}
322398
full_match = full_match and next(filter) == nil
323-
return full_match, index_name, filter, pivot
399+
return full_match, index_name, new_filter, nil, pivot
324400
end
325401

326402
-- The 'no offset' case. Here we fetch top-level object either by found
327403
-- index or using full scan (if the index was not found).
404+
405+
-- try to find full index
328406
local name_list_str = filter_names_fingerprint(filter)
329407
assert(lookup_index_name[collection_name] ~= nil,
330408
('cannot find any index for collection "%s"'):format(collection_name))
331409
local index_name = lookup_index_name[collection_name][name_list_str]
332410
local full_match = false
333411
local value_list = nil
412+
local new_filter = filter
413+
414+
-- try to find partial index
415+
if index_name == nil then
416+
local root = parts_tree[collection_name]
417+
index_name = get_best_matched_index(root, filter)
418+
end
419+
420+
-- fill full_match and value_list appropriately
334421
if index_name ~= nil then
335-
full_match, value_list = flatten_filter(self, filter, collection_name,
336-
index_name)
422+
full_match, value_list, new_filter = flatten_filter(self, filter,
423+
collection_name, index_name)
337424
end
338-
return full_match, index_name, value_list
425+
426+
return full_match, index_name, new_filter, value_list
339427
end
340428

341429
--- Build `lookup_index_name` table (part of `index_cache`) to use in the
@@ -402,6 +490,67 @@ local function build_lookup_index_name(indexes)
402490
return lookup_index_name
403491
end
404492

493+
--- Build `parts_tree` to use in @{get_index_name} for lookup best matching
494+
--- index.
495+
---
496+
--- @tparam table indexes indexes metainformation as defined in the @{new}
497+
--- function
498+
---
499+
--- Sketch example:
500+
---
501+
--- * collection_1:
502+
--- * index 1 parts: foo, bar, baz;
503+
--- * index 2 parts: foo, abc;
504+
--- * index 3 parts: abc, efg, hij;
505+
--- * index 4 parts: abc.
506+
---
507+
--- Resulting table of prefix trees (contains one field for collection_1):
508+
---
509+
--- ```
510+
--- * collection_1:
511+
--- \
512+
--- + --> root node --> foo --> bar --> baz ~~> index 1
513+
--- \ \
514+
--- \ + --> abc ~~> index 2
515+
--- \
516+
--- + ------> abc --> efg --> hij ~~> index 3
517+
--- \
518+
--- + ~~> index 4
519+
--- ```
520+
---
521+
--- @treturn table `roots` resulting table of prefix trees
522+
---
523+
--- * `roots` is a table which maps `collection names` to `root nodes` of
524+
--- prefix trees;
525+
--- * 'collection name` is a string (name of a collection);
526+
--- * `root node` is a table with `successors` field;
527+
--- * `successors` field value is a map from `index part` to `non-root node`;
528+
--- * `index part` is a string (name of corresponding field in an object);
529+
--- * `non-root node` is a table with `index_names` field and optional
530+
--- `successors` field;
531+
--- * `index_names` field value is a list of `index name`;
532+
--- * `index name` is a string (name of an index).
533+
local function build_index_parts_tree(indexes)
534+
local roots = {}
535+
536+
for collection_name, indexes_meta in pairs(indexes) do
537+
local root = {}
538+
roots[collection_name] = root
539+
for index_name, index_meta in pairs(indexes_meta) do
540+
local cur = root
541+
for _, field in ipairs(index_meta.fields) do
542+
cur.successors = cur.successors or {}
543+
cur.successors[field] = cur.successors[field] or {}
544+
cur = cur.successors[field]
545+
cur.index_names = cur.index_names or {}
546+
cur.index_names[#cur.index_names + 1] = index_name
547+
end
548+
end
549+
end
550+
551+
return roots
552+
end
553+
405554
--- Build `connection_indexes` table (part of `index_cache`) to use in the
406555
--- @{get_index_name} function.
407556
---
@@ -491,6 +640,7 @@ end
491640
local function build_index_cache(indexes, collections)
492641
return {
493642
lookup_index_name = build_lookup_index_name(indexes),
643+
parts_tree = build_index_parts_tree(indexes),
494644
connection_indexes = build_connection_indexes(indexes, collections),
495645
}
496646
end
@@ -682,8 +832,8 @@ local function select_internal(self, collection_name, from, filter, args, extra)
682832
('cannot find collection "%s"'):format(collection_name))
683833

684834
-- search for suitable index
685-
local full_match, index_name, index_value, pivot = get_index_name(
686-
self, collection_name, from, filter, args)
835+
local full_match, index_name, filter, index_value, pivot = get_index_name(
836+
self, collection_name, from, filter, args) -- we redefine filter here
687837
local index = index_name ~= nil and
688838
self.funcs.get_index(collection_name, index_name) or nil
689839
if from ~= nil then

test/local/space_compound_index.result

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ user_collection:
77
user_num: 12
88
...
99

10+
RESULT
11+
---
12+
user_collection: []
13+
...
14+
1015
RESULT
1116
---
1217
user_collection:
@@ -117,6 +122,16 @@ user_collection:
117122
user_num: 20
118123
...
119124

125+
RESULT
126+
---
127+
user_collection: []
128+
...
129+
130+
RESULT
131+
---
132+
user_collection: []
133+
...
134+
120135
RESULT
121136
---
122137
user_collection:
@@ -157,6 +172,16 @@ user_collection:
157172
last_name: last name b
158173
...
159174

175+
RESULT
176+
---
177+
user_collection:
178+
- order_connection: []
179+
user_str: user_str_b
180+
first_name: first name b
181+
user_num: 12
182+
last_name: last name b
183+
...
184+
160185
RESULT
161186
---
162187
user_collection:

0 commit comments

Comments
 (0)