Skip to content
This repository was archived by the owner on Apr 14, 2022. It is now read-only.

Commit ccd8fc3

Browse files
committed
Support lookup by a partial set of index parts
* Fixes #30. * Fixes #38.
1 parent 44b9155 commit ccd8fc3

File tree

5 files changed

+325
-45
lines changed

5 files changed

+325
-45
lines changed

graphql/accessor_general.lua

Lines changed: 184 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ end
103103
--- Get a key to lookup index by `lookup_index_name` (part of `index_cache`).
104104
---
105105
--- @tparam table filter filter for objects, its keys (names of fields) will
106-
--- form the result
106+
--- form the result of the function
107107
---
108108
--- @treturn string `name_list_str` (key for lookup by `lookup_index_name`)
109109
local function filter_names_fingerprint(filter)
@@ -123,14 +123,91 @@ local function filter_names_fingerprint(filter)
123123
return name_list_str
124124
end
125125

126+
--- Get an index using parts tree built by @{build_index_parts_tree}.
127+
---
128+
--- @tparam table node root of the prefix tree for certain collection
129+
---
130+
--- @tparam table filter map of key-value to filter objects against
131+
---
132+
--- @treturn string `index_name` of the found index or `nil` when no index fits
133+
---
134+
--- @treturn number `max_branch_len` is the number of index parts that will be
135+
--- used in the lookup, plus 1 (because the artificial root node is counted as
136+
--- well as other nodes)
137+
---
138+
--- Complexity
139+
--- ----------
140+
---
141+
--- In short: O(SIZE(filter)^2 * COUNT(index parts for all indexes)).
142+
---
143+
--- Say we have N fields in filter (N = SIZE(filter), M indexes and K index
144+
--- parts at max ((M * K) and COUNT(index parts for all indexes) are both
145+
--- upper limits of the nodes count in the tree). We look for successors for
146+
--- each filter item (<= N items) in each of the tree node (<= M * K nodes),
147+
--- so have O(I * N * (M * K)) of somewhat we call 'iteration' of I
148+
--- complexity. The heaviest operation within an iteration is table.copy(); we
149+
--- can assume it has O(N) complexity. So we have overall complexity O(N^2 *
150+
--- (M * K)).
151+
---
152+
--- We can consider worst case scenario when any node has any of filter keys as
153+
--- a successor. In this case nodes count is not real constraint for recursion.
154+
--- In such case we can calculate complexity as iteration of weight I
155+
--- (calculated above as O(N^2)) and iteration count as permutations of N
156+
--- filter items (N!). In such case we'll have O(N^2 * N!) or O(N^(3/2) * N^N)
157+
--- (Stirling's approximation).
158+
---
159+
--- Expectations
160+
--- ------------
161+
---
162+
--- We expect typical filter size as 1 or 2 and tree depth (excluding
163+
--- artificial root node) of the same order. So despite horrible complexity
164+
--- estimation it is expected to be not-so-heavy. Our guess is that it is worth
165+
--- trying hard to find the best index before a large request.
166+
---
167+
--- Future optimizations
168+
--- --------------------
169+
---
170+
--- * replace table.copy() with something more light: maybe 'closed set' of
171+
--- filter items or {remove filter[k], invoke the function, add
172+
--- back filter[k]} (but it needs to be done in such a way that it will not
173+
--- invalidate pairs());
174+
--- * cache index name btw block requests of the same collection request (when
175+
--- we'll have block executor) and maybe even btw different requests with the
176+
--- same filter keys.
177+
--- Depth-first walk over the parts tree: pick the index that covers the
--- longest chain of filter fields (contract documented in the LDoc block
--- above).
local function get_best_matched_index(node, filter)
    -- the index attached to the current node (if any) is the fallback result
    local best_name = node.index_names and node.index_names[1] or nil
    local best_len = 1

    -- leaf node: nothing to descend into, report this node's index (if any)
    if node.successors == nil then
        return best_name, best_len
    end

    for field in pairs(filter) do
        local child = node.successors[field]
        if child ~= nil then
            -- recurse with `field` removed from a shallow copy of the filter
            local rest_filter = table.copy(filter)
            rest_filter[field] = nil
            local child_name, child_len =
                get_best_matched_index(child, rest_filter)
            child_len = child_len + 1
            if child_name ~= nil and child_len > best_len then
                best_name = child_name
                best_len = child_len
            end
        end
    end

    return best_name, best_len
end
204+
126205
-- XXX: raw idea: we can store field-to-field_no mapping when creating
127206
-- `lookup_index_name` to faster form the value_list
128207

129208
--- Flatten filter values (transform to a list) against specific index to
130209
--- passing it to index:pairs().
131210
---
132-
--- Only full keys are supported for a compound index for now.
133-
---
134211
--- @tparam table self the data accessor
135212
---
136213
--- @tparam table filter filter for objects, its values will ordered to form
@@ -146,6 +223,9 @@ end
146223
--- passed index
147224
---
148225
--- @treturn table `value_list` the value to pass to index:pairs()
226+
---
227+
--- @treturn table `new_filter` the `filter` value w/o values extracted to
228+
--- `value_list`
149229
local function flatten_filter(self, filter, collection_name, index_name)
150230
assert(type(self) == 'table',
151231
'self must be a table, got ' .. type(self))
@@ -155,6 +235,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
155235
'index_name must be a string, got ' .. type(index_name))
156236

157237
local value_list = {}
238+
local new_filter = table.copy(filter)
158239

159240
-- fill value_list
160241
local index_meta = self.indexes[collection_name][index_name]
@@ -165,6 +246,7 @@ local function flatten_filter(self, filter, collection_name, index_name)
165246
local value = filter[field_name]
166247
if value == nil then break end
167248
value_list[#value_list + 1] = value
249+
new_filter[field_name] = nil
168250
end
169251

170252
-- check for correctness: non-empty value_list
@@ -174,26 +256,11 @@ local function flatten_filter(self, filter, collection_name, index_name)
174256
json.encode(filter), index_name))
175257
end
176258

177-
-- check for correctness: all filter fields are used
178-
local count = 0
179-
for k, v in pairs(filter) do
180-
count = count + 1
181-
end
182-
if count ~= #value_list then -- avoid extra json.encode()
183-
assert(count ~= #value_list,
184-
('filter items count does not match index fields count: ' ..
185-
'filter: %s, index_name: %s'):format(json.encode(filter),
186-
index_name))
187-
end
188-
189-
local full_match = #value_list == #index_meta.fields
190-
return full_match, value_list
259+
local full_match = #value_list == #index_meta.fields and
260+
next(new_filter) == nil
261+
return full_match, value_list, new_filter
191262
end
192263

193-
-- XXX: support partial match for primary/secondary indexes and support to skip
194-
-- fields to get an index (full_match must be false in the case because
195-
-- returned items will be additionally filtered after unflatten).
196-
197264
--- Choose an index for lookup tuple(s) by a 'filter'. The filter holds fields
198265
--- values of object(s) we want to find. It uses prebuilt `lookup_index_name`
199266
--- table representing available indexes, which created by the
@@ -232,9 +299,12 @@ end
232299
---
233300
--- @treturn string `index_name` is name of the found index or nil
234301
---
235-
--- @treturn table `value_list` is values list from the `filter` argument
236-
--- ordered in the such way that can be passed to the found index (has some
237-
--- meaning only when `index_name ~= nil`)
302+
--- @treturn table `new_filter` is the filter value w/o values extracted into
303+
--- `value_list`
304+
---
305+
--- @treturn table `value_list` (optional) is values list from the `filter`
306+
--- argument ordered in the such way that it can be passed to the found index
307+
--- (has some meaning only when `index_name ~= nil`)
238308
---
239309
--- @treturn table `pivot` (optional) an offset argument represented depending
240310
--- of a case: whether we'll lookup for the offset by an index; it is either
@@ -261,6 +331,10 @@ local get_index_name = function(self, collection_name, from, filter, args)
261331
assert(type(lookup_index_name) == 'table',
262332
'lookup_index_name must be a table, got ' .. type(lookup_index_name))
263333

334+
local parts_tree = index_cache.parts_tree
335+
assert(type(parts_tree) == 'table',
336+
'parts_tree must be a table, got ' .. type(parts_tree))
337+
264338
local connection_indexes = index_cache.connection_indexes
265339
assert(type(connection_indexes) == 'table',
266340
'connection_indexes must be a table, got ' .. type(connection_indexes))
@@ -278,6 +352,7 @@ local get_index_name = function(self, collection_name, from, filter, args)
278352
assert(connection_type ~= nil, 'connection_type must not be nil')
279353
local full_match = connection_type == '1:1' and next(filter) == nil
280354
local value_list = from.destination_args_values
355+
local new_filter = filter
281356

282357
local pivot
283358
if args.offset ~= nil then
@@ -298,21 +373,22 @@ local get_index_name = function(self, collection_name, from, filter, args)
298373
pivot = {filter = pivot_filter}
299374
end
300375

301-
return full_match, index_name, value_list, pivot
376+
return full_match, index_name, new_filter, value_list, pivot
302377
end
303378

304379
-- The 'fast offset' case. Here we fetch top-level objects starting from
305380
-- passed offset. Select will be performed by the primary index and
306381
-- corresponding offset in `pivot.value_list`, then the result will be
307-
-- postprocessed using `filter`, if necessary.
382+
-- postprocessed using `new_filter`, if necessary.
308383
if args.offset ~= nil then
309384
local index_name, index_meta = get_primary_index_meta(self,
310385
collection_name)
311386
local full_match
312387
local pivot_value_list
388+
local new_filter = filter
313389
if type(args.offset) == 'table' then
314-
full_match, pivot_value_list = flatten_filter(self, args.offset,
315-
collection_name, index_name)
390+
full_match, pivot_value_list, new_filter = flatten_filter(self,
391+
args.offset, collection_name, index_name)
316392
assert(full_match == true, 'offset by a partial key is forbidden')
317393
else
318394
assert(#index_meta.fields == 1,
@@ -322,22 +398,34 @@ local get_index_name = function(self, collection_name, from, filter, args)
322398
end
323399
local pivot = {value_list = pivot_value_list}
324400
full_match = full_match and next(filter) == nil
325-
return full_match, index_name, filter, pivot
401+
return full_match, index_name, new_filter, nil, pivot
326402
end
327403

328404
-- The 'no offset' case. Here we fetch top-level object either by found
329405
-- index or using full scan (if the index was not found).
406+
407+
-- try to find full index
330408
local name_list_str = filter_names_fingerprint(filter)
331409
assert(lookup_index_name[collection_name] ~= nil,
332410
('cannot find any index for collection "%s"'):format(collection_name))
333411
local index_name = lookup_index_name[collection_name][name_list_str]
334412
local full_match = false
335413
local value_list = nil
414+
local new_filter = filter
415+
416+
-- try to find partial index
417+
if index_name == nil then
418+
local root = parts_tree[collection_name]
419+
index_name = get_best_matched_index(root, filter)
420+
end
421+
422+
-- fill full_match and value_list appropriately
336423
if index_name ~= nil then
337-
full_match, value_list = flatten_filter(self, filter, collection_name,
338-
index_name)
424+
full_match, value_list, new_filter = flatten_filter(self, filter,
425+
collection_name, index_name)
339426
end
340-
return full_match, index_name, value_list
427+
428+
return full_match, index_name, new_filter, value_list
341429
end
342430

343431
--- Build `lookup_index_name` table (part of `index_cache`) to use in the
@@ -404,6 +492,67 @@ local function build_lookup_index_name(indexes)
404492
return lookup_index_name
405493
end
406494

495+
--- Build `parts_tree` to use in @{get_index_name} for lookup best matching
496+
--- index.
497+
---
498+
--- @tparam table indexes indexes metainformation as defined in the @{new}
499+
--- function
500+
---
501+
--- Sketch example:
502+
---
503+
--- * collection_1:
504+
--- * index 1 parts: foo, bar, baz;
505+
--- * index 2 parts: foo, abc;
506+
--- * index 3 parts: abc, efg, hij;
507+
--- * index 4 parts: abc.
508+
---
509+
--- Resulting table of prefix trees (contains one field for collection_1):
510+
---
511+
--- ```
512+
--- * collection_1:
513+
--- \
514+
--- + --> root node --> foo --> bar --> baz ~~> index 1
515+
--- \ \
516+
--- \ + --> abc ~~> index 2
517+
--- \
518+
--- + ------> abc --> efg --> hij ~~> index 3
519+
--- \
520+
--- + ~~> index 4
521+
--- ```
522+
---
523+
--- @treturn table `roots` resulting table of prefix trees
524+
---
525+
--- * `roots` is a table which maps `collection names` to `root nodes` of
526+
--- prefix trees;
527+
--- * 'collection name` is a string (name of a collection);
528+
--- * `root node` is a table with `successors` field;
529+
--- * `successors` field value is a map from `index part` to `non-root node`;
530+
--- * `index part` is a string (name of corresponding field in an object);
531+
--- * `non-root node` is a table with `index_names` field and optional
532+
--- `successors` field;
533+
--- * `index_names` field value is a list of `index name`;
534+
--- * `index name` is a string (name of an index).
535+
--- Build the per-collection prefix trees of index parts (see the LDoc block
--- above for the shape of the result).
local function build_index_parts_tree(indexes)
    local roots = {}

    for collection_name, collection_indexes in pairs(indexes) do
        -- one artificial root node per collection
        local root = {}
        roots[collection_name] = root
        for index_name, index_meta in pairs(collection_indexes) do
            -- thread this index's field list through the tree, creating
            -- missing nodes on the way and recording the index name on each
            -- node of the chain
            local node = root
            for _, part in ipairs(index_meta.fields) do
                if node.successors == nil then
                    node.successors = {}
                end
                local next_node = node.successors[part]
                if next_node == nil then
                    next_node = {}
                    node.successors[part] = next_node
                end
                node = next_node
                if node.index_names == nil then
                    node.index_names = {}
                end
                table.insert(node.index_names, index_name)
            end
        end
    end

    return roots
end
555+
407556
--- Build `connection_indexes` table (part of `index_cache`) to use in the
408557
--- @{get_index_name} function.
409558
---
@@ -493,6 +642,7 @@ end
493642
--- Assemble the index cache from its three parts: the exact-match lookup
--- table, the index-parts prefix trees and the connection indexes.
local function build_index_cache(indexes, collections)
    local cache = {}
    cache.lookup_index_name = build_lookup_index_name(indexes)
    cache.parts_tree = build_index_parts_tree(indexes)
    cache.connection_indexes = build_connection_indexes(indexes, collections)
    return cache
end
@@ -687,8 +837,8 @@ local function select_internal(self, collection_name, from, filter, args, extra)
687837
('cannot find collection "%s"'):format(collection_name))
688838

689839
-- search for suitable index
690-
local full_match, index_name, index_value, pivot = get_index_name(
691-
self, collection_name, from, filter, args)
840+
local full_match, index_name, filter, index_value, pivot = get_index_name(
841+
self, collection_name, from, filter, args) -- we redefine filter here
692842
local index = index_name ~= nil and
693843
self.funcs.get_index(collection_name, index_name) or nil
694844
if from.collection_name ~= 'Query' then

test/local/space_compound_index.result

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ user_collection:
77
user_num: 12
88
...
99

10+
RESULT
11+
---
12+
user_collection: []
13+
...
14+
1015
RESULT
1116
---
1217
user_collection:
@@ -117,6 +122,16 @@ user_collection:
117122
user_num: 20
118123
...
119124

125+
RESULT
126+
---
127+
user_collection: []
128+
...
129+
130+
RESULT
131+
---
132+
user_collection: []
133+
...
134+
120135
RESULT
121136
---
122137
user_collection:
@@ -157,6 +172,16 @@ user_collection:
157172
last_name: last name b
158173
...
159174

175+
RESULT
176+
---
177+
user_collection:
178+
- order_connection: []
179+
user_str: user_str_b
180+
first_name: first name b
181+
user_num: 12
182+
last_name: last name b
183+
...
184+
160185
RESULT
161186
---
162187
user_collection:

0 commit comments

Comments
 (0)