Skip to content
This repository was archived by the owner on Jun 26, 2023. It is now read-only.

Commit 4c92071

Browse files
authored
Datastore based pinner (#4)
feat: store pins in datastore instead of a DAG. Adds a new `/pins` namespace to the given datastore and uses that to store pins as cbor binary, keyed by unique pin ID. The new datastore pinner stores pins in the datastore as individual key-value items. This is faster than the dag pinner, which stored all pins in a single dag that had to be rewritten every time a pin was added or removed. The new pinner provides a secondary indexing mechanism that can be used to index any data that a pin has. Secondary indexing logic is provided by the `dsindex` package. The new pinner currently includes indexing by CID. Both the new datastore pinner (`dspinner` package) and the old dag pinner (`ipldpinner` package) implementations are included to support migration between the two. Migration logic is provided by the `pinconv` package. Other features in new pinner: - Benchmarks are provided to compare performance between the old and new pinners - New pinner does not keep an in-memory set of pinned CIDs, instead it relies on the datastore - Separate recursive and direct CID indexes allow searching for pins without having to load pin data to check the mode - New pinner can rebuild indexes on load, if saved pins appear out of sync with the indexes
1 parent 9e800d1 commit 4c92071

File tree

15 files changed

+3635
-540
lines changed

15 files changed

+3635
-540
lines changed

.gitignore

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
*~
2+
*.log
3+
4+
# Test binary, build with `go test -c`
5+
*.test
6+
7+
# Output of the go coverage tool
8+
*.out

dsindex/error.go

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package dsindex
2+
3+
import "errors"
4+
5+
// ErrEmptyKey is the error reported when an index key is the empty string.
var ErrEmptyKey = errors.New("key is empty")

// ErrEmptyValue is the error reported when an index value is the empty
// string.
var ErrEmptyValue = errors.New("value is empty")

dsindex/indexer.go

+285
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
// Package dsindex provides secondary indexing functionality for a datastore.
2+
package dsindex
3+
4+
import (
5+
"context"
6+
"fmt"
7+
"path"
8+
9+
ds "github.com/ipfs/go-datastore"
10+
"github.com/ipfs/go-datastore/namespace"
11+
"github.com/ipfs/go-datastore/query"
12+
"github.com/multiformats/go-multibase"
13+
)
14+
15+
// Indexer manages a secondary index: a set of key-value mappings in which
// each key is a secondary index entry that maps to one or more values, and
// each value is a unique key of the item being indexed.
type Indexer interface {
	// Add associates value with key.
	Add(ctx context.Context, key, value string) error

	// Delete removes value from key. No error is returned when the value is
	// not present in the datastore.
	Delete(ctx context.Context, key, value string) error

	// DeleteKey removes every value stored under key and reports how many
	// values were removed. No error is returned when the key is not present
	// in the datastore.
	DeleteKey(ctx context.Context, key string) (count int, err error)

	// DeleteAll removes every key managed by this Indexer and reports how
	// many values were removed.
	DeleteAll(ctx context.Context) (count int, err error)

	// ForEach invokes fn once per value stored under key, stopping when the
	// values are exhausted or when fn returns false. An empty key iterates
	// over all keys.
	ForEach(ctx context.Context, key string, fn func(key, value string) bool) error

	// HasValue reports whether value is stored under key.
	HasValue(ctx context.Context, key, value string) (bool, error)

	// HasAny reports whether at least one value is stored under key. An
	// empty key searches all values.
	HasAny(ctx context.Context, key string) (bool, error)

	// Search returns every value stored under key.
	Search(ctx context.Context, key string) (values []string, err error)
}
50+
51+
// indexer is a simple implementation of Indexer. This implementation relies
52+
// on the underlying data store to support efficient querying by prefix.
53+
//
54+
// TODO: Consider adding caching
55+
type indexer struct {
56+
dstore ds.Datastore
57+
}
58+
59+
// New creates a new datastore index. All indexes are stored under the
60+
// specified index name.
61+
//
62+
// To persist the actions of calling Indexer functions, it is necessary to call
63+
// dstore.Sync.
64+
func New(dstore ds.Datastore, name ds.Key) Indexer {
65+
return &indexer{
66+
dstore: namespace.Wrap(dstore, name),
67+
}
68+
}
69+
70+
func (x *indexer) Add(ctx context.Context, key, value string) error {
71+
if key == "" {
72+
return ErrEmptyKey
73+
}
74+
if value == "" {
75+
return ErrEmptyValue
76+
}
77+
dsKey := ds.NewKey(encode(key)).ChildString(encode(value))
78+
return x.dstore.Put(dsKey, []byte{})
79+
}
80+
81+
func (x *indexer) Delete(ctx context.Context, key, value string) error {
82+
if key == "" {
83+
return ErrEmptyKey
84+
}
85+
if value == "" {
86+
return ErrEmptyValue
87+
}
88+
return x.dstore.Delete(ds.NewKey(encode(key)).ChildString(encode(value)))
89+
}
90+
91+
func (x *indexer) DeleteKey(ctx context.Context, key string) (int, error) {
92+
if key == "" {
93+
return 0, ErrEmptyKey
94+
}
95+
return x.deletePrefix(ctx, encode(key))
96+
}
97+
98+
func (x *indexer) DeleteAll(ctx context.Context) (int, error) {
99+
return x.deletePrefix(ctx, "")
100+
}
101+
102+
func (x *indexer) ForEach(ctx context.Context, key string, fn func(key, value string) bool) error {
103+
if key != "" {
104+
key = encode(key)
105+
}
106+
107+
q := query.Query{
108+
Prefix: key,
109+
KeysOnly: true,
110+
}
111+
results, err := x.dstore.Query(q)
112+
if err != nil {
113+
return err
114+
}
115+
116+
for {
117+
r, ok := results.NextSync()
118+
if !ok {
119+
break
120+
}
121+
if r.Error != nil {
122+
err = r.Error
123+
break
124+
}
125+
if ctx.Err() != nil {
126+
err = ctx.Err()
127+
break
128+
}
129+
ent := r.Entry
130+
decIdx, err := decode(path.Base(path.Dir(ent.Key)))
131+
if err != nil {
132+
err = fmt.Errorf("cannot decode index: %v", err)
133+
break
134+
}
135+
decKey, err := decode(path.Base(ent.Key))
136+
if err != nil {
137+
err = fmt.Errorf("cannot decode key: %v", err)
138+
break
139+
}
140+
if !fn(decIdx, decKey) {
141+
break
142+
}
143+
}
144+
results.Close()
145+
146+
return err
147+
}
148+
149+
func (x *indexer) HasValue(ctx context.Context, key, value string) (bool, error) {
150+
if key == "" {
151+
return false, ErrEmptyKey
152+
}
153+
if value == "" {
154+
return false, ErrEmptyValue
155+
}
156+
return x.dstore.Has(ds.NewKey(encode(key)).ChildString(encode(value)))
157+
}
158+
159+
func (x *indexer) HasAny(ctx context.Context, key string) (bool, error) {
160+
var any bool
161+
err := x.ForEach(ctx, key, func(key, value string) bool {
162+
any = true
163+
return false
164+
})
165+
return any, err
166+
}
167+
168+
func (x *indexer) Search(ctx context.Context, key string) ([]string, error) {
169+
if key == "" {
170+
return nil, ErrEmptyKey
171+
}
172+
ents, err := x.queryPrefix(ctx, encode(key))
173+
if err != nil {
174+
return nil, err
175+
}
176+
if len(ents) == 0 {
177+
return nil, nil
178+
}
179+
180+
values := make([]string, len(ents))
181+
for i := range ents {
182+
values[i], err = decode(path.Base(ents[i].Key))
183+
if err != nil {
184+
return nil, fmt.Errorf("cannot decode value: %v", err)
185+
}
186+
}
187+
return values, nil
188+
}
189+
190+
// SyncIndex synchronizes the keys in the target Indexer to match those of the
191+
// ref Indexer. This function does not change this indexer's key root (name
192+
// passed into New).
193+
func SyncIndex(ctx context.Context, ref, target Indexer) (bool, error) {
194+
// Build reference index map
195+
refs := map[string]string{}
196+
err := ref.ForEach(ctx, "", func(key, value string) bool {
197+
refs[value] = key
198+
return true
199+
})
200+
if err != nil {
201+
return false, err
202+
}
203+
if len(refs) == 0 {
204+
return false, nil
205+
}
206+
207+
// Compare current indexes
208+
dels := map[string]string{}
209+
err = target.ForEach(ctx, "", func(key, value string) bool {
210+
refKey, ok := refs[value]
211+
if ok && refKey == key {
212+
// same in both; delete from refs, do not add to dels
213+
delete(refs, value)
214+
} else {
215+
dels[value] = key
216+
}
217+
return true
218+
})
219+
if err != nil {
220+
return false, err
221+
}
222+
223+
// Items in dels are keys that no longer exist
224+
for value, key := range dels {
225+
err = target.Delete(ctx, key, value)
226+
if err != nil {
227+
return false, err
228+
}
229+
}
230+
231+
// What remains in refs are keys that need to be added
232+
for value, key := range refs {
233+
err = target.Add(ctx, key, value)
234+
if err != nil {
235+
return false, err
236+
}
237+
}
238+
239+
return len(refs) != 0 || len(dels) != 0, nil
240+
}
241+
242+
func (x *indexer) deletePrefix(ctx context.Context, prefix string) (int, error) {
243+
ents, err := x.queryPrefix(ctx, prefix)
244+
if err != nil {
245+
return 0, err
246+
}
247+
248+
for i := range ents {
249+
err = x.dstore.Delete(ds.NewKey(ents[i].Key))
250+
if err != nil {
251+
return 0, err
252+
}
253+
}
254+
255+
return len(ents), nil
256+
}
257+
258+
func (x *indexer) queryPrefix(ctx context.Context, prefix string) ([]query.Entry, error) {
259+
q := query.Query{
260+
Prefix: prefix,
261+
KeysOnly: true,
262+
}
263+
results, err := x.dstore.Query(q)
264+
if err != nil {
265+
return nil, err
266+
}
267+
return results.Rest()
268+
}
269+
270+
func encode(data string) string {
271+
encData, err := multibase.Encode(multibase.Base64url, []byte(data))
272+
if err != nil {
273+
// programming error; using unsupported encoding
274+
panic(err.Error())
275+
}
276+
return encData
277+
}
278+
279+
func decode(data string) (string, error) {
280+
_, b, err := multibase.Decode(data)
281+
if err != nil {
282+
return "", err
283+
}
284+
return string(b), nil
285+
}

0 commit comments

Comments
 (0)