Skip to content

Commit 475ed65

Browse files
authored
Merge pull request #38 from ipfs/feat/preload
add an ADL to preload hamt loading
2 parents 4d36ba6 + f9e443f commit 475ed65

File tree

5 files changed

+75
-2
lines changed

5 files changed

+75
-2
lines changed

Diff for: README.md

+11
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@ This is an IPLD ADL that provides string based pathing for protobuf nodes. The t
44

55
Note that while it works internally with go-codec-dagpb, the Reify method (used to get a UnixFSNode from a DagPB node) should actually work successfully with go-ipld-prime-proto nodes.
66

7+
## Usage
8+
9+
The primary interaction with this package is to register an ADL on a link system. This is done via a helper method.
10+
11+
```go
12+
AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem)
13+
```
14+
15+
For link systems which have UnixFS reification registered, two ADLs will be available to the [`InterpretAs`](https://ipld.io/specs/selectors/) selector: 'unixfs' and 'unixfs-preload'. The difference between these two ADLs is that the preload variant will access all blocks within a UnixFS Object (file or directory) when that object is accessed by a selector traversal. The non-preload variant, in contrast, will only access the subset of blocks strictly needed for the traversal. In practice, this means the subset of a sharded directory needed to access a specific file, or the sub-range of a file directly accessed by a range selector.
16+
17+
718
## License
819

920
Apache-2.0/MIT © Protocol Labs

Diff for: file/file.go

+19
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,25 @@ func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSyst
3333
}, nil
3434
}
3535

36+
// NewUnixFSFileWithPreload is the same as NewUnixFSFile but it performs a full load of constituent
37+
// blocks where the file spans multiple blocks. This is useful where a system needs to watch the
38+
// LinkSystem for block loads to determine which blocks make up this file.
39+
// NewUnixFSFileWithPreload is used by the "unixfs-preload" reifier.
40+
func NewUnixFSFileWithPreload(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) {
41+
f, err := NewUnixFSFile(ctx, substrate, lsys)
42+
if err != nil {
43+
return nil, err
44+
}
45+
r, err := f.AsLargeBytes()
46+
if err != nil {
47+
return nil, err
48+
}
49+
if _, err := io.Copy(io.Discard, r); err != nil {
50+
return nil, err
51+
}
52+
return f, nil
53+
}
54+
3655
// A LargeBytesNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type.
3756
type LargeBytesNode interface {
3857
ipld.Node

Diff for: hamt/shardeddir.go

+16
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,22 @@ func NewUnixFSHAMTShard(ctx context.Context, substrate dagpb.PBNode, data data.U
5454
}, nil
5555
}
5656

57+
// NewUnixFSHAMTShardWithPreload attempts to construct a UnixFSHAMTShard node from the base protobuf node plus
58+
// a decoded UnixFSData structure, and then iterate through and load the full set of hamt shards.
59+
func NewUnixFSHAMTShardWithPreload(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) {
60+
n, err := NewUnixFSHAMTShard(ctx, substrate, data, lsys)
61+
if err != nil {
62+
return n, err
63+
}
64+
65+
traverse := n.Length()
66+
if traverse == -1 {
67+
return n, fmt.Errorf("could not fully explore hamt during preload")
68+
}
69+
70+
return n, nil
71+
}
72+
5773
func (n UnixFSHAMTShard) Substrate() ipld.Node {
5874
return n._substrate
5975
}

Diff for: reification.go

+28-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ import (
1515
// Reify looks at an ipld Node and tries to interpret it as a UnixFSNode
1616
// if successful, it returns the UnixFSNode
1717
func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) {
18+
return doReify(lnkCtx, maybePBNodeRoot, lsys, true)
19+
}
20+
21+
// nonLazyReify works like Reify but will load all of a directory or file as it reaches them.
22+
func nonLazyReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) {
23+
return doReify(lnkCtx, maybePBNodeRoot, lsys, false)
24+
}
25+
26+
func doReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem, lazy bool) (ipld.Node, error) {
1827
pbNode, ok := maybePBNodeRoot.(dagpb.PBNode)
1928
if !ok {
2029
return maybePBNodeRoot, nil
@@ -28,7 +37,12 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy
2837
// we could not decode the UnixFS data, therefore, not UnixFS
2938
return defaultReifier(lnkCtx.Ctx, pbNode, lsys)
3039
}
31-
builder, ok := reifyFuncs[data.FieldDataType().Int()]
40+
var builder reifyTypeFunc
41+
if lazy {
42+
builder, ok = lazyReifyFuncs[data.FieldDataType().Int()]
43+
} else {
44+
builder, ok = reifyFuncs[data.FieldDataType().Int()]
45+
}
3246
if !ok {
3347
return nil, fmt.Errorf("no reification for this UnixFS node type")
3448
}
@@ -38,6 +52,14 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy
3852
type reifyTypeFunc func(context.Context, dagpb.PBNode, data.UnixFSData, *ipld.LinkSystem) (ipld.Node, error)
3953

4054
var reifyFuncs = map[int64]reifyTypeFunc{
55+
data.Data_File: unixFSFileReifierWithPreload,
56+
data.Data_Metadata: defaultUnixFSReifier,
57+
data.Data_Raw: unixFSFileReifier,
58+
data.Data_Symlink: defaultUnixFSReifier,
59+
data.Data_Directory: directory.NewUnixFSBasicDir,
60+
data.Data_HAMTShard: hamt.NewUnixFSHAMTShardWithPreload,
61+
}
62+
var lazyReifyFuncs = map[int64]reifyTypeFunc{
4163
data.Data_File: unixFSFileReifier,
4264
data.Data_Metadata: defaultUnixFSReifier,
4365
data.Data_Raw: unixFSFileReifier,
@@ -47,7 +69,7 @@ var reifyFuncs = map[int64]reifyTypeFunc{
4769
}
4870

4971
// treat non-unixFS nodes like directories -- allow them to lookup by link
50-
// TODO: Make this a separate node as directors gain more functionality
72+
// TODO: Make this a separate node as directories gain more functionality
5173
func defaultReifier(_ context.Context, substrate dagpb.PBNode, _ *ipld.LinkSystem) (ipld.Node, error) {
5274
return &_PathedPBNode{_substrate: substrate}, nil
5375
}
@@ -56,6 +78,10 @@ func unixFSFileReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixF
5678
return file.NewUnixFSFile(ctx, substrate, ls)
5779
}
5880

81+
func unixFSFileReifierWithPreload(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) {
82+
return file.NewUnixFSFileWithPreload(ctx, substrate, ls)
83+
}
84+
5985
func defaultUnixFSReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) {
6086
return defaultReifier(ctx, substrate, ls)
6187
}

Diff for: signaling.go

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
1515
lsys.KnownReifiers = make(map[string]linking.NodeReifier)
1616
}
1717
lsys.KnownReifiers["unixfs"] = Reify
18+
lsys.KnownReifiers["unixfs-preload"] = nonLazyReify
1819
}
1920

2021
// UnixFSPathSelector creates a selector for a file/path inside of a UnixFS directory

0 commit comments

Comments
 (0)