Skip to content

add an ADL to preload hamt loading #38

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ This is an IPLD ADL that provides string based pathing for protobuf nodes. The t

Note that while it works internally with go-codec-dagpb, the Reify method (used to get a UnixFSNode from a DagPB node) should actually work successfully with go-ipld-prime-proto nodes.

## Usage

The primary interaction with this package is to register an ADL on a link system. This is done via a helper method.

```go
AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem)
```

For link systems which have UnixFS reification registered, two ADLs will be available to the [`InterpretAs`](https://ipld.io/specs/selectors/) selector: 'unixfs' and 'unixfs-preload'. The difference between these two ADLs is that the preload variant will access all blocks within a UnixFS Object (file or directory) when that object is accessed by a selector traversal. The non-preload variant, in contrast, will only access the subset of blocks strictly needed for the traversal. In practice, this means the subset of a sharded directory needed to access a specific file, or the sub-range of a file directly accessed by a range selector.


## License

Apache-2.0/MIT © Protocol Labs
19 changes: 19 additions & 0 deletions file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,25 @@ func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSyst
}, nil
}

// NewUnixFSFileWithPreload builds a file node exactly as NewUnixFSFile does and
// then reads the node's complete byte stream once, discarding the data, before
// returning the node. The read is there so that a system watching the
// LinkSystem for block loads can determine which blocks make up this file when
// it spans multiple blocks.
//
// An error from construction, from opening the stream, or from reading it is
// returned unchanged together with a nil node.
// NewUnixFSFileWithPreload is used by the "unixfs-preload" reifier.
func NewUnixFSFileWithPreload(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (LargeBytesNode, error) {
	node, err := NewUnixFSFile(ctx, substrate, lsys)
	if err != nil {
		return nil, err
	}

	stream, err := node.AsLargeBytes()
	if err != nil {
		return nil, err
	}

	// Drain the stream end to end; the bytes themselves are not needed.
	_, err = io.Copy(io.Discard, stream)
	if err != nil {
		return nil, err
	}
	return node, nil
}

// A LargeBytesNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type.
type LargeBytesNode interface {
ipld.Node
Expand Down
16 changes: 16 additions & 0 deletions hamt/shardeddir.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ func NewUnixFSHAMTShard(ctx context.Context, substrate dagpb.PBNode, data data.U
}, nil
}

// NewUnixFSHAMTShardWithPreload attempts to construct a UnixFSHAMTShard node from the base protobuf node plus
// a decoded UnixFSData structure, exactly as NewUnixFSHAMTShard does, and then asks the node for its Length so
// that the full set of hamt shards is iterated through and loaded up front.
//
// On failure the returned node is nil: an error from NewUnixFSHAMTShard is passed through unchanged, and a
// Length of -1 is reported as a failure to fully explore the hamt.
func NewUnixFSHAMTShardWithPreload(ctx context.Context, substrate dagpb.PBNode, data data.UnixFSData, lsys *ipld.LinkSystem) (ipld.Node, error) {
	n, err := NewUnixFSHAMTShard(ctx, substrate, data, lsys)
	if err != nil {
		return nil, err
	}

	// Length is called only to trigger the walk over the shards (presumably it has to visit
	// every shard to count entries); the count itself is unused and -1 means the walk failed.
	if n.Length() == -1 {
		return nil, fmt.Errorf("could not fully explore hamt during preload")
	}

	return n, nil
}

// Substrate returns the node held in the shard's _substrate field.
func (n UnixFSHAMTShard) Substrate() ipld.Node {
return n._substrate
}
Expand Down
30 changes: 28 additions & 2 deletions reification.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ import (
// Reify inspects an ipld Node and attempts to interpret it as a UnixFSNode,
// returning the UnixFSNode when that succeeds. It is the lazy entry point:
// all work is delegated to doReify with lazy set to true.
func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) {
	const lazy = true
	return doReify(lnkCtx, maybePBNodeRoot, lsys, lazy)
}

// nonLazyReify behaves like Reify except that it loads all of a directory or
// file as it reaches them. It delegates to doReify with lazy set to false.
func nonLazyReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem) (ipld.Node, error) {
	const lazy = false
	return doReify(lnkCtx, maybePBNodeRoot, lsys, lazy)
}

func doReify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSystem, lazy bool) (ipld.Node, error) {
pbNode, ok := maybePBNodeRoot.(dagpb.PBNode)
if !ok {
return maybePBNodeRoot, nil
Expand All @@ -28,7 +37,12 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy
// we could not decode the UnixFS data, therefore, not UnixFS
return defaultReifier(lnkCtx.Ctx, pbNode, lsys)
}
builder, ok := reifyFuncs[data.FieldDataType().Int()]
var builder reifyTypeFunc
if lazy {
builder, ok = lazyReifyFuncs[data.FieldDataType().Int()]
} else {
builder, ok = reifyFuncs[data.FieldDataType().Int()]
}
if !ok {
return nil, fmt.Errorf("no reification for this UnixFS node type")
}
Expand All @@ -38,6 +52,14 @@ func Reify(lnkCtx ipld.LinkContext, maybePBNodeRoot ipld.Node, lsys *ipld.LinkSy
// reifyTypeFunc is the signature shared by the per-data-type builders stored in the reification
// tables: given a context, the dag-pb substrate node, its UnixFSData and the link system, it
// returns the reified node or an error.
type reifyTypeFunc func(context.Context, dagpb.PBNode, data.UnixFSData, *ipld.LinkSystem) (ipld.Node, error)

// reifyFuncs maps a UnixFS data type to the builder doReify selects when lazy is false.
// Despite the generic name this is the preloading table: files and HAMT shards are built
// with their WithPreload constructors.
var reifyFuncs = map[int64]reifyTypeFunc{
data.Data_File: unixFSFileReifierWithPreload,
data.Data_Metadata: defaultUnixFSReifier,
data.Data_Raw: unixFSFileReifier,
data.Data_Symlink: defaultUnixFSReifier,
data.Data_Directory: directory.NewUnixFSBasicDir,
data.Data_HAMTShard: hamt.NewUnixFSHAMTShardWithPreload,
}
var lazyReifyFuncs = map[int64]reifyTypeFunc{
data.Data_File: unixFSFileReifier,
data.Data_Metadata: defaultUnixFSReifier,
data.Data_Raw: unixFSFileReifier,
Expand All @@ -47,7 +69,7 @@ var reifyFuncs = map[int64]reifyTypeFunc{
}

// defaultReifier wraps the substrate in a _PathedPBNode; the context and link system
// arguments are ignored. It treats non-unixFS nodes like directories -- allowing them
// to lookup by link.
// TODO: Make this a separate node as directories gain more functionality
func defaultReifier(_ context.Context, substrate dagpb.PBNode, _ *ipld.LinkSystem) (ipld.Node, error) {
return &_PathedPBNode{_substrate: substrate}, nil
}
Expand All @@ -56,6 +78,10 @@ func unixFSFileReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixF
return file.NewUnixFSFile(ctx, substrate, ls)
}

// unixFSFileReifierWithPreload adapts file.NewUnixFSFileWithPreload to the reifyTypeFunc
// signature; the UnixFSData argument is not used.
func unixFSFileReifierWithPreload(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) {
return file.NewUnixFSFileWithPreload(ctx, substrate, ls)
}

// defaultUnixFSReifier adapts defaultReifier to the reifyTypeFunc signature by dropping
// the UnixFSData argument.
func defaultUnixFSReifier(ctx context.Context, substrate dagpb.PBNode, _ data.UnixFSData, ls *ipld.LinkSystem) (ipld.Node, error) {
return defaultReifier(ctx, substrate, ls)
}
Expand Down
1 change: 1 addition & 0 deletions signaling.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func AddUnixFSReificationToLinkSystem(lsys *ipld.LinkSystem) {
lsys.KnownReifiers = make(map[string]linking.NodeReifier)
}
lsys.KnownReifiers["unixfs"] = Reify
lsys.KnownReifiers["unixfs-preload"] = nonLazyReify
}

// UnixFSPathSelector creates a selector for a file/path inside of a UnixFS directory
Expand Down