Skip to content

Commit fe18dab

Browse files
committed
Add EstimateUnixFSFileDefaultChunking for estimating how many bytes the car file representing a unixfs file of a given size will take
Fix #58
1 parent 2450f69 commit fe18dab

File tree

4 files changed

+159
-4
lines changed

4 files changed

+159
-4
lines changed

data/builder/file.go

+86
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
basicnode "github.com/ipld/go-ipld-prime/node/basic"
1414
"github.com/multiformats/go-multicodec"
1515
multihash "github.com/multiformats/go-multihash/core"
16+
"github.com/multiformats/go-varint"
1617

1718
// raw needed for opening as bytes
1819
_ "github.com/ipld/go-ipld-prime/codec/raw"
@@ -57,6 +58,91 @@ func BuildUnixFSFile(r io.Reader, chunker string, ls *ipld.LinkSystem) (ipld.Lin
5758
}
5859
}
5960

61+
// EstimateUnixFSFile estimates the byte size of the car file that would be
62+
// needed to hold a UnixFS file containing data of the given length.
63+
func EstimateUnixFSFileDefaultChunking(dataLength uint64) uint64 {
64+
blkSize := chunk.DefaultBlockSize
65+
blocks := dataLength / uint64(blkSize)
66+
remainder := dataLength % uint64(blkSize)
67+
68+
size := dataLength
69+
cidExample, _ := leafLinkProto.Prefix.Sum([]byte{0})
70+
cidLength := uint64(len(cidExample.Bytes()))
71+
72+
links := []uint64{}
73+
for i := uint64(0); i < blocks; i++ {
74+
links = append(links, uint64(chunk.DefaultBlockSize))
75+
}
76+
// account for the uvarint + cid length of each block of raw data.
77+
size += uint64(len(links)) * (cidLength + uint64(varint.UvarintSize(cidLength+uint64(blkSize))))
78+
if remainder > 0 {
79+
links = append(links, remainder)
80+
size += cidLength + uint64(varint.UvarintSize(cidLength+uint64(remainder)))
81+
}
82+
83+
// account for the metadata overhead nodes.
84+
ls := cidlink.DefaultLinkSystem()
85+
storage := cidlink.Memory{}
86+
ls.StorageReadOpener = storage.OpenRead
87+
ls.StorageWriteOpener = storage.OpenWrite
88+
89+
icnt := 0
90+
for len(links) > 1 {
91+
nxtLnks := []uint64{}
92+
for len(links) > 1 {
93+
icnt++
94+
children := uint64(DefaultLinksPerBlock)
95+
if len(links) < DefaultLinksPerBlock {
96+
children = uint64(len(links))
97+
}
98+
childrenLinks := links[:children]
99+
links = links[children:]
100+
totalSize := uint64(0)
101+
for _, l := range childrenLinks {
102+
totalSize += l
103+
}
104+
105+
node, _ := BuildUnixFS(func(b *Builder) {
106+
FileSize(b, totalSize)
107+
BlockSizes(b, childrenLinks)
108+
})
109+
110+
// Pack into the dagpb node.
111+
dpbb := dagpb.Type.PBNode.NewBuilder()
112+
pbm, _ := dpbb.BeginMap(2)
113+
pblb, _ := pbm.AssembleEntry("Links")
114+
pbl, _ := pblb.BeginList(int64(len(childrenLinks)))
115+
for _, c := range childrenLinks {
116+
pbln, _ := BuildUnixFSDirectoryEntry("", int64(c), cidlink.Link{Cid: cidExample})
117+
pbl.AssembleValue().AssignNode(pbln)
118+
}
119+
pbl.Finish()
120+
pbm.AssembleKey().AssignString("Data")
121+
pbm.AssembleValue().AssignBytes(data.EncodeUnixFSData(node))
122+
pbm.Finish()
123+
pbn := dpbb.Build()
124+
pbLnk := ls.MustStore(ipld.LinkContext{}, fileLinkProto, pbn)
125+
pbRcrd, _ := ls.LoadRaw(ipld.LinkContext{}, pbLnk)
126+
127+
// dagpb overhead
128+
intermediateNodeSize := uint64(len(pbRcrd))
129+
130+
size += intermediateNodeSize + cidLength + uint64(varint.UvarintSize(cidLength+intermediateNodeSize))
131+
nxtLnks = append(nxtLnks, totalSize)
132+
}
133+
if len(links) == 1 {
134+
nxtLnks = append(nxtLnks, links[0])
135+
}
136+
links = nxtLnks
137+
}
138+
fmt.Printf("estimated %d intermeidate nodes\n", icnt)
139+
140+
// add the car header
141+
size += 59
142+
143+
return size
144+
}
145+
60146
var fileLinkProto = cidlink.LinkPrototype{
61147
Prefix: cid.Prefix{
62148
Version: 1,

data/builder/file_test.go

+69-3
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,27 @@
1-
package builder
1+
package builder_test
22

33
import (
44
"bytes"
55
"context"
6+
"fmt"
7+
"io"
8+
"math/rand"
69
"testing"
710

11+
"github.com/ipfs/go-unixfsnode/data/builder"
12+
"github.com/multiformats/go-multicodec"
13+
multihash "github.com/multiformats/go-multihash/core"
14+
815
"github.com/ipfs/go-cid"
916
u "github.com/ipfs/go-ipfs-util"
1017
"github.com/ipfs/go-unixfsnode/file"
18+
carv1 "github.com/ipld/go-car"
19+
"github.com/ipld/go-car/v2"
1120
dagpb "github.com/ipld/go-codec-dagpb"
1221
"github.com/ipld/go-ipld-prime"
22+
"github.com/ipld/go-ipld-prime/linking"
1323
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
24+
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
1425
)
1526

1627
func TestBuildUnixFSFile(t *testing.T) {
@@ -23,7 +34,7 @@ func TestBuildUnixFSFile(t *testing.T) {
2334
ls.StorageReadOpener = storage.OpenRead
2435
ls.StorageWriteOpener = storage.OpenWrite
2536

26-
f, _, err := BuildUnixFSFile(r, "", &ls)
37+
f, _, err := builder.BuildUnixFSFile(r, "", &ls)
2738
if err != nil {
2839
t.Fatal(err)
2940
}
@@ -43,6 +54,61 @@ func TestBuildUnixFSFile(t *testing.T) {
4354
}
4455
}
4556

57+
func TestEstimateUnixFSFileDefaultChunking(t *testing.T) {
58+
for i := 100; i < 1000000000; i *= 10 {
59+
b := make([]byte, i)
60+
rand.Read(b)
61+
62+
ls := cidlink.DefaultLinkSystem()
63+
storage := cidlink.Memory{}
64+
ls.StorageReadOpener = storage.OpenRead
65+
nPB := 0
66+
67+
ls.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) {
68+
w, bwc, err := storage.OpenWrite(lc)
69+
return w, func(lnk ipld.Link) error {
70+
if lnk.(cidlink.Link).Cid.Prefix().Codec == uint64(multicodec.DagPb) {
71+
nPB++
72+
}
73+
return bwc(lnk)
74+
}, err
75+
}
76+
rt, _, err := builder.BuildUnixFSFile(bytes.NewReader(b), "", &ls)
77+
if err != nil {
78+
t.Fatal(err)
79+
}
80+
81+
ob := bytes.NewBuffer(nil)
82+
_, err = car.TraverseV1(context.Background(), &ls, rt.(cidlink.Link).Cid, selectorparse.CommonSelector_ExploreAllRecursively, ob)
83+
if err != nil {
84+
t.Fatal(err)
85+
}
86+
fileLen := len(ob.Bytes())
87+
88+
estimate := builder.EstimateUnixFSFileDefaultChunking(uint64(i))
89+
if estimate != uint64(fileLen) {
90+
fmt.Printf("%d intermediate nodes.\n", nPB)
91+
t.Fatalf("estimate for file length %d was %d. should be %d", i, estimate, fileLen)
92+
}
93+
}
94+
}
95+
96+
func TestS(t *testing.T) {
97+
p := cid.Prefix{
98+
Version: 1,
99+
Codec: uint64(multicodec.DagPb),
100+
MhType: multihash.SHA2_256,
101+
MhLength: 32,
102+
}
103+
rt, _ := p.Sum([]byte{0})
104+
ch := carv1.CarHeader{
105+
Roots: []cid.Cid{rt},
106+
Version: 1,
107+
}
108+
s, _ := carv1.HeaderSize(&ch)
109+
t.Fatalf("hs: %d\n", s)
110+
}
111+
46112
func TestUnixFSFileRoundtrip(t *testing.T) {
47113
buf := make([]byte, 10*1024*1024)
48114
u.NewSeededRand(0xdeadbeef).Read(buf)
@@ -53,7 +119,7 @@ func TestUnixFSFileRoundtrip(t *testing.T) {
53119
ls.StorageReadOpener = storage.OpenRead
54120
ls.StorageWriteOpener = storage.OpenWrite
55121

56-
f, _, err := BuildUnixFSFile(r, "", &ls)
122+
f, _, err := builder.BuildUnixFSFile(r, "", &ls)
57123
if err != nil {
58124
t.Fatal(err)
59125
}

go.mod

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@ require (
1010
github.com/ipfs/go-ipld-format v0.4.0
1111
github.com/ipfs/go-merkledag v0.10.0
1212
github.com/ipfs/go-unixfs v0.4.4
13+
github.com/ipld/go-car v0.5.0
1314
github.com/ipld/go-car/v2 v2.8.0
1415
github.com/ipld/go-codec-dagpb v1.6.0
1516
github.com/ipld/go-ipld-prime v0.20.0
1617
github.com/multiformats/go-multicodec v0.8.1
1718
github.com/multiformats/go-multihash v0.2.1
19+
github.com/multiformats/go-varint v0.0.7
1820
github.com/spaolacci/murmur3 v1.1.0
1921
github.com/stretchr/testify v1.8.2
2022
google.golang.org/protobuf v1.28.1
@@ -51,7 +53,6 @@ require (
5153
github.com/multiformats/go-base32 v0.1.0 // indirect
5254
github.com/multiformats/go-base36 v0.2.0 // indirect
5355
github.com/multiformats/go-multibase v0.1.1 // indirect
54-
github.com/multiformats/go-varint v0.0.7 // indirect
5556
github.com/opentracing/opentracing-go v1.2.0 // indirect
5657
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect
5758
github.com/pmezard/go-difflib v1.0.0 // indirect

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ github.com/ipfs/go-unixfs v0.4.4 h1:D/dLBOJgny5ZLIur2vIXVQVW0EyDHdOMBDEhgHrt6rY=
108108
github.com/ipfs/go-unixfs v0.4.4/go.mod h1:TSG7G1UuT+l4pNj91raXAPkX0BhJi3jST1FDTfQ5QyM=
109109
github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs=
110110
github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU=
111+
github.com/ipld/go-car v0.5.0 h1:kcCEa3CvYMs0iE5BzD5sV7O2EwMiCIp3uF8tA6APQT8=
112+
github.com/ipld/go-car v0.5.0/go.mod h1:ppiN5GWpjOZU9PgpAZ9HbZd9ZgSpwPMr48fGRJOWmvE=
111113
github.com/ipld/go-car/v2 v2.8.0 h1:8tUI+VM1mAQ2Qa7ScK++lfyuZYcGQ70bZ6NpGOcJj5o=
112114
github.com/ipld/go-car/v2 v2.8.0/go.mod h1:a+BnAxUqgr7wcWxW/lI6ctyEQ2v9gjBChPytwFMp2f4=
113115
github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc=

0 commit comments

Comments
 (0)