Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 86f33ed

Browse files
authored
Merge pull request #515 from smola/reuse-packed-objects
storage: reuse deltas from packfiles
2 parents 7b08a30 + 16b24f8 commit 86f33ed

12 files changed

+493
-93
lines changed

plumbing/format/packfile/delta_selector.go

+135-10
Original file line number | Diff line number | Diff line change
@@ -47,17 +47,123 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
4747
func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
4848
var objectsToPack []*ObjectToPack
4949
for _, h := range hashes {
50-
o, err := dw.storer.EncodedObject(plumbing.AnyObject, h)
50+
o, err := dw.encodedDeltaObject(h)
5151
if err != nil {
5252
return nil, err
5353
}
5454

55-
objectsToPack = append(objectsToPack, newObjectToPack(o))
55+
otp := newObjectToPack(o)
56+
if _, ok := o.(plumbing.DeltaObject); ok {
57+
otp.Original = nil
58+
}
59+
60+
objectsToPack = append(objectsToPack, otp)
61+
}
62+
63+
if err := dw.fixAndBreakChains(objectsToPack); err != nil {
64+
return nil, err
5665
}
5766

5867
return objectsToPack, nil
5968
}
6069

70+
func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
71+
edos, ok := dw.storer.(storer.DeltaObjectStorer)
72+
if !ok {
73+
return dw.encodedObject(h)
74+
}
75+
76+
return edos.DeltaObject(plumbing.AnyObject, h)
77+
}
78+
79+
func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
80+
return dw.storer.EncodedObject(plumbing.AnyObject, h)
81+
}
82+
83+
func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
84+
m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
85+
for _, otp := range objectsToPack {
86+
m[otp.Hash()] = otp
87+
}
88+
89+
for _, otp := range objectsToPack {
90+
if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
91+
return err
92+
}
93+
}
94+
95+
return nil
96+
}
97+
98+
func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
99+
if !otp.Object.Type().IsDelta() {
100+
return nil
101+
}
102+
103+
// Initial ObjectToPack instances might have a delta assigned to Object
104+
// but no actual base initially. Once Base is assigned to a delta, it means
105+
// we already fixed it.
106+
if otp.Base != nil {
107+
return nil
108+
}
109+
110+
do, ok := otp.Object.(plumbing.DeltaObject)
111+
if !ok {
112+
// if this is not a DeltaObject, then we cannot retrieve its base,
113+
// so we have to break the delta chain here.
114+
return dw.undeltify(otp)
115+
}
116+
117+
base, ok := objectsToPack[do.BaseHash()]
118+
if !ok {
119+
// The base of the delta is not in our list of objects to pack, so
120+
// we break the chain.
121+
return dw.undeltify(otp)
122+
}
123+
124+
if base.Size() <= otp.Size() {
125+
// Bases should be bigger
126+
return dw.undeltify(otp)
127+
}
128+
129+
if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
130+
return err
131+
}
132+
133+
otp.SetDelta(base, otp.Object)
134+
return nil
135+
}
136+
137+
func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
138+
if otp.Original != nil {
139+
return nil
140+
}
141+
142+
if !otp.Object.Type().IsDelta() {
143+
return nil
144+
}
145+
146+
obj, err := dw.encodedObject(otp.Hash())
147+
if err != nil {
148+
return err
149+
}
150+
151+
otp.Original = obj
152+
return nil
153+
}
154+
155+
// undeltify undeltifies an *ObjectToPack by retrieving the original object from
156+
// the storer and resetting it.
157+
func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
158+
if err := dw.restoreOriginal(otp); err != nil {
159+
return err
160+
}
161+
162+
otp.Object = otp.Original
163+
otp.Depth = 0
164+
return nil
165+
}
166+
61167
// sort orders objectsToPack in place using the byTypeAndSize ordering.
func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
	ordered := byTypeAndSize(objectsToPack)
	sort.Sort(ordered)
}
@@ -66,15 +172,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
66172
for i := 0; i < len(objectsToPack); i++ {
67173
target := objectsToPack[i]
68174

69-
// We only want to create deltas from specific types
70-
if !applyDelta[target.Original.Type()] {
175+
// If we already have a delta, we don't try to find a new one for this
176+
// object. This happens when a delta is set to be reused from an existing
177+
// packfile.
178+
if target.IsDelta() {
179+
continue
180+
}
181+
182+
// We only want to create deltas from specific types.
183+
if !applyDelta[target.Type()] {
71184
continue
72185
}
73186

74187
for j := i - 1; j >= 0; j-- {
75188
base := objectsToPack[j]
76189
// Objects must use only the same type as their delta base.
77-
if base.Original.Type() != target.Original.Type() {
190+
// Since objectsToPack is sorted by type and size, once we find
191+
// a different type, we know we won't find more of them.
192+
if base.Type() != target.Type() {
78193
break
79194
}
80195

@@ -89,7 +204,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
89204

90205
func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
91206
// If the sizes are radically different, this is a bad pairing.
92-
if target.Original.Size() < base.Original.Size()>>4 {
207+
if target.Size() < base.Size()>>4 {
93208
return nil
94209
}
95210

@@ -106,10 +221,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
106221
}
107222

108223
// If we have to insert a lot to make this work, find another.
109-
if base.Original.Size()-target.Object.Size() > msz {
224+
if base.Size()-target.Size() > msz {
110225
return nil
111226
}
112227

228+
// Original object might not be present if we're reusing a delta, so we
229+
// ensure it is restored.
230+
if err := dw.restoreOriginal(target); err != nil {
231+
return err
232+
}
233+
234+
if err := dw.restoreOriginal(base); err != nil {
235+
return err
236+
}
237+
113238
// Now we can generate the delta using originals
114239
delta, err := GetDelta(base.Original, target.Original)
115240
if err != nil {
@@ -162,13 +287,13 @@ func (a byTypeAndSize) Len() int { return len(a) }
162287
// Swap exchanges the elements at positions i and j.
func (a byTypeAndSize) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}
163288

164289
func (a byTypeAndSize) Less(i, j int) bool {
165-
if a[i].Object.Type() < a[j].Object.Type() {
290+
if a[i].Type() < a[j].Type() {
166291
return false
167292
}
168293

169-
if a[i].Object.Type() > a[j].Object.Type() {
294+
if a[i].Type() > a[j].Type() {
170295
return true
171296
}
172297

173-
return a[i].Object.Size() > a[j].Object.Size()
298+
return a[i].Size() > a[j].Size()
174299
}

plumbing/format/packfile/encoder.go

+18-11
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@ type Encoder struct {
1818
w *offsetWriter
1919
zw *zlib.Writer
2020
hasher plumbing.Hasher
21+
// offsets is a map of object hashes to corresponding offsets in the packfile.
22+
// It is used to determine offset of the base of a delta when a OFS_DELTA is
23+
// used.
2124
offsets map[plumbing.Hash]int64
2225
useRefDeltas bool
2326
}
@@ -78,25 +81,24 @@ func (e *Encoder) head(numEntries int) error {
7881

7982
func (e *Encoder) entry(o *ObjectToPack) error {
8083
offset := e.w.Offset()
84+
e.offsets[o.Hash()] = offset
8185

8286
if o.IsDelta() {
8387
if err := e.writeDeltaHeader(o, offset); err != nil {
8488
return err
8589
}
8690
} else {
87-
if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
91+
if err := e.entryHead(o.Type(), o.Size()); err != nil {
8892
return err
8993
}
9094
}
9195

92-
// Save the position using the original hash, maybe a delta will need it
93-
e.offsets[o.Original.Hash()] = offset
94-
9596
e.zw.Reset(e.w)
9697
or, err := o.Object.Reader()
9798
if err != nil {
9899
return err
99100
}
101+
100102
_, err = io.Copy(e.zw, or)
101103
if err != nil {
102104
return err
@@ -117,9 +119,9 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
117119
}
118120

119121
if e.useRefDeltas {
120-
return e.writeRefDeltaHeader(o.Base.Original.Hash())
122+
return e.writeRefDeltaHeader(o.Base.Hash())
121123
} else {
122-
return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
124+
return e.writeOfsDeltaHeader(offset, o.Base.Hash())
123125
}
124126
}
125127

@@ -128,14 +130,19 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
128130
}
129131

130132
func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
131-
// because it is an offset delta, we need the base
132-
// object position
133-
offset, ok := e.offsets[base]
133+
baseOffset, ok := e.offsets[base]
134134
if !ok {
135-
return fmt.Errorf("delta base not found. Hash: %v", base)
135+
return fmt.Errorf("base for delta not found, base hash: %v", base)
136+
}
137+
138+
// for OFS_DELTA, offset of the base is interpreted as negative offset
139+
// relative to the type-byte of the header of the ofs-delta entry.
140+
relativeOffset := deltaOffset-baseOffset
141+
if relativeOffset <= 0 {
142+
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
136143
}
137144

138-
return binary.WriteVariableWidthInt(e.w, deltaOffset-offset)
145+
return binary.WriteVariableWidthInt(e.w, relativeOffset)
139146
}
140147

141148
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
package packfile_test
2+
3+
import (
4+
"bytes"
5+
"math/rand"
6+
7+
"gopkg.in/src-d/go-git.v4/plumbing"
8+
. "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
9+
"gopkg.in/src-d/go-git.v4/plumbing/storer"
10+
"gopkg.in/src-d/go-git.v4/storage/filesystem"
11+
"gopkg.in/src-d/go-git.v4/storage/memory"
12+
13+
"github.com/src-d/go-git-fixtures"
14+
. "gopkg.in/check.v1"
15+
)
16+
17+
type EncoderAdvancedSuite struct {
18+
fixtures.Suite
19+
}
20+
21+
var _ = Suite(&EncoderAdvancedSuite{})
22+
23+
func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
24+
fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
25+
fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
26+
ByTag("packfile").ByTag(".git").One())
27+
fixs.Test(c, func(f *fixtures.Fixture) {
28+
storage, err := filesystem.NewStorage(f.DotGit())
29+
c.Assert(err, IsNil)
30+
s.testEncodeDecode(c, storage)
31+
})
32+
33+
}
34+
35+
func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
36+
37+
objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
38+
c.Assert(err, IsNil)
39+
40+
expectedObjects := map[plumbing.Hash]bool{}
41+
var hashes []plumbing.Hash
42+
err = objIter.ForEach(func(o plumbing.EncodedObject) error {
43+
expectedObjects[o.Hash()] = true
44+
hashes = append(hashes, o.Hash())
45+
return err
46+
47+
})
48+
c.Assert(err, IsNil)
49+
50+
// Shuffle hashes to avoid delta selector getting order right just because
51+
// the initial order is correct.
52+
auxHashes := make([]plumbing.Hash, len(hashes))
53+
for i, j := range rand.Perm(len(hashes)) {
54+
auxHashes[j] = hashes[i]
55+
}
56+
hashes = auxHashes
57+
58+
buf := bytes.NewBuffer(nil)
59+
enc := NewEncoder(buf, storage, false)
60+
_, err = enc.Encode(hashes)
61+
c.Assert(err, IsNil)
62+
63+
scanner := NewScanner(buf)
64+
storage = memory.NewStorage()
65+
d, err := NewDecoder(scanner, storage)
66+
c.Assert(err, IsNil)
67+
_, err = d.Decode()
68+
c.Assert(err, IsNil)
69+
70+
objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
71+
c.Assert(err, IsNil)
72+
obtainedObjects := map[plumbing.Hash]bool{}
73+
err = objIter.ForEach(func(o plumbing.EncodedObject) error {
74+
obtainedObjects[o.Hash()] = true
75+
return nil
76+
})
77+
c.Assert(err, IsNil)
78+
c.Assert(obtainedObjects, DeepEquals, expectedObjects)
79+
80+
for h := range obtainedObjects {
81+
if !expectedObjects[h] {
82+
c.Errorf("obtained unexpected object: %s", h)
83+
}
84+
}
85+
86+
for h := range expectedObjects {
87+
if !obtainedObjects[h] {
88+
c.Errorf("missing object: %s", h)
89+
}
90+
}
91+
}

0 commit comments

Comments
 (0)