Skip to content

Commit 1c950ee

Browse files
committed
Split source files by patch type (text/binary)
The tests were already split up in this way, so it makes sense to have the parsing functions split as well.
1 parent d42fb0e commit 1c950ee

File tree

6 files changed

+406
-392
lines changed

6 files changed

+406
-392
lines changed

gitdiff/binary.go

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package gitdiff
2+
3+
import (
4+
"bytes"
5+
"compress/zlib"
6+
"fmt"
7+
"io"
8+
"io/ioutil"
9+
"strconv"
10+
"strings"
11+
)
12+
13+
func (p *parser) ParseBinaryFragments(f *File) (n int, err error) {
14+
isBinary, hasData, err := p.ParseBinaryMarker()
15+
if err != nil || !isBinary {
16+
return 0, err
17+
}
18+
19+
f.IsBinary = true
20+
if !hasData {
21+
return 0, nil
22+
}
23+
24+
forward, err := p.ParseBinaryFragmentHeader()
25+
if err != nil {
26+
return 0, err
27+
}
28+
if forward == nil {
29+
return 0, p.Errorf(0, "missing data for binary patch")
30+
}
31+
if err := p.ParseBinaryChunk(forward); err != nil {
32+
return 0, err
33+
}
34+
f.BinaryFragment = forward
35+
36+
// valid for reverse to not exist, but it must be valid if present
37+
reverse, err := p.ParseBinaryFragmentHeader()
38+
if err != nil {
39+
return 1, err
40+
}
41+
if reverse == nil {
42+
return 1, nil
43+
}
44+
if err := p.ParseBinaryChunk(reverse); err != nil {
45+
return 1, err
46+
}
47+
f.ReverseBinaryFragment = reverse
48+
49+
return 1, nil
50+
}
51+
52+
func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) {
53+
switch p.Line(0) {
54+
case "GIT binary patch\n":
55+
hasData = true
56+
case "Binary files differ\n":
57+
case "Files differ\n":
58+
default:
59+
return false, false, nil
60+
}
61+
62+
if err = p.Next(); err != nil && err != io.EOF {
63+
return false, false, err
64+
}
65+
return true, hasData, nil
66+
}
67+
68+
func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) {
69+
parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2)
70+
if len(parts) < 2 {
71+
return nil, nil
72+
}
73+
74+
frag := &BinaryFragment{}
75+
switch parts[0] {
76+
case "delta":
77+
frag.Method = BinaryPatchDelta
78+
case "literal":
79+
frag.Method = BinaryPatchLiteral
80+
default:
81+
return nil, nil
82+
}
83+
84+
var err error
85+
if frag.Size, err = strconv.ParseInt(parts[1], 10, 64); err != nil {
86+
nerr := err.(*strconv.NumError)
87+
return nil, p.Errorf(0, "binary patch: invalid size: %v", nerr.Err)
88+
}
89+
90+
if err := p.Next(); err != nil && err != io.EOF {
91+
return nil, err
92+
}
93+
return frag, nil
94+
}
95+
96+
func (p *parser) ParseBinaryChunk(frag *BinaryFragment) error {
97+
// Binary fragments are encoded as a series of base85 encoded lines. Each
98+
// line starts with a character in [A-Za-z] giving the number of bytes on
99+
// the line, where A = 1 and z = 52, and ends with a newline character.
100+
//
101+
// The base85 encoding means each line is a multiple of 5 characters + 2
102+
// additional characters for the length byte and the newline. The fragment
103+
// ends with a blank line.
104+
const (
105+
shortestValidLine = "A00000\n"
106+
maxBytesPerLine = 52
107+
)
108+
109+
var data bytes.Buffer
110+
buf := make([]byte, maxBytesPerLine)
111+
for {
112+
line := p.Line(0)
113+
if line == "\n" {
114+
break
115+
}
116+
if len(line) < len(shortestValidLine) || (len(line)-2)%5 != 0 {
117+
return p.Errorf(0, "binary patch: corrupt data line")
118+
}
119+
120+
byteCount, seq := int(line[0]), line[1:len(line)-1]
121+
switch {
122+
case 'A' <= byteCount && byteCount <= 'Z':
123+
byteCount = byteCount - 'A' + 1
124+
case 'a' <= byteCount && byteCount <= 'z':
125+
byteCount = byteCount - 'a' + 27
126+
default:
127+
return p.Errorf(0, "binary patch: invalid length byte")
128+
}
129+
130+
// base85 encodes every 4 bytes into 5 characters, with up to 3 bytes of end padding
131+
maxByteCount := len(seq) / 5 * 4
132+
if byteCount > maxByteCount || byteCount < maxByteCount-3 {
133+
return p.Errorf(0, "binary patch: incorrect byte count")
134+
}
135+
136+
if err := base85Decode(buf[:byteCount], []byte(seq)); err != nil {
137+
return p.Errorf(0, "binary patch: %v", err)
138+
}
139+
data.Write(buf[:byteCount])
140+
141+
if err := p.Next(); err != nil {
142+
if err == io.EOF {
143+
return p.Errorf(0, "binary patch: unexpected EOF")
144+
}
145+
return err
146+
}
147+
}
148+
149+
if err := inflateBinaryChunk(frag, &data); err != nil {
150+
return p.Errorf(0, "binary patch: %v", err)
151+
}
152+
153+
// consume the empty line that ended the fragment
154+
if err := p.Next(); err != nil && err != io.EOF {
155+
return err
156+
}
157+
return nil
158+
}
159+
160+
func inflateBinaryChunk(frag *BinaryFragment, r io.Reader) error {
161+
zr, err := zlib.NewReader(r)
162+
if err != nil {
163+
return err
164+
}
165+
166+
data, err := ioutil.ReadAll(zr)
167+
if err != nil {
168+
return err
169+
}
170+
if err := zr.Close(); err != nil {
171+
return err
172+
}
173+
174+
if int64(len(data)) != frag.Size {
175+
return fmt.Errorf("%d byte fragment inflated to %d", frag.Size, len(data))
176+
}
177+
frag.Data = data
178+
return nil
179+
}
File renamed without changes.

gitdiff/file_header.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,53 @@ const (
1313
devNull = "/dev/null"
1414
)
1515

16+
// ParseNextFileHeader finds and parses the next file header in the stream. If
17+
// a header is found, it returns a file and all input before the header. It
18+
// returns nil if no headers are found before the end of the input.
19+
func (p *parser) ParseNextFileHeader() (*File, string, error) {
20+
var preamble strings.Builder
21+
var file *File
22+
for {
23+
// check for disconnected fragment headers (corrupt patch)
24+
frag, err := p.ParseTextFragmentHeader()
25+
if err != nil {
26+
// not a valid header, nothing to worry about
27+
goto NextLine
28+
}
29+
if frag != nil {
30+
return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header())
31+
}
32+
33+
// check for a git-generated patch
34+
file, err = p.ParseGitFileHeader()
35+
if err != nil {
36+
return nil, "", err
37+
}
38+
if file != nil {
39+
return file, preamble.String(), nil
40+
}
41+
42+
// check for a "traditional" patch
43+
file, err = p.ParseTraditionalFileHeader()
44+
if err != nil {
45+
return nil, "", err
46+
}
47+
if file != nil {
48+
return file, preamble.String(), nil
49+
}
50+
51+
NextLine:
52+
preamble.WriteString(p.Line(0))
53+
if err := p.Next(); err != nil {
54+
if err == io.EOF {
55+
break
56+
}
57+
return nil, "", err
58+
}
59+
}
60+
return nil, "", nil
61+
}
62+
1663
func (p *parser) ParseGitFileHeader() (*File, error) {
1764
const prefix = "diff --git "
1865

0 commit comments

Comments
 (0)