Skip to content

Commit ddb648f

Browse files
escholtz and ianlancetaylor
authored and committed
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a primary concern. For example: copying files from an existing zip to a new zip without incurring the decompression and compression overhead; using an optimized, external compression method and writing the output to a zip archive; and compressing file contents in parallel and then sequentially writing the compressed bytes to a zip archive.

TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge

Fixes #34974

Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
Reviewed-on: https://go-review.googlesource.com/c/go/+/312310
Run-TryBot: Ian Lance Taylor <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
Trust: Carlos Amedee <[email protected]>
1 parent 9f34703 commit ddb648f

File tree

5 files changed

+541
-61
lines changed

5 files changed

+541
-61
lines changed

src/archive/zip/reader.go

+82-33
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,9 @@ type File struct {
5252
FileHeader
5353
zip *Reader
5454
zipr io.ReaderAt
55-
zipsize int64
5655
headerOffset int64
57-
}
58-
59-
func (f *File) hasDataDescriptor() bool {
60-
return f.Flags&0x8 != 0
56+
zip64 bool // zip64 extended information extra field presence
57+
descErr error // error reading the data descriptor during init
6158
}
6259

6360
// OpenReader will open the Zip file specified by name and return a ReadCloser.
@@ -112,14 +109,15 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
112109
// a bad one, and then only report an ErrFormat or UnexpectedEOF if
113110
// the file count modulo 65536 is incorrect.
114111
for {
115-
f := &File{zip: z, zipr: r, zipsize: size}
112+
f := &File{zip: z, zipr: r}
116113
err = readDirectoryHeader(f, buf)
117114
if err == ErrFormat || err == io.ErrUnexpectedEOF {
118115
break
119116
}
120117
if err != nil {
121118
return err
122119
}
120+
f.readDataDescriptor()
123121
z.File = append(z.File, f)
124122
}
125123
if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
@@ -180,26 +178,68 @@ func (f *File) Open() (io.ReadCloser, error) {
180178
return nil, ErrAlgorithm
181179
}
182180
var rc io.ReadCloser = dcomp(r)
183-
var desr io.Reader
184-
if f.hasDataDescriptor() {
185-
desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
186-
}
187181
rc = &checksumReader{
188182
rc: rc,
189183
hash: crc32.NewIEEE(),
190184
f: f,
191-
desr: desr,
192185
}
193186
return rc, nil
194187
}
195188

189+
// OpenRaw returns a Reader that provides access to the File's contents without
190+
// decompression.
191+
func (f *File) OpenRaw() (io.Reader, error) {
192+
bodyOffset, err := f.findBodyOffset()
193+
if err != nil {
194+
return nil, err
195+
}
196+
r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
197+
return r, nil
198+
}
199+
200+
func (f *File) readDataDescriptor() {
201+
if !f.hasDataDescriptor() {
202+
return
203+
}
204+
205+
bodyOffset, err := f.findBodyOffset()
206+
if err != nil {
207+
f.descErr = err
208+
return
209+
}
210+
211+
// In section 4.3.9.2 of the spec: "However ZIP64 format MAY be used
212+
// regardless of the size of a file. When extracting, if the zip64
213+
// extended information extra field is present for the file the
214+
// compressed and uncompressed sizes will be 8 byte values."
215+
//
216+
// Historically, this package has used the compressed and uncompressed
217+
// sizes from the central directory to determine if the package is
218+
// zip64.
219+
//
220+
// For this case we allow either the extra field or sizes to determine
221+
// the data descriptor length.
222+
zip64 := f.zip64 || f.isZip64()
223+
n := int64(dataDescriptorLen)
224+
if zip64 {
225+
n = dataDescriptor64Len
226+
}
227+
size := int64(f.CompressedSize64)
228+
r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, n)
229+
dd, err := readDataDescriptor(r, zip64)
230+
if err != nil {
231+
f.descErr = err
232+
return
233+
}
234+
f.CRC32 = dd.crc32
235+
}
236+
196237
type checksumReader struct {
197238
rc io.ReadCloser
198239
hash hash.Hash32
199240
nread uint64 // number of bytes read so far
200241
f *File
201-
desr io.Reader // if non-nil, where to read the data descriptor
202-
err error // sticky error
242+
err error // sticky error
203243
}
204244

205245
func (r *checksumReader) Stat() (fs.FileInfo, error) {
@@ -220,12 +260,12 @@ func (r *checksumReader) Read(b []byte) (n int, err error) {
220260
if r.nread != r.f.UncompressedSize64 {
221261
return 0, io.ErrUnexpectedEOF
222262
}
223-
if r.desr != nil {
224-
if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
225-
if err1 == io.EOF {
263+
if r.f.hasDataDescriptor() {
264+
if r.f.descErr != nil {
265+
if r.f.descErr == io.EOF {
226266
err = io.ErrUnexpectedEOF
227267
} else {
228-
err = err1
268+
err = r.f.descErr
229269
}
230270
} else if r.hash.Sum32() != r.f.CRC32 {
231271
err = ErrChecksum
@@ -336,6 +376,8 @@ parseExtras:
336376

337377
switch fieldTag {
338378
case zip64ExtraID:
379+
f.zip64 = true
380+
339381
// update directory values from the zip64 extra block.
340382
// They should only be consulted if the sizes read earlier
341383
// are maxed out.
@@ -435,8 +477,9 @@ parseExtras:
435477
return nil
436478
}
437479

438-
func readDataDescriptor(r io.Reader, f *File) error {
439-
var buf [dataDescriptorLen]byte
480+
func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) {
481+
// Create enough space for the largest possible size
482+
var buf [dataDescriptor64Len]byte
440483

441484
// The spec says: "Although not originally assigned a
442485
// signature, the value 0x08074b50 has commonly been adopted
@@ -446,10 +489,9 @@ func readDataDescriptor(r io.Reader, f *File) error {
446489
// descriptors and should account for either case when reading
447490
// ZIP files to ensure compatibility."
448491
//
449-
// dataDescriptorLen includes the size of the signature but
450-
// first read just those 4 bytes to see if it exists.
492+
// First read just those 4 bytes to see if the signature exists.
451493
if _, err := io.ReadFull(r, buf[:4]); err != nil {
452-
return err
494+
return nil, err
453495
}
454496
off := 0
455497
maybeSig := readBuf(buf[:4])
@@ -458,21 +500,28 @@ func readDataDescriptor(r io.Reader, f *File) error {
458500
// bytes.
459501
off += 4
460502
}
461-
if _, err := io.ReadFull(r, buf[off:12]); err != nil {
462-
return err
503+
504+
end := dataDescriptorLen - 4
505+
if zip64 {
506+
end = dataDescriptor64Len - 4
463507
}
464-
b := readBuf(buf[:12])
465-
if b.uint32() != f.CRC32 {
466-
return ErrChecksum
508+
if _, err := io.ReadFull(r, buf[off:end]); err != nil {
509+
return nil, err
467510
}
511+
b := readBuf(buf[:end])
468512

469-
// The two sizes that follow here can be either 32 bits or 64 bits
470-
// but the spec is not very clear on this and different
471-
// interpretations has been made causing incompatibilities. We
472-
// already have the sizes from the central directory so we can
473-
// just ignore these.
513+
out := &dataDescriptor{
514+
crc32: b.uint32(),
515+
}
474516

475-
return nil
517+
if zip64 {
518+
out.compressedSize = b.uint64()
519+
out.uncompressedSize = b.uint64()
520+
} else {
521+
out.compressedSize = uint64(b.uint32())
522+
out.uncompressedSize = uint64(b.uint32())
523+
}
524+
return out, nil
476525
}
477526

478527
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {

0 commit comments

Comments (0)