@@ -52,12 +52,9 @@ type File struct {
52
52
FileHeader
53
53
zip * Reader
54
54
zipr io.ReaderAt
55
- zipsize int64
56
55
headerOffset int64
57
- }
58
-
59
- func (f * File ) hasDataDescriptor () bool {
60
- return f .Flags & 0x8 != 0
56
+ zip64 bool // zip64 extended information extra field presence
57
+ descErr error // error reading the data descriptor during init
61
58
}
62
59
63
60
// OpenReader will open the Zip file specified by name and return a ReadCloser.
@@ -112,14 +109,15 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
112
109
// a bad one, and then only report an ErrFormat or UnexpectedEOF if
113
110
// the file count modulo 65536 is incorrect.
114
111
for {
115
- f := & File {zip : z , zipr : r , zipsize : size }
112
+ f := & File {zip : z , zipr : r }
116
113
err = readDirectoryHeader (f , buf )
117
114
if err == ErrFormat || err == io .ErrUnexpectedEOF {
118
115
break
119
116
}
120
117
if err != nil {
121
118
return err
122
119
}
120
+ f .readDataDescriptor ()
123
121
z .File = append (z .File , f )
124
122
}
125
123
if uint16 (len (z .File )) != uint16 (end .directoryRecords ) { // only compare 16 bits here
@@ -180,26 +178,68 @@ func (f *File) Open() (io.ReadCloser, error) {
180
178
return nil , ErrAlgorithm
181
179
}
182
180
var rc io.ReadCloser = dcomp (r )
183
- var desr io.Reader
184
- if f .hasDataDescriptor () {
185
- desr = io .NewSectionReader (f .zipr , f .headerOffset + bodyOffset + size , dataDescriptorLen )
186
- }
187
181
rc = & checksumReader {
188
182
rc : rc ,
189
183
hash : crc32 .NewIEEE (),
190
184
f : f ,
191
- desr : desr ,
192
185
}
193
186
return rc , nil
194
187
}
195
188
189
+ // OpenRaw returns a Reader that provides access to the File's contents without
190
+ // decompression.
191
+ func (f * File ) OpenRaw () (io.Reader , error ) {
192
+ bodyOffset , err := f .findBodyOffset ()
193
+ if err != nil {
194
+ return nil , err
195
+ }
196
+ r := io .NewSectionReader (f .zipr , f .headerOffset + bodyOffset , int64 (f .CompressedSize64 ))
197
+ return r , nil
198
+ }
199
+
200
+ func (f * File ) readDataDescriptor () {
201
+ if ! f .hasDataDescriptor () {
202
+ return
203
+ }
204
+
205
+ bodyOffset , err := f .findBodyOffset ()
206
+ if err != nil {
207
+ f .descErr = err
208
+ return
209
+ }
210
+
211
+ // In section 4.3.9.2 of the spec: "However ZIP64 format MAY be used
212
+ // regardless of the size of a file. When extracting, if the zip64
213
+ // extended information extra field is present for the file the
214
+ // compressed and uncompressed sizes will be 8 byte values."
215
+ //
216
+ // Historically, this package has used the compressed and uncompressed
217
+ // sizes from the central directory to determine if the package is
218
+ // zip64.
219
+ //
220
+ // For this case we allow either the extra field or sizes to determine
221
+ // the data descriptor length.
222
+ zip64 := f .zip64 || f .isZip64 ()
223
+ n := int64 (dataDescriptorLen )
224
+ if zip64 {
225
+ n = dataDescriptor64Len
226
+ }
227
+ size := int64 (f .CompressedSize64 )
228
+ r := io .NewSectionReader (f .zipr , f .headerOffset + bodyOffset + size , n )
229
+ dd , err := readDataDescriptor (r , zip64 )
230
+ if err != nil {
231
+ f .descErr = err
232
+ return
233
+ }
234
+ f .CRC32 = dd .crc32
235
+ }
236
+
196
237
type checksumReader struct {
197
238
rc io.ReadCloser
198
239
hash hash.Hash32
199
240
nread uint64 // number of bytes read so far
200
241
f * File
201
- desr io.Reader // if non-nil, where to read the data descriptor
202
- err error // sticky error
242
+ err error // sticky error
203
243
}
204
244
205
245
func (r * checksumReader ) Stat () (fs.FileInfo , error ) {
@@ -220,12 +260,12 @@ func (r *checksumReader) Read(b []byte) (n int, err error) {
220
260
if r .nread != r .f .UncompressedSize64 {
221
261
return 0 , io .ErrUnexpectedEOF
222
262
}
223
- if r .desr != nil {
224
- if err1 := readDataDescriptor ( r . desr , r . f ); err1 != nil {
225
- if err1 == io .EOF {
263
+ if r .f . hasDataDescriptor () {
264
+ if r . f . descErr != nil {
265
+ if r . f . descErr == io .EOF {
226
266
err = io .ErrUnexpectedEOF
227
267
} else {
228
- err = err1
268
+ err = r . f . descErr
229
269
}
230
270
} else if r .hash .Sum32 () != r .f .CRC32 {
231
271
err = ErrChecksum
@@ -336,6 +376,8 @@ parseExtras:
336
376
337
377
switch fieldTag {
338
378
case zip64ExtraID :
379
+ f .zip64 = true
380
+
339
381
// update directory values from the zip64 extra block.
340
382
// They should only be consulted if the sizes read earlier
341
383
// are maxed out.
@@ -435,8 +477,9 @@ parseExtras:
435
477
return nil
436
478
}
437
479
438
- func readDataDescriptor (r io.Reader , f * File ) error {
439
- var buf [dataDescriptorLen ]byte
480
+ func readDataDescriptor (r io.Reader , zip64 bool ) (* dataDescriptor , error ) {
481
+ // Create enough space for the largest possible size
482
+ var buf [dataDescriptor64Len ]byte
440
483
441
484
// The spec says: "Although not originally assigned a
442
485
// signature, the value 0x08074b50 has commonly been adopted
@@ -446,10 +489,9 @@ func readDataDescriptor(r io.Reader, f *File) error {
446
489
// descriptors and should account for either case when reading
447
490
// ZIP files to ensure compatibility."
448
491
//
449
- // dataDescriptorLen includes the size of the signature but
450
- // first read just those 4 bytes to see if it exists.
492
+ // First read just those 4 bytes to see if the signature exists.
451
493
if _ , err := io .ReadFull (r , buf [:4 ]); err != nil {
452
- return err
494
+ return nil , err
453
495
}
454
496
off := 0
455
497
maybeSig := readBuf (buf [:4 ])
@@ -458,21 +500,28 @@ func readDataDescriptor(r io.Reader, f *File) error {
458
500
// bytes.
459
501
off += 4
460
502
}
461
- if _ , err := io .ReadFull (r , buf [off :12 ]); err != nil {
462
- return err
503
+
504
+ end := dataDescriptorLen - 4
505
+ if zip64 {
506
+ end = dataDescriptor64Len - 4
463
507
}
464
- b := readBuf (buf [:12 ])
465
- if b .uint32 () != f .CRC32 {
466
- return ErrChecksum
508
+ if _ , err := io .ReadFull (r , buf [off :end ]); err != nil {
509
+ return nil , err
467
510
}
511
+ b := readBuf (buf [:end ])
468
512
469
- // The two sizes that follow here can be either 32 bits or 64 bits
470
- // but the spec is not very clear on this and different
471
- // interpretations has been made causing incompatibilities. We
472
- // already have the sizes from the central directory so we can
473
- // just ignore these.
513
+ out := & dataDescriptor {
514
+ crc32 : b .uint32 (),
515
+ }
474
516
475
- return nil
517
+ if zip64 {
518
+ out .compressedSize = b .uint64 ()
519
+ out .uncompressedSize = b .uint64 ()
520
+ } else {
521
+ out .compressedSize = uint64 (b .uint32 ())
522
+ out .uncompressedSize = uint64 (b .uint32 ())
523
+ }
524
+ return out , nil
476
525
}
477
526
478
527
func readDirectoryEnd (r io.ReaderAt , size int64 ) (dir * directoryEnd , err error ) {
0 commit comments