Skip to content

Commit 796b5be

Browse files
authored
Merge PR #249, Update Inflater dynamic header reader to support partial reads
* Use IList<byte> for building Huffman Trees * Permits using ArraySegment instead of copying the source array * Uses Enumerable state machine * Skips two array copies by using ArraySegment * Throw usable exceptions when invalid values are being read * Fixes #253
1 parent 28e14cc commit 796b5be

File tree

4 files changed

+181
-96
lines changed

4 files changed

+181
-96
lines changed

src/ICSharpCode.SharpZipLib/Zip/Compression/Inflater.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ private bool Decode()
429429
mode = DECODE_HUFFMAN;
430430
break;
431431
case DeflaterConstants.DYN_TREES:
432-
dynHeader = new InflaterDynHeader();
432+
dynHeader = new InflaterDynHeader(input);
433433
mode = DECODE_DYN_HEADER;
434434
break;
435435
default:
@@ -470,12 +470,12 @@ private bool Decode()
470470
}
471471

472472
case DECODE_DYN_HEADER:
473-
if (!dynHeader.Decode(input)) {
473+
if (!dynHeader.AttemptRead()) {
474474
return false;
475475
}
476476

477-
litlenTree = dynHeader.BuildLitLenTree();
478-
distTree = dynHeader.BuildDistTree();
477+
litlenTree = dynHeader.LiteralLengthTree;
478+
distTree = dynHeader.DistanceTree;
479479
mode = DECODE_HUFFMAN;
480480
goto case DECODE_HUFFMAN; // fall through
481481

Lines changed: 121 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
34

45
namespace ICSharpCode.SharpZipLib.Zip.Compression
@@ -7,100 +8,148 @@ class InflaterDynHeader
78
{
89
#region Constants
910

10-
static readonly int[] BL_ORDER =
11-
{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
11+
// maximum number of literal/length codes
12+
const int LITLEN_MAX = 286;
13+
14+
// maximum number of distance codes
15+
const int DIST_MAX = 30;
16+
17+
// maximum data code lengths to read
18+
const int CODELEN_MAX = LITLEN_MAX + DIST_MAX;
19+
20+
// maximum meta code length codes to read
21+
const int META_MAX = 19;
22+
23+
static readonly int[] MetaCodeLengthIndex =
24+
{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
25+
1226
#endregion
1327

14-
public bool Decode(StreamManipulator input)
28+
/// <summary>
29+
/// Continue decoding header from <see cref="input"/> until more bits are needed or decoding has been completed
30+
/// </summary>
31+
/// <returns>Returns whether decoding could be completed</returns>
32+
public bool AttemptRead()
33+
=> !state.MoveNext() || state.Current;
34+
35+
public InflaterDynHeader(StreamManipulator input)
1536
{
16-
try
37+
this.input = input;
38+
stateMachine = CreateStateMachine();
39+
state = stateMachine.GetEnumerator();
40+
}
41+
42+
private IEnumerable<bool> CreateStateMachine()
43+
{
44+
45+
// Read initial code length counts from header
46+
while (!input.TryGetBits(5, ref litLenCodeCount, 257)) yield return false;
47+
while (!input.TryGetBits(5, ref distanceCodeCount, 1)) yield return false;
48+
while (!input.TryGetBits(4, ref metaCodeCount, 4)) yield return false;
49+
var dataCodeCount = litLenCodeCount + distanceCodeCount;
50+
51+
if (litLenCodeCount > LITLEN_MAX) throw new ValueOutOfRangeException(nameof(litLenCodeCount));
52+
if (distanceCodeCount > DIST_MAX) throw new ValueOutOfRangeException(nameof(distanceCodeCount));
53+
if (metaCodeCount > META_MAX) throw new ValueOutOfRangeException(nameof(metaCodeCount));
54+
55+
// Load code lengths for the meta tree from the header bits
56+
for (int i=0; i < metaCodeCount; i++)
57+
{
58+
while (!input.TryGetBits(3, ref codeLengths, MetaCodeLengthIndex[i])) yield return false;
59+
}
60+
61+
var metaCodeTree = new InflaterHuffmanTree(codeLengths);
62+
63+
// Decompress the meta tree symbols into the data table code lengths
64+
int index = 0;
65+
while (index < dataCodeCount)
1766
{
18-
lnum = input.GrabBits(5) + 257;
19-
dnum = input.GrabBits(5) + 1;
20-
blnum = input.GrabBits(4) + 4;
21-
num = lnum + dnum;
67+
byte codeLength;
68+
int symbol;
2269

23-
lengths = new byte[19];
70+
while ((symbol = metaCodeTree.GetSymbol(input)) < 0) yield return false;
2471

25-
for (int i = 0; i < blnum; i++)
72+
if (symbol < 16)
2673
{
27-
lengths[BL_ORDER[i]] = (byte)input.GrabBits(3, true);
74+
// append literal code length
75+
codeLengths[index++] = (byte)symbol;
2876
}
29-
blTree = new InflaterHuffmanTree(lengths);
30-
lengths = new byte[num];
31-
32-
int index = 0;
33-
while (index < lnum + dnum)
77+
else
3478
{
35-
byte len;
36-
37-
int symbol = blTree.GetSymbol(input);
38-
if (symbol < 0)
39-
return false;
40-
if (symbol < 16)
41-
lengths[index++] = (byte)symbol;
42-
else
79+
int repeatCount = 0;
80+
81+
if (symbol == 16) // Repeat last code length 3..6 times
82+
{
83+
84+
if (index == 0)
85+
throw new StreamDecodingException("Cannot repeat previous code length when no other code length has been read");
86+
87+
codeLength = codeLengths[index - 1];
88+
89+
// 2 bits + 3, [3..6]
90+
while(!input.TryGetBits(2, ref repeatCount, 3)) yield return false;
91+
}
92+
else if (symbol == 17) // Repeat zero 3..10 times
4393
{
44-
len = 0;
45-
if (symbol == 16)
46-
{
47-
if (index == 0)
48-
return false; // No last length!
49-
len = lengths[index - 1];
50-
symbol = input.GrabBits(2, true) + 3;
51-
}
52-
else if (symbol == 17)
53-
{
54-
// repeat zero 3..10 times
55-
symbol = input.GrabBits(3, true) + 3;
56-
}
57-
else
58-
{
59-
// (symbol == 18), repeat zero 11..138 times
60-
symbol = input.GrabBits(7, true) + 11;
61-
}
62-
63-
if (index + symbol > lnum + dnum)
64-
return false; // too many lengths!
65-
66-
// repeat last or zero symbol times
67-
while (symbol-- > 0)
68-
lengths[index++] = len;
94+
codeLength = 0;
95+
96+
// 3 bits + 3, [3..10]
97+
while (!input.TryGetBits(3, ref repeatCount, 3)) yield return false;
6998
}
70-
}
99+
else // (symbol == 18), Repeat zero 11..138 times
100+
{
101+
codeLength = 0;
71102

72-
if (lengths[256] == 0)
73-
return false; // No end-of-block code!
103+
// 7 bits + 11, [11..138]
104+
while (!input.TryGetBits(7, ref repeatCount, 11)) yield return false;
105+
}
74106

75-
return true;
76-
}
77-
catch (Exception x)
78-
{
79-
return false;
107+
if (index + repeatCount > dataCodeCount)
108+
throw new StreamDecodingException("Cannot repeat code lengths past total number of data code lengths");
109+
110+
while (repeatCount-- > 0)
111+
codeLengths[index++] = codeLength;
112+
}
80113
}
81-
}
82114

83-
public InflaterHuffmanTree BuildLitLenTree()
84-
{
85-
byte[] litlenLens = new byte[lnum];
86-
Array.Copy(lengths, 0, litlenLens, 0, lnum);
87-
return new InflaterHuffmanTree(litlenLens);
88-
}
115+
if (codeLengths[256] == 0)
116+
throw new StreamDecodingException("Inflater dynamic header end-of-block code missing");
89117

90-
public InflaterHuffmanTree BuildDistTree()
91-
{
92-
byte[] distLens = new byte[dnum];
93-
Array.Copy(lengths, lnum, distLens, 0, dnum);
94-
return new InflaterHuffmanTree(distLens);
118+
litLenTree = new InflaterHuffmanTree(new ArraySegment<byte>(codeLengths, 0, litLenCodeCount));
119+
distTree = new InflaterHuffmanTree(new ArraySegment<byte>(codeLengths, litLenCodeCount, distanceCodeCount));
120+
121+
yield return true;
95122
}
96123

124+
/// <summary>
125+
/// Get literal/length huffman tree, must not be used before <see cref="AttemptRead"/> has returned true
126+
/// </summary>
127+
/// <exception cref="StreamDecodingException">If header has not been successfully read by the state machine</exception>
128+
public InflaterHuffmanTree LiteralLengthTree
129+
=> litLenTree ?? throw new StreamDecodingException("Header properties were accessed before header had been successfully read");
130+
131+
/// <summary>
132+
/// Get distance huffman tree, must not be used before <see cref="AttemptRead"/> has returned true
133+
/// </summary>
134+
/// <exception cref="StreamDecodingException">If header has not been successfully read by the state machine</exception>
135+
public InflaterHuffmanTree DistanceTree
136+
=> distTree ?? throw new StreamDecodingException("Header properties were accessed before header had been successfully read");
137+
97138
#region Instance Fields
98-
byte[] lengths;
99139

100-
InflaterHuffmanTree blTree;
140+
private readonly StreamManipulator input;
141+
private readonly IEnumerator<bool> state;
142+
private readonly IEnumerable<bool> stateMachine;
143+
144+
private byte[] codeLengths = new byte[CODELEN_MAX];
145+
146+
private InflaterHuffmanTree litLenTree;
147+
private InflaterHuffmanTree distTree;
148+
149+
int litLenCodeCount, distanceCodeCount, metaCodeCount;
101150

102-
int lnum, dnum, blnum, num;
103151
#endregion
104152

105153
}
154+
106155
}

src/ICSharpCode.SharpZipLib/Zip/Compression/InflaterHuffmanTree.cs

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
34

45
namespace ICSharpCode.SharpZipLib.Zip.Compression
@@ -63,18 +64,18 @@ static InflaterHuffmanTree()
6364
/// <param name = "codeLengths">
6465
/// the array of code lengths
6566
/// </param>
66-
public InflaterHuffmanTree(byte[] codeLengths)
67+
public InflaterHuffmanTree(IList<byte> codeLengths)
6768
{
6869
BuildTree(codeLengths);
6970
}
7071
#endregion
7172

72-
void BuildTree(byte[] codeLengths)
73+
void BuildTree(IList<byte> codeLengths)
7374
{
7475
int[] blCount = new int[MAX_BITLEN + 1];
7576
int[] nextCode = new int[MAX_BITLEN + 1];
7677

77-
for (int i = 0; i < codeLengths.Length; i++) {
78+
for (int i = 0; i < codeLengths.Count; i++) {
7879
int bits = codeLengths[i];
7980
if (bits > 0) {
8081
blCount[bits]++;
@@ -115,7 +116,7 @@ void BuildTree(byte[] codeLengths)
115116
}
116117
}
117118

118-
for (int i = 0; i < codeLengths.Length; i++) {
119+
for (int i = 0; i < codeLengths.Count; i++) {
119120
int bits = codeLengths[i];
120121
if (bits == 0) {
121122
continue;
@@ -154,36 +155,49 @@ void BuildTree(byte[] codeLengths)
154155
public int GetSymbol(StreamManipulator input)
155156
{
156157
int lookahead, symbol;
157-
if ((lookahead = input.PeekBits(9)) >= 0) {
158-
if ((symbol = tree[lookahead]) >= 0) {
158+
if ((lookahead = input.PeekBits(9)) >= 0)
159+
{
160+
if ((symbol = tree[lookahead]) >= 0)
161+
{
159162
input.DropBits(symbol & 15);
160163
return symbol >> 4;
161164
}
162165
int subtree = -(symbol >> 4);
163166
int bitlen = symbol & 15;
164-
if ((lookahead = input.PeekBits(bitlen)) >= 0) {
167+
if ((lookahead = input.PeekBits(bitlen)) >= 0)
168+
{
165169
symbol = tree[subtree | (lookahead >> 9)];
166170
input.DropBits(symbol & 15);
167171
return symbol >> 4;
168-
} else {
172+
}
173+
else
174+
{
169175
int bits = input.AvailableBits;
170176
lookahead = input.PeekBits(bits);
171177
symbol = tree[subtree | (lookahead >> 9)];
172-
if ((symbol & 15) <= bits) {
178+
if ((symbol & 15) <= bits)
179+
{
173180
input.DropBits(symbol & 15);
174181
return symbol >> 4;
175-
} else {
182+
}
183+
else
184+
{
176185
return -1;
177186
}
178187
}
179-
} else {
188+
}
189+
else // Less than 9 bits
190+
{
180191
int bits = input.AvailableBits;
181192
lookahead = input.PeekBits(bits);
182193
symbol = tree[lookahead];
183-
if (symbol >= 0 && (symbol & 15) <= bits) {
194+
if (symbol >= 0 && (symbol & 15) <= bits)
195+
{
184196
input.DropBits(symbol & 15);
185197
return symbol >> 4;
186-
} else {
198+
}
199+
else
200+
{
187201
return -1;
188202
}
189203
}

src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/StreamManipulator.cs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,38 @@ public int PeekBits(int bitCount)
4242
}
4343

4444
/// <summary>
45-
/// Grabs the next n bits from the input and throws if <paramref name="allowZero"/> is false and the result is 0.
45+
/// Tries to grab the next <paramref name="bitCount"/> bits from the input and
46+
/// sets <paramref name="output"/> to the value, adding <paramref name="outputOffset"/>.
4647
/// </summary>
47-
public int GrabBits(int bitCount, bool allowZero = false)
48+
/// <returns>true if enough bits could be read, otherwise false</returns>
49+
public bool TryGetBits(int bitCount, ref int output, int outputOffset = 0)
4850
{
49-
var val = PeekBits(bitCount);
50-
if (!allowZero && val == 0)
51-
throw new SharpZipBaseException(bitCount + "-bit value cannot be zero");
51+
var bits = PeekBits(bitCount);
52+
if (bits < 0)
53+
{
54+
return false;
55+
}
56+
output = bits + outputOffset;
5257
DropBits(bitCount);
53-
return val;
54-
}
58+
return true;
59+
}
60+
61+
/// <summary>
62+
/// Tries to grab the next <paramref name="bitCount"/> bits from the input and
63+
/// stores the value, cast to a byte, in <paramref name="array"/> at position <paramref name="index"/>.
64+
/// </summary>
65+
/// <returns>true if enough bits could be read, otherwise false</returns>
66+
public bool TryGetBits(int bitCount, ref byte[] array, int index)
67+
{
68+
var bits = PeekBits(bitCount);
69+
if (bits < 0)
70+
{
71+
return false;
72+
}
73+
array[index] = (byte)bits;
74+
DropBits(bitCount);
75+
return true;
76+
}
5577

5678
/// <summary>
5779
/// Drops the next n bits from the input. You should have called PeekBits

0 commit comments

Comments
 (0)