Skip to content

Remove Boyer-Moore in favor of IndexOf #51815

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 6 additions & 41 deletions src/Http/WebUtilities/src/MultipartBoundary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,61 +7,26 @@ namespace Microsoft.AspNetCore.WebUtilities;

internal sealed class MultipartBoundary
{
private readonly int[] _skipTable = new int[256];
private readonly string _boundary;
private readonly byte[] _boundaryBytes;
private bool _expectLeadingCrlf;

public MultipartBoundary(string boundary, bool expectLeadingCrlf = true)
{
ArgumentNullException.ThrowIfNull(boundary);

_boundary = boundary;
_expectLeadingCrlf = expectLeadingCrlf;
Initialize(_boundary, _expectLeadingCrlf);
}
_boundaryBytes = Encoding.UTF8.GetBytes("\r\n--" + boundary);

private void Initialize(string boundary, bool expectLeadingCrlf)
{
if (expectLeadingCrlf)
{
BoundaryBytes = Encoding.UTF8.GetBytes("\r\n--" + boundary);
}
else
{
BoundaryBytes = Encoding.UTF8.GetBytes("--" + boundary);
}
FinalBoundaryLength = BoundaryBytes.Length + 2; // Include the final '--' terminator.

var length = BoundaryBytes.Length;
for (var i = 0; i < _skipTable.Length; ++i)
{
_skipTable[i] = length;
}
for (var i = 0; i < length; ++i)
{
_skipTable[BoundaryBytes[i]] = Math.Max(1, length - 1 - i);
}
}

public int GetSkipValue(byte input)
{
return _skipTable[input];
}

public bool ExpectLeadingCrlf
public void ExpectLeadingCrlf()
{
get { return _expectLeadingCrlf; }
set
{
if (value != _expectLeadingCrlf)
{
_expectLeadingCrlf = value;
Initialize(_boundary, _expectLeadingCrlf);
}
}
_expectLeadingCrlf = true;
}

public byte[] BoundaryBytes { get; private set; } = default!; // This gets initialized as part of Initialize called from in the ctor.
// Return either "--{boundary}" or "\r\n--{boundary}" depending on if we're looking for the end of a section
public ReadOnlySpan<byte> BoundaryBytes => _boundaryBytes.AsSpan(_expectLeadingCrlf ? 0 : 2);

public int FinalBoundaryLength { get; private set; }
}
2 changes: 1 addition & 1 deletion src/Http/WebUtilities/src/MultipartReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public MultipartReader(string boundary, Stream stream, int bufferSize)
return null;
}
var headers = await ReadHeadersAsync(cancellationToken);
_boundary.ExpectLeadingCrlf = true;
_boundary.ExpectLeadingCrlf();
_currentStream = new MultipartReaderStream(_stream, _boundary) { LengthLimit = BodyLengthLimit };
long? baseStreamOffset = _stream.CanSeek ? (long?)_stream.Position : null;
return new MultipartSection() { Headers = headers, Body = _currentStream, BaseStreamOffset = baseStreamOffset };
Expand Down
145 changes: 81 additions & 64 deletions src/Http/WebUtilities/src/MultipartReaderStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,27 @@ public override int Read(byte[] buffer, int offset, int count)
}
var bufferedData = _innerStream.BufferedData;

// scan for a boundary match, full or partial.
var index = bufferedData.AsSpan().IndexOf(_boundary.BoundaryBytes);
if (index >= 0)
{
// There is data before the boundary, we should return it to the user
if (index != 0)
{
// Sync, it's already buffered
var slice = buffer.AsSpan(0, Math.Min(buffer.Length, index));

var readAmount = _innerStream.Read(slice);
return UpdatePosition(readAmount);
}
else
{
var length = _boundary.BoundaryBytes.Length;

return ReadBoundary(this, length);
}
}

// scan for a partial boundary match.
int read;
if (SubMatch(bufferedData, _boundary.BoundaryBytes, out var matchOffset, out var matchCount))
{
Expand All @@ -181,28 +201,33 @@ public override int Read(byte[] buffer, int offset, int count)
var length = _boundary.BoundaryBytes.Length;
Debug.Assert(matchCount == length);

return ReadBoundary(this, length);
}

// No possible boundary match within the buffered data, return the data from the buffer.
read = _innerStream.Read(buffer, offset, Math.Min(count, bufferedData.Count));
return UpdatePosition(read);

static int ReadBoundary(MultipartReaderStream stream, int length)
{
// "The boundary may be followed by zero or more characters of
// linear whitespace. It is then terminated by either another CRLF"
// or -- for the final boundary.
var boundary = _bytePool.Rent(length);
read = _innerStream.Read(boundary, 0, length);
_bytePool.Return(boundary);
var boundary = stream._bytePool.Rent(length);
var read = stream._innerStream.Read(boundary, 0, length);
stream._bytePool.Return(boundary);
Debug.Assert(read == length); // It should have all been buffered

var remainder = _innerStream.ReadLine(lengthLimit: 100); // Whitespace may exceed the buffer.
var remainder = stream._innerStream.ReadLine(lengthLimit: 100).AsSpan(); // Whitespace may exceed the buffer.
remainder = remainder.Trim();
if (string.Equals("--", remainder, StringComparison.Ordinal))
if (remainder.Equals("--", StringComparison.Ordinal))
{
FinalBoundaryFound = true;
stream.FinalBoundaryFound = true;
}
Debug.Assert(FinalBoundaryFound || string.Equals(string.Empty, remainder, StringComparison.Ordinal), "Un-expected data found on the boundary line: " + remainder);
_finished = true;
Debug.Assert(stream.FinalBoundaryFound || remainder.IsEmpty, "Un-expected data found on the boundary line: " + remainder.ToString());
stream._finished = true;
return 0;
}

// No possible boundary match within the buffered data, return the data from the buffer.
read = _innerStream.Read(buffer, offset, Math.Min(count, bufferedData.Count));
return UpdatePosition(read);
}

public override Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken)
Expand All @@ -222,6 +247,27 @@ public override async ValueTask<int> ReadAsync(Memory<byte> buffer, Cancellation
}
var bufferedData = _innerStream.BufferedData;

var index = bufferedData.AsSpan().IndexOf(_boundary.BoundaryBytes);

if (index >= 0)
{
// There is data before the boundary, we should return it to the user
if (index != 0)
{
var slice = buffer[..Math.Min(buffer.Length, index)];

// Sync, it's already buffered
var readAmount = _innerStream.Read(slice.Span);
return UpdatePosition(readAmount);
}
else
{
var length = _boundary.BoundaryBytes.Length;

return await ReadBoundaryAsync(this, length, cancellationToken);
}
}

// scan for a boundary match, full or partial.
int matchOffset;
int matchCount;
Expand All @@ -231,70 +277,52 @@ public override async ValueTask<int> ReadAsync(Memory<byte> buffer, Cancellation
// We found a possible match, return any data before it.
if (matchOffset > bufferedData.Offset)
{
// Sync, it's already buffered
var slice = buffer[..Math.Min(buffer.Length, matchOffset - bufferedData.Offset)];

// Sync, it's already buffered
read = _innerStream.Read(slice.Span);
return UpdatePosition(read);
}

var length = _boundary.BoundaryBytes!.Length;
var length = _boundary.BoundaryBytes.Length;
Debug.Assert(matchCount == length);

return await ReadBoundaryAsync(this, length, cancellationToken);
}

// No possible boundary match within the buffered data, return the data from the buffer.
read = _innerStream.Read(buffer.Span[..Math.Min(buffer.Length, bufferedData.Count)]);
return UpdatePosition(read);

static async Task<int> ReadBoundaryAsync(MultipartReaderStream stream, int length, CancellationToken cancellationToken)
{
// "The boundary may be followed by zero or more characters of
// linear whitespace. It is then terminated by either another CRLF"
// or -- for the final boundary.
var boundary = _bytePool.Rent(length);
read = _innerStream.Read(boundary, 0, length);
_bytePool.Return(boundary);
var boundary = stream._bytePool.Rent(length);
var read = stream._innerStream.Read(boundary, 0, length);
stream._bytePool.Return(boundary);
Debug.Assert(read == length); // It should have all been buffered

var remainder = await _innerStream.ReadLineAsync(lengthLimit: 100, cancellationToken: cancellationToken); // Whitespace may exceed the buffer.
var remainder = await stream._innerStream.ReadLineAsync(lengthLimit: 100, cancellationToken: cancellationToken); // Whitespace may exceed the buffer.
remainder = remainder.Trim();
if (string.Equals("--", remainder, StringComparison.Ordinal))
{
FinalBoundaryFound = true;
stream.FinalBoundaryFound = true;
}
Debug.Assert(FinalBoundaryFound || string.Equals(string.Empty, remainder, StringComparison.Ordinal), "Un-expected data found on the boundary line: " + remainder);
Debug.Assert(stream.FinalBoundaryFound || string.Equals(string.Empty, remainder, StringComparison.Ordinal), "Un-expected data found on the boundary line: " + remainder);

_finished = true;
stream._finished = true;
return 0;
}

// No possible boundary match within the buffered data, return the data from the buffer.
read = _innerStream.Read(buffer.Span[..Math.Min(buffer.Length, bufferedData.Count)]);
return UpdatePosition(read);
}

// Does segment1 contain all of matchBytes, or does it end with the start of matchBytes?
// 1: AAAAABBBBBCCCCC
// 2: BBBBB
// Or:
// Does segment1 end with the start of matchBytes?
// 1: AAAAABBB
// 2: BBBBB
private bool SubMatch(ArraySegment<byte> segment1, byte[] matchBytes, out int matchOffset, out int matchCount)
private static bool SubMatch(ArraySegment<byte> segment1, ReadOnlySpan<byte> matchBytes, out int matchOffset, out int matchCount)
{
// case 1: does segment1 fully contain matchBytes?
{
var matchBytesLengthMinusOne = matchBytes.Length - 1;
var matchBytesLastByte = matchBytes[matchBytesLengthMinusOne];
var segmentEndMinusMatchBytesLength = segment1.Offset + segment1.Count - matchBytes.Length;

matchOffset = segment1.Offset;
while (matchOffset < segmentEndMinusMatchBytesLength)
{
var lookaheadTailChar = segment1.Array![matchOffset + matchBytesLengthMinusOne];
if (lookaheadTailChar == matchBytesLastByte &&
CompareBuffers(segment1.Array, matchOffset, matchBytes, 0, matchBytesLengthMinusOne) == 0)
{
matchCount = matchBytes.Length;
return true;
}
matchOffset += _boundary.GetSkipValue(lookaheadTailChar);
}
}

// case 2: does segment1 end with the start of matchBytes?
matchOffset = Math.Max(segment1.Offset, segment1.Offset + segment1.Count - matchBytes.Length);
var segmentEnd = segment1.Offset + segment1.Count;

// clear matchCount to zero
Expand All @@ -315,19 +343,8 @@ private bool SubMatch(ArraySegment<byte> segment1, byte[] matchBytes, out int ma
break;
}
}
return matchCount > 0;
}

private static int CompareBuffers(byte[] buffer1, int offset1, byte[] buffer2, int offset2, int count)
{
for (; count-- > 0; offset1++, offset2++)
{
if (buffer1[offset1] != buffer2[offset2])
{
return buffer1[offset1] - buffer2[offset2];
}
}
return 0;
return matchCount > 0;
}

public override void CopyTo(Stream destination, int bufferSize)
Expand Down