Skip to content

Speedup package:crypto (with a focus on md5) #892

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkgs/crypto/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## 3.0.7-wip

- Run `dart format` with the new style.
- Performance improvements.

## 3.0.6

Expand Down
149 changes: 149 additions & 0 deletions pkgs/crypto/benchmark/benchmark.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:io' show exit, stderr;
import 'dart:typed_data';

import 'package:convert/convert.dart';
import 'package:crypto/crypto.dart';

/// Benchmarks one of the hash functions from `package:crypto`.
///
/// Recognized arguments:
///
/// * At most one hash function name: `md5`, `sha1`, `sha224`, `sha256`,
///   `sha384`, `sha512`, `sha512224` or `sha512256`. Defaults to `md5`.
/// * `--custom=<mb>`: instead of running the standard suite, hash a single
///   chunk of `<mb>` mb exactly once and print the result.
///
/// An unknown argument, a second hash function name, or a `--custom=` value
/// that is not a non-negative integer is reported on stderr and terminates the
/// process with exit code 1.
void main(List<String> args) {
  const customPrefix = '--custom=';

  Hash? selected;
  int? customSize;

  // Reports [message] on stderr and terminates with a failure exit code.
  Never fail(String message) {
    stderr.writeln(message);
    exit(1);
  }

  void setFunction(Hash newFunction, String message) {
    if (selected != null) fail('Hash function already set.');
    selected = newFunction;
    print('Using hash function $message');
  }

  for (final arg in args) {
    switch (arg) {
      case 'md5':
        setFunction(md5, 'md5');
        break;
      case 'sha1':
        setFunction(sha1, 'sha1');
        break;
      case 'sha256':
        setFunction(sha256, 'sha256');
        break;
      case 'sha224':
        setFunction(sha224, 'sha224');
        break;
      case 'sha384':
        setFunction(sha384, 'sha384');
        break;
      case 'sha512':
        setFunction(sha512, 'sha512');
        break;
      case 'sha512224':
        setFunction(sha512224, 'sha512/224');
        break;
      case 'sha512256':
        setFunction(sha512256, 'sha512/256');
        break;
      default:
        if (!arg.startsWith(customPrefix)) fail('Unknown argument: $arg');
        // Validate here so that a typo produces the same kind of diagnostic
        // as any other bad argument instead of an uncaught FormatException
        // (or a failure deep inside the buffer allocation for negatives).
        final value = arg.substring(customPrefix.length);
        final size = int.tryParse(value);
        if (size == null || size < 0) {
          fail('Invalid value for $customPrefix (expected a non-negative '
              'integer): $value');
        }
        customSize = size;
    }
  }
  if (selected == null) setFunction(md5, 'md5');

  // `selected` is assigned inside a closure, so it never promotes to a
  // non-nullable type; resolve it once here instead of asserting per call.
  final function = selected ?? md5;

  if (customSize != null) {
    doIterationsChunk(function, mb: customSize, iterations: 1, doPrint: true);
    return;
  }

  // Warmup.
  doIterationsChunk(function, mb: 1, iterations: 100, doPrint: false);

  // Benchmarks.
  print('One chunk input');
  doIterationsChunk(function, mb: 1, iterations: 1000, doPrint: true);
  doIterationsChunk(function, mb: 10, iterations: 100, doPrint: true);
  doIterationsChunk(function, mb: 100, iterations: 10, doPrint: true);
  doIterationsChunk(function, mb: 1000, iterations: 1, doPrint: true);

  print('');
  print('Add in 1024 byte chunks:');
  doIterationsSmallChunks(
    function,
    chunkSize: 1024,
    mb: 1,
    iterations: 1000,
    doPrint: true,
  );

  print('');
  print('Add in 100 byte chunks:');
  doIterationsSmallChunks(
    function,
    chunkSize: 100,
    mb: 1,
    iterations: 1000,
    doPrint: true,
  );

  print('');
  print('Add in 4 byte chunks:');
  doIterationsSmallChunks(
    function,
    chunkSize: 4,
    mb: 1,
    iterations: 1000,
    doPrint: true,
  );
}

/// Hashes one zero-filled buffer of [mb] * 1024 * 1024 bytes [iterations]
/// times with [function], timing each run separately.
///
/// The collected timings are summarized with [printStats] only when [doPrint]
/// is true.
void doIterationsChunk(
  Hash function, {
  required int mb,
  required int iterations,
  required bool doPrint,
}) {
  final input = Uint8List(mb * 1024 * 1024);
  final timings = [
    for (var run = 0; run < iterations; run++) hashChunk(input, function),
  ];
  if (!doPrint) return;
  printStats(timings, input.length, iterations);
}

/// Hashes roughly [mb] * 1024 * 1024 bytes [iterations] times with
/// [function], feeding the data in pieces of [chunkSize] bytes.
///
/// A single zero-filled buffer of [chunkSize] bytes is added as many times as
/// fits completely into the requested total, so the amount actually hashed is
/// rounded down to a multiple of [chunkSize]. The collected timings are
/// summarized with [printStats] only when [doPrint] is true.
void doIterationsSmallChunks(
  Hash function, {
  required int chunkSize,
  required int mb,
  required int iterations,
  required bool doPrint,
}) {
  final piece = Uint8List(chunkSize);
  final totalBytes = mb * 1024 * 1024;
  final addsPerRun = totalBytes ~/ chunkSize;
  final timings = [
    for (var run = 0; run < iterations; run++)
      hashSmallChunks(piece, addsPerRun, function),
  ];
  if (!doPrint) return;
  printStats(timings, piece.length * addsPerRun, iterations);
}

/// Times a single one-shot conversion of [data] with [function].
///
/// Returns the elapsed time in milliseconds, derived from the stopwatch's
/// microsecond reading.
double hashChunk(Uint8List data, Hash function) {
  final timer = Stopwatch();
  timer.start();
  final digest = function.convert(data);
  timer.stop();

  // Sanity check on the result; an empty digest is not expected.
  if (digest.bytes.isEmpty) {
    throw StateError('This should never happen');
  }
  return timer.elapsedMicroseconds / 1000;
}

/// Times a chunked conversion in which [data] is added [addTimes] times to
/// [function] before the conversion is closed.
///
/// The measured span covers setting up the chunked conversion, every `add`
/// call, closing the input, and reading the single resulting digest. Returns
/// the elapsed time in milliseconds, derived from the stopwatch's microsecond
/// reading.
double hashSmallChunks(Uint8List data, int addTimes, Hash function) {
  final timer = Stopwatch()..start();

  final collected = AccumulatorSink<Digest>();
  final hasher = function.startChunkedConversion(collected);
  var remaining = addTimes;
  while (remaining > 0) {
    hasher.add(data);
    remaining--;
  }
  hasher.close();
  final digest = collected.events.single;

  timer.stop();

  // Sanity check on the result; an empty digest is not expected.
  if (digest.bytes.isEmpty) {
    throw StateError('This should never happen');
  }
  return timer.elapsedMicroseconds / 1000;
}

/// Prints throughput statistics for a series of timed hash runs.
///
/// [runtimesInMs] holds the duration of each run in milliseconds and
/// [dataLength] the number of bytes hashed per run. The printed line reports
/// the data size in mb and the throughput in kb per ms computed from both the
/// average and the median runtime. For an even number of samples the median
/// is the mean of the two middle samples.
///
/// [iterations] is kept for call-site compatibility; the sample count is
/// taken from [runtimesInMs] itself.
///
/// The caller's list is left unmodified. Throws an [ArgumentError] if
/// [runtimesInMs] is empty.
void printStats(List<double> runtimesInMs, int dataLength, int iterations) {
  if (runtimesInMs.isEmpty) {
    throw ArgumentError.value(
      runtimesInMs,
      'runtimesInMs',
      'Must contain at least one measurement',
    );
  }

  // Sort a copy so that computing the median does not reorder the caller's
  // measurements.
  final sorted = List<double>.of(runtimesInMs)..sort();

  final mb = dataLength / 1024 / 1024;
  final kb = dataLength / 1024;

  final sum = sorted.reduce((value, element) => value + element);
  final averageRuntimeInMs = sum / sorted.length;

  final middle = sorted.length ~/ 2;
  final medianRuntimeInMs = sorted.length.isOdd
      ? sorted[middle]
      : (sorted[middle - 1] + sorted[middle]) / 2;

  final averageKbPerMs = kb / averageRuntimeInMs;
  final medianKbPerMs = kb / medianRuntimeInMs;
  print(
    'Processed ${mb.toStringAsFixed(2)} mb of data with an average/median of '
    '${averageKbPerMs.toStringAsFixed(2)} / '
    '${medianKbPerMs.toStringAsFixed(2)} '
    'kb per ms.',
  );
}
130 changes: 72 additions & 58 deletions pkgs/crypto/lib/src/hash_sink.dart
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

import 'dart:typed_data';

import 'package:typed_data/typed_data.dart';

import 'digest.dart';
import 'utils.dart';

Expand All @@ -19,11 +17,24 @@ abstract class HashSink implements Sink<List<int>> {
/// Whether the hash function operates on big-endian words.
final Endian _endian;

/// The words in the current chunk.
/// A [ByteData] view of the current chunk of data.
///
/// This is an instance variable to avoid re-allocating.
ByteData? _byteDataView;

/// The actual chunk of bytes currently accumulating.
///
/// The same allocation will be reused over and over again; once full it is
/// passed to the underlying hashing algorithm for processing.
final Uint8List _chunk;

/// The index of the next insertion into the chunk.
int _chunkNextIndex;

/// A [Uint32List] (in specified endian) copy of the chunk.
///
/// This is an instance variable to avoid re-allocating, but its data isn't
/// used across invocations of [_iterate].
final Uint32List _currentChunk;
/// This is an instance variable to avoid re-allocating.
final Uint32List _chunk32;

/// Messages with more than 2^53-1 bits are not supported.
///
Expand All @@ -35,9 +46,6 @@ abstract class HashSink implements Sink<List<int>> {
/// The length of the input data so far, in bytes.
int _lengthInBytes = 0;

/// Data that has yet to be processed by the hash function.
final _pendingData = Uint8Buffer();

/// Whether [close] has been called.
bool _isClosed = false;

Expand Down Expand Up @@ -66,7 +74,9 @@ abstract class HashSink implements Sink<List<int>> {
}) : _endian = endian,
assert(signatureBytes >= 8),
_signatureBytes = signatureBytes,
_currentChunk = Uint32List(chunkSizeInWords);
_chunk = Uint8List(chunkSizeInWords * bytesPerWord),
_chunkNextIndex = 0,
_chunk32 = Uint32List(chunkSizeInWords);

/// Runs a single iteration of the hash computation, updating [digest] with
/// the result.
Expand All @@ -79,18 +89,47 @@ abstract class HashSink implements Sink<List<int>> {
void add(List<int> data) {
if (_isClosed) throw StateError('Hash.add() called after close().');
_lengthInBytes += data.length;
_pendingData.addAll(data);
_iterate();
_addData(data);
}

/// Appends [data] to the pending chunk, hashing every chunk that fills up.
///
/// Bytes are copied into [_chunk] starting at [_chunkNextIndex]. Each time the
/// chunk becomes full it is decoded into [_chunk32] using [_endian] and passed
/// to [updateHash]. Trailing bytes that do not fill a chunk stay in [_chunk],
/// and [_chunkNextIndex] records where the next call continues.
void _addData(List<int> data) {
// Index of the next unread byte in [data].
var dataIndex = 0;
// Work on a local copy of the insertion index; the field itself is only
// written back right before returning.
var chunkNextIndex = _chunkNextIndex;
final size = _chunk.length;
// Created on first use and reused by later calls.
_byteDataView ??= _chunk.buffer.asByteData();
while (true) {
// Check if there is enough data left in [data] for a full chunk.
// [restEnd] is the index in [_chunk] just past where the unread remainder
// of [data] would end if it were all copied in.
var restEnd = chunkNextIndex + data.length - dataIndex;
if (restEnd < size) {
// There is not enough data, so just add into [_chunk].
_chunk.setRange(chunkNextIndex, restEnd, data, dataIndex);
_chunkNextIndex = restEnd;
return;
}

// There is enough data to fill the chunk. Fill it and process it.
_chunk.setRange(chunkNextIndex, size, data, dataIndex);
dataIndex += size - chunkNextIndex;

// Now do endian conversion to words.
// The body of this do/while runs before its condition is checked, so it
// relies on [_chunk32] having at least one element.
var j = 0;
do {
_chunk32[j] = _byteDataView!.getUint32(j * bytesPerWord, _endian);
j++;
} while (j < _chunk32.length);

updateHash(_chunk32);
// The chunk has been consumed; further bytes go in from its start.
chunkNextIndex = 0;
}
}

@override
void close() {
if (_isClosed) return;
_isClosed = true;

_finalizeData();
_iterate();
assert(_pendingData.isEmpty);
_finalizeAndProcessData();
assert(_chunkNextIndex == 0);
_sink.add(Digest(_byteDigest()));
_sink.close();
}
Expand All @@ -108,65 +147,38 @@ abstract class HashSink implements Sink<List<int>> {
return byteDigest;
}

/// Iterates through [_pendingData], updating the hash computation for each
/// chunk.
void _iterate() {
var pendingDataBytes = _pendingData.buffer.asByteData();
var pendingDataChunks = _pendingData.length ~/ _currentChunk.lengthInBytes;
for (var i = 0; i < pendingDataChunks; i++) {
// Copy words from the pending data buffer into the current chunk buffer.
for (var j = 0; j < _currentChunk.length; j++) {
_currentChunk[j] = pendingDataBytes.getUint32(
i * _currentChunk.lengthInBytes + j * bytesPerWord,
_endian,
);
}

// Run the hash function on the current chunk.
updateHash(_currentChunk);
}

// Remove all pending data up to the last clean chunk break.
_pendingData.removeRange(
0,
pendingDataChunks * _currentChunk.lengthInBytes,
);
}

/// Finalizes [_pendingData].
/// Finalizes the data and finishes the hash.
///
/// This adds a 1 bit to the end of the message, and expands it with 0 bits to
/// pad it out.
void _finalizeData() {
// Pad out the data with 0x80, eight or sixteen 0s, and as many more 0s
// as we need to land cleanly on a chunk boundary.
_pendingData.add(0x80);
void _finalizeAndProcessData() {
if (_lengthInBytes > _maxMessageLengthInBytes) {
throw UnsupportedError(
'Hashing is unsupported for messages with more than 2^53 bits.',
);
}

final contentsLength = _lengthInBytes + 1 /* 0x80 */ + _signatureBytes;
final finalizedLength = _roundUp(
contentsLength,
_currentChunk.lengthInBytes,
_chunk.lengthInBytes,
);

for (var i = 0; i < finalizedLength - contentsLength; i++) {
_pendingData.add(0);
}
// Prepare the finalization data.
var padding = Uint8List(finalizedLength - _lengthInBytes);
// Pad out the data with 0x80, eight or sixteen 0s, and as many more 0s
// as we need to land cleanly on a chunk boundary.
padding[0] = 0x80;

if (_lengthInBytes > _maxMessageLengthInBytes) {
throw UnsupportedError(
'Hashing is unsupported for messages with more than 2^53 bits.',
);
}
// The rest is already 0-bytes.

var lengthInBits = _lengthInBytes * bitsPerByte;

// Add the full length of the input data as a 64-bit value at the end of the
// hash. Note: we're only writing out 64 bits, so skip ahead 8 if the
// signature is 128-bit.
final offset = _pendingData.length + (_signatureBytes - 8);

_pendingData.addAll(Uint8List(_signatureBytes));
var byteData = _pendingData.buffer.asByteData();
final offset = padding.length - 8;
var byteData = padding.buffer.asByteData();

// We're essentially doing byteData.setUint64(offset, lengthInBits, _endian)
// here, but that method isn't supported on dart2js so we implement it
Expand All @@ -180,6 +192,8 @@ abstract class HashSink implements Sink<List<int>> {
byteData.setUint32(offset, lowBits, _endian);
byteData.setUint32(offset + bytesPerWord, highBits, _endian);
}

_addData(padding);
}

/// Rounds [val] up to the next multiple of [n], as long as [n] is a power of
Expand Down
Loading