diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/CompressionKind.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/CompressionKind.cs index 1779ea4349..7501017470 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/CompressionKind.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/CompressionKind.cs @@ -2,11 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using System.IO; using System.IO.Compression; -using Microsoft.ML.CommandLine; -using Microsoft.ML.Data.IO.Zlib; using Microsoft.ML.Internal.Utilities; namespace Microsoft.ML.Data.IO @@ -65,107 +62,4 @@ public static Stream DecompressStream(this CompressionKind compression, Stream s } } } - - /// - /// A loadable class to parameterize compression. - /// - public abstract class Compression - { - public abstract CompressionKind Kind { get; } - - /// - /// Generate an appropriate wrapping compressing stream for the codec. This - /// stream will be closable and disposable, without closing or disposing of - /// the passed in stream. The scheme for compression is parameterized by the - /// instance. - /// - /// The stream to which compressed data will be written - /// A stream to which the user can write uncompressed data - public virtual Stream Open(Stream stream) - { - return Kind.CompressStream(stream); - } - - // Named with "Impl" suffix since otherwise it was difficult to disambiguate - // with other identifiers. - public sealed class NoneImpl : Compression - { - public override CompressionKind Kind { get { return CompressionKind.None; } } - } - - public sealed class ZlibImpl : Compression - { - public abstract class ArgumentsBase - { - [Argument(ArgumentType.AtMostOnce, HelpText = "Level of compression from 0 to 9", ShortName = "c")] - - public int? CompressionLevel = 9; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Window bits from 8 to 15, higher values enable more useful run length encodings", ShortName = "w")] - public int WindowBits = 15; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Level of memory from 1 to 9, with higher values using more memory but enabling better, faster compression", ShortName = "m")] - public int MemoryLevel = 9; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Compression strategy to employ", ShortName = "s")] - public Constants.Strategy Strategy = Constants.Strategy.DefaultStrategy; - } - - public sealed class DeflateArguments : ArgumentsBase - { - } - - public sealed class ZlibArguments : ArgumentsBase - { - } - - public override CompressionKind Kind - { - get { return CompressionKind.Deflate; } - } - - private readonly int _windowBits; - private readonly Constants.Level _level; - private readonly bool _isDeflate; - private readonly int _memoryLevel; - private readonly Constants.Strategy _strategy; - - private ZlibImpl(ArgumentsBase args, bool isDeflate) - { - Contracts.CheckUserArg(args.CompressionLevel == null || - (0 <= args.CompressionLevel && args.CompressionLevel <= 9), - nameof(args.CompressionLevel), "Must be in range 0 to 9 or null"); - Contracts.CheckUserArg(8 <= args.WindowBits && args.WindowBits <= 15, nameof(args.WindowBits), "Must be in range 8 to 15"); - Contracts.CheckUserArg(1 <= args.MemoryLevel && args.MemoryLevel <= 9, nameof(args.MemoryLevel), "Must be in range 1 to 9"); - Contracts.CheckUserArg(Enum.IsDefined(typeof(Constants.Strategy), args.Strategy), nameof(args.Strategy), "Value was not defined"); - - if (args.CompressionLevel == null) - _level = Constants.Level.DefaultCompression; - else - _level = (Constants.Level)args.CompressionLevel; - Contracts.Assert(Enum.IsDefined(typeof(Constants.Level), _level)); - _windowBits = args.WindowBits; - _isDeflate = isDeflate; - _memoryLevel = args.MemoryLevel; - _strategy = args.Strategy; - } - - public ZlibImpl(DeflateArguments args) - : this(args, isDeflate: true) - { - Contracts.Assert(Kind == CompressionKind.Deflate); - } - - public ZlibImpl(ZlibArguments args) - : this(args, isDeflate: false) - { - // Contracts.Assert(Kind == CompressionKind.Zlib); - } - - public override Stream Open(Stream stream) - { - return new ZDeflateStream(stream, _level, _strategy, _memoryLevel, !_isDeflate, _windowBits); - } - } - } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Constants.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Constants.cs deleted file mode 100644 index 4aaec0db62..0000000000 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Constants.cs +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace Microsoft.ML.Data.IO.Zlib -{ - /// - /// See zlib.h - /// - public static class Constants - { - /// - /// Maximum size of history buffer inside zlib. - /// - public const int MaxBufferSize = 15; - - public enum Flush - { - NoFlush = 0, - PartialFlush = 1, - SyncFlush = 2, - FullFlush = 3, - Finish = 4, - Block = 5, - Trees = 6, - }; - - public enum RetCode - { - VersionError = -6, - BufError = -5, - MemError = -4, - DataError = -3, - StreamError = -2, - Errno = -1, - OK = 0, - StreamEnd = 1, - NeedDict = 2, - } - - public enum Level - { - DefaultCompression = -1, - Level0 = 0, - NoCompression = 0, - BestSpeed = 1, - Level1 = 1, - Level2 = 2, - Level3 = 3, - Level4 = 4, - Level5 = 5, - Level6 = 6, - Level7 = 7, - Level8 = 8, - BestCompression = 9, - Level9 = 9, - } - - public enum Strategy - { - DefaultStrategy = 0, - Filtered = 1, - HuffmanOnly = 2, - Rle = 3, - Fixed = 4, - } - - public enum Type - { - Binary = 0, - Ascii = 1, - Text = 1, - Unknown = 2, - } - } -} diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/ZDeflateStream.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/ZDeflateStream.cs deleted file mode 100644 index b7d2a98ac9..0000000000 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/ZDeflateStream.cs +++ /dev/null @@ -1,223 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.IO; - -namespace Microsoft.ML.Data.IO.Zlib -{ - public sealed class ZDeflateStream : Stream - { - private readonly Stream _compressed; - private readonly byte[] _buffer; - - private ZStream _zstrm; - private bool _disposed; - - public ZDeflateStream(Stream compressed, Constants.Level level = Constants.Level.BestCompression, - Constants.Strategy strategy = Constants.Strategy.DefaultStrategy, int memLevel = 9, - bool useZlibFormat = false, int windowBits = Constants.MaxBufferSize) - { - Constants.RetCode ret; - _compressed = compressed; - _buffer = new byte[1 << 15]; - unsafe - { - fixed (ZStream* pZstream = &_zstrm) - { - ret = Zlib.DeflateInit2(pZstream, (int)level, 8, useZlibFormat ? windowBits : -windowBits, memLevel, strategy); - } - } - if (ret != Constants.RetCode.OK) - throw Contracts.Except("Could not initialize zstream. Error code: {0}", ret); - _zstrm.AvailOut = (uint)_buffer.Length; - } - - protected override void Dispose(bool disposing) - { - if (_disposed) - return; - _disposed = true; - - Constants.RetCode disposeRet = Constants.RetCode.StreamEnd; - if (disposing) - { - unsafe - { - fixed (byte* pOutput = _buffer) - fixed (ZStream* pZstream = &_zstrm) - { - pZstream->AvailIn = 0; - pZstream->NextIn = null; - pZstream->NextOut = pOutput + BufferUsed; - do - { - RefreshOutput(pOutput); - disposeRet = Zlib.deflate(pZstream, Constants.Flush.Finish); - } while (disposeRet == Constants.RetCode.OK); - if (disposeRet == Constants.RetCode.StreamEnd) - { - Flush(); - _compressed.Flush(); - } - } - } - } - Constants.RetCode ret; - unsafe - { - fixed (ZStream* pZstream = &_zstrm) - { - ret = Zlib.deflateEnd(pZstream); - } - } - base.Dispose(disposing); - if (disposing) - { - GC.SuppressFinalize(this); - if (disposeRet != Constants.RetCode.StreamEnd) - throw Contracts.Except("Zlib deflate failed with {0}", disposeRet); - if (ret != Constants.RetCode.OK) - throw Contracts.Except("Zlib deflateEnd failed with {0}", ret); - } - } - - ~ZDeflateStream() - { - Dispose(false); - } - - public override bool CanRead - { - get { return false; } - } - - public override bool CanSeek - { - get { return false; } - } - - public override bool CanWrite - { - get { return true; } - } - - private int BufferUsed - { - get - { - Contracts.Assert(0 <= _zstrm.AvailOut); - Contracts.Assert(_zstrm.AvailOut <= _buffer.Length); - return _buffer.Length - (int)_zstrm.AvailOut; - } - } - - public override void Flush() - { - if (BufferUsed <= 0) - return; - _compressed.Write(_buffer, 0, BufferUsed); - _zstrm.AvailOut = (uint)_buffer.Length; - } - - public override long Length - { - get { throw Contracts.ExceptNotSupp(); } - } - - public override long Position - { - get - { - throw Contracts.ExceptNotSupp(); - } - set - { - throw Contracts.ExceptNotSupp(); - } - } - - public override int Read(byte[] buffer, int offset, int count) - { - throw Contracts.ExceptNotImpl(); - } - - public override long Seek(long offset, SeekOrigin origin) - { - throw Contracts.ExceptNotImpl(); - } - - public override void SetLength(long value) - { - throw Contracts.ExceptNotImpl(); - } - - public override void Write(byte[] buffer, int offset, int count) - { - Contracts.CheckValue(buffer, nameof(buffer)); - Contracts.CheckParamValue(offset >= 0, offset, nameof(offset), "offset can't be negative value"); - Contracts.CheckParamValue(offset < buffer.Length, offset, nameof(offset), "offset can't be greater than buffer length"); - Contracts.CheckParamValue(count >= 0, count, nameof(count), "count can't be negative value"); - Contracts.CheckParamValue(count <= buffer.Length - offset, count, nameof(count), - "count should be less or equal than difference between buffer length and offset"); - - int length = buffer.Length; - if (count == 0) - return; - unsafe - { - fixed (byte* pOutput = &_buffer[0]) - fixed (byte* pInput = &buffer[offset]) - { - RawWrite(pInput, pOutput, count); - } - } - } - - /// - /// Check zlib internal buffer and if it's full flush its results to compressed stream. - /// - /// link internal buffer - private unsafe void RefreshOutput(byte* pOutput) - { -#if DEBUG - fixed (byte* bufferPointer = &_buffer[0]) - { - Contracts.Assert(pOutput == bufferPointer); - } -#endif - if (_zstrm.AvailOut != 0) - return; - Flush(); - _zstrm.NextOut = pOutput; - } - - private unsafe void RawWrite(byte* buffer, byte* pOutput, int count) - { -#if DEBUG - fixed (byte* bufferPointer = &_buffer[0]) - { - Contracts.Assert(pOutput == bufferPointer); - } -#endif - Constants.RetCode ret; - _zstrm.AvailIn = (uint)count; - _zstrm.NextIn = buffer; - _zstrm.NextOut = pOutput + BufferUsed; - do - { - RefreshOutput(pOutput); - fixed (ZStream* pZstream = &_zstrm) - { - ret = Zlib.deflate(pZstream, Constants.Flush.NoFlush); - } - if (ret != Constants.RetCode.OK) - { - throw Contracts.Except("Zlib.deflate failed with {0}", ret); - } - } while (_zstrm.AvailIn > 0); - _zstrm.NextIn = null; - } - } -} diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/ZInflateStream.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/ZInflateStream.cs deleted file mode 100644 index 871627858f..0000000000 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/ZInflateStream.cs +++ /dev/null @@ -1,150 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.IO; - -namespace Microsoft.ML.Data.IO.Zlib -{ - public sealed class ZInflateStream : Stream - { - private readonly Stream _compressed; - private readonly byte[] _buffer; - - private int _bufferUsed; - private ZStream _zstrm; - private bool _disposed; - - public override bool CanRead => true; - - public override bool CanSeek => false; - - public override bool CanWrite => false; - - public override long Length { get { throw Contracts.ExceptNotSupp(); } } - - public override long Position { - get { throw Contracts.ExceptNotSupp(); } - set { throw Contracts.ExceptNotSupp(); } - } - - public ZInflateStream(Stream compressed, bool useZlibFormat = false) - { - Constants.RetCode ret; - _compressed = compressed; - _buffer = new byte[1 << 15]; - unsafe - { - fixed (ZStream* pZstream = &_zstrm) - { - ret = Zlib.InflateInit2(pZstream, useZlibFormat ? Constants.MaxBufferSize : -Constants.MaxBufferSize); - } - } - if (ret != Constants.RetCode.OK) - throw Contracts.Except("Could not initialize zstream. Error code: {0}", ret); - _bufferUsed = 0; - } - - protected override void Dispose(bool disposing) - { - if (_disposed) - return; - _disposed = true; - Constants.RetCode ret; - unsafe - { - fixed (ZStream* pZstream = &_zstrm) - { - ret = Zlib.inflateEnd(pZstream); - } - } - base.Dispose(disposing); - if (disposing) - { - GC.SuppressFinalize(this); - if (ret != Constants.RetCode.OK) - throw Contracts.Except("Zlib inflateEnd failed with {0}", ret); - } - } - - ~ZInflateStream() - { - Dispose(false); - } - - public override void Flush() - { - } - - public override int Read(byte[] buffer, int offset, int count) - { - Contracts.CheckValue(buffer, nameof(buffer)); - Contracts.CheckParamValue(offset >= 0, offset, nameof(offset), "Must be non-negative value"); - Contracts.CheckParamValue(offset < buffer.Length, offset, nameof(offset), "Must be greater than buffer length"); - Contracts.CheckParamValue(count >= 0, count, nameof(count), "Must be non-negative value"); - Contracts.CheckParamValue(count <= buffer.Length - offset, count, nameof(count), - "Must or equal than difference between buffer length and offset"); - if (count == 0) - return 0; - unsafe - { - fixed (byte* pInput = &_buffer[0]) - fixed (byte* pOutput = &buffer[offset]) - { - return InternalRead(pInput, pOutput, count); - } - } - throw Contracts.Except("Bad offset {0} and count {1} for length {2} buffer", offset, count, buffer.Length); - } - - private unsafe int InternalRead(byte* pInput, byte* pOutput, int count) - { - Constants.RetCode ret; - - _zstrm.NextIn = pInput + _bufferUsed - _zstrm.AvailIn; - _zstrm.NextOut = pOutput; - _zstrm.AvailOut = (uint)count; - do - { - if (_compressed != null && (_bufferUsed == 0 || _zstrm.AvailIn == 0)) - { - _bufferUsed = _compressed.Read(_buffer, 0, _buffer.Length); - _zstrm.AvailIn = (uint)_bufferUsed; - if (_bufferUsed == 0) - break; - _zstrm.NextIn = pInput; - } - else - _zstrm.NextIn = pInput + _bufferUsed - _zstrm.AvailIn; - - if (_zstrm.AvailIn == 0) - return 0; - - fixed (ZStream* pZstream = &_zstrm) - { - ret = Zlib.inflate(pZstream, Constants.Flush.NoFlush); - if (!(ret == Constants.RetCode.StreamEnd || ret == Constants.RetCode.OK)) - throw Contracts.Except($"{nameof(Zlib.inflate)} failed with {ret}"); - } - } while (ret != Constants.RetCode.StreamEnd && _zstrm.AvailOut != 0); - - return count - (int)_zstrm.AvailOut; - } - - public override long Seek(long offset, SeekOrigin origin) - { - throw Contracts.ExceptNotSupp(); - } - - public override void SetLength(long value) - { - throw Contracts.ExceptNotSupp(); - } - - public override void Write(byte[] buffer, int offset, int count) - { - throw Contracts.ExceptNotSupp(); - } - } -} diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs deleted file mode 100644 index 879394acfe..0000000000 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/Zlib/Zlib.cs +++ /dev/null @@ -1,115 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Runtime.InteropServices; -using System.Security; - -namespace Microsoft.ML.Data.IO.Zlib -{ - internal static class Zlib - { - public const string DllPath = "zlib.dll"; - -#pragma warning disable IDE1006 - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - private static extern unsafe Constants.RetCode deflateInit2_(ZStream* strm, int level, int method, int windowBits, - int memLevel, Constants.Strategy strategy, byte* version, int streamSize); - - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - private static extern unsafe Constants.RetCode inflateInit2_(ZStream* strm, int windowBits, byte* version, int streamSize); - - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - private static extern unsafe byte* zlibVersion(); - - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static extern unsafe Constants.RetCode deflateEnd(ZStream* strm); - - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static extern unsafe Constants.RetCode deflate(ZStream* strm, Constants.Flush flush); - - public static unsafe Constants.RetCode DeflateInit2(ZStream* strm, int level, int method, int windowBits, - int memLevel, Constants.Strategy strategy) - { - return deflateInit2_(strm, level, method, windowBits, memLevel, strategy, zlibVersion(), sizeof(ZStream)); - } - - public static unsafe Constants.RetCode InflateInit2(ZStream* strm, int windowBits) - { - return inflateInit2_(strm, windowBits, zlibVersion(), sizeof(ZStream)); - } - - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static extern unsafe Constants.RetCode inflate(ZStream* strm, Constants.Flush flush); - - [DllImport(DllPath), SuppressUnmanagedCodeSecurity] - public static extern unsafe Constants.RetCode inflateEnd(ZStream* strm); -#pragma warning restore IDE1006 - } - - [StructLayout(LayoutKind.Sequential)] - internal unsafe struct ZStream - { - /// - /// Pointer to input buffer. Zlib inflate and deflate routine consumes data from this buffer. - /// - public byte* NextIn; - /// - /// Number of bytes available at next_in. - /// - public uint AvailIn; - /// - /// Total number of input bytes read so far. - /// - public uint TotalIn; - - /// - /// Pointer to output buffer. Zlib inflate and deflate routine produce output to this location. - /// - public byte* NextOut; - /// - /// Remaining free space at next_out. - /// - public uint AvailOut; - /// - /// Total number of bytes output so far. - /// - public uint TotalOut; - - /// - /// Last error message, NULL if no error. - /// - public byte* Msg; - /// - /// Internal state struct. - /// - public IntPtr State; - - /// - /// Used to allocate the internal state. - /// - public IntPtr Zalloc; - /// - /// Used to free the internal state. - /// - public IntPtr Zfree; - /// - /// Private data object passed to zalloc and zfree. - /// - public IntPtr Opaque; - - /// - /// Best guess about the data type: binary or text. - /// - public int DataType; - /// - /// Adler32 value of the uncompressed data. - /// - public uint Adler; - /// - /// Reserved for future use. - /// - public uint Reserved; - } -}